postgres_to_redshift 0.0.1 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.travis.yml +10 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +44 -0
- data/README.md +6 -1
- data/Rakefile +8 -0
- data/bin/postgres_to_redshift +0 -1
- data/lib/postgres_to_redshift/column.rb +85 -0
- data/lib/postgres_to_redshift/table.rb +54 -0
- data/lib/postgres_to_redshift/version.rb +1 -1
- data/lib/postgres_to_redshift.rb +72 -75
- data/postgres_to_redshift.gemspec +1 -1
- data/spec/lib/postgres_to_redshift/column_spec.rb +168 -0
- data/spec/lib/postgres_to_redshift/table_spec.rb +83 -0
- data/spec/lib/postgres_to_redshift_spec.rb +29 -0
- data/spec/spec_helper.rb +90 -0
- data/spec/spec_prepare.rb +15 -0
- metadata +20 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b9aca81574e39acdc400fcfd77f454cb9c93f682
|
4
|
+
data.tar.gz: f4a8430406d25028ff1ac9b305837d34d0463464
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8136aaffdd83cb676290393235f3cb1e99cf257c8c3dc193c0b234feec09b3f3cfedb29a8e6ad7f9f55641a2e1ef02c624f6d86073add5fec9ee5e0a2eacdde4
|
7
|
+
data.tar.gz: c0b9b49f1ba41e26fd2bb87328c5f672648a98ddd432335d8a899f6dbd722b1c32b6d6cb85b4de19630e42690eaf835957a6e801001d8a46972f917f0678482e
|
data/.gitignore
CHANGED
data/.rspec
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
language: ruby
|
2
|
+
bundler_args: --without development --deployment --jobs=3 --retry=3
|
3
|
+
cache: bundler
|
4
|
+
rvm:
|
5
|
+
- 2.2.0
|
6
|
+
- 2.1.0
|
7
|
+
before_script:
|
8
|
+
- psql -c 'create database travis_ci_test;' -U postgres
|
9
|
+
env:
|
10
|
+
- POSTGRES_TO_REDSHIFT_SOURCE_URI=postgres://postgres@localhost/travis_ci_test
|
data/Gemfile
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
postgres_to_redshift (0.1.1)
|
5
|
+
aws-sdk (~> 1.54)
|
6
|
+
pg (~> 0.17.0)
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: https://rubygems.org/
|
10
|
+
specs:
|
11
|
+
aws-sdk (1.61.0)
|
12
|
+
aws-sdk-v1 (= 1.61.0)
|
13
|
+
aws-sdk-v1 (1.61.0)
|
14
|
+
json (~> 1.4)
|
15
|
+
nokogiri (>= 1.4.4)
|
16
|
+
diff-lcs (1.2.5)
|
17
|
+
json (1.8.2)
|
18
|
+
mini_portile (0.6.2)
|
19
|
+
nokogiri (1.6.6.2)
|
20
|
+
mini_portile (~> 0.6.0)
|
21
|
+
pg (0.17.1)
|
22
|
+
rake (10.4.2)
|
23
|
+
rspec (3.2.0)
|
24
|
+
rspec-core (~> 3.2.0)
|
25
|
+
rspec-expectations (~> 3.2.0)
|
26
|
+
rspec-mocks (~> 3.2.0)
|
27
|
+
rspec-core (3.2.0)
|
28
|
+
rspec-support (~> 3.2.0)
|
29
|
+
rspec-expectations (3.2.0)
|
30
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
31
|
+
rspec-support (~> 3.2.0)
|
32
|
+
rspec-mocks (3.2.0)
|
33
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
34
|
+
rspec-support (~> 3.2.0)
|
35
|
+
rspec-support (3.2.1)
|
36
|
+
|
37
|
+
PLATFORMS
|
38
|
+
ruby
|
39
|
+
|
40
|
+
DEPENDENCIES
|
41
|
+
bundler (~> 1.6)
|
42
|
+
postgres_to_redshift!
|
43
|
+
rake
|
44
|
+
rspec
|
data/README.md
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
This gem copies data from postgres to redshift. It's especially useful to copy data from postgres to redshift in heroku.
|
4
4
|
|
5
|
+
[![Build Status](https://travis-ci.org/kitchensurfing/postgres_to_redshift.svg?branch=master)](https://travis-ci.org/kitchensurfing/postgres_to_redshift)
|
6
|
+
|
5
7
|
## Installation
|
6
8
|
|
7
9
|
Add this line to your application's Gemfile:
|
@@ -20,8 +22,11 @@ Or install it yourself as:
|
|
20
22
|
|
21
23
|
## Usage
|
22
24
|
|
25
|
+
Set your source and target databases, as well as your s3 intermediary.
|
26
|
+
|
23
27
|
```bash
|
24
|
-
export
|
28
|
+
export POSTGRES_TO_REDSHIFT_SOURCE_URI='postgres://username:password@host:port/database-name'
|
29
|
+
export POSTGRES_TO_REDSHIFT_TARGET_URI='postgres://username:password@host:port/database-name'
|
25
30
|
export S3_DATABASE_EXPORT_ID='yourid'
|
26
31
|
export S3_DATABASE_EXPORT_KEY='yourkey'
|
27
32
|
export S3_DATABASE_EXPORT_BUCKET='some-bucket-to-use'
|
data/Rakefile
CHANGED
data/bin/postgres_to_redshift
CHANGED
@@ -0,0 +1,85 @@
|
|
1
|
+
# table_catalog | postgres_to_redshift
|
2
|
+
# table_schema | public
|
3
|
+
# table_name | films
|
4
|
+
# column_name | description
|
5
|
+
# ordinal_position | 2
|
6
|
+
# column_default |
|
7
|
+
# is_nullable | YES
|
8
|
+
# data_type | character varying
|
9
|
+
# character_maximum_length | 255
|
10
|
+
# character_octet_length | 1020
|
11
|
+
# numeric_precision |
|
12
|
+
# numeric_precision_radix |
|
13
|
+
# numeric_scale |
|
14
|
+
# datetime_precision |
|
15
|
+
# interval_type |
|
16
|
+
# interval_precision |
|
17
|
+
# character_set_catalog |
|
18
|
+
# character_set_schema |
|
19
|
+
# character_set_name |
|
20
|
+
# collation_catalog |
|
21
|
+
# collation_schema |
|
22
|
+
# collation_name |
|
23
|
+
# domain_catalog |
|
24
|
+
# domain_schema |
|
25
|
+
# domain_name |
|
26
|
+
# udt_catalog | postgres_to_redshift
|
27
|
+
# udt_schema | pg_catalog
|
28
|
+
# udt_name | varchar
|
29
|
+
# scope_catalog |
|
30
|
+
# scope_schema |
|
31
|
+
# scope_name |
|
32
|
+
# maximum_cardinality |
|
33
|
+
# dtd_identifier | 2
|
34
|
+
# is_self_referencing | NO
|
35
|
+
# is_identity | NO
|
36
|
+
# identity_generation |
|
37
|
+
# identity_start |
|
38
|
+
# identity_increment |
|
39
|
+
# identity_maximum |
|
40
|
+
# identity_minimum |
|
41
|
+
# identity_cycle |
|
42
|
+
# is_generated | NEVER
|
43
|
+
# generation_expression |
|
44
|
+
# is_updatable | YES
|
45
|
+
#
|
46
|
+
class PostgresToRedshift::Column
|
47
|
+
attr_accessor :attributes
|
48
|
+
|
49
|
+
CAST_TYPES_FOR_COPY = {
|
50
|
+
"text" => "CHARACTER VARYING(65535)",
|
51
|
+
"json" => "CHARACTER VARYING(65535)",
|
52
|
+
"bytea" => "CHARACTER VARYING(65535)",
|
53
|
+
"money" => "DECIMAL(19,2)",
|
54
|
+
"oid" => "CHARACTER VARYING(65535)",
|
55
|
+
}
|
56
|
+
|
57
|
+
def initialize(attributes: )
|
58
|
+
self.attributes = attributes
|
59
|
+
end
|
60
|
+
|
61
|
+
def name
|
62
|
+
attributes["column_name"]
|
63
|
+
end
|
64
|
+
|
65
|
+
def name_for_copy
|
66
|
+
if needs_type_cast?
|
67
|
+
%Q[CAST("#{name}" AS #{data_type_for_copy}) AS #{name}]
|
68
|
+
else
|
69
|
+
%Q["#{name}"]
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def data_type
|
74
|
+
attributes["data_type"]
|
75
|
+
end
|
76
|
+
|
77
|
+
def data_type_for_copy
|
78
|
+
CAST_TYPES_FOR_COPY[data_type] || data_type
|
79
|
+
end
|
80
|
+
|
81
|
+
private
|
82
|
+
def needs_type_cast?
|
83
|
+
data_type != data_type_for_copy
|
84
|
+
end
|
85
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# table_catalog | postgres_to_redshift
|
2
|
+
# table_schema | public
|
3
|
+
# table_name | acquisition_pages
|
4
|
+
# table_type | BASE TABLE
|
5
|
+
# self_referencing_column_name |
|
6
|
+
# reference_generation |
|
7
|
+
# user_defined_type_catalog |
|
8
|
+
# user_defined_type_schema |
|
9
|
+
# user_defined_type_name |
|
10
|
+
# is_insertable_into | YES
|
11
|
+
# is_typed | NO
|
12
|
+
# commit_action |
|
13
|
+
#
|
14
|
+
class PostgresToRedshift
|
15
|
+
class Table
|
16
|
+
attr_accessor :attributes, :columns
|
17
|
+
|
18
|
+
def initialize(attributes: , columns: [])
|
19
|
+
self.attributes = attributes
|
20
|
+
self.columns = columns
|
21
|
+
end
|
22
|
+
|
23
|
+
def name
|
24
|
+
attributes["table_name"]
|
25
|
+
end
|
26
|
+
alias_method :to_s, :name
|
27
|
+
|
28
|
+
def target_table_name
|
29
|
+
name.gsub(/_view$/, '')
|
30
|
+
end
|
31
|
+
|
32
|
+
def columns=(column_definitions = [])
|
33
|
+
@columns = column_definitions.map do |column_definition|
|
34
|
+
Column.new(attributes: column_definition)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def columns_for_create
|
39
|
+
columns.map do |column|
|
40
|
+
%Q["#{column.name}" #{column.data_type_for_copy}]
|
41
|
+
end.join(", ")
|
42
|
+
end
|
43
|
+
|
44
|
+
def columns_for_copy
|
45
|
+
columns.map do |column|
|
46
|
+
column.name_for_copy
|
47
|
+
end.join(", ")
|
48
|
+
end
|
49
|
+
|
50
|
+
def is_view?
|
51
|
+
attributes["table_type"] == "VIEW"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
data/lib/postgres_to_redshift.rb
CHANGED
@@ -2,50 +2,74 @@ require "postgres_to_redshift/version"
|
|
2
2
|
require 'pg'
|
3
3
|
require 'uri'
|
4
4
|
require 'aws-sdk'
|
5
|
+
require 'zlib'
|
6
|
+
require 'stringio'
|
7
|
+
require "postgres_to_redshift/table"
|
8
|
+
require "postgres_to_redshift/column"
|
5
9
|
|
6
10
|
class PostgresToRedshift
|
11
|
+
class << self
|
12
|
+
attr_accessor :source_uri, :target_uri
|
13
|
+
end
|
14
|
+
|
7
15
|
attr_reader :source_connection, :target_connection, :s3
|
8
16
|
|
9
17
|
def self.update_tables
|
10
|
-
update_tables = PostgresToRedshift.new
|
11
|
-
|
18
|
+
update_tables = PostgresToRedshift.new
|
19
|
+
|
20
|
+
update_tables.tables.each do |table|
|
21
|
+
target_connection.exec("CREATE TABLE IF NOT EXISTS public.#{table.target_table_name} (#{table.columns_for_create})")
|
22
|
+
|
23
|
+
update_tables.copy_table(table)
|
24
|
+
|
25
|
+
update_tables.import_table(table)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.source_uri
|
30
|
+
@source_uri ||= URI.parse(ENV['POSTGRES_TO_REDSHIFT_SOURCE_URI'])
|
31
|
+
end
|
32
|
+
|
33
|
+
def self.target_uri
|
34
|
+
@target_uri ||= URI.parse(ENV['POSTGRES_TO_REDSHIFT_TARGET_URI'])
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.source_connection
|
38
|
+
unless instance_variable_defined?(:"@source_connection")
|
39
|
+
@source_connection = PG::Connection.new(host: source_uri.host, port: source_uri.port, user: source_uri.user || ENV['USER'], password: source_uri.password, dbname: source_uri.path[1..-1])
|
40
|
+
@source_connection.exec("SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY;")
|
41
|
+
end
|
42
|
+
|
43
|
+
@source_connection
|
44
|
+
end
|
12
45
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
46
|
+
def self.target_connection
|
47
|
+
unless instance_variable_defined?(:"@target_connection")
|
48
|
+
@target_connection = PG::Connection.new(host: target_uri.host, port: target_uri.port, user: target_uri.user || ENV['USER'], password: target_uri.password, dbname: target_uri.path[1..-1])
|
49
|
+
end
|
50
|
+
|
51
|
+
@target_connection
|
17
52
|
end
|
18
53
|
|
19
|
-
def
|
20
|
-
|
21
|
-
target_uri = URI.parse(ENV['REDSHIFT_URI'])
|
22
|
-
@source_connection = PG::Connection.new(host: source_uri.host, port: source_uri.port, user: source_uri.user, password: source_uri.password, dbname: source_uri.path[1..-1])
|
23
|
-
@source_connection.exec("SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY;")
|
24
|
-
@target_connection = PG::Connection.new(host: target_uri.host, port: target_uri.port, user: target_uri.user, password: target_uri.password, dbname: target_uri.path[1..-1])
|
54
|
+
def source_connection
|
55
|
+
self.class.source_connection
|
25
56
|
end
|
26
57
|
|
27
|
-
def
|
28
|
-
|
58
|
+
def target_connection
|
59
|
+
self.class.target_connection
|
29
60
|
end
|
30
61
|
|
31
62
|
def tables
|
32
|
-
source_connection.exec("SELECT
|
63
|
+
source_connection.exec("SELECT * FROM information_schema.tables WHERE table_schema = 'public' AND table_type in ('BASE TABLE', 'VIEW')").map do |table_attributes|
|
64
|
+
table = Table.new(attributes: table_attributes)
|
65
|
+
next if table.name =~ /^pg_/
|
66
|
+
table.columns = column_definitions(table)
|
67
|
+
table
|
68
|
+
end.compact
|
33
69
|
end
|
34
70
|
|
35
|
-
def
|
36
|
-
source_connection.exec("SELECT
|
37
|
-
data_type = row["data_type"]
|
38
|
-
data_type.gsub!(/text/, 'character varying(max)')
|
39
|
-
data_type.gsub!(/json/, 'character varying(max)')
|
40
|
-
data_type.gsub!(/bytea/, 'character varying(max)')
|
41
|
-
data_type.gsub!(/money/, 'character varying(max)')
|
42
|
-
|
43
|
-
if row["character_maximum_length"].to_s.length > 0
|
44
|
-
%Q|"#{row["column_name"]}" #{data_type}(#{row["character_maximum_length"]})|
|
45
|
-
else
|
46
|
-
%Q|"#{row["column_name"]}" #{data_type}|
|
47
|
-
end
|
48
|
-
end.join(", ")
|
71
|
+
def column_definitions(table)
|
72
|
+
source_connection.exec("SELECT * FROM information_schema.columns WHERE table_schema='public' AND table_name='#{table.name}' order by ordinal_position")
|
49
73
|
end
|
50
74
|
|
51
75
|
def s3
|
@@ -56,68 +80,41 @@ class PostgresToRedshift
|
|
56
80
|
@bucket ||= s3.buckets[ENV['S3_DATABASE_EXPORT_BUCKET']]
|
57
81
|
end
|
58
82
|
|
59
|
-
def
|
60
|
-
|
61
|
-
|
62
|
-
end
|
63
|
-
end
|
83
|
+
def copy_table(table)
|
84
|
+
buffer = StringIO.new
|
85
|
+
zip = Zlib::GzipWriter.new(buffer)
|
64
86
|
|
65
|
-
|
66
|
-
|
67
|
-
puts "Downloading #{source_table}"
|
68
|
-
copy_command =
|
69
|
-
if is_view
|
70
|
-
"COPY (SELECT * FROM #{source_table}) TO STDOUT WITH DELIMITER '|'"
|
71
|
-
else
|
72
|
-
"COPY #{source_table} TO STDOUT WITH DELIMITER '|'"
|
73
|
-
end
|
87
|
+
puts "Downloading #{table}"
|
88
|
+
copy_command = "COPY (SELECT #{table.columns_for_copy} FROM #{table.name}) TO STDOUT WITH DELIMITER '|'"
|
74
89
|
|
75
90
|
source_connection.copy_data(copy_command) do
|
76
91
|
while row = source_connection.get_copy_data
|
77
|
-
|
92
|
+
zip.write(row)
|
78
93
|
end
|
79
94
|
end
|
80
|
-
|
95
|
+
zip.finish
|
96
|
+
buffer.rewind
|
97
|
+
upload_table(table, buffer)
|
81
98
|
end
|
82
99
|
|
83
|
-
def upload_table(
|
84
|
-
puts "Uploading #{
|
85
|
-
bucket.objects["export/#{
|
86
|
-
bucket.objects["export/#{
|
100
|
+
def upload_table(table, buffer)
|
101
|
+
puts "Uploading #{table.target_table_name}"
|
102
|
+
bucket.objects["export/#{table.target_table_name}.psv.gz"].delete
|
103
|
+
bucket.objects["export/#{table.target_table_name}.psv.gz"].write(buffer, acl: :authenticated_read)
|
87
104
|
end
|
88
105
|
|
89
|
-
def import_table(
|
90
|
-
puts "Importing #{
|
91
|
-
target_connection.exec("DROP TABLE IF EXISTS public.#{
|
106
|
+
def import_table(table)
|
107
|
+
puts "Importing #{table.target_table_name}"
|
108
|
+
target_connection.exec("DROP TABLE IF EXISTS public.#{table.target_table_name}_updating")
|
92
109
|
|
93
110
|
target_connection.exec("BEGIN;")
|
94
111
|
|
95
|
-
target_connection.exec("ALTER TABLE public.#{
|
112
|
+
target_connection.exec("ALTER TABLE public.#{table.target_table_name} RENAME TO #{table.target_table_name}_updating")
|
96
113
|
|
97
|
-
target_connection.exec("CREATE TABLE public.#{
|
114
|
+
target_connection.exec("CREATE TABLE public.#{table.target_table_name} (#{table.columns_for_create})")
|
98
115
|
|
99
|
-
target_connection.exec("COPY public.#{
|
116
|
+
target_connection.exec("COPY public.#{table.target_table_name} FROM 's3://#{ENV['S3_DATABASE_EXPORT_BUCKET']}/export/#{table.target_table_name}.psv.gz' CREDENTIALS 'aws_access_key_id=#{ENV['S3_DATABASE_EXPORT_ID']};aws_secret_access_key=#{ENV['S3_DATABASE_EXPORT_KEY']}' GZIP TRUNCATECOLUMNS ESCAPE DELIMITER as '|';")
|
100
117
|
|
101
118
|
target_connection.exec("COMMIT;")
|
102
119
|
end
|
103
|
-
|
104
|
-
def copy_tables
|
105
|
-
tables.each do |table|
|
106
|
-
copy_table(table, table)
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
def copy_views
|
111
|
-
views.each do |view|
|
112
|
-
table = view.gsub(/_view/, '')
|
113
|
-
copy_table(view, table, true)
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
# FIXME: This relies on views being uploaded after tables.
|
118
|
-
def import_tables
|
119
|
-
tables.each do |table|
|
120
|
-
import_table(table)
|
121
|
-
end
|
122
|
-
end
|
123
120
|
end
|
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_development_dependency "bundler", "~> 1.
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.6"
|
22
22
|
spec.add_development_dependency "rake", "~> 10.0"
|
23
23
|
spec.add_dependency "pg", "~> 0.17.0"
|
24
24
|
spec.add_dependency "aws-sdk", "~> 1.54"
|
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe PostgresToRedshift::Column do
|
4
|
+
context 'with a simple column' do
|
5
|
+
before do
|
6
|
+
attributes = {
|
7
|
+
"table_catalog" => "postgres_to_redshift",
|
8
|
+
"table_schema" => "public",
|
9
|
+
"table_name" => "films",
|
10
|
+
"column_name" => "description",
|
11
|
+
"ordinal_position" => "2",
|
12
|
+
"column_default" => nil,
|
13
|
+
"is_nullable" => "YES",
|
14
|
+
"data_type" => "character varying",
|
15
|
+
"character_maximum_length" => "255",
|
16
|
+
"character_octet_length" => "1020"
|
17
|
+
}
|
18
|
+
|
19
|
+
@column = PostgresToRedshift::Column.new attributes: attributes
|
20
|
+
end
|
21
|
+
|
22
|
+
describe '#name' do
|
23
|
+
it 'returns the column name' do
|
24
|
+
expect(@column.name).to eq("description")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe '#name_for_copy' do
|
30
|
+
it 'casts fields to appropriate type' do
|
31
|
+
attributes = {
|
32
|
+
"table_catalog" => "postgres_to_redshift",
|
33
|
+
"table_schema" => "public",
|
34
|
+
"table_name" => "films",
|
35
|
+
"column_name" => "description",
|
36
|
+
"ordinal_position" => "2",
|
37
|
+
"column_default" => nil,
|
38
|
+
"is_nullable" => "YES",
|
39
|
+
"data_type" => "text",
|
40
|
+
"character_maximum_length" => nil,
|
41
|
+
"character_octet_length" => "1073741824"
|
42
|
+
}
|
43
|
+
|
44
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
45
|
+
expect(column.name_for_copy).to eq('CAST("description" AS CHARACTER VARYING(65535)) AS description')
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'does not cast fields that do not need casting' do
|
49
|
+
attributes = {
|
50
|
+
"table_catalog" => "postgres_to_redshift",
|
51
|
+
"table_schema" => "public",
|
52
|
+
"table_name" => "films",
|
53
|
+
"column_name" => "description",
|
54
|
+
"ordinal_position" => "2",
|
55
|
+
"column_default" => nil,
|
56
|
+
"is_nullable" => "YES",
|
57
|
+
"data_type" => "character varying",
|
58
|
+
"character_maximum_length" => "255",
|
59
|
+
"character_octet_length" => "1020"
|
60
|
+
}
|
61
|
+
|
62
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
63
|
+
expect(column.name_for_copy).to eq('"description"')
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
describe "#data_type_for_copy" do
|
68
|
+
it 'casts text to character varying(65535)' do
|
69
|
+
attributes = {
|
70
|
+
"table_catalog" => "postgres_to_redshift",
|
71
|
+
"table_schema" => "public",
|
72
|
+
"table_name" => "films",
|
73
|
+
"column_name" => "description",
|
74
|
+
"ordinal_position" => "2",
|
75
|
+
"column_default" => nil,
|
76
|
+
"is_nullable" => "YES",
|
77
|
+
"data_type" => "text",
|
78
|
+
"character_maximum_length" => nil,
|
79
|
+
"character_octet_length" => "1073741824"
|
80
|
+
}
|
81
|
+
|
82
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
83
|
+
expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
|
84
|
+
end
|
85
|
+
|
86
|
+
it 'casts json to character varying(65535)' do
|
87
|
+
attributes = {
|
88
|
+
"table_catalog" => "postgres_to_redshift",
|
89
|
+
"table_schema" => "public",
|
90
|
+
"table_name" => "films",
|
91
|
+
"column_name" => "description",
|
92
|
+
"ordinal_position" => "2",
|
93
|
+
"column_default" => nil,
|
94
|
+
"is_nullable" => "YES",
|
95
|
+
"data_type" => "json",
|
96
|
+
}
|
97
|
+
|
98
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
99
|
+
expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
|
100
|
+
end
|
101
|
+
|
102
|
+
it 'casts bytea to character varying(65535)' do
|
103
|
+
attributes = {
|
104
|
+
"table_catalog" => "postgres_to_redshift",
|
105
|
+
"table_schema" => "public",
|
106
|
+
"table_name" => "films",
|
107
|
+
"column_name" => "description",
|
108
|
+
"ordinal_position" => "2",
|
109
|
+
"column_default" => nil,
|
110
|
+
"is_nullable" => "YES",
|
111
|
+
"data_type" => "bytea",
|
112
|
+
}
|
113
|
+
|
114
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
115
|
+
expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'casts money to decimal(19,2)' do
|
119
|
+
attributes = {
|
120
|
+
"table_catalog" => "postgres_to_redshift",
|
121
|
+
"table_schema" => "public",
|
122
|
+
"table_name" => "films",
|
123
|
+
"column_name" => "description",
|
124
|
+
"ordinal_position" => "2",
|
125
|
+
"column_default" => nil,
|
126
|
+
"is_nullable" => "YES",
|
127
|
+
"data_type" => "money",
|
128
|
+
}
|
129
|
+
|
130
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
131
|
+
expect(column.data_type_for_copy).to eq("DECIMAL(19,2)")
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'casts oid to character varying' do
|
135
|
+
attributes = {
|
136
|
+
"table_catalog" => "postgres_to_redshift",
|
137
|
+
"table_schema" => "public",
|
138
|
+
"table_name" => "films",
|
139
|
+
"column_name" => "description",
|
140
|
+
"ordinal_position" => "2",
|
141
|
+
"column_default" => nil,
|
142
|
+
"is_nullable" => "YES",
|
143
|
+
"data_type" => "oid",
|
144
|
+
}
|
145
|
+
|
146
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
147
|
+
expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
|
148
|
+
end
|
149
|
+
|
150
|
+
it "returns the data type if no cast necessary" do
|
151
|
+
attributes = {
|
152
|
+
"table_catalog" => "postgres_to_redshift",
|
153
|
+
"table_schema" => "public",
|
154
|
+
"table_name" => "films",
|
155
|
+
"column_name" => "description",
|
156
|
+
"ordinal_position" => "2",
|
157
|
+
"column_default" => nil,
|
158
|
+
"is_nullable" => "YES",
|
159
|
+
"data_type" => "character varying",
|
160
|
+
"character_maximum_length" => "255",
|
161
|
+
"character_octet_length" => "1020"
|
162
|
+
}
|
163
|
+
|
164
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
165
|
+
expect(column.data_type_for_copy).to eq("character varying")
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe PostgresToRedshift::Table do
|
4
|
+
context 'with a simple table' do
|
5
|
+
before do
|
6
|
+
attributes = {
|
7
|
+
"table_catalog" => "postgres_to_redshift",
|
8
|
+
"table_schema" => "public",
|
9
|
+
"table_name" => "films",
|
10
|
+
"table_type" => "BASE TABLE",
|
11
|
+
}
|
12
|
+
columns = [
|
13
|
+
{
|
14
|
+
"table_catalog" => "postgres_to_redshift",
|
15
|
+
"table_schema" => "public",
|
16
|
+
"table_name" => "films",
|
17
|
+
"column_name" => "description",
|
18
|
+
"ordinal_position" => "2",
|
19
|
+
"column_default" => nil,
|
20
|
+
"is_nullable" => "YES",
|
21
|
+
"data_type" => "character varying",
|
22
|
+
"character_maximum_length" => "255",
|
23
|
+
"character_octet_length" => "1020"
|
24
|
+
}
|
25
|
+
]
|
26
|
+
|
27
|
+
@table = PostgresToRedshift::Table.new(attributes: attributes, columns: columns)
|
28
|
+
end
|
29
|
+
|
30
|
+
describe '#name' do
|
31
|
+
it 'returns the name of the table' do
|
32
|
+
expect(@table.name).to eq("films")
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
describe '#columns' do
|
37
|
+
it 'returns a list of columns' do
|
38
|
+
expect(@table.columns.size).to eq(1)
|
39
|
+
expect(@table.columns.first.name).to eq("description")
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
describe '#is_view?' do
|
45
|
+
it 'returns true if it is a view' do
|
46
|
+
attributes = {
|
47
|
+
"table_catalog" => "postgres_to_redshift",
|
48
|
+
"table_schema" => "public",
|
49
|
+
"table_name" => "films",
|
50
|
+
"table_type" => "VIEW",
|
51
|
+
}
|
52
|
+
|
53
|
+
table = PostgresToRedshift::Table.new(attributes: attributes)
|
54
|
+
expect(table.is_view?).to be_truthy
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'returns false if it is not a view' do
|
58
|
+
attributes = {
|
59
|
+
"table_catalog" => "postgres_to_redshift",
|
60
|
+
"table_schema" => "public",
|
61
|
+
"table_name" => "films",
|
62
|
+
"table_type" => "BASE TABLE",
|
63
|
+
}
|
64
|
+
|
65
|
+
table = PostgresToRedshift::Table.new(attributes: attributes)
|
66
|
+
expect(table.is_view?).to be_falsey
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
describe 'target_table_name' do
|
71
|
+
it 'strips _view from the end of the table name' do
|
72
|
+
attributes = {
|
73
|
+
"table_catalog" => "postgres_to_redshift",
|
74
|
+
"table_schema" => "public",
|
75
|
+
"table_name" => "films_view",
|
76
|
+
"table_type" => "VIEW",
|
77
|
+
}
|
78
|
+
|
79
|
+
table = PostgresToRedshift::Table.new(attributes: attributes)
|
80
|
+
expect(table.target_table_name).to eq("films")
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe PostgresToRedshift do
|
4
|
+
it 'opens a read only connection to source database' do
|
5
|
+
read_only_state = PostgresToRedshift.source_connection.exec("SHOW transaction_read_only").first["transaction_read_only"]
|
6
|
+
|
7
|
+
expect(read_only_state).to eq("on")
|
8
|
+
end
|
9
|
+
|
10
|
+
context 'with a simple table' do
|
11
|
+
before do
|
12
|
+
PostgresToRedshift::Test.test_connection.exec(%Q[DROP TABLE IF EXISTS "films"; CREATE TABLE IF NOT EXISTS "films" ("id" SERIAL PRIMARY KEY, "title" text);])
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'lists available tables' do
|
16
|
+
expect(PostgresToRedshift.new.tables.size).to eq(1)
|
17
|
+
expect(PostgresToRedshift.new.tables.first.name).to eq("films")
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'lists column definitions' do
|
21
|
+
table = PostgresToRedshift.new.tables.first
|
22
|
+
film_columns = PostgresToRedshift.new.column_definitions(table)
|
23
|
+
|
24
|
+
expect(film_columns.to_a.size).to eq(2)
|
25
|
+
expect(film_columns.first["column_name"]).to eq("id")
|
26
|
+
expect(table.columns.first.name).to eq("id")
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
require File.expand_path("../../lib/postgres_to_redshift", __FILE__)
|
2
|
+
require 'spec_prepare'
|
3
|
+
|
4
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
5
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
6
|
+
# The generated `.rspec` file contains `--require spec_helper` which will cause this
|
7
|
+
# file to always be loaded, without a need to explicitly require it in any files.
|
8
|
+
#
|
9
|
+
# Given that it is always loaded, you are encouraged to keep this file as
|
10
|
+
# light-weight as possible. Requiring heavyweight dependencies from this file
|
11
|
+
# will add to the boot time of your test suite on EVERY test run, even for an
|
12
|
+
# individual file that may not need all of that loaded. Instead, consider making
|
13
|
+
# a separate helper file that requires the additional dependencies and performs
|
14
|
+
# the additional setup, and require it from the spec files that actually need it.
|
15
|
+
#
|
16
|
+
# The `.rspec` file also contains a few flags that are not defaults but that
|
17
|
+
# users commonly want.
|
18
|
+
#
|
19
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
20
|
+
RSpec.configure do |config|
|
21
|
+
# rspec-expectations config goes here. You can use an alternate
|
22
|
+
# assertion/expectation library such as wrong or the stdlib/minitest
|
23
|
+
# assertions if you prefer.
|
24
|
+
config.expect_with :rspec do |expectations|
|
25
|
+
# This option will default to `true` in RSpec 4. It makes the `description`
|
26
|
+
# and `failure_message` of custom matchers include text for helper methods
|
27
|
+
# defined using `chain`, e.g.:
|
28
|
+
# be_bigger_than(2).and_smaller_than(4).description
|
29
|
+
# # => "be bigger than 2 and smaller than 4"
|
30
|
+
# ...rather than:
|
31
|
+
# # => "be bigger than 2"
|
32
|
+
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
33
|
+
end
|
34
|
+
|
35
|
+
# rspec-mocks config goes here. You can use an alternate test double
|
36
|
+
# library (such as bogus or mocha) by changing the `mock_with` option here.
|
37
|
+
config.mock_with :rspec do |mocks|
|
38
|
+
# Prevents you from mocking or stubbing a method that does not exist on
|
39
|
+
# a real object. This is generally recommended, and will default to
|
40
|
+
# `true` in RSpec 4.
|
41
|
+
mocks.verify_partial_doubles = true
|
42
|
+
end
|
43
|
+
|
44
|
+
# The settings below are suggested to provide a good initial experience
|
45
|
+
# with RSpec, but feel free to customize to your heart's content.
|
46
|
+
# These two settings work together to allow you to limit a spec run
|
47
|
+
# to individual examples or groups you care about by tagging them with
|
48
|
+
# `:focus` metadata. When nothing is tagged with `:focus`, all examples
|
49
|
+
# get run.
|
50
|
+
config.filter_run :focus
|
51
|
+
config.run_all_when_everything_filtered = true
|
52
|
+
|
53
|
+
# Limits the available syntax to the non-monkey patched syntax that is recommended.
|
54
|
+
# For more details, see:
|
55
|
+
# - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
|
56
|
+
# - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
|
57
|
+
# - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
|
58
|
+
config.disable_monkey_patching!
|
59
|
+
|
60
|
+
# This setting enables warnings. It's recommended, but in some cases may
|
61
|
+
# be too noisy due to issues in dependencies.
|
62
|
+
config.warnings = true
|
63
|
+
|
64
|
+
# Many RSpec users commonly either run the entire suite or an individual
|
65
|
+
# file, and it's useful to allow more verbose output when running an
|
66
|
+
# individual spec file.
|
67
|
+
if config.files_to_run.one?
|
68
|
+
# Use the documentation formatter for detailed output,
|
69
|
+
# unless a formatter has already been configured
|
70
|
+
# (e.g. via a command-line flag).
|
71
|
+
config.default_formatter = 'doc'
|
72
|
+
end
|
73
|
+
|
74
|
+
# Print the 10 slowest examples and example groups at the
|
75
|
+
# end of the spec run, to help surface which specs are running
|
76
|
+
# particularly slow.
|
77
|
+
config.profile_examples = 10
|
78
|
+
|
79
|
+
# Run specs in random order to surface order dependencies. If you find an
|
80
|
+
# order dependency and want to debug it, you can fix the order by providing
|
81
|
+
# the seed, which is printed after each run.
|
82
|
+
# --seed 1234
|
83
|
+
config.order = :random
|
84
|
+
|
85
|
+
# Seed global randomization in this process using the `--seed` CLI option.
|
86
|
+
# Setting this allows you to use `--seed` to deterministically reproduce
|
87
|
+
# test failures related to randomization by passing the same `--seed` value
|
88
|
+
# as the one that triggered the failure.
|
89
|
+
Kernel.srand config.seed
|
90
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module PostgresToRedshift::Test
|
2
|
+
def self.source_uri
|
3
|
+
PostgresToRedshift.source_uri
|
4
|
+
end
|
5
|
+
|
6
|
+
def self.test_connection
|
7
|
+
@test_connection ||= PG::Connection.new(host: source_uri.host, port: source_uri.port, user: source_uri.user || ENV['USER'], password: source_uri.password, dbname: source_uri.path[1..-1])
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
RSpec.configure do |config|
|
12
|
+
config.before :suite do
|
13
|
+
PostgresToRedshift::Test.test_connection
|
14
|
+
end
|
15
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: postgres_to_redshift
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alex Rakoczy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.6'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.6'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -76,14 +76,24 @@ extensions: []
|
|
76
76
|
extra_rdoc_files: []
|
77
77
|
files:
|
78
78
|
- ".gitignore"
|
79
|
+
- ".rspec"
|
80
|
+
- ".travis.yml"
|
79
81
|
- Gemfile
|
82
|
+
- Gemfile.lock
|
80
83
|
- LICENSE.txt
|
81
84
|
- README.md
|
82
85
|
- Rakefile
|
83
86
|
- bin/postgres_to_redshift
|
84
87
|
- lib/postgres_to_redshift.rb
|
88
|
+
- lib/postgres_to_redshift/column.rb
|
89
|
+
- lib/postgres_to_redshift/table.rb
|
85
90
|
- lib/postgres_to_redshift/version.rb
|
86
91
|
- postgres_to_redshift.gemspec
|
92
|
+
- spec/lib/postgres_to_redshift/column_spec.rb
|
93
|
+
- spec/lib/postgres_to_redshift/table_spec.rb
|
94
|
+
- spec/lib/postgres_to_redshift_spec.rb
|
95
|
+
- spec/spec_helper.rb
|
96
|
+
- spec/spec_prepare.rb
|
87
97
|
homepage: https://github.com/kitchensurfing/postgres_to_redshift
|
88
98
|
licenses:
|
89
99
|
- MIT
|
@@ -108,4 +118,9 @@ rubygems_version: 2.4.5
|
|
108
118
|
signing_key:
|
109
119
|
specification_version: 4
|
110
120
|
summary: Load postgres databases into Amazon Redshift
|
111
|
-
test_files:
|
121
|
+
test_files:
|
122
|
+
- spec/lib/postgres_to_redshift/column_spec.rb
|
123
|
+
- spec/lib/postgres_to_redshift/table_spec.rb
|
124
|
+
- spec/lib/postgres_to_redshift_spec.rb
|
125
|
+
- spec/spec_helper.rb
|
126
|
+
- spec/spec_prepare.rb
|