postgres_to_redshift 0.0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.travis.yml +10 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +44 -0
- data/README.md +6 -1
- data/Rakefile +8 -0
- data/bin/postgres_to_redshift +0 -1
- data/lib/postgres_to_redshift/column.rb +85 -0
- data/lib/postgres_to_redshift/table.rb +54 -0
- data/lib/postgres_to_redshift/version.rb +1 -1
- data/lib/postgres_to_redshift.rb +72 -75
- data/postgres_to_redshift.gemspec +1 -1
- data/spec/lib/postgres_to_redshift/column_spec.rb +168 -0
- data/spec/lib/postgres_to_redshift/table_spec.rb +83 -0
- data/spec/lib/postgres_to_redshift_spec.rb +29 -0
- data/spec/spec_helper.rb +90 -0
- data/spec/spec_prepare.rb +15 -0
- metadata +20 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b9aca81574e39acdc400fcfd77f454cb9c93f682
|
4
|
+
data.tar.gz: f4a8430406d25028ff1ac9b305837d34d0463464
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8136aaffdd83cb676290393235f3cb1e99cf257c8c3dc193c0b234feec09b3f3cfedb29a8e6ad7f9f55641a2e1ef02c624f6d86073add5fec9ee5e0a2eacdde4
|
7
|
+
data.tar.gz: c0b9b49f1ba41e26fd2bb87328c5f672648a98ddd432335d8a899f6dbd722b1c32b6d6cb85b4de19630e42690eaf835957a6e801001d8a46972f917f0678482e
|
data/.gitignore
CHANGED
data/.rspec
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
language: ruby
|
2
|
+
bundler_args: --without development --deployment --jobs=3 --retry=3
|
3
|
+
cache: bundler
|
4
|
+
rvm:
|
5
|
+
- 2.2.0
|
6
|
+
- 2.1.0
|
7
|
+
before_script:
|
8
|
+
- psql -c 'create database travis_ci_test;' -U postgres
|
9
|
+
env:
|
10
|
+
- POSTGRES_TO_REDSHIFT_SOURCE_URI=postgres://postgres@localhost/travis_ci_test
|
data/Gemfile
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
postgres_to_redshift (0.1.1)
|
5
|
+
aws-sdk (~> 1.54)
|
6
|
+
pg (~> 0.17.0)
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: https://rubygems.org/
|
10
|
+
specs:
|
11
|
+
aws-sdk (1.61.0)
|
12
|
+
aws-sdk-v1 (= 1.61.0)
|
13
|
+
aws-sdk-v1 (1.61.0)
|
14
|
+
json (~> 1.4)
|
15
|
+
nokogiri (>= 1.4.4)
|
16
|
+
diff-lcs (1.2.5)
|
17
|
+
json (1.8.2)
|
18
|
+
mini_portile (0.6.2)
|
19
|
+
nokogiri (1.6.6.2)
|
20
|
+
mini_portile (~> 0.6.0)
|
21
|
+
pg (0.17.1)
|
22
|
+
rake (10.4.2)
|
23
|
+
rspec (3.2.0)
|
24
|
+
rspec-core (~> 3.2.0)
|
25
|
+
rspec-expectations (~> 3.2.0)
|
26
|
+
rspec-mocks (~> 3.2.0)
|
27
|
+
rspec-core (3.2.0)
|
28
|
+
rspec-support (~> 3.2.0)
|
29
|
+
rspec-expectations (3.2.0)
|
30
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
31
|
+
rspec-support (~> 3.2.0)
|
32
|
+
rspec-mocks (3.2.0)
|
33
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
34
|
+
rspec-support (~> 3.2.0)
|
35
|
+
rspec-support (3.2.1)
|
36
|
+
|
37
|
+
PLATFORMS
|
38
|
+
ruby
|
39
|
+
|
40
|
+
DEPENDENCIES
|
41
|
+
bundler (~> 1.6)
|
42
|
+
postgres_to_redshift!
|
43
|
+
rake
|
44
|
+
rspec
|
data/README.md
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
This gem copies data from postgres to redshift. It's especially useful to copy data from postgres to redshift in heroku.
|
4
4
|
|
5
|
+
[![Build Status](https://travis-ci.org/kitchensurfing/postgres_to_redshift.svg?branch=master)](https://travis-ci.org/kitchensurfing/postgres_to_redshift)
|
6
|
+
|
5
7
|
## Installation
|
6
8
|
|
7
9
|
Add this line to your application's Gemfile:
|
@@ -20,8 +22,11 @@ Or install it yourself as:
|
|
20
22
|
|
21
23
|
## Usage
|
22
24
|
|
25
|
+
Set your source and target databases, as well as your s3 intermediary.
|
26
|
+
|
23
27
|
```bash
|
24
|
-
export
|
28
|
+
export POSTGRES_TO_REDSHIFT_SOURCE_URI='postgres://username:password@host:port/database-name'
|
29
|
+
export POSTGRES_TO_REDSHIFT_TARGET_URI='postgres://username:password@host:port/database-name'
|
25
30
|
export S3_DATABASE_EXPORT_ID='yourid'
|
26
31
|
export S3_DATABASE_EXPORT_KEY='yourkey'
|
27
32
|
export S3_DATABASE_EXPORT_BUCKET='some-bucket-to-use'
|
data/Rakefile
CHANGED
data/bin/postgres_to_redshift
CHANGED
@@ -0,0 +1,85 @@
|
|
1
|
+
# table_catalog | postgres_to_redshift
|
2
|
+
# table_schema | public
|
3
|
+
# table_name | films
|
4
|
+
# column_name | description
|
5
|
+
# ordinal_position | 2
|
6
|
+
# column_default |
|
7
|
+
# is_nullable | YES
|
8
|
+
# data_type | character varying
|
9
|
+
# character_maximum_length | 255
|
10
|
+
# character_octet_length | 1020
|
11
|
+
# numeric_precision |
|
12
|
+
# numeric_precision_radix |
|
13
|
+
# numeric_scale |
|
14
|
+
# datetime_precision |
|
15
|
+
# interval_type |
|
16
|
+
# interval_precision |
|
17
|
+
# character_set_catalog |
|
18
|
+
# character_set_schema |
|
19
|
+
# character_set_name |
|
20
|
+
# collation_catalog |
|
21
|
+
# collation_schema |
|
22
|
+
# collation_name |
|
23
|
+
# domain_catalog |
|
24
|
+
# domain_schema |
|
25
|
+
# domain_name |
|
26
|
+
# udt_catalog | postgres_to_redshift
|
27
|
+
# udt_schema | pg_catalog
|
28
|
+
# udt_name | varchar
|
29
|
+
# scope_catalog |
|
30
|
+
# scope_schema |
|
31
|
+
# scope_name |
|
32
|
+
# maximum_cardinality |
|
33
|
+
# dtd_identifier | 2
|
34
|
+
# is_self_referencing | NO
|
35
|
+
# is_identity | NO
|
36
|
+
# identity_generation |
|
37
|
+
# identity_start |
|
38
|
+
# identity_increment |
|
39
|
+
# identity_maximum |
|
40
|
+
# identity_minimum |
|
41
|
+
# identity_cycle |
|
42
|
+
# is_generated | NEVER
|
43
|
+
# generation_expression |
|
44
|
+
# is_updatable | YES
|
45
|
+
#
|
46
|
+
class PostgresToRedshift::Column
|
47
|
+
attr_accessor :attributes
|
48
|
+
|
49
|
+
CAST_TYPES_FOR_COPY = {
|
50
|
+
"text" => "CHARACTER VARYING(65535)",
|
51
|
+
"json" => "CHARACTER VARYING(65535)",
|
52
|
+
"bytea" => "CHARACTER VARYING(65535)",
|
53
|
+
"money" => "DECIMAL(19,2)",
|
54
|
+
"oid" => "CHARACTER VARYING(65535)",
|
55
|
+
}
|
56
|
+
|
57
|
+
def initialize(attributes: )
|
58
|
+
self.attributes = attributes
|
59
|
+
end
|
60
|
+
|
61
|
+
def name
|
62
|
+
attributes["column_name"]
|
63
|
+
end
|
64
|
+
|
65
|
+
def name_for_copy
|
66
|
+
if needs_type_cast?
|
67
|
+
%Q[CAST("#{name}" AS #{data_type_for_copy}) AS #{name}]
|
68
|
+
else
|
69
|
+
%Q["#{name}"]
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def data_type
|
74
|
+
attributes["data_type"]
|
75
|
+
end
|
76
|
+
|
77
|
+
def data_type_for_copy
|
78
|
+
CAST_TYPES_FOR_COPY[data_type] || data_type
|
79
|
+
end
|
80
|
+
|
81
|
+
private
|
82
|
+
def needs_type_cast?
|
83
|
+
data_type != data_type_for_copy
|
84
|
+
end
|
85
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# table_catalog | postgres_to_redshift
|
2
|
+
# table_schema | public
|
3
|
+
# table_name | acquisition_pages
|
4
|
+
# table_type | BASE TABLE
|
5
|
+
# self_referencing_column_name |
|
6
|
+
# reference_generation |
|
7
|
+
# user_defined_type_catalog |
|
8
|
+
# user_defined_type_schema |
|
9
|
+
# user_defined_type_name |
|
10
|
+
# is_insertable_into | YES
|
11
|
+
# is_typed | NO
|
12
|
+
# commit_action |
|
13
|
+
#
|
14
|
+
class PostgresToRedshift
|
15
|
+
class Table
|
16
|
+
attr_accessor :attributes, :columns
|
17
|
+
|
18
|
+
def initialize(attributes: , columns: [])
|
19
|
+
self.attributes = attributes
|
20
|
+
self.columns = columns
|
21
|
+
end
|
22
|
+
|
23
|
+
def name
|
24
|
+
attributes["table_name"]
|
25
|
+
end
|
26
|
+
alias_method :to_s, :name
|
27
|
+
|
28
|
+
def target_table_name
|
29
|
+
name.gsub(/_view$/, '')
|
30
|
+
end
|
31
|
+
|
32
|
+
def columns=(column_definitions = [])
|
33
|
+
@columns = column_definitions.map do |column_definition|
|
34
|
+
Column.new(attributes: column_definition)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def columns_for_create
|
39
|
+
columns.map do |column|
|
40
|
+
%Q["#{column.name}" #{column.data_type_for_copy}]
|
41
|
+
end.join(", ")
|
42
|
+
end
|
43
|
+
|
44
|
+
def columns_for_copy
|
45
|
+
columns.map do |column|
|
46
|
+
column.name_for_copy
|
47
|
+
end.join(", ")
|
48
|
+
end
|
49
|
+
|
50
|
+
def is_view?
|
51
|
+
attributes["table_type"] == "VIEW"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
data/lib/postgres_to_redshift.rb
CHANGED
@@ -2,50 +2,74 @@ require "postgres_to_redshift/version"
|
|
2
2
|
require 'pg'
|
3
3
|
require 'uri'
|
4
4
|
require 'aws-sdk'
|
5
|
+
require 'zlib'
|
6
|
+
require 'stringio'
|
7
|
+
require "postgres_to_redshift/table"
|
8
|
+
require "postgres_to_redshift/column"
|
5
9
|
|
6
10
|
class PostgresToRedshift
|
11
|
+
class << self
|
12
|
+
attr_accessor :source_uri, :target_uri
|
13
|
+
end
|
14
|
+
|
7
15
|
attr_reader :source_connection, :target_connection, :s3
|
8
16
|
|
9
17
|
def self.update_tables
|
10
|
-
update_tables = PostgresToRedshift.new
|
11
|
-
|
18
|
+
update_tables = PostgresToRedshift.new
|
19
|
+
|
20
|
+
update_tables.tables.each do |table|
|
21
|
+
target_connection.exec("CREATE TABLE IF NOT EXISTS public.#{table.target_table_name} (#{table.columns_for_create})")
|
22
|
+
|
23
|
+
update_tables.copy_table(table)
|
24
|
+
|
25
|
+
update_tables.import_table(table)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.source_uri
|
30
|
+
@source_uri ||= URI.parse(ENV['POSTGRES_TO_REDSHIFT_SOURCE_URI'])
|
31
|
+
end
|
32
|
+
|
33
|
+
def self.target_uri
|
34
|
+
@target_uri ||= URI.parse(ENV['POSTGRES_TO_REDSHIFT_TARGET_URI'])
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.source_connection
|
38
|
+
unless instance_variable_defined?(:"@source_connection")
|
39
|
+
@source_connection = PG::Connection.new(host: source_uri.host, port: source_uri.port, user: source_uri.user || ENV['USER'], password: source_uri.password, dbname: source_uri.path[1..-1])
|
40
|
+
@source_connection.exec("SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY;")
|
41
|
+
end
|
42
|
+
|
43
|
+
@source_connection
|
44
|
+
end
|
12
45
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
46
|
+
def self.target_connection
|
47
|
+
unless instance_variable_defined?(:"@target_connection")
|
48
|
+
@target_connection = PG::Connection.new(host: target_uri.host, port: target_uri.port, user: target_uri.user || ENV['USER'], password: target_uri.password, dbname: target_uri.path[1..-1])
|
49
|
+
end
|
50
|
+
|
51
|
+
@target_connection
|
17
52
|
end
|
18
53
|
|
19
|
-
def
|
20
|
-
|
21
|
-
target_uri = URI.parse(ENV['REDSHIFT_URI'])
|
22
|
-
@source_connection = PG::Connection.new(host: source_uri.host, port: source_uri.port, user: source_uri.user, password: source_uri.password, dbname: source_uri.path[1..-1])
|
23
|
-
@source_connection.exec("SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY;")
|
24
|
-
@target_connection = PG::Connection.new(host: target_uri.host, port: target_uri.port, user: target_uri.user, password: target_uri.password, dbname: target_uri.path[1..-1])
|
54
|
+
def source_connection
|
55
|
+
self.class.source_connection
|
25
56
|
end
|
26
57
|
|
27
|
-
def
|
28
|
-
|
58
|
+
def target_connection
|
59
|
+
self.class.target_connection
|
29
60
|
end
|
30
61
|
|
31
62
|
def tables
|
32
|
-
source_connection.exec("SELECT
|
63
|
+
source_connection.exec("SELECT * FROM information_schema.tables WHERE table_schema = 'public' AND table_type in ('BASE TABLE', 'VIEW')").map do |table_attributes|
|
64
|
+
table = Table.new(attributes: table_attributes)
|
65
|
+
next if table.name =~ /^pg_/
|
66
|
+
table.columns = column_definitions(table)
|
67
|
+
table
|
68
|
+
end.compact
|
33
69
|
end
|
34
70
|
|
35
|
-
def
|
36
|
-
source_connection.exec("SELECT
|
37
|
-
data_type = row["data_type"]
|
38
|
-
data_type.gsub!(/text/, 'character varying(max)')
|
39
|
-
data_type.gsub!(/json/, 'character varying(max)')
|
40
|
-
data_type.gsub!(/bytea/, 'character varying(max)')
|
41
|
-
data_type.gsub!(/money/, 'character varying(max)')
|
42
|
-
|
43
|
-
if row["character_maximum_length"].to_s.length > 0
|
44
|
-
%Q|"#{row["column_name"]}" #{data_type}(#{row["character_maximum_length"]})|
|
45
|
-
else
|
46
|
-
%Q|"#{row["column_name"]}" #{data_type}|
|
47
|
-
end
|
48
|
-
end.join(", ")
|
71
|
+
def column_definitions(table)
|
72
|
+
source_connection.exec("SELECT * FROM information_schema.columns WHERE table_schema='public' AND table_name='#{table.name}' order by ordinal_position")
|
49
73
|
end
|
50
74
|
|
51
75
|
def s3
|
@@ -56,68 +80,41 @@ class PostgresToRedshift
|
|
56
80
|
@bucket ||= s3.buckets[ENV['S3_DATABASE_EXPORT_BUCKET']]
|
57
81
|
end
|
58
82
|
|
59
|
-
def
|
60
|
-
|
61
|
-
|
62
|
-
end
|
63
|
-
end
|
83
|
+
def copy_table(table)
|
84
|
+
buffer = StringIO.new
|
85
|
+
zip = Zlib::GzipWriter.new(buffer)
|
64
86
|
|
65
|
-
|
66
|
-
|
67
|
-
puts "Downloading #{source_table}"
|
68
|
-
copy_command =
|
69
|
-
if is_view
|
70
|
-
"COPY (SELECT * FROM #{source_table}) TO STDOUT WITH DELIMITER '|'"
|
71
|
-
else
|
72
|
-
"COPY #{source_table} TO STDOUT WITH DELIMITER '|'"
|
73
|
-
end
|
87
|
+
puts "Downloading #{table}"
|
88
|
+
copy_command = "COPY (SELECT #{table.columns_for_copy} FROM #{table.name}) TO STDOUT WITH DELIMITER '|'"
|
74
89
|
|
75
90
|
source_connection.copy_data(copy_command) do
|
76
91
|
while row = source_connection.get_copy_data
|
77
|
-
|
92
|
+
zip.write(row)
|
78
93
|
end
|
79
94
|
end
|
80
|
-
|
95
|
+
zip.finish
|
96
|
+
buffer.rewind
|
97
|
+
upload_table(table, buffer)
|
81
98
|
end
|
82
99
|
|
83
|
-
def upload_table(
|
84
|
-
puts "Uploading #{
|
85
|
-
bucket.objects["export/#{
|
86
|
-
bucket.objects["export/#{
|
100
|
+
def upload_table(table, buffer)
|
101
|
+
puts "Uploading #{table.target_table_name}"
|
102
|
+
bucket.objects["export/#{table.target_table_name}.psv.gz"].delete
|
103
|
+
bucket.objects["export/#{table.target_table_name}.psv.gz"].write(buffer, acl: :authenticated_read)
|
87
104
|
end
|
88
105
|
|
89
|
-
def import_table(
|
90
|
-
puts "Importing #{
|
91
|
-
target_connection.exec("DROP TABLE IF EXISTS public.#{
|
106
|
+
def import_table(table)
|
107
|
+
puts "Importing #{table.target_table_name}"
|
108
|
+
target_connection.exec("DROP TABLE IF EXISTS public.#{table.target_table_name}_updating")
|
92
109
|
|
93
110
|
target_connection.exec("BEGIN;")
|
94
111
|
|
95
|
-
target_connection.exec("ALTER TABLE public.#{
|
112
|
+
target_connection.exec("ALTER TABLE public.#{table.target_table_name} RENAME TO #{table.target_table_name}_updating")
|
96
113
|
|
97
|
-
target_connection.exec("CREATE TABLE public.#{
|
114
|
+
target_connection.exec("CREATE TABLE public.#{table.target_table_name} (#{table.columns_for_create})")
|
98
115
|
|
99
|
-
target_connection.exec("COPY public.#{
|
116
|
+
target_connection.exec("COPY public.#{table.target_table_name} FROM 's3://#{ENV['S3_DATABASE_EXPORT_BUCKET']}/export/#{table.target_table_name}.psv.gz' CREDENTIALS 'aws_access_key_id=#{ENV['S3_DATABASE_EXPORT_ID']};aws_secret_access_key=#{ENV['S3_DATABASE_EXPORT_KEY']}' GZIP TRUNCATECOLUMNS ESCAPE DELIMITER as '|';")
|
100
117
|
|
101
118
|
target_connection.exec("COMMIT;")
|
102
119
|
end
|
103
|
-
|
104
|
-
def copy_tables
|
105
|
-
tables.each do |table|
|
106
|
-
copy_table(table, table)
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
def copy_views
|
111
|
-
views.each do |view|
|
112
|
-
table = view.gsub(/_view/, '')
|
113
|
-
copy_table(view, table, true)
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
# FIXME: This relies on views being uploaded after tables.
|
118
|
-
def import_tables
|
119
|
-
tables.each do |table|
|
120
|
-
import_table(table)
|
121
|
-
end
|
122
|
-
end
|
123
120
|
end
|
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_development_dependency "bundler", "~> 1.
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.6"
|
22
22
|
spec.add_development_dependency "rake", "~> 10.0"
|
23
23
|
spec.add_dependency "pg", "~> 0.17.0"
|
24
24
|
spec.add_dependency "aws-sdk", "~> 1.54"
|
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe PostgresToRedshift::Column do
|
4
|
+
context 'with a simple column' do
|
5
|
+
before do
|
6
|
+
attributes = {
|
7
|
+
"table_catalog" => "postgres_to_redshift",
|
8
|
+
"table_schema" => "public",
|
9
|
+
"table_name" => "films",
|
10
|
+
"column_name" => "description",
|
11
|
+
"ordinal_position" => "2",
|
12
|
+
"column_default" => nil,
|
13
|
+
"is_nullable" => "YES",
|
14
|
+
"data_type" => "character varying",
|
15
|
+
"character_maximum_length" => "255",
|
16
|
+
"character_octet_length" => "1020"
|
17
|
+
}
|
18
|
+
|
19
|
+
@column = PostgresToRedshift::Column.new attributes: attributes
|
20
|
+
end
|
21
|
+
|
22
|
+
describe '#name' do
|
23
|
+
it 'returns the column name' do
|
24
|
+
expect(@column.name).to eq("description")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe '#name_for_copy' do
|
30
|
+
it 'casts fields to appropriate type' do
|
31
|
+
attributes = {
|
32
|
+
"table_catalog" => "postgres_to_redshift",
|
33
|
+
"table_schema" => "public",
|
34
|
+
"table_name" => "films",
|
35
|
+
"column_name" => "description",
|
36
|
+
"ordinal_position" => "2",
|
37
|
+
"column_default" => nil,
|
38
|
+
"is_nullable" => "YES",
|
39
|
+
"data_type" => "text",
|
40
|
+
"character_maximum_length" => nil,
|
41
|
+
"character_octet_length" => "1073741824"
|
42
|
+
}
|
43
|
+
|
44
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
45
|
+
expect(column.name_for_copy).to eq('CAST("description" AS CHARACTER VARYING(65535)) AS description')
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'does not cast fields that do not need casting' do
|
49
|
+
attributes = {
|
50
|
+
"table_catalog" => "postgres_to_redshift",
|
51
|
+
"table_schema" => "public",
|
52
|
+
"table_name" => "films",
|
53
|
+
"column_name" => "description",
|
54
|
+
"ordinal_position" => "2",
|
55
|
+
"column_default" => nil,
|
56
|
+
"is_nullable" => "YES",
|
57
|
+
"data_type" => "character varying",
|
58
|
+
"character_maximum_length" => "255",
|
59
|
+
"character_octet_length" => "1020"
|
60
|
+
}
|
61
|
+
|
62
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
63
|
+
expect(column.name_for_copy).to eq('"description"')
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
describe "#data_type_for_copy" do
|
68
|
+
it 'casts text to character varying(65535)' do
|
69
|
+
attributes = {
|
70
|
+
"table_catalog" => "postgres_to_redshift",
|
71
|
+
"table_schema" => "public",
|
72
|
+
"table_name" => "films",
|
73
|
+
"column_name" => "description",
|
74
|
+
"ordinal_position" => "2",
|
75
|
+
"column_default" => nil,
|
76
|
+
"is_nullable" => "YES",
|
77
|
+
"data_type" => "text",
|
78
|
+
"character_maximum_length" => nil,
|
79
|
+
"character_octet_length" => "1073741824"
|
80
|
+
}
|
81
|
+
|
82
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
83
|
+
expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
|
84
|
+
end
|
85
|
+
|
86
|
+
it 'casts json to character varying(65535)' do
|
87
|
+
attributes = {
|
88
|
+
"table_catalog" => "postgres_to_redshift",
|
89
|
+
"table_schema" => "public",
|
90
|
+
"table_name" => "films",
|
91
|
+
"column_name" => "description",
|
92
|
+
"ordinal_position" => "2",
|
93
|
+
"column_default" => nil,
|
94
|
+
"is_nullable" => "YES",
|
95
|
+
"data_type" => "json",
|
96
|
+
}
|
97
|
+
|
98
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
99
|
+
expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
|
100
|
+
end
|
101
|
+
|
102
|
+
it 'casts bytea to character varying(65535)' do
|
103
|
+
attributes = {
|
104
|
+
"table_catalog" => "postgres_to_redshift",
|
105
|
+
"table_schema" => "public",
|
106
|
+
"table_name" => "films",
|
107
|
+
"column_name" => "description",
|
108
|
+
"ordinal_position" => "2",
|
109
|
+
"column_default" => nil,
|
110
|
+
"is_nullable" => "YES",
|
111
|
+
"data_type" => "bytea",
|
112
|
+
}
|
113
|
+
|
114
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
115
|
+
expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'casts money to decimal(19,2)' do
|
119
|
+
attributes = {
|
120
|
+
"table_catalog" => "postgres_to_redshift",
|
121
|
+
"table_schema" => "public",
|
122
|
+
"table_name" => "films",
|
123
|
+
"column_name" => "description",
|
124
|
+
"ordinal_position" => "2",
|
125
|
+
"column_default" => nil,
|
126
|
+
"is_nullable" => "YES",
|
127
|
+
"data_type" => "money",
|
128
|
+
}
|
129
|
+
|
130
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
131
|
+
expect(column.data_type_for_copy).to eq("DECIMAL(19,2)")
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'casts oid to character varying' do
|
135
|
+
attributes = {
|
136
|
+
"table_catalog" => "postgres_to_redshift",
|
137
|
+
"table_schema" => "public",
|
138
|
+
"table_name" => "films",
|
139
|
+
"column_name" => "description",
|
140
|
+
"ordinal_position" => "2",
|
141
|
+
"column_default" => nil,
|
142
|
+
"is_nullable" => "YES",
|
143
|
+
"data_type" => "oid",
|
144
|
+
}
|
145
|
+
|
146
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
147
|
+
expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
|
148
|
+
end
|
149
|
+
|
150
|
+
it "returns the data type if no cast necessary" do
|
151
|
+
attributes = {
|
152
|
+
"table_catalog" => "postgres_to_redshift",
|
153
|
+
"table_schema" => "public",
|
154
|
+
"table_name" => "films",
|
155
|
+
"column_name" => "description",
|
156
|
+
"ordinal_position" => "2",
|
157
|
+
"column_default" => nil,
|
158
|
+
"is_nullable" => "YES",
|
159
|
+
"data_type" => "character varying",
|
160
|
+
"character_maximum_length" => "255",
|
161
|
+
"character_octet_length" => "1020"
|
162
|
+
}
|
163
|
+
|
164
|
+
column = PostgresToRedshift::Column.new attributes: attributes
|
165
|
+
expect(column.data_type_for_copy).to eq("character varying")
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe PostgresToRedshift::Table do
|
4
|
+
context 'with a simple table' do
|
5
|
+
before do
|
6
|
+
attributes = {
|
7
|
+
"table_catalog" => "postgres_to_redshift",
|
8
|
+
"table_schema" => "public",
|
9
|
+
"table_name" => "films",
|
10
|
+
"table_type" => "BASE TABLE",
|
11
|
+
}
|
12
|
+
columns = [
|
13
|
+
{
|
14
|
+
"table_catalog" => "postgres_to_redshift",
|
15
|
+
"table_schema" => "public",
|
16
|
+
"table_name" => "films",
|
17
|
+
"column_name" => "description",
|
18
|
+
"ordinal_position" => "2",
|
19
|
+
"column_default" => nil,
|
20
|
+
"is_nullable" => "YES",
|
21
|
+
"data_type" => "character varying",
|
22
|
+
"character_maximum_length" => "255",
|
23
|
+
"character_octet_length" => "1020"
|
24
|
+
}
|
25
|
+
]
|
26
|
+
|
27
|
+
@table = PostgresToRedshift::Table.new(attributes: attributes, columns: columns)
|
28
|
+
end
|
29
|
+
|
30
|
+
describe '#name' do
|
31
|
+
it 'returns the name of the table' do
|
32
|
+
expect(@table.name).to eq("films")
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
describe '#columns' do
|
37
|
+
it 'returns a list of columns' do
|
38
|
+
expect(@table.columns.size).to eq(1)
|
39
|
+
expect(@table.columns.first.name).to eq("description")
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
describe '#is_view?' do
|
45
|
+
it 'returns true if it is a view' do
|
46
|
+
attributes = {
|
47
|
+
"table_catalog" => "postgres_to_redshift",
|
48
|
+
"table_schema" => "public",
|
49
|
+
"table_name" => "films",
|
50
|
+
"table_type" => "VIEW",
|
51
|
+
}
|
52
|
+
|
53
|
+
table = PostgresToRedshift::Table.new(attributes: attributes)
|
54
|
+
expect(table.is_view?).to be_truthy
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'returns false if it is not a view' do
|
58
|
+
attributes = {
|
59
|
+
"table_catalog" => "postgres_to_redshift",
|
60
|
+
"table_schema" => "public",
|
61
|
+
"table_name" => "films",
|
62
|
+
"table_type" => "BASE TABLE",
|
63
|
+
}
|
64
|
+
|
65
|
+
table = PostgresToRedshift::Table.new(attributes: attributes)
|
66
|
+
expect(table.is_view?).to be_falsey
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
describe 'target_table_name' do
|
71
|
+
it 'strips _view from the end of the table name' do
|
72
|
+
attributes = {
|
73
|
+
"table_catalog" => "postgres_to_redshift",
|
74
|
+
"table_schema" => "public",
|
75
|
+
"table_name" => "films_view",
|
76
|
+
"table_type" => "VIEW",
|
77
|
+
}
|
78
|
+
|
79
|
+
table = PostgresToRedshift::Table.new(attributes: attributes)
|
80
|
+
expect(table.target_table_name).to eq("films")
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe PostgresToRedshift do
|
4
|
+
it 'opens a read only connection to source database' do
|
5
|
+
read_only_state = PostgresToRedshift.source_connection.exec("SHOW transaction_read_only").first["transaction_read_only"]
|
6
|
+
|
7
|
+
expect(read_only_state).to eq("on")
|
8
|
+
end
|
9
|
+
|
10
|
+
context 'with a simple table' do
|
11
|
+
before do
|
12
|
+
PostgresToRedshift::Test.test_connection.exec(%Q[DROP TABLE IF EXISTS "films"; CREATE TABLE IF NOT EXISTS "films" ("id" SERIAL PRIMARY KEY, "title" text);])
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'lists available tables' do
|
16
|
+
expect(PostgresToRedshift.new.tables.size).to eq(1)
|
17
|
+
expect(PostgresToRedshift.new.tables.first.name).to eq("films")
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'lists column definitions' do
|
21
|
+
table = PostgresToRedshift.new.tables.first
|
22
|
+
film_columns = PostgresToRedshift.new.column_definitions(table)
|
23
|
+
|
24
|
+
expect(film_columns.to_a.size).to eq(2)
|
25
|
+
expect(film_columns.first["column_name"]).to eq("id")
|
26
|
+
expect(table.columns.first.name).to eq("id")
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
require File.expand_path("../../lib/postgres_to_redshift", __FILE__)
|
2
|
+
require 'spec_prepare'
|
3
|
+
|
4
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
5
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
6
|
+
# The generated `.rspec` file contains `--require spec_helper` which will cause this
|
7
|
+
# file to always be loaded, without a need to explicitly require it in any files.
|
8
|
+
#
|
9
|
+
# Given that it is always loaded, you are encouraged to keep this file as
|
10
|
+
# light-weight as possible. Requiring heavyweight dependencies from this file
|
11
|
+
# will add to the boot time of your test suite on EVERY test run, even for an
|
12
|
+
# individual file that may not need all of that loaded. Instead, consider making
|
13
|
+
# a separate helper file that requires the additional dependencies and performs
|
14
|
+
# the additional setup, and require it from the spec files that actually need it.
|
15
|
+
#
|
16
|
+
# The `.rspec` file also contains a few flags that are not defaults but that
|
17
|
+
# users commonly want.
|
18
|
+
#
|
19
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
20
|
+
RSpec.configure do |config|
|
21
|
+
# rspec-expectations config goes here. You can use an alternate
|
22
|
+
# assertion/expectation library such as wrong or the stdlib/minitest
|
23
|
+
# assertions if you prefer.
|
24
|
+
config.expect_with :rspec do |expectations|
|
25
|
+
# This option will default to `true` in RSpec 4. It makes the `description`
|
26
|
+
# and `failure_message` of custom matchers include text for helper methods
|
27
|
+
# defined using `chain`, e.g.:
|
28
|
+
# be_bigger_than(2).and_smaller_than(4).description
|
29
|
+
# # => "be bigger than 2 and smaller than 4"
|
30
|
+
# ...rather than:
|
31
|
+
# # => "be bigger than 2"
|
32
|
+
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
33
|
+
end
|
34
|
+
|
35
|
+
# rspec-mocks config goes here. You can use an alternate test double
|
36
|
+
# library (such as bogus or mocha) by changing the `mock_with` option here.
|
37
|
+
config.mock_with :rspec do |mocks|
|
38
|
+
# Prevents you from mocking or stubbing a method that does not exist on
|
39
|
+
# a real object. This is generally recommended, and will default to
|
40
|
+
# `true` in RSpec 4.
|
41
|
+
mocks.verify_partial_doubles = true
|
42
|
+
end
|
43
|
+
|
44
|
+
# The settings below are suggested to provide a good initial experience
|
45
|
+
# with RSpec, but feel free to customize to your heart's content.
|
46
|
+
# These two settings work together to allow you to limit a spec run
|
47
|
+
# to individual examples or groups you care about by tagging them with
|
48
|
+
# `:focus` metadata. When nothing is tagged with `:focus`, all examples
|
49
|
+
# get run.
|
50
|
+
config.filter_run :focus
|
51
|
+
config.run_all_when_everything_filtered = true
|
52
|
+
|
53
|
+
# Limits the available syntax to the non-monkey patched syntax that is recommended.
|
54
|
+
# For more details, see:
|
55
|
+
# - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
|
56
|
+
# - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
|
57
|
+
# - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
|
58
|
+
config.disable_monkey_patching!
|
59
|
+
|
60
|
+
# This setting enables warnings. It's recommended, but in some cases may
|
61
|
+
# be too noisy due to issues in dependencies.
|
62
|
+
config.warnings = true
|
63
|
+
|
64
|
+
# Many RSpec users commonly either run the entire suite or an individual
|
65
|
+
# file, and it's useful to allow more verbose output when running an
|
66
|
+
# individual spec file.
|
67
|
+
if config.files_to_run.one?
|
68
|
+
# Use the documentation formatter for detailed output,
|
69
|
+
# unless a formatter has already been configured
|
70
|
+
# (e.g. via a command-line flag).
|
71
|
+
config.default_formatter = 'doc'
|
72
|
+
end
|
73
|
+
|
74
|
+
# Print the 10 slowest examples and example groups at the
|
75
|
+
# end of the spec run, to help surface which specs are running
|
76
|
+
# particularly slow.
|
77
|
+
config.profile_examples = 10
|
78
|
+
|
79
|
+
# Run specs in random order to surface order dependencies. If you find an
|
80
|
+
# order dependency and want to debug it, you can fix the order by providing
|
81
|
+
# the seed, which is printed after each run.
|
82
|
+
# --seed 1234
|
83
|
+
config.order = :random
|
84
|
+
|
85
|
+
# Seed global randomization in this process using the `--seed` CLI option.
|
86
|
+
# Setting this allows you to use `--seed` to deterministically reproduce
|
87
|
+
# test failures related to randomization by passing the same `--seed` value
|
88
|
+
# as the one that triggered the failure.
|
89
|
+
Kernel.srand config.seed
|
90
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module PostgresToRedshift::Test
|
2
|
+
def self.source_uri
|
3
|
+
PostgresToRedshift.source_uri
|
4
|
+
end
|
5
|
+
|
6
|
+
def self.test_connection
|
7
|
+
@test_connection ||= PG::Connection.new(host: source_uri.host, port: source_uri.port, user: source_uri.user || ENV['USER'], password: source_uri.password, dbname: source_uri.path[1..-1])
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
RSpec.configure do |config|
|
12
|
+
config.before :suite do
|
13
|
+
PostgresToRedshift::Test.test_connection
|
14
|
+
end
|
15
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: postgres_to_redshift
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alex Rakoczy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.6'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.6'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -76,14 +76,24 @@ extensions: []
|
|
76
76
|
extra_rdoc_files: []
|
77
77
|
files:
|
78
78
|
- ".gitignore"
|
79
|
+
- ".rspec"
|
80
|
+
- ".travis.yml"
|
79
81
|
- Gemfile
|
82
|
+
- Gemfile.lock
|
80
83
|
- LICENSE.txt
|
81
84
|
- README.md
|
82
85
|
- Rakefile
|
83
86
|
- bin/postgres_to_redshift
|
84
87
|
- lib/postgres_to_redshift.rb
|
88
|
+
- lib/postgres_to_redshift/column.rb
|
89
|
+
- lib/postgres_to_redshift/table.rb
|
85
90
|
- lib/postgres_to_redshift/version.rb
|
86
91
|
- postgres_to_redshift.gemspec
|
92
|
+
- spec/lib/postgres_to_redshift/column_spec.rb
|
93
|
+
- spec/lib/postgres_to_redshift/table_spec.rb
|
94
|
+
- spec/lib/postgres_to_redshift_spec.rb
|
95
|
+
- spec/spec_helper.rb
|
96
|
+
- spec/spec_prepare.rb
|
87
97
|
homepage: https://github.com/kitchensurfing/postgres_to_redshift
|
88
98
|
licenses:
|
89
99
|
- MIT
|
@@ -108,4 +118,9 @@ rubygems_version: 2.4.5
|
|
108
118
|
signing_key:
|
109
119
|
specification_version: 4
|
110
120
|
summary: Load postgres databases into Amazon Redshift
|
111
|
-
test_files:
|
121
|
+
test_files:
|
122
|
+
- spec/lib/postgres_to_redshift/column_spec.rb
|
123
|
+
- spec/lib/postgres_to_redshift/table_spec.rb
|
124
|
+
- spec/lib/postgres_to_redshift_spec.rb
|
125
|
+
- spec/spec_helper.rb
|
126
|
+
- spec/spec_prepare.rb
|