postgres_to_redshift 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 32377367b67f4405e131d319f810bbc38fbc2ac5
4
- data.tar.gz: 97facb3cae58afb4c65b6cb8c17415e1e6e79f23
3
+ metadata.gz: b9aca81574e39acdc400fcfd77f454cb9c93f682
4
+ data.tar.gz: f4a8430406d25028ff1ac9b305837d34d0463464
5
5
  SHA512:
6
- metadata.gz: bb62207056a659dce0b1cc6ceac28ee92af14794510b7c59150e5fe61d19e7f4e4c79fa8a1601989dfba3b097b0e0702bac67b258058a93f57d5f1c1dffd959f
7
- data.tar.gz: e8f4cffccd43dbd33748c49b24bd4a0dadc670077ac798b2d5533da7cb144faa8ee60829fdbd60fcd34549c2af70b63d40edb05b71f730af9108680420140bde
6
+ metadata.gz: 8136aaffdd83cb676290393235f3cb1e99cf257c8c3dc193c0b234feec09b3f3cfedb29a8e6ad7f9f55641a2e1ef02c624f6d86073add5fec9ee5e0a2eacdde4
7
+ data.tar.gz: c0b9b49f1ba41e26fd2bb87328c5f672648a98ddd432335d8a899f6dbd722b1c32b6d6cb85b4de19630e42690eaf835957a6e801001d8a46972f917f0678482e
data/.gitignore CHANGED
@@ -1,4 +1,5 @@
1
1
  /.bundle/
2
+ /vendor/bundle
2
3
  /.yardoc
3
4
  /Gemfile.lock
4
5
  /_yardoc/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,10 @@
1
+ language: ruby
2
+ bundler_args: --without development --deployment --jobs=3 --retry=3
3
+ cache: bundler
4
+ rvm:
5
+ - 2.2.0
6
+ - 2.1.0
7
+ before_script:
8
+ - psql -c 'create database travis_ci_test;' -U postgres
9
+ env:
10
+ - POSTGRES_TO_REDSHIFT_SOURCE_URI=postgres://postgres@localhost/travis_ci_test
data/Gemfile CHANGED
@@ -2,3 +2,8 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in postgres_to_redshift.gemspec
4
4
  gemspec
5
+
6
+ group :test do
7
+ gem 'rake'
8
+ gem 'rspec'
9
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,44 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ postgres_to_redshift (0.1.1)
5
+ aws-sdk (~> 1.54)
6
+ pg (~> 0.17.0)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ aws-sdk (1.61.0)
12
+ aws-sdk-v1 (= 1.61.0)
13
+ aws-sdk-v1 (1.61.0)
14
+ json (~> 1.4)
15
+ nokogiri (>= 1.4.4)
16
+ diff-lcs (1.2.5)
17
+ json (1.8.2)
18
+ mini_portile (0.6.2)
19
+ nokogiri (1.6.6.2)
20
+ mini_portile (~> 0.6.0)
21
+ pg (0.17.1)
22
+ rake (10.4.2)
23
+ rspec (3.2.0)
24
+ rspec-core (~> 3.2.0)
25
+ rspec-expectations (~> 3.2.0)
26
+ rspec-mocks (~> 3.2.0)
27
+ rspec-core (3.2.0)
28
+ rspec-support (~> 3.2.0)
29
+ rspec-expectations (3.2.0)
30
+ diff-lcs (>= 1.2.0, < 2.0)
31
+ rspec-support (~> 3.2.0)
32
+ rspec-mocks (3.2.0)
33
+ diff-lcs (>= 1.2.0, < 2.0)
34
+ rspec-support (~> 3.2.0)
35
+ rspec-support (3.2.1)
36
+
37
+ PLATFORMS
38
+ ruby
39
+
40
+ DEPENDENCIES
41
+ bundler (~> 1.6)
42
+ postgres_to_redshift!
43
+ rake
44
+ rspec
data/README.md CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  This gem copies data from Postgres to Redshift. It is especially useful for copying data from Postgres to Redshift on Heroku.
4
4
 
5
+ [![Build Status](https://travis-ci.org/kitchensurfing/postgres_to_redshift.svg?branch=master)](https://travis-ci.org/kitchensurfing/postgres_to_redshift)
6
+
5
7
  ## Installation
6
8
 
7
9
  Add this line to your application's Gemfile:
@@ -20,8 +22,11 @@ Or install it yourself as:
20
22
 
21
23
  ## Usage
22
24
 
25
+ Set your source and target databases, as well as your S3 intermediary.
26
+
23
27
  ```bash
24
- export REDSHIFT_URI='postgres://username:password@host:port/database-name'
28
+ export POSTGRES_TO_REDSHIFT_SOURCE_URI='postgres://username:password@host:port/database-name'
29
+ export POSTGRES_TO_REDSHIFT_TARGET_URI='postgres://username:password@host:port/database-name'
25
30
  export S3_DATABASE_EXPORT_ID='yourid'
26
31
  export S3_DATABASE_EXPORT_KEY='yourkey'
27
32
  export S3_DATABASE_EXPORT_BUCKET='some-bucket-to-use'
data/Rakefile CHANGED
@@ -1,2 +1,10 @@
1
1
  require "bundler/gem_tasks"
2
2
 
3
+ begin
4
+ require 'rspec/core/rake_task'
5
+ RSpec::Core::RakeTask.new(:spec)
6
+ rescue LoadError
7
+ end
8
+
9
+ task(:default).clear
10
+ task :default => :spec
@@ -3,4 +3,3 @@
3
3
  require 'postgres_to_redshift'
4
4
 
5
5
  PostgresToRedshift.update_tables
6
-
@@ -0,0 +1,85 @@
1
+ # table_catalog | postgres_to_redshift
2
+ # table_schema | public
3
+ # table_name | films
4
+ # column_name | description
5
+ # ordinal_position | 2
6
+ # column_default |
7
+ # is_nullable | YES
8
+ # data_type | character varying
9
+ # character_maximum_length | 255
10
+ # character_octet_length | 1020
11
+ # numeric_precision |
12
+ # numeric_precision_radix |
13
+ # numeric_scale |
14
+ # datetime_precision |
15
+ # interval_type |
16
+ # interval_precision |
17
+ # character_set_catalog |
18
+ # character_set_schema |
19
+ # character_set_name |
20
+ # collation_catalog |
21
+ # collation_schema |
22
+ # collation_name |
23
+ # domain_catalog |
24
+ # domain_schema |
25
+ # domain_name |
26
+ # udt_catalog | postgres_to_redshift
27
+ # udt_schema | pg_catalog
28
+ # udt_name | varchar
29
+ # scope_catalog |
30
+ # scope_schema |
31
+ # scope_name |
32
+ # maximum_cardinality |
33
+ # dtd_identifier | 2
34
+ # is_self_referencing | NO
35
+ # is_identity | NO
36
+ # identity_generation |
37
+ # identity_start |
38
+ # identity_increment |
39
+ # identity_maximum |
40
+ # identity_minimum |
41
+ # identity_cycle |
42
+ # is_generated | NEVER
43
+ # generation_expression |
44
+ # is_updatable | YES
45
+ #
46
# The namespace is reopened (instead of using the compact
# `PostgresToRedshift::Column` form) so that this file also loads when
# PostgresToRedshift has not been defined yet.
class PostgresToRedshift
  # A single column of a source table, described by a Hash of attributes with
  # String keys. Only "column_name" and "data_type" are read by this class.
  class Column
    attr_accessor :attributes

    # Source data types that are replaced by another type when exporting.
    # Any data type that is not listed here is passed through unchanged.
    # Frozen so the mapping cannot be altered by accident at runtime.
    CAST_TYPES_FOR_COPY = {
      "text" => "CHARACTER VARYING(65535)",
      "json" => "CHARACTER VARYING(65535)",
      "bytea" => "CHARACTER VARYING(65535)",
      "money" => "DECIMAL(19,2)",
      "oid" => "CHARACTER VARYING(65535)",
    }.freeze

    # @param attributes [Hash] column attributes keyed by String
    def initialize(attributes:)
      self.attributes = attributes
    end

    # @return [String, nil] the value stored under "column_name"
    def name
      attributes["column_name"]
    end

    # Builds the SELECT-list expression for this column.
    #
    # @return [String] the double-quoted column name, or a CAST expression
    #   aliased back to the column name when the type has to be converted
    def name_for_copy
      if needs_type_cast?
        # NOTE(review): the alias is emitted without double quotes, unlike the
        # column reference inside CAST; confirm this is intended for names that
        # are reserved words or contain upper-case characters.
        %Q[CAST("#{name}" AS #{data_type_for_copy}) AS #{name}]
      else
        %Q["#{name}"]
      end
    end

    # @return [String, nil] the value stored under "data_type"
    def data_type
      attributes["data_type"]
    end

    # @return [String, nil] the mapped type from CAST_TYPES_FOR_COPY, or the
    #   original data type when no mapping exists
    def data_type_for_copy
      CAST_TYPES_FOR_COPY[data_type] || data_type
    end

    private

    # True when the export type differs from the source type.
    def needs_type_cast?
      data_type != data_type_for_copy
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # table_catalog | postgres_to_redshift
2
+ # table_schema | public
3
+ # table_name | acquisition_pages
4
+ # table_type | BASE TABLE
5
+ # self_referencing_column_name |
6
+ # reference_generation |
7
+ # user_defined_type_catalog |
8
+ # user_defined_type_schema |
9
+ # user_defined_type_name |
10
+ # is_insertable_into | YES
11
+ # is_typed | NO
12
+ # commit_action |
13
+ #
14
class PostgresToRedshift
  # A source table or view, described by a Hash of attributes with String keys
  # ("table_name" and "table_type" are the ones read here) plus its columns.
  class Table
    attr_accessor :attributes
    attr_reader :columns

    # @param attributes [Hash] table attributes keyed by String
    # @param columns [Enumerable<Hash>] one attribute Hash per column
    def initialize(attributes: , columns: [])
      self.attributes = attributes
      self.columns = columns
    end

    # @return [String, nil] the value stored under "table_name"
    def name
      attributes["table_name"]
    end
    alias_method :to_s, :name

    # @return [String] the table name with a trailing "_view" removed
    def target_table_name
      name.gsub(/_view$/, '')
    end

    # Wraps every raw column definition in a Column object.
    def columns=(column_definitions = [])
      @columns = column_definitions.map { |definition| Column.new(attributes: definition) }
    end

    # @return [String] comma-separated `"name" type` pairs for CREATE TABLE
    def columns_for_create
      definitions = columns.map do |col|
        %Q["#{col.name}" #{col.data_type_for_copy}]
      end
      definitions.join(", ")
    end

    # @return [String] comma-separated select expressions, one per column
    def columns_for_copy
      columns.map(&:name_for_copy).join(", ")
    end

    # @return [Boolean] whether "table_type" is exactly "VIEW"
    def is_view?
      table_type = attributes["table_type"]
      table_type == "VIEW"
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  class PostgresToRedshift
2
- VERSION = "0.0.1"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -2,50 +2,74 @@ require "postgres_to_redshift/version"
2
2
  require 'pg'
3
3
  require 'uri'
4
4
  require 'aws-sdk'
5
+ require 'zlib'
6
+ require 'stringio'
7
+ require "postgres_to_redshift/table"
8
+ require "postgres_to_redshift/column"
5
9
 
6
10
  class PostgresToRedshift
11
+ class << self
12
+ attr_accessor :source_uri, :target_uri
13
+ end
14
+
7
15
  attr_reader :source_connection, :target_connection, :s3
8
16
 
9
17
  def self.update_tables
10
- update_tables = PostgresToRedshift.new(source_uri: ARGV[0])
11
- update_tables.create_new_tables
18
+ update_tables = PostgresToRedshift.new
19
+
20
+ update_tables.tables.each do |table|
21
+ target_connection.exec("CREATE TABLE IF NOT EXISTS public.#{table.target_table_name} (#{table.columns_for_create})")
22
+
23
+ update_tables.copy_table(table)
24
+
25
+ update_tables.import_table(table)
26
+ end
27
+ end
28
+
29
+ def self.source_uri
30
+ @source_uri ||= URI.parse(ENV['POSTGRES_TO_REDSHIFT_SOURCE_URI'])
31
+ end
32
+
33
+ def self.target_uri
34
+ @target_uri ||= URI.parse(ENV['POSTGRES_TO_REDSHIFT_TARGET_URI'])
35
+ end
36
+
37
+ def self.source_connection
38
+ unless instance_variable_defined?(:"@source_connection")
39
+ @source_connection = PG::Connection.new(host: source_uri.host, port: source_uri.port, user: source_uri.user || ENV['USER'], password: source_uri.password, dbname: source_uri.path[1..-1])
40
+ @source_connection.exec("SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY;")
41
+ end
42
+
43
+ @source_connection
44
+ end
12
45
 
13
- # FIXME: BIG WARNING HERE: this order is important. We want the views to overwrite the tables. We should make it so the order doesn't matter later.
14
- update_tables.copy_tables
15
- update_tables.copy_views
16
- update_tables.import_tables
46
+ def self.target_connection
47
+ unless instance_variable_defined?(:"@target_connection")
48
+ @target_connection = PG::Connection.new(host: target_uri.host, port: target_uri.port, user: target_uri.user || ENV['USER'], password: target_uri.password, dbname: target_uri.path[1..-1])
49
+ end
50
+
51
+ @target_connection
17
52
  end
18
53
 
19
- def initialize(source_uri:)
20
- source_uri = URI.parse(source_uri)
21
- target_uri = URI.parse(ENV['REDSHIFT_URI'])
22
- @source_connection = PG::Connection.new(host: source_uri.host, port: source_uri.port, user: source_uri.user, password: source_uri.password, dbname: source_uri.path[1..-1])
23
- @source_connection.exec("SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY;")
24
- @target_connection = PG::Connection.new(host: target_uri.host, port: target_uri.port, user: target_uri.user, password: target_uri.password, dbname: target_uri.path[1..-1])
54
+ def source_connection
55
+ self.class.source_connection
25
56
  end
26
57
 
27
- def views
28
- source_connection.exec("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'VIEW'").map { |row| row["table_name"] } - ["pg_stat_statements"]
58
+ def target_connection
59
+ self.class.target_connection
29
60
  end
30
61
 
31
62
  def tables
32
- source_connection.exec("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'BASE TABLE'").map { |row| row["table_name"] }
63
+ source_connection.exec("SELECT * FROM information_schema.tables WHERE table_schema = 'public' AND table_type in ('BASE TABLE', 'VIEW')").map do |table_attributes|
64
+ table = Table.new(attributes: table_attributes)
65
+ next if table.name =~ /^pg_/
66
+ table.columns = column_definitions(table)
67
+ table
68
+ end.compact
33
69
  end
34
70
 
35
- def table_columns(table_name)
36
- source_connection.exec("SELECT column_name, data_type, character_maximum_length FROM information_schema.columns WHERE table_schema='public' AND table_name='#{table_name}'").map do |row|
37
- data_type = row["data_type"]
38
- data_type.gsub!(/text/, 'character varying(max)')
39
- data_type.gsub!(/json/, 'character varying(max)')
40
- data_type.gsub!(/bytea/, 'character varying(max)')
41
- data_type.gsub!(/money/, 'character varying(max)')
42
-
43
- if row["character_maximum_length"].to_s.length > 0
44
- %Q|"#{row["column_name"]}" #{data_type}(#{row["character_maximum_length"]})|
45
- else
46
- %Q|"#{row["column_name"]}" #{data_type}|
47
- end
48
- end.join(", ")
71
+ def column_definitions(table)
72
+ source_connection.exec("SELECT * FROM information_schema.columns WHERE table_schema='public' AND table_name='#{table.name}' order by ordinal_position")
49
73
  end
50
74
 
51
75
  def s3
@@ -56,68 +80,41 @@ class PostgresToRedshift
56
80
  @bucket ||= s3.buckets[ENV['S3_DATABASE_EXPORT_BUCKET']]
57
81
  end
58
82
 
59
- def create_new_tables
60
- tables.each do |table|
61
- target_connection.exec("CREATE TABLE IF NOT EXISTS public.#{table} (#{table_columns(table)})")
62
- end
63
- end
83
+ def copy_table(table)
84
+ buffer = StringIO.new
85
+ zip = Zlib::GzipWriter.new(buffer)
64
86
 
65
- def copy_table(source_table, target_table, is_view = false)
66
- buffer = ""
67
- puts "Downloading #{source_table}"
68
- copy_command =
69
- if is_view
70
- "COPY (SELECT * FROM #{source_table}) TO STDOUT WITH DELIMITER '|'"
71
- else
72
- "COPY #{source_table} TO STDOUT WITH DELIMITER '|'"
73
- end
87
+ puts "Downloading #{table}"
88
+ copy_command = "COPY (SELECT #{table.columns_for_copy} FROM #{table.name}) TO STDOUT WITH DELIMITER '|'"
74
89
 
75
90
  source_connection.copy_data(copy_command) do
76
91
  while row = source_connection.get_copy_data
77
- buffer << row
92
+ zip.write(row)
78
93
  end
79
94
  end
80
- upload_table(target_table, buffer)
95
+ zip.finish
96
+ buffer.rewind
97
+ upload_table(table, buffer)
81
98
  end
82
99
 
83
- def upload_table(target_table, buffer)
84
- puts "Uploading #{target_table}"
85
- bucket.objects["export/#{target_table}.psv"].delete
86
- bucket.objects["export/#{target_table}.psv"].write(buffer, acl: :authenticated_read)
100
+ def upload_table(table, buffer)
101
+ puts "Uploading #{table.target_table_name}"
102
+ bucket.objects["export/#{table.target_table_name}.psv.gz"].delete
103
+ bucket.objects["export/#{table.target_table_name}.psv.gz"].write(buffer, acl: :authenticated_read)
87
104
  end
88
105
 
89
- def import_table(target_table)
90
- puts "Importing #{target_table}"
91
- target_connection.exec("DROP TABLE IF EXISTS public.#{target_table}_updating")
106
+ def import_table(table)
107
+ puts "Importing #{table.target_table_name}"
108
+ target_connection.exec("DROP TABLE IF EXISTS public.#{table.target_table_name}_updating")
92
109
 
93
110
  target_connection.exec("BEGIN;")
94
111
 
95
- target_connection.exec("ALTER TABLE public.#{target_table} RENAME TO #{target_table}_updating")
112
+ target_connection.exec("ALTER TABLE public.#{table.target_table_name} RENAME TO #{table.target_table_name}_updating")
96
113
 
97
- target_connection.exec("CREATE TABLE public.#{target_table} (#{table_columns(target_table)})")
114
+ target_connection.exec("CREATE TABLE public.#{table.target_table_name} (#{table.columns_for_create})")
98
115
 
99
- target_connection.exec("COPY public.#{target_table} FROM 's3://#{ENV['S3_DATABASE_EXPORT_BUCKET']}/export/#{target_table}.psv' CREDENTIALS 'aws_access_key_id=#{ENV['S3_DATABASE_EXPORT_ID']};aws_secret_access_key=#{ENV['S3_DATABASE_EXPORT_KEY']}' TRUNCATECOLUMNS ESCAPE DELIMITER as '|';")
116
+ target_connection.exec("COPY public.#{table.target_table_name} FROM 's3://#{ENV['S3_DATABASE_EXPORT_BUCKET']}/export/#{table.target_table_name}.psv.gz' CREDENTIALS 'aws_access_key_id=#{ENV['S3_DATABASE_EXPORT_ID']};aws_secret_access_key=#{ENV['S3_DATABASE_EXPORT_KEY']}' GZIP TRUNCATECOLUMNS ESCAPE DELIMITER as '|';")
100
117
 
101
118
  target_connection.exec("COMMIT;")
102
119
  end
103
-
104
- def copy_tables
105
- tables.each do |table|
106
- copy_table(table, table)
107
- end
108
- end
109
-
110
- def copy_views
111
- views.each do |view|
112
- table = view.gsub(/_view/, '')
113
- copy_table(view, table, true)
114
- end
115
- end
116
-
117
- # FIXME: This relies on views being uploaded after tables.
118
- def import_tables
119
- tables.each do |table|
120
- import_table(table)
121
- end
122
- end
123
120
  end
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_development_dependency "bundler", "~> 1.7"
21
+ spec.add_development_dependency "bundler", "~> 1.6"
22
22
  spec.add_development_dependency "rake", "~> 10.0"
23
23
  spec.add_dependency "pg", "~> 0.17.0"
24
24
  spec.add_dependency "aws-sdk", "~> 1.54"
@@ -0,0 +1,168 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe PostgresToRedshift::Column do
4
+ context 'with a simple column' do
5
+ before do
6
+ attributes = {
7
+ "table_catalog" => "postgres_to_redshift",
8
+ "table_schema" => "public",
9
+ "table_name" => "films",
10
+ "column_name" => "description",
11
+ "ordinal_position" => "2",
12
+ "column_default" => nil,
13
+ "is_nullable" => "YES",
14
+ "data_type" => "character varying",
15
+ "character_maximum_length" => "255",
16
+ "character_octet_length" => "1020"
17
+ }
18
+
19
+ @column = PostgresToRedshift::Column.new attributes: attributes
20
+ end
21
+
22
+ describe '#name' do
23
+ it 'returns the column name' do
24
+ expect(@column.name).to eq("description")
25
+ end
26
+ end
27
+ end
28
+
29
+ describe '#name_for_copy' do
30
+ it 'casts fields to appropriate type' do
31
+ attributes = {
32
+ "table_catalog" => "postgres_to_redshift",
33
+ "table_schema" => "public",
34
+ "table_name" => "films",
35
+ "column_name" => "description",
36
+ "ordinal_position" => "2",
37
+ "column_default" => nil,
38
+ "is_nullable" => "YES",
39
+ "data_type" => "text",
40
+ "character_maximum_length" => nil,
41
+ "character_octet_length" => "1073741824"
42
+ }
43
+
44
+ column = PostgresToRedshift::Column.new attributes: attributes
45
+ expect(column.name_for_copy).to eq('CAST("description" AS CHARACTER VARYING(65535)) AS description')
46
+ end
47
+
48
+ it 'does not cast fields that do not need casting' do
49
+ attributes = {
50
+ "table_catalog" => "postgres_to_redshift",
51
+ "table_schema" => "public",
52
+ "table_name" => "films",
53
+ "column_name" => "description",
54
+ "ordinal_position" => "2",
55
+ "column_default" => nil,
56
+ "is_nullable" => "YES",
57
+ "data_type" => "character varying",
58
+ "character_maximum_length" => "255",
59
+ "character_octet_length" => "1020"
60
+ }
61
+
62
+ column = PostgresToRedshift::Column.new attributes: attributes
63
+ expect(column.name_for_copy).to eq('"description"')
64
+ end
65
+ end
66
+
67
+ describe "#data_type_for_copy" do
68
+ it 'casts text to character varying(65535)' do
69
+ attributes = {
70
+ "table_catalog" => "postgres_to_redshift",
71
+ "table_schema" => "public",
72
+ "table_name" => "films",
73
+ "column_name" => "description",
74
+ "ordinal_position" => "2",
75
+ "column_default" => nil,
76
+ "is_nullable" => "YES",
77
+ "data_type" => "text",
78
+ "character_maximum_length" => nil,
79
+ "character_octet_length" => "1073741824"
80
+ }
81
+
82
+ column = PostgresToRedshift::Column.new attributes: attributes
83
+ expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
84
+ end
85
+
86
+ it 'casts json to character varying(65535)' do
87
+ attributes = {
88
+ "table_catalog" => "postgres_to_redshift",
89
+ "table_schema" => "public",
90
+ "table_name" => "films",
91
+ "column_name" => "description",
92
+ "ordinal_position" => "2",
93
+ "column_default" => nil,
94
+ "is_nullable" => "YES",
95
+ "data_type" => "json",
96
+ }
97
+
98
+ column = PostgresToRedshift::Column.new attributes: attributes
99
+ expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
100
+ end
101
+
102
+ it 'casts bytea to character varying(65535)' do
103
+ attributes = {
104
+ "table_catalog" => "postgres_to_redshift",
105
+ "table_schema" => "public",
106
+ "table_name" => "films",
107
+ "column_name" => "description",
108
+ "ordinal_position" => "2",
109
+ "column_default" => nil,
110
+ "is_nullable" => "YES",
111
+ "data_type" => "bytea",
112
+ }
113
+
114
+ column = PostgresToRedshift::Column.new attributes: attributes
115
+ expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
116
+ end
117
+
118
+ it 'casts money to decimal(19,2)' do
119
+ attributes = {
120
+ "table_catalog" => "postgres_to_redshift",
121
+ "table_schema" => "public",
122
+ "table_name" => "films",
123
+ "column_name" => "description",
124
+ "ordinal_position" => "2",
125
+ "column_default" => nil,
126
+ "is_nullable" => "YES",
127
+ "data_type" => "money",
128
+ }
129
+
130
+ column = PostgresToRedshift::Column.new attributes: attributes
131
+ expect(column.data_type_for_copy).to eq("DECIMAL(19,2)")
132
+ end
133
+
134
+ it 'casts oid to character varying' do
135
+ attributes = {
136
+ "table_catalog" => "postgres_to_redshift",
137
+ "table_schema" => "public",
138
+ "table_name" => "films",
139
+ "column_name" => "description",
140
+ "ordinal_position" => "2",
141
+ "column_default" => nil,
142
+ "is_nullable" => "YES",
143
+ "data_type" => "oid",
144
+ }
145
+
146
+ column = PostgresToRedshift::Column.new attributes: attributes
147
+ expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
148
+ end
149
+
150
+ it "returns the data type if no cast necessary" do
151
+ attributes = {
152
+ "table_catalog" => "postgres_to_redshift",
153
+ "table_schema" => "public",
154
+ "table_name" => "films",
155
+ "column_name" => "description",
156
+ "ordinal_position" => "2",
157
+ "column_default" => nil,
158
+ "is_nullable" => "YES",
159
+ "data_type" => "character varying",
160
+ "character_maximum_length" => "255",
161
+ "character_octet_length" => "1020"
162
+ }
163
+
164
+ column = PostgresToRedshift::Column.new attributes: attributes
165
+ expect(column.data_type_for_copy).to eq("character varying")
166
+ end
167
+ end
168
+ end
@@ -0,0 +1,83 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe PostgresToRedshift::Table do
4
+ context 'with a simple table' do
5
+ before do
6
+ attributes = {
7
+ "table_catalog" => "postgres_to_redshift",
8
+ "table_schema" => "public",
9
+ "table_name" => "films",
10
+ "table_type" => "BASE TABLE",
11
+ }
12
+ columns = [
13
+ {
14
+ "table_catalog" => "postgres_to_redshift",
15
+ "table_schema" => "public",
16
+ "table_name" => "films",
17
+ "column_name" => "description",
18
+ "ordinal_position" => "2",
19
+ "column_default" => nil,
20
+ "is_nullable" => "YES",
21
+ "data_type" => "character varying",
22
+ "character_maximum_length" => "255",
23
+ "character_octet_length" => "1020"
24
+ }
25
+ ]
26
+
27
+ @table = PostgresToRedshift::Table.new(attributes: attributes, columns: columns)
28
+ end
29
+
30
+ describe '#name' do
31
+ it 'returns the name of the table' do
32
+ expect(@table.name).to eq("films")
33
+ end
34
+ end
35
+
36
+ describe '#columns' do
37
+ it 'returns a list of columns' do
38
+ expect(@table.columns.size).to eq(1)
39
+ expect(@table.columns.first.name).to eq("description")
40
+ end
41
+ end
42
+ end
43
+
44
+ describe '#is_view?' do
45
+ it 'returns true if it is a view' do
46
+ attributes = {
47
+ "table_catalog" => "postgres_to_redshift",
48
+ "table_schema" => "public",
49
+ "table_name" => "films",
50
+ "table_type" => "VIEW",
51
+ }
52
+
53
+ table = PostgresToRedshift::Table.new(attributes: attributes)
54
+ expect(table.is_view?).to be_truthy
55
+ end
56
+
57
+ it 'returns false if it is not a view' do
58
+ attributes = {
59
+ "table_catalog" => "postgres_to_redshift",
60
+ "table_schema" => "public",
61
+ "table_name" => "films",
62
+ "table_type" => "BASE TABLE",
63
+ }
64
+
65
+ table = PostgresToRedshift::Table.new(attributes: attributes)
66
+ expect(table.is_view?).to be_falsey
67
+ end
68
+ end
69
+
70
+ describe 'target_table_name' do
71
+ it 'strips _view from the end of the table name' do
72
+ attributes = {
73
+ "table_catalog" => "postgres_to_redshift",
74
+ "table_schema" => "public",
75
+ "table_name" => "films_view",
76
+ "table_type" => "VIEW",
77
+ }
78
+
79
+ table = PostgresToRedshift::Table.new(attributes: attributes)
80
+ expect(table.target_table_name).to eq("films")
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,29 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe PostgresToRedshift do
4
+ it 'opens a read only connection to source database' do
5
+ read_only_state = PostgresToRedshift.source_connection.exec("SHOW transaction_read_only").first["transaction_read_only"]
6
+
7
+ expect(read_only_state).to eq("on")
8
+ end
9
+
10
+ context 'with a simple table' do
11
+ before do
12
+ PostgresToRedshift::Test.test_connection.exec(%Q[DROP TABLE IF EXISTS "films"; CREATE TABLE IF NOT EXISTS "films" ("id" SERIAL PRIMARY KEY, "title" text);])
13
+ end
14
+
15
+ it 'lists available tables' do
16
+ expect(PostgresToRedshift.new.tables.size).to eq(1)
17
+ expect(PostgresToRedshift.new.tables.first.name).to eq("films")
18
+ end
19
+
20
+ it 'lists column definitions' do
21
+ table = PostgresToRedshift.new.tables.first
22
+ film_columns = PostgresToRedshift.new.column_definitions(table)
23
+
24
+ expect(film_columns.to_a.size).to eq(2)
25
+ expect(film_columns.first["column_name"]).to eq("id")
26
+ expect(table.columns.first.name).to eq("id")
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,90 @@
1
+ require File.expand_path("../../lib/postgres_to_redshift", __FILE__)
2
+ require 'spec_prepare'
3
+
4
+ # This file was generated by the `rspec --init` command. Conventionally, all
5
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
6
+ # The generated `.rspec` file contains `--require spec_helper` which will cause this
7
+ # file to always be loaded, without a need to explicitly require it in any files.
8
+ #
9
+ # Given that it is always loaded, you are encouraged to keep this file as
10
+ # light-weight as possible. Requiring heavyweight dependencies from this file
11
+ # will add to the boot time of your test suite on EVERY test run, even for an
12
+ # individual file that may not need all of that loaded. Instead, consider making
13
+ # a separate helper file that requires the additional dependencies and performs
14
+ # the additional setup, and require it from the spec files that actually need it.
15
+ #
16
+ # The `.rspec` file also contains a few flags that are not defaults but that
17
+ # users commonly want.
18
+ #
19
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
20
+ RSpec.configure do |config|
21
+ # rspec-expectations config goes here. You can use an alternate
22
+ # assertion/expectation library such as wrong or the stdlib/minitest
23
+ # assertions if you prefer.
24
+ config.expect_with :rspec do |expectations|
25
+ # This option will default to `true` in RSpec 4. It makes the `description`
26
+ # and `failure_message` of custom matchers include text for helper methods
27
+ # defined using `chain`, e.g.:
28
+ # be_bigger_than(2).and_smaller_than(4).description
29
+ # # => "be bigger than 2 and smaller than 4"
30
+ # ...rather than:
31
+ # # => "be bigger than 2"
32
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
33
+ end
34
+
35
+ # rspec-mocks config goes here. You can use an alternate test double
36
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
37
+ config.mock_with :rspec do |mocks|
38
+ # Prevents you from mocking or stubbing a method that does not exist on
39
+ # a real object. This is generally recommended, and will default to
40
+ # `true` in RSpec 4.
41
+ mocks.verify_partial_doubles = true
42
+ end
43
+
44
+ # The settings below are suggested to provide a good initial experience
45
+ # with RSpec, but feel free to customize to your heart's content.
46
+ # These two settings work together to allow you to limit a spec run
47
+ # to individual examples or groups you care about by tagging them with
48
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
49
+ # get run.
50
+ config.filter_run :focus
51
+ config.run_all_when_everything_filtered = true
52
+
53
+ # Limits the available syntax to the non-monkey patched syntax that is recommended.
54
+ # For more details, see:
55
+ # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
56
+ # - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
57
+ # - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
58
+ config.disable_monkey_patching!
59
+
60
+ # This setting enables warnings. It's recommended, but in some cases may
61
+ # be too noisy due to issues in dependencies.
62
+ config.warnings = true
63
+
64
+ # Many RSpec users commonly either run the entire suite or an individual
65
+ # file, and it's useful to allow more verbose output when running an
66
+ # individual spec file.
67
+ if config.files_to_run.one?
68
+ # Use the documentation formatter for detailed output,
69
+ # unless a formatter has already been configured
70
+ # (e.g. via a command-line flag).
71
+ config.default_formatter = 'doc'
72
+ end
73
+
74
+ # Print the 10 slowest examples and example groups at the
75
+ # end of the spec run, to help surface which specs are running
76
+ # particularly slow.
77
+ config.profile_examples = 10
78
+
79
+ # Run specs in random order to surface order dependencies. If you find an
80
+ # order dependency and want to debug it, you can fix the order by providing
81
+ # the seed, which is printed after each run.
82
+ # --seed 1234
83
+ config.order = :random
84
+
85
+ # Seed global randomization in this process using the `--seed` CLI option.
86
+ # Setting this allows you to use `--seed` to deterministically reproduce
87
+ # test failures related to randomization by passing the same `--seed` value
88
+ # as the one that triggered the failure.
89
+ Kernel.srand config.seed
90
+ end
@@ -0,0 +1,15 @@
1
# Spec-suite helpers for talking to the source database directly.
module PostgresToRedshift::Test
  class << self
    # @return the URI object exposed by PostgresToRedshift.source_uri
    def source_uri
      PostgresToRedshift.source_uri
    end

    # Opens a PG connection to the source database on first use and returns
    # the same connection on later calls. Falls back to ENV['USER'] when the
    # URI carries no user.
    def test_connection
      return @test_connection if @test_connection

      uri = source_uri
      @test_connection = PG::Connection.new(
        host: uri.host,
        port: uri.port,
        user: uri.user || ENV['USER'],
        password: uri.password,
        dbname: uri.path[1..-1]
      )
    end
  end
end
10
+
11
+ RSpec.configure do |config|
12
+ config.before :suite do
13
+ PostgresToRedshift::Test.test_connection
14
+ end
15
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: postgres_to_redshift
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Rakoczy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-20 00:00:00.000000000 Z
11
+ date: 2015-02-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.7'
19
+ version: '1.6'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.7'
26
+ version: '1.6'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -76,14 +76,24 @@ extensions: []
76
76
  extra_rdoc_files: []
77
77
  files:
78
78
  - ".gitignore"
79
+ - ".rspec"
80
+ - ".travis.yml"
79
81
  - Gemfile
82
+ - Gemfile.lock
80
83
  - LICENSE.txt
81
84
  - README.md
82
85
  - Rakefile
83
86
  - bin/postgres_to_redshift
84
87
  - lib/postgres_to_redshift.rb
88
+ - lib/postgres_to_redshift/column.rb
89
+ - lib/postgres_to_redshift/table.rb
85
90
  - lib/postgres_to_redshift/version.rb
86
91
  - postgres_to_redshift.gemspec
92
+ - spec/lib/postgres_to_redshift/column_spec.rb
93
+ - spec/lib/postgres_to_redshift/table_spec.rb
94
+ - spec/lib/postgres_to_redshift_spec.rb
95
+ - spec/spec_helper.rb
96
+ - spec/spec_prepare.rb
87
97
  homepage: https://github.com/kitchensurfing/postgres_to_redshift
88
98
  licenses:
89
99
  - MIT
@@ -108,4 +118,9 @@ rubygems_version: 2.4.5
108
118
  signing_key:
109
119
  specification_version: 4
110
120
  summary: Load postgres databases into Amazon Redshift
111
- test_files: []
121
+ test_files:
122
+ - spec/lib/postgres_to_redshift/column_spec.rb
123
+ - spec/lib/postgres_to_redshift/table_spec.rb
124
+ - spec/lib/postgres_to_redshift_spec.rb
125
+ - spec/spec_helper.rb
126
+ - spec/spec_prepare.rb