postgres_to_redshift 0.0.1 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 32377367b67f4405e131d319f810bbc38fbc2ac5
4
- data.tar.gz: 97facb3cae58afb4c65b6cb8c17415e1e6e79f23
3
+ metadata.gz: b9aca81574e39acdc400fcfd77f454cb9c93f682
4
+ data.tar.gz: f4a8430406d25028ff1ac9b305837d34d0463464
5
5
  SHA512:
6
- metadata.gz: bb62207056a659dce0b1cc6ceac28ee92af14794510b7c59150e5fe61d19e7f4e4c79fa8a1601989dfba3b097b0e0702bac67b258058a93f57d5f1c1dffd959f
7
- data.tar.gz: e8f4cffccd43dbd33748c49b24bd4a0dadc670077ac798b2d5533da7cb144faa8ee60829fdbd60fcd34549c2af70b63d40edb05b71f730af9108680420140bde
6
+ metadata.gz: 8136aaffdd83cb676290393235f3cb1e99cf257c8c3dc193c0b234feec09b3f3cfedb29a8e6ad7f9f55641a2e1ef02c624f6d86073add5fec9ee5e0a2eacdde4
7
+ data.tar.gz: c0b9b49f1ba41e26fd2bb87328c5f672648a98ddd432335d8a899f6dbd722b1c32b6d6cb85b4de19630e42690eaf835957a6e801001d8a46972f917f0678482e
data/.gitignore CHANGED
@@ -1,4 +1,5 @@
1
1
  /.bundle/
2
+ /vendor/bundle
2
3
  /.yardoc
3
4
  /Gemfile.lock
4
5
  /_yardoc/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,10 @@
1
+ language: ruby
2
+ bundler_args: --without development --deployment --jobs=3 --retry=3
3
+ cache: bundler
4
+ rvm:
5
+ - 2.2.0
6
+ - 2.1.0
7
+ before_script:
8
+ - psql -c 'create database travis_ci_test;' -U postgres
9
+ env:
10
+ - POSTGRES_TO_REDSHIFT_SOURCE_URI=postgres://postgres@localhost/travis_ci_test
data/Gemfile CHANGED
@@ -2,3 +2,8 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in postgres_to_redshift.gemspec
4
4
  gemspec
5
+
6
+ group :test do
7
+ gem 'rake'
8
+ gem 'rspec'
9
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,44 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ postgres_to_redshift (0.1.1)
5
+ aws-sdk (~> 1.54)
6
+ pg (~> 0.17.0)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ aws-sdk (1.61.0)
12
+ aws-sdk-v1 (= 1.61.0)
13
+ aws-sdk-v1 (1.61.0)
14
+ json (~> 1.4)
15
+ nokogiri (>= 1.4.4)
16
+ diff-lcs (1.2.5)
17
+ json (1.8.2)
18
+ mini_portile (0.6.2)
19
+ nokogiri (1.6.6.2)
20
+ mini_portile (~> 0.6.0)
21
+ pg (0.17.1)
22
+ rake (10.4.2)
23
+ rspec (3.2.0)
24
+ rspec-core (~> 3.2.0)
25
+ rspec-expectations (~> 3.2.0)
26
+ rspec-mocks (~> 3.2.0)
27
+ rspec-core (3.2.0)
28
+ rspec-support (~> 3.2.0)
29
+ rspec-expectations (3.2.0)
30
+ diff-lcs (>= 1.2.0, < 2.0)
31
+ rspec-support (~> 3.2.0)
32
+ rspec-mocks (3.2.0)
33
+ diff-lcs (>= 1.2.0, < 2.0)
34
+ rspec-support (~> 3.2.0)
35
+ rspec-support (3.2.1)
36
+
37
+ PLATFORMS
38
+ ruby
39
+
40
+ DEPENDENCIES
41
+ bundler (~> 1.6)
42
+ postgres_to_redshift!
43
+ rake
44
+ rspec
data/README.md CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  This gem copies data from Postgres to Redshift. It's especially useful for copying data from Postgres to Redshift on Heroku.
4
4
 
5
+ [![Build Status](https://travis-ci.org/kitchensurfing/postgres_to_redshift.svg?branch=master)](https://travis-ci.org/kitchensurfing/postgres_to_redshift)
6
+
5
7
  ## Installation
6
8
 
7
9
  Add this line to your application's Gemfile:
@@ -20,8 +22,11 @@ Or install it yourself as:
20
22
 
21
23
  ## Usage
22
24
 
25
+ Set your source and target databases, as well as your S3 intermediary.
26
+
23
27
  ```bash
24
- export REDSHIFT_URI='postgres://username:password@host:port/database-name'
28
+ export POSTGRES_TO_REDSHIFT_SOURCE_URI='postgres://username:password@host:port/database-name'
29
+ export POSTGRES_TO_REDSHIFT_TARGET_URI='postgres://username:password@host:port/database-name'
25
30
  export S3_DATABASE_EXPORT_ID='yourid'
26
31
  export S3_DATABASE_EXPORT_KEY='yourkey'
27
32
  export S3_DATABASE_EXPORT_BUCKET='some-bucket-to-use'
data/Rakefile CHANGED
@@ -1,2 +1,10 @@
1
1
  require "bundler/gem_tasks"
2
2
 
3
+ begin
4
+ require 'rspec/core/rake_task'
5
+ RSpec::Core::RakeTask.new(:spec)
6
+ rescue LoadError
7
+ end
8
+
9
+ task(:default).clear
10
+ task :default => :spec
@@ -3,4 +3,3 @@
3
3
  require 'postgres_to_redshift'
4
4
 
5
5
  PostgresToRedshift.update_tables
6
-
@@ -0,0 +1,85 @@
1
+ # table_catalog | postgres_to_redshift
2
+ # table_schema | public
3
+ # table_name | films
4
+ # column_name | description
5
+ # ordinal_position | 2
6
+ # column_default |
7
+ # is_nullable | YES
8
+ # data_type | character varying
9
+ # character_maximum_length | 255
10
+ # character_octet_length | 1020
11
+ # numeric_precision |
12
+ # numeric_precision_radix |
13
+ # numeric_scale |
14
+ # datetime_precision |
15
+ # interval_type |
16
+ # interval_precision |
17
+ # character_set_catalog |
18
+ # character_set_schema |
19
+ # character_set_name |
20
+ # collation_catalog |
21
+ # collation_schema |
22
+ # collation_name |
23
+ # domain_catalog |
24
+ # domain_schema |
25
+ # domain_name |
26
+ # udt_catalog | postgres_to_redshift
27
+ # udt_schema | pg_catalog
28
+ # udt_name | varchar
29
+ # scope_catalog |
30
+ # scope_schema |
31
+ # scope_name |
32
+ # maximum_cardinality |
33
+ # dtd_identifier | 2
34
+ # is_self_referencing | NO
35
+ # is_identity | NO
36
+ # identity_generation |
37
+ # identity_start |
38
+ # identity_increment |
39
+ # identity_maximum |
40
+ # identity_minimum |
41
+ # identity_cycle |
42
+ # is_generated | NEVER
43
+ # generation_expression |
44
+ # is_updatable | YES
45
+ #
46
class PostgresToRedshift
  # Describes one column of a source table.
  #
  # +attributes+ is a Hash with string keys; this class reads "column_name"
  # and "data_type" from it.
  #
  # Declared in nested form (rather than `class PostgresToRedshift::Column`)
  # so the file does not raise NameError when the namespace has not been
  # defined yet.
  class Column
    attr_accessor :attributes

    # Source data types that are replaced on export, mapped to the type
    # used in their place. Frozen so the shared table cannot be mutated.
    CAST_TYPES_FOR_COPY = {
      "text"  => "CHARACTER VARYING(65535)",
      "json"  => "CHARACTER VARYING(65535)",
      "bytea" => "CHARACTER VARYING(65535)",
      "money" => "DECIMAL(19,2)",
      "oid"   => "CHARACTER VARYING(65535)",
    }.freeze

    # attributes - Hash describing the column (string keys).
    def initialize(attributes: )
      self.attributes = attributes
    end

    # Returns the column's name.
    def name
      attributes["column_name"]
    end

    # Returns the SQL select-list expression for this column: the quoted
    # name, wrapped in a CAST (and aliased back to the column name) when the
    # data type has an entry in CAST_TYPES_FOR_COPY.
    #
    # NOTE(review): the alias after AS is not quoted, so a column name that
    # needs quoting would not survive here; left as-is because existing specs
    # pin this exact output.
    def name_for_copy
      if needs_type_cast?
        %Q[CAST("#{name}" AS #{data_type_for_copy}) AS #{name}]
      else
        %Q["#{name}"]
      end
    end

    # Returns the column's data type as given in the attributes.
    def data_type
      attributes["data_type"]
    end

    # Returns the replacement type from CAST_TYPES_FOR_COPY, or the original
    # data type when no replacement is listed.
    def data_type_for_copy
      CAST_TYPES_FOR_COPY[data_type] || data_type
    end

    private

    # True when the export type differs from the source type.
    def needs_type_cast?
      data_type != data_type_for_copy
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # table_catalog | postgres_to_redshift
2
+ # table_schema | public
3
+ # table_name | acquisition_pages
4
+ # table_type | BASE TABLE
5
+ # self_referencing_column_name |
6
+ # reference_generation |
7
+ # user_defined_type_catalog |
8
+ # user_defined_type_schema |
9
+ # user_defined_type_name |
10
+ # is_insertable_into | YES
11
+ # is_typed | NO
12
+ # commit_action |
13
+ #
14
class PostgresToRedshift
  # Describes one source table or view together with its columns.
  #
  # +attributes+ is a Hash with string keys; this class reads "table_name"
  # and "table_type" from it.
  class Table
    attr_accessor :attributes
    # Only a reader is generated here: the writer is defined by hand below,
    # and generating it as well would trigger a "method redefined" warning.
    attr_reader :columns

    # attributes - Hash describing the table (string keys).
    # columns    - Enumerable of column attribute Hashes (default: none).
    def initialize(attributes: , columns: [])
      self.attributes = attributes
      self.columns = columns
    end

    # Returns the table's name in the source database.
    def name
      attributes["table_name"]
    end
    alias_method :to_s, :name

    # Returns the name used on the target side: the source name with a
    # trailing "_view" removed. Anchored with \z so only the very end of
    # the name is considered.
    def target_table_name
      name.sub(/_view\z/, '')
    end

    # Wraps each column attribute Hash in a Column. nil is treated as
    # "no columns".
    def columns=(column_definitions = [])
      @columns = (column_definitions || []).map do |column_definition|
        Column.new(attributes: column_definition)
      end
    end

    # Returns the column list for a CREATE TABLE statement:
    # each quoted column name followed by its export data type.
    def columns_for_create
      columns.map { |column| %Q["#{column.name}" #{column.data_type_for_copy}] }.join(", ")
    end

    # Returns the select list used when exporting the table's rows.
    def columns_for_copy
      columns.map(&:name_for_copy).join(", ")
    end

    # True when the "table_type" attribute is "VIEW".
    def is_view?
      attributes["table_type"] == "VIEW"
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  class PostgresToRedshift
2
- VERSION = "0.0.1"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -2,50 +2,74 @@ require "postgres_to_redshift/version"
2
2
  require 'pg'
3
3
  require 'uri'
4
4
  require 'aws-sdk'
5
+ require 'zlib'
6
+ require 'stringio'
7
+ require "postgres_to_redshift/table"
8
+ require "postgres_to_redshift/column"
5
9
 
6
10
  class PostgresToRedshift
11
+ class << self
12
+ attr_accessor :source_uri, :target_uri
13
+ end
14
+
7
15
  attr_reader :source_connection, :target_connection, :s3
8
16
 
9
17
  def self.update_tables
10
- update_tables = PostgresToRedshift.new(source_uri: ARGV[0])
11
- update_tables.create_new_tables
18
+ update_tables = PostgresToRedshift.new
19
+
20
+ update_tables.tables.each do |table|
21
+ target_connection.exec("CREATE TABLE IF NOT EXISTS public.#{table.target_table_name} (#{table.columns_for_create})")
22
+
23
+ update_tables.copy_table(table)
24
+
25
+ update_tables.import_table(table)
26
+ end
27
+ end
28
+
29
# Returns the parsed URI of the source database.
#
# The value is read from the POSTGRES_TO_REDSHIFT_SOURCE_URI environment
# variable on first use and memoized in @source_uri.
#
# Raises KeyError (naming the variable) when it is not set, instead of
# failing inside URI.parse with a nil argument.
def self.source_uri
  @source_uri ||= URI.parse(ENV.fetch('POSTGRES_TO_REDSHIFT_SOURCE_URI'))
end
32
+
33
# Returns the parsed URI of the target database.
#
# The value is read from the POSTGRES_TO_REDSHIFT_TARGET_URI environment
# variable on first use and memoized in @target_uri.
#
# Raises KeyError (naming the variable) when it is not set, instead of
# failing inside URI.parse with a nil argument.
def self.target_uri
  @target_uri ||= URI.parse(ENV.fetch('POSTGRES_TO_REDSHIFT_TARGET_URI'))
end
36
+
37
# Returns the shared connection to the source database, opening it on the
# first call. The user falls back to ENV['USER'] when the URI carries none,
# and the database name is the URI path without its leading slash.
# Right after connecting, the session is switched to read-only transactions.
def self.source_connection
  return @source_connection if instance_variable_defined?(:"@source_connection")

  uri = source_uri
  @source_connection = PG::Connection.new(
    host: uri.host,
    port: uri.port,
    user: uri.user || ENV['USER'],
    password: uri.password,
    dbname: uri.path[1..-1]
  )
  @source_connection.exec("SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY;")
  @source_connection
end
12
45
 
13
- # FIXME: BIG WARNING HERE: this order is important. We want the views to overwrite the tables. We should make it so the order doesn't matter later.
14
- update_tables.copy_tables
15
- update_tables.copy_views
16
- update_tables.import_tables
46
# Returns the shared connection to the target database, opening it on the
# first call. The user falls back to ENV['USER'] when the URI carries none,
# and the database name is the URI path without its leading slash.
def self.target_connection
  return @target_connection if instance_variable_defined?(:"@target_connection")

  uri = target_uri
  @target_connection = PG::Connection.new(
    host: uri.host,
    port: uri.port,
    user: uri.user || ENV['USER'],
    password: uri.password,
    dbname: uri.path[1..-1]
  )
end
18
53
 
19
- def initialize(source_uri:)
20
- source_uri = URI.parse(source_uri)
21
- target_uri = URI.parse(ENV['REDSHIFT_URI'])
22
- @source_connection = PG::Connection.new(host: source_uri.host, port: source_uri.port, user: source_uri.user, password: source_uri.password, dbname: source_uri.path[1..-1])
23
- @source_connection.exec("SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY;")
24
- @target_connection = PG::Connection.new(host: target_uri.host, port: target_uri.port, user: target_uri.user, password: target_uri.password, dbname: target_uri.path[1..-1])
54
+ def source_connection
55
+ self.class.source_connection
25
56
  end
26
57
 
27
- def views
28
- source_connection.exec("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'VIEW'").map { |row| row["table_name"] } - ["pg_stat_statements"]
58
+ def target_connection
59
+ self.class.target_connection
29
60
  end
30
61
 
31
62
  def tables
32
- source_connection.exec("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'BASE TABLE'").map { |row| row["table_name"] }
63
+ source_connection.exec("SELECT * FROM information_schema.tables WHERE table_schema = 'public' AND table_type in ('BASE TABLE', 'VIEW')").map do |table_attributes|
64
+ table = Table.new(attributes: table_attributes)
65
+ next if table.name =~ /^pg_/
66
+ table.columns = column_definitions(table)
67
+ table
68
+ end.compact
33
69
  end
34
70
 
35
- def table_columns(table_name)
36
- source_connection.exec("SELECT column_name, data_type, character_maximum_length FROM information_schema.columns WHERE table_schema='public' AND table_name='#{table_name}'").map do |row|
37
- data_type = row["data_type"]
38
- data_type.gsub!(/text/, 'character varying(max)')
39
- data_type.gsub!(/json/, 'character varying(max)')
40
- data_type.gsub!(/bytea/, 'character varying(max)')
41
- data_type.gsub!(/money/, 'character varying(max)')
42
-
43
- if row["character_maximum_length"].to_s.length > 0
44
- %Q|"#{row["column_name"]}" #{data_type}(#{row["character_maximum_length"]})|
45
- else
46
- %Q|"#{row["column_name"]}" #{data_type}|
47
- end
48
- end.join(", ")
71
# Returns the information_schema.columns rows for +table+ in the public
# schema, ordered by ordinal_position.
#
# The table name is sent as a bind parameter rather than interpolated into
# the SQL text, so names containing quotes cannot break or alter the query.
#
# table - object responding to #name.
def column_definitions(table)
  source_connection.exec_params(
    "SELECT * FROM information_schema.columns WHERE table_schema='public' AND table_name=$1 order by ordinal_position",
    [table.name]
  )
end
50
74
 
51
75
  def s3
@@ -56,68 +80,41 @@ class PostgresToRedshift
56
80
  @bucket ||= s3.buckets[ENV['S3_DATABASE_EXPORT_BUCKET']]
57
81
  end
58
82
 
59
- def create_new_tables
60
- tables.each do |table|
61
- target_connection.exec("CREATE TABLE IF NOT EXISTS public.#{table} (#{table_columns(table)})")
62
- end
63
- end
83
+ def copy_table(table)
84
+ buffer = StringIO.new
85
+ zip = Zlib::GzipWriter.new(buffer)
64
86
 
65
- def copy_table(source_table, target_table, is_view = false)
66
- buffer = ""
67
- puts "Downloading #{source_table}"
68
- copy_command =
69
- if is_view
70
- "COPY (SELECT * FROM #{source_table}) TO STDOUT WITH DELIMITER '|'"
71
- else
72
- "COPY #{source_table} TO STDOUT WITH DELIMITER '|'"
73
- end
87
+ puts "Downloading #{table}"
88
+ copy_command = "COPY (SELECT #{table.columns_for_copy} FROM #{table.name}) TO STDOUT WITH DELIMITER '|'"
74
89
 
75
90
  source_connection.copy_data(copy_command) do
76
91
  while row = source_connection.get_copy_data
77
- buffer << row
92
+ zip.write(row)
78
93
  end
79
94
  end
80
- upload_table(target_table, buffer)
95
+ zip.finish
96
+ buffer.rewind
97
+ upload_table(table, buffer)
81
98
  end
82
99
 
83
- def upload_table(target_table, buffer)
84
- puts "Uploading #{target_table}"
85
- bucket.objects["export/#{target_table}.psv"].delete
86
- bucket.objects["export/#{target_table}.psv"].write(buffer, acl: :authenticated_read)
100
+ def upload_table(table, buffer)
101
+ puts "Uploading #{table.target_table_name}"
102
+ bucket.objects["export/#{table.target_table_name}.psv.gz"].delete
103
+ bucket.objects["export/#{table.target_table_name}.psv.gz"].write(buffer, acl: :authenticated_read)
87
104
  end
88
105
 
89
- def import_table(target_table)
90
- puts "Importing #{target_table}"
91
- target_connection.exec("DROP TABLE IF EXISTS public.#{target_table}_updating")
106
+ def import_table(table)
107
+ puts "Importing #{table.target_table_name}"
108
+ target_connection.exec("DROP TABLE IF EXISTS public.#{table.target_table_name}_updating")
92
109
 
93
110
  target_connection.exec("BEGIN;")
94
111
 
95
- target_connection.exec("ALTER TABLE public.#{target_table} RENAME TO #{target_table}_updating")
112
+ target_connection.exec("ALTER TABLE public.#{table.target_table_name} RENAME TO #{table.target_table_name}_updating")
96
113
 
97
- target_connection.exec("CREATE TABLE public.#{target_table} (#{table_columns(target_table)})")
114
+ target_connection.exec("CREATE TABLE public.#{table.target_table_name} (#{table.columns_for_create})")
98
115
 
99
- target_connection.exec("COPY public.#{target_table} FROM 's3://#{ENV['S3_DATABASE_EXPORT_BUCKET']}/export/#{target_table}.psv' CREDENTIALS 'aws_access_key_id=#{ENV['S3_DATABASE_EXPORT_ID']};aws_secret_access_key=#{ENV['S3_DATABASE_EXPORT_KEY']}' TRUNCATECOLUMNS ESCAPE DELIMITER as '|';")
116
+ target_connection.exec("COPY public.#{table.target_table_name} FROM 's3://#{ENV['S3_DATABASE_EXPORT_BUCKET']}/export/#{table.target_table_name}.psv.gz' CREDENTIALS 'aws_access_key_id=#{ENV['S3_DATABASE_EXPORT_ID']};aws_secret_access_key=#{ENV['S3_DATABASE_EXPORT_KEY']}' GZIP TRUNCATECOLUMNS ESCAPE DELIMITER as '|';")
100
117
 
101
118
  target_connection.exec("COMMIT;")
102
119
  end
103
-
104
- def copy_tables
105
- tables.each do |table|
106
- copy_table(table, table)
107
- end
108
- end
109
-
110
- def copy_views
111
- views.each do |view|
112
- table = view.gsub(/_view/, '')
113
- copy_table(view, table, true)
114
- end
115
- end
116
-
117
- # FIXME: This relies on views being uploaded after tables.
118
- def import_tables
119
- tables.each do |table|
120
- import_table(table)
121
- end
122
- end
123
120
  end
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_development_dependency "bundler", "~> 1.7"
21
+ spec.add_development_dependency "bundler", "~> 1.6"
22
22
  spec.add_development_dependency "rake", "~> 10.0"
23
23
  spec.add_dependency "pg", "~> 0.17.0"
24
24
  spec.add_dependency "aws-sdk", "~> 1.54"
@@ -0,0 +1,168 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe PostgresToRedshift::Column do
4
+ context 'with a simple column' do
5
+ before do
6
+ attributes = {
7
+ "table_catalog" => "postgres_to_redshift",
8
+ "table_schema" => "public",
9
+ "table_name" => "films",
10
+ "column_name" => "description",
11
+ "ordinal_position" => "2",
12
+ "column_default" => nil,
13
+ "is_nullable" => "YES",
14
+ "data_type" => "character varying",
15
+ "character_maximum_length" => "255",
16
+ "character_octet_length" => "1020"
17
+ }
18
+
19
+ @column = PostgresToRedshift::Column.new attributes: attributes
20
+ end
21
+
22
+ describe '#name' do
23
+ it 'returns the column name' do
24
+ expect(@column.name).to eq("description")
25
+ end
26
+ end
27
+ end
28
+
29
+ describe '#name_for_copy' do
30
+ it 'casts fields to appropriate type' do
31
+ attributes = {
32
+ "table_catalog" => "postgres_to_redshift",
33
+ "table_schema" => "public",
34
+ "table_name" => "films",
35
+ "column_name" => "description",
36
+ "ordinal_position" => "2",
37
+ "column_default" => nil,
38
+ "is_nullable" => "YES",
39
+ "data_type" => "text",
40
+ "character_maximum_length" => nil,
41
+ "character_octet_length" => "1073741824"
42
+ }
43
+
44
+ column = PostgresToRedshift::Column.new attributes: attributes
45
+ expect(column.name_for_copy).to eq('CAST("description" AS CHARACTER VARYING(65535)) AS description')
46
+ end
47
+
48
+ it 'does not cast fields that do not need casting' do
49
+ attributes = {
50
+ "table_catalog" => "postgres_to_redshift",
51
+ "table_schema" => "public",
52
+ "table_name" => "films",
53
+ "column_name" => "description",
54
+ "ordinal_position" => "2",
55
+ "column_default" => nil,
56
+ "is_nullable" => "YES",
57
+ "data_type" => "character varying",
58
+ "character_maximum_length" => "255",
59
+ "character_octet_length" => "1020"
60
+ }
61
+
62
+ column = PostgresToRedshift::Column.new attributes: attributes
63
+ expect(column.name_for_copy).to eq('"description"')
64
+ end
65
+ end
66
+
67
+ describe "#data_type_for_copy" do
68
+ it 'casts text to character varying(65535)' do
69
+ attributes = {
70
+ "table_catalog" => "postgres_to_redshift",
71
+ "table_schema" => "public",
72
+ "table_name" => "films",
73
+ "column_name" => "description",
74
+ "ordinal_position" => "2",
75
+ "column_default" => nil,
76
+ "is_nullable" => "YES",
77
+ "data_type" => "text",
78
+ "character_maximum_length" => nil,
79
+ "character_octet_length" => "1073741824"
80
+ }
81
+
82
+ column = PostgresToRedshift::Column.new attributes: attributes
83
+ expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
84
+ end
85
+
86
+ it 'casts json to character varying(65535)' do
87
+ attributes = {
88
+ "table_catalog" => "postgres_to_redshift",
89
+ "table_schema" => "public",
90
+ "table_name" => "films",
91
+ "column_name" => "description",
92
+ "ordinal_position" => "2",
93
+ "column_default" => nil,
94
+ "is_nullable" => "YES",
95
+ "data_type" => "json",
96
+ }
97
+
98
+ column = PostgresToRedshift::Column.new attributes: attributes
99
+ expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
100
+ end
101
+
102
+ it 'casts bytea to character varying(65535)' do
103
+ attributes = {
104
+ "table_catalog" => "postgres_to_redshift",
105
+ "table_schema" => "public",
106
+ "table_name" => "films",
107
+ "column_name" => "description",
108
+ "ordinal_position" => "2",
109
+ "column_default" => nil,
110
+ "is_nullable" => "YES",
111
+ "data_type" => "bytea",
112
+ }
113
+
114
+ column = PostgresToRedshift::Column.new attributes: attributes
115
+ expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
116
+ end
117
+
118
+ it 'casts money to decimal(19,2)' do
119
+ attributes = {
120
+ "table_catalog" => "postgres_to_redshift",
121
+ "table_schema" => "public",
122
+ "table_name" => "films",
123
+ "column_name" => "description",
124
+ "ordinal_position" => "2",
125
+ "column_default" => nil,
126
+ "is_nullable" => "YES",
127
+ "data_type" => "money",
128
+ }
129
+
130
+ column = PostgresToRedshift::Column.new attributes: attributes
131
+ expect(column.data_type_for_copy).to eq("DECIMAL(19,2)")
132
+ end
133
+
134
+ it 'casts oid to character varying' do
135
+ attributes = {
136
+ "table_catalog" => "postgres_to_redshift",
137
+ "table_schema" => "public",
138
+ "table_name" => "films",
139
+ "column_name" => "description",
140
+ "ordinal_position" => "2",
141
+ "column_default" => nil,
142
+ "is_nullable" => "YES",
143
+ "data_type" => "oid",
144
+ }
145
+
146
+ column = PostgresToRedshift::Column.new attributes: attributes
147
+ expect(column.data_type_for_copy).to eq("CHARACTER VARYING(65535)")
148
+ end
149
+
150
+ it "returns the data type if no cast necessary" do
151
+ attributes = {
152
+ "table_catalog" => "postgres_to_redshift",
153
+ "table_schema" => "public",
154
+ "table_name" => "films",
155
+ "column_name" => "description",
156
+ "ordinal_position" => "2",
157
+ "column_default" => nil,
158
+ "is_nullable" => "YES",
159
+ "data_type" => "character varying",
160
+ "character_maximum_length" => "255",
161
+ "character_octet_length" => "1020"
162
+ }
163
+
164
+ column = PostgresToRedshift::Column.new attributes: attributes
165
+ expect(column.data_type_for_copy).to eq("character varying")
166
+ end
167
+ end
168
+ end
@@ -0,0 +1,83 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe PostgresToRedshift::Table do
4
+ context 'with a simple table' do
5
+ before do
6
+ attributes = {
7
+ "table_catalog" => "postgres_to_redshift",
8
+ "table_schema" => "public",
9
+ "table_name" => "films",
10
+ "table_type" => "BASE TABLE",
11
+ }
12
+ columns = [
13
+ {
14
+ "table_catalog" => "postgres_to_redshift",
15
+ "table_schema" => "public",
16
+ "table_name" => "films",
17
+ "column_name" => "description",
18
+ "ordinal_position" => "2",
19
+ "column_default" => nil,
20
+ "is_nullable" => "YES",
21
+ "data_type" => "character varying",
22
+ "character_maximum_length" => "255",
23
+ "character_octet_length" => "1020"
24
+ }
25
+ ]
26
+
27
+ @table = PostgresToRedshift::Table.new(attributes: attributes, columns: columns)
28
+ end
29
+
30
+ describe '#name' do
31
+ it 'returns the name of the table' do
32
+ expect(@table.name).to eq("films")
33
+ end
34
+ end
35
+
36
+ describe '#columns' do
37
+ it 'returns a list of columns' do
38
+ expect(@table.columns.size).to eq(1)
39
+ expect(@table.columns.first.name).to eq("description")
40
+ end
41
+ end
42
+ end
43
+
44
+ describe '#is_view?' do
45
+ it 'returns true if it is a view' do
46
+ attributes = {
47
+ "table_catalog" => "postgres_to_redshift",
48
+ "table_schema" => "public",
49
+ "table_name" => "films",
50
+ "table_type" => "VIEW",
51
+ }
52
+
53
+ table = PostgresToRedshift::Table.new(attributes: attributes)
54
+ expect(table.is_view?).to be_truthy
55
+ end
56
+
57
+ it 'returns false if it is not a view' do
58
+ attributes = {
59
+ "table_catalog" => "postgres_to_redshift",
60
+ "table_schema" => "public",
61
+ "table_name" => "films",
62
+ "table_type" => "BASE TABLE",
63
+ }
64
+
65
+ table = PostgresToRedshift::Table.new(attributes: attributes)
66
+ expect(table.is_view?).to be_falsey
67
+ end
68
+ end
69
+
70
+ describe 'target_table_name' do
71
+ it 'strips _view from the end of the table name' do
72
+ attributes = {
73
+ "table_catalog" => "postgres_to_redshift",
74
+ "table_schema" => "public",
75
+ "table_name" => "films_view",
76
+ "table_type" => "VIEW",
77
+ }
78
+
79
+ table = PostgresToRedshift::Table.new(attributes: attributes)
80
+ expect(table.target_table_name).to eq("films")
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,29 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe PostgresToRedshift do
4
+ it 'opens a read only connection to source database' do
5
+ read_only_state = PostgresToRedshift.source_connection.exec("SHOW transaction_read_only").first["transaction_read_only"]
6
+
7
+ expect(read_only_state).to eq("on")
8
+ end
9
+
10
+ context 'with a simple table' do
11
+ before do
12
+ PostgresToRedshift::Test.test_connection.exec(%Q[DROP TABLE IF EXISTS "films"; CREATE TABLE IF NOT EXISTS "films" ("id" SERIAL PRIMARY KEY, "title" text);])
13
+ end
14
+
15
+ it 'lists available tables' do
16
+ expect(PostgresToRedshift.new.tables.size).to eq(1)
17
+ expect(PostgresToRedshift.new.tables.first.name).to eq("films")
18
+ end
19
+
20
+ it 'lists column definitions' do
21
+ table = PostgresToRedshift.new.tables.first
22
+ film_columns = PostgresToRedshift.new.column_definitions(table)
23
+
24
+ expect(film_columns.to_a.size).to eq(2)
25
+ expect(film_columns.first["column_name"]).to eq("id")
26
+ expect(table.columns.first.name).to eq("id")
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,90 @@
1
+ require File.expand_path("../../lib/postgres_to_redshift", __FILE__)
2
+ require 'spec_prepare'
3
+
4
+ # This file was generated by the `rspec --init` command. Conventionally, all
5
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
6
+ # The generated `.rspec` file contains `--require spec_helper` which will cause this
7
+ # file to always be loaded, without a need to explicitly require it in any files.
8
+ #
9
+ # Given that it is always loaded, you are encouraged to keep this file as
10
+ # light-weight as possible. Requiring heavyweight dependencies from this file
11
+ # will add to the boot time of your test suite on EVERY test run, even for an
12
+ # individual file that may not need all of that loaded. Instead, consider making
13
+ # a separate helper file that requires the additional dependencies and performs
14
+ # the additional setup, and require it from the spec files that actually need it.
15
+ #
16
+ # The `.rspec` file also contains a few flags that are not defaults but that
17
+ # users commonly want.
18
+ #
19
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
20
+ RSpec.configure do |config|
21
+ # rspec-expectations config goes here. You can use an alternate
22
+ # assertion/expectation library such as wrong or the stdlib/minitest
23
+ # assertions if you prefer.
24
+ config.expect_with :rspec do |expectations|
25
+ # This option will default to `true` in RSpec 4. It makes the `description`
26
+ # and `failure_message` of custom matchers include text for helper methods
27
+ # defined using `chain`, e.g.:
28
+ # be_bigger_than(2).and_smaller_than(4).description
29
+ # # => "be bigger than 2 and smaller than 4"
30
+ # ...rather than:
31
+ # # => "be bigger than 2"
32
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
33
+ end
34
+
35
+ # rspec-mocks config goes here. You can use an alternate test double
36
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
37
+ config.mock_with :rspec do |mocks|
38
+ # Prevents you from mocking or stubbing a method that does not exist on
39
+ # a real object. This is generally recommended, and will default to
40
+ # `true` in RSpec 4.
41
+ mocks.verify_partial_doubles = true
42
+ end
43
+
44
+ # The settings below are suggested to provide a good initial experience
45
+ # with RSpec, but feel free to customize to your heart's content.
46
+ # These two settings work together to allow you to limit a spec run
47
+ # to individual examples or groups you care about by tagging them with
48
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
49
+ # get run.
50
+ config.filter_run :focus
51
+ config.run_all_when_everything_filtered = true
52
+
53
+ # Limits the available syntax to the non-monkey patched syntax that is recommended.
54
+ # For more details, see:
55
+ # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
56
+ # - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
57
+ # - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
58
+ config.disable_monkey_patching!
59
+
60
+ # This setting enables warnings. It's recommended, but in some cases may
61
+ # be too noisy due to issues in dependencies.
62
+ config.warnings = true
63
+
64
+ # Many RSpec users commonly either run the entire suite or an individual
65
+ # file, and it's useful to allow more verbose output when running an
66
+ # individual spec file.
67
+ if config.files_to_run.one?
68
+ # Use the documentation formatter for detailed output,
69
+ # unless a formatter has already been configured
70
+ # (e.g. via a command-line flag).
71
+ config.default_formatter = 'doc'
72
+ end
73
+
74
+ # Print the 10 slowest examples and example groups at the
75
+ # end of the spec run, to help surface which specs are running
76
+ # particularly slow.
77
+ config.profile_examples = 10
78
+
79
+ # Run specs in random order to surface order dependencies. If you find an
80
+ # order dependency and want to debug it, you can fix the order by providing
81
+ # the seed, which is printed after each run.
82
+ # --seed 1234
83
+ config.order = :random
84
+
85
+ # Seed global randomization in this process using the `--seed` CLI option.
86
+ # Setting this allows you to use `--seed` to deterministically reproduce
87
+ # test failures related to randomization by passing the same `--seed` value
88
+ # as the one that triggered the failure.
89
+ Kernel.srand config.seed
90
+ end
@@ -0,0 +1,15 @@
1
+ module PostgresToRedshift::Test
2
+ def self.source_uri
3
+ PostgresToRedshift.source_uri
4
+ end
5
+
6
+ def self.test_connection
7
+ @test_connection ||= PG::Connection.new(host: source_uri.host, port: source_uri.port, user: source_uri.user || ENV['USER'], password: source_uri.password, dbname: source_uri.path[1..-1])
8
+ end
9
+ end
10
+
11
+ RSpec.configure do |config|
12
+ config.before :suite do
13
+ PostgresToRedshift::Test.test_connection
14
+ end
15
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: postgres_to_redshift
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Rakoczy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-20 00:00:00.000000000 Z
11
+ date: 2015-02-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.7'
19
+ version: '1.6'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.7'
26
+ version: '1.6'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -76,14 +76,24 @@ extensions: []
76
76
  extra_rdoc_files: []
77
77
  files:
78
78
  - ".gitignore"
79
+ - ".rspec"
80
+ - ".travis.yml"
79
81
  - Gemfile
82
+ - Gemfile.lock
80
83
  - LICENSE.txt
81
84
  - README.md
82
85
  - Rakefile
83
86
  - bin/postgres_to_redshift
84
87
  - lib/postgres_to_redshift.rb
88
+ - lib/postgres_to_redshift/column.rb
89
+ - lib/postgres_to_redshift/table.rb
85
90
  - lib/postgres_to_redshift/version.rb
86
91
  - postgres_to_redshift.gemspec
92
+ - spec/lib/postgres_to_redshift/column_spec.rb
93
+ - spec/lib/postgres_to_redshift/table_spec.rb
94
+ - spec/lib/postgres_to_redshift_spec.rb
95
+ - spec/spec_helper.rb
96
+ - spec/spec_prepare.rb
87
97
  homepage: https://github.com/kitchensurfing/postgres_to_redshift
88
98
  licenses:
89
99
  - MIT
@@ -108,4 +118,9 @@ rubygems_version: 2.4.5
108
118
  signing_key:
109
119
  specification_version: 4
110
120
  summary: Load postgres databases into Amazon Redshift
111
- test_files: []
121
+ test_files:
122
+ - spec/lib/postgres_to_redshift/column_spec.rb
123
+ - spec/lib/postgres_to_redshift/table_spec.rb
124
+ - spec/lib/postgres_to_redshift_spec.rb
125
+ - spec/spec_helper.rb
126
+ - spec/spec_prepare.rb