postgres_to_redshift 0.1.1 → 0.1.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b9aca81574e39acdc400fcfd77f454cb9c93f682
-  data.tar.gz: f4a8430406d25028ff1ac9b305837d34d0463464
+  metadata.gz: 085f85fd5e501b248eeab36aa540449977881d45
+  data.tar.gz: 21a8909abb27984a06b7d23b25f0ebda47647282
 SHA512:
-  metadata.gz: 8136aaffdd83cb676290393235f3cb1e99cf257c8c3dc193c0b234feec09b3f3cfedb29a8e6ad7f9f55641a2e1ef02c624f6d86073add5fec9ee5e0a2eacdde4
-  data.tar.gz: c0b9b49f1ba41e26fd2bb87328c5f672648a98ddd432335d8a899f6dbd722b1c32b6d6cb85b4de19630e42690eaf835957a6e801001d8a46972f917f0678482e
+  metadata.gz: 48dad16338a06b1deb436c1bcf3448b461444ac94697802a3e4f7ec2877ce65f6b2ade48cfc3f70cd7062e55996c8a0ab28c8553d9340d18ff0fe3c785a0b3d6
+  data.tar.gz: 5f08cae1688d81e6034083394072a06774915d9e2d1511f8cd56c27572226be3ef952eb5f0b4ebdf871946cdcdba2855cc5c4c41f880e021d66c8127a6cf5224
data/.gitignore CHANGED
@@ -14,3 +14,5 @@
 *.a
 mkmf.log
 *swp
+.env
+.idea
data/.rspec CHANGED
@@ -1,2 +1,3 @@
 --color
 --require spec_helper
+--tag ~type:feature
data/README.md CHANGED
@@ -31,7 +31,7 @@ export S3_DATABASE_EXPORT_ID='yourid'
 export S3_DATABASE_EXPORT_KEY='yourkey'
 export S3_DATABASE_EXPORT_BUCKET='some-bucket-to-use'
 
-postgres_to_redshift $MY_SOURCE_DATABASE
+postgres_to_redshift
 ```
 
 ## Contributing
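The binary no longer takes a positional database argument; source, target, and the S3 drop point all come from the environment. A minimal Ruby sketch of the same run — `POSTGRES_TO_REDSHIFT_SOURCE_URI` is an assumed name here; only `POSTGRES_TO_REDSHIFT_TARGET_URI` is confirmed by this release (in `bin/stack_env.sh`):

```ruby
require 'postgres_to_redshift'

# POSTGRES_TO_REDSHIFT_SOURCE_URI is assumed; POSTGRES_TO_REDSHIFT_TARGET_URI
# is the variable name bin/stack_env.sh exports in this release.
ENV['POSTGRES_TO_REDSHIFT_SOURCE_URI'] ||= 'postgres://user:pass@localhost:5432/source_db'
ENV['POSTGRES_TO_REDSHIFT_TARGET_URI'] ||= 'postgres://test:Testtesttest1@example-cluster:5439/test'
ENV['S3_DATABASE_EXPORT_ID']           ||= 'yourid'
ENV['S3_DATABASE_EXPORT_KEY']          ||= 'yourkey'
ENV['S3_DATABASE_EXPORT_BUCKET']       ||= 'some-bucket-to-use'

PostgresToRedshift.update_tables # the class-level entry point shown in this diff
```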
data/Rakefile CHANGED
@@ -1,10 +1,17 @@
-require "bundler/gem_tasks"
+require 'bundler/gem_tasks'
 
 begin
   require 'rspec/core/rake_task'
-  RSpec::Core::RakeTask.new(:spec)
+  namespace :spec do
+    RSpec::Core::RakeTask.new(:units) do |t|
+      t.rspec_opts = '--tag ~type:feature'
+    end
+    RSpec::Core::RakeTask.new(:features) do |t|
+      t.rspec_opts = '--tag type:feature'
+    end
+  end
 rescue LoadError
 end
 
 task(:default).clear
-task :default => :spec
+task :default => 'spec:units'
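The unit/feature split is driven by RSpec metadata tags: `--tag ~type:feature` skips any example group tagged `type: :feature`, while `--tag type:feature` selects only those groups. A minimal sketch of how a spec opts in:

```ruby
# Selected by `rake spec:features`; skipped by `rake spec:units` and by a
# plain `rspec` run, since .rspec now carries --tag ~type:feature by default.
RSpec.describe 'a slow integration path', type: :feature do
  it 'runs only when feature specs are explicitly requested' do
    expect(true).to be(true)
  end
end
```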
data/bin/configure_aws.sh ADDED
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+S3_DATABASE_EXPORT_ID=""
+S3_DATABASE_EXPORT_KEY=""
+
+source "${DIR}/../.env"
+
+if [ -z "${S3_DATABASE_EXPORT_ID}" ]; then
+  exit 1
+fi
+
+if [ -z "${S3_DATABASE_EXPORT_KEY}" ]; then
+  exit 1
+fi
+
+aws configure set aws_access_key_id "${S3_DATABASE_EXPORT_ID}" --profile p2r
+aws configure set aws_secret_access_key "${S3_DATABASE_EXPORT_KEY}" --profile p2r
+aws configure set default.region us-east-1 --profile p2r
data/bin/create_stack.sh ADDED
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+source "${DIR}/../.env"
+
+aws cloudformation describe-stacks --profile p2r --stack-name p2r
+EXISTS=$?
+
+if (( EXISTS == 0 )); then
+  echo "Stack already exists. Exiting."
+  exit 1
+fi
+
+aws cloudformation create-stack --profile p2r --stack-name p2r --template-body file:///${DIR}/../config/cloud-formation-local-postgres.json --on-failure DELETE
+if [ $? -ne 0 ]; then
+  echo "Error creating stack. Exiting."
+fi
+
+echo "Waiting for stack to finish creating."
+aws cloudformation wait stack-create-complete --profile p2r --stack-name p2r
data/bin/delete_stack.sh ADDED
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+source "${DIR}/../.env"
+
+aws cloudformation describe-stacks --profile p2r --stack-name p2r
+if [ $? -ne 0 ]; then
+  echo "Stack does not exist. Exiting."
+  exit 1
+fi
+
+aws cloudformation delete-stack --profile p2r --stack-name p2r
+if [ $? -ne 0 ]; then
+  echo "Error deleting stack. Exiting."
+fi
+
+echo "Waiting for stack to finish deleting."
+aws cloudformation wait stack-delete-complete --profile p2r --stack-name p2r
+if [ $? -ne 0 ]; then
+  echo "Error waiting for stack to delete. Exiting."
+fi
+
+echo "Stack deleted."
data/bin/stack_env.sh ADDED
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+aws cloudformation describe-stacks --profile p2r --stack-name p2r > /dev/null
+if [ $? -ne 0 ]; then
+  echo "Stack does not exist. Exiting."
+  exit 1
+fi
+
+export S3_DATABASE_EXPORT_BUCKET=$(aws cloudformation describe-stack-resources --profile p2r --stack-name p2r --logical-resource-id s3Bucket | jq -r ".StackResources[0].PhysicalResourceId")
+P2R_REDSHIFT_ID=$(aws cloudformation describe-stack-resources --profile p2r --stack-name p2r --logical-resource-id Redshift | jq -r ".StackResources[0].PhysicalResourceId")
+P2R_REDSHIFT_ADDRESS=$(aws redshift describe-clusters --profile p2r --cluster-identifier ${P2R_REDSHIFT_ID} | jq -r ".Clusters[0].Endpoint.Address")
+export POSTGRES_TO_REDSHIFT_TARGET_URI="postgres://test:Testtesttest1@${P2R_REDSHIFT_ADDRESS}:5439/test"
+
+echo "S3_DATABASE_EXPORT_BUCKET=${S3_DATABASE_EXPORT_BUCKET}"
+echo "POSTGRES_TO_REDSHIFT_TARGET_URI=${POSTGRES_TO_REDSHIFT_TARGET_URI}"
data/bin/update_stack.sh ADDED
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+source "${DIR}/../.env"
+
+aws cloudformation describe-stacks --profile p2r --stack-name p2r
+if [ $? -ne 0 ]; then
+  echo "Stack does not exist. Exiting."
+  exit 1
+fi
+
+aws cloudformation update-stack --profile p2r --stack-name p2r --template-body file:///${DIR}/../config/cloud-formation-local-postgres.json
+if [ $? -ne 0 ]; then
+  echo "Error updating stack. Exiting."
+fi
+
+echo "Waiting for stack to finish updating."
+aws cloudformation wait stack-update-complete --profile p2r --stack-name p2r
data/config/cloud-formation-local-postgres.json ADDED
@@ -0,0 +1,39 @@
+{
+  "AWSTemplateFormatVersion": "2010-09-09",
+  "Metadata": {
+  },
+  "Resources": {
+    "redshiftSecurityGroup" : {
+      "Type": "AWS::Redshift::ClusterSecurityGroup",
+      "Properties": {
+        "Description" : "Security group to determine where connections to the Amazon Redshift cluster can come from"
+      }
+    },
+    "redshiftSecurityGroupIngress" : {
+      "Type": "AWS::Redshift::ClusterSecurityGroupIngress",
+      "Properties": {
+        "ClusterSecurityGroupName" : {"Ref":"redshiftSecurityGroup"},
+        "CIDRIP" : "0.0.0.0/0"
+      }
+    },
+    "Redshift": {
+      "Type": "AWS::Redshift::Cluster",
+      "Properties": {
+        "ClusterType": "single-node",
+        "ClusterSecurityGroups": [{"Ref":"redshiftSecurityGroup"}],
+        "DBName": "test",
+        "MasterUsername": "test",
+        "MasterUserPassword": "Testtesttest1",
+        "NodeType": "dc1.large"
+      },
+      "Metadata": {
+      }
+    },
+    "s3Bucket": {
+      "Type": "AWS::S3::Bucket",
+      "Properties": {},
+      "Metadata": {
+      }
+    }
+  }
+}
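The stack scripts address these resources by logical ID (`s3Bucket`, `Redshift`), so a quick sanity check on a template is listing the IDs it defines. A small sketch:

```ruby
require 'json'

# Lists the logical resource IDs that bin/stack_env.sh looks up.
template = JSON.parse(File.read('config/cloud-formation-local-postgres.json'))
puts template['Resources'].keys
# => redshiftSecurityGroup, redshiftSecurityGroupIngress, Redshift, s3Bucket
```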
data/config/cloud-formation-rds.json ADDED
@@ -0,0 +1,55 @@
+{
+  "AWSTemplateFormatVersion": "2010-09-09",
+  "Metadata": {
+  },
+  "Resources": {
+    "postgresSecurityGroup" : {
+      "Type" : "AWS::RDS::DBSecurityGroup",
+      "Properties" :
+      {
+        "DBSecurityGroupIngress" : [ {"CIDRIP": "0.0.0.0/0"} ],
+        "GroupDescription" : "Sup"
+      }
+    },
+    "postgres": {
+      "Type": "AWS::RDS::DBInstance",
+      "Properties": {
+        "AllowMajorVersionUpgrade": true,
+        "DBInstanceClass": "db.m3.medium",
+        "DBSecurityGroups": [{"Ref" :"postgresSecurityGroup"}],
+        "BackupRetentionPeriod": "0",
+        "DBName": "test",
+        "AllocatedStorage": "5",
+        "Engine": "postgres",
+        "EngineVersion": "9.5.2",
+        "MasterUsername": "test",
+        "MasterUserPassword": "testtesttest",
+        "StorageType": "standard",
+        "PubliclyAccessible": true
+      },
+      "Metadata": {
+      },
+      "DependsOn": [
+        "postgresSecurityGroup"
+      ]
+    },
+    "Redshift": {
+      "Type": "AWS::Redshift::Cluster",
+      "Properties": {
+        "ClusterType": "single-node",
+        "DBName": "test",
+        "MasterUsername": "test",
+        "MasterUserPassword": "Testtesttest1",
+        "NodeType": "dc1.large"
+      },
+      "Metadata": {
+      }
+    },
+    "s3Bucket": {
+      "Type": "AWS::S3::Bucket",
+      "Properties": {},
+      "Metadata": {
+      }
+    }
+  }
+}
data/lib/postgres_to_redshift.rb CHANGED
@@ -1,9 +1,9 @@
 require "postgres_to_redshift/version"
 require 'pg'
 require 'uri'
-require 'aws-sdk'
+require 'aws-sdk-v1'
 require 'zlib'
-require 'stringio'
+require 'tempfile'
 require "postgres_to_redshift/table"
 require "postgres_to_redshift/column"
 
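The dependency moves to the `aws-sdk-v1` gem, which ships the same v1 `AWS` namespace under a name that cannot collide with aws-sdk 2.x. A minimal sketch of the v1-style calls this file relies on (bucket and key names are placeholders):

```ruby
require 'aws-sdk-v1'

# v1-style S3 access, matching the bucket.objects[...] calls in this file.
s3 = AWS::S3.new(
  access_key_id: ENV['S3_DATABASE_EXPORT_ID'],
  secret_access_key: ENV['S3_DATABASE_EXPORT_KEY']
)
bucket = s3.buckets[ENV['S3_DATABASE_EXPORT_BUCKET']]
bucket.objects['export/example.psv.gz.1']
      .write(File.open('example.psv.gz'), acl: :authenticated_read)
```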
@@ -14,11 +14,15 @@ class PostgresToRedshift
 
   attr_reader :source_connection, :target_connection, :s3
 
+  KILOBYTE = 1024
+  MEGABYTE = KILOBYTE * 1024
+  GIGABYTE = MEGABYTE * 1024
+
   def self.update_tables
     update_tables = PostgresToRedshift.new
 
     update_tables.tables.each do |table|
-      target_connection.exec("CREATE TABLE IF NOT EXISTS public.#{table.target_table_name} (#{table.columns_for_create})")
+      target_connection.exec("CREATE TABLE IF NOT EXISTS public.#{target_connection.quote_ident(table.target_table_name)} (#{table.columns_for_create})")
 
       update_tables.copy_table(table)
 
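`PG::Connection#quote_ident` wraps an identifier in double quotes and escapes any embedded quotes, so mixed-case or reserved-word table names survive the generated DDL intact. For example:

```ruby
require 'pg'

conn = PG.connect(dbname: 'postgres') # any live connection will do
conn.quote_ident('films')     # => "\"films\""
conn.quote_ident('user')      # => "\"user\"" (reserved word, now safe)
conn.quote_ident('odd"name')  # => "\"odd\"\"name\""
```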
@@ -81,26 +85,42 @@ class PostgresToRedshift
   end
 
   def copy_table(table)
-    buffer = StringIO.new
-    zip = Zlib::GzipWriter.new(buffer)
-
-    puts "Downloading #{table}"
-    copy_command = "COPY (SELECT #{table.columns_for_copy} FROM #{table.name}) TO STDOUT WITH DELIMITER '|'"
-
-    source_connection.copy_data(copy_command) do
-      while row = source_connection.get_copy_data
-        zip.write(row)
+    tmpfile = Tempfile.new("psql2rs")
+    zip = Zlib::GzipWriter.new(tmpfile)
+    chunksize = 5 * GIGABYTE # uncompressed
+    chunk = 1
+    bucket.objects.with_prefix("export/#{table.target_table_name}.psv.gz").delete_all
+    begin
+      puts "Downloading #{table}"
+      copy_command = "COPY (SELECT #{table.columns_for_copy} FROM #{table.name}) TO STDOUT WITH DELIMITER '|'"
+
+      source_connection.copy_data(copy_command) do
+        while row = source_connection.get_copy_data
+          zip.write(row)
+          if (zip.pos > chunksize)
+            zip.finish
+            tmpfile.rewind
+            upload_table(table, tmpfile, chunk)
+            chunk += 1
+            zip.close unless zip.closed?
+            tmpfile.unlink
+            tmpfile = Tempfile.new("psql2rs")
+            zip = Zlib::GzipWriter.new(tmpfile)
+          end
+        end
       end
+      zip.finish
+      tmpfile.rewind
+      upload_table(table, tmpfile, chunk)
+    ensure
+      zip.close unless zip.closed?
+      tmpfile.unlink
     end
-    zip.finish
-    buffer.rewind
-    upload_table(table, buffer)
   end
 
-  def upload_table(table, buffer)
-    puts "Uploading #{table.target_table_name}"
-    bucket.objects["export/#{table.target_table_name}.psv.gz"].delete
-    bucket.objects["export/#{table.target_table_name}.psv.gz"].write(buffer, acl: :authenticated_read)
+  def upload_table(table, buffer, chunk)
+    puts "Uploading #{table.target_table_name}.#{chunk}"
+    bucket.objects["export/#{table.target_table_name}.psv.gz.#{chunk}"].write(buffer, acl: :authenticated_read)
   end
 
   def import_table(table)
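This is the core change of the release: the export no longer buffers an entire table in memory (`StringIO`) but streams it through a `GzipWriter` into a `Tempfile`, rotating to a new numbered chunk whenever `GzipWriter#pos` — which counts *uncompressed* bytes — passes 5 GB, presumably to keep each gzipped part safely under S3's 5 GB single-PUT limit. A reduced sketch of the rotation pattern; `each_gzipped_chunk` is illustrative, not part of the gem's API:

```ruby
require 'tempfile'
require 'zlib'

CHUNK_SIZE = 5 * 1024**3 # 5 GB uncompressed per chunk

def each_gzipped_chunk(rows, chunk_size: CHUNK_SIZE)
  chunk = 1
  tmpfile = Tempfile.new('psql2rs')
  zip = Zlib::GzipWriter.new(tmpfile)
  rows.each do |row|
    zip.write(row)
    next unless zip.pos > chunk_size
    zip.finish             # seal the gzip stream; the file is now complete
    tmpfile.rewind
    yield tmpfile, chunk   # caller uploads as <name>.psv.gz.<chunk>
    chunk += 1
    tmpfile.unlink
    tmpfile = Tempfile.new('psql2rs')
    zip = Zlib::GzipWriter.new(tmpfile)
  end
  zip.finish               # final (possibly partial) chunk
  tmpfile.rewind
  yield tmpfile, chunk
ensure
  zip.close unless zip.closed?
  tmpfile.unlink
end

# Usage sketch:
each_gzipped_chunk(%W[a|1\n b|2\n]) { |file, n| puts "chunk #{n}: #{file.size} bytes" }
```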
@@ -109,11 +129,11 @@ class PostgresToRedshift
 
     target_connection.exec("BEGIN;")
 
-    target_connection.exec("ALTER TABLE public.#{table.target_table_name} RENAME TO #{table.target_table_name}_updating")
+    target_connection.exec("ALTER TABLE public.#{target_connection.quote_ident(table.target_table_name)} RENAME TO #{table.target_table_name}_updating")
 
-    target_connection.exec("CREATE TABLE public.#{table.target_table_name} (#{table.columns_for_create})")
+    target_connection.exec("CREATE TABLE public.#{target_connection.quote_ident(table.target_table_name)} (#{table.columns_for_create})")
 
-    target_connection.exec("COPY public.#{table.target_table_name} FROM 's3://#{ENV['S3_DATABASE_EXPORT_BUCKET']}/export/#{table.target_table_name}.psv.gz' CREDENTIALS 'aws_access_key_id=#{ENV['S3_DATABASE_EXPORT_ID']};aws_secret_access_key=#{ENV['S3_DATABASE_EXPORT_KEY']}' GZIP TRUNCATECOLUMNS ESCAPE DELIMITER as '|';")
+    target_connection.exec("COPY public.#{target_connection.quote_ident(table.target_table_name)} FROM 's3://#{ENV['S3_DATABASE_EXPORT_BUCKET']}/export/#{table.target_table_name}.psv.gz' CREDENTIALS 'aws_access_key_id=#{ENV['S3_DATABASE_EXPORT_ID']};aws_secret_access_key=#{ENV['S3_DATABASE_EXPORT_KEY']}' GZIP TRUNCATECOLUMNS ESCAPE DELIMITER as '|';")
 
     target_connection.exec("COMMIT;")
   end
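Note that the COPY statement still names the unsuffixed key even though uploads now end in `.1`, `.2`, …: Redshift treats the `FROM 's3://…'` value as a key prefix and loads every matching object, so one command ingests all chunks of a table. Illustratively (bucket, table, and credentials are placeholders):

```ruby
# After a two-chunk export of "films", the bucket holds:
#   export/films.psv.gz.1
#   export/films.psv.gz.2
# A single COPY picks up both, because the S3 path is matched as a prefix:
copy_sql = <<~SQL
  COPY public."films"
  FROM 's3://some-bucket-to-use/export/films.psv.gz'
  CREDENTIALS 'aws_access_key_id=...;aws_secret_access_key=...'
  GZIP TRUNCATECOLUMNS ESCAPE DELIMITER as '|';
SQL
```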
data/lib/postgres_to_redshift/version.rb CHANGED
@@ -1,3 +1,3 @@
 class PostgresToRedshift
-  VERSION = "0.1.1"
+  VERSION = "0.1.2"
 end
data/postgres_to_redshift.gemspec CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
   spec.email = ["arakoczy@gmail.com"]
   spec.summary = %q{Load postgres databases into Amazon Redshift}
   spec.description = %q{Load postgres databases into Amazon Redshift. It's designed to work on Heroku Scheduler, or other *nix/BSD hosts.}
-  spec.homepage = "https://github.com/kitchensurfing/postgres_to_redshift"
+  spec.homepage = "https://github.com/toothrot/postgres_to_redshift"
   spec.license = "MIT"
 
   spec.files = `git ls-files -z`.split("\x0")
@@ -21,5 +21,5 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "bundler", "~> 1.6"
   spec.add_development_dependency "rake", "~> 10.0"
   spec.add_dependency "pg", "~> 0.17.0"
-  spec.add_dependency "aws-sdk", "~> 1.54"
+  spec.add_dependency "aws-sdk-v1", "~> 1.54"
 end
data/spec/features/small_table_spec.rb ADDED
@@ -0,0 +1,28 @@
+require 'spec_helper'
+
+RSpec.describe 'a small source database', type: :feature do
+  context 'with a simple column' do
+    before(:all) do
+      PostgresToRedshift::Test.test_connection.exec(<<-EOS
+        DROP TABLE IF EXISTS "p2r_integration";
+        CREATE TABLE IF NOT EXISTS "p2r_integration" ("id" SERIAL PRIMARY KEY, "title" text);
+        INSERT INTO "p2r_integration" ("title") VALUES ('Casablanca');
+      EOS
+      )
+    end
+    after(:all) do
+      PostgresToRedshift::Test.test_connection.exec(%q[DROP TABLE IF EXISTS "p2r_integration";])
+      PostgresToRedshift::Test.test_target_connection.exec(%q[DROP TABLE IF EXISTS "p2r_integration";])
+
+    end
+
+    it 'Copies all rows to target table' do
+      PostgresToRedshift.update_tables
+      result = PostgresToRedshift::Test.test_target_connection.exec(
+        'SELECT * FROM "p2r_integration";'
+      )
+      expect(result.num_tuples).to eq(1)
+      expect(result[0]).to eq('title' => 'Casablanca', 'id' => '1')
+    end
+  end
+end
data/spec/spec_prepare.rb CHANGED
@@ -3,8 +3,28 @@ module PostgresToRedshift::Test
     PostgresToRedshift.source_uri
   end
 
+  def self.target_uri
+    PostgresToRedshift.target_uri
+  end
+
   def self.test_connection
-    @test_connection ||= PG::Connection.new(host: source_uri.host, port: source_uri.port, user: source_uri.user || ENV['USER'], password: source_uri.password, dbname: source_uri.path[1..-1])
+    @test_connection ||= PG::Connection.new(
+      host: source_uri.host,
+      port: source_uri.port,
+      user: source_uri.user || ENV['USER'],
+      password: source_uri.password,
+      dbname: source_uri.path[1..-1]
+    )
+  end
+
+  def self.test_target_connection
+    @test_target_connection ||= PG::Connection.new(
+      host: target_uri.host,
+      port: target_uri.port,
+      user: target_uri.user || ENV['USER'],
+      password: target_uri.password,
+      dbname: target_uri.path[1..-1]
+    )
   end
 end
 
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: postgres_to_redshift
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - Alex Rakoczy
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-02-06 00:00:00.000000000 Z
+date: 2016-06-12 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -53,7 +53,7 @@ dependencies:
   - !ruby/object:Gem::Version
     version: 0.17.0
 - !ruby/object:Gem::Dependency
-  name: aws-sdk
+  name: aws-sdk-v1
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
@@ -71,7 +71,12 @@ description: Load postgres databases into Amazon Redshift. It's designed to work
 email:
 - arakoczy@gmail.com
 executables:
+- configure_aws.sh
+- create_stack.sh
+- delete_stack.sh
 - postgres_to_redshift
+- stack_env.sh
+- update_stack.sh
 extensions: []
 extra_rdoc_files: []
 files:
@@ -83,18 +88,26 @@ files:
 - LICENSE.txt
 - README.md
 - Rakefile
+- bin/configure_aws.sh
+- bin/create_stack.sh
+- bin/delete_stack.sh
 - bin/postgres_to_redshift
+- bin/stack_env.sh
+- bin/update_stack.sh
+- config/cloud-formation-local-postgres.json
+- config/cloud-formation-rds.json
 - lib/postgres_to_redshift.rb
 - lib/postgres_to_redshift/column.rb
 - lib/postgres_to_redshift/table.rb
 - lib/postgres_to_redshift/version.rb
 - postgres_to_redshift.gemspec
+- spec/features/small_table_spec.rb
 - spec/lib/postgres_to_redshift/column_spec.rb
 - spec/lib/postgres_to_redshift/table_spec.rb
 - spec/lib/postgres_to_redshift_spec.rb
 - spec/spec_helper.rb
 - spec/spec_prepare.rb
-homepage: https://github.com/kitchensurfing/postgres_to_redshift
+homepage: https://github.com/toothrot/postgres_to_redshift
 licenses:
 - MIT
 metadata: {}
@@ -119,8 +132,10 @@ signing_key:
 specification_version: 4
 summary: Load postgres databases into Amazon Redshift
 test_files:
+- spec/features/small_table_spec.rb
 - spec/lib/postgres_to_redshift/column_spec.rb
 - spec/lib/postgres_to_redshift/table_spec.rb
 - spec/lib/postgres_to_redshift_spec.rb
 - spec/spec_helper.rb
 - spec/spec_prepare.rb
+has_rdoc: