postgres_to_redshift 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 32377367b67f4405e131d319f810bbc38fbc2ac5
4
+ data.tar.gz: 97facb3cae58afb4c65b6cb8c17415e1e6e79f23
5
+ SHA512:
6
+ metadata.gz: bb62207056a659dce0b1cc6ceac28ee92af14794510b7c59150e5fe61d19e7f4e4c79fa8a1601989dfba3b097b0e0702bac67b258058a93f57d5f1c1dffd959f
7
+ data.tar.gz: e8f4cffccd43dbd33748c49b24bd4a0dadc670077ac798b2d5533da7cb144faa8ee60829fdbd60fcd34549c2af70b63d40edb05b71f730af9108680420140bde
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
15
+ *swp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in postgres_to_redshift.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Kitchensurfing
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,38 @@
1
+ # PostgresToRedshift
2
+
3
+ This gem copies data from PostgreSQL to Amazon Redshift. It is especially useful for copying a Heroku Postgres database into Redshift.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'postgres_to_redshift'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install postgres_to_redshift
20
+
21
+ ## Usage
22
+
23
+ ```bash
24
+ export REDSHIFT_URI='postgres://username:password@host:port/database-name'
25
+ export S3_DATABASE_EXPORT_ID='yourid'
26
+ export S3_DATABASE_EXPORT_KEY='yourkey'
27
+ export S3_DATABASE_EXPORT_BUCKET='some-bucket-to-use'
28
+
29
+ postgres_to_redshift $MY_SOURCE_DATABASE
30
+ ```
31
+
32
+ ## Contributing
33
+
34
+ 1. Fork it ( https://github.com/kitchensurfing/postgres_to_redshift/fork )
35
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
36
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
37
+ 4. Push to the branch (`git push origin my-new-feature`)
38
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby

# Command-line entry point: copies the database identified by the first
# argument (a postgres:// URI) into the Redshift cluster configured through
# the environment.

require 'postgres_to_redshift'

# PostgresToRedshift.update_tables reads the source URI from ARGV[0]; stop
# early with a usage message instead of failing later while parsing nil.
abort "Usage: postgres_to_redshift SOURCE_DATABASE_URI" if ARGV.empty?

PostgresToRedshift.update_tables
6
+
@@ -0,0 +1,3 @@
1
# Version holder for the postgres_to_redshift gem.
class PostgresToRedshift
  # Release number of the gem.
  VERSION = '0.0.1'
end
@@ -0,0 +1,123 @@
1
+ require "postgres_to_redshift/version"
2
+ require 'pg'
3
+ require 'uri'
4
+ require 'aws-sdk'
5
+
6
# Copies the tables of the source database's public schema into the target
# database, staging each table as a pipe-delimited file in S3.
#
# Configuration read from the environment:
#   REDSHIFT_URI              - connection URI of the target database
#   S3_DATABASE_EXPORT_ID     - AWS access key id
#   S3_DATABASE_EXPORT_KEY    - AWS secret access key
#   S3_DATABASE_EXPORT_BUCKET - bucket holding the staged "export/*.psv" files
#
# NOTE(review): table names are interpolated unquoted into the DDL/COPY
# statements below, so names needing quoting (mixed case, reserved words)
# are not supported.
class PostgresToRedshift
  # Source column types that are recreated as "character varying(max)" on the
  # target. Anchored so only an exact type name matches.
  VARCHAR_MAX_TYPES = /\A(?:text|jsonb?|bytea|money)\z/

  attr_reader :source_connection, :target_connection

  # Runs a full sync using ARGV[0] as the source database URI.
  def self.update_tables
    updater = PostgresToRedshift.new(source_uri: ARGV[0])
    updater.create_new_tables

    # FIXME: BIG WARNING HERE: this order is important. We want the views to overwrite the tables. We should make it so the order doesn't matter later.
    updater.copy_tables
    updater.copy_views
    updater.import_tables
  end

  # Opens both database connections.
  #
  # @param source_uri [String] postgres:// URI of the database to copy from
  # @raise [KeyError] when REDSHIFT_URI is not set
  def initialize(source_uri:)
    source_uri = URI.parse(source_uri)
    # fetch names the missing variable instead of failing inside URI.parse(nil).
    target_uri = URI.parse(ENV.fetch('REDSHIFT_URI'))
    @source_connection = connect(source_uri)
    # The source is only ever read from; make that explicit for the session.
    @source_connection.exec("SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY;")
    @target_connection = connect(target_uri)
  end

  # @return [Array<String>] names of the public-schema views of the source,
  #   excluding pg_stat_statements
  def views
    source_connection.exec("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'VIEW'").map { |row| row["table_name"] } - ["pg_stat_statements"]
  end

  # @return [Array<String>] names of the public-schema base tables of the source
  def tables
    source_connection.exec("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'BASE TABLE'").map { |row| row["table_name"] }
  end

  # Builds the column list used in CREATE TABLE on the target.
  #
  # @param table_name [String] table in the source's public schema
  # @return [String] comma-separated `"column" type` definitions
  def table_columns(table_name)
    # ordinal_position keeps the definitions in the table's column order,
    # which the delimiter-based COPY in import_table depends on.
    sql = "SELECT column_name, data_type, character_maximum_length FROM information_schema.columns WHERE table_schema='public' AND table_name=$1 ORDER BY ordinal_position"

    source_connection.exec_params(sql, [table_name]).map do |row|
      data_type = row["data_type"].sub(VARCHAR_MAX_TYPES, 'character varying(max)')
      max_length = row["character_maximum_length"].to_s
      definition = %Q|"#{row["column_name"]}" #{data_type}|

      max_length.empty? ? definition : "#{definition}(#{max_length})"
    end.join(", ")
  end

  # @return [AWS::S3] memoized S3 client built from the export credentials
  def s3
    @s3 ||= AWS::S3.new(access_key_id: ENV['S3_DATABASE_EXPORT_ID'], secret_access_key: ENV['S3_DATABASE_EXPORT_KEY'])
  end

  # @return the memoized bucket named by S3_DATABASE_EXPORT_BUCKET
  def bucket
    @bucket ||= s3.buckets[ENV['S3_DATABASE_EXPORT_BUCKET']]
  end

  # Creates, on the target, every source table that does not exist there yet.
  def create_new_tables
    tables.each do |table|
      target_connection.exec("CREATE TABLE IF NOT EXISTS public.#{table} (#{table_columns(table)})")
    end
  end

  # Dumps one source relation and uploads it to S3.
  #
  # @param source_table [String] table or view to read from the source
  # @param target_table [String] name the export is staged under
  # @param is_view [Boolean] views are exported through a SELECT
  def copy_table(source_table, target_table, is_view = false)
    buffer = ""
    puts "Downloading #{source_table}"
    copy_command =
      if is_view
        "COPY (SELECT * FROM #{source_table}) TO STDOUT WITH DELIMITER '|'"
      else
        "COPY #{source_table} TO STDOUT WITH DELIMITER '|'"
      end

    source_connection.copy_data(copy_command) do
      while (row = source_connection.get_copy_data)
        buffer << row
      end
    end
    upload_table(target_table, buffer)
  end

  # Replaces the staged export file for target_table with buffer.
  def upload_table(target_table, buffer)
    puts "Uploading #{target_table}"
    bucket.objects["export/#{target_table}.psv"].delete
    bucket.objects["export/#{target_table}.psv"].write(buffer, acl: :authenticated_read)
  end

  # Loads the staged export into a freshly created target table. The previous
  # table is renamed to "<table>_updating" and stays until the next import of
  # the same table drops it.
  #
  # @raise re-raises any error from the target after rolling the swap back
  def import_table(target_table)
    puts "Importing #{target_table}"
    target_connection.exec("DROP TABLE IF EXISTS public.#{target_table}_updating")

    target_connection.exec("BEGIN;")
    begin
      target_connection.exec("ALTER TABLE public.#{target_table} RENAME TO #{target_table}_updating")

      target_connection.exec("CREATE TABLE public.#{target_table} (#{table_columns(target_table)})")

      target_connection.exec("COPY public.#{target_table} FROM 's3://#{ENV['S3_DATABASE_EXPORT_BUCKET']}/export/#{target_table}.psv' CREDENTIALS 'aws_access_key_id=#{ENV['S3_DATABASE_EXPORT_ID']};aws_secret_access_key=#{ENV['S3_DATABASE_EXPORT_KEY']}' TRUNCATECOLUMNS ESCAPE DELIMITER as '|';")

      target_connection.exec("COMMIT;")
    rescue StandardError
      # Leave the connection usable for the remaining tables and keep the
      # previous table in place.
      target_connection.exec("ROLLBACK;")
      raise
    end
  end

  # Stages every source table under its own name.
  def copy_tables
    tables.each do |table|
      copy_table(table, table)
    end
  end

  # Stages every source view under its name minus a trailing "_view", so that
  # "<table>_view" overwrites the export of "<table>". Only the suffix is
  # removed; names that merely contain "_view" (e.g. "page_views") are kept.
  def copy_views
    views.each do |view|
      table = view.sub(/_view\z/, '')
      copy_table(view, table, true)
    end
  end

  # FIXME: This relies on views being uploaded after tables.
  def import_tables
    tables.each do |table|
      import_table(table)
    end
  end

  private

  # Opens a PG connection from the components of a parsed URI; the database
  # name is the URI path without its leading slash.
  def connect(uri)
    PG::Connection.new(host: uri.host, port: uri.port, user: uri.user, password: uri.password, dbname: uri.path[1..-1])
  end
end
@@ -0,0 +1,25 @@
1
# coding: utf-8
# Gem specification for postgres_to_redshift.

# Put the gem's own lib directory on the load path so the version constant
# can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'postgres_to_redshift/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name        = "postgres_to_redshift"
  gem.version     = PostgresToRedshift::VERSION
  gem.authors     = ["Alex Rakoczy"]
  gem.email       = ["arakoczy@gmail.com"]
  gem.summary     = %q{Load postgres databases into Amazon Redshift}
  gem.description = %q{Load postgres databases into Amazon Redshift. It's designed to work on Heroku Scheduler, or other *nix/BSD hosts.}
  gem.homepage    = "https://github.com/kitchensurfing/postgres_to_redshift"
  gem.license     = "MIT"

  # Packaged files: everything tracked by git; executables and test files
  # are picked out of that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Dependencies
  gem.add_development_dependency "bundler", "~> 1.7"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_dependency "pg", "~> 0.17.0"
  gem.add_dependency "aws-sdk", "~> 1.54"
end
metadata ADDED
@@ -0,0 +1,111 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: postgres_to_redshift
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Alex Rakoczy
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pg
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.17.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.17.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: aws-sdk
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.54'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.54'
69
+ description: Load postgres databases into Amazon Redshift. It's designed to work on
70
+ Heroku Scheduler, or other *nix/BSD hosts.
71
+ email:
72
+ - arakoczy@gmail.com
73
+ executables:
74
+ - postgres_to_redshift
75
+ extensions: []
76
+ extra_rdoc_files: []
77
+ files:
78
+ - ".gitignore"
79
+ - Gemfile
80
+ - LICENSE.txt
81
+ - README.md
82
+ - Rakefile
83
+ - bin/postgres_to_redshift
84
+ - lib/postgres_to_redshift.rb
85
+ - lib/postgres_to_redshift/version.rb
86
+ - postgres_to_redshift.gemspec
87
+ homepage: https://github.com/kitchensurfing/postgres_to_redshift
88
+ licenses:
89
+ - MIT
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 2.4.5
108
+ signing_key:
109
+ specification_version: 4
110
+ summary: Load postgres databases into Amazon Redshift
111
+ test_files: []