postgres_to_redshift 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 32377367b67f4405e131d319f810bbc38fbc2ac5
4
+ data.tar.gz: 97facb3cae58afb4c65b6cb8c17415e1e6e79f23
5
+ SHA512:
6
+ metadata.gz: bb62207056a659dce0b1cc6ceac28ee92af14794510b7c59150e5fe61d19e7f4e4c79fa8a1601989dfba3b097b0e0702bac67b258058a93f57d5f1c1dffd959f
7
+ data.tar.gz: e8f4cffccd43dbd33748c49b24bd4a0dadc670077ac798b2d5533da7cb144faa8ee60829fdbd60fcd34549c2af70b63d40edb05b71f730af9108680420140bde
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
15
+ *swp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in postgres_to_redshift.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Kitchensurfing
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,38 @@
1
+ # PostgresToRedshift
2
+
3
+ This gem copies data from a PostgreSQL database into Amazon Redshift. It is especially useful for copying a Heroku Postgres database into Redshift.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'postgres_to_redshift'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install postgres_to_redshift
20
+
21
+ ## Usage
22
+
23
+ ```bash
24
+ export REDSHIFT_URI='postgres://username:password@host:port/database-name'
25
+ export S3_DATABASE_EXPORT_ID='yourid'
26
+ export S3_DATABASE_EXPORT_KEY='yourkey'
27
+ export S3_DATABASE_EXPORT_BUCKET='some-bucket-to-use'
28
+
29
+ postgres_to_redshift $MY_SOURCE_DATABASE
30
+ ```
31
+
32
+ ## Contributing
33
+
34
+ 1. Fork it ( https://github.com/kitchensurfing/postgres_to_redshift/fork )
35
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
36
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
37
+ 4. Push to the branch (`git push origin my-new-feature`)
38
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # Command-line entry point for the postgres_to_redshift gem.
3
+ require 'postgres_to_redshift'
4
+ # Run the whole export; update_tables takes the source database URI from ARGV[0].
5
+ PostgresToRedshift.update_tables
6
+
@@ -0,0 +1,3 @@
1
# Namespace class of the gem; this file only carries the release version.
class PostgresToRedshift
  # Current gem version, read by the gemspec. Frozen so that it cannot be
  # mutated in place at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,123 @@
1
+ require "postgres_to_redshift/version"
2
+ require 'pg'
3
+ require 'uri'
4
+ require 'aws-sdk'
5
+
6
# Copies the tables and views of the `public` schema of a source PostgreSQL
# database into Amazon Redshift, staging the rows in S3 as pipe-delimited files.
#
# Configuration read from the environment:
#   REDSHIFT_URI              - connection URI of the target Redshift database
#   S3_DATABASE_EXPORT_ID     - AWS access key id (S3 upload and Redshift COPY)
#   S3_DATABASE_EXPORT_KEY    - AWS secret access key
#   S3_DATABASE_EXPORT_BUCKET - bucket receiving the export/<table>.psv objects
class PostgresToRedshift
  # Source column types that are created as unbounded varchars on the target.
  # Looked up by the whole type name so that "jsonb" maps cleanly instead of
  # being rewritten to "character varying(max)b" by a substring replacement.
  REDSHIFT_TYPE_OVERRIDES = {
    'text'  => 'character varying(max)',
    'json'  => 'character varying(max)',
    'jsonb' => 'character varying(max)',
    'bytea' => 'character varying(max)',
    'money' => 'character varying(max)'
  }.freeze

  attr_reader :source_connection, :target_connection

  # Runs a full export using the source database URI given as the first
  # command-line argument.
  #
  # Raises ArgumentError when no source URI was passed on the command line.
  def self.update_tables
    source_uri = ARGV[0]
    if source_uri.nil? || source_uri.empty?
      raise ArgumentError, 'usage: postgres_to_redshift SOURCE_DATABASE_URI'
    end

    update_tables = new(source_uri: source_uri)
    update_tables.create_new_tables

    # FIXME: BIG WARNING HERE: this order is important. We want the views to overwrite the tables. We should make it so the order doesn't matter later.
    update_tables.copy_tables
    update_tables.copy_views
    update_tables.import_tables
  end

  # Opens the source connection (switched to read-only transactions) and the
  # target connection.
  #
  # source_uri - String connection URI of the source database.
  #
  # Raises KeyError when REDSHIFT_URI is not set.
  def initialize(source_uri:)
    source = URI.parse(source_uri)
    target = URI.parse(ENV.fetch('REDSHIFT_URI'))

    @source_connection = connect(source)
    @source_connection.exec("SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY;")
    @target_connection = connect(target)
  end

  # Names of the views in the source `public` schema, except pg_stat_statements.
  def views
    source_connection.exec("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'VIEW'").map { |row| row["table_name"] } - ["pg_stat_statements"]
  end

  # Names of the base tables in the source `public` schema.
  def tables
    source_connection.exec("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'BASE TABLE'").map { |row| row["table_name"] }
  end

  # Builds the column list of a CREATE TABLE statement for the target.
  #
  # table_name - String name of a table in the source `public` schema.
  #
  # Returns a String such as %q|"id" integer, "name" character varying(255)|.
  def table_columns(table_name)
    # The table name is bound as a parameter rather than interpolated, and the
    # columns are ordered explicitly because the COPY data is positional.
    sql = "SELECT column_name, data_type, character_maximum_length FROM information_schema.columns " \
          "WHERE table_schema = 'public' AND table_name = $1 ORDER BY ordinal_position"

    source_connection.exec_params(sql, [table_name]).map do |row|
      data_type = REDSHIFT_TYPE_OVERRIDES.fetch(row["data_type"], row["data_type"])
      max_length = row["character_maximum_length"].to_s
      column = "#{quote_ident(row["column_name"])} #{data_type}"
      max_length.empty? ? column : "#{column}(#{max_length})"
    end.join(", ")
  end

  # Memoized S3 client built from the S3_DATABASE_EXPORT_* credentials.
  def s3
    @s3 ||= AWS::S3.new(access_key_id: ENV['S3_DATABASE_EXPORT_ID'], secret_access_key: ENV['S3_DATABASE_EXPORT_KEY'])
  end

  # Memoized handle on the export bucket named by S3_DATABASE_EXPORT_BUCKET.
  def bucket
    @bucket ||= s3.buckets[ENV['S3_DATABASE_EXPORT_BUCKET']]
  end

  # Creates, on the target, every source table that does not exist there yet.
  def create_new_tables
    tables.each do |table|
      target_connection.exec("CREATE TABLE IF NOT EXISTS public.#{quote_ident(table)} (#{table_columns(table)})")
    end
  end

  # Downloads one relation from the source and uploads it to S3.
  #
  # source_table - String name of the table or view to read.
  # target_table - String name used for the S3 object (export/<name>.psv).
  # is_view      - true when source_table is a view.
  def copy_table(source_table, target_table, is_view = false)
    puts "Downloading #{source_table}"
    relation = quote_ident(source_table)
    # PostgreSQL's COPY reads only plain tables, so a view goes through a SELECT.
    copy_command =
      if is_view
        "COPY (SELECT * FROM #{relation}) TO STDOUT WITH DELIMITER '|'"
      else
        "COPY #{relation} TO STDOUT WITH DELIMITER '|'"
      end

    buffer = ""
    source_connection.copy_data(copy_command) do
      while (row = source_connection.get_copy_data)
        buffer << row
      end
    end
    upload_table(target_table, buffer)
  end

  # Replaces the S3 object export/<target_table>.psv with the given data.
  #
  # target_table - String table name used to build the object key.
  # buffer       - String holding the pipe-delimited rows.
  def upload_table(target_table, buffer)
    puts "Uploading #{target_table}"
    object = bucket.objects["export/#{target_table}.psv"]
    object.delete
    object.write(buffer, acl: :authenticated_read)
  end

  # Rebuilds one target table from its S3 export inside a transaction. The
  # previous table is kept as <name>_updating until the next import.
  #
  # target_table - String name of the table to rebuild.
  #
  # Re-raises any error after rolling the transaction back, so the target
  # connection is not left in an aborted transaction for the next table.
  def import_table(target_table)
    puts "Importing #{target_table}"
    table = quote_ident(target_table)
    previous = quote_ident("#{target_table}_updating")
    s3_path = "s3://#{ENV['S3_DATABASE_EXPORT_BUCKET']}/export/#{target_table}.psv"
    credentials = "aws_access_key_id=#{ENV['S3_DATABASE_EXPORT_ID']};aws_secret_access_key=#{ENV['S3_DATABASE_EXPORT_KEY']}"

    target_connection.exec("DROP TABLE IF EXISTS public.#{previous}")

    target_connection.exec("BEGIN;")
    begin
      target_connection.exec("ALTER TABLE public.#{table} RENAME TO #{previous}")
      target_connection.exec("CREATE TABLE public.#{table} (#{table_columns(target_table)})")
      target_connection.exec("COPY public.#{table} FROM '#{s3_path}' CREDENTIALS '#{credentials}' TRUNCATECOLUMNS ESCAPE DELIMITER as '|';")
      target_connection.exec("COMMIT;")
    rescue StandardError
      target_connection.exec("ROLLBACK;")
      raise
    end
  end

  # Exports every source table to S3 under its own name.
  def copy_tables
    tables.each do |table|
      copy_table(table, table)
    end
  end

  # Exports every source view to S3 under the view's name minus a trailing
  # "_view" (users_view -> users), so it overwrites the same-named table export.
  def copy_views
    views.each do |view|
      # Only the suffix is stripped: "page_views_view" becomes "page_views".
      table = view.sub(/_view\z/, '')
      copy_table(view, table, true)
    end
  end

  # FIXME: This relies on views being uploaded after tables.
  # Imports the S3 export of every source table into the target.
  def import_tables
    tables.each do |table|
      import_table(table)
    end
  end

  private

  # Opens a PG connection described by a parsed URI.
  def connect(uri)
    PG::Connection.new(host: uri.host, port: uri.port, user: uri.user, password: uri.password, dbname: uri.path[1..-1])
  end

  # Wraps an identifier in double quotes, doubling any embedded double quote,
  # so reserved words and unusual names are valid in the generated SQL.
  def quote_ident(name)
    %Q|"#{name.to_s.gsub('"', '""')}"|
  end
end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) # put this checkout's lib/ on the load path so the version file below can be required
4
+ require 'postgres_to_redshift/version'
5
+ # Gem specification for postgres_to_redshift; the version is taken from PostgresToRedshift::VERSION.
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "postgres_to_redshift"
8
+ spec.version = PostgresToRedshift::VERSION
9
+ spec.authors = ["Alex Rakoczy"]
10
+ spec.email = ["arakoczy@gmail.com"]
11
+ spec.summary = %q{Load postgres databases into Amazon Redshift}
12
+ spec.description = %q{Load postgres databases into Amazon Redshift. It's designed to work on Heroku Scheduler, or other *nix/BSD hosts.}
13
+ spec.homepage = "https://github.com/kitchensurfing/postgres_to_redshift"
14
+ spec.license = "MIT"
15
+ # Package every file tracked by git (NUL-separated listing); executables are the basenames of the files under bin/.
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+ # bundler and rake are development-only; pg and aws-sdk are runtime dependencies.
21
+ spec.add_development_dependency "bundler", "~> 1.7"
22
+ spec.add_development_dependency "rake", "~> 10.0"
23
+ spec.add_dependency "pg", "~> 0.17.0"
24
+ spec.add_dependency "aws-sdk", "~> 1.54"
25
+ end
metadata ADDED
@@ -0,0 +1,111 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: postgres_to_redshift
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Alex Rakoczy
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pg
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.17.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.17.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: aws-sdk
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.54'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.54'
69
+ description: Load postgres databases into Amazon Redshift. It's designed to work on
70
+ Heroku Scheduler, or other *nix/BSD hosts.
71
+ email:
72
+ - arakoczy@gmail.com
73
+ executables:
74
+ - postgres_to_redshift
75
+ extensions: []
76
+ extra_rdoc_files: []
77
+ files:
78
+ - ".gitignore"
79
+ - Gemfile
80
+ - LICENSE.txt
81
+ - README.md
82
+ - Rakefile
83
+ - bin/postgres_to_redshift
84
+ - lib/postgres_to_redshift.rb
85
+ - lib/postgres_to_redshift/version.rb
86
+ - postgres_to_redshift.gemspec
87
+ homepage: https://github.com/kitchensurfing/postgres_to_redshift
88
+ licenses:
89
+ - MIT
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 2.4.5
108
+ signing_key:
109
+ specification_version: 4
110
+ summary: Load postgres databases into Amazon Redshift
111
+ test_files: []