postgres_to_redshift 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +38 -0
- data/Rakefile +2 -0
- data/bin/postgres_to_redshift +6 -0
- data/lib/postgres_to_redshift/version.rb +3 -0
- data/lib/postgres_to_redshift.rb +123 -0
- data/postgres_to_redshift.gemspec +25 -0
- metadata +111 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 32377367b67f4405e131d319f810bbc38fbc2ac5
|
4
|
+
data.tar.gz: 97facb3cae58afb4c65b6cb8c17415e1e6e79f23
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bb62207056a659dce0b1cc6ceac28ee92af14794510b7c59150e5fe61d19e7f4e4c79fa8a1601989dfba3b097b0e0702bac67b258058a93f57d5f1c1dffd959f
|
7
|
+
data.tar.gz: e8f4cffccd43dbd33748c49b24bd4a0dadc670077ac798b2d5533da7cb144faa8ee60829fdbd60fcd34549c2af70b63d40edb05b71f730af9108680420140bde
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Kitchensurfing
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# PostgresToRedshift
|
2
|
+
|
3
|
+
This gem copies data from PostgreSQL to Amazon Redshift. It is especially useful for copying data from Postgres to Redshift on Heroku.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'postgres_to_redshift'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install postgres_to_redshift
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```bash
|
24
|
+
export REDSHIFT_URI='postgres://username:password@host:port/database-name'
|
25
|
+
export S3_DATABASE_EXPORT_ID='yourid'
|
26
|
+
export S3_DATABASE_EXPORT_KEY='yourkey'
|
27
|
+
export S3_DATABASE_EXPORT_BUCKET='some-bucket-to-use'
|
28
|
+
|
29
|
+
postgres_to_redshift $MY_SOURCE_DATABASE
|
30
|
+
```
|
31
|
+
|
32
|
+
## Contributing
|
33
|
+
|
34
|
+
1. Fork it ( https://github.com/kitchensurfing/postgres_to_redshift/fork )
|
35
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
36
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
37
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
38
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
require "postgres_to_redshift/version"
|
2
|
+
require 'pg'
|
3
|
+
require 'uri'
|
4
|
+
require 'aws-sdk'
|
5
|
+
|
6
|
+
# Copies the tables and views found in the `public` schema of a source
# PostgreSQL database into a Redshift database. Data is staged in S3 as
# pipe-delimited files under the `export/` prefix and then loaded with COPY.
#
# Configuration is read from the environment:
#   REDSHIFT_URI              - target connection URI
#   S3_DATABASE_EXPORT_ID     - AWS access key id
#   S3_DATABASE_EXPORT_KEY    - AWS secret access key
#   S3_DATABASE_EXPORT_BUCKET - bucket used for the staged files
class PostgresToRedshift
  # Source column types that are emitted as `character varying(max)` in the
  # target DDL. Matching is exact, so e.g. `jsonb` is mapped as a whole rather
  # than having only its `json` prefix rewritten.
  TYPE_OVERRIDES = {
    'text'  => 'character varying(max)',
    'json'  => 'character varying(max)',
    'jsonb' => 'character varying(max)',
    'bytea' => 'character varying(max)',
    'money' => 'character varying(max)'
  }.freeze

  attr_reader :source_connection, :target_connection

  # Command-line entry point: runs a full export/import using ARGV[0] as the
  # source database URI.
  def self.update_tables
    migrator = new(source_uri: ARGV[0])
    migrator.create_new_tables

    # FIXME: BIG WARNING HERE: this order is important. We want the views to overwrite the tables. We should make it so the order doesn't matter later.
    migrator.copy_tables
    migrator.copy_views
    migrator.import_tables
  end

  # Opens both database connections.
  #
  # @param source_uri [String] URI of the source database; the target URI is
  #   taken from ENV['REDSHIFT_URI'].
  def initialize(source_uri:)
    @source_connection = connect(URI.parse(source_uri))
    # The source is only ever read from, so make that explicit at session level.
    @source_connection.exec("SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY;")
    @target_connection = connect(URI.parse(ENV['REDSHIFT_URI']))
  end

  # @return [Array<String>] names of the views in the source `public` schema,
  #   excluding `pg_stat_statements`.
  def views
    source_connection.exec("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'VIEW'").map { |row| row["table_name"] } - ["pg_stat_statements"]
  end

  # @return [Array<String>] names of the base tables in the source `public` schema.
  def tables
    source_connection.exec("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'BASE TABLE'").map { |row| row["table_name"] }
  end

  # Builds the column-definition list used in CREATE TABLE statements.
  #
  # @param table_name [String] table in the source `public` schema
  # @return [String] comma-separated `"column" type` definitions
  def table_columns(table_name)
    # The table name is passed as a bind parameter instead of being spliced
    # into the SQL literal. Columns are ordered explicitly because the staged
    # file is loaded positionally.
    sql = "SELECT column_name, data_type, character_maximum_length FROM information_schema.columns " \
          "WHERE table_schema = 'public' AND table_name = $1 ORDER BY ordinal_position"

    source_connection.exec_params(sql, [table_name]).map do |row|
      # Look the type up without mutating the strings owned by the result row.
      data_type = TYPE_OVERRIDES.fetch(row["data_type"], row["data_type"])
      max_length = row["character_maximum_length"].to_s

      if max_length.empty?
        %Q|"#{row["column_name"]}" #{data_type}|
      else
        %Q|"#{row["column_name"]}" #{data_type}(#{max_length})|
      end
    end.join(", ")
  end

  # @return [AWS::S3] memoized S3 client built from the export credentials.
  def s3
    @s3 ||= AWS::S3.new(access_key_id: ENV['S3_DATABASE_EXPORT_ID'], secret_access_key: ENV['S3_DATABASE_EXPORT_KEY'])
  end

  # @return the memoized bucket named by ENV['S3_DATABASE_EXPORT_BUCKET'].
  def bucket
    @bucket ||= s3.buckets[ENV['S3_DATABASE_EXPORT_BUCKET']]
  end

  # Creates any source table that does not yet exist on the target, so the
  # later rename in #import_table always has something to rename.
  def create_new_tables
    tables.each do |table|
      target_connection.exec("CREATE TABLE IF NOT EXISTS public.#{table} (#{table_columns(table)})")
    end
  end

  # Dumps one source relation and uploads it to S3.
  #
  # @param source_table [String] table or view to read from
  # @param target_table [String] name used for the staged S3 object
  # @param is_view [Boolean] views cannot be the direct subject of COPY, so
  #   they are wrapped in a SELECT
  def copy_table(source_table, target_table, is_view = false)
    buffer = ""
    puts "Downloading #{source_table}"
    copy_command =
      if is_view
        "COPY (SELECT * FROM #{source_table}) TO STDOUT WITH DELIMITER '|'"
      else
        "COPY #{source_table} TO STDOUT WITH DELIMITER '|'"
      end

    source_connection.copy_data(copy_command) do
      while (row = source_connection.get_copy_data)
        buffer << row
      end
    end
    upload_table(target_table, buffer)
  end

  # Replaces the staged S3 object for +target_table+ with +buffer+.
  def upload_table(target_table, buffer)
    puts "Uploading #{target_table}"
    bucket.objects["export/#{target_table}.psv"].delete
    bucket.objects["export/#{target_table}.psv"].write(buffer, acl: :authenticated_read)
  end

  # Swaps in a freshly loaded copy of +target_table+ on the target database.
  # The previous table is kept as `<name>_updating` until the next run.
  # If any step inside the transaction fails, the transaction is rolled back
  # and the original error is re-raised.
  def import_table(target_table)
    puts "Importing #{target_table}"
    target_connection.exec("DROP TABLE IF EXISTS public.#{target_table}_updating")

    target_connection.exec("BEGIN;")
    begin
      target_connection.exec("ALTER TABLE public.#{target_table} RENAME TO #{target_table}_updating")

      target_connection.exec("CREATE TABLE public.#{target_table} (#{table_columns(target_table)})")

      target_connection.exec("COPY public.#{target_table} FROM 's3://#{ENV['S3_DATABASE_EXPORT_BUCKET']}/export/#{target_table}.psv' CREDENTIALS 'aws_access_key_id=#{ENV['S3_DATABASE_EXPORT_ID']};aws_secret_access_key=#{ENV['S3_DATABASE_EXPORT_KEY']}' TRUNCATECOLUMNS ESCAPE DELIMITER as '|';")

      target_connection.exec("COMMIT;")
    rescue StandardError
      # Leave the connection usable and the original table in place.
      target_connection.exec("ROLLBACK;")
      raise
    end
  end

  # Stages every source table in S3.
  def copy_tables
    tables.each do |table|
      copy_table(table, table)
    end
  end

  # Stages every source view in S3 under the name of the table it shadows
  # (the view name with a trailing `_view` removed). Only the suffix is
  # stripped, so a view such as `page_views` keeps its own name instead of
  # clobbering the export of `pages`.
  def copy_views
    views.each do |view|
      table = view.sub(/_view\z/, '')
      copy_table(view, table, true)
    end
  end

  # FIXME: This relies on views being uploaded after tables.
  # Loads the staged file of every source table into the target.
  def import_tables
    tables.each do |table|
      import_table(table)
    end
  end

  private

  # Opens a PG connection from an already-parsed database URI.
  def connect(uri)
    PG::Connection.new(host: uri.host, port: uri.port, user: uri.user, password: uri.password, dbname: uri.path[1..-1])
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'postgres_to_redshift/version'
|
5
|
+
|
6
|
+
# Gem specification for postgres_to_redshift.
Gem::Specification.new do |spec|
  spec.name          = "postgres_to_redshift"
  spec.version       = PostgresToRedshift::VERSION
  spec.authors       = ["Alex Rakoczy"]
  spec.email         = ["arakoczy@gmail.com"]
  spec.summary       = %q{Load postgres databases into Amazon Redshift}
  spec.description   = %q{Load postgres databases into Amazon Redshift. It's designed to work on Heroku Scheduler, or other *nix/BSD hosts.}
  spec.homepage      = "https://github.com/kitchensurfing/postgres_to_redshift"
  spec.license       = "MIT"

  # The library uses required keyword arguments (`def initialize(source_uri:)`),
  # which are only available from Ruby 2.1 onwards.
  spec.required_ruby_version = ">= 2.1.0"

  # Package every file tracked by git; executables and test files are derived
  # from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_dependency "pg", "~> 0.17.0"
  spec.add_dependency "aws-sdk", "~> 1.54"
end
|
metadata
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: postgres_to_redshift
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Alex Rakoczy
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-01-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: pg
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.17.0
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.17.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: aws-sdk
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.54'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.54'
|
69
|
+
description: Load postgres databases into Amazon Redshift. It's designed to work on
|
70
|
+
Heroku Scheduler, or other *nix/BSD hosts.
|
71
|
+
email:
|
72
|
+
- arakoczy@gmail.com
|
73
|
+
executables:
|
74
|
+
- postgres_to_redshift
|
75
|
+
extensions: []
|
76
|
+
extra_rdoc_files: []
|
77
|
+
files:
|
78
|
+
- ".gitignore"
|
79
|
+
- Gemfile
|
80
|
+
- LICENSE.txt
|
81
|
+
- README.md
|
82
|
+
- Rakefile
|
83
|
+
- bin/postgres_to_redshift
|
84
|
+
- lib/postgres_to_redshift.rb
|
85
|
+
- lib/postgres_to_redshift/version.rb
|
86
|
+
- postgres_to_redshift.gemspec
|
87
|
+
homepage: https://github.com/kitchensurfing/postgres_to_redshift
|
88
|
+
licenses:
|
89
|
+
- MIT
|
90
|
+
metadata: {}
|
91
|
+
post_install_message:
|
92
|
+
rdoc_options: []
|
93
|
+
require_paths:
|
94
|
+
- lib
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
requirements: []
|
106
|
+
rubyforge_project:
|
107
|
+
rubygems_version: 2.4.5
|
108
|
+
signing_key:
|
109
|
+
specification_version: 4
|
110
|
+
summary: Load postgres databases into Amazon Redshift
|
111
|
+
test_files: []
|