postgres_to_redshift 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +38 -0
- data/Rakefile +2 -0
- data/bin/postgres_to_redshift +6 -0
- data/lib/postgres_to_redshift/version.rb +3 -0
- data/lib/postgres_to_redshift.rb +123 -0
- data/postgres_to_redshift.gemspec +25 -0
- metadata +111 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 32377367b67f4405e131d319f810bbc38fbc2ac5
|
4
|
+
data.tar.gz: 97facb3cae58afb4c65b6cb8c17415e1e6e79f23
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bb62207056a659dce0b1cc6ceac28ee92af14794510b7c59150e5fe61d19e7f4e4c79fa8a1601989dfba3b097b0e0702bac67b258058a93f57d5f1c1dffd959f
|
7
|
+
data.tar.gz: e8f4cffccd43dbd33748c49b24bd4a0dadc670077ac798b2d5533da7cb144faa8ee60829fdbd60fcd34549c2af70b63d40edb05b71f730af9108680420140bde
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Kitchensurfing
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# PostgresToRedshift
|
2
|
+
|
3
|
+
This gem copies data from postgres to redshift. It's especially useful to copy data from postgres to redshift in heroku.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'postgres_to_redshift'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install postgres_to_redshift
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```bash
|
24
|
+
export REDSHIFT_URI='postgres://username:password@host:port/database-name'
|
25
|
+
export S3_DATABASE_EXPORT_ID='yourid'
|
26
|
+
export S3_DATABASE_EXPORT_KEY='yourkey'
|
27
|
+
export S3_DATABASE_EXPORT_BUCKET='some-bucket-to-use'
|
28
|
+
|
29
|
+
postgres_to_redshift $MY_SOURCE_DATABASE
|
30
|
+
```
|
31
|
+
|
32
|
+
## Contributing
|
33
|
+
|
34
|
+
1. Fork it ( https://github.com/kitchensurfing/postgres_to_redshift/fork )
|
35
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
36
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
37
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
38
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/lib/postgres_to_redshift.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
require "postgres_to_redshift/version"
|
2
|
+
require 'pg'
|
3
|
+
require 'uri'
|
4
|
+
require 'aws-sdk'
|
5
|
+
|
6
|
+
# Copies every public table (and every *_view view) from a source Postgres
# database into Amazon Redshift, staging pipe-delimited exports on S3.
#
# Required environment variables:
#   REDSHIFT_URI              - postgres:// URI of the target Redshift cluster
#   S3_DATABASE_EXPORT_ID     - AWS access key id
#   S3_DATABASE_EXPORT_KEY    - AWS secret access key
#   S3_DATABASE_EXPORT_BUCKET - bucket used to stage the export files
class PostgresToRedshift
  attr_reader :source_connection, :target_connection

  # Entry point: full refresh of every public table in the source database
  # (ARGV[0] is the source postgres:// URI).
  def self.update_tables
    update_tables = PostgresToRedshift.new(source_uri: ARGV[0])
    update_tables.create_new_tables

    # FIXME: BIG WARNING HERE: this order is important. We want the views to
    # overwrite the tables. We should make it so the order doesn't matter later.
    update_tables.copy_tables
    update_tables.copy_views
    update_tables.import_tables
  end

  # @param source_uri [String] postgres:// URI of the database to export.
  # Raises KeyError when REDSHIFT_URI is unset — fail fast with the variable
  # name instead of an opaque error from URI.parse(nil).
  def initialize(source_uri:)
    source_uri = URI.parse(source_uri)
    target_uri = URI.parse(ENV.fetch('REDSHIFT_URI'))
    @source_connection = connect(source_uri)
    # Guard against accidental writes to the (production) source database.
    @source_connection.exec("SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY;")
    @target_connection = connect(target_uri)
  end

  # Names of all public views, minus pg_stat_statements (an extension view
  # that should not be exported).
  def views
    source_connection.exec("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'VIEW'").map { |row| row["table_name"] } - ["pg_stat_statements"]
  end

  # Names of all public base tables in the source database.
  def tables
    source_connection.exec("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'BASE TABLE'").map { |row| row["table_name"] }
  end

  # Redshift column-definition list for a table, e.g.
  #   "id" integer, "name" character varying(255)
  # Types Redshift lacks (text, json, bytea, money) are widened to
  # character varying(max). The table name is passed as a bind parameter
  # rather than interpolated into the SQL.
  def table_columns(table_name)
    sql = "SELECT column_name, data_type, character_maximum_length FROM information_schema.columns WHERE table_schema='public' AND table_name=$1"
    source_connection.exec_params(sql, [table_name]).map do |row|
      # Non-mutating replacement; equivalent to the sequential substitutions
      # because the replacement text contains none of the source patterns.
      data_type = row["data_type"].gsub(/text|json|bytea|money/, 'character varying(max)')

      if row["character_maximum_length"].to_s.length > 0
        %Q|"#{row["column_name"]}" #{data_type}(#{row["character_maximum_length"]})|
      else
        %Q|"#{row["column_name"]}" #{data_type}|
      end
    end.join(", ")
  end

  # Memoized S3 client built from the export credentials.
  def s3
    @s3 ||= AWS::S3.new(access_key_id: ENV['S3_DATABASE_EXPORT_ID'], secret_access_key: ENV['S3_DATABASE_EXPORT_KEY'])
  end

  # Memoized staging bucket.
  def bucket
    @bucket ||= s3.buckets[ENV['S3_DATABASE_EXPORT_BUCKET']]
  end

  # Creates each target table in Redshift if it does not exist yet.
  def create_new_tables
    tables.each do |table|
      target_connection.exec("CREATE TABLE IF NOT EXISTS public.#{table} (#{table_columns(table)})")
    end
  end

  # Streams one table (or view) out of Postgres as pipe-delimited text and
  # uploads it to S3. NOTE: the whole export is buffered in memory, so very
  # large tables are limited by available RAM.
  def copy_table(source_table, target_table, is_view = false)
    buffer = ""
    puts "Downloading #{source_table}"
    copy_command =
      if is_view
        # Views cannot be COPYed directly; wrap in a SELECT.
        "COPY (SELECT * FROM #{source_table}) TO STDOUT WITH DELIMITER '|'"
      else
        "COPY #{source_table} TO STDOUT WITH DELIMITER '|'"
      end

    source_connection.copy_data(copy_command) do
      while row = source_connection.get_copy_data
        buffer << row
      end
    end
    upload_table(target_table, buffer)
  end

  # Replaces export/<table>.psv in the staging bucket with the new export.
  def upload_table(target_table, buffer)
    puts "Uploading #{target_table}"
    bucket.objects["export/#{target_table}.psv"].delete
    bucket.objects["export/#{target_table}.psv"].write(buffer, acl: :authenticated_read)
  end

  # Atomically swaps in fresh data: renames the live table aside, recreates
  # it, and COPYs the staged S3 file into it inside one transaction. The old
  # *_updating table is dropped at the start of the NEXT run, not here.
  def import_table(target_table)
    puts "Importing #{target_table}"
    target_connection.exec("DROP TABLE IF EXISTS public.#{target_table}_updating")

    target_connection.exec("BEGIN;")

    target_connection.exec("ALTER TABLE public.#{target_table} RENAME TO #{target_table}_updating")

    target_connection.exec("CREATE TABLE public.#{target_table} (#{table_columns(target_table)})")

    target_connection.exec("COPY public.#{target_table} FROM 's3://#{ENV['S3_DATABASE_EXPORT_BUCKET']}/export/#{target_table}.psv' CREDENTIALS 'aws_access_key_id=#{ENV['S3_DATABASE_EXPORT_ID']};aws_secret_access_key=#{ENV['S3_DATABASE_EXPORT_KEY']}' TRUNCATECOLUMNS ESCAPE DELIMITER as '|';")

    target_connection.exec("COMMIT;")
  end

  # Exports every base table to S3.
  def copy_tables
    tables.each do |table|
      copy_table(table, table)
    end
  end

  # Exports every view, stripping the _view suffix so the export overwrites
  # the same-named table's file on S3.
  def copy_views
    views.each do |view|
      table = view.gsub(/_view/, '')
      copy_table(view, table, true)
    end
  end

  # FIXME: This relies on views being uploaded after tables.
  def import_tables
    tables.each do |table|
      import_table(table)
    end
  end

  private

  # Opens a PG connection from a parsed postgres:// URI.
  def connect(uri)
    PG::Connection.new(host: uri.host, port: uri.port, user: uri.user, password: uri.password, dbname: uri.path[1..-1])
  end
end
|
data/postgres_to_redshift.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for postgres_to_redshift.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'postgres_to_redshift/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name = "postgres_to_redshift"
  spec.version = PostgresToRedshift::VERSION
  spec.authors = ["Alex Rakoczy"]
  spec.email = ["arakoczy@gmail.com"]
  spec.homepage = "https://github.com/kitchensurfing/postgres_to_redshift"
  spec.license = "MIT"

  # Description
  spec.summary = "Load postgres databases into Amazon Redshift"
  spec.description = "Load postgres databases into Amazon Redshift. It's designed to work on Heroku Scheduler, or other *nix/BSD hosts."

  # Packaged files: everything tracked by git; executables live under bin/.
  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Development-only dependencies
  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"

  # Runtime dependencies
  spec.add_dependency "pg", "~> 0.17.0"
  spec.add_dependency "aws-sdk", "~> 1.54"
end
|
metadata
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: postgres_to_redshift
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Alex Rakoczy
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-01-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: pg
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.17.0
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.17.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: aws-sdk
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.54'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.54'
|
69
|
+
description: Load postgres databases into Amazon Redshift. It's designed to work on
|
70
|
+
Heroku Scheduler, or other *nix/BSD hosts.
|
71
|
+
email:
|
72
|
+
- arakoczy@gmail.com
|
73
|
+
executables:
|
74
|
+
- postgres_to_redshift
|
75
|
+
extensions: []
|
76
|
+
extra_rdoc_files: []
|
77
|
+
files:
|
78
|
+
- ".gitignore"
|
79
|
+
- Gemfile
|
80
|
+
- LICENSE.txt
|
81
|
+
- README.md
|
82
|
+
- Rakefile
|
83
|
+
- bin/postgres_to_redshift
|
84
|
+
- lib/postgres_to_redshift.rb
|
85
|
+
- lib/postgres_to_redshift/version.rb
|
86
|
+
- postgres_to_redshift.gemspec
|
87
|
+
homepage: https://github.com/kitchensurfing/postgres_to_redshift
|
88
|
+
licenses:
|
89
|
+
- MIT
|
90
|
+
metadata: {}
|
91
|
+
post_install_message:
|
92
|
+
rdoc_options: []
|
93
|
+
require_paths:
|
94
|
+
- lib
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
requirements: []
|
106
|
+
rubyforge_project:
|
107
|
+
rubygems_version: 2.4.5
|
108
|
+
signing_key:
|
109
|
+
specification_version: 4
|
110
|
+
summary: Load postgres databases into Amazon Redshift
|
111
|
+
test_files: []
|