bigshift 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/bigshift/cli.rb +15 -4
- data/lib/bigshift/redshift_table_schema.rb +4 -3
- data/lib/bigshift/redshift_unloader.rb +3 -3
- data/lib/bigshift/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b0a1088f4c4d8c66c8af35a4e67a8377d8b6f805
|
4
|
+
data.tar.gz: bff611a9528b2b08a3587177cb3a448b7dbea4de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 045ba2e30068a4259ac34763f3597d7087053ad8889443c077643576e5cb8df55ad02d0a01e50577b4865afa130a68311188bd4db05bc6c34a632d2ab9bfe39d
|
7
|
+
data.tar.gz: 31cf2ec5852d2a1c200398a089b1be9c3093abb5e17a90cab3f130f43ef50df5d0c5749f117651f4ddc123d6504397fa1602a02dcf0e99d4fed511d320b803cf
|
data/README.md
CHANGED
@@ -32,7 +32,7 @@ BigShift tells Redshift to compress the dumps by default, even if that means tha
|
|
32
32
|
|
33
33
|
## Arguments
|
34
34
|
|
35
|
-
Running `bigshift` without any arguments, or with `--help` will show the options. All except `--s3-prefix`, `--bq-table`, `--max-bad-records`, `--steps` and `--[no-]compress` are required.
|
35
|
+
Running `bigshift` without any arguments, or with `--help` will show the options. All except `--s3-prefix`, `--rs-schema`, `--bq-table`, `--max-bad-records`, `--steps` and `--[no-]compress` are required.
|
36
36
|
|
37
37
|
### GCP credentials
|
38
38
|
|
@@ -106,6 +106,10 @@ If you don't want to put the data dumped from Redshift directly into the root of
|
|
106
106
|
|
107
107
|
Because of how GCS' Transfer Service works the transferred files will have exactly the same keys in the destination bucket, this cannot be configured.
|
108
108
|
|
109
|
+
### Redshift schema
|
110
|
+
|
111
|
+
By default the schema in Redshift is called `public`, but in case you're not using that one, you can use the argument `--rs-schema` to specify the schema your table is in.
|
112
|
+
|
109
113
|
### BigQuery table ID
|
110
114
|
|
111
115
|
By default the BigQuery table ID will be the same as the Redshift table name, but the optional argument `--bq-table` can be used to tell BigShift to use another table ID.
|
data/lib/bigshift/cli.rb
CHANGED
@@ -4,6 +4,7 @@ require 'json'
|
|
4
4
|
require 'stringio'
|
5
5
|
require 'logger'
|
6
6
|
require 'optparse'
|
7
|
+
require 'socket'
|
7
8
|
require 'bigshift'
|
8
9
|
|
9
10
|
module BigShift
|
@@ -47,7 +48,7 @@ module BigShift
|
|
47
48
|
def unload
|
48
49
|
if run?(:unload)
|
49
50
|
s3_uri = "s3://#{@config[:s3_bucket_name]}/#{s3_table_prefix}"
|
50
|
-
@factory.redshift_unloader.unload_to(@config[:rs_table_name], s3_uri, allow_overwrite: false, compression: @config[:compression])
|
51
|
+
@factory.redshift_unloader.unload_to(@config[:rs_schema_name], @config[:rs_table_name], s3_uri, allow_overwrite: false, compression: @config[:compression])
|
51
52
|
else
|
52
53
|
@logger.debug('Skipping unload')
|
53
54
|
end
|
@@ -56,7 +57,7 @@ module BigShift
|
|
56
57
|
|
57
58
|
def transfer
|
58
59
|
if run?(:transfer)
|
59
|
-
description = "bigshift-#{@config[:rs_database_name]}-#{@config[:rs_table_name]}-#{Time.now.utc.strftime('%Y%m%dT%H%M')}"
|
60
|
+
description = "bigshift-#{@config[:rs_database_name]}-#{@config[:rs_schema_name]}-#{@config[:rs_table_name]}-#{Time.now.utc.strftime('%Y%m%dT%H%M')}"
|
60
61
|
@factory.cloud_storage_transfer.copy_to_cloud_storage(@unload_manifest, @config[:cs_bucket_name], description: description, allow_overwrite: false)
|
61
62
|
else
|
62
63
|
@logger.debug('Skipping transfer')
|
@@ -99,6 +100,7 @@ module BigShift
|
|
99
100
|
['--aws-credentials', 'PATH', String, :aws_credentials_path, nil],
|
100
101
|
['--rs-credentials', 'PATH', String, :rs_credentials_path, :required],
|
101
102
|
['--rs-database', 'DB_NAME', String, :rs_database_name, :required],
|
103
|
+
['--rs-schema', 'SCHEMA_NAME', String, :rs_schema_name, nil],
|
102
104
|
['--rs-table', 'TABLE_NAME', String, :rs_table_name, :required],
|
103
105
|
['--bq-dataset', 'DATASET_ID', String, :bq_dataset_id, :required],
|
104
106
|
['--bq-table', 'TABLE_ID', String, :bq_table_id, nil],
|
@@ -136,6 +138,7 @@ module BigShift
|
|
136
138
|
end
|
137
139
|
end
|
138
140
|
config[:bq_table_id] ||= config[:rs_table_name]
|
141
|
+
config[:rs_schema_name] ||= 'public'
|
139
142
|
if config[:steps] && !config[:steps].empty?
|
140
143
|
config[:steps] = STEPS.select { |s| config[:steps].include?(s.to_s) }
|
141
144
|
else
|
@@ -150,8 +153,9 @@ module BigShift
|
|
150
153
|
def s3_table_prefix
|
151
154
|
@s3_table_prefix ||= begin
|
152
155
|
db_name = @config[:rs_database_name]
|
156
|
+
schema_name = @config[:rs_schema_name]
|
153
157
|
table_name = @config[:rs_table_name]
|
154
|
-
prefix = "#{db_name}/#{table_name}/#{db_name}-#{table_name}-"
|
158
|
+
prefix = "#{db_name}/#{schema_name}/#{table_name}/#{db_name}-#{schema_name}-#{table_name}-"
|
155
159
|
if (s3_prefix = @config[:s3_prefix])
|
156
160
|
s3_prefix = s3_prefix.gsub(%r{\A/|/\Z}, '')
|
157
161
|
prefix = "#{s3_prefix}/#{prefix}"
|
@@ -175,7 +179,7 @@ module BigShift
|
|
175
179
|
end
|
176
180
|
|
177
181
|
def redshift_table_schema
|
178
|
-
@redshift_table_schema ||= RedshiftTableSchema.new(@config[:rs_table_name], rs_connection)
|
182
|
+
@redshift_table_schema ||= RedshiftTableSchema.new(@config[:rs_schema_name], @config[:rs_table_name], rs_connection)
|
179
183
|
end
|
180
184
|
|
181
185
|
def big_query_dataset
|
@@ -212,6 +216,13 @@ module BigShift
|
|
212
216
|
password: @config[:rs_credentials]['password'],
|
213
217
|
sslmode: 'require'
|
214
218
|
)
|
219
|
+
socket = Socket.for_fd(@rs_connection.socket)
|
220
|
+
socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_KEEPALIVE, 1)
|
221
|
+
socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPCNT, 5)
|
222
|
+
socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPINTVL, 2)
|
223
|
+
socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPIDLE, 2) if defined?(Socket::TCP_KEEPIDLE)
|
224
|
+
@rs_connection.exec("SET search_path = \"#{@config[:rs_schema_name]}\"")
|
225
|
+
@rs_connection
|
215
226
|
end
|
216
227
|
|
217
228
|
def cs_transfer_service
|
@@ -1,15 +1,16 @@
|
|
1
1
|
module BigShift
|
2
2
|
class RedshiftTableSchema
|
3
|
-
def initialize(table_name, redshift_connection)
|
3
|
+
def initialize(schema_name, table_name, redshift_connection)
|
4
|
+
@schema_name = schema_name
|
4
5
|
@table_name = table_name
|
5
6
|
@redshift_connection = redshift_connection
|
6
7
|
end
|
7
8
|
|
8
9
|
def columns
|
9
10
|
@columns ||= begin
|
10
|
-
rows = @redshift_connection.exec_params(%|SELECT "column", "type", "notnull" FROM "pg_table_def" WHERE "schemaname" = 'public' AND "tablename" = $1|, [@table_name])
|
11
|
+
rows = @redshift_connection.exec_params(%|SELECT "column", "type", "notnull" FROM "pg_table_def" WHERE "schemaname" = $1 AND "tablename" = $2|, [@schema_name, @table_name])
|
11
12
|
if rows.count == 0
|
12
|
-
raise sprintf('Table %s not found', @table_name.inspect)
|
13
|
+
raise sprintf('Table %s for schema %s not found', @table_name.inspect, @schema_name.inspect)
|
13
14
|
else
|
14
15
|
columns = rows.map do |row|
|
15
16
|
name = row['column']
|
@@ -6,12 +6,12 @@ module BigShift
|
|
6
6
|
@logger = options[:logger] || NullLogger::INSTANCE
|
7
7
|
end
|
8
8
|
|
9
|
-
def unload_to(table_name, s3_uri, options={})
|
10
|
-
table_schema = RedshiftTableSchema.new(table_name, @redshift_connection)
|
9
|
+
def unload_to(schema_name, table_name, s3_uri, options={})
|
10
|
+
table_schema = RedshiftTableSchema.new(schema_name, table_name, @redshift_connection)
|
11
11
|
credentials_string = "aws_access_key_id=#{@aws_credentials.access_key_id};aws_secret_access_key=#{@aws_credentials.secret_access_key}"
|
12
12
|
select_sql = 'SELECT '
|
13
13
|
select_sql << table_schema.columns.map(&:to_sql).join(', ')
|
14
|
-
select_sql << %Q< FROM "#{table_name}">
|
14
|
+
select_sql << %Q< FROM "#{schema_name}"."#{table_name}">
|
15
15
|
select_sql.gsub!('\'') { |s| '\\\'' }
|
16
16
|
unload_sql = %Q<UNLOAD ('#{select_sql}')>
|
17
17
|
unload_sql << %Q< TO '#{s3_uri}'>
|
data/lib/bigshift/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bigshift
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Theo Hultberg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pg
|
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
112
|
version: '0'
|
113
113
|
requirements: []
|
114
114
|
rubyforge_project:
|
115
|
-
rubygems_version: 2.4.
|
115
|
+
rubygems_version: 2.4.5
|
116
116
|
signing_key:
|
117
117
|
specification_version: 4
|
118
118
|
summary: A tool for moving tables from Redshift to BigQuery
|