bigshift 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/bigshift/cli.rb +15 -4
- data/lib/bigshift/redshift_table_schema.rb +4 -3
- data/lib/bigshift/redshift_unloader.rb +3 -3
- data/lib/bigshift/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b0a1088f4c4d8c66c8af35a4e67a8377d8b6f805
|
4
|
+
data.tar.gz: bff611a9528b2b08a3587177cb3a448b7dbea4de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 045ba2e30068a4259ac34763f3597d7087053ad8889443c077643576e5cb8df55ad02d0a01e50577b4865afa130a68311188bd4db05bc6c34a632d2ab9bfe39d
|
7
|
+
data.tar.gz: 31cf2ec5852d2a1c200398a089b1be9c3093abb5e17a90cab3f130f43ef50df5d0c5749f117651f4ddc123d6504397fa1602a02dcf0e99d4fed511d320b803cf
|
data/README.md
CHANGED
@@ -32,7 +32,7 @@ BigShift tells Redshift to compress the dumps by default, even if that means tha
|
|
32
32
|
|
33
33
|
## Arguments
|
34
34
|
|
35
|
-
Running `bigshift` without any arguments, or with `--help` will show the options. All except `--s3-prefix`, `--bq-table`, `--max-bad-records`, `--steps` and `--[no-]compress` are required.
|
35
|
+
Running `bigshift` without any arguments, or with `--help` will show the options. All except `--s3-prefix`, `--rs-schema`, `--bq-table`, `--max-bad-records`, `--steps` and `--[no-]compress` are required.
|
36
36
|
|
37
37
|
### GCP credentials
|
38
38
|
|
@@ -106,6 +106,10 @@ If you don't want to put the data dumped from Redshift directly into the root of
|
|
106
106
|
|
107
107
|
Because of how GCS' Transfer Service works the transferred files will have exactly the same keys in the destination bucket, this cannot be configured.
|
108
108
|
|
109
|
+
### Redshift schema
|
110
|
+
|
111
|
+
By default the schema in Redshift is called `public`, but in case you're not using that one, you can use the argument `--rs-schema` to specify the schema your table is in.
|
112
|
+
|
109
113
|
### BigQuery table ID
|
110
114
|
|
111
115
|
By default the BigQuery table ID will be the same as the Redshift table name, but the optional argument `--bq-table` can be used to tell BigShift to use another table ID.
|
data/lib/bigshift/cli.rb
CHANGED
@@ -4,6 +4,7 @@ require 'json'
|
|
4
4
|
require 'stringio'
|
5
5
|
require 'logger'
|
6
6
|
require 'optparse'
|
7
|
+
require 'socket'
|
7
8
|
require 'bigshift'
|
8
9
|
|
9
10
|
module BigShift
|
@@ -47,7 +48,7 @@ module BigShift
|
|
47
48
|
def unload
|
48
49
|
if run?(:unload)
|
49
50
|
s3_uri = "s3://#{@config[:s3_bucket_name]}/#{s3_table_prefix}"
|
50
|
-
@factory.redshift_unloader.unload_to(@config[:rs_table_name], s3_uri, allow_overwrite: false, compression: @config[:compression])
|
51
|
+
@factory.redshift_unloader.unload_to(@config[:rs_schema_name], @config[:rs_table_name], s3_uri, allow_overwrite: false, compression: @config[:compression])
|
51
52
|
else
|
52
53
|
@logger.debug('Skipping unload')
|
53
54
|
end
|
@@ -56,7 +57,7 @@ module BigShift
|
|
56
57
|
|
57
58
|
def transfer
|
58
59
|
if run?(:transfer)
|
59
|
-
description = "bigshift-#{@config[:rs_database_name]}-#{@config[:rs_table_name]}-#{Time.now.utc.strftime('%Y%m%dT%H%M')}"
|
60
|
+
description = "bigshift-#{@config[:rs_database_name]}-#{@config[:rs_schema_name]}-#{@config[:rs_table_name]}-#{Time.now.utc.strftime('%Y%m%dT%H%M')}"
|
60
61
|
@factory.cloud_storage_transfer.copy_to_cloud_storage(@unload_manifest, @config[:cs_bucket_name], description: description, allow_overwrite: false)
|
61
62
|
else
|
62
63
|
@logger.debug('Skipping transfer')
|
@@ -99,6 +100,7 @@ module BigShift
|
|
99
100
|
['--aws-credentials', 'PATH', String, :aws_credentials_path, nil],
|
100
101
|
['--rs-credentials', 'PATH', String, :rs_credentials_path, :required],
|
101
102
|
['--rs-database', 'DB_NAME', String, :rs_database_name, :required],
|
103
|
+
['--rs-schema', 'SCHEMA_NAME', String, :rs_schema_name, nil],
|
102
104
|
['--rs-table', 'TABLE_NAME', String, :rs_table_name, :required],
|
103
105
|
['--bq-dataset', 'DATASET_ID', String, :bq_dataset_id, :required],
|
104
106
|
['--bq-table', 'TABLE_ID', String, :bq_table_id, nil],
|
@@ -136,6 +138,7 @@ module BigShift
|
|
136
138
|
end
|
137
139
|
end
|
138
140
|
config[:bq_table_id] ||= config[:rs_table_name]
|
141
|
+
config[:rs_schema_name] ||= 'public'
|
139
142
|
if config[:steps] && !config[:steps].empty?
|
140
143
|
config[:steps] = STEPS.select { |s| config[:steps].include?(s.to_s) }
|
141
144
|
else
|
@@ -150,8 +153,9 @@ module BigShift
|
|
150
153
|
def s3_table_prefix
|
151
154
|
@s3_table_prefix ||= begin
|
152
155
|
db_name = @config[:rs_database_name]
|
156
|
+
schema_name = @config[:rs_schema_name]
|
153
157
|
table_name = @config[:rs_table_name]
|
154
|
-
prefix = "#{db_name}/#{table_name}/#{db_name}-#{table_name}-"
|
158
|
+
prefix = "#{db_name}/#{schema_name}/#{table_name}/#{db_name}-#{schema_name}-#{table_name}-"
|
155
159
|
if (s3_prefix = @config[:s3_prefix])
|
156
160
|
s3_prefix = s3_prefix.gsub(%r{\A/|/\Z}, '')
|
157
161
|
prefix = "#{s3_prefix}/#{prefix}"
|
@@ -175,7 +179,7 @@ module BigShift
|
|
175
179
|
end
|
176
180
|
|
177
181
|
def redshift_table_schema
|
178
|
-
@redshift_table_schema ||= RedshiftTableSchema.new(@config[:rs_table_name], rs_connection)
|
182
|
+
@redshift_table_schema ||= RedshiftTableSchema.new(@config[:rs_schema_name], @config[:rs_table_name], rs_connection)
|
179
183
|
end
|
180
184
|
|
181
185
|
def big_query_dataset
|
@@ -212,6 +216,13 @@ module BigShift
|
|
212
216
|
password: @config[:rs_credentials]['password'],
|
213
217
|
sslmode: 'require'
|
214
218
|
)
|
219
|
+
socket = Socket.for_fd(@rs_connection.socket)
|
220
|
+
socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_KEEPALIVE, 1)
|
221
|
+
socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPCNT, 5)
|
222
|
+
socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPINTVL, 2)
|
223
|
+
socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPIDLE, 2) if defined?(Socket::TCP_KEEPIDLE)
|
224
|
+
@rs_connection.exec("SET search_path = \"#{@config[:rs_schema_name]}\"")
|
225
|
+
@rs_connection
|
215
226
|
end
|
216
227
|
|
217
228
|
def cs_transfer_service
|
@@ -1,15 +1,16 @@
|
|
1
1
|
module BigShift
|
2
2
|
class RedshiftTableSchema
|
3
|
-
def initialize(table_name, redshift_connection)
|
3
|
+
def initialize(schema_name, table_name, redshift_connection)
|
4
|
+
@schema_name = schema_name
|
4
5
|
@table_name = table_name
|
5
6
|
@redshift_connection = redshift_connection
|
6
7
|
end
|
7
8
|
|
8
9
|
def columns
|
9
10
|
@columns ||= begin
|
10
|
-
rows = @redshift_connection.exec_params(%|SELECT "column", "type", "notnull" FROM "pg_table_def" WHERE "schemaname" = 'public' AND "tablename" = $1|, [@table_name])
|
11
|
+
rows = @redshift_connection.exec_params(%|SELECT "column", "type", "notnull" FROM "pg_table_def" WHERE "schemaname" = $1 AND "tablename" = $2|, [@schema_name, @table_name])
|
11
12
|
if rows.count == 0
|
12
|
-
raise sprintf('Table %s not found', @table_name.inspect)
|
13
|
+
raise sprintf('Table %s for schema %s not found', @table_name.inspect, @schema_name.inspect)
|
13
14
|
else
|
14
15
|
columns = rows.map do |row|
|
15
16
|
name = row['column']
|
@@ -6,12 +6,12 @@ module BigShift
|
|
6
6
|
@logger = options[:logger] || NullLogger::INSTANCE
|
7
7
|
end
|
8
8
|
|
9
|
-
def unload_to(table_name, s3_uri, options={})
|
10
|
-
table_schema = RedshiftTableSchema.new(table_name, @redshift_connection)
|
9
|
+
def unload_to(schema_name, table_name, s3_uri, options={})
|
10
|
+
table_schema = RedshiftTableSchema.new(schema_name, table_name, @redshift_connection)
|
11
11
|
credentials_string = "aws_access_key_id=#{@aws_credentials.access_key_id};aws_secret_access_key=#{@aws_credentials.secret_access_key}"
|
12
12
|
select_sql = 'SELECT '
|
13
13
|
select_sql << table_schema.columns.map(&:to_sql).join(', ')
|
14
|
-
select_sql << %Q< FROM "#{table_name}">
|
14
|
+
select_sql << %Q< FROM "#{schema_name}"."#{table_name}">
|
15
15
|
select_sql.gsub!('\'') { |s| '\\\'' }
|
16
16
|
unload_sql = %Q<UNLOAD ('#{select_sql}')>
|
17
17
|
unload_sql << %Q< TO '#{s3_uri}'>
|
data/lib/bigshift/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bigshift
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Theo Hultberg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pg
|
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
112
|
version: '0'
|
113
113
|
requirements: []
|
114
114
|
rubyforge_project:
|
115
|
-
rubygems_version: 2.4.
|
115
|
+
rubygems_version: 2.4.5
|
116
116
|
signing_key:
|
117
117
|
specification_version: 4
|
118
118
|
summary: A tool for moving tables from Redshift to BigQuery
|