bigshift 0.3.1 → 0.3.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b44f1ad6233e29754f8ae3b66f5857141660529e
-  data.tar.gz: a42978dfd9b983592e1051a727373b6bde9b73ff
+  metadata.gz: b0a1088f4c4d8c66c8af35a4e67a8377d8b6f805
+  data.tar.gz: bff611a9528b2b08a3587177cb3a448b7dbea4de
 SHA512:
-  metadata.gz: 82fc075bcc22f380ff8492acb83b6a73e5ef6a741be04b8078811d2587efe2bbecd900d5eab5f25a51307afbe3b3475f7768ff5605fc3a781cc70c518c68488d
-  data.tar.gz: 30b626b8b7fce0445a7c8ef613a446d769b04580803a4438558c876775ac99abd81c1f6f3cdff7b58343a1889222f289bff4976d56ca2bb0900813a9fbeba376
+  metadata.gz: 045ba2e30068a4259ac34763f3597d7087053ad8889443c077643576e5cb8df55ad02d0a01e50577b4865afa130a68311188bd4db05bc6c34a632d2ab9bfe39d
+  data.tar.gz: 31cf2ec5852d2a1c200398a089b1be9c3093abb5e17a90cab3f130f43ef50df5d0c5749f117651f4ddc123d6504397fa1602a02dcf0e99d4fed511d320b803cf
data/README.md CHANGED
@@ -32,7 +32,7 @@ BigShift tells Redshift to compress the dumps by default, even if that means tha
 
 ## Arguments
 
-Running `bigshift` without any arguments, or with `--help` will show the options. All except `--s3-prefix`, `--bq-table`, `--max-bad-records`, `--steps` and `--[no-]compress` are required.
+Running `bigshift` without any arguments, or with `--help` will show the options. All except `--s3-prefix`, `--rs-schema`, `--bq-table`, `--max-bad-records`, `--steps` and `--[no-]compress` are required.
 
 ### GCP credentials
 
@@ -106,6 +106,10 @@ If you don't want to put the data dumped from Redshift directly into the root of
 
 Because of how GCS' Transfer Service works the transferred files will have exactly the same keys in the destination bucket, this cannot be configured.
 
+### Redshift schema
+
+By default the schema in Redshift is called `public`, but in case you're not using that one, you can use the argument `--rs-schema` to specify the schema your table is in.
+
 ### BigQuery table ID
 
 By default the BigQuery table ID will be the same as the Redshift table name, but the optional argument `--bq-table` can be used to tell BigShift to use another table ID.
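As a rough sketch, the new option might be passed on the command line like this. All values are placeholders, and the required S3/GCS and credential options that this release does not touch are elided:

```
bigshift \
  --rs-credentials rs_credentials.yml \
  --rs-database analytics_db \
  --rs-schema reporting \
  --rs-table events \
  --bq-dataset my_dataset \
  ...
```

When `--rs-schema` is omitted, the CLI falls back to `public`, as the cli.rb changes below show.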
data/lib/bigshift/cli.rb CHANGED
@@ -4,6 +4,7 @@ require 'json'
 require 'stringio'
 require 'logger'
 require 'optparse'
+require 'socket'
 require 'bigshift'
 
 module BigShift
@@ -47,7 +48,7 @@ module BigShift
     def unload
       if run?(:unload)
         s3_uri = "s3://#{@config[:s3_bucket_name]}/#{s3_table_prefix}"
-        @factory.redshift_unloader.unload_to(@config[:rs_table_name], s3_uri, allow_overwrite: false, compression: @config[:compression])
+        @factory.redshift_unloader.unload_to(@config[:rs_schema_name], @config[:rs_table_name], s3_uri, allow_overwrite: false, compression: @config[:compression])
       else
         @logger.debug('Skipping unload')
       end
@@ -56,7 +57,7 @@ module BigShift
 
     def transfer
       if run?(:transfer)
-        description = "bigshift-#{@config[:rs_database_name]}-#{@config[:rs_table_name]}-#{Time.now.utc.strftime('%Y%m%dT%H%M')}"
+        description = "bigshift-#{@config[:rs_database_name]}-#{@config[:rs_schema_name]}-#{@config[:rs_table_name]}-#{Time.now.utc.strftime('%Y%m%dT%H%M')}"
         @factory.cloud_storage_transfer.copy_to_cloud_storage(@unload_manifest, @config[:cs_bucket_name], description: description, allow_overwrite: false)
       else
         @logger.debug('Skipping transfer')
@@ -99,6 +100,7 @@ module BigShift
       ['--aws-credentials', 'PATH', String, :aws_credentials_path, nil],
       ['--rs-credentials', 'PATH', String, :rs_credentials_path, :required],
       ['--rs-database', 'DB_NAME', String, :rs_database_name, :required],
+      ['--rs-schema', 'SCHEMA_NAME', String, :rs_schema_name, nil],
       ['--rs-table', 'TABLE_NAME', String, :rs_table_name, :required],
       ['--bq-dataset', 'DATASET_ID', String, :bq_dataset_id, :required],
       ['--bq-table', 'TABLE_ID', String, :bq_table_id, nil],
@@ -136,6 +138,7 @@ module BigShift
         end
       end
       config[:bq_table_id] ||= config[:rs_table_name]
+      config[:rs_schema_name] ||= 'public'
       if config[:steps] && !config[:steps].empty?
         config[:steps] = STEPS.select { |s| config[:steps].include?(s.to_s) }
       else
@@ -150,8 +153,9 @@ module BigShift
     def s3_table_prefix
       @s3_table_prefix ||= begin
         db_name = @config[:rs_database_name]
+        schema_name = @config[:rs_schema_name]
         table_name = @config[:rs_table_name]
-        prefix = "#{db_name}/#{table_name}/#{db_name}-#{table_name}-"
+        prefix = "#{db_name}/#{schema_name}/#{table_name}/#{db_name}-#{schema_name}-#{table_name}-"
         if (s3_prefix = @config[:s3_prefix])
           s3_prefix = s3_prefix.gsub(%r{\A/|/\Z}, '')
           prefix = "#{s3_prefix}/#{prefix}"
@@ -175,7 +179,7 @@ module BigShift
     end
 
     def redshift_table_schema
-      @redshift_table_schema ||= RedshiftTableSchema.new(@config[:rs_table_name], rs_connection)
+      @redshift_table_schema ||= RedshiftTableSchema.new(@config[:rs_schema_name], @config[:rs_table_name], rs_connection)
     end
 
     def big_query_dataset
@@ -212,6 +216,13 @@ module BigShift
         password: @config[:rs_credentials]['password'],
         sslmode: 'require'
       )
+      socket = Socket.for_fd(@rs_connection.socket)
+      socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_KEEPALIVE, 1)
+      socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPCNT, 5)
+      socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPINTVL, 2)
+      socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPIDLE, 2) if defined?(Socket::TCP_KEEPIDLE)
+      @rs_connection.exec("SET search_path = \"#{@config[:rs_schema_name]}\"")
+      @rs_connection
     end
 
     def cs_transfer_service
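The socket options above enable TCP keepalive on the Redshift connection, presumably so that long-running unloads are not dropped by idle network timeouts. A standalone sketch of the same tuning, against a hypothetical host, reads:

```ruby
require 'socket'

sock = TCPSocket.new('redshift.example.com', 5439)
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_KEEPALIVE, 1)   # turn keepalive on
sock.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPCNT, 5)   # give up after 5 failed probes
sock.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPINTVL, 2) # 2 s between probes
# 2 s of idle time before the first probe, where the platform supports it
sock.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPIDLE, 2) if defined?(Socket::TCP_KEEPIDLE)
```

The `SET search_path` statement then makes the configured schema the default for unqualified table names in the session.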
data/lib/bigshift/redshift_table_schema.rb CHANGED
@@ -1,15 +1,16 @@
 module BigShift
   class RedshiftTableSchema
-    def initialize(table_name, redshift_connection)
+    def initialize(schema_name, table_name, redshift_connection)
+      @schema_name = schema_name
       @table_name = table_name
       @redshift_connection = redshift_connection
     end
 
     def columns
       @columns ||= begin
-        rows = @redshift_connection.exec_params(%|SELECT "column", "type", "notnull" FROM "pg_table_def" WHERE "schemaname" = 'public' AND "tablename" = $1|, [@table_name])
+        rows = @redshift_connection.exec_params(%|SELECT "column", "type", "notnull" FROM "pg_table_def" WHERE "schemaname" = $1 AND "tablename" = $2|, [@schema_name, @table_name])
         if rows.count == 0
-          raise sprintf('Table not found: %s', @table_name.inspect)
+          raise sprintf('Table %s for schema %s not found', @table_name.inspect, @schema_name.inspect)
         else
           columns = rows.map do |row|
             name = row['column']
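An illustrative usage sketch of the updated constructor; `rs_connection` is assumed to be an already-open `PG::Connection` to the cluster, and the schema and table names are placeholders:

```ruby
schema = BigShift::RedshiftTableSchema.new('reporting', 'events', rs_connection)
select_list = schema.columns.map(&:to_sql).join(', ') # column list used to build the UNLOAD query
```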
data/lib/bigshift/redshift_unloader.rb CHANGED
@@ -6,12 +6,12 @@ module BigShift
       @logger = options[:logger] || NullLogger::INSTANCE
     end
 
-    def unload_to(table_name, s3_uri, options={})
-      table_schema = RedshiftTableSchema.new(table_name, @redshift_connection)
+    def unload_to(schema_name, table_name, s3_uri, options={})
+      table_schema = RedshiftTableSchema.new(schema_name, table_name, @redshift_connection)
       credentials_string = "aws_access_key_id=#{@aws_credentials.access_key_id};aws_secret_access_key=#{@aws_credentials.secret_access_key}"
       select_sql = 'SELECT '
       select_sql << table_schema.columns.map(&:to_sql).join(', ')
-      select_sql << %Q< FROM "#{table_name}">
+      select_sql << %Q< FROM "#{schema_name}"."#{table_name}">
       select_sql.gsub!('\'') { |s| '\\\'' }
       unload_sql = %Q<UNLOAD ('#{select_sql}')>
       unload_sql << %Q< TO '#{s3_uri}'>
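With the extra parameter, a call to the unloader now looks roughly like this; the `unloader` instance and the S3 URI are placeholders, and the options mirror what the CLI passes in the hunk further up:

```ruby
unloader.unload_to('reporting', 'events',
                   's3://my-bucket/analytics_db/reporting/events/analytics_db-reporting-events-',
                   allow_overwrite: false, compression: true)
```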
data/lib/bigshift/version.rb CHANGED
@@ -1,3 +1,3 @@
 module BigShift
-  VERSION = '0.3.1'.freeze
+  VERSION = '0.3.2'.freeze
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bigshift
 version: !ruby/object:Gem::Version
-  version: 0.3.1
+  version: 0.3.2
 platform: ruby
 authors:
 - Theo Hultberg
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-05-12 00:00:00.000000000 Z
+date: 2016-08-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: pg
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.8
+rubygems_version: 2.4.5
 signing_key:
 specification_version: 4
 summary: A tool for moving tables from Redshift to BigQuery