dataduck 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 497a92b4bfb99dba50ee81efb9bdf2b5b802023c
4
- data.tar.gz: 7a5b3ef29d7bf2856b3ba196ed8a6cf25158b3f1
3
+ metadata.gz: be1cff2044ac902e2482b8cf432fdfded2b69ca6
4
+ data.tar.gz: dcf5456ce914d227b25e002a7bf4b10b8f0892e7
5
5
  SHA512:
6
- metadata.gz: 89c16aa7ca78cbed8522f6c137eb4a3b0dd6ae6a87c3466b39ba937f319ce3d3d0db8370557d6259c6157ed2150ed94072a7489d6811bb3ad7908333fa0483f0
7
- data.tar.gz: 314e2a64179b39b0f0f4d32fcdd5522a4b8b8574df9994c97956c5ff3066cde5c59e1a5a72d0c4c296594ee75ecaf4c30374c6d4a585a72d8b03ab80055fdfc2
6
+ metadata.gz: 49ebaa72e9ca8eb7dc503853d5b8829c03c5da17cbc4bd391a4c0076a922046cded8002acfc13ed70b6d1f572ead6102d5b3e43e94d7c6981ac8864534dfe07d
7
+ data.tar.gz: c5b3e2c56c5d323a8876792f56be753a743860edc8978c06fb3126fd17949e01797b16e13a3d735b90e478251ddc455f820395b0093a8e4be4bd403c2581e5df
@@ -55,7 +55,7 @@ module DataDuck
55
55
  def create_columns_on_data_warehouse!(table)
56
56
  columns = get_columns_in_data_warehouse(table.building_name)
57
57
  column_names = columns.map { |col| col[:name].to_s }
58
- table.output_schema.map do |name, data_type|
58
+ table.create_schema.map do |name, data_type|
59
59
  if !column_names.include?(name.to_s)
60
60
  redshift_data_type = self.type_to_redshift_type(data_type)
61
61
  self.query("ALTER TABLE #{ table.building_name } ADD #{ name } #{ redshift_data_type }")
@@ -65,7 +65,7 @@ module DataDuck
65
65
 
66
66
  def create_table_query(table, table_name = nil)
67
67
  table_name ||= table.name
68
- props_array = table.output_schema.map do |name, data_type|
68
+ props_array = table.create_schema.map do |name, data_type|
69
69
  redshift_data_type = self.type_to_redshift_type(data_type)
70
70
  "\"#{ name }\" #{ redshift_data_type }"
71
71
  end
@@ -253,7 +253,8 @@ module DataDuck
253
253
  DataDuck::Logs.info "Loading table #{ table.name }..."
254
254
  s3_object = self.upload_table_to_s3!(table)
255
255
  self.create_output_tables!(table)
256
- self.query(self.copy_query(table, s3_object.s3_path))
256
+ query_to_run = self.copy_query(table, s3_object.s3_path)
257
+ self.query(query_to_run)
257
258
  s3_object.delete!
258
259
 
259
260
  if table.staging_name != table.building_name
@@ -271,7 +272,7 @@ module DataDuck
271
272
 
272
273
  recreating_temp_name = "zz_dataduck_recreating_#{ table.name }"
273
274
  self.create_output_table_with_name!(table, recreating_temp_name)
274
- self.query("INSERT INTO #{ recreating_temp_name } (\"#{ table.output_column_names.join('","') }\") SELECT \"#{ table.output_column_names.join('","') }\" FROM #{ table.name }")
275
+ self.query("INSERT INTO #{ recreating_temp_name } (\"#{ table.create_column_names.join('","') }\") SELECT \"#{ table.create_column_names.join('","') }\" FROM #{ table.name }")
275
276
  self.query("ALTER TABLE #{ table.name } RENAME TO zz_dataduck_recreating_old_#{ table.name }")
276
277
  self.query("ALTER TABLE #{ recreating_temp_name } RENAME TO #{ table.name }")
277
278
  self.query("DROP TABLE zz_dataduck_recreating_old_#{ table.name }")
@@ -212,7 +212,11 @@ module DataDuck
212
212
  end
213
213
 
214
214
  def should_fully_reload?
215
- false # Set to true if you want to fully reload a table with each ETL
215
+ false
216
+ end
217
+
218
+ def autogenerate_identity?
219
+ false
216
220
  end
217
221
 
218
222
  def building_name
@@ -223,12 +227,24 @@ module DataDuck
223
227
  "zz_dataduck_#{ self.name }"
224
228
  end
225
229
 
230
+ def create_schema
231
+ if self.autogenerate_identity?
232
+ Util.deep_merge(output_schema, {dataduck_identity: 'bigint identity(1, 1)'}) # Redshift only
233
+ else
234
+ output_schema
235
+ end
236
+ end
237
+
238
+ def create_column_names
239
+ self.create_schema.keys.map(&:to_s).sort
240
+ end
241
+
226
242
  def output_schema
227
243
  self.class.output_schema || self.class.superclass.output_schema || {}
228
244
  end
229
245
 
230
246
  def output_column_names
231
- self.output_schema.keys.sort.map(&:to_s)
247
+ self.output_schema.keys.map(&:to_s).sort
232
248
  end
233
249
 
234
250
  def postprocess!(destination, options = {})
@@ -1,7 +1,7 @@
1
1
  module DataDuck
2
2
  if !defined?(DataDuck::VERSION)
3
3
  VERSION_MAJOR = 0
4
- VERSION_MINOR = 8
4
+ VERSION_MINOR = 9
5
5
  VERSION_PATCH = 0
6
6
  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataduck
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Pickhardt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-25 00:00:00.000000000 Z
11
+ date: 2016-05-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler