dataduck 1.2.2 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9d5833f516296cc44f7b74449d1c25014601cc89
4
- data.tar.gz: 8ec72ab186743c80968626663eecade1e6cb1f99
3
+ metadata.gz: b906e1a110fc140a059051ef242a79ef00bcbcd2
4
+ data.tar.gz: 9648034a52cf23fb544aaa3d66db86ee1a424c23
5
5
  SHA512:
6
- metadata.gz: a4e570b9756368f3a7a15821f9b8c661e477141d7ca21705434face9e4f78ba7e0e98aa7a0db9b01c363074a413d46b6380fd8d9ada84a2965e893b06ba48bda
7
- data.tar.gz: 664b543ee47821cc4eae039e40a738990652bac539d7b8ee3f942b98244174b2681b2c47acb584682601c29ccf6573001557b3987c8bfd86cd96f2295bb9fe62
6
+ metadata.gz: c89642012f23632273c564ea4d09744304f053df3e4fe4ee0b9cb9734bd7403207246ae7f6109b6b53cd2d2b2c0361698aba92ccd6608934232b7ad4d6acc01a
7
+ data.tar.gz: 7f4c1682b11a97a9fe9777906f089a596e6022765ddafd4e2e38da42a534c9dadd81ee8af82d4dc7ca8d43ec2df643b791feec0eb76a0f5f5f79d877732a2a91
@@ -102,9 +102,9 @@ module DataDuck
102
102
 
103
103
  only_destination = DataDuck::Destination.only_destination
104
104
 
105
+ etl = nil
105
106
  if table_names_underscore.length == 1 && table_names_underscore[0] == "all"
106
107
  etl = ETL.new(destinations: [only_destination], autoload_tables: true)
107
- etl.process!
108
108
  else
109
109
  tables = []
110
110
  table_names_underscore.each do |table_name|
@@ -122,7 +122,11 @@ module DataDuck
122
122
  autoload_tables: false,
123
123
  tables: tables
124
124
  })
125
- etl.process!
125
+ end
126
+ etl.process!
127
+
128
+ if etl.errored?
129
+ exit(1)
126
130
  end
127
131
  end
128
132
 
@@ -13,11 +13,13 @@ module DataDuck
13
13
 
14
14
  attr_accessor :destinations
15
15
  attr_accessor :tables
16
+ attr_accessor :errored_tables
16
17
 
17
18
  def initialize(options = {})
18
19
  self.class.destinations ||= []
19
20
  @tables = options[:tables] || []
20
21
  @destinations = options[:destinations] || []
22
+ @errored_tables = []
21
23
 
22
24
  @autoload_tables = options[:autoload_tables].nil? ? true : options[:autoload_tables]
23
25
  if @autoload_tables
@@ -32,6 +34,10 @@ module DataDuck
32
34
  end
33
35
  end
34
36
  end
37
+
38
+ def errored?
39
+ @errored_tables.length > 0
40
+ end
35
41
 
36
42
  def process!
37
43
  Logs.info("Processing ETL on pid #{ Process.pid }...")
@@ -44,7 +50,7 @@ module DataDuck
44
50
  destinations_to_use << DataDuck::Destination.only_destination
45
51
  end
46
52
 
47
- errored_tables = []
53
+ @errored_tables = []
48
54
 
49
55
  @tables.each do |table_or_class|
50
56
  table = table_or_class.kind_of?(DataDuck::Table) ? table_or_class : table_or_class.new
@@ -53,13 +59,13 @@ module DataDuck
53
59
  table.etl!(destinations_to_use)
54
60
  rescue => err
55
61
  Logs.error("Error while processing table '#{ table.name }': #{ err.to_s }\n#{ err.backtrace.join("\n") }")
56
- errored_tables << table
62
+ @errored_tables << table
57
63
  end
58
64
  end
59
65
 
60
- Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - errored_tables.length } succeeded, #{ errored_tables.length } failed")
61
- if errored_tables.length > 0
62
- Logs.info("The following tables encountered errors: '#{ errored_tables.map(&:name).join("', '") }'")
66
+ Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - @errored_tables.length } succeeded, #{ @errored_tables.length } failed")
67
+ if @errored_tables.length > 0
68
+ Logs.info("The following tables encountered errors: '#{ @errored_tables.map(&:name).join("', '") }'")
63
69
  end
64
70
  end
65
71
  end
@@ -48,7 +48,7 @@ module DataDuck
48
48
  query_fragments << "CREDENTIALS 'aws_access_key_id=#{ self.aws_key };aws_secret_access_key=#{ self.aws_secret }'"
49
49
  query_fragments << "REGION '#{ self.s3_region }'"
50
50
  query_fragments << "CSV IGNOREHEADER 1 TRUNCATECOLUMNS ACCEPTINVCHARS EMPTYASNULL"
51
- query_fragments << "DATEFORMAT 'auto'"
51
+ query_fragments << "DATEFORMAT 'auto' GZIP"
52
52
  return query_fragments.join(" ")
53
53
  end
54
54
 
@@ -226,16 +226,24 @@ module DataDuck
226
226
  self.query("SELECT DISTINCT(tablename) AS name FROM pg_table_def WHERE schemaname='public' ORDER BY name").map { |item| item[:name] }
227
227
  end
228
228
 
229
+ def gzip(data)
230
+ sio = StringIO.new
231
+ gz = Zlib::GzipWriter.new(sio)
232
+ gz.write(data)
233
+ gz.close
234
+ sio.string
235
+ end
236
+
229
237
  def upload_table_to_s3!(table)
230
238
  now_epoch = Time.now.to_i.to_s
231
- filepath = "pending/#{ table.name.downcase }_#{ now_epoch }.csv"
239
+ filepath = "pending/#{ table.name.downcase }_#{ now_epoch }.csv.gz"
232
240
 
233
- table_csv = self.data_as_csv_string(table.data, table.output_column_names)
241
+ table_csv = self.gzip(self.data_as_csv_string(table.data, table.output_column_names))
234
242
 
235
243
  s3_obj = S3Object.new(filepath, table_csv, self.aws_key, self.aws_secret,
236
244
  self.s3_bucket, self.s3_region)
237
245
  s3_obj.upload!
238
- return s3_obj
246
+ s3_obj
239
247
  end
240
248
 
241
249
  def finish_fully_reloading_table!(table)
@@ -2,7 +2,7 @@ module DataDuck
2
2
  if !defined?(DataDuck::VERSION)
3
3
  VERSION_MAJOR = 1
4
4
  VERSION_MINOR = 2
5
- VERSION_PATCH = 2
5
+ VERSION_PATCH = 3
6
6
  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
7
7
  end
8
8
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataduck
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.2
4
+ version: 1.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Pickhardt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-20 00:00:00.000000000 Z
11
+ date: 2017-03-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler