dataduck 1.2.2 → 1.2.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 9d5833f516296cc44f7b74449d1c25014601cc89
-  data.tar.gz: 8ec72ab186743c80968626663eecade1e6cb1f99
+  metadata.gz: b906e1a110fc140a059051ef242a79ef00bcbcd2
+  data.tar.gz: 9648034a52cf23fb544aaa3d66db86ee1a424c23
 SHA512:
-  metadata.gz: a4e570b9756368f3a7a15821f9b8c661e477141d7ca21705434face9e4f78ba7e0e98aa7a0db9b01c363074a413d46b6380fd8d9ada84a2965e893b06ba48bda
-  data.tar.gz: 664b543ee47821cc4eae039e40a738990652bac539d7b8ee3f942b98244174b2681b2c47acb584682601c29ccf6573001557b3987c8bfd86cd96f2295bb9fe62
+  metadata.gz: c89642012f23632273c564ea4d09744304f053df3e4fe4ee0b9cb9734bd7403207246ae7f6109b6b53cd2d2b2c0361698aba92ccd6608934232b7ad4d6acc01a
+  data.tar.gz: 7f4c1682b11a97a9fe9777906f089a596e6022765ddafd4e2e38da42a534c9dadd81ee8af82d4dc7ca8d43ec2df643b791feec0eb76a0f5f5f79d877732a2a91
@@ -102,9 +102,9 @@ module DataDuck
 
       only_destination = DataDuck::Destination.only_destination
 
+      etl = nil
       if table_names_underscore.length == 1 && table_names_underscore[0] == "all"
         etl = ETL.new(destinations: [only_destination], autoload_tables: true)
-        etl.process!
       else
         tables = []
         table_names_underscore.each do |table_name|
@@ -122,7 +122,11 @@ module DataDuck
           autoload_tables: false,
           tables: tables
         })
-        etl.process!
+      end
+      etl.process!
+
+      if etl.errored?
+        exit(1)
       end
     end
 
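The two hunks above restructure the `etl` command: `etl` is declared before the branch, `etl.process!` is hoisted out of the if/else so it runs exactly once, and the command now exits with status 1 when any table errored. That makes a partial failure visible to whatever launches the run. A minimal sketch of how a caller might use this (the wrapper is illustrative, not part of the gem):

    # Hypothetical scheduler wrapper: detect a failed run from the new
    # nonzero exit status rather than by scraping the logs.
    ok = system("dataduck etl all")
    unless ok
      status = $?.exitstatus   # 1 when etl.errored? was true
      warn "ETL run failed with exit status #{ status }"
      # page, retry, or re-raise here
    end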
@@ -13,11 +13,13 @@ module DataDuck
 
     attr_accessor :destinations
     attr_accessor :tables
+    attr_accessor :errored_tables
 
     def initialize(options = {})
       self.class.destinations ||= []
       @tables = options[:tables] || []
       @destinations = options[:destinations] || []
+      @errored_tables = []
 
       @autoload_tables = options[:autoload_tables].nil? ? true : options[:autoload_tables]
       if @autoload_tables
@@ -32,6 +34,10 @@ module DataDuck
         end
       end
     end
+
+    def errored?
+      @errored_tables.length > 0
+    end
 
     def process!
       Logs.info("Processing ETL on pid #{ Process.pid }...")
@@ -44,7 +50,7 @@ module DataDuck
         destinations_to_use << DataDuck::Destination.only_destination
       end
 
-      errored_tables = []
+      @errored_tables = []
 
       @tables.each do |table_or_class|
         table = table_or_class.kind_of?(DataDuck::Table) ? table_or_class : table_or_class.new
@@ -53,13 +59,13 @@ module DataDuck
           table.etl!(destinations_to_use)
         rescue => err
           Logs.error("Error while processing table '#{ table.name }': #{ err.to_s }\n#{ err.backtrace.join("\n") }")
-          errored_tables << table
+          @errored_tables << table
         end
       end
 
-      Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - errored_tables.length } succeeded, #{ errored_tables.length } failed")
-      if errored_tables.length > 0
-        Logs.info("The following tables encountered errors: '#{ errored_tables.map(&:name).join("', '") }'")
+      Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - @errored_tables.length } succeeded, #{ @errored_tables.length } failed")
+      if @errored_tables.length > 0
+        Logs.info("The following tables encountered errors: '#{ @errored_tables.map(&:name).join("', '") }'")
       end
     end
   end
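Taken together, these hunks promote the local `errored_tables` array to the instance variable `@errored_tables` with an accessor and an `errored?` predicate, so a caller can inspect failures after `process!` returns instead of parsing log output. A minimal sketch of the new surface, assuming two hypothetical `DataDuck::Table` subclasses and an already-configured destination:

    # Users and Orders are illustrative table classes, not part of the gem.
    etl = DataDuck::ETL.new(tables: [Users, Orders], autoload_tables: false)
    etl.process!

    if etl.errored?
      # errored_tables holds the table instances whose etl! raised
      failed = etl.errored_tables.map(&:name).join(', ')
      abort("These tables failed to load: #{ failed }")
    end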
@@ -48,7 +48,7 @@ module DataDuck
       query_fragments << "CREDENTIALS 'aws_access_key_id=#{ self.aws_key };aws_secret_access_key=#{ self.aws_secret }'"
       query_fragments << "REGION '#{ self.s3_region }'"
       query_fragments << "CSV IGNOREHEADER 1 TRUNCATECOLUMNS ACCEPTINVCHARS EMPTYASNULL"
-      query_fragments << "DATEFORMAT 'auto'"
+      query_fragments << "DATEFORMAT 'auto' GZIP"
       return query_fragments.join(" ")
     end
 
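Appending `GZIP` to the COPY options tells Redshift that the staged S3 files are gzip-compressed; it pairs with the `.csv.gz` upload change in the next hunk. As a rough sketch, the fragments now join into a statement shaped like this (bucket, table, and credentials are illustrative):

    # Approximate shape of the generated COPY; only the last fragment changed.
    fragments = [
      "COPY users FROM 's3://example-bucket/pending/users_1490000000.csv.gz'",
      "CREDENTIALS 'aws_access_key_id=...;aws_secret_access_key=...'",
      "REGION 'us-east-1'",
      "CSV IGNOREHEADER 1 TRUNCATECOLUMNS ACCEPTINVCHARS EMPTYASNULL",
      "DATEFORMAT 'auto' GZIP"
    ]
    puts fragments.join(" ")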
@@ -226,16 +226,24 @@ module DataDuck
       self.query("SELECT DISTINCT(tablename) AS name FROM pg_table_def WHERE schemaname='public' ORDER BY name").map { |item| item[:name] }
     end
 
+    def gzip(data)
+      sio = StringIO.new
+      gz = Zlib::GzipWriter.new(sio)
+      gz.write(data)
+      gz.close
+      sio.string
+    end
+
     def upload_table_to_s3!(table)
       now_epoch = Time.now.to_i.to_s
-      filepath = "pending/#{ table.name.downcase }_#{ now_epoch }.csv"
+      filepath = "pending/#{ table.name.downcase }_#{ now_epoch }.csv.gz"
 
-      table_csv = self.data_as_csv_string(table.data, table.output_column_names)
+      table_csv = self.gzip(self.data_as_csv_string(table.data, table.output_column_names))
 
       s3_obj = S3Object.new(filepath, table_csv, self.aws_key, self.aws_secret,
           self.s3_bucket, self.s3_region)
       s3_obj.upload!
-      return s3_obj
+      s3_obj
     end
 
     def finish_fully_reloading_table!(table)
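The new `gzip` helper compresses the CSV in memory with `Zlib::GzipWriter`, and `upload_table_to_s3!` now stages a `.csv.gz` object, which is what the `GZIP` COPY option above expects: smaller uploads, same data after Redshift decompresses. A standalone round-trip check of the same pattern (a sketch, not the gem's code; `Zlib` and `StringIO` are Ruby stdlib):

    require 'stringio'
    require 'zlib'

    def gzip(data)
      sio = StringIO.new
      gz = Zlib::GzipWriter.new(sio)
      gz.write(data)
      gz.close       # flushes the gzip trailer into sio
      sio.string     # raw .gz bytes, ready to upload
    end

    csv = "id,name\n1,duck\n"
    packed = gzip(csv)
    unpacked = Zlib::GzipReader.new(StringIO.new(packed)).read
    raise "gzip round-trip failed" unless unpacked == csv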
@@ -2,7 +2,7 @@ module DataDuck
   if !defined?(DataDuck::VERSION)
     VERSION_MAJOR = 1
     VERSION_MINOR = 2
-    VERSION_PATCH = 2
+    VERSION_PATCH = 3
     VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
   end
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: dataduck
 version: !ruby/object:Gem::Version
-  version: 1.2.2
+  version: 1.2.3
 platform: ruby
 authors:
 - Jeff Pickhardt
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-03-20 00:00:00.000000000 Z
+date: 2017-03-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler