dataduck 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dataduck/commands.rb +6 -2
- data/lib/dataduck/etl.rb +11 -5
- data/lib/dataduck/redshift_destination.rb +12 -4
- data/lib/dataduck/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b906e1a110fc140a059051ef242a79ef00bcbcd2
|
4
|
+
data.tar.gz: 9648034a52cf23fb544aaa3d66db86ee1a424c23
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c89642012f23632273c564ea4d09744304f053df3e4fe4ee0b9cb9734bd7403207246ae7f6109b6b53cd2d2b2c0361698aba92ccd6608934232b7ad4d6acc01a
|
7
|
+
data.tar.gz: 7f4c1682b11a97a9fe9777906f089a596e6022765ddafd4e2e38da42a534c9dadd81ee8af82d4dc7ca8d43ec2df643b791feec0eb76a0f5f5f79d877732a2a91
|
data/lib/dataduck/commands.rb
CHANGED
@@ -102,9 +102,9 @@ module DataDuck
|
|
102
102
|
|
103
103
|
only_destination = DataDuck::Destination.only_destination
|
104
104
|
|
105
|
+
etl = nil
|
105
106
|
if table_names_underscore.length == 1 && table_names_underscore[0] == "all"
|
106
107
|
etl = ETL.new(destinations: [only_destination], autoload_tables: true)
|
107
|
-
etl.process!
|
108
108
|
else
|
109
109
|
tables = []
|
110
110
|
table_names_underscore.each do |table_name|
|
@@ -122,7 +122,11 @@ module DataDuck
|
|
122
122
|
autoload_tables: false,
|
123
123
|
tables: tables
|
124
124
|
})
|
125
|
-
|
125
|
+
end
|
126
|
+
etl.process!
|
127
|
+
|
128
|
+
if etl.errored?
|
129
|
+
exit(1)
|
126
130
|
end
|
127
131
|
end
|
128
132
|
|
data/lib/dataduck/etl.rb
CHANGED
@@ -13,11 +13,13 @@ module DataDuck
|
|
13
13
|
|
14
14
|
attr_accessor :destinations
|
15
15
|
attr_accessor :tables
|
16
|
+
attr_accessor :errored_tables
|
16
17
|
|
17
18
|
def initialize(options = {})
|
18
19
|
self.class.destinations ||= []
|
19
20
|
@tables = options[:tables] || []
|
20
21
|
@destinations = options[:destinations] || []
|
22
|
+
@errored_tables = []
|
21
23
|
|
22
24
|
@autoload_tables = options[:autoload_tables].nil? ? true : options[:autoload_tables]
|
23
25
|
if @autoload_tables
|
@@ -32,6 +34,10 @@ module DataDuck
|
|
32
34
|
end
|
33
35
|
end
|
34
36
|
end
|
37
|
+
|
38
|
+
def errored?
|
39
|
+
@errored_tables.length > 0
|
40
|
+
end
|
35
41
|
|
36
42
|
def process!
|
37
43
|
Logs.info("Processing ETL on pid #{ Process.pid }...")
|
@@ -44,7 +50,7 @@ module DataDuck
|
|
44
50
|
destinations_to_use << DataDuck::Destination.only_destination
|
45
51
|
end
|
46
52
|
|
47
|
-
errored_tables = []
|
53
|
+
@errored_tables = []
|
48
54
|
|
49
55
|
@tables.each do |table_or_class|
|
50
56
|
table = table_or_class.kind_of?(DataDuck::Table) ? table_or_class : table_or_class.new
|
@@ -53,13 +59,13 @@ module DataDuck
|
|
53
59
|
table.etl!(destinations_to_use)
|
54
60
|
rescue => err
|
55
61
|
Logs.error("Error while processing table '#{ table.name }': #{ err.to_s }\n#{ err.backtrace.join("\n") }")
|
56
|
-
errored_tables << table
|
62
|
+
@errored_tables << table
|
57
63
|
end
|
58
64
|
end
|
59
65
|
|
60
|
-
Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - errored_tables.length } succeeded, #{ errored_tables.length } failed")
|
61
|
-
if errored_tables.length > 0
|
62
|
-
Logs.info("The following tables encountered errors: '#{ errored_tables.map(&:name).join("', '") }'")
|
66
|
+
Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - @errored_tables.length } succeeded, #{ @errored_tables.length } failed")
|
67
|
+
if @errored_tables.length > 0
|
68
|
+
Logs.info("The following tables encountered errors: '#{ @errored_tables.map(&:name).join("', '") }'")
|
63
69
|
end
|
64
70
|
end
|
65
71
|
end
|
@@ -48,7 +48,7 @@ module DataDuck
|
|
48
48
|
query_fragments << "CREDENTIALS 'aws_access_key_id=#{ self.aws_key };aws_secret_access_key=#{ self.aws_secret }'"
|
49
49
|
query_fragments << "REGION '#{ self.s3_region }'"
|
50
50
|
query_fragments << "CSV IGNOREHEADER 1 TRUNCATECOLUMNS ACCEPTINVCHARS EMPTYASNULL"
|
51
|
-
query_fragments << "DATEFORMAT 'auto'"
|
51
|
+
query_fragments << "DATEFORMAT 'auto' GZIP"
|
52
52
|
return query_fragments.join(" ")
|
53
53
|
end
|
54
54
|
|
@@ -226,16 +226,24 @@ module DataDuck
|
|
226
226
|
self.query("SELECT DISTINCT(tablename) AS name FROM pg_table_def WHERE schemaname='public' ORDER BY name").map { |item| item[:name] }
|
227
227
|
end
|
228
228
|
|
229
|
+
def gzip(data)
|
230
|
+
sio = StringIO.new
|
231
|
+
gz = Zlib::GzipWriter.new(sio)
|
232
|
+
gz.write(data)
|
233
|
+
gz.close
|
234
|
+
sio.string
|
235
|
+
end
|
236
|
+
|
229
237
|
def upload_table_to_s3!(table)
|
230
238
|
now_epoch = Time.now.to_i.to_s
|
231
|
-
filepath = "pending/#{ table.name.downcase }_#{ now_epoch }.csv"
|
239
|
+
filepath = "pending/#{ table.name.downcase }_#{ now_epoch }.csv.gz"
|
232
240
|
|
233
|
-
table_csv = self.data_as_csv_string(table.data, table.output_column_names)
|
241
|
+
table_csv = self.gzip(self.data_as_csv_string(table.data, table.output_column_names))
|
234
242
|
|
235
243
|
s3_obj = S3Object.new(filepath, table_csv, self.aws_key, self.aws_secret,
|
236
244
|
self.s3_bucket, self.s3_region)
|
237
245
|
s3_obj.upload!
|
238
|
-
|
246
|
+
s3_obj
|
239
247
|
end
|
240
248
|
|
241
249
|
def finish_fully_reloading_table!(table)
|
data/lib/dataduck/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataduck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.2
|
4
|
+
version: 1.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Pickhardt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|