dataduck 1.2.2 → 1.2.3
- checksums.yaml +4 -4
- data/lib/dataduck/commands.rb +6 -2
- data/lib/dataduck/etl.rb +11 -5
- data/lib/dataduck/redshift_destination.rb +12 -4
- data/lib/dataduck/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b906e1a110fc140a059051ef242a79ef00bcbcd2
+  data.tar.gz: 9648034a52cf23fb544aaa3d66db86ee1a424c23
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c89642012f23632273c564ea4d09744304f053df3e4fe4ee0b9cb9734bd7403207246ae7f6109b6b53cd2d2b2c0361698aba92ccd6608934232b7ad4d6acc01a
+  data.tar.gz: 7f4c1682b11a97a9fe9777906f089a596e6022765ddafd4e2e38da42a534c9dadd81ee8af82d4dc7ca8d43ec2df643b791feec0eb76a0f5f5f79d877732a2a91
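The SHA1 and SHA512 entries above cover the two archives packed inside the .gem file. A minimal verification sketch, assuming dataduck-1.2.3.gem has been fetched (e.g. via gem fetch dataduck -v 1.2.3) and its outer tar unpacked so metadata.gz and data.tar.gz sit in the current directory:

require 'digest'

# Print digests for comparison with the checksums.yaml entries above.
%w[metadata.gz data.tar.gz].each do |name|
  puts "#{name} SHA1:   #{Digest::SHA1.file(name).hexdigest}"
  puts "#{name} SHA512: #{Digest::SHA512.file(name).hexdigest}"
end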
data/lib/dataduck/commands.rb
CHANGED

@@ -102,9 +102,9 @@ module DataDuck
 
       only_destination = DataDuck::Destination.only_destination
 
+      etl = nil
       if table_names_underscore.length == 1 && table_names_underscore[0] == "all"
         etl = ETL.new(destinations: [only_destination], autoload_tables: true)
-        etl.process!
       else
         tables = []
         table_names_underscore.each do |table_name|
@@ -122,7 +122,11 @@ module DataDuck
           autoload_tables: false,
           tables: tables
         })
-        etl.process!
+      end
+      etl.process!
+
+      if etl.errored?
+        exit(1)
       end
     end
 
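The commands.rb change hoists etl out of both branches so a single process! call runs after the conditional, then exits nonzero when any table failed. A hedged sketch of why that matters to callers (the wrapper below is hypothetical, not part of the gem):

# Hypothetical cron/CI wrapper: with 1.2.3, a partially failed run
# surfaces as a nonzero exit status instead of silently returning 0.
ok = system("dataduck etl all")
unless ok
  warn "dataduck exited with status #{$?.exitstatus}: at least one table errored"
  exit 1
end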
data/lib/dataduck/etl.rb
CHANGED

@@ -13,11 +13,13 @@ module DataDuck
 
     attr_accessor :destinations
     attr_accessor :tables
+    attr_accessor :errored_tables
 
     def initialize(options = {})
       self.class.destinations ||= []
       @tables = options[:tables] || []
       @destinations = options[:destinations] || []
+      @errored_tables = []
 
       @autoload_tables = options[:autoload_tables].nil? ? true : options[:autoload_tables]
       if @autoload_tables
@@ -32,6 +34,10 @@ module DataDuck
         end
       end
     end
+
+    def errored?
+      @errored_tables.length > 0
+    end
 
     def process!
       Logs.info("Processing ETL on pid #{ Process.pid }...")
@@ -44,7 +50,7 @@ module DataDuck
         destinations_to_use << DataDuck::Destination.only_destination
       end
 
-      errored_tables = []
+      @errored_tables = []
 
       @tables.each do |table_or_class|
         table = table_or_class.kind_of?(DataDuck::Table) ? table_or_class : table_or_class.new
@@ -53,13 +59,13 @@ module DataDuck
           table.etl!(destinations_to_use)
         rescue => err
           Logs.error("Error while processing table '#{ table.name }': #{ err.to_s }\n#{ err.backtrace.join("\n") }")
-          errored_tables << table
+          @errored_tables << table
         end
       end
 
-      Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - errored_tables.length } succeeded, #{ errored_tables.length } failed")
-      if errored_tables.length > 0
-        Logs.info("The following tables encountered errors: '#{ errored_tables.map(&:name).join("', '") }'")
+      Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - @errored_tables.length } succeeded, #{ @errored_tables.length } failed")
+      if @errored_tables.length > 0
+        Logs.info("The following tables encountered errors: '#{ @errored_tables.map(&:name).join("', '") }'")
       end
     end
   end
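Since errored_tables is now an attr_accessor backed by an instance variable, and errored? reports whether any table raised, code embedding DataDuck::ETL can inspect failures after process! returns. A sketch assuming a user-defined table class (UsersTable and dest are placeholders, not part of the gem):

# Hypothetical embedding; UsersTable and dest are stand-ins for app code.
etl = DataDuck::ETL.new(destinations: [dest], autoload_tables: false,
                        tables: [UsersTable.new])
etl.process!  # per-table errors are rescued, logged, and collected

if etl.errored?
  raise "ETL failed for: #{etl.errored_tables.map(&:name).join(', ')}"
end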
data/lib/dataduck/redshift_destination.rb
CHANGED

@@ -48,7 +48,7 @@ module DataDuck
       query_fragments << "CREDENTIALS 'aws_access_key_id=#{ self.aws_key };aws_secret_access_key=#{ self.aws_secret }'"
       query_fragments << "REGION '#{ self.s3_region }'"
       query_fragments << "CSV IGNOREHEADER 1 TRUNCATECOLUMNS ACCEPTINVCHARS EMPTYASNULL"
-      query_fragments << "DATEFORMAT 'auto'"
+      query_fragments << "DATEFORMAT 'auto' GZIP"
       return query_fragments.join(" ")
     end
 
@@ -226,16 +226,24 @@ module DataDuck
       self.query("SELECT DISTINCT(tablename) AS name FROM pg_table_def WHERE schemaname='public' ORDER BY name").map { |item| item[:name] }
     end
 
+    def gzip(data)
+      sio = StringIO.new
+      gz = Zlib::GzipWriter.new(sio)
+      gz.write(data)
+      gz.close
+      sio.string
+    end
+
     def upload_table_to_s3!(table)
       now_epoch = Time.now.to_i.to_s
-      filepath = "pending/#{ table.name.downcase }_#{ now_epoch }.csv"
+      filepath = "pending/#{ table.name.downcase }_#{ now_epoch }.csv.gz"
 
-      table_csv = self.data_as_csv_string(table.data, table.output_column_names)
+      table_csv = self.gzip(self.data_as_csv_string(table.data, table.output_column_names))
 
       s3_obj = S3Object.new(filepath, table_csv, self.aws_key, self.aws_secret,
                             self.s3_bucket, self.s3_region)
       s3_obj.upload!
-
+      s3_obj
     end
 
     def finish_fully_reloading_table!(table)
data/lib/dataduck/version.rb
CHANGED

@@ -1,3 +1,3 @@
 module DataDuck
-  VERSION = "1.2.2"
+  VERSION = "1.2.3"
 end
metadata
CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: dataduck
 version: !ruby/object:Gem::Version
-  version: 1.2.2
+  version: 1.2.3
 platform: ruby
 authors:
 - Jeff Pickhardt
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-03-
+date: 2017-03-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler