bricolage-streamingload 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8bddfd0337158c01e5acb0fac9ff9d4b3e152029
|
|
4
|
+
data.tar.gz: 7243c54d80c5bcd4c8e157fd8d5a14e66e629abe
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3e85208c4fcbf1a199169e75b40490181127fbaf7398e6ebb78daaead3f36704e76b7177ce43edfe404e5509e95f5a592ed8db4d4f7fa379efcca0d382fdedd6
|
|
7
|
+
data.tar.gz: eb8187f0b900731bdf21c4e3b3ffbb0ac24ad133bd6d7819b20e4b7eb24420e649bf690d43f0b0e3aa5caf496146e208a0d13fa33c6be72183b8469eca96bb1c
|
|
@@ -18,9 +18,10 @@ module Bricolage
|
|
|
18
18
|
|
|
19
19
|
class Job
|
|
20
20
|
|
|
21
|
-
def initialize(context:, ctl_ds:, task_id:, force: false, logger:)
|
|
21
|
+
def initialize(context:, ctl_ds:, log_table: 'strload_load_logs', task_id:, force: false, logger:)
|
|
22
22
|
@context = context
|
|
23
23
|
@ctl_ds = ctl_ds
|
|
24
|
+
@log_table = log_table
|
|
24
25
|
@task_id = task_id
|
|
25
26
|
@force = force
|
|
26
27
|
@logger = logger
|
|
@@ -53,14 +54,17 @@ module Bricolage
|
|
|
53
54
|
@logger.error ex.message
|
|
54
55
|
wait_for_connection('ctl', @ctl_ds) unless fail_fast
|
|
55
56
|
return false
|
|
56
|
-
rescue DataConnectionFailed
|
|
57
|
+
rescue DataConnectionFailed => ex
|
|
58
|
+
@logger.error ex.message
|
|
57
59
|
wait_for_connection('data', @data_ds) unless fail_fast
|
|
58
60
|
# FIXME: tmp: We don't know the transaction was succeeded or not in the Redshift, auto-retry is too dangerous.
|
|
59
61
|
#return false
|
|
60
62
|
return true
|
|
61
|
-
rescue JobFailure
|
|
63
|
+
rescue JobFailure => ex
|
|
64
|
+
@logger.error ex.message
|
|
62
65
|
return false
|
|
63
|
-
rescue JobError
|
|
66
|
+
rescue JobError => ex
|
|
67
|
+
@logger.error ex.message
|
|
64
68
|
return true
|
|
65
69
|
rescue Exception => ex
|
|
66
70
|
@logger.exception ex
|
|
@@ -101,14 +105,12 @@ module Bricolage
|
|
|
101
105
|
|
|
102
106
|
# FIXME: tmp: should be a failure, not an error.
|
|
103
107
|
rescue DataConnectionFailed => ex
|
|
104
|
-
@logger.error ex.message
|
|
105
108
|
ctl.open {
|
|
106
109
|
ctl.abort_job job_id, 'error', ex.message.lines.first.strip
|
|
107
110
|
}
|
|
108
111
|
raise
|
|
109
112
|
|
|
110
113
|
rescue JobFailure => ex
|
|
111
|
-
@logger.error ex.message
|
|
112
114
|
ctl.open {
|
|
113
115
|
fail_count = ctl.fail_count(@task_id)
|
|
114
116
|
final_retry = (fail_count >= MAX_RETRY)
|
|
@@ -118,7 +120,6 @@ module Bricolage
|
|
|
118
120
|
}
|
|
119
121
|
raise
|
|
120
122
|
rescue JobError => ex
|
|
121
|
-
@logger.error ex.message
|
|
122
123
|
ctl.open {
|
|
123
124
|
ctl.abort_job job_id, 'error', ex.message.lines.first.strip
|
|
124
125
|
}
|
|
@@ -138,9 +139,9 @@ module Bricolage
|
|
|
138
139
|
@manifest = ManifestFile.create(ds: params.ctl_bucket, job_id: job_id, object_urls: task.object_urls, logger: @logger)
|
|
139
140
|
DataConnection.open(params.ds, @logger) {|data|
|
|
140
141
|
if params.enable_work_table?
|
|
141
|
-
data.load_with_work_table params.work_table, @manifest, params.load_options_string, params.sql_source
|
|
142
|
+
data.load_with_work_table params.work_table, @manifest, params.load_options_string, params.sql_source, @log_table, job_id
|
|
142
143
|
else
|
|
143
|
-
data.load_objects params.dest_table, @manifest, params.load_options_string
|
|
144
|
+
data.load_objects params.dest_table, @manifest, params.load_options_string, @log_table, job_id
|
|
144
145
|
end
|
|
145
146
|
}
|
|
146
147
|
end
|
|
@@ -192,18 +193,26 @@ module Bricolage
|
|
|
192
193
|
raise DataConnectionFailed, "data connection failed: #{ex.message}"
|
|
193
194
|
end
|
|
194
195
|
|
|
195
|
-
def load_with_work_table(work_table, manifest, options, sql_source)
|
|
196
|
+
def load_with_work_table(work_table, manifest, options, sql_source, log_table, job_id)
|
|
196
197
|
@connection.transaction {|txn|
|
|
197
198
|
# NOTE: This transaction ends with truncation, this DELETE does nothing
|
|
198
199
|
# from the second time. So don't worry about DELETE cost here.
|
|
199
200
|
@connection.execute("delete from #{work_table}")
|
|
200
|
-
|
|
201
|
+
execute_copy work_table, manifest, options
|
|
201
202
|
@connection.execute sql_source
|
|
203
|
+
write_load_log log_table, job_id
|
|
202
204
|
txn.truncate_and_commit work_table
|
|
203
205
|
}
|
|
204
206
|
end
|
|
205
207
|
|
|
206
|
-
def load_objects(dest_table, manifest, options)
|
|
208
|
+
def load_objects(dest_table, manifest, options, log_table, job_id)
|
|
209
|
+
@connection.transaction {|txn|
|
|
210
|
+
execute_copy dest_table, manifest, options
|
|
211
|
+
write_load_log log_table, job_id
|
|
212
|
+
}
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
def execute_copy(dest_table, manifest, options)
|
|
207
216
|
@connection.execute(<<-EndSQL.strip.gsub(/\s+/, ' '))
|
|
208
217
|
copy #{dest_table}
|
|
209
218
|
from #{s manifest.url}
|
|
@@ -217,6 +226,10 @@ module Bricolage
|
|
|
217
226
|
@logger.info "load succeeded: #{manifest.url}"
|
|
218
227
|
end
|
|
219
228
|
|
|
229
|
+
def write_load_log(log_table, job_id)
|
|
230
|
+
@connection.execute("insert into #{log_table} (job_id, finish_time) values (#{job_id}, current_timestamp)")
|
|
231
|
+
end
|
|
232
|
+
|
|
220
233
|
end # class DataConnection
|
|
221
234
|
|
|
222
235
|
|
|
@@ -30,6 +30,7 @@ module Bricolage
|
|
|
30
30
|
|
|
31
31
|
ctl_ds = ctx.get_data_source('sql', config.fetch('ctl-postgres-ds', 'db_ctl'))
|
|
32
32
|
task_queue = ctx.get_data_source('sqs', config.fetch('task-queue-ds', 'sqs_task'))
|
|
33
|
+
log_table = config.fetch('log-table', 'strload_load_logs')
|
|
33
34
|
service_logger =
|
|
34
35
|
if config.key?('alert-level')
|
|
35
36
|
new_alerting_logger(ctx, config)
|
|
@@ -40,6 +41,7 @@ module Bricolage
|
|
|
40
41
|
task_handler = new(
|
|
41
42
|
context: ctx,
|
|
42
43
|
ctl_ds: ctl_ds,
|
|
44
|
+
log_table: log_table,
|
|
43
45
|
task_queue: task_queue,
|
|
44
46
|
working_dir: opts.working_dir,
|
|
45
47
|
logger: service_logger,
|
|
@@ -90,9 +92,10 @@ module Bricolage
|
|
|
90
92
|
# ignore
|
|
91
93
|
end
|
|
92
94
|
|
|
93
|
-
def initialize(context:, ctl_ds:, task_queue:, working_dir:, logger:, job_class: Job)
|
|
95
|
+
def initialize(context:, ctl_ds:, log_table:, task_queue:, working_dir:, logger:, job_class: Job)
|
|
94
96
|
@ctx = context
|
|
95
97
|
@ctl_ds = ctl_ds
|
|
98
|
+
@log_table = log_table
|
|
96
99
|
@task_queue = task_queue
|
|
97
100
|
@working_dir = working_dir
|
|
98
101
|
@logger = logger
|
|
@@ -102,7 +105,7 @@ module Bricolage
|
|
|
102
105
|
attr_reader :logger
|
|
103
106
|
|
|
104
107
|
def execute_task_by_id(task_id, force: false)
|
|
105
|
-
job =
|
|
108
|
+
job = new_job(task_id, force)
|
|
106
109
|
job.execute(fail_fast: true)
|
|
107
110
|
end
|
|
108
111
|
|
|
@@ -119,7 +122,7 @@ module Bricolage
|
|
|
119
122
|
# message handler
|
|
120
123
|
def handle_streaming_load_v3(t)
|
|
121
124
|
Dir.chdir(@working_dir) {
|
|
122
|
-
job =
|
|
125
|
+
job = new_job(t.task_id, t.force?)
|
|
123
126
|
if job.execute
|
|
124
127
|
@task_queue.delete_message(t)
|
|
125
128
|
end
|
|
@@ -128,6 +131,10 @@ module Bricolage
|
|
|
128
131
|
@logger.exception ex
|
|
129
132
|
end
|
|
130
133
|
|
|
134
|
+
def new_job(task_id, force)
|
|
135
|
+
@job_class.new(context: @ctx, ctl_ds: @ctl_ds, log_table: @log_table, task_id: task_id, force: force, logger: @logger)
|
|
136
|
+
end
|
|
137
|
+
|
|
131
138
|
def job_class
|
|
132
139
|
@job_class ||= Job
|
|
133
140
|
end
|
|
@@ -27,8 +27,13 @@ module Bricolage
|
|
|
27
27
|
job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 1, force: false, logger: ctx.logger)
|
|
28
28
|
job.execute_task
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
assert_equal [
|
|
31
|
+
"begin transaction;",
|
|
32
|
+
"copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
|
33
|
+
"insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
|
|
34
|
+
"commit;"
|
|
35
|
+
], job.data_ds.sql_list
|
|
36
|
+
|
|
32
37
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
|
33
38
|
assert_equal 1, job_row['task_id'].to_i
|
|
34
39
|
assert_equal job.process_id, job_row['process_id']
|
|
@@ -48,11 +53,14 @@ module Bricolage
|
|
|
48
53
|
job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
|
|
49
54
|
job.execute_task
|
|
50
55
|
|
|
51
|
-
assert_equal
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
+
assert_equal [
|
|
57
|
+
"begin transaction;",
|
|
58
|
+
"delete from testschema.with_work_table_wk",
|
|
59
|
+
"copy testschema.with_work_table_wk from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
|
60
|
+
"insert into testschema.with_work_table select * from testschema.with_work_table_wk;\n",
|
|
61
|
+
"insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
|
|
62
|
+
"truncate testschema.with_work_table_wk;"
|
|
63
|
+
], job.data_ds.sql_list
|
|
56
64
|
|
|
57
65
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
|
58
66
|
assert_equal 11, job_row['task_id'].to_i
|
|
@@ -103,8 +111,12 @@ module Bricolage
|
|
|
103
111
|
job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: true, logger: ctx.logger)
|
|
104
112
|
job.execute_task
|
|
105
113
|
|
|
106
|
-
|
|
107
|
-
|
|
114
|
+
assert_equal [
|
|
115
|
+
"begin transaction;",
|
|
116
|
+
"copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
|
117
|
+
"insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
|
|
118
|
+
"commit;"
|
|
119
|
+
], job.data_ds.sql_list
|
|
108
120
|
|
|
109
121
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
|
110
122
|
assert_equal 11, job_row['task_id'].to_i
|
|
@@ -127,8 +139,11 @@ module Bricolage
|
|
|
127
139
|
assert_raise(JobFailure) {
|
|
128
140
|
job.execute_task
|
|
129
141
|
}
|
|
130
|
-
|
|
131
|
-
|
|
142
|
+
assert_equal [
|
|
143
|
+
"begin transaction;",
|
|
144
|
+
"copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
|
145
|
+
"abort;"
|
|
146
|
+
], job.data_ds.sql_list
|
|
132
147
|
|
|
133
148
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
|
134
149
|
assert_equal 11, job_row['task_id'].to_i
|
|
@@ -153,8 +168,11 @@ module Bricolage
|
|
|
153
168
|
assert_raise(JobFailure) {
|
|
154
169
|
job.execute_task
|
|
155
170
|
}
|
|
156
|
-
|
|
157
|
-
|
|
171
|
+
assert_equal [
|
|
172
|
+
"begin transaction;",
|
|
173
|
+
"copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
|
174
|
+
"abort;"
|
|
175
|
+
], job.data_ds.sql_list
|
|
158
176
|
|
|
159
177
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
|
160
178
|
assert_equal 11, job_row['task_id'].to_i
|
|
@@ -183,8 +201,12 @@ module Bricolage
|
|
|
183
201
|
assert_raise(JobCancelled) {
|
|
184
202
|
job.execute_task
|
|
185
203
|
}
|
|
186
|
-
|
|
187
|
-
|
|
204
|
+
assert_equal [
|
|
205
|
+
"begin transaction;",
|
|
206
|
+
"copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
|
207
|
+
"abort;"
|
|
208
|
+
], job.data_ds.sql_list
|
|
209
|
+
|
|
188
210
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
|
189
211
|
assert_equal 11, job_row['task_id'].to_i
|
|
190
212
|
assert_equal job.process_id, job_row['process_id']
|
|
@@ -206,7 +228,12 @@ module Bricolage
|
|
|
206
228
|
assert_raise(JobError) {
|
|
207
229
|
job.execute_task
|
|
208
230
|
}
|
|
209
|
-
assert_equal
|
|
231
|
+
assert_equal [
|
|
232
|
+
"begin transaction;",
|
|
233
|
+
"copy testschema.job_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
|
234
|
+
"abort;"
|
|
235
|
+
], job.data_ds.sql_list
|
|
236
|
+
|
|
210
237
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
|
211
238
|
assert_equal 11, job_row['task_id'].to_i
|
|
212
239
|
assert_equal job.process_id, job_row['process_id']
|
|
@@ -227,7 +254,12 @@ module Bricolage
|
|
|
227
254
|
assert_raise(JobError) {
|
|
228
255
|
job.execute_task
|
|
229
256
|
}
|
|
230
|
-
assert_equal
|
|
257
|
+
assert_equal [
|
|
258
|
+
"begin transaction;",
|
|
259
|
+
"copy testschema.unexpected_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
|
260
|
+
"abort;"
|
|
261
|
+
], job.data_ds.sql_list
|
|
262
|
+
|
|
231
263
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
|
232
264
|
assert_equal 11, job_row['task_id'].to_i
|
|
233
265
|
assert_equal job.process_id, job_row['process_id']
|
|
@@ -379,21 +411,39 @@ module Bricolage
|
|
|
379
411
|
|
|
380
412
|
def transaction
|
|
381
413
|
@ds.issue_sql "begin transaction;"
|
|
382
|
-
|
|
414
|
+
txn = Transaction.new(@ds)
|
|
415
|
+
yield txn
|
|
416
|
+
rescue
|
|
417
|
+
txn.abort unless txn.committed?
|
|
418
|
+
raise
|
|
419
|
+
ensure
|
|
420
|
+
txn.commit unless txn.committed?
|
|
383
421
|
end
|
|
384
422
|
end
|
|
385
423
|
|
|
386
424
|
class Transaction
|
|
387
425
|
def initialize(ds)
|
|
388
426
|
@ds = ds
|
|
427
|
+
@commit = false
|
|
428
|
+
end
|
|
429
|
+
|
|
430
|
+
def committed?
|
|
431
|
+
@commit
|
|
389
432
|
end
|
|
390
433
|
|
|
391
434
|
def commit
|
|
392
435
|
@ds.issue_sql "commit;"
|
|
436
|
+
@commit = true
|
|
437
|
+
end
|
|
438
|
+
|
|
439
|
+
def abort
|
|
440
|
+
@ds.issue_sql "abort;"
|
|
441
|
+
@commit = true
|
|
393
442
|
end
|
|
394
443
|
|
|
395
444
|
def truncate_and_commit(table)
|
|
396
445
|
@ds.issue_sql "truncate #{table};"
|
|
446
|
+
@commit = true
|
|
397
447
|
end
|
|
398
448
|
end
|
|
399
449
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bricolage-streamingload
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.8.1
|
|
4
|
+
version: 0.9.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Minero Aoki
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2016-10-
|
|
12
|
+
date: 2016-10-20 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: bricolage
|