bricolage-streamingload 0.8.1 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8bddfd0337158c01e5acb0fac9ff9d4b3e152029
|
4
|
+
data.tar.gz: 7243c54d80c5bcd4c8e157fd8d5a14e66e629abe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e85208c4fcbf1a199169e75b40490181127fbaf7398e6ebb78daaead3f36704e76b7177ce43edfe404e5509e95f5a592ed8db4d4f7fa379efcca0d382fdedd6
|
7
|
+
data.tar.gz: eb8187f0b900731bdf21c4e3b3ffbb0ac24ad133bd6d7819b20e4b7eb24420e649bf690d43f0b0e3aa5caf496146e208a0d13fa33c6be72183b8469eca96bb1c
|
@@ -18,9 +18,10 @@ module Bricolage
|
|
18
18
|
|
19
19
|
class Job
|
20
20
|
|
21
|
-
def initialize(context:, ctl_ds:, task_id:, force: false, logger:)
|
21
|
+
def initialize(context:, ctl_ds:, log_table: 'strload_load_logs', task_id:, force: false, logger:)
|
22
22
|
@context = context
|
23
23
|
@ctl_ds = ctl_ds
|
24
|
+
@log_table = log_table
|
24
25
|
@task_id = task_id
|
25
26
|
@force = force
|
26
27
|
@logger = logger
|
@@ -53,14 +54,17 @@ module Bricolage
|
|
53
54
|
@logger.error ex.message
|
54
55
|
wait_for_connection('ctl', @ctl_ds) unless fail_fast
|
55
56
|
return false
|
56
|
-
rescue DataConnectionFailed
|
57
|
+
rescue DataConnectionFailed => ex
|
58
|
+
@logger.error ex.message
|
57
59
|
wait_for_connection('data', @data_ds) unless fail_fast
|
58
60
|
# FIXME: tmp: We don't know the transaction was succeeded or not in the Redshift, auto-retry is too dangerous.
|
59
61
|
#return false
|
60
62
|
return true
|
61
|
-
rescue JobFailure
|
63
|
+
rescue JobFailure => ex
|
64
|
+
@logger.error ex.message
|
62
65
|
return false
|
63
|
-
rescue JobError
|
66
|
+
rescue JobError => ex
|
67
|
+
@logger.error ex.message
|
64
68
|
return true
|
65
69
|
rescue Exception => ex
|
66
70
|
@logger.exception ex
|
@@ -101,14 +105,12 @@ module Bricolage
|
|
101
105
|
|
102
106
|
# FIXME: tmp: should be a failure, not an error.
|
103
107
|
rescue DataConnectionFailed => ex
|
104
|
-
@logger.error ex.message
|
105
108
|
ctl.open {
|
106
109
|
ctl.abort_job job_id, 'error', ex.message.lines.first.strip
|
107
110
|
}
|
108
111
|
raise
|
109
112
|
|
110
113
|
rescue JobFailure => ex
|
111
|
-
@logger.error ex.message
|
112
114
|
ctl.open {
|
113
115
|
fail_count = ctl.fail_count(@task_id)
|
114
116
|
final_retry = (fail_count >= MAX_RETRY)
|
@@ -118,7 +120,6 @@ module Bricolage
|
|
118
120
|
}
|
119
121
|
raise
|
120
122
|
rescue JobError => ex
|
121
|
-
@logger.error ex.message
|
122
123
|
ctl.open {
|
123
124
|
ctl.abort_job job_id, 'error', ex.message.lines.first.strip
|
124
125
|
}
|
@@ -138,9 +139,9 @@ module Bricolage
|
|
138
139
|
@manifest = ManifestFile.create(ds: params.ctl_bucket, job_id: job_id, object_urls: task.object_urls, logger: @logger)
|
139
140
|
DataConnection.open(params.ds, @logger) {|data|
|
140
141
|
if params.enable_work_table?
|
141
|
-
data.load_with_work_table params.work_table, @manifest, params.load_options_string, params.sql_source
|
142
|
+
data.load_with_work_table params.work_table, @manifest, params.load_options_string, params.sql_source, @log_table, job_id
|
142
143
|
else
|
143
|
-
data.load_objects params.dest_table, @manifest, params.load_options_string
|
144
|
+
data.load_objects params.dest_table, @manifest, params.load_options_string, @log_table, job_id
|
144
145
|
end
|
145
146
|
}
|
146
147
|
end
|
@@ -192,18 +193,26 @@ module Bricolage
|
|
192
193
|
raise DataConnectionFailed, "data connection failed: #{ex.message}"
|
193
194
|
end
|
194
195
|
|
195
|
-
def load_with_work_table(work_table, manifest, options, sql_source)
|
196
|
+
def load_with_work_table(work_table, manifest, options, sql_source, log_table, job_id)
|
196
197
|
@connection.transaction {|txn|
|
197
198
|
# NOTE: This transaction ends with truncation, this DELETE does nothing
|
198
199
|
# from the second time. So don't worry about DELETE cost here.
|
199
200
|
@connection.execute("delete from #{work_table}")
|
200
|
-
|
201
|
+
execute_copy work_table, manifest, options
|
201
202
|
@connection.execute sql_source
|
203
|
+
write_load_log log_table, job_id
|
202
204
|
txn.truncate_and_commit work_table
|
203
205
|
}
|
204
206
|
end
|
205
207
|
|
206
|
-
def load_objects(dest_table, manifest, options)
|
208
|
+
def load_objects(dest_table, manifest, options, log_table, job_id)
|
209
|
+
@connection.transaction {|txn|
|
210
|
+
execute_copy dest_table, manifest, options
|
211
|
+
write_load_log log_table, job_id
|
212
|
+
}
|
213
|
+
end
|
214
|
+
|
215
|
+
def execute_copy(dest_table, manifest, options)
|
207
216
|
@connection.execute(<<-EndSQL.strip.gsub(/\s+/, ' '))
|
208
217
|
copy #{dest_table}
|
209
218
|
from #{s manifest.url}
|
@@ -217,6 +226,10 @@ module Bricolage
|
|
217
226
|
@logger.info "load succeeded: #{manifest.url}"
|
218
227
|
end
|
219
228
|
|
229
|
+
def write_load_log(log_table, job_id)
|
230
|
+
@connection.execute("insert into #{log_table} (job_id, finish_time) values (#{job_id}, current_timestamp)")
|
231
|
+
end
|
232
|
+
|
220
233
|
end # class DataConnection
|
221
234
|
|
222
235
|
|
@@ -30,6 +30,7 @@ module Bricolage
|
|
30
30
|
|
31
31
|
ctl_ds = ctx.get_data_source('sql', config.fetch('ctl-postgres-ds', 'db_ctl'))
|
32
32
|
task_queue = ctx.get_data_source('sqs', config.fetch('task-queue-ds', 'sqs_task'))
|
33
|
+
log_table = config.fetch('log-table', 'strload_load_logs')
|
33
34
|
service_logger =
|
34
35
|
if config.key?('alert-level')
|
35
36
|
new_alerting_logger(ctx, config)
|
@@ -40,6 +41,7 @@ module Bricolage
|
|
40
41
|
task_handler = new(
|
41
42
|
context: ctx,
|
42
43
|
ctl_ds: ctl_ds,
|
44
|
+
log_table: log_table,
|
43
45
|
task_queue: task_queue,
|
44
46
|
working_dir: opts.working_dir,
|
45
47
|
logger: service_logger,
|
@@ -90,9 +92,10 @@ module Bricolage
|
|
90
92
|
# ignore
|
91
93
|
end
|
92
94
|
|
93
|
-
def initialize(context:, ctl_ds:, task_queue:, working_dir:, logger:, job_class: Job)
|
95
|
+
def initialize(context:, ctl_ds:, log_table:, task_queue:, working_dir:, logger:, job_class: Job)
|
94
96
|
@ctx = context
|
95
97
|
@ctl_ds = ctl_ds
|
98
|
+
@log_table = log_table
|
96
99
|
@task_queue = task_queue
|
97
100
|
@working_dir = working_dir
|
98
101
|
@logger = logger
|
@@ -102,7 +105,7 @@ module Bricolage
|
|
102
105
|
attr_reader :logger
|
103
106
|
|
104
107
|
def execute_task_by_id(task_id, force: false)
|
105
|
-
job =
|
108
|
+
job = new_job(task_id, force)
|
106
109
|
job.execute(fail_fast: true)
|
107
110
|
end
|
108
111
|
|
@@ -119,7 +122,7 @@ module Bricolage
|
|
119
122
|
# message handler
|
120
123
|
def handle_streaming_load_v3(t)
|
121
124
|
Dir.chdir(@working_dir) {
|
122
|
-
job =
|
125
|
+
job = new_job(t.task_id, t.force?)
|
123
126
|
if job.execute
|
124
127
|
@task_queue.delete_message(t)
|
125
128
|
end
|
@@ -128,6 +131,10 @@ module Bricolage
|
|
128
131
|
@logger.exception ex
|
129
132
|
end
|
130
133
|
|
134
|
+
def new_job(task_id, force)
|
135
|
+
@job_class.new(context: @ctx, ctl_ds: @ctl_ds, log_table: @log_table, task_id: task_id, force: force, logger: @logger)
|
136
|
+
end
|
137
|
+
|
131
138
|
def job_class
|
132
139
|
@job_class ||= Job
|
133
140
|
end
|
@@ -27,8 +27,13 @@ module Bricolage
|
|
27
27
|
job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 1, force: false, logger: ctx.logger)
|
28
28
|
job.execute_task
|
29
29
|
|
30
|
-
|
31
|
-
|
30
|
+
assert_equal [
|
31
|
+
"begin transaction;",
|
32
|
+
"copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
33
|
+
"insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
|
34
|
+
"commit;"
|
35
|
+
], job.data_ds.sql_list
|
36
|
+
|
32
37
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
33
38
|
assert_equal 1, job_row['task_id'].to_i
|
34
39
|
assert_equal job.process_id, job_row['process_id']
|
@@ -48,11 +53,14 @@ module Bricolage
|
|
48
53
|
job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
|
49
54
|
job.execute_task
|
50
55
|
|
51
|
-
assert_equal
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
+
assert_equal [
|
57
|
+
"begin transaction;",
|
58
|
+
"delete from testschema.with_work_table_wk",
|
59
|
+
"copy testschema.with_work_table_wk from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
60
|
+
"insert into testschema.with_work_table select * from testschema.with_work_table_wk;\n",
|
61
|
+
"insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
|
62
|
+
"truncate testschema.with_work_table_wk;"
|
63
|
+
], job.data_ds.sql_list
|
56
64
|
|
57
65
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
58
66
|
assert_equal 11, job_row['task_id'].to_i
|
@@ -103,8 +111,12 @@ module Bricolage
|
|
103
111
|
job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: true, logger: ctx.logger)
|
104
112
|
job.execute_task
|
105
113
|
|
106
|
-
|
107
|
-
|
114
|
+
assert_equal [
|
115
|
+
"begin transaction;",
|
116
|
+
"copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
117
|
+
"insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
|
118
|
+
"commit;"
|
119
|
+
], job.data_ds.sql_list
|
108
120
|
|
109
121
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
110
122
|
assert_equal 11, job_row['task_id'].to_i
|
@@ -127,8 +139,11 @@ module Bricolage
|
|
127
139
|
assert_raise(JobFailure) {
|
128
140
|
job.execute_task
|
129
141
|
}
|
130
|
-
|
131
|
-
|
142
|
+
assert_equal [
|
143
|
+
"begin transaction;",
|
144
|
+
"copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
145
|
+
"abort;"
|
146
|
+
], job.data_ds.sql_list
|
132
147
|
|
133
148
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
134
149
|
assert_equal 11, job_row['task_id'].to_i
|
@@ -153,8 +168,11 @@ module Bricolage
|
|
153
168
|
assert_raise(JobFailure) {
|
154
169
|
job.execute_task
|
155
170
|
}
|
156
|
-
|
157
|
-
|
171
|
+
assert_equal [
|
172
|
+
"begin transaction;",
|
173
|
+
"copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
174
|
+
"abort;"
|
175
|
+
], job.data_ds.sql_list
|
158
176
|
|
159
177
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
160
178
|
assert_equal 11, job_row['task_id'].to_i
|
@@ -183,8 +201,12 @@ module Bricolage
|
|
183
201
|
assert_raise(JobCancelled) {
|
184
202
|
job.execute_task
|
185
203
|
}
|
186
|
-
|
187
|
-
|
204
|
+
assert_equal [
|
205
|
+
"begin transaction;",
|
206
|
+
"copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
207
|
+
"abort;"
|
208
|
+
], job.data_ds.sql_list
|
209
|
+
|
188
210
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
189
211
|
assert_equal 11, job_row['task_id'].to_i
|
190
212
|
assert_equal job.process_id, job_row['process_id']
|
@@ -206,7 +228,12 @@ module Bricolage
|
|
206
228
|
assert_raise(JobError) {
|
207
229
|
job.execute_task
|
208
230
|
}
|
209
|
-
assert_equal
|
231
|
+
assert_equal [
|
232
|
+
"begin transaction;",
|
233
|
+
"copy testschema.job_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
234
|
+
"abort;"
|
235
|
+
], job.data_ds.sql_list
|
236
|
+
|
210
237
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
211
238
|
assert_equal 11, job_row['task_id'].to_i
|
212
239
|
assert_equal job.process_id, job_row['process_id']
|
@@ -227,7 +254,12 @@ module Bricolage
|
|
227
254
|
assert_raise(JobError) {
|
228
255
|
job.execute_task
|
229
256
|
}
|
230
|
-
assert_equal
|
257
|
+
assert_equal [
|
258
|
+
"begin transaction;",
|
259
|
+
"copy testschema.unexpected_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
|
260
|
+
"abort;"
|
261
|
+
], job.data_ds.sql_list
|
262
|
+
|
231
263
|
job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
|
232
264
|
assert_equal 11, job_row['task_id'].to_i
|
233
265
|
assert_equal job.process_id, job_row['process_id']
|
@@ -379,21 +411,39 @@ module Bricolage
|
|
379
411
|
|
380
412
|
def transaction
|
381
413
|
@ds.issue_sql "begin transaction;"
|
382
|
-
|
414
|
+
txn = Transaction.new(@ds)
|
415
|
+
yield txn
|
416
|
+
rescue
|
417
|
+
txn.abort unless txn.committed?
|
418
|
+
raise
|
419
|
+
ensure
|
420
|
+
txn.commit unless txn.committed?
|
383
421
|
end
|
384
422
|
end
|
385
423
|
|
386
424
|
class Transaction
|
387
425
|
def initialize(ds)
|
388
426
|
@ds = ds
|
427
|
+
@commit = false
|
428
|
+
end
|
429
|
+
|
430
|
+
def committed?
|
431
|
+
@commit
|
389
432
|
end
|
390
433
|
|
391
434
|
def commit
|
392
435
|
@ds.issue_sql "commit;"
|
436
|
+
@commit = true
|
437
|
+
end
|
438
|
+
|
439
|
+
def abort
|
440
|
+
@ds.issue_sql "abort;"
|
441
|
+
@commit = true
|
393
442
|
end
|
394
443
|
|
395
444
|
def truncate_and_commit(table)
|
396
445
|
@ds.issue_sql "truncate #{table};"
|
446
|
+
@commit = true
|
397
447
|
end
|
398
448
|
end
|
399
449
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bricolage-streamingload
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.1
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Minero Aoki
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-10-
|
12
|
+
date: 2016-10-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bricolage
|