bricolage-streamingload 0.9.0 → 0.10.0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4a46109dbac16f28b807d4e2b29808d254ebe209
+  data.tar.gz: f97bf6c58057d05f89f6fe36b1b23b18b5b1d089
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4bc1033f8c06cb6f56a782129d1927919f3689828710182494a03edccc750b9dd4ab915ba7efc7f79255e754d36f4c466ebd6189fce9de1dee6985b1b6cfecce
+  data.tar.gz: aef6bd65662f942641ea2701880c41e28c88caa8dcb9aab9e02c93a2d76d28fdd0aec28dc0c4d4b159a3c2098520da8cfad27f9d0cd1824ae40428c3f5b91988
@@ -18,9 +18,10 @@ module Bricolage

     class Job

-      def initialize(context:, ctl_ds:, log_table: 'strload_load_logs', task_id:, force: false, logger:)
+      def initialize(context:, ctl_ds:, data_ds:, log_table: 'strload_load_logs', task_id:, force: false, logger:)
         @context = context
         @ctl_ds = ctl_ds
+        @data_ds = data_ds
         @log_table = log_table
         @task_id = task_id
         @force = force
@@ -28,7 +29,6 @@ module Bricolage

         @task = nil
         @job_id = nil
-        @data_ds = nil
         @manifest = nil
       end

@@ -57,9 +57,7 @@ module Bricolage
      rescue DataConnectionFailed => ex
        @logger.error ex.message
        wait_for_connection('data', @data_ds) unless fail_fast
-
-        #return false
-        return true
+        return false
      rescue JobFailure => ex
        @logger.error ex.message
        return false
@@ -87,6 +85,16 @@ module Bricolage
          raise JobDefered, "defered: task_id=#{@task_id}"
        end

+        if @task.unknown_state?
+          true_status = DataConnection.open(@data_ds, @logger) {|data|
+            data.get_job_status(@log_table, @task.last_job_id)
+          }
+          @logger.info "fixiating unknown job status: job_id=#{@task.last_job_id}, status=(unknown->#{true_status})"
+          @task.fix_last_job_status true_status
+          ctl.fix_job_status @task.last_job_id, true_status
+          @logger.info "job status fixed."
+        end
+
        @job_id = ctl.begin_job(@task_id, @process_id, @force)
        unless @job_id
          @logger.warn "task is already succeeded and not forced; discard task: task_id=#{@task_id}"
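This hunk is the core of 0.10.0: a task whose last job ended in 'unknown' (the control side lost its data connection mid-load) is now resolved by asking the data side whether the load actually committed, before a new job is started. A minimal sketch of that decision rule, relying on the fact (visible in the tests below) that a strload_load_logs row is written inside the same transaction as the COPY; the method and variable names here are illustrative, not part of the gem:

    # Resolve an 'unknown' job status from the data-side load log.
    # The load-log row commits together with the COPY, so its presence
    # proves success; its absence means the load is safe to retry.
    def resolve_unknown_status(job_id, committed_job_ids)
      committed_job_ids.include?(job_id) ? 'success' : 'failure'
    end

    puts resolve_unknown_status(101, [99, 101])   # => success (COPY committed)
    puts resolve_unknown_status(102, [99, 101])   # => failure (safe to retry)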
@@ -102,17 +110,14 @@ module Bricolage
        }
      rescue ControlConnectionFailed
        raise
-
-      # FIXME: tmp: should be a failure, not an error.
      rescue DataConnectionFailed => ex
        ctl.open {
-          ctl.abort_job job_id, '
+          ctl.abort_job job_id, 'unknown', ex.message.lines.first.strip
        }
        raise
-
      rescue JobFailure => ex
        ctl.open {
-          fail_count =
+          fail_count = @task.failure_count
          final_retry = (fail_count >= MAX_RETRY)
          retry_msg = (fail_count > 0) ? "(retry\##{fail_count}#{final_retry ? ' FINAL' : ''}) " : ''
          ctl.abort_job job_id, 'failure', retry_msg + ex.message.lines.first.strip
@@ -193,6 +198,11 @@ module Bricolage
        raise DataConnectionFailed, "data connection failed: #{ex.message}"
      end

+      def get_job_status(log_table, job_id)
+        count = @connection.query_value("select count(*) from #{log_table} where job_id = #{job_id}")
+        count.to_i > 0 ? 'success' : 'failure'
+      end
+
      def load_with_work_table(work_table, manifest, options, sql_source, log_table, job_id)
        @connection.transaction {|txn|
          # NOTE: This transaction ends with truncation, this DELETE does nothing
@@ -255,7 +265,32 @@ module Bricolage
        raise ControlConnectionFailed, "control connection failed: #{ex.message}"
      end

-      TaskInfo = Struct.new(:task_id, :task_class, :schema_name, :table_name, :disabled, :object_urls)
+      TaskInfo = Struct.new(:task_id, :task_class, :schema_name, :table_name, :disabled, :object_urls, :jobs)
+      class TaskInfo
+        def unknown_state?
+          return false if jobs.empty?
+          jobs.last.status == 'unknown'
+        end
+
+        def last_job_id
+          return nil if jobs.empty?
+          jobs.last.job_id
+        end
+
+        def fix_last_job_status(st)
+          jobs.last.status = st unless jobs.empty?
+        end
+
+        def failure_count
+          @failure_count ||= begin
+            statuses = jobs.map(&:status)
+            statuses.delete('duplicated')
+            last_succ = statuses.rindex('success')
+            statuses[0..last_succ] = [] if last_succ
+            statuses.size
+          end
+        end
+      end

      def load_task(task_id)
        rec = @connection.query_row(<<-EndSQL) or raise JobError, "no such task: #{task_id}"
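TaskInfo#failure_count replaces the SQL-side fail_count removed further down: it counts only the jobs after the most recent success, after discarding 'duplicated' entries (with no success present, every remaining entry counts). A worked example of the array logic, with an invented status history:

    statuses = %w[failure success failure duplicated failure]
    statuses.delete('duplicated')             # in place: ["failure", "success", "failure", "failure"]
    last_succ = statuses.rindex('success')    # most recent success => index 1
    statuses[0..last_succ] = [] if last_succ  # drop it and everything before it
    p statuses.size                           # => 2, the failures since the last success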
@@ -277,10 +312,29 @@ module Bricolage
          rec['schema_name'],
          rec['table_name'],
          (rec['disabled'] != 'f'),
-          load_object_urls(task_id)
+          load_object_urls(task_id),
+          load_jobs(task_id)
        )
      end

+      def load_jobs(task_id)
+        records = @connection.query_rows(<<-EndSQL)
+          select
+              job_id
+              , status
+          from
+              strload_jobs
+          where
+              task_id = #{task_id}
+          order by
+              start_time
+          ;
+        EndSQL
+        records.map {|rec| JobInfo.new(rec['job_id'].to_i, rec['status']) }
+      end
+
+      JobInfo = Struct.new(:job_id, :status, :start_time)
+
      def load_object_urls(task_id)
        urls = @connection.query_values(<<-EndSQL)
          select
@@ -296,6 +350,20 @@ module Bricolage
        urls
      end

+      def fix_job_status(job_id, status)
+        @connection.update(<<-EndSQL)
+          update
+              strload_jobs
+          set
+              status = #{s status}
+              , message = 'status fixed: ' || message
+          where
+              job_id = #{job_id}
+              and status = 'unknown'
+          ;
+        EndSQL
+      end
+
      def begin_job(task_id, process_id, force)
        job_id = @connection.query_value(<<-EndSQL)
          insert into strload_jobs
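The `and status = 'unknown'` guard makes the fix idempotent: once a row has been rewritten, a repeated or concurrent fix attempt matches nothing, and prefixing the message preserves the original error text. The same semantics against hypothetical in-memory rows:

    jobs = [{job_id: 101, status: 'unknown', message: 'data connection failed'}]

    def fix_job_status(jobs, job_id, status)
      jobs.each do |row|
        next unless row[:job_id] == job_id && row[:status] == 'unknown'
        row[:status] = status
        row[:message] = "status fixed: #{row[:message]}"  # keep the original error
      end
    end

    fix_job_status(jobs, 101, 'success')
    fix_job_status(jobs, 101, 'failure')  # no-op: the row is no longer 'unknown'
    p jobs.first[:status]                 # => "success"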
@@ -320,22 +388,6 @@ module Bricolage
        return job_id ? job_id.to_i : nil
      end

-      def fail_count(task_id)
-        statuses = @connection.query_values(<<-EndSQL)
-          select
-              j.status
-          from
-              strload_tasks t
-              inner join strload_jobs j using (task_id)
-          where
-              t.task_id = #{task_id}
-          order by
-              j.job_id desc
-        EndSQL
-        statuses.shift if statuses.first == 'running' # current job
-        statuses.take_while {|st| %w[failure error].include?(st) }.size
-      end
-
      def commit_job(job_id, message = nil)
        @connection.transaction {|txn|
          write_job_result job_id, 'success', (message || '')
@@ -29,6 +29,7 @@ module Bricolage
      ctx = Context.for_application(opts.working_dir, environment: opts.environment, logger: logger)

      ctl_ds = ctx.get_data_source('sql', config.fetch('ctl-postgres-ds', 'db_ctl'))
+      data_ds = ctx.get_data_source('sql', config.fetch('redshift-ds', 'db_data'))
      task_queue = ctx.get_data_source('sqs', config.fetch('task-queue-ds', 'sqs_task'))
      log_table = config.fetch('log-table', 'strload_load_logs')
      service_logger =
@@ -41,6 +42,7 @@ module Bricolage
      task_handler = new(
        context: ctx,
        ctl_ds: ctl_ds,
+        data_ds: data_ds,
        log_table: log_table,
        task_queue: task_queue,
        working_dir: opts.working_dir,
@@ -92,9 +94,10 @@ module Bricolage
        # ignore
      end

-      def initialize(context:, ctl_ds:, log_table:, task_queue:, working_dir:, logger:, job_class: Job)
+      def initialize(context:, ctl_ds:, data_ds:, log_table:, task_queue:, working_dir:, logger:, job_class: Job)
        @ctx = context
        @ctl_ds = ctl_ds
+        @data_ds = data_ds
        @log_table = log_table
        @task_queue = task_queue
        @working_dir = working_dir
@@ -132,7 +135,7 @@ module Bricolage
      end

      def new_job(task_id, force)
-        @job_class.new(context: @ctx, ctl_ds: @ctl_ds, log_table: @log_table, task_id: task_id, force: force, logger: @logger)
+        @job_class.new(context: @ctx, ctl_ds: @ctl_ds, data_ds: data_ds, log_table: @log_table, task_id: task_id, force: force, logger: @logger)
      end

      def job_class
@@ -16,7 +16,7 @@ module Bricolage
  class TestJob < Test::Unit::TestCase

    test "execute_task" do
-      setup_context {|
+      setup_context {|db|
        db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
        db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
        db.insert_into 'strload_task_objects', [1, 1], [1, 2]
@@ -24,7 +24,7 @@ module Bricolage
          [1, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
          [2, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]

-        job =
+        job = new_job(task_id: 1, force: false)
        job.execute_task

        assert_equal [
@@ -42,7 +42,7 @@ module Bricolage
    end

    test "execute_task (with work table)" do
-      setup_context {|
+      setup_context {|db|
        db.insert_into 'strload_tables', [1, 'testschema.with_work_table', 'testschema', 'with_work_table', 100, 1800, false]
        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
        db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -50,7 +50,7 @@ module Bricolage
          [1001, 's3://data-bucket/testschema.with_work_table/0001.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp],
          [1002, 's3://data-bucket/testschema.with_work_table/0002.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp]

-        job =
+        job = new_job(task_id: 11, force: false)
        job.execute_task

        assert_equal [
@@ -70,11 +70,11 @@ module Bricolage
    end

    test "execute_task (disabled)" do
-      setup_context {|
+      setup_context {|db|
        db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, true]
        db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]

-        job =
+        job = new_job(task_id: 1, force: false)
        assert_raise(JobDefered) {
          job.execute_task
        }
@@ -84,7 +84,7 @@ module Bricolage
    end

    test "execute_task (duplicated)" do
-      setup_context {|
+      setup_context {|db|
        db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
        db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
        db.insert_into 'strload_jobs',
@@ -92,7 +92,7 @@ module Bricolage
          [2, 1, 'localhost-1234', 'success', current_timestamp, current_timestamp, ''],
          [3, 1, 'localhost-1234', 'duplicated', current_timestamp, current_timestamp, '']

-        job =
+        job = new_job(task_id: 1, force: false)
        assert_raise(JobDuplicated) {
          job.execute_task
        }
@@ -100,7 +100,7 @@ module Bricolage
    end

    test "execute_task (duplicated but forced)" do
-      setup_context {|
+      setup_context {|db|
        db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
        db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -108,7 +108,7 @@ module Bricolage
          [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
          [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]

-        job =
+        job = new_job(task_id: 11, force: true)
        job.execute_task

        assert_equal [
@@ -127,7 +127,7 @@ module Bricolage
    end

    test "execute_task (load fails / first time)" do
-      setup_context {|
+      setup_context {|db|
        db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
        db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -135,7 +135,7 @@ module Bricolage
          [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
          [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]

-        job =
+        job = new_job(task_id: 11, force: false)
        assert_raise(JobFailure) {
          job.execute_task
        }
@@ -153,7 +153,7 @@ module Bricolage
    end

    test "execute_task (load fails / nth time)" do
-      setup_context {|
+      setup_context {|db|
        db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
        db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -164,7 +164,7 @@ module Bricolage
          [101, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed'],
          [102, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed']

-        job =
+        job = new_job(task_id: 11, force: false)
        assert_raise(JobFailure) {
          job.execute_task
        }
@@ -183,7 +183,7 @@ module Bricolage
    end

    test "execute_task (too many retry)" do
-      setup_context {|
+      setup_context {|db|
        db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
        db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -197,7 +197,7 @@ module Bricolage
          [104, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#3 query failed'],
          [105, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#4 query failed']

-        job =
+        job = new_job(task_id: 11, force: false)
        assert_raise(JobCancelled) {
          job.execute_task
        }
@@ -216,7 +216,7 @@ module Bricolage
    end

    test "execute_task (job error)" do
-      setup_context {|
+      setup_context {|db|
        db.insert_into 'strload_tables', [1, 'testschema.job_error', 'testschema', 'job_error', 100, 1800, false]
        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
        db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -224,7 +224,7 @@ module Bricolage
          [1001, 's3://data-bucket/testschema.job_error/0001.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp],
          [1002, 's3://data-bucket/testschema.job_error/0002.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp]

-        job =
+        job = new_job(task_id: 11, force: false)
        assert_raise(JobError) {
          job.execute_task
        }
@@ -242,15 +242,15 @@ module Bricolage
    end

    test "execute_task (unexpected error)" do
-      setup_context {|
+      setup_context {|db|
        db.insert_into 'strload_tables', [1, 'testschema.unexpected_error', 'testschema', 'unexpected_error', 100, 1800, false]
-        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1,
+        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
        db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
        db.insert_into 'strload_objects',
          [1001, 's3://data-bucket/testschema.unexpected_error/0001.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp],
          [1002, 's3://data-bucket/testschema.unexpected_error/0002.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp]

-        job =
+        job = new_job(task_id: 11, force: false)
        assert_raise(JobError) {
          job.execute_task
        }
@@ -267,16 +267,78 @@ module Bricolage
      }
    end

+    test "execute_task (unknown status, really=success)" do
+      setup_context {|db|
+        db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
+        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+        db.insert_into 'strload_jobs',
+          [101, 11, 'localhost-1234', 'unknown', current_timestamp, current_timestamp, 'data connection failed']
+        @data_ds.provide_job_status 101, true
+
+        job = new_job(task_id: 11, force: false)
+        assert_raise(JobDuplicated) {
+          job.execute_task
+        }
+
+        job_row = db.query_row("select * from strload_jobs where job_id = 101")
+        assert_equal 'success', job_row['status']
+      }
+    end
+
+    test "execute_task (unknown status, really=failure)" do
+      setup_context {|db|
+        db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
+        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+        db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+        db.insert_into 'strload_objects',
+          [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
+          [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
+        db.insert_into 'strload_jobs',
+          [101, 11, 'localhost-1234', 'unknown', current_timestamp, current_timestamp, 'data connection failed']
+        @data_ds.provide_job_status 101, false
+
+        job = new_job(task_id: 11, force: false)
+        job.execute_task
+
+        assert_equal [
+          "begin transaction;",
+          "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
+          "insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
+          "commit;"
+        ], job.data_ds.sql_list
+
+        job_row = db.query_row("select * from strload_jobs where job_id = 101")
+        assert_equal 'failure', job_row['status']
+
+        job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+        assert_equal 11, job_row['task_id'].to_i
+        assert_equal job.process_id, job_row['process_id']
+        assert_equal 'success', job_row['status']
+      }
+    end
+
    def setup_context(verbose: false)
-      ctx = Context.for_application('.', environment: 'test', logger: (verbose ? nil : NullLogger.new))
-      ctl_ds = ctx.get_data_source('sql', 'dwhctl')
-      ctl_ds.open {|conn|
+      @ctx = Context.for_application('.', environment: 'test', logger: (verbose ? nil : NullLogger.new))
+      @ctl_ds = @ctx.get_data_source('sql', 'dwhctl')
+      @data_ds = @ctx.get_data_source('sql', 'db_data_mock')
+      @ctl_ds.open {|conn|
        client = SQLClient.new(conn)
        clear_all_tables(client)
-        yield
+        yield client
      }
    end

+    def new_job(task_id:, force:)
+      Job.new(
+        context: @ctx,
+        ctl_ds: @ctl_ds,
+        data_ds: @data_ds,
+        logger: @ctx.logger,
+        task_id: task_id,
+        force: force
+      )
+    end
+
    # FIXME: database cleaner
    def clear_all_tables(client)
      client.truncate_tables %w[
@@ -373,6 +435,7 @@ module Bricolage
        @fail_pattern = fail_pattern ? Regexp.compile(fail_pattern) : nil
        @error_pattern = error_pattern ? Regexp.compile(error_pattern) : nil
        @exception_pattern = exception_pattern ? Regexp.compile(exception_pattern) : nil
+        @job_status = {}
      end

      attr_reader :sql_list
@@ -400,11 +463,30 @@ module Bricolage
        end
      end

+      def provide_job_status(job_id, succeeded)
+        @job_status[job_id] = succeeded
+      end
+
+      def job_succeeded?(job_id)
+        raise "job status unregistered: job_id=#{job_id}" unless @job_status.key?(job_id)
+        @job_status[job_id]
+      end
+
      class Connection
        def initialize(ds)
          @ds = ds
        end

+        def query_value(sql)
+          case sql
+          when /\bstrload_load_logs where job_id = (\d+)/
+            job_id = $1.to_i
+            @ds.job_succeeded?(job_id) ? 1 : 0
+          else
+            raise "unknown query: #{sql}"
+          end
+        end
+
        def execute(sql)
          @ds.issue_sql sql
        end
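The mock connection routes the status probe purely by pattern-matching the SQL text and raises on anything unrecognized, so a new query cannot silently return a bogus value. For example, with the probe string built by get_job_status:

    sql = "select count(*) from strload_load_logs where job_id = 101"
    if sql =~ /\bstrload_load_logs where job_id = (\d+)/
      p $1.to_i  # => 101; the mock then answers 1 or 0 from @job_status
    end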
@@ -482,7 +564,27 @@ module Bricolage
      end
    end

-
+    test "TaskInfo#failure_count" do
+      test_data = [
+        [%w[], 0],
+        [%w[success], 0],
+        [%w[failure], 1],
+        [%w[error], 1],
+        [%w[failure failure], 2],
+        [%w[failure error], 2],
+        [%w[failure success], 0],
+        [%w[success success], 0],
+        [%w[failure success failure], 1],
+        [%w[failure success failure success failure failure], 2]
+      ]
+      c = Job::ControlConnection
+      test_data.each do |status_list, expected_count|
+        task = c::TaskInfo.new(nil, nil, nil, nil, nil, nil, status_list.map {|st| c::JobInfo.new(nil, st) })
+        assert_equal expected_count, task.failure_count
+      end
+    end
+
+  end # class TestJob

-end
-end
+end # module StreamingLoad
+end # module Bricolage