bricolage-streamingload 0.9.0 → 0.10.0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4a46109dbac16f28b807d4e2b29808d254ebe209
+  data.tar.gz: f97bf6c58057d05f89f6fe36b1b23b18b5b1d089
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4bc1033f8c06cb6f56a782129d1927919f3689828710182494a03edccc750b9dd4ab915ba7efc7f79255e754d36f4c466ebd6189fce9de1dee6985b1b6cfecce
+  data.tar.gz: aef6bd65662f942641ea2701880c41e28c88caa8dcb9aab9e02c93a2d76d28fdd0aec28dc0c4d4b159a3c2098520da8cfad27f9d0cd1824ae40428c3f5b91988
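With 0.10.0 the gem publishes non-empty digests. If you want to confirm them against a download, here is a minimal sketch, not part of the gem: a .gem file is itself a tar archive, and the metadata.gz and data.tar.gz entries are exactly what checksums.yaml digests. The filename below is an assumption.

    require 'digest'
    require 'rubygems/package'

    # Recompute the SHA512 lines of checksums.yaml from a downloaded gem.
    # 'bricolage-streamingload-0.10.0.gem' is an assumed local path.
    File.open('bricolage-streamingload-0.10.0.gem', 'rb') do |io|
      Gem::Package::TarReader.new(io) do |tar|
        tar.each do |entry|
          next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
          puts "#{entry.full_name}: #{Digest::SHA512.hexdigest(entry.read)}"
        end
      end
    end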
@@ -18,9 +18,10 @@ module Bricolage
 
     class Job
 
-      def initialize(context:, ctl_ds:, log_table: 'strload_load_logs', task_id:, force: false, logger:)
+      def initialize(context:, ctl_ds:, data_ds:, log_table: 'strload_load_logs', task_id:, force: false, logger:)
         @context = context
         @ctl_ds = ctl_ds
+        @data_ds = data_ds
         @log_table = log_table
         @task_id = task_id
         @force = force
@@ -28,7 +29,6 @@ module Bricolage
 
         @task = nil
         @job_id = nil
-        @data_ds = nil
         @manifest = nil
       end
 
@@ -57,9 +57,7 @@ module Bricolage
       rescue DataConnectionFailed => ex
         @logger.error ex.message
         wait_for_connection('data', @data_ds) unless fail_fast
-
-        #return false
-        return true
+        return false
       rescue JobFailure => ex
         @logger.error ex.message
         return false
@@ -87,6 +85,16 @@ module Bricolage
           raise JobDefered, "defered: task_id=#{@task_id}"
         end
 
+        if @task.unknown_state?
+          true_status = DataConnection.open(@data_ds, @logger) {|data|
+            data.get_job_status(@log_table, @task.last_job_id)
+          }
+          @logger.info "fixiating unknown job status: job_id=#{@task.last_job_id}, status=(unknown->#{true_status})"
+          @task.fix_last_job_status true_status
+          ctl.fix_job_status @task.last_job_id, true_status
+          @logger.info "job status fixed."
+        end
+
         @job_id = ctl.begin_job(@task_id, @process_id, @force)
         unless @job_id
           @logger.warn "task is already succeeded and not forced; discard task: task_id=#{@task_id}"
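The hunk above is the core behavioral change in 0.10.0: when the previous job for a task ended with status 'unknown' (the data connection dropped before the outcome was observed), the job now asks the data warehouse itself what happened before starting a new attempt, then repairs both the in-memory TaskInfo and the control table. The probe works because, as the test expectations later in this diff show, the COPY and the insert into strload_load_logs run in one transaction. A condensed restatement of the rule implemented by get_job_status further down:

    # Condensed from this diff's DataConnection#get_job_status: a row in the
    # load-log table can only exist if the load transaction committed.
    def true_job_status(conn, log_table, job_id)
      count = conn.query_value("select count(*) from #{log_table} where job_id = #{job_id}")
      count.to_i > 0 ? 'success' : 'failure'
    end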
@@ -102,17 +110,14 @@ module Bricolage
         }
       rescue ControlConnectionFailed
         raise
-
-        # FIXME: tmp: should be a failure, not an error.
       rescue DataConnectionFailed => ex
         ctl.open {
-          ctl.abort_job job_id, '
+          ctl.abort_job job_id, 'unknown', ex.message.lines.first.strip
         }
         raise
-
       rescue JobFailure => ex
         ctl.open {
-          fail_count =
+          fail_count = @task.failure_count
           final_retry = (fail_count >= MAX_RETRY)
           retry_msg = (fail_count > 0) ? "(retry\##{fail_count}#{final_retry ? ' FINAL' : ''}) " : ''
           ctl.abort_job job_id, 'failure', retry_msg + ex.message.lines.first.strip
@@ -193,6 +198,11 @@ module Bricolage
         raise DataConnectionFailed, "data connection failed: #{ex.message}"
       end
 
+      def get_job_status(log_table, job_id)
+        count = @connection.query_value("select count(*) from #{log_table} where job_id = #{job_id}")
+        count.to_i > 0 ? 'success' : 'failure'
+      end
+
       def load_with_work_table(work_table, manifest, options, sql_source, log_table, job_id)
         @connection.transaction {|txn|
           # NOTE: This transaction ends with truncation, this DELETE does nothing
@@ -255,7 +265,32 @@ module Bricolage
         raise ControlConnectionFailed, "control connection failed: #{ex.message}"
       end
 
-      TaskInfo = Struct.new(:task_id, :task_class, :schema_name, :table_name, :disabled, :object_urls)
+      TaskInfo = Struct.new(:task_id, :task_class, :schema_name, :table_name, :disabled, :object_urls, :jobs)
+      class TaskInfo
+        def unknown_state?
+          return false if jobs.empty?
+          jobs.last.status == 'unknown'
+        end
+
+        def last_job_id
+          return nil if jobs.empty?
+          jobs.last.job_id
+        end
+
+        def fix_last_job_status(st)
+          jobs.last.status = st unless jobs.empty?
+        end
+
+        def failure_count
+          @failure_count ||= begin
+            statuses = jobs.map(&:status)
+            statuses.delete('duplicated')
+            last_succ = statuses.rindex('success')
+            statuses[0..last_succ] = [] if last_succ
+            statuses.size
+          end
+        end
+      end
 
       def load_task(task_id)
         rec = @connection.query_row(<<-EndSQL) or raise JobError, "no such task: #{task_id}"
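TaskInfo#failure_count feeds the retry#N / FINAL annotation in the JobFailure handler above. The rule: ignore 'duplicated' records, drop everything up to and including the most recent 'success', and count what remains, so only the current streak of failures and errors is counted. Stepping through the same statements with a sample history (consistent with the unit tests at the end of this diff):

    statuses = %w[failure success failure error]
    statuses.delete('duplicated')              # nothing to remove in this sample
    last_succ = statuses.rindex('success')     # => 1
    statuses[0..last_succ] = [] if last_succ   # leaves ["failure", "error"]
    statuses.size                              # => 2 failures since the last success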
@@ -277,10 +312,29 @@ module Bricolage
           rec['schema_name'],
           rec['table_name'],
           (rec['disabled'] != 'f'),
-          load_object_urls(task_id)
+          load_object_urls(task_id),
+          load_jobs(task_id)
         )
       end
 
+      def load_jobs(task_id)
+        records = @connection.query_rows(<<-EndSQL)
+          select
+              job_id
+              , status
+          from
+              strload_jobs
+          where
+              task_id = #{task_id}
+          order by
+              start_time
+          ;
+        EndSQL
+        records.map {|rec| JobInfo.new(rec['job_id'].to_i, rec['status']) }
+      end
+
+      JobInfo = Struct.new(:job_id, :status, :start_time)
+
       def load_object_urls(task_id)
         urls = @connection.query_values(<<-EndSQL)
           select
@@ -296,6 +350,20 @@ module Bricolage
         urls
       end
 
+      def fix_job_status(job_id, status)
+        @connection.update(<<-EndSQL)
+          update
+              strload_jobs
+          set
+              status = #{s status}
+              , message = 'status fixed: ' || message
+          where
+              job_id = #{job_id}
+              and status = 'unknown'
+          ;
+        EndSQL
+      end
+
       def begin_job(task_id, process_id, force)
         job_id = @connection.query_value(<<-EndSQL)
           insert into strload_jobs
@@ -320,22 +388,6 @@ module Bricolage
         return job_id ? job_id.to_i : nil
       end
 
-      def fail_count(task_id)
-        statuses = @connection.query_values(<<-EndSQL)
-          select
-              j.status
-          from
-              strload_tasks t
-              inner join strload_jobs j using (task_id)
-          where
-              t.task_id = #{task_id}
-          order by
-              j.job_id desc
-        EndSQL
-        statuses.shift if statuses.first == 'running' # current job
-        statuses.take_while {|st| %w[failure error].include?(st) }.size
-      end
-
       def commit_job(job_id, message = nil)
         @connection.transaction {|txn|
           write_job_result job_id, 'success', (message || '')
@@ -29,6 +29,7 @@ module Bricolage
       ctx = Context.for_application(opts.working_dir, environment: opts.environment, logger: logger)
 
       ctl_ds = ctx.get_data_source('sql', config.fetch('ctl-postgres-ds', 'db_ctl'))
+      data_ds = ctx.get_data_source('sql', config.fetch('redshift-ds', 'db_data'))
       task_queue = ctx.get_data_source('sqs', config.fetch('task-queue-ds', 'sqs_task'))
       log_table = config.fetch('log-table', 'strload_load_logs')
       service_logger =
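For deployment this means one new configuration key: the daemon now fetches a second SQL data source under 'redshift-ds', falling back to the name 'db_data'. A sketch of the lookup with the keys and defaults taken from this diff (the values are illustrative, not required):

    # Keys and defaults as in this diff; actual data-source names are site-specific.
    config = {
      'ctl-postgres-ds' => 'db_ctl',     # control-plane PostgreSQL
      'redshift-ds'     => 'db_data',    # new in 0.10.0: used to probe job status
      'task-queue-ds'   => 'sqs_task',
      'log-table'       => 'strload_load_logs'
    }
    data_ds = ctx.get_data_source('sql', config.fetch('redshift-ds', 'db_data'))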
@@ -41,6 +42,7 @@ module Bricolage
       task_handler = new(
         context: ctx,
         ctl_ds: ctl_ds,
+        data_ds: data_ds,
         log_table: log_table,
         task_queue: task_queue,
         working_dir: opts.working_dir,
@@ -92,9 +94,10 @@ module Bricolage
       # ignore
     end
 
-    def initialize(context:, ctl_ds:, log_table:, task_queue:, working_dir:, logger:, job_class: Job)
+    def initialize(context:, ctl_ds:, data_ds:, log_table:, task_queue:, working_dir:, logger:, job_class: Job)
       @ctx = context
       @ctl_ds = ctl_ds
+      @data_ds = data_ds
       @log_table = log_table
       @task_queue = task_queue
       @working_dir = working_dir
@@ -132,7 +135,7 @@ module Bricolage
     end
 
     def new_job(task_id, force)
-      @job_class.new(context: @ctx, ctl_ds: @ctl_ds, log_table: @log_table, task_id: task_id, force: force, logger: @logger)
+      @job_class.new(context: @ctx, ctl_ds: @ctl_ds, data_ds: data_ds, log_table: @log_table, task_id: task_id, force: force, logger: @logger)
     end
 
     def job_class
@@ -16,7 +16,7 @@ module Bricolage
   class TestJob < Test::Unit::TestCase
 
     test "execute_task" do
-      setup_context {|
+      setup_context {|db|
         db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
         db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
         db.insert_into 'strload_task_objects', [1, 1], [1, 2]
@@ -24,7 +24,7 @@ module Bricolage
           [1, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
           [2, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
 
-        job =
+        job = new_job(task_id: 1, force: false)
         job.execute_task
 
         assert_equal [
@@ -42,7 +42,7 @@ module Bricolage
     end
 
     test "execute_task (with work table)" do
-      setup_context {|
+      setup_context {|db|
         db.insert_into 'strload_tables', [1, 'testschema.with_work_table', 'testschema', 'with_work_table', 100, 1800, false]
         db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
         db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -50,7 +50,7 @@ module Bricolage
           [1001, 's3://data-bucket/testschema.with_work_table/0001.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp],
           [1002, 's3://data-bucket/testschema.with_work_table/0002.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp]
 
-        job =
+        job = new_job(task_id: 11, force: false)
         job.execute_task
 
         assert_equal [
@@ -70,11 +70,11 @@ module Bricolage
     end
 
     test "execute_task (disabled)" do
-      setup_context {|
+      setup_context {|db|
         db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, true]
         db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
 
-        job =
+        job = new_job(task_id: 1, force: false)
         assert_raise(JobDefered) {
           job.execute_task
         }
@@ -84,7 +84,7 @@ module Bricolage
     end
 
     test "execute_task (duplicated)" do
-      setup_context {|
+      setup_context {|db|
         db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
         db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
         db.insert_into 'strload_jobs',
@@ -92,7 +92,7 @@ module Bricolage
           [2, 1, 'localhost-1234', 'success', current_timestamp, current_timestamp, ''],
           [3, 1, 'localhost-1234', 'duplicated', current_timestamp, current_timestamp, '']
 
-        job =
+        job = new_job(task_id: 1, force: false)
         assert_raise(JobDuplicated) {
           job.execute_task
         }
@@ -100,7 +100,7 @@ module Bricolage
     end
 
     test "execute_task (duplicated but forced)" do
-      setup_context {|
+      setup_context {|db|
         db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
         db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
         db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -108,7 +108,7 @@ module Bricolage
           [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
           [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
 
-        job =
+        job = new_job(task_id: 11, force: true)
         job.execute_task
 
         assert_equal [
@@ -127,7 +127,7 @@ module Bricolage
     end
 
     test "execute_task (load fails / first time)" do
-      setup_context {|
+      setup_context {|db|
         db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
         db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
         db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -135,7 +135,7 @@ module Bricolage
           [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
           [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
 
-        job =
+        job = new_job(task_id: 11, force: false)
         assert_raise(JobFailure) {
           job.execute_task
         }
@@ -153,7 +153,7 @@ module Bricolage
     end
 
     test "execute_task (load fails / nth time)" do
-      setup_context {|
+      setup_context {|db|
         db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
         db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
         db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -164,7 +164,7 @@ module Bricolage
           [101, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed'],
           [102, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed']
 
-        job =
+        job = new_job(task_id: 11, force: false)
         assert_raise(JobFailure) {
           job.execute_task
         }
@@ -183,7 +183,7 @@ module Bricolage
     end
 
     test "execute_task (too many retry)" do
-      setup_context {|
+      setup_context {|db|
         db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
         db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
         db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -197,7 +197,7 @@ module Bricolage
           [104, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#3 query failed'],
           [105, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#4 query failed']
 
-        job =
+        job = new_job(task_id: 11, force: false)
         assert_raise(JobCancelled) {
           job.execute_task
         }
@@ -216,7 +216,7 @@ module Bricolage
     end
 
     test "execute_task (job error)" do
-      setup_context {|
+      setup_context {|db|
         db.insert_into 'strload_tables', [1, 'testschema.job_error', 'testschema', 'job_error', 100, 1800, false]
         db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
         db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -224,7 +224,7 @@ module Bricolage
           [1001, 's3://data-bucket/testschema.job_error/0001.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp],
           [1002, 's3://data-bucket/testschema.job_error/0002.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp]
 
-        job =
+        job = new_job(task_id: 11, force: false)
         assert_raise(JobError) {
           job.execute_task
         }
@@ -242,15 +242,15 @@ module Bricolage
     end
 
     test "execute_task (unexpected error)" do
-      setup_context {|
+      setup_context {|db|
         db.insert_into 'strload_tables', [1, 'testschema.unexpected_error', 'testschema', 'unexpected_error', 100, 1800, false]
-        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1,
+        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
         db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
         db.insert_into 'strload_objects',
           [1001, 's3://data-bucket/testschema.unexpected_error/0001.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp],
           [1002, 's3://data-bucket/testschema.unexpected_error/0002.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp]
 
-        job =
+        job = new_job(task_id: 11, force: false)
         assert_raise(JobError) {
           job.execute_task
         }
@@ -267,16 +267,78 @@ module Bricolage
       }
     end
 
+    test "execute_task (unknown status, really=success)" do
+      setup_context {|db|
+        db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
+        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+        db.insert_into 'strload_jobs',
+          [101, 11, 'localhost-1234', 'unknown', current_timestamp, current_timestamp, 'data connection failed']
+        @data_ds.provide_job_status 101, true
+
+        job = new_job(task_id: 11, force: false)
+        assert_raise(JobDuplicated) {
+          job.execute_task
+        }
+
+        job_row = db.query_row("select * from strload_jobs where job_id = 101")
+        assert_equal 'success', job_row['status']
+      }
+    end
+
+    test "execute_task (unknown status, really=failure)" do
+      setup_context {|db|
+        db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
+        db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+        db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+        db.insert_into 'strload_objects',
+          [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
+          [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
+        db.insert_into 'strload_jobs',
+          [101, 11, 'localhost-1234', 'unknown', current_timestamp, current_timestamp, 'data connection failed']
+        @data_ds.provide_job_status 101, false
+
+        job = new_job(task_id: 11, force: false)
+        job.execute_task
+
+        assert_equal [
+          "begin transaction;",
+          "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
+          "insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
+          "commit;"
+        ], job.data_ds.sql_list
+
+        job_row = db.query_row("select * from strload_jobs where job_id = 101")
+        assert_equal 'failure', job_row['status']
+
+        job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+        assert_equal 11, job_row['task_id'].to_i
+        assert_equal job.process_id, job_row['process_id']
+        assert_equal 'success', job_row['status']
+      }
+    end
+
     def setup_context(verbose: false)
-      ctx = Context.for_application('.', environment: 'test', logger: (verbose ? nil : NullLogger.new))
-      ctl_ds = ctx.get_data_source('sql', 'dwhctl')
-
+      @ctx = Context.for_application('.', environment: 'test', logger: (verbose ? nil : NullLogger.new))
+      @ctl_ds = @ctx.get_data_source('sql', 'dwhctl')
+      @data_ds = @ctx.get_data_source('sql', 'db_data_mock')
+      @ctl_ds.open {|conn|
         client = SQLClient.new(conn)
         clear_all_tables(client)
-        yield
+        yield client
       }
     end
 
+    def new_job(task_id:, force:)
+      Job.new(
+        context: @ctx,
+        ctl_ds: @ctl_ds,
+        data_ds: @data_ds,
+        logger: @ctx.logger,
+        task_id: task_id,
+        force: force
+      )
+    end
+
     # FIXME: database cleaner
     def clear_all_tables(client)
       client.truncate_tables %w[
@@ -373,6 +435,7 @@ module Bricolage
       @fail_pattern = fail_pattern ? Regexp.compile(fail_pattern) : nil
       @error_pattern = error_pattern ? Regexp.compile(error_pattern) : nil
       @exception_pattern = exception_pattern ? Regexp.compile(exception_pattern) : nil
+      @job_status = {}
     end
 
     attr_reader :sql_list
@@ -400,11 +463,30 @@ module Bricolage
       end
     end
 
+    def provide_job_status(job_id, succeeded)
+      @job_status[job_id] = succeeded
+    end
+
+    def job_succeeded?(job_id)
+      raise "job status unregistered: job_id=#{job_id}" unless @job_status.key?(job_id)
+      @job_status[job_id]
+    end
+
     class Connection
       def initialize(ds)
         @ds = ds
       end
 
+      def query_value(sql)
+        case sql
+        when /\bstrload_load_logs where job_id = (\d+)/
+          job_id = $1.to_i
+          @ds.job_succeeded?(job_id) ? 1 : 0
+        else
+          raise "unknown query: #{sql}"
+        end
+      end
+
       def execute(sql)
         @ds.issue_sql sql
       end
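The stub's query_value answers only the one query shape the new recovery path issues and raises on anything else, so an unexpected probe fails the test loudly. Checking the pattern by hand:

    sql = "select count(*) from strload_load_logs where job_id = 101"
    sql =~ /\bstrload_load_logs where job_id = (\d+)/   # matches
    $1.to_i                                             # => 101, the key looked up in @job_status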
@@ -482,7 +564,27 @@ module Bricolage
       end
     end
 
-
+    test "TaskInfo#failure_count" do
+      test_data = [
+        [%w[], 0],
+        [%w[success], 0],
+        [%w[failure], 1],
+        [%w[error], 1],
+        [%w[failure failure], 2],
+        [%w[failure error], 2],
+        [%w[failure success], 0],
+        [%w[success success], 0],
+        [%w[failure success failure], 1],
+        [%w[failure success failure success failure failure], 2]
+      ]
+      c = Job::ControlConnection
+      test_data.each do |status_list, expected_count|
+        task = c::TaskInfo.new(nil, nil, nil, nil, nil, nil, status_list.map {|st| c::JobInfo.new(nil, st) })
+        assert_equal expected_count, task.failure_count
+      end
+    end
+
+  end # class TestJob
 
-end
-end
+end # module StreamingLoad
+end # module Bricolage