bricolage-streamingload 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8bddfd0337158c01e5acb0fac9ff9d4b3e152029
4
- data.tar.gz: 7243c54d80c5bcd4c8e157fd8d5a14e66e629abe
3
+ metadata.gz: 4a46109dbac16f28b807d4e2b29808d254ebe209
4
+ data.tar.gz: f97bf6c58057d05f89f6fe36b1b23b18b5b1d089
5
5
  SHA512:
6
- metadata.gz: 3e85208c4fcbf1a199169e75b40490181127fbaf7398e6ebb78daaead3f36704e76b7177ce43edfe404e5509e95f5a592ed8db4d4f7fa379efcca0d382fdedd6
7
- data.tar.gz: eb8187f0b900731bdf21c4e3b3ffbb0ac24ad133bd6d7819b20e4b7eb24420e649bf690d43f0b0e3aa5caf496146e208a0d13fa33c6be72183b8469eca96bb1c
6
+ metadata.gz: 4bc1033f8c06cb6f56a782129d1927919f3689828710182494a03edccc750b9dd4ab915ba7efc7f79255e754d36f4c466ebd6189fce9de1dee6985b1b6cfecce
7
+ data.tar.gz: aef6bd65662f942641ea2701880c41e28c88caa8dcb9aab9e02c93a2d76d28fdd0aec28dc0c4d4b159a3c2098520da8cfad27f9d0cd1824ae40428c3f5b91988
@@ -18,9 +18,10 @@ module Bricolage
18
18
 
19
19
  class Job
20
20
 
21
- def initialize(context:, ctl_ds:, log_table: 'strload_load_logs', task_id:, force: false, logger:)
21
+ def initialize(context:, ctl_ds:, data_ds:, log_table: 'strload_load_logs', task_id:, force: false, logger:)
22
22
  @context = context
23
23
  @ctl_ds = ctl_ds
24
+ @data_ds = data_ds
24
25
  @log_table = log_table
25
26
  @task_id = task_id
26
27
  @force = force
@@ -28,7 +29,6 @@ module Bricolage
28
29
 
29
30
  @task = nil
30
31
  @job_id = nil
31
- @data_ds = nil
32
32
  @manifest = nil
33
33
  end
34
34
 
@@ -57,9 +57,7 @@ module Bricolage
57
57
  rescue DataConnectionFailed => ex
58
58
  @logger.error ex.message
59
59
  wait_for_connection('data', @data_ds) unless fail_fast
60
- # FIXME: tmp: We don't know the transaction was succeeded or not in the Redshift, auto-retry is too dangerous.
61
- #return false
62
- return true
60
+ return false
63
61
  rescue JobFailure => ex
64
62
  @logger.error ex.message
65
63
  return false
@@ -87,6 +85,16 @@ module Bricolage
87
85
  raise JobDefered, "defered: task_id=#{@task_id}"
88
86
  end
89
87
 
88
+ if @task.unknown_state?
89
+ true_status = DataConnection.open(@data_ds, @logger) {|data|
90
+ data.get_job_status(@log_table, @task.last_job_id)
91
+ }
92
+ @logger.info "fixiating unknown job status: job_id=#{@task.last_job_id}, status=(unknown->#{true_status})"
93
+ @task.fix_last_job_status true_status
94
+ ctl.fix_job_status @task.last_job_id, true_status
95
+ @logger.info "job status fixed."
96
+ end
97
+
90
98
  @job_id = ctl.begin_job(@task_id, @process_id, @force)
91
99
  unless @job_id
92
100
  @logger.warn "task is already succeeded and not forced; discard task: task_id=#{@task_id}"
@@ -102,17 +110,14 @@ module Bricolage
102
110
  }
103
111
  rescue ControlConnectionFailed
104
112
  raise
105
-
106
- # FIXME: tmp: should be a failure, not an error.
107
113
  rescue DataConnectionFailed => ex
108
114
  ctl.open {
109
- ctl.abort_job job_id, 'error', ex.message.lines.first.strip
115
+ ctl.abort_job job_id, 'unknown', ex.message.lines.first.strip
110
116
  }
111
117
  raise
112
-
113
118
  rescue JobFailure => ex
114
119
  ctl.open {
115
- fail_count = ctl.fail_count(@task_id)
120
+ fail_count = @task.failure_count
116
121
  final_retry = (fail_count >= MAX_RETRY)
117
122
  retry_msg = (fail_count > 0) ? "(retry\##{fail_count}#{final_retry ? ' FINAL' : ''}) " : ''
118
123
  ctl.abort_job job_id, 'failure', retry_msg + ex.message.lines.first.strip
@@ -193,6 +198,11 @@ module Bricolage
193
198
  raise DataConnectionFailed, "data connection failed: #{ex.message}"
194
199
  end
195
200
 
201
+ def get_job_status(log_table, job_id)
202
+ count = @connection.query_value("select count(*) from #{log_table} where job_id = #{job_id}")
203
+ count.to_i > 0 ? 'success' : 'failure'
204
+ end
205
+
196
206
  def load_with_work_table(work_table, manifest, options, sql_source, log_table, job_id)
197
207
  @connection.transaction {|txn|
198
208
  # NOTE: This transaction ends with truncation, this DELETE does nothing
@@ -255,7 +265,32 @@ module Bricolage
255
265
  raise ControlConnectionFailed, "control connection failed: #{ex.message}"
256
266
  end
257
267
 
258
- TaskInfo = Struct.new(:task_id, :task_class, :schema_name, :table_name, :disabled, :object_urls)
268
+ TaskInfo = Struct.new(:task_id, :task_class, :schema_name, :table_name, :disabled, :object_urls, :jobs)
269
+ class TaskInfo
270
+ def unknown_state?
271
+ return false if jobs.empty?
272
+ jobs.last.status == 'unknown'
273
+ end
274
+
275
+ def last_job_id
276
+ return nil if jobs.empty?
277
+ jobs.last.job_id
278
+ end
279
+
280
+ def fix_last_job_status(st)
281
+ jobs.last.status = st unless jobs.empty?
282
+ end
283
+
284
+ def failure_count
285
+ @failure_count ||= begin
286
+ statuses = jobs.map(&:status)
287
+ statuses.delete('duplicated')
288
+ last_succ = statuses.rindex('success')
289
+ statuses[0..last_succ] = [] if last_succ
290
+ statuses.size
291
+ end
292
+ end
293
+ end
259
294
 
260
295
  def load_task(task_id)
261
296
  rec = @connection.query_row(<<-EndSQL) or raise JobError, "no such task: #{task_id}"
@@ -277,10 +312,29 @@ module Bricolage
277
312
  rec['schema_name'],
278
313
  rec['table_name'],
279
314
  (rec['disabled'] != 'f'),
280
- load_object_urls(task_id)
315
+ load_object_urls(task_id),
316
+ load_jobs(task_id)
281
317
  )
282
318
  end
283
319
 
320
+ def load_jobs(task_id)
321
+ records = @connection.query_rows(<<-EndSQL)
322
+ select
323
+ job_id
324
+ , status
325
+ from
326
+ strload_jobs
327
+ where
328
+ task_id = #{task_id}
329
+ order by
330
+ start_time
331
+ ;
332
+ EndSQL
333
+ records.map {|rec| JobInfo.new(rec['job_id'].to_i, rec['status']) }
334
+ end
335
+
336
+ JobInfo = Struct.new(:job_id, :status, :start_time)
337
+
284
338
  def load_object_urls(task_id)
285
339
  urls = @connection.query_values(<<-EndSQL)
286
340
  select
@@ -296,6 +350,20 @@ module Bricolage
296
350
  urls
297
351
  end
298
352
 
353
+ def fix_job_status(job_id, status)
354
+ @connection.update(<<-EndSQL)
355
+ update
356
+ strload_jobs
357
+ set
358
+ status = #{s status}
359
+ , message = 'status fixed: ' || message
360
+ where
361
+ job_id = #{job_id}
362
+ and status = 'unknown'
363
+ ;
364
+ EndSQL
365
+ end
366
+
299
367
  def begin_job(task_id, process_id, force)
300
368
  job_id = @connection.query_value(<<-EndSQL)
301
369
  insert into strload_jobs
@@ -320,22 +388,6 @@ module Bricolage
320
388
  return job_id ? job_id.to_i : nil
321
389
  end
322
390
 
323
- def fail_count(task_id)
324
- statuses = @connection.query_values(<<-EndSQL)
325
- select
326
- j.status
327
- from
328
- strload_tasks t
329
- inner join strload_jobs j using (task_id)
330
- where
331
- t.task_id = #{task_id}
332
- order by
333
- j.job_id desc
334
- EndSQL
335
- statuses.shift if statuses.first == 'running' # current job
336
- statuses.take_while {|st| %w[failure error].include?(st) }.size
337
- end
338
-
339
391
  def commit_job(job_id, message = nil)
340
392
  @connection.transaction {|txn|
341
393
  write_job_result job_id, 'success', (message || '')
@@ -29,6 +29,7 @@ module Bricolage
29
29
  ctx = Context.for_application(opts.working_dir, environment: opts.environment, logger: logger)
30
30
 
31
31
  ctl_ds = ctx.get_data_source('sql', config.fetch('ctl-postgres-ds', 'db_ctl'))
32
+ data_ds = ctx.get_data_source('sql', config.fetch('redshift-ds', 'db_data'))
32
33
  task_queue = ctx.get_data_source('sqs', config.fetch('task-queue-ds', 'sqs_task'))
33
34
  log_table = config.fetch('log-table', 'strload_load_logs')
34
35
  service_logger =
@@ -41,6 +42,7 @@ module Bricolage
41
42
  task_handler = new(
42
43
  context: ctx,
43
44
  ctl_ds: ctl_ds,
45
+ data_ds: data_ds,
44
46
  log_table: log_table,
45
47
  task_queue: task_queue,
46
48
  working_dir: opts.working_dir,
@@ -92,9 +94,10 @@ module Bricolage
92
94
  # ignore
93
95
  end
94
96
 
95
- def initialize(context:, ctl_ds:, log_table:, task_queue:, working_dir:, logger:, job_class: Job)
97
+ def initialize(context:, ctl_ds:, data_ds:, log_table:, task_queue:, working_dir:, logger:, job_class: Job)
96
98
  @ctx = context
97
99
  @ctl_ds = ctl_ds
100
+ @data_ds = data_ds
98
101
  @log_table = log_table
99
102
  @task_queue = task_queue
100
103
  @working_dir = working_dir
@@ -132,7 +135,7 @@ module Bricolage
132
135
  end
133
136
 
134
137
  def new_job(task_id, force)
135
- @job_class.new(context: @ctx, ctl_ds: @ctl_ds, log_table: @log_table, task_id: task_id, force: force, logger: @logger)
138
+ @job_class.new(context: @ctx, ctl_ds: @ctl_ds, data_ds: data_ds, log_table: @log_table, task_id: task_id, force: force, logger: @logger)
136
139
  end
137
140
 
138
141
  def job_class
@@ -1,5 +1,5 @@
1
1
  module Bricolage
2
2
  module StreamingLoad
3
- VERSION = '0.9.0'
3
+ VERSION = '0.10.0'
4
4
  end
5
5
  end
@@ -16,7 +16,7 @@ module Bricolage
16
16
  class TestJob < Test::Unit::TestCase
17
17
 
18
18
  test "execute_task" do
19
- setup_context {|ctx, ctl_ds, db|
19
+ setup_context {|db|
20
20
  db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
21
21
  db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
22
22
  db.insert_into 'strload_task_objects', [1, 1], [1, 2]
@@ -24,7 +24,7 @@ module Bricolage
24
24
  [1, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
25
25
  [2, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
26
26
 
27
- job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 1, force: false, logger: ctx.logger)
27
+ job = new_job(task_id: 1, force: false)
28
28
  job.execute_task
29
29
 
30
30
  assert_equal [
@@ -42,7 +42,7 @@ module Bricolage
42
42
  end
43
43
 
44
44
  test "execute_task (with work table)" do
45
- setup_context {|ctx, ctl_ds, db|
45
+ setup_context {|db|
46
46
  db.insert_into 'strload_tables', [1, 'testschema.with_work_table', 'testschema', 'with_work_table', 100, 1800, false]
47
47
  db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
48
48
  db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -50,7 +50,7 @@ module Bricolage
50
50
  [1001, 's3://data-bucket/testschema.with_work_table/0001.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp],
51
51
  [1002, 's3://data-bucket/testschema.with_work_table/0002.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp]
52
52
 
53
- job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
53
+ job = new_job(task_id: 11, force: false)
54
54
  job.execute_task
55
55
 
56
56
  assert_equal [
@@ -70,11 +70,11 @@ module Bricolage
70
70
  end
71
71
 
72
72
  test "execute_task (disabled)" do
73
- setup_context {|ctx, ctl_ds, db|
73
+ setup_context {|db|
74
74
  db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, true]
75
75
  db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
76
76
 
77
- job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 1, force: false, logger: ctx.logger)
77
+ job = new_job(task_id: 1, force: false)
78
78
  assert_raise(JobDefered) {
79
79
  job.execute_task
80
80
  }
@@ -84,7 +84,7 @@ module Bricolage
84
84
  end
85
85
 
86
86
  test "execute_task (duplicated)" do
87
- setup_context {|ctx, ctl_ds, db|
87
+ setup_context {|db|
88
88
  db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
89
89
  db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
90
90
  db.insert_into 'strload_jobs',
@@ -92,7 +92,7 @@ module Bricolage
92
92
  [2, 1, 'localhost-1234', 'success', current_timestamp, current_timestamp, ''],
93
93
  [3, 1, 'localhost-1234', 'duplicated', current_timestamp, current_timestamp, '']
94
94
 
95
- job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 1, force: false, logger: ctx.logger)
95
+ job = new_job(task_id: 1, force: false)
96
96
  assert_raise(JobDuplicated) {
97
97
  job.execute_task
98
98
  }
@@ -100,7 +100,7 @@ module Bricolage
100
100
  end
101
101
 
102
102
  test "execute_task (duplicated but forced)" do
103
- setup_context {|ctx, ctl_ds, db|
103
+ setup_context {|db|
104
104
  db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
105
105
  db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
106
106
  db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -108,7 +108,7 @@ module Bricolage
108
108
  [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
109
109
  [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
110
110
 
111
- job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: true, logger: ctx.logger)
111
+ job = new_job(task_id: 11, force: true)
112
112
  job.execute_task
113
113
 
114
114
  assert_equal [
@@ -127,7 +127,7 @@ module Bricolage
127
127
  end
128
128
 
129
129
  test "execute_task (load fails / first time)" do
130
- setup_context {|ctx, ctl_ds, db|
130
+ setup_context {|db|
131
131
  db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
132
132
  db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
133
133
  db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -135,7 +135,7 @@ module Bricolage
135
135
  [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
136
136
  [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
137
137
 
138
- job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
138
+ job = new_job(task_id: 11, force: false)
139
139
  assert_raise(JobFailure) {
140
140
  job.execute_task
141
141
  }
@@ -153,7 +153,7 @@ module Bricolage
153
153
  end
154
154
 
155
155
  test "execute_task (load fails / nth time)" do
156
- setup_context {|ctx, ctl_ds, db|
156
+ setup_context {|db|
157
157
  db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
158
158
  db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
159
159
  db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -164,7 +164,7 @@ module Bricolage
164
164
  [101, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed'],
165
165
  [102, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed']
166
166
 
167
- job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
167
+ job = new_job(task_id: 11, force: false)
168
168
  assert_raise(JobFailure) {
169
169
  job.execute_task
170
170
  }
@@ -183,7 +183,7 @@ module Bricolage
183
183
  end
184
184
 
185
185
  test "execute_task (too many retry)" do
186
- setup_context {|ctx, ctl_ds, db|
186
+ setup_context {|db|
187
187
  db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
188
188
  db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
189
189
  db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -197,7 +197,7 @@ module Bricolage
197
197
  [104, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#3 query failed'],
198
198
  [105, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#4 query failed']
199
199
 
200
- job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
200
+ job = new_job(task_id: 11, force: false)
201
201
  assert_raise(JobCancelled) {
202
202
  job.execute_task
203
203
  }
@@ -216,7 +216,7 @@ module Bricolage
216
216
  end
217
217
 
218
218
  test "execute_task (job error)" do
219
- setup_context {|ctx, ctl_ds, db|
219
+ setup_context {|db|
220
220
  db.insert_into 'strload_tables', [1, 'testschema.job_error', 'testschema', 'job_error', 100, 1800, false]
221
221
  db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
222
222
  db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
@@ -224,7 +224,7 @@ module Bricolage
224
224
  [1001, 's3://data-bucket/testschema.job_error/0001.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp],
225
225
  [1002, 's3://data-bucket/testschema.job_error/0002.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp]
226
226
 
227
- job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
227
+ job = new_job(task_id: 11, force: false)
228
228
  assert_raise(JobError) {
229
229
  job.execute_task
230
230
  }
@@ -242,15 +242,15 @@ module Bricolage
242
242
  end
243
243
 
244
244
  test "execute_task (unexpected error)" do
245
- setup_context {|ctx, ctl_ds, db|
245
+ setup_context {|db|
246
246
  db.insert_into 'strload_tables', [1, 'testschema.unexpected_error', 'testschema', 'unexpected_error', 100, 1800, false]
247
- db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, sql('current_timestamp')]
247
+ db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
248
248
  db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
249
249
  db.insert_into 'strload_objects',
250
250
  [1001, 's3://data-bucket/testschema.unexpected_error/0001.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp],
251
251
  [1002, 's3://data-bucket/testschema.unexpected_error/0002.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp]
252
252
 
253
- job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
253
+ job = new_job(task_id: 11, force: false)
254
254
  assert_raise(JobError) {
255
255
  job.execute_task
256
256
  }
@@ -267,16 +267,78 @@ module Bricolage
267
267
  }
268
268
  end
269
269
 
270
+ test "execute_task (unknown status, really=success)" do
271
+ setup_context {|db|
272
+ db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
273
+ db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
274
+ db.insert_into 'strload_jobs',
275
+ [101, 11, 'localhost-1234', 'unknown', current_timestamp, current_timestamp, 'data connection failed']
276
+ @data_ds.provide_job_status 101, true
277
+
278
+ job = new_job(task_id: 11, force: false)
279
+ assert_raise(JobDuplicated) {
280
+ job.execute_task
281
+ }
282
+
283
+ job_row = db.query_row("select * from strload_jobs where job_id = 101")
284
+ assert_equal 'success', job_row['status']
285
+ }
286
+ end
287
+
288
+ test "execute_task (unknown status, really=failure)" do
289
+ setup_context {|db|
290
+ db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
291
+ db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
292
+ db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
293
+ db.insert_into 'strload_objects',
294
+ [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
295
+ [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
296
+ db.insert_into 'strload_jobs',
297
+ [101, 11, 'localhost-1234', 'unknown', current_timestamp, current_timestamp, 'data connection failed']
298
+ @data_ds.provide_job_status 101, false
299
+
300
+ job = new_job(task_id: 11, force: false)
301
+ job.execute_task
302
+
303
+ assert_equal [
304
+ "begin transaction;",
305
+ "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
306
+ "insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
307
+ "commit;"
308
+ ], job.data_ds.sql_list
309
+
310
+ job_row = db.query_row("select * from strload_jobs where job_id = 101")
311
+ assert_equal 'failure', job_row['status']
312
+
313
+ job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
314
+ assert_equal 11, job_row['task_id'].to_i
315
+ assert_equal job.process_id, job_row['process_id']
316
+ assert_equal 'success', job_row['status']
317
+ }
318
+ end
319
+
270
320
  def setup_context(verbose: false)
271
- ctx = Context.for_application('.', environment: 'test', logger: (verbose ? nil : NullLogger.new))
272
- ctl_ds = ctx.get_data_source('sql', 'dwhctl')
273
- ctl_ds.open {|conn|
321
+ @ctx = Context.for_application('.', environment: 'test', logger: (verbose ? nil : NullLogger.new))
322
+ @ctl_ds = @ctx.get_data_source('sql', 'dwhctl')
323
+ @data_ds = @ctx.get_data_source('sql', 'db_data_mock')
324
+ @ctl_ds.open {|conn|
274
325
  client = SQLClient.new(conn)
275
326
  clear_all_tables(client)
276
- yield ctx, ctl_ds, client
327
+ yield client
277
328
  }
278
329
  end
279
330
 
331
+ def new_job(task_id:, force:)
332
+ Job.new(
333
+ context: @ctx,
334
+ ctl_ds: @ctl_ds,
335
+ data_ds: @data_ds,
336
+ logger: @ctx.logger,
337
+ task_id: task_id,
338
+ force: force
339
+ )
340
+ end
341
+
280
342
  # FIXME: database cleaner
281
343
  def clear_all_tables(client)
282
344
  client.truncate_tables %w[
@@ -373,6 +435,7 @@ module Bricolage
373
435
  @fail_pattern = fail_pattern ? Regexp.compile(fail_pattern) : nil
374
436
  @error_pattern = error_pattern ? Regexp.compile(error_pattern) : nil
375
437
  @exception_pattern = exception_pattern ? Regexp.compile(exception_pattern) : nil
438
+ @job_status = {}
376
439
  end
377
440
 
378
441
  attr_reader :sql_list
@@ -400,11 +463,30 @@ module Bricolage
400
463
  end
401
464
  end
402
465
 
466
+ def provide_job_status(job_id, succeeded)
467
+ @job_status[job_id] = succeeded
468
+ end
469
+
470
+ def job_succeeded?(job_id)
471
+ raise "job status unregistered: job_id=#{job_id}" unless @job_status.key?(job_id)
472
+ @job_status[job_id]
473
+ end
474
+
403
475
  class Connection
404
476
  def initialize(ds)
405
477
  @ds = ds
406
478
  end
407
479
 
480
+ def query_value(sql)
481
+ case sql
482
+ when /\bstrload_load_logs where job_id = (\d+)/
483
+ job_id = $1.to_i
484
+ @ds.job_succeeded?(job_id) ? 1 : 0
485
+ else
486
+ raise "unknown query: #{sql}"
487
+ end
488
+ end
489
+
408
490
  def execute(sql)
409
491
  @ds.issue_sql sql
410
492
  end
@@ -482,7 +564,27 @@ module Bricolage
482
564
  end
483
565
  end
484
566
 
485
- end
567
+ test "TaskInfo#failure_count" do
568
+ test_data = [
569
+ [%w[], 0],
570
+ [%w[success], 0],
571
+ [%w[failure], 1],
572
+ [%w[error], 1],
573
+ [%w[failure failure], 2],
574
+ [%w[failure error], 2],
575
+ [%w[failure success], 0],
576
+ [%w[success success], 0],
577
+ [%w[failure success failure], 1],
578
+ [%w[failure success failure success failure failure], 2]
579
+ ]
580
+ c = Job::ControlConnection
581
+ test_data.each do |status_list, expected_count|
582
+ task = c::TaskInfo.new(nil,nil,nil,nil,nil,nil, status_list.map {|st| c::JobInfo.new(nil, st) })
583
+ assert_equal expected_count, task.failure_count
584
+ end
585
+ end
586
+
587
+ end # class TestJob
486
588
 
487
- end
488
- end
589
+ end # module StreamingLoad
590
+ end # module Bricolage
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bricolage-streamingload
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki