bricolage-streamingload 0.8.1 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8bc418b99ed85932c9f2483d643b34d6a76788f0
4
- data.tar.gz: 5a1310ac8711de6c9d009ea1405ab9ba7e50918b
3
+ metadata.gz: 8bddfd0337158c01e5acb0fac9ff9d4b3e152029
4
+ data.tar.gz: 7243c54d80c5bcd4c8e157fd8d5a14e66e629abe
5
5
  SHA512:
6
- metadata.gz: c205fe5bf75dfee2fb4abe845afb0c8ac91118a461c6b54e4f8534b69262f3db37cff1ddac9d874f8370b94ad89cfc2772b96cb95539cb8e9a453e919f7def62
7
- data.tar.gz: 6d4319ce64cf4a115606075d1edb16df725b781ab7c8a6c362df608d4ed6c095e50e80682cdaaf36285781bac34a8117d99688e832862e0b688b79e47f20c0ca
6
+ metadata.gz: 3e85208c4fcbf1a199169e75b40490181127fbaf7398e6ebb78daaead3f36704e76b7177ce43edfe404e5509e95f5a592ed8db4d4f7fa379efcca0d382fdedd6
7
+ data.tar.gz: eb8187f0b900731bdf21c4e3b3ffbb0ac24ad133bd6d7819b20e4b7eb24420e649bf690d43f0b0e3aa5caf496146e208a0d13fa33c6be72183b8469eca96bb1c
@@ -18,9 +18,10 @@ module Bricolage
18
18
 
19
19
  class Job
20
20
 
21
- def initialize(context:, ctl_ds:, task_id:, force: false, logger:)
21
+ def initialize(context:, ctl_ds:, log_table: 'strload_load_logs', task_id:, force: false, logger:)
22
22
  @context = context
23
23
  @ctl_ds = ctl_ds
24
+ @log_table = log_table
24
25
  @task_id = task_id
25
26
  @force = force
26
27
  @logger = logger
@@ -53,14 +54,17 @@ module Bricolage
53
54
  @logger.error ex.message
54
55
  wait_for_connection('ctl', @ctl_ds) unless fail_fast
55
56
  return false
56
- rescue DataConnectionFailed
57
+ rescue DataConnectionFailed => ex
58
+ @logger.error ex.message
57
59
  wait_for_connection('data', @data_ds) unless fail_fast
58
60
  # FIXME: tmp: We don't know the transaction was succeeded or not in the Redshift, auto-retry is too dangerous.
59
61
  #return false
60
62
  return true
61
- rescue JobFailure
63
+ rescue JobFailure => ex
64
+ @logger.error ex.message
62
65
  return false
63
- rescue JobError
66
+ rescue JobError => ex
67
+ @logger.error ex.message
64
68
  return true
65
69
  rescue Exception => ex
66
70
  @logger.exception ex
@@ -101,14 +105,12 @@ module Bricolage
101
105
 
102
106
  # FIXME: tmp: should be a failure, not an error.
103
107
  rescue DataConnectionFailed => ex
104
- @logger.error ex.message
105
108
  ctl.open {
106
109
  ctl.abort_job job_id, 'error', ex.message.lines.first.strip
107
110
  }
108
111
  raise
109
112
 
110
113
  rescue JobFailure => ex
111
- @logger.error ex.message
112
114
  ctl.open {
113
115
  fail_count = ctl.fail_count(@task_id)
114
116
  final_retry = (fail_count >= MAX_RETRY)
@@ -118,7 +120,6 @@ module Bricolage
118
120
  }
119
121
  raise
120
122
  rescue JobError => ex
121
- @logger.error ex.message
122
123
  ctl.open {
123
124
  ctl.abort_job job_id, 'error', ex.message.lines.first.strip
124
125
  }
@@ -138,9 +139,9 @@ module Bricolage
138
139
  @manifest = ManifestFile.create(ds: params.ctl_bucket, job_id: job_id, object_urls: task.object_urls, logger: @logger)
139
140
  DataConnection.open(params.ds, @logger) {|data|
140
141
  if params.enable_work_table?
141
- data.load_with_work_table params.work_table, @manifest, params.load_options_string, params.sql_source
142
+ data.load_with_work_table params.work_table, @manifest, params.load_options_string, params.sql_source, @log_table, job_id
142
143
  else
143
- data.load_objects params.dest_table, @manifest, params.load_options_string
144
+ data.load_objects params.dest_table, @manifest, params.load_options_string, @log_table, job_id
144
145
  end
145
146
  }
146
147
  end
@@ -192,18 +193,26 @@ module Bricolage
192
193
  raise DataConnectionFailed, "data connection failed: #{ex.message}"
193
194
  end
194
195
 
195
- def load_with_work_table(work_table, manifest, options, sql_source)
196
+ def load_with_work_table(work_table, manifest, options, sql_source, log_table, job_id)
196
197
  @connection.transaction {|txn|
197
198
  # NOTE: This transaction ends with truncation, this DELETE does nothing
198
199
  # from the second time. So don't worry about DELETE cost here.
199
200
  @connection.execute("delete from #{work_table}")
200
- load_objects work_table, manifest, options
201
+ execute_copy work_table, manifest, options
201
202
  @connection.execute sql_source
203
+ write_load_log log_table, job_id
202
204
  txn.truncate_and_commit work_table
203
205
  }
204
206
  end
205
207
 
206
- def load_objects(dest_table, manifest, options)
208
+ def load_objects(dest_table, manifest, options, log_table, job_id)
209
+ @connection.transaction {|txn|
210
+ execute_copy dest_table, manifest, options
211
+ write_load_log log_table, job_id
212
+ }
213
+ end
214
+
215
+ def execute_copy(dest_table, manifest, options)
207
216
  @connection.execute(<<-EndSQL.strip.gsub(/\s+/, ' '))
208
217
  copy #{dest_table}
209
218
  from #{s manifest.url}
@@ -217,6 +226,10 @@ module Bricolage
217
226
  @logger.info "load succeeded: #{manifest.url}"
218
227
  end
219
228
 
229
+ def write_load_log(log_table, job_id)
230
+ @connection.execute("insert into #{log_table} (job_id, finish_time) values (#{job_id}, current_timestamp)")
231
+ end
232
+
220
233
  end # class DataConnection
221
234
 
222
235
 
@@ -30,6 +30,7 @@ module Bricolage
30
30
 
31
31
  ctl_ds = ctx.get_data_source('sql', config.fetch('ctl-postgres-ds', 'db_ctl'))
32
32
  task_queue = ctx.get_data_source('sqs', config.fetch('task-queue-ds', 'sqs_task'))
33
+ log_table = config.fetch('log-table', 'strload_load_logs')
33
34
  service_logger =
34
35
  if config.key?('alert-level')
35
36
  new_alerting_logger(ctx, config)
@@ -40,6 +41,7 @@ module Bricolage
40
41
  task_handler = new(
41
42
  context: ctx,
42
43
  ctl_ds: ctl_ds,
44
+ log_table: log_table,
43
45
  task_queue: task_queue,
44
46
  working_dir: opts.working_dir,
45
47
  logger: service_logger,
@@ -90,9 +92,10 @@ module Bricolage
90
92
  # ignore
91
93
  end
92
94
 
93
- def initialize(context:, ctl_ds:, task_queue:, working_dir:, logger:, job_class: Job)
95
+ def initialize(context:, ctl_ds:, log_table:, task_queue:, working_dir:, logger:, job_class: Job)
94
96
  @ctx = context
95
97
  @ctl_ds = ctl_ds
98
+ @log_table = log_table
96
99
  @task_queue = task_queue
97
100
  @working_dir = working_dir
98
101
  @logger = logger
@@ -102,7 +105,7 @@ module Bricolage
102
105
  attr_reader :logger
103
106
 
104
107
  def execute_task_by_id(task_id, force: false)
105
- job = @job_class.new(context: @ctx, ctl_ds: @ctl_ds, task_id: task_id, force: force, logger: @logger)
108
+ job = new_job(task_id, force)
106
109
  job.execute(fail_fast: true)
107
110
  end
108
111
 
@@ -119,7 +122,7 @@ module Bricolage
119
122
  # message handler
120
123
  def handle_streaming_load_v3(t)
121
124
  Dir.chdir(@working_dir) {
122
- job = @job_class.new(context: @ctx, ctl_ds: @ctl_ds, task_id: t.task_id, force: t.force?, logger: @logger)
125
+ job = new_job(t.task_id, t.force?)
123
126
  if job.execute
124
127
  @task_queue.delete_message(t)
125
128
  end
@@ -128,6 +131,10 @@ module Bricolage
128
131
  @logger.exception ex
129
132
  end
130
133
 
134
+ def new_job(task_id, force)
135
+ @job_class.new(context: @ctx, ctl_ds: @ctl_ds, log_table: @log_table, task_id: task_id, force: force, logger: @logger)
136
+ end
137
+
131
138
  def job_class
132
139
  @job_class ||= Job
133
140
  end
@@ -1,5 +1,5 @@
1
1
  module Bricolage
2
2
  module StreamingLoad
3
- VERSION = '0.8.1'
3
+ VERSION = '0.9.0'
4
4
  end
5
5
  end
@@ -27,8 +27,13 @@ module Bricolage
27
27
  job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 1, force: false, logger: ctx.logger)
28
28
  job.execute_task
29
29
 
30
- copy_stmt = "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
31
- assert_equal [copy_stmt], job.data_ds.sql_list
30
+ assert_equal [
31
+ "begin transaction;",
32
+ "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
33
+ "insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
34
+ "commit;"
35
+ ], job.data_ds.sql_list
36
+
32
37
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
33
38
  assert_equal 1, job_row['task_id'].to_i
34
39
  assert_equal job.process_id, job_row['process_id']
@@ -48,11 +53,14 @@ module Bricolage
48
53
  job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
49
54
  job.execute_task
50
55
 
51
- assert_equal 'begin transaction;', job.data_ds.sql_list[0]
52
- assert_equal 'delete from testschema.with_work_table_wk', job.data_ds.sql_list[1]
53
- assert_equal "copy testschema.with_work_table_wk from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;", job.data_ds.sql_list[2]
54
- assert_equal "insert into testschema.with_work_table select * from testschema.with_work_table_wk;\n", job.data_ds.sql_list[3]
55
- assert_equal 'truncate testschema.with_work_table_wk;', job.data_ds.sql_list[4]
56
+ assert_equal [
57
+ "begin transaction;",
58
+ "delete from testschema.with_work_table_wk",
59
+ "copy testschema.with_work_table_wk from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
60
+ "insert into testschema.with_work_table select * from testschema.with_work_table_wk;\n",
61
+ "insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
62
+ "truncate testschema.with_work_table_wk;"
63
+ ], job.data_ds.sql_list
56
64
 
57
65
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
58
66
  assert_equal 11, job_row['task_id'].to_i
@@ -103,8 +111,12 @@ module Bricolage
103
111
  job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: true, logger: ctx.logger)
104
112
  job.execute_task
105
113
 
106
- copy_stmt = "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
107
- assert_equal [copy_stmt], job.data_ds.sql_list
114
+ assert_equal [
115
+ "begin transaction;",
116
+ "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
117
+ "insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
118
+ "commit;"
119
+ ], job.data_ds.sql_list
108
120
 
109
121
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
110
122
  assert_equal 11, job_row['task_id'].to_i
@@ -127,8 +139,11 @@ module Bricolage
127
139
  assert_raise(JobFailure) {
128
140
  job.execute_task
129
141
  }
130
- copy_stmt = "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
131
- assert_equal [copy_stmt], job.data_ds.sql_list
142
+ assert_equal [
143
+ "begin transaction;",
144
+ "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
145
+ "abort;"
146
+ ], job.data_ds.sql_list
132
147
 
133
148
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
134
149
  assert_equal 11, job_row['task_id'].to_i
@@ -153,8 +168,11 @@ module Bricolage
153
168
  assert_raise(JobFailure) {
154
169
  job.execute_task
155
170
  }
156
- copy_stmt = "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
157
- assert_equal [copy_stmt], job.data_ds.sql_list
171
+ assert_equal [
172
+ "begin transaction;",
173
+ "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
174
+ "abort;"
175
+ ], job.data_ds.sql_list
158
176
 
159
177
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
160
178
  assert_equal 11, job_row['task_id'].to_i
@@ -183,8 +201,12 @@ module Bricolage
183
201
  assert_raise(JobCancelled) {
184
202
  job.execute_task
185
203
  }
186
- copy_stmt = "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
187
- assert_equal [copy_stmt], job.data_ds.sql_list
204
+ assert_equal [
205
+ "begin transaction;",
206
+ "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
207
+ "abort;"
208
+ ], job.data_ds.sql_list
209
+
188
210
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
189
211
  assert_equal 11, job_row['task_id'].to_i
190
212
  assert_equal job.process_id, job_row['process_id']
@@ -206,7 +228,12 @@ module Bricolage
206
228
  assert_raise(JobError) {
207
229
  job.execute_task
208
230
  }
209
- assert_equal 1, job.data_ds.sql_list.size
231
+ assert_equal [
232
+ "begin transaction;",
233
+ "copy testschema.job_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
234
+ "abort;"
235
+ ], job.data_ds.sql_list
236
+
210
237
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
211
238
  assert_equal 11, job_row['task_id'].to_i
212
239
  assert_equal job.process_id, job_row['process_id']
@@ -227,7 +254,12 @@ module Bricolage
227
254
  assert_raise(JobError) {
228
255
  job.execute_task
229
256
  }
230
- assert_equal 1, job.data_ds.sql_list.size
257
+ assert_equal [
258
+ "begin transaction;",
259
+ "copy testschema.unexpected_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
260
+ "abort;"
261
+ ], job.data_ds.sql_list
262
+
231
263
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
232
264
  assert_equal 11, job_row['task_id'].to_i
233
265
  assert_equal job.process_id, job_row['process_id']
@@ -379,21 +411,39 @@ module Bricolage
379
411
 
380
412
  def transaction
381
413
  @ds.issue_sql "begin transaction;"
382
- yield Transaction.new(@ds)
414
+ txn = Transaction.new(@ds)
415
+ yield txn
416
+ rescue
417
+ txn.abort unless txn.committed?
418
+ raise
419
+ ensure
420
+ txn.commit unless txn.committed?
383
421
  end
384
422
  end
385
423
 
386
424
  class Transaction
387
425
  def initialize(ds)
388
426
  @ds = ds
427
+ @commit = false
428
+ end
429
+
430
+ def committed?
431
+ @commit
389
432
  end
390
433
 
391
434
  def commit
392
435
  @ds.issue_sql "commit;"
436
+ @commit = true
437
+ end
438
+
439
+ def abort
440
+ @ds.issue_sql "abort;"
441
+ @commit = true
393
442
  end
394
443
 
395
444
  def truncate_and_commit(table)
396
445
  @ds.issue_sql "truncate #{table};"
446
+ @commit = true
397
447
  end
398
448
  end
399
449
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bricolage-streamingload
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-10-19 00:00:00.000000000 Z
12
+ date: 2016-10-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bricolage