bricolage-streamingload 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8bc418b99ed85932c9f2483d643b34d6a76788f0
4
- data.tar.gz: 5a1310ac8711de6c9d009ea1405ab9ba7e50918b
3
+ metadata.gz: 8bddfd0337158c01e5acb0fac9ff9d4b3e152029
4
+ data.tar.gz: 7243c54d80c5bcd4c8e157fd8d5a14e66e629abe
5
5
  SHA512:
6
- metadata.gz: c205fe5bf75dfee2fb4abe845afb0c8ac91118a461c6b54e4f8534b69262f3db37cff1ddac9d874f8370b94ad89cfc2772b96cb95539cb8e9a453e919f7def62
7
- data.tar.gz: 6d4319ce64cf4a115606075d1edb16df725b781ab7c8a6c362df608d4ed6c095e50e80682cdaaf36285781bac34a8117d99688e832862e0b688b79e47f20c0ca
6
+ metadata.gz: 3e85208c4fcbf1a199169e75b40490181127fbaf7398e6ebb78daaead3f36704e76b7177ce43edfe404e5509e95f5a592ed8db4d4f7fa379efcca0d382fdedd6
7
+ data.tar.gz: eb8187f0b900731bdf21c4e3b3ffbb0ac24ad133bd6d7819b20e4b7eb24420e649bf690d43f0b0e3aa5caf496146e208a0d13fa33c6be72183b8469eca96bb1c
@@ -18,9 +18,10 @@ module Bricolage
18
18
 
19
19
  class Job
20
20
 
21
- def initialize(context:, ctl_ds:, task_id:, force: false, logger:)
21
+ def initialize(context:, ctl_ds:, log_table: 'strload_load_logs', task_id:, force: false, logger:)
22
22
  @context = context
23
23
  @ctl_ds = ctl_ds
24
+ @log_table = log_table
24
25
  @task_id = task_id
25
26
  @force = force
26
27
  @logger = logger
@@ -53,14 +54,17 @@ module Bricolage
53
54
  @logger.error ex.message
54
55
  wait_for_connection('ctl', @ctl_ds) unless fail_fast
55
56
  return false
56
- rescue DataConnectionFailed
57
+ rescue DataConnectionFailed => ex
58
+ @logger.error ex.message
57
59
  wait_for_connection('data', @data_ds) unless fail_fast
58
60
  # FIXME: tmp: We don't know the transaction was succeeded or not in the Redshift, auto-retry is too dangerous.
59
61
  #return false
60
62
  return true
61
- rescue JobFailure
63
+ rescue JobFailure => ex
64
+ @logger.error ex.message
62
65
  return false
63
- rescue JobError
66
+ rescue JobError => ex
67
+ @logger.error ex.message
64
68
  return true
65
69
  rescue Exception => ex
66
70
  @logger.exception ex
@@ -101,14 +105,12 @@ module Bricolage
101
105
 
102
106
  # FIXME: tmp: should be a failure, not an error.
103
107
  rescue DataConnectionFailed => ex
104
- @logger.error ex.message
105
108
  ctl.open {
106
109
  ctl.abort_job job_id, 'error', ex.message.lines.first.strip
107
110
  }
108
111
  raise
109
112
 
110
113
  rescue JobFailure => ex
111
- @logger.error ex.message
112
114
  ctl.open {
113
115
  fail_count = ctl.fail_count(@task_id)
114
116
  final_retry = (fail_count >= MAX_RETRY)
@@ -118,7 +120,6 @@ module Bricolage
118
120
  }
119
121
  raise
120
122
  rescue JobError => ex
121
- @logger.error ex.message
122
123
  ctl.open {
123
124
  ctl.abort_job job_id, 'error', ex.message.lines.first.strip
124
125
  }
@@ -138,9 +139,9 @@ module Bricolage
138
139
  @manifest = ManifestFile.create(ds: params.ctl_bucket, job_id: job_id, object_urls: task.object_urls, logger: @logger)
139
140
  DataConnection.open(params.ds, @logger) {|data|
140
141
  if params.enable_work_table?
141
- data.load_with_work_table params.work_table, @manifest, params.load_options_string, params.sql_source
142
+ data.load_with_work_table params.work_table, @manifest, params.load_options_string, params.sql_source, @log_table, job_id
142
143
  else
143
- data.load_objects params.dest_table, @manifest, params.load_options_string
144
+ data.load_objects params.dest_table, @manifest, params.load_options_string, @log_table, job_id
144
145
  end
145
146
  }
146
147
  end
@@ -192,18 +193,26 @@ module Bricolage
192
193
  raise DataConnectionFailed, "data connection failed: #{ex.message}"
193
194
  end
194
195
 
195
- def load_with_work_table(work_table, manifest, options, sql_source)
196
+ def load_with_work_table(work_table, manifest, options, sql_source, log_table, job_id)
196
197
  @connection.transaction {|txn|
197
198
  # NOTE: This transaction ends with truncation, this DELETE does nothing
198
199
  # from the second time. So don't worry about DELETE cost here.
199
200
  @connection.execute("delete from #{work_table}")
200
- load_objects work_table, manifest, options
201
+ execute_copy work_table, manifest, options
201
202
  @connection.execute sql_source
203
+ write_load_log log_table, job_id
202
204
  txn.truncate_and_commit work_table
203
205
  }
204
206
  end
205
207
 
206
- def load_objects(dest_table, manifest, options)
208
+ def load_objects(dest_table, manifest, options, log_table, job_id)
209
+ @connection.transaction {|txn|
210
+ execute_copy dest_table, manifest, options
211
+ write_load_log log_table, job_id
212
+ }
213
+ end
214
+
215
+ def execute_copy(dest_table, manifest, options)
207
216
  @connection.execute(<<-EndSQL.strip.gsub(/\s+/, ' '))
208
217
  copy #{dest_table}
209
218
  from #{s manifest.url}
@@ -217,6 +226,10 @@ module Bricolage
217
226
  @logger.info "load succeeded: #{manifest.url}"
218
227
  end
219
228
 
229
+ def write_load_log(log_table, job_id)
230
+ @connection.execute("insert into #{log_table} (job_id, finish_time) values (#{job_id}, current_timestamp)")
231
+ end
232
+
220
233
  end # class DataConnection
221
234
 
222
235
 
@@ -30,6 +30,7 @@ module Bricolage
30
30
 
31
31
  ctl_ds = ctx.get_data_source('sql', config.fetch('ctl-postgres-ds', 'db_ctl'))
32
32
  task_queue = ctx.get_data_source('sqs', config.fetch('task-queue-ds', 'sqs_task'))
33
+ log_table = config.fetch('log-table', 'strload_load_logs')
33
34
  service_logger =
34
35
  if config.key?('alert-level')
35
36
  new_alerting_logger(ctx, config)
@@ -40,6 +41,7 @@ module Bricolage
40
41
  task_handler = new(
41
42
  context: ctx,
42
43
  ctl_ds: ctl_ds,
44
+ log_table: log_table,
43
45
  task_queue: task_queue,
44
46
  working_dir: opts.working_dir,
45
47
  logger: service_logger,
@@ -90,9 +92,10 @@ module Bricolage
90
92
  # ignore
91
93
  end
92
94
 
93
- def initialize(context:, ctl_ds:, task_queue:, working_dir:, logger:, job_class: Job)
95
+ def initialize(context:, ctl_ds:, log_table:, task_queue:, working_dir:, logger:, job_class: Job)
94
96
  @ctx = context
95
97
  @ctl_ds = ctl_ds
98
+ @log_table = log_table
96
99
  @task_queue = task_queue
97
100
  @working_dir = working_dir
98
101
  @logger = logger
@@ -102,7 +105,7 @@ module Bricolage
102
105
  attr_reader :logger
103
106
 
104
107
  def execute_task_by_id(task_id, force: false)
105
- job = @job_class.new(context: @ctx, ctl_ds: @ctl_ds, task_id: task_id, force: force, logger: @logger)
108
+ job = new_job(task_id, force)
106
109
  job.execute(fail_fast: true)
107
110
  end
108
111
 
@@ -119,7 +122,7 @@ module Bricolage
119
122
  # message handler
120
123
  def handle_streaming_load_v3(t)
121
124
  Dir.chdir(@working_dir) {
122
- job = @job_class.new(context: @ctx, ctl_ds: @ctl_ds, task_id: t.task_id, force: t.force?, logger: @logger)
125
+ job = new_job(t.task_id, t.force?)
123
126
  if job.execute
124
127
  @task_queue.delete_message(t)
125
128
  end
@@ -128,6 +131,10 @@ module Bricolage
128
131
  @logger.exception ex
129
132
  end
130
133
 
134
+ def new_job(task_id, force)
135
+ @job_class.new(context: @ctx, ctl_ds: @ctl_ds, log_table: @log_table, task_id: task_id, force: force, logger: @logger)
136
+ end
137
+
131
138
  def job_class
132
139
  @job_class ||= Job
133
140
  end
@@ -1,5 +1,5 @@
1
1
  module Bricolage
2
2
  module StreamingLoad
3
- VERSION = '0.8.1'
3
+ VERSION = '0.9.0'
4
4
  end
5
5
  end
@@ -27,8 +27,13 @@ module Bricolage
27
27
  job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 1, force: false, logger: ctx.logger)
28
28
  job.execute_task
29
29
 
30
- copy_stmt = "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
31
- assert_equal [copy_stmt], job.data_ds.sql_list
30
+ assert_equal [
31
+ "begin transaction;",
32
+ "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
33
+ "insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
34
+ "commit;"
35
+ ], job.data_ds.sql_list
36
+
32
37
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
33
38
  assert_equal 1, job_row['task_id'].to_i
34
39
  assert_equal job.process_id, job_row['process_id']
@@ -48,11 +53,14 @@ module Bricolage
48
53
  job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
49
54
  job.execute_task
50
55
 
51
- assert_equal 'begin transaction;', job.data_ds.sql_list[0]
52
- assert_equal 'delete from testschema.with_work_table_wk', job.data_ds.sql_list[1]
53
- assert_equal "copy testschema.with_work_table_wk from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;", job.data_ds.sql_list[2]
54
- assert_equal "insert into testschema.with_work_table select * from testschema.with_work_table_wk;\n", job.data_ds.sql_list[3]
55
- assert_equal 'truncate testschema.with_work_table_wk;', job.data_ds.sql_list[4]
56
+ assert_equal [
57
+ "begin transaction;",
58
+ "delete from testschema.with_work_table_wk",
59
+ "copy testschema.with_work_table_wk from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
60
+ "insert into testschema.with_work_table select * from testschema.with_work_table_wk;\n",
61
+ "insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
62
+ "truncate testschema.with_work_table_wk;"
63
+ ], job.data_ds.sql_list
56
64
 
57
65
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
58
66
  assert_equal 11, job_row['task_id'].to_i
@@ -103,8 +111,12 @@ module Bricolage
103
111
  job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: true, logger: ctx.logger)
104
112
  job.execute_task
105
113
 
106
- copy_stmt = "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
107
- assert_equal [copy_stmt], job.data_ds.sql_list
114
+ assert_equal [
115
+ "begin transaction;",
116
+ "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
117
+ "insert into strload_load_logs (job_id, finish_time) values (#{job.job_id}, current_timestamp)",
118
+ "commit;"
119
+ ], job.data_ds.sql_list
108
120
 
109
121
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
110
122
  assert_equal 11, job_row['task_id'].to_i
@@ -127,8 +139,11 @@ module Bricolage
127
139
  assert_raise(JobFailure) {
128
140
  job.execute_task
129
141
  }
130
- copy_stmt = "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
131
- assert_equal [copy_stmt], job.data_ds.sql_list
142
+ assert_equal [
143
+ "begin transaction;",
144
+ "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
145
+ "abort;"
146
+ ], job.data_ds.sql_list
132
147
 
133
148
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
134
149
  assert_equal 11, job_row['task_id'].to_i
@@ -153,8 +168,11 @@ module Bricolage
153
168
  assert_raise(JobFailure) {
154
169
  job.execute_task
155
170
  }
156
- copy_stmt = "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
157
- assert_equal [copy_stmt], job.data_ds.sql_list
171
+ assert_equal [
172
+ "begin transaction;",
173
+ "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
174
+ "abort;"
175
+ ], job.data_ds.sql_list
158
176
 
159
177
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
160
178
  assert_equal 11, job_row['task_id'].to_i
@@ -183,8 +201,12 @@ module Bricolage
183
201
  assert_raise(JobCancelled) {
184
202
  job.execute_task
185
203
  }
186
- copy_stmt = "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
187
- assert_equal [copy_stmt], job.data_ds.sql_list
204
+ assert_equal [
205
+ "begin transaction;",
206
+ "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
207
+ "abort;"
208
+ ], job.data_ds.sql_list
209
+
188
210
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
189
211
  assert_equal 11, job_row['task_id'].to_i
190
212
  assert_equal job.process_id, job_row['process_id']
@@ -206,7 +228,12 @@ module Bricolage
206
228
  assert_raise(JobError) {
207
229
  job.execute_task
208
230
  }
209
- assert_equal 1, job.data_ds.sql_list.size
231
+ assert_equal [
232
+ "begin transaction;",
233
+ "copy testschema.job_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
234
+ "abort;"
235
+ ], job.data_ds.sql_list
236
+
210
237
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
211
238
  assert_equal 11, job_row['task_id'].to_i
212
239
  assert_equal job.process_id, job_row['process_id']
@@ -227,7 +254,12 @@ module Bricolage
227
254
  assert_raise(JobError) {
228
255
  job.execute_task
229
256
  }
230
- assert_equal 1, job.data_ds.sql_list.size
257
+ assert_equal [
258
+ "begin transaction;",
259
+ "copy testschema.unexpected_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
260
+ "abort;"
261
+ ], job.data_ds.sql_list
262
+
231
263
  job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
232
264
  assert_equal 11, job_row['task_id'].to_i
233
265
  assert_equal job.process_id, job_row['process_id']
@@ -379,21 +411,39 @@ module Bricolage
379
411
 
380
412
  def transaction
381
413
  @ds.issue_sql "begin transaction;"
382
- yield Transaction.new(@ds)
414
+ txn = Transaction.new(@ds)
415
+ yield txn
416
+ rescue
417
+ txn.abort unless txn.committed?
418
+ raise
419
+ ensure
420
+ txn.commit unless txn.committed?
383
421
  end
384
422
  end
385
423
 
386
424
  class Transaction
387
425
  def initialize(ds)
388
426
  @ds = ds
427
+ @commit = false
428
+ end
429
+
430
+ def committed?
431
+ @commit
389
432
  end
390
433
 
391
434
  def commit
392
435
  @ds.issue_sql "commit;"
436
+ @commit = true
437
+ end
438
+
439
+ def abort
440
+ @ds.issue_sql "abort;"
441
+ @commit = true
393
442
  end
394
443
 
395
444
  def truncate_and_commit(table)
396
445
  @ds.issue_sql "truncate #{table};"
446
+ @commit = true
397
447
  end
398
448
  end
399
449
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bricolage-streamingload
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-10-19 00:00:00.000000000 Z
12
+ date: 2016-10-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bricolage