bricolage-streamingload 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 14a3e79cc5b39bd90b848b1d3b030bf244ddee15
4
- data.tar.gz: b82cce95ef1573cb2dd0fd74214777afd3f972ae
3
+ metadata.gz: 8a33b6a5561c4bf69b725a96bca4abf75f06fe8a
4
+ data.tar.gz: 9c6357983ba1fea216fd3e0931d08f549f610320
5
5
  SHA512:
6
- metadata.gz: 6daab7f6d72fd79261d047ac8f5a0ab0f8dcdd0fb392b1ee013dd6c0616fe90b83ab3637274dc4e7fdab23c81b3a6d3888ce614083ff1ca07b9778017aab6e76
7
- data.tar.gz: 1c0041319e7091f869f51c182dad162e81c37f4683d990b5cd224ff5ec93ec004936cc22d44eb961e9628e8962eaaf0c253c30a8036f44331ad5970bae5a3fc6
6
+ metadata.gz: e701e7aa67d9a9b6503b436342a5d2ef3b1edb49513fa6881bf4c9b758e5d44f50a7b444b950527518187f3c8d8da896ee7fc597d9d0878611c8c34c2dbb3a19
7
+ data.tar.gz: 77b535d15e8e0724c054655e8f351b7620454f08e67a86a5c61f01b8bb623361b22344b93d590608538ee59e9809886e64ae48ec3b80f8cf61570897e3814e41
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  Bundler.require(:default) if defined?(Bundler)
4
- require 'bricolage/streamingload/loaderservice'
4
+ require 'bricolage/streamingload/taskhandler'
5
5
 
6
- Bricolage::StreamingLoad::LoaderService.main
6
+ Bricolage::StreamingLoad::TaskHandler.main
@@ -1,5 +1,4 @@
1
1
  require 'bricolage/sqsdatasource'
2
- require 'bricolage/nulllogger'
3
2
  require 'json'
4
3
 
5
4
  module Bricolage
@@ -27,7 +27,7 @@ module Bricolage
27
27
  end
28
28
  config_path, * = opts.rest_arguments
29
29
  config = YAML.load(File.read(config_path))
30
- log = opts.log_file_path ? new_logger(opts.log_file_path, config) : nil
30
+ log = opts.log_file_path ? new_logger(File.expand_path(opts.log_file_path), config) : nil
31
31
  ctx = Context.for_application('.', environment: opts.environment, logger: log)
32
32
  logger = raw_logger = ctx.logger
33
33
  event_queue = ctx.get_data_source('sqs', config.fetch('event-queue-ds', 'sqs_event'))
@@ -58,6 +58,7 @@ module Bricolage
58
58
 
59
59
  Process.daemon(true) if opts.daemon?
60
60
  create_pid_file opts.pid_file_path if opts.pid_file_path
61
+ Dir.chdir '/'
61
62
  dispatcher.event_loop
62
63
  rescue Exception => e
63
64
  logger.exception e
@@ -0,0 +1,387 @@
1
+ require 'bricolage/streamingload/jobparams'
2
+ require 'bricolage/streamingload/manifest'
3
+ require 'bricolage/sqlutils'
4
+ require 'socket'
5
+ require 'json'
6
+
7
+ module Bricolage
8
+
9
+ module StreamingLoad
10
+
11
+ class JobCancelled < ApplicationError; end
12
+ class JobDefered < ApplicationError; end
13
+ class JobDuplicated < ApplicationError; end
14
+
15
+ class ControlConnectionFailed < JobFailure; end
16
+ class DataConnectionFailed < JobFailure; end
17
+
18
+
19
+ class Job
20
+
21
+ def initialize(context:, ctl_ds:, task_id:, force: false, logger:)
22
+ @context = context
23
+ @ctl_ds = ctl_ds
24
+ @task_id = task_id
25
+ @force = force
26
+ @logger = logger
27
+
28
+ @task = nil
29
+ @job_id = nil
30
+ @data_ds = nil
31
+ @manifest = nil
32
+ end
33
+
34
+ # For tests
35
+ attr_reader :job_id
36
+ attr_reader :process_id
37
+ attr_reader :task
38
+ attr_reader :data_ds
39
+ attr_reader :manifest
40
+
41
+ # Returns true -> Deletes a SQS message
42
+ # Returns false -> Keeps a SQS message
43
+ def execute(fail_fast: false)
44
+ execute_task
45
+ return true
46
+ rescue JobCancelled
47
+ return true
48
+ rescue JobDuplicated
49
+ return true
50
+ rescue JobDefered
51
+ return false
52
+ rescue ControlConnectionFailed => ex
53
+ @logger.error ex.message
54
+ wait_for_connection('ctl', @ctl_ds) unless fail_fast
55
+ return false
56
+ rescue DataConnectionFailed
57
+ wait_for_connection('data', @data_ds) unless fail_fast
58
+ return false
59
+ rescue JobFailure
60
+ return false
61
+ rescue JobError
62
+ return true
63
+ rescue Exception => ex
64
+ @logger.exception ex
65
+ return true
66
+ end
67
+
68
+ MAX_RETRY = 5
69
+
70
+ def execute_task
71
+ @process_id = "#{Socket.gethostname}-#{$$}"
72
+ @logger.info "execute task: task_id=#{@task_id} force=#{@force} process_id=#{@process_id}"
73
+ ctl = ControlConnection.new(@ctl_ds, @logger)
74
+
75
+ ctl.open {
76
+ @task = ctl.load_task(@task_id)
77
+ @logger.info "task details: task_id=#{@task_id} table=#{@task.schema_name}.#{@task.table_name}"
78
+ if @task.disabled
79
+ # We do not record disabled job in the DB.
80
+ @logger.info "task is disabled; defer task: task_id=#{@task_id}"
81
+ raise JobDefered, "defered: task_id=#{@task_id}"
82
+ end
83
+
84
+ @job_id = ctl.begin_job(@task_id, @process_id, @force)
85
+ unless @job_id
86
+ @logger.warn "task is already succeeded and not forced; discard task: task_id=#{@task_id}"
87
+ ctl.commit_duplicated_job @task_id, @process_id
88
+ raise JobDuplicated, "duplicated: task_id=#{@task_id}"
89
+ end
90
+ }
91
+
92
+ begin
93
+ do_load @task, @job_id
94
+ ctl.open {
95
+ ctl.commit_job @job_id, (@force ? 'forced' : nil)
96
+ }
97
+ rescue ControlConnectionFailed
98
+ raise
99
+ rescue JobFailure => ex
100
+ @logger.error ex.message
101
+ ctl.open {
102
+ fail_count = ctl.fail_count(@task_id)
103
+ final_retry = (fail_count >= MAX_RETRY)
104
+ retry_msg = (fail_count > 0) ? "(retry\##{fail_count}#{final_retry ? ' FINAL' : ''}) " : ''
105
+ ctl.abort_job job_id, 'failure', retry_msg + ex.message.lines.first.strip
106
+ raise JobCancelled, "retry count exceeds limit: task_id=#{@task_id}" if final_retry
107
+ }
108
+ raise
109
+ rescue JobError => ex
110
+ @logger.error ex.message
111
+ ctl.open {
112
+ ctl.abort_job job_id, 'error', ex.message.lines.first.strip
113
+ }
114
+ raise
115
+ rescue Exception => ex
116
+ @logger.exception ex
117
+ ctl.open {
118
+ ctl.abort_job job_id, 'error', ex.message.lines.first.strip
119
+ }
120
+ raise JobError, "#{ex.class}: #{ex.message}"
121
+ end
122
+ end
123
+
124
+ def do_load(task, job_id)
125
+ params = JobParams.load(@context, task.task_class, task.schema_name, task.table_name)
126
+ @data_ds = params.ds
127
+ @manifest = ManifestFile.create(ds: params.ctl_bucket, job_id: job_id, object_urls: task.object_urls, logger: @logger)
128
+ DataConnection.open(params.ds, @logger) {|data|
129
+ if params.enable_work_table?
130
+ data.load_with_work_table params.work_table, @manifest, params.load_options_string, params.sql_source
131
+ else
132
+ data.load_objects params.dest_table, @manifest, params.load_options_string
133
+ end
134
+ }
135
+ end
136
+
137
+ def wait_for_connection(type, ds)
138
+ @logger.warn "loader: #{type} DB connection lost; polling..."
139
+ start_time = Time.now
140
+ n = 0
141
+ while true
142
+ begin
143
+ ds.open {}
144
+ @logger.warn "loader: #{type} DB connection recovered; return to normal state"
145
+ return true
146
+ rescue ConnectionError
147
+ end
148
+ sleep 15
149
+ n += 1
150
+ if n == 120 # 30 min
151
+ # Could not get a connection in 30 minutes, now we return to the queue loop.
152
+ # Next job may fail too, but we should not stop to receive the task queue too long,
153
+ # because it contains control tasks.
154
+ @logger.warn "loader: #{type} DB connection still failing (since #{start_time}); give up."
155
+ return false
156
+ end
157
+ end
158
+ end
159
+
160
+
161
+ class DataConnection
162
+
163
+ include SQLUtils
164
+
165
+ def DataConnection.open(ds, logger = ds.logger, &block)
166
+ new(ds, logger).open(&block)
167
+ end
168
+
169
+ def initialize(ds, logger = ds.logger)
170
+ @ds = ds
171
+ @connection = nil
172
+ @logger = logger
173
+ end
174
+
175
+ def open(&block)
176
+ @ds.open {|conn|
177
+ @connection = conn
178
+ yield self
179
+ }
180
+ rescue ConnectionError => ex
181
+ raise DataConnectionFailed, "data connection failed: #{ex.message}"
182
+ end
183
+
184
+ def load_with_work_table(work_table, manifest, options, sql_source)
185
+ @connection.transaction {|txn|
186
+ # NOTE: This transaction ends with truncation, this DELETE does nothing
187
+ # from the second time. So don't worry about DELETE cost here.
188
+ @connection.execute("delete from #{work_table}")
189
+ load_objects work_table, manifest, options
190
+ @connection.execute sql_source
191
+ txn.truncate_and_commit work_table
192
+ }
193
+ end
194
+
195
+ def load_objects(dest_table, manifest, options)
196
+ @connection.execute(<<-EndSQL.strip.gsub(/\s+/, ' '))
197
+ copy #{dest_table}
198
+ from #{s manifest.url}
199
+ credentials #{s manifest.credential_string}
200
+ manifest
201
+ statupdate false
202
+ compupdate false
203
+ #{options}
204
+ ;
205
+ EndSQL
206
+ @logger.info "load succeeded: #{manifest.url}"
207
+ end
208
+
209
+ end # class DataConnection
210
+
211
+
212
+ class ControlConnection
213
+
214
+ include SQLUtils
215
+
216
+ def ControlConnection.open(ds, logger = ds.logger, &block)
217
+ new(ds, logger).open(&block)
218
+ end
219
+
220
+ def initialize(ds, logger = ds.logger)
221
+ @ds = ds
222
+ @connection = nil
223
+ end
224
+
225
+ def open(&block)
226
+ @ds.open {|conn|
227
+ @connection = conn
228
+ yield self
229
+ }
230
+ rescue ConnectionError => ex
231
+ raise ControlConnectionFailed, "control connection failed: #{ex.message}"
232
+ end
233
+
234
+ TaskInfo = Struct.new(:task_id, :task_class, :schema_name, :table_name, :disabled, :object_urls)
235
+
236
+ def load_task(task_id)
237
+ rec = @connection.query_row(<<-EndSQL) or raise JobError, "no such task: #{task_id}"
238
+ select
239
+ tsk.task_class
240
+ , tbl.schema_name
241
+ , tbl.table_name
242
+ , tbl.disabled
243
+ from
244
+ strload_tasks tsk
245
+ inner join strload_tables tbl using (table_id)
246
+ where
247
+ tsk.task_id = #{task_id}
248
+ ;
249
+ EndSQL
250
+ TaskInfo.new(
251
+ task_id,
252
+ rec['task_class'],
253
+ rec['schema_name'],
254
+ rec['table_name'],
255
+ (rec['disabled'] != 'f'),
256
+ load_object_urls(task_id)
257
+ )
258
+ end
259
+
260
+ def load_object_urls(task_id)
261
+ urls = @connection.query_values(<<-EndSQL)
262
+ select
263
+ o.object_url
264
+ from
265
+ strload_tasks t
266
+ inner join strload_task_objects tob using (task_id)
267
+ inner join strload_objects o using (object_id)
268
+ where
269
+ t.task_id = #{task_id}
270
+ ;
271
+ EndSQL
272
+ urls
273
+ end
274
+
275
+ def begin_job(task_id, process_id, force)
276
+ job_id = @connection.query_value(<<-EndSQL)
277
+ insert into strload_jobs
278
+ ( task_id
279
+ , process_id
280
+ , status
281
+ , start_time
282
+ )
283
+ select
284
+ task_id
285
+ , #{s process_id}
286
+ , 'running'
287
+ , current_timestamp
288
+ from
289
+ strload_tasks
290
+ where
291
+ task_id = #{task_id}
292
+ and (#{force ? 'true' : 'false'} or task_id not in (select task_id from strload_jobs where status = 'success'))
293
+ returning job_id
294
+ ;
295
+ EndSQL
296
+ return job_id ? job_id.to_i : nil
297
+ end
298
+
299
+ def fail_count(task_id)
300
+ statuses = @connection.query_values(<<-EndSQL)
301
+ select
302
+ j.status
303
+ from
304
+ strload_tasks t
305
+ inner join strload_jobs j using (task_id)
306
+ where
307
+ t.task_id = #{task_id}
308
+ order by
309
+ j.job_id desc
310
+ EndSQL
311
+ statuses.shift if statuses.first == 'running' # current job
312
+ statuses.take_while {|st| %w[failure error].include?(st) }.size
313
+ end
314
+
315
+ def commit_job(job_id, message = nil)
316
+ @connection.transaction {|txn|
317
+ write_job_result job_id, 'success', (message || '')
318
+ update_loaded_flag job_id
319
+ }
320
+ end
321
+
322
+ def abort_job(job_id, status, message)
323
+ write_job_result(job_id, status, message)
324
+ end
325
+
326
+ MAX_MESSAGE_LENGTH = 1000
327
+
328
+ def write_job_result(job_id, status, message)
329
+ @connection.execute(<<-EndSQL)
330
+ update
331
+ strload_jobs
332
+ set
333
+ (status, finish_time, message) = (#{s status}, current_timestamp, #{s message[0, MAX_MESSAGE_LENGTH]})
334
+ where
335
+ job_id = #{job_id}
336
+ ;
337
+ EndSQL
338
+ end
339
+
340
+ def update_loaded_flag(job_id)
341
+ @connection.execute(<<-EndSQL)
342
+ update
343
+ strload_objects
344
+ set
345
+ loaded = true
346
+ where
347
+ object_id in (
348
+ select
349
+ object_id
350
+ from
351
+ strload_task_objects
352
+ where task_id = (select task_id from strload_jobs where job_id = #{job_id})
353
+ )
354
+ ;
355
+ EndSQL
356
+ end
357
+
358
+ def commit_duplicated_job(task_id, process_id)
359
+ job_id = @connection.query_value(<<-EndSQL)
360
+ insert into strload_jobs
361
+ ( task_id
362
+ , process_id
363
+ , status
364
+ , start_time
365
+ , finish_time
366
+ , message
367
+ )
368
+ select
369
+ #{task_id}
370
+ , #{s process_id}
371
+ , 'duplicated'
372
+ , current_timestamp
373
+ , current_timestamp
374
+ , ''
375
+ returning job_id
376
+ ;
377
+ EndSQL
378
+ return job_id
379
+ end
380
+
381
+ end # class ControlConnection
382
+
383
+ end # class Job
384
+
385
+ end # module StreamingLoad
386
+
387
+ end # module Bricolage
@@ -6,69 +6,44 @@ module Bricolage
6
6
 
7
7
  module StreamingLoad
8
8
 
9
- class LoaderParams
9
+ class JobParams
10
10
 
11
- def LoaderParams.load(ctx, task)
12
- job = load_job(ctx, task)
13
- schema = resolve_schema(ctx, task.schema)
14
- job.provide_default 'dest-table', "#{schema}.#{task.table}"
11
+ def JobParams.load(ctx, job_class, schema, table)
12
+ job = load_bricolage_job(ctx, job_class, schema, table)
13
+ schema = resolve_schema(ctx, schema)
14
+ job.provide_default 'dest-table', "#{schema}.#{table}"
15
15
  #job.provide_sql_file_by_job_id # FIXME: provide only when exist
16
16
  job.compile
17
- new(task, job)
17
+ new(job)
18
18
  end
19
19
 
20
- def LoaderParams.load_job(ctx, task)
21
- if job_file = find_job_file(ctx, task.schema, task.table)
20
+ def JobParams.load_bricolage_job(ctx, job_class, schema, table)
21
+ if job_file = find_job_file(ctx, schema, table)
22
22
  ctx.logger.debug "using .job file: #{job_file}"
23
- Job.load_file(job_file, ctx.subsystem(task.schema))
23
+ Bricolage::Job.load_file(job_file, ctx.subsystem(schema))
24
24
  else
25
25
  ctx.logger.debug "using default job parameters (no .job file)"
26
- Job.instantiate(task.table, 'streaming_load_v3', ctx).tap {|job|
26
+ Bricolage::Job.instantiate(table, job_class, ctx).tap {|job|
27
27
  job.bind_parameters({})
28
28
  }
29
29
  end
30
30
  end
31
31
 
32
- def LoaderParams.find_job_file(ctx, schema, table)
32
+ def JobParams.find_job_file(ctx, schema, table)
33
33
  paths = Dir.glob("#{ctx.home_path}/#{schema}/#{table}.*")
34
34
  paths.select {|path| File.extname(path) == '.job' }.sort.first
35
35
  end
36
36
 
37
- def LoaderParams.resolve_schema(ctx, schema)
37
+ def JobParams.resolve_schema(ctx, schema)
38
38
  ctx.global_variables["#{schema}_schema"] || schema
39
39
  end
40
40
  private_class_method :resolve_schema
41
41
 
42
- def initialize(task, job)
43
- @task = task
42
+ def initialize(job)
44
43
  @job = job
45
44
  @params = job.params
46
45
  end
47
46
 
48
- def task_id
49
- @task.id
50
- end
51
-
52
- def task_id
53
- @task.id
54
- end
55
-
56
- def schema
57
- @task.schema
58
- end
59
-
60
- def table
61
- @task.table
62
- end
63
-
64
- def force?
65
- @task.force?
66
- end
67
-
68
- def object_urls
69
- @task.object_urls
70
- end
71
-
72
47
  def ds
73
48
  @params['redshift-ds']
74
49
  end
@@ -101,7 +76,7 @@ module Bricolage
101
76
  end
102
77
 
103
78
 
104
- class LoaderJob < RubyJobClass
79
+ class StreamingLoadV3Job < RubyJobClass
105
80
 
106
81
  job_class_id 'streaming_load_v3'
107
82
 
@@ -4,7 +4,7 @@ module Bricolage
4
4
 
5
5
  class ManifestFile
6
6
 
7
- def ManifestFile.create(ds, job_id:, object_urls:, logger:, noop: false, &block)
7
+ def ManifestFile.create(ds:, job_id:, object_urls:, logger:, noop: false, &block)
8
8
  manifest = new(ds, job_id, object_urls, logger: logger, noop: noop)
9
9
  if block
10
10
  manifest.create_temporary(&block)
@@ -49,11 +49,17 @@ module Bricolage
49
49
  def put
50
50
  @logger.info "s3: put: #{url}"
51
51
  @ds.object(name).put(body: content) unless @noop
52
+ rescue Aws::S3::Errors::ServiceError => ex
53
+ @logger.exception ex
54
+ raise S3Exception.wrap(ex)
52
55
  end
53
56
 
54
57
  def delete
55
58
  @logger.info "s3: delete: #{url}"
56
59
  @ds.object(name).delete unless @noop
60
+ rescue Aws::S3::Errors::ServiceError => ex
61
+ @logger.exception ex
62
+ raise S3Exception.wrap(ex)
57
63
  end
58
64
 
59
65
  def create_temporary
@@ -1,8 +1,5 @@
1
1
  require 'bricolage/streamingload/task'
2
- require 'bricolage/streamingload/loaderparams'
3
2
  require 'bricolage/sqlutils'
4
- require 'json'
5
- require 'securerandom'
6
3
  require 'forwardable'
7
4
 
8
5
  module Bricolage
@@ -1,5 +1,4 @@
1
1
  require 'bricolage/sqsdatasource'
2
- require 'json'
3
2
 
4
3
  module Bricolage
5
4
 
@@ -38,85 +37,23 @@ module Bricolage
38
37
  }
39
38
  end
40
39
 
41
- def LoadTask.load(conn, task_id, force: false)
42
- rec = conn.query_row(<<-EndSQL)
43
- select
44
- task_class
45
- , tbl.schema_name
46
- , tbl.table_name
47
- , disabled
48
- from
49
- strload_tasks tsk
50
- inner join strload_tables tbl
51
- using(table_id)
52
- where
53
- task_id = #{task_id}
54
- ;
55
- EndSQL
56
- object_urls = conn.query_values(<<-EndSQL)
57
- select
58
- object_url
59
- from
60
- strload_task_objects
61
- inner join strload_objects
62
- using (object_id)
63
- inner join strload_tasks
64
- using (task_id)
65
- where
66
- task_id = #{task_id}
67
- ;
68
- EndSQL
69
- return nil unless rec
70
- new(
71
- name: rec['task_class'],
72
- time: nil,
73
- source: nil,
74
- task_id: task_id,
75
- schema: rec['schema_name'],
76
- table: rec['table_name'],
77
- object_urls: object_urls,
78
- disabled: rec['disabled'] == 'f' ? false : true,
79
- force: force
80
- )
81
- end
82
-
83
40
  alias message_type name
84
41
 
85
- def init_message(task_id:, schema: nil, table: nil, object_urls: nil, disabled: false, force: false)
86
- @id = task_id
42
+ def init_message(task_id:, force: false)
43
+ @task_id = task_id
87
44
  @force = force
88
-
89
- # Effective only for queue reader process
90
- @schema = schema
91
- @table = table
92
- @object_urls = object_urls
93
- @disabled = disabled
94
45
  end
95
46
 
96
- attr_reader :id
47
+ attr_reader :task_id
97
48
 
98
49
  def force?
99
50
  !!@force
100
51
  end
101
52
 
102
- #
103
- # For writer only
104
- #
105
-
106
- attr_reader :schema, :table, :object_urls, :disabled
107
-
108
- def qualified_name
109
- "#{@schema}.#{@table}"
110
- end
111
-
112
53
  def body
113
54
  obj = super
114
- obj['taskId'] = @id
115
- obj['schemaName'] = @schema
116
- obj['tableName'] = @table
117
- obj['objectUrls'] = @object_urls
118
- obj['disabled'] = @disabled
119
- obj['force'] = @force
55
+ obj['taskId'] = @task_id
56
+ obj['force'] = true if @force
120
57
  obj
121
58
  end
122
59