bricolage-streamingload 0.7.1 → 0.8.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 14a3e79cc5b39bd90b848b1d3b030bf244ddee15
- data.tar.gz: b82cce95ef1573cb2dd0fd74214777afd3f972ae
+ metadata.gz: 8a33b6a5561c4bf69b725a96bca4abf75f06fe8a
+ data.tar.gz: 9c6357983ba1fea216fd3e0931d08f549f610320
  SHA512:
- metadata.gz: 6daab7f6d72fd79261d047ac8f5a0ab0f8dcdd0fb392b1ee013dd6c0616fe90b83ab3637274dc4e7fdab23c81b3a6d3888ce614083ff1ca07b9778017aab6e76
- data.tar.gz: 1c0041319e7091f869f51c182dad162e81c37f4683d990b5cd224ff5ec93ec004936cc22d44eb961e9628e8962eaaf0c253c30a8036f44331ad5970bae5a3fc6
+ metadata.gz: e701e7aa67d9a9b6503b436342a5d2ef3b1edb49513fa6881bf4c9b758e5d44f50a7b444b950527518187f3c8d8da896ee7fc597d9d0878611c8c34c2dbb3a19
+ data.tar.gz: 77b535d15e8e0724c054655e8f351b7620454f08e67a86a5c61f01b8bb623361b22344b93d590608538ee59e9809886e64ae48ec3b80f8cf61570897e3814e41
@@ -1,6 +1,6 @@
  #!/usr/bin/env ruby

  Bundler.require(:default) if defined?(Bundler)
- require 'bricolage/streamingload/loaderservice'
+ require 'bricolage/streamingload/taskhandler'

- Bricolage::StreamingLoad::LoaderService.main
+ Bricolage::StreamingLoad::TaskHandler.main
@@ -1,5 +1,4 @@
  require 'bricolage/sqsdatasource'
- require 'bricolage/nulllogger'
  require 'json'

  module Bricolage
@@ -27,7 +27,7 @@ module Bricolage
  end
  config_path, * = opts.rest_arguments
  config = YAML.load(File.read(config_path))
- log = opts.log_file_path ? new_logger(opts.log_file_path, config) : nil
+ log = opts.log_file_path ? new_logger(File.expand_path(opts.log_file_path), config) : nil
  ctx = Context.for_application('.', environment: opts.environment, logger: log)
  logger = raw_logger = ctx.logger
  event_queue = ctx.get_data_source('sqs', config.fetch('event-queue-ds', 'sqs_event'))
@@ -58,6 +58,7 @@ module Bricolage

  Process.daemon(true) if opts.daemon?
  create_pid_file opts.pid_file_path if opts.pid_file_path
+ Dir.chdir '/'
  dispatcher.event_loop
  rescue Exception => e
  logger.exception e
@@ -0,0 +1,387 @@
+ require 'bricolage/streamingload/jobparams'
+ require 'bricolage/streamingload/manifest'
+ require 'bricolage/sqlutils'
+ require 'socket'
+ require 'json'
+
+ module Bricolage
+
+ module StreamingLoad
+
+ class JobCancelled < ApplicationError; end
+ class JobDefered < ApplicationError; end
+ class JobDuplicated < ApplicationError; end
+
+ class ControlConnectionFailed < JobFailure; end
+ class DataConnectionFailed < JobFailure; end
+
+
+ class Job
+
+ def initialize(context:, ctl_ds:, task_id:, force: false, logger:)
+ @context = context
+ @ctl_ds = ctl_ds
+ @task_id = task_id
+ @force = force
+ @logger = logger
+
+ @task = nil
+ @job_id = nil
+ @data_ds = nil
+ @manifest = nil
+ end
+
+ # For tests
+ attr_reader :job_id
+ attr_reader :process_id
+ attr_reader :task
+ attr_reader :data_ds
+ attr_reader :manifest
+
+ # Returns true -> Deletes a SQS message
+ # Returns false -> Keeps a SQS message
+ def execute(fail_fast: false)
+ execute_task
+ return true
+ rescue JobCancelled
+ return true
+ rescue JobDuplicated
+ return true
+ rescue JobDefered
+ return false
+ rescue ControlConnectionFailed => ex
+ @logger.error ex.message
+ wait_for_connection('ctl', @ctl_ds) unless fail_fast
+ return false
+ rescue DataConnectionFailed
+ wait_for_connection('data', @data_ds) unless fail_fast
+ return false
+ rescue JobFailure
+ return false
+ rescue JobError
+ return true
+ rescue Exception => ex
+ @logger.exception ex
+ return true
+ end
+
+ MAX_RETRY = 5
+
+ def execute_task
+ @process_id = "#{Socket.gethostname}-#{$$}"
+ @logger.info "execute task: task_id=#{@task_id} force=#{@force} process_id=#{@process_id}"
+ ctl = ControlConnection.new(@ctl_ds, @logger)
+
+ ctl.open {
+ @task = ctl.load_task(@task_id)
+ @logger.info "task details: task_id=#{@task_id} table=#{@task.schema_name}.#{@task.table_name}"
+ if @task.disabled
+ # We do not record disabled job in the DB.
+ @logger.info "task is disabled; defer task: task_id=#{@task_id}"
+ raise JobDefered, "defered: task_id=#{@task_id}"
+ end
+
+ @job_id = ctl.begin_job(@task_id, @process_id, @force)
+ unless @job_id
+ @logger.warn "task is already succeeded and not forced; discard task: task_id=#{@task_id}"
+ ctl.commit_duplicated_job @task_id, @process_id
+ raise JobDuplicated, "duplicated: task_id=#{@task_id}"
+ end
+ }
+
+ begin
+ do_load @task, @job_id
+ ctl.open {
+ ctl.commit_job @job_id, (@force ? 'forced' : nil)
+ }
+ rescue ControlConnectionFailed
+ raise
+ rescue JobFailure => ex
+ @logger.error ex.message
+ ctl.open {
+ fail_count = ctl.fail_count(@task_id)
+ final_retry = (fail_count >= MAX_RETRY)
+ retry_msg = (fail_count > 0) ? "(retry\##{fail_count}#{final_retry ? ' FINAL' : ''}) " : ''
+ ctl.abort_job job_id, 'failure', retry_msg + ex.message.lines.first.strip
+ raise JobCancelled, "retry count exceeds limit: task_id=#{@task_id}" if final_retry
+ }
+ raise
+ rescue JobError => ex
+ @logger.error ex.message
+ ctl.open {
+ ctl.abort_job job_id, 'error', ex.message.lines.first.strip
+ }
+ raise
+ rescue Exception => ex
+ @logger.exception ex
+ ctl.open {
+ ctl.abort_job job_id, 'error', ex.message.lines.first.strip
+ }
+ raise JobError, "#{ex.class}: #{ex.message}"
+ end
+ end
+
+ def do_load(task, job_id)
+ params = JobParams.load(@context, task.task_class, task.schema_name, task.table_name)
+ @data_ds = params.ds
+ @manifest = ManifestFile.create(ds: params.ctl_bucket, job_id: job_id, object_urls: task.object_urls, logger: @logger)
+ DataConnection.open(params.ds, @logger) {|data|
+ if params.enable_work_table?
+ data.load_with_work_table params.work_table, @manifest, params.load_options_string, params.sql_source
+ else
+ data.load_objects params.dest_table, @manifest, params.load_options_string
+ end
+ }
+ end
+
+ def wait_for_connection(type, ds)
+ @logger.warn "loader: #{type} DB connection lost; polling..."
+ start_time = Time.now
+ n = 0
+ while true
+ begin
+ ds.open {}
+ @logger.warn "loader: #{type} DB connection recovered; return to normal state"
+ return true
+ rescue ConnectionError
+ end
+ sleep 15
+ n += 1
+ if n == 120 # 30 min
+ # Could not get a connection in 30 minutes, now we return to the queue loop.
+ # Next job may fail too, but we should not stop to receive the task queue too long,
+ # because it contains control tasks.
+ @logger.warn "loader: #{type} DB connection still failing (since #{start_time}); give up."
+ return false
+ end
+ end
+ end
+
+
+ class DataConnection
+
+ include SQLUtils
+
+ def DataConnection.open(ds, logger = ds.logger, &block)
+ new(ds, logger).open(&block)
+ end
+
+ def initialize(ds, logger = ds.logger)
+ @ds = ds
+ @connection = nil
+ @logger = logger
+ end
+
+ def open(&block)
+ @ds.open {|conn|
+ @connection = conn
+ yield self
+ }
+ rescue ConnectionError => ex
+ raise DataConnectionFailed, "data connection failed: #{ex.message}"
+ end
+
+ def load_with_work_table(work_table, manifest, options, sql_source)
+ @connection.transaction {|txn|
+ # NOTE: This transaction ends with truncation, this DELETE does nothing
+ # from the second time. So don't worry about DELETE cost here.
+ @connection.execute("delete from #{work_table}")
+ load_objects work_table, manifest, options
+ @connection.execute sql_source
+ txn.truncate_and_commit work_table
+ }
+ end
+
+ def load_objects(dest_table, manifest, options)
+ @connection.execute(<<-EndSQL.strip.gsub(/\s+/, ' '))
+ copy #{dest_table}
+ from #{s manifest.url}
+ credentials #{s manifest.credential_string}
+ manifest
+ statupdate false
+ compupdate false
+ #{options}
+ ;
+ EndSQL
+ @logger.info "load succeeded: #{manifest.url}"
+ end
+
+ end # class DataConnection
+
+
+ class ControlConnection
+
+ include SQLUtils
+
+ def ControlConnection.open(ds, logger = ds.logger, &block)
+ new(ds, logger).open(&block)
+ end
+
+ def initialize(ds, logger = ds.logger)
+ @ds = ds
+ @connection = nil
+ end
+
+ def open(&block)
+ @ds.open {|conn|
+ @connection = conn
+ yield self
+ }
+ rescue ConnectionError => ex
+ raise ControlConnectionFailed, "control connection failed: #{ex.message}"
+ end
+
+ TaskInfo = Struct.new(:task_id, :task_class, :schema_name, :table_name, :disabled, :object_urls)
+
+ def load_task(task_id)
+ rec = @connection.query_row(<<-EndSQL) or raise JobError, "no such task: #{task_id}"
+ select
+ tsk.task_class
+ , tbl.schema_name
+ , tbl.table_name
+ , tbl.disabled
+ from
+ strload_tasks tsk
+ inner join strload_tables tbl using (table_id)
+ where
+ tsk.task_id = #{task_id}
+ ;
+ EndSQL
+ TaskInfo.new(
+ task_id,
+ rec['task_class'],
+ rec['schema_name'],
+ rec['table_name'],
+ (rec['disabled'] != 'f'),
+ load_object_urls(task_id)
+ )
+ end
+
+ def load_object_urls(task_id)
+ urls = @connection.query_values(<<-EndSQL)
+ select
+ o.object_url
+ from
+ strload_tasks t
+ inner join strload_task_objects tob using (task_id)
+ inner join strload_objects o using (object_id)
+ where
+ t.task_id = #{task_id}
+ ;
+ EndSQL
+ urls
+ end
+
+ def begin_job(task_id, process_id, force)
+ job_id = @connection.query_value(<<-EndSQL)
+ insert into strload_jobs
+ ( task_id
+ , process_id
+ , status
+ , start_time
+ )
+ select
+ task_id
+ , #{s process_id}
+ , 'running'
+ , current_timestamp
+ from
+ strload_tasks
+ where
+ task_id = #{task_id}
+ and (#{force ? 'true' : 'false'} or task_id not in (select task_id from strload_jobs where status = 'success'))
+ returning job_id
+ ;
+ EndSQL
+ return job_id ? job_id.to_i : nil
+ end
+
+ def fail_count(task_id)
+ statuses = @connection.query_values(<<-EndSQL)
+ select
+ j.status
+ from
+ strload_tasks t
+ inner join strload_jobs j using (task_id)
+ where
+ t.task_id = #{task_id}
+ order by
+ j.job_id desc
+ EndSQL
+ statuses.shift if statuses.first == 'running' # current job
+ statuses.take_while {|st| %w[failure error].include?(st) }.size
+ end
+
+ def commit_job(job_id, message = nil)
+ @connection.transaction {|txn|
+ write_job_result job_id, 'success', (message || '')
+ update_loaded_flag job_id
+ }
+ end
+
+ def abort_job(job_id, status, message)
+ write_job_result(job_id, status, message)
+ end
+
+ MAX_MESSAGE_LENGTH = 1000
+
+ def write_job_result(job_id, status, message)
+ @connection.execute(<<-EndSQL)
+ update
+ strload_jobs
+ set
+ (status, finish_time, message) = (#{s status}, current_timestamp, #{s message[0, MAX_MESSAGE_LENGTH]})
+ where
+ job_id = #{job_id}
+ ;
+ EndSQL
+ end
+
+ def update_loaded_flag(job_id)
+ @connection.execute(<<-EndSQL)
+ update
+ strload_objects
+ set
+ loaded = true
+ where
+ object_id in (
+ select
+ object_id
+ from
+ strload_task_objects
+ where task_id = (select task_id from strload_jobs where job_id = #{job_id})
+ )
+ ;
+ EndSQL
+ end
+
+ def commit_duplicated_job(task_id, process_id)
+ job_id = @connection.query_value(<<-EndSQL)
+ insert into strload_jobs
+ ( task_id
+ , process_id
+ , status
+ , start_time
+ , finish_time
+ , message
+ )
+ select
+ #{task_id}
+ , #{s process_id}
+ , 'duplicated'
+ , current_timestamp
+ , current_timestamp
+ , ''
+ returning job_id
+ ;
+ EndSQL
+ return job_id
+ end
+
+ end # class ControlConnection
+
+ end # class Job
+
+ end # module StreamingLoad
+
+ end # module Bricolage
@@ -6,69 +6,44 @@ module Bricolage

  module StreamingLoad

- class LoaderParams
+ class JobParams

- def LoaderParams.load(ctx, task)
- job = load_job(ctx, task)
- schema = resolve_schema(ctx, task.schema)
- job.provide_default 'dest-table', "#{schema}.#{task.table}"
+ def JobParams.load(ctx, job_class, schema, table)
+ job = load_bricolage_job(ctx, job_class, schema, table)
+ schema = resolve_schema(ctx, schema)
+ job.provide_default 'dest-table', "#{schema}.#{table}"
  #job.provide_sql_file_by_job_id # FIXME: provide only when exist
  job.compile
- new(task, job)
+ new(job)
  end

- def LoaderParams.load_job(ctx, task)
- if job_file = find_job_file(ctx, task.schema, task.table)
+ def JobParams.load_bricolage_job(ctx, job_class, schema, table)
+ if job_file = find_job_file(ctx, schema, table)
  ctx.logger.debug "using .job file: #{job_file}"
- Job.load_file(job_file, ctx.subsystem(task.schema))
+ Bricolage::Job.load_file(job_file, ctx.subsystem(schema))
  else
  ctx.logger.debug "using default job parameters (no .job file)"
- Job.instantiate(task.table, 'streaming_load_v3', ctx).tap {|job|
+ Bricolage::Job.instantiate(table, job_class, ctx).tap {|job|
  job.bind_parameters({})
  }
  end
  end

- def LoaderParams.find_job_file(ctx, schema, table)
+ def JobParams.find_job_file(ctx, schema, table)
  paths = Dir.glob("#{ctx.home_path}/#{schema}/#{table}.*")
  paths.select {|path| File.extname(path) == '.job' }.sort.first
  end

- def LoaderParams.resolve_schema(ctx, schema)
+ def JobParams.resolve_schema(ctx, schema)
  ctx.global_variables["#{schema}_schema"] || schema
  end
  private_class_method :resolve_schema

- def initialize(task, job)
- @task = task
+ def initialize(job)
  @job = job
  @params = job.params
  end

- def task_id
- @task.id
- end
-
- def task_id
- @task.id
- end
-
- def schema
- @task.schema
- end
-
- def table
- @task.table
- end
-
- def force?
- @task.force?
- end
-
- def object_urls
- @task.object_urls
- end
-
  def ds
  @params['redshift-ds']
  end
@@ -101,7 +76,7 @@ module Bricolage
  end


- class LoaderJob < RubyJobClass
+ class StreamingLoadV3Job < RubyJobClass

  job_class_id 'streaming_load_v3'

@@ -4,7 +4,7 @@ module Bricolage

  class ManifestFile

- def ManifestFile.create(ds, job_id:, object_urls:, logger:, noop: false, &block)
+ def ManifestFile.create(ds:, job_id:, object_urls:, logger:, noop: false, &block)
  manifest = new(ds, job_id, object_urls, logger: logger, noop: noop)
  if block
  manifest.create_temporary(&block)
@@ -49,11 +49,17 @@ module Bricolage
  def put
  @logger.info "s3: put: #{url}"
  @ds.object(name).put(body: content) unless @noop
+ rescue Aws::S3::Errors::ServiceError => ex
+ @logger.exception ex
+ raise S3Exception.wrap(ex)
  end

  def delete
  @logger.info "s3: delete: #{url}"
  @ds.object(name).delete unless @noop
+ rescue Aws::S3::Errors::ServiceError => ex
+ @logger.exception ex
+ raise S3Exception.wrap(ex)
  end

  def create_temporary
@@ -1,8 +1,5 @@
  require 'bricolage/streamingload/task'
- require 'bricolage/streamingload/loaderparams'
  require 'bricolage/sqlutils'
- require 'json'
- require 'securerandom'
  require 'forwardable'

  module Bricolage
@@ -1,5 +1,4 @@
  require 'bricolage/sqsdatasource'
- require 'json'

  module Bricolage

@@ -38,85 +37,23 @@ module Bricolage
  }
  end

- def LoadTask.load(conn, task_id, force: false)
- rec = conn.query_row(<<-EndSQL)
- select
- task_class
- , tbl.schema_name
- , tbl.table_name
- , disabled
- from
- strload_tasks tsk
- inner join strload_tables tbl
- using(table_id)
- where
- task_id = #{task_id}
- ;
- EndSQL
- object_urls = conn.query_values(<<-EndSQL)
- select
- object_url
- from
- strload_task_objects
- inner join strload_objects
- using (object_id)
- inner join strload_tasks
- using (task_id)
- where
- task_id = #{task_id}
- ;
- EndSQL
- return nil unless rec
- new(
- name: rec['task_class'],
- time: nil,
- source: nil,
- task_id: task_id,
- schema: rec['schema_name'],
- table: rec['table_name'],
- object_urls: object_urls,
- disabled: rec['disabled'] == 'f' ? false : true,
- force: force
- )
- end
-
  alias message_type name

- def init_message(task_id:, schema: nil, table: nil, object_urls: nil, disabled: false, force: false)
- @id = task_id
+ def init_message(task_id:, force: false)
+ @task_id = task_id
  @force = force
-
- # Effective only for queue reader process
- @schema = schema
- @table = table
- @object_urls = object_urls
- @disabled = disabled
  end

- attr_reader :id
+ attr_reader :task_id

  def force?
  !!@force
  end

- #
- # For writer only
- #
-
- attr_reader :schema, :table, :object_urls, :disabled
-
- def qualified_name
- "#{@schema}.#{@table}"
- end
-
  def body
  obj = super
- obj['taskId'] = @id
- obj['schemaName'] = @schema
- obj['tableName'] = @table
- obj['objectUrls'] = @object_urls
- obj['disabled'] = @disabled
- obj['force'] = @force
+ obj['taskId'] = @task_id
+ obj['force'] = true if @force
  obj
  end