bricolage-streamingload 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/bricolage-streaming-loader +2 -2
- data/lib/bricolage/sqsmock.rb +0 -1
- data/lib/bricolage/streamingload/dispatcher.rb +2 -1
- data/lib/bricolage/streamingload/job.rb +387 -0
- data/lib/bricolage/streamingload/{loaderparams.rb → jobparams.rb} +14 -39
- data/lib/bricolage/streamingload/manifest.rb +7 -1
- data/lib/bricolage/streamingload/objectbuffer.rb +0 -3
- data/lib/bricolage/streamingload/task.rb +5 -68
- data/lib/bricolage/streamingload/{loaderservice.rb → taskhandler.rb} +102 -61
- data/lib/bricolage/streamingload/version.rb +1 -1
- data/test/streamingload/test_dispatcher.rb +6 -6
- data/test/streamingload/test_job.rb +438 -0
- metadata +8 -9
- data/lib/bricolage/nulllogger.rb +0 -20
- data/lib/bricolage/snsdatasource.rb +0 -40
- data/lib/bricolage/streamingload/loader.rb +0 -158
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8a33b6a5561c4bf69b725a96bca4abf75f06fe8a
+  data.tar.gz: 9c6357983ba1fea216fd3e0931d08f549f610320
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e701e7aa67d9a9b6503b436342a5d2ef3b1edb49513fa6881bf4c9b758e5d44f50a7b444b950527518187f3c8d8da896ee7fc597d9d0878611c8c34c2dbb3a19
+  data.tar.gz: 77b535d15e8e0724c054655e8f351b7620454f08e67a86a5c61f01b8bb623361b22344b93d590608538ee59e9809886e64ae48ec3b80f8cf61570897e3814e41
data/lib/bricolage/streamingload/dispatcher.rb
CHANGED

@@ -27,7 +27,7 @@ module Bricolage
         end
         config_path, * = opts.rest_arguments
         config = YAML.load(File.read(config_path))
-        log = opts.log_file_path ? new_logger(opts.log_file_path, config) : nil
+        log = opts.log_file_path ? new_logger(File.expand_path(opts.log_file_path), config) : nil
         ctx = Context.for_application('.', environment: opts.environment, logger: log)
         logger = raw_logger = ctx.logger
         event_queue = ctx.get_data_source('sqs', config.fetch('event-queue-ds', 'sqs_event'))

@@ -58,6 +58,7 @@ module Bricolage

         Process.daemon(true) if opts.daemon?
         create_pid_file opts.pid_file_path if opts.pid_file_path
+        Dir.chdir '/'
         dispatcher.event_loop
       rescue Exception => e
         logger.exception e
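
Both changes harden daemon mode: the log path is resolved to an absolute path while the start directory is still meaningful, and the process moves to '/' only after the pid file has been written. A minimal sketch of the implied boot order (helper names follow the diff; `opts` and `config` are stand-ins for objects built earlier in the method):

    # Sketch only; condensed from the two hunks above.
    log = opts.log_file_path ? new_logger(File.expand_path(opts.log_file_path), config) : nil
    # ... build context, event queue, and dispatcher ...
    Process.daemon(true) if opts.daemon?                       # nochdir=true: keep cwd for now
    create_pid_file opts.pid_file_path if opts.pid_file_path   # so a relative pid path still works
    Dir.chdir '/'                                              # then release the start directory
    dispatcher.event_loop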

data/lib/bricolage/streamingload/job.rb
ADDED

@@ -0,0 +1,387 @@
+require 'bricolage/streamingload/jobparams'
+require 'bricolage/streamingload/manifest'
+require 'bricolage/sqlutils'
+require 'socket'
+require 'json'
+
+module Bricolage
+
+  module StreamingLoad
+
+    class JobCancelled < ApplicationError; end
+    class JobDefered < ApplicationError; end
+    class JobDuplicated < ApplicationError; end
+
+    class ControlConnectionFailed < JobFailure; end
+    class DataConnectionFailed < JobFailure; end
+
+
+    class Job
+
+      def initialize(context:, ctl_ds:, task_id:, force: false, logger:)
+        @context = context
+        @ctl_ds = ctl_ds
+        @task_id = task_id
+        @force = force
+        @logger = logger
+
+        @task = nil
+        @job_id = nil
+        @data_ds = nil
+        @manifest = nil
+      end
+
+      # For tests
+      attr_reader :job_id
+      attr_reader :process_id
+      attr_reader :task
+      attr_reader :data_ds
+      attr_reader :manifest
+
+      # Returns true -> Deletes a SQS message
+      # Returns false -> Keeps a SQS message
+      def execute(fail_fast: false)
+        execute_task
+        return true
+      rescue JobCancelled
+        return true
+      rescue JobDuplicated
+        return true
+      rescue JobDefered
+        return false
+      rescue ControlConnectionFailed => ex
+        @logger.error ex.message
+        wait_for_connection('ctl', @ctl_ds) unless fail_fast
+        return false
+      rescue DataConnectionFailed
+        wait_for_connection('data', @data_ds) unless fail_fast
+        return false
+      rescue JobFailure
+        return false
+      rescue JobError
+        return true
+      rescue Exception => ex
+        @logger.exception ex
+        return true
+      end
+
+      MAX_RETRY = 5
+
+      def execute_task
+        @process_id = "#{Socket.gethostname}-#{$$}"
+        @logger.info "execute task: task_id=#{@task_id} force=#{@force} process_id=#{@process_id}"
+        ctl = ControlConnection.new(@ctl_ds, @logger)
+
+        ctl.open {
+          @task = ctl.load_task(@task_id)
+          @logger.info "task details: task_id=#{@task_id} table=#{@task.schema_name}.#{@task.table_name}"
+          if @task.disabled
+            # We do not record disabled jobs in the DB.
+            @logger.info "task is disabled; defer task: task_id=#{@task_id}"
+            raise JobDefered, "defered: task_id=#{@task_id}"
+          end
+
+          @job_id = ctl.begin_job(@task_id, @process_id, @force)
+          unless @job_id
+            @logger.warn "task is already succeeded and not forced; discard task: task_id=#{@task_id}"
+            ctl.commit_duplicated_job @task_id, @process_id
+            raise JobDuplicated, "duplicated: task_id=#{@task_id}"
+          end
+        }
+
+        begin
+          do_load @task, @job_id
+          ctl.open {
+            ctl.commit_job @job_id, (@force ? 'forced' : nil)
+          }
+        rescue ControlConnectionFailed
+          raise
+        rescue JobFailure => ex
+          @logger.error ex.message
+          ctl.open {
+            fail_count = ctl.fail_count(@task_id)
+            final_retry = (fail_count >= MAX_RETRY)
+            retry_msg = (fail_count > 0) ? "(retry\##{fail_count}#{final_retry ? ' FINAL' : ''}) " : ''
+            ctl.abort_job job_id, 'failure', retry_msg + ex.message.lines.first.strip
+            raise JobCancelled, "retry count exceeds limit: task_id=#{@task_id}" if final_retry
+          }
+          raise
+        rescue JobError => ex
+          @logger.error ex.message
+          ctl.open {
+            ctl.abort_job job_id, 'error', ex.message.lines.first.strip
+          }
+          raise
+        rescue Exception => ex
+          @logger.exception ex
+          ctl.open {
+            ctl.abort_job job_id, 'error', ex.message.lines.first.strip
+          }
+          raise JobError, "#{ex.class}: #{ex.message}"
+        end
+      end
+
+      def do_load(task, job_id)
+        params = JobParams.load(@context, task.task_class, task.schema_name, task.table_name)
+        @data_ds = params.ds
+        @manifest = ManifestFile.create(ds: params.ctl_bucket, job_id: job_id, object_urls: task.object_urls, logger: @logger)
+        DataConnection.open(params.ds, @logger) {|data|
+          if params.enable_work_table?
+            data.load_with_work_table params.work_table, @manifest, params.load_options_string, params.sql_source
+          else
+            data.load_objects params.dest_table, @manifest, params.load_options_string
+          end
+        }
+      end
+
+      def wait_for_connection(type, ds)
+        @logger.warn "loader: #{type} DB connection lost; polling..."
+        start_time = Time.now
+        n = 0
+        while true
+          begin
+            ds.open {}
+            @logger.warn "loader: #{type} DB connection recovered; return to normal state"
+            return true
+          rescue ConnectionError
+          end
+          sleep 15
+          n += 1
+          if n == 120   # 30 min
+            # Could not get a connection in 30 minutes; return to the queue loop now.
+            # The next job may fail too, but we must not stop receiving from the task
+            # queue for too long, because it also carries control tasks.
+            @logger.warn "loader: #{type} DB connection still failing (since #{start_time}); give up."
+            return false
+          end
+        end
+      end
+
+
+      class DataConnection
+
+        include SQLUtils
+
+        def DataConnection.open(ds, logger = ds.logger, &block)
+          new(ds, logger).open(&block)
+        end
+
+        def initialize(ds, logger = ds.logger)
+          @ds = ds
+          @connection = nil
+          @logger = logger
+        end
+
+        def open(&block)
+          @ds.open {|conn|
+            @connection = conn
+            yield self
+          }
+        rescue ConnectionError => ex
+          raise DataConnectionFailed, "data connection failed: #{ex.message}"
+        end
+
+        def load_with_work_table(work_table, manifest, options, sql_source)
+          @connection.transaction {|txn|
+            # NOTE: This transaction ends with truncation, so this DELETE does nothing
+            # from the second time on. Don't worry about DELETE cost here.
+            @connection.execute("delete from #{work_table}")
+            load_objects work_table, manifest, options
+            @connection.execute sql_source
+            txn.truncate_and_commit work_table
+          }
+        end
+
+        def load_objects(dest_table, manifest, options)
+          @connection.execute(<<-EndSQL.strip.gsub(/\s+/, ' '))
+            copy #{dest_table}
+            from #{s manifest.url}
+            credentials #{s manifest.credential_string}
+            manifest
+            statupdate false
+            compupdate false
+            #{options}
+            ;
+          EndSQL
+          @logger.info "load succeeded: #{manifest.url}"
+        end
+
+      end   # class DataConnection
+
+
+      class ControlConnection
+
+        include SQLUtils
+
+        def ControlConnection.open(ds, logger = ds.logger, &block)
+          new(ds, logger).open(&block)
+        end
+
+        def initialize(ds, logger = ds.logger)
+          @ds = ds
+          @connection = nil
+        end
+
+        def open(&block)
+          @ds.open {|conn|
+            @connection = conn
+            yield self
+          }
+        rescue ConnectionError => ex
+          raise ControlConnectionFailed, "control connection failed: #{ex.message}"
+        end
+
+        TaskInfo = Struct.new(:task_id, :task_class, :schema_name, :table_name, :disabled, :object_urls)
+
+        def load_task(task_id)
+          rec = @connection.query_row(<<-EndSQL) or raise JobError, "no such task: #{task_id}"
+            select
+                tsk.task_class
+                , tbl.schema_name
+                , tbl.table_name
+                , tbl.disabled
+            from
+                strload_tasks tsk
+                inner join strload_tables tbl using (table_id)
+            where
+                tsk.task_id = #{task_id}
+            ;
+          EndSQL
+          TaskInfo.new(
+            task_id,
+            rec['task_class'],
+            rec['schema_name'],
+            rec['table_name'],
+            (rec['disabled'] != 'f'),
+            load_object_urls(task_id)
+          )
+        end
+
+        def load_object_urls(task_id)
+          urls = @connection.query_values(<<-EndSQL)
+            select
+                o.object_url
+            from
+                strload_tasks t
+                inner join strload_task_objects tob using (task_id)
+                inner join strload_objects o using (object_id)
+            where
+                t.task_id = #{task_id}
+            ;
+          EndSQL
+          urls
+        end
+
+        def begin_job(task_id, process_id, force)
+          job_id = @connection.query_value(<<-EndSQL)
+            insert into strload_jobs
+                ( task_id
+                , process_id
+                , status
+                , start_time
+                )
+            select
+                task_id
+                , #{s process_id}
+                , 'running'
+                , current_timestamp
+            from
+                strload_tasks
+            where
+                task_id = #{task_id}
+                and (#{force ? 'true' : 'false'} or task_id not in (select task_id from strload_jobs where status = 'success'))
+            returning job_id
+            ;
+          EndSQL
+          return job_id ? job_id.to_i : nil
+        end
+
+        def fail_count(task_id)
+          statuses = @connection.query_values(<<-EndSQL)
+            select
+                j.status
+            from
+                strload_tasks t
+                inner join strload_jobs j using (task_id)
+            where
+                t.task_id = #{task_id}
+            order by
+                j.job_id desc
+          EndSQL
+          statuses.shift if statuses.first == 'running'   # current job
+          statuses.take_while {|st| %w[failure error].include?(st) }.size
+        end
+
+        def commit_job(job_id, message = nil)
+          @connection.transaction {|txn|
+            write_job_result job_id, 'success', (message || '')
+            update_loaded_flag job_id
+          }
+        end
+
+        def abort_job(job_id, status, message)
+          write_job_result(job_id, status, message)
+        end
+
+        MAX_MESSAGE_LENGTH = 1000
+
+        def write_job_result(job_id, status, message)
+          @connection.execute(<<-EndSQL)
+            update
+                strload_jobs
+            set
+                (status, finish_time, message) = (#{s status}, current_timestamp, #{s message[0, MAX_MESSAGE_LENGTH]})
+            where
+                job_id = #{job_id}
+            ;
+          EndSQL
+        end
+
+        def update_loaded_flag(job_id)
+          @connection.execute(<<-EndSQL)
+            update
+                strload_objects
+            set
+                loaded = true
+            where
+                object_id in (
+                  select
+                      object_id
+                  from
+                      strload_task_objects
+                  where task_id = (select task_id from strload_jobs where job_id = #{job_id})
+                )
+            ;
+          EndSQL
+        end
+
+        def commit_duplicated_job(task_id, process_id)
+          job_id = @connection.query_value(<<-EndSQL)
+            insert into strload_jobs
+                ( task_id
+                , process_id
+                , status
+                , start_time
+                , finish_time
+                , message
+                )
+            select
+                #{task_id}
+                , #{s process_id}
+                , 'duplicated'
+                , current_timestamp
+                , current_timestamp
+                , ''
+            returning job_id
+            ;
+          EndSQL
+          return job_id
+        end
+
+      end   # class ControlConnection
+
+    end   # class Job
+
+  end   # module StreamingLoad
+
+end   # module Bricolage
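
Job#execute's boolean is the contract with the queue loop: true means the SQS message is consumed, false means it stays visible for redelivery. A hypothetical caller (the real wiring lives in taskhandler.rb; `ctx`, `ctl_ds`, `msg`, and `queue` are placeholders):

    # Hypothetical caller; not the gem's actual TaskHandler.
    job = Bricolage::StreamingLoad::Job.new(
      context: ctx,          # Bricolage::Context
      ctl_ds: ctl_ds,        # PostgreSQL data source holding the strload_* tables
      task_id: msg.task_id,
      force: msg.force?,
      logger: ctx.logger
    )
    # true: success, duplicate, cancelled, or unrecoverable error -> delete the message.
    # false: deferred or transient failure -> keep it for SQS redelivery.
    queue.delete_message(msg) if job.execute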
data/lib/bricolage/streamingload/{loaderparams.rb → jobparams.rb}
RENAMED

@@ -6,69 +6,44 @@ module Bricolage
 
   module StreamingLoad
 
-    class
+    class JobParams
 
-      def
-        job =
-        schema = resolve_schema(ctx,
-        job.provide_default 'dest-table', "#{schema}.#{
+      def JobParams.load(ctx, job_class, schema, table)
+        job = load_bricolage_job(ctx, job_class, schema, table)
+        schema = resolve_schema(ctx, schema)
+        job.provide_default 'dest-table', "#{schema}.#{table}"
         #job.provide_sql_file_by_job_id   # FIXME: provide only when exist
         job.compile
-        new(
+        new(job)
       end
 
-      def
-        if job_file = find_job_file(ctx,
+      def JobParams.load_bricolage_job(ctx, job_class, schema, table)
+        if job_file = find_job_file(ctx, schema, table)
           ctx.logger.debug "using .job file: #{job_file}"
-          Job.load_file(job_file, ctx.subsystem(
+          Bricolage::Job.load_file(job_file, ctx.subsystem(schema))
         else
           ctx.logger.debug "using default job parameters (no .job file)"
-          Job.instantiate(
+          Bricolage::Job.instantiate(table, job_class, ctx).tap {|job|
            job.bind_parameters({})
          }
         end
       end
 
-      def
+      def JobParams.find_job_file(ctx, schema, table)
        paths = Dir.glob("#{ctx.home_path}/#{schema}/#{table}.*")
        paths.select {|path| File.extname(path) == '.job' }.sort.first
       end
 
-      def
+      def JobParams.resolve_schema(ctx, schema)
        ctx.global_variables["#{schema}_schema"] || schema
       end
       private_class_method :resolve_schema
 
-      def initialize(
-        @task = task
+      def initialize(job)
        @job = job
        @params = job.params
       end
 
-      def task_id
-        @task.id
-      end
-
-      def task_id
-        @task.id
-      end
-
-      def schema
-        @task.schema
-      end
-
-      def table
-        @task.table
-      end
-
-      def force?
-        @task.force?
-      end
-
-      def object_urls
-        @task.object_urls
-      end
-
       def ds
        @params['redshift-ds']
       end

@@ -101,7 +76,7 @@ module Bricolage
      end
 
 
-    class
+    class StreamingLoadV3Job < RubyJobClass
 
      job_class_id 'streaming_load_v3'
 
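
JobParams.load is what Job#do_load calls to resolve per-table load configuration. An illustrative walk-through (ctx is a Bricolage::Context; 'sales' and 'orders' are invented names):

    # Illustrative only; mirrors the call in Job#do_load.
    params = Bricolage::StreamingLoad::JobParams.load(ctx, 'streaming_load_v3', 'sales', 'orders')
    # 1. If a <home_path>/sales/orders.job file exists, it is loaded via
    #    Bricolage::Job.load_file; otherwise a default job of the given class
    #    is instantiated with empty parameters.
    # 2. 'dest-table' defaults to "<schema>.orders", where the schema may be
    #    remapped through ctx.global_variables['sales_schema'].
    params.ds    # => the job's 'redshift-ds' data source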
data/lib/bricolage/streamingload/manifest.rb
CHANGED

@@ -4,7 +4,7 @@ module Bricolage
 
   class ManifestFile
 
-    def ManifestFile.create(ds
+    def ManifestFile.create(ds:, job_id:, object_urls:, logger:, noop: false, &block)
       manifest = new(ds, job_id, object_urls, logger: logger, noop: noop)
       if block
         manifest.create_temporary(&block)

@@ -49,11 +49,17 @@ module Bricolage
     def put
       @logger.info "s3: put: #{url}"
       @ds.object(name).put(body: content) unless @noop
+    rescue Aws::S3::Errors::ServiceError => ex
+      @logger.exception ex
+      raise S3Exception.wrap(ex)
     end
 
     def delete
       @logger.info "s3: delete: #{url}"
       @ds.object(name).delete unless @noop
+    rescue Aws::S3::Errors::ServiceError => ex
+      @logger.exception ex
+      raise S3Exception.wrap(ex)
     end
 
     def create_temporary
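
The factory now takes keyword arguments, and put/delete wrap AWS service errors in the gem's S3Exception instead of letting raw SDK exceptions escape. A usage sketch mirroring the call site in Job#do_load (values are placeholders):

    # Placeholder values; mirrors the call in Job#do_load above.
    manifest = Bricolage::StreamingLoad::ManifestFile.create(
      ds: params.ctl_bucket,           # S3 data source for control files
      job_id: job_id,
      object_urls: task.object_urls,
      logger: logger
    )
    manifest.url    # S3 URL handed to Redshift's COPY ... manifest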
data/lib/bricolage/streamingload/task.rb
CHANGED

@@ -1,5 +1,4 @@
 require 'bricolage/sqsdatasource'
-require 'json'
 
 module Bricolage
 

@@ -38,85 +37,23 @@ module Bricolage
      }
     end
 
-    def LoadTask.load(conn, task_id, force: false)
-      rec = conn.query_row(<<-EndSQL)
-        select
-            task_class
-            , tbl.schema_name
-            , tbl.table_name
-            , disabled
-        from
-            strload_tasks tsk
-            inner join strload_tables tbl
-                using(table_id)
-        where
-            task_id = #{task_id}
-        ;
-      EndSQL
-      object_urls = conn.query_values(<<-EndSQL)
-        select
-            object_url
-        from
-            strload_task_objects
-            inner join strload_objects
-                using (object_id)
-            inner join strload_tasks
-                using (task_id)
-        where
-            task_id = #{task_id}
-        ;
-      EndSQL
-      return nil unless rec
-      new(
-        name: rec['task_class'],
-        time: nil,
-        source: nil,
-        task_id: task_id,
-        schema: rec['schema_name'],
-        table: rec['table_name'],
-        object_urls: object_urls,
-        disabled: rec['disabled'] == 'f' ? false : true,
-        force: force
-      )
-    end
-
     alias message_type name
 
-    def init_message(task_id:,
-      @
+    def init_message(task_id:, force: false)
+      @task_id = task_id
       @force = force
-
-      # Effective only for queue reader process
-      @schema = schema
-      @table = table
-      @object_urls = object_urls
-      @disabled = disabled
     end
 
-    attr_reader :
+    attr_reader :task_id
 
     def force?
       !!@force
     end
 
-    #
-    # For writer only
-    #
-
-    attr_reader :schema, :table, :object_urls, :disabled
-
-    def qualified_name
-      "#{@schema}.#{@table}"
-    end
-
     def body
       obj = super
-      obj['taskId'] = @
-      obj['
-      obj['tableName'] = @table
-      obj['objectUrls'] = @object_urls
-      obj['disabled'] = @disabled
-      obj['force'] = @force
+      obj['taskId'] = @task_id
+      obj['force'] = true if @force
       obj
     end
 
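
The task message is now deliberately thin: it carries only the task id, plus a force flag when set, and the loader re-reads schema, table, and object URLs from the strload_* tables via ControlConnection#load_task. Roughly what LoadTask#body contributes, on top of whatever envelope fields `super` supplies (values are illustrative):

    # Illustrative values; envelope fields from `super` are elided as "...".
    task.body   # => { ..., "taskId" => 42 }
    # and for a forced retry:
    task.body   # => { ..., "taskId" => 42, "force" => true }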