bricolage-streamingload 0.7.1 → 0.8.0
- checksums.yaml +4 -4
- data/bin/bricolage-streaming-loader +2 -2
- data/lib/bricolage/sqsmock.rb +0 -1
- data/lib/bricolage/streamingload/dispatcher.rb +2 -1
- data/lib/bricolage/streamingload/job.rb +387 -0
- data/lib/bricolage/streamingload/{loaderparams.rb → jobparams.rb} +14 -39
- data/lib/bricolage/streamingload/manifest.rb +7 -1
- data/lib/bricolage/streamingload/objectbuffer.rb +0 -3
- data/lib/bricolage/streamingload/task.rb +5 -68
- data/lib/bricolage/streamingload/{loaderservice.rb → taskhandler.rb} +102 -61
- data/lib/bricolage/streamingload/version.rb +1 -1
- data/test/streamingload/test_dispatcher.rb +6 -6
- data/test/streamingload/test_job.rb +438 -0
- metadata +8 -9
- data/lib/bricolage/nulllogger.rb +0 -20
- data/lib/bricolage/snsdatasource.rb +0 -40
- data/lib/bricolage/streamingload/loader.rb +0 -158
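The core of this release is the new Bricolage::StreamingLoad::Job class (job.rb) and its execute_task entry point, which replace the old loader.rb / loaderservice.rb pair; the new test suite below drives it end to end. For orientation, here is a minimal sketch of the API as the tests exercise it. The constructor keywords, the 'dwhctl' datasource name, and the exception classes are taken from the test code in this diff; the task id and the rescue comments are illustrative only.

require 'bricolage/context'
require 'bricolage/streamingload/job'

include Bricolage

# Build a context the same way setup_context does in test_job.rb below.
ctx = Context.for_application('.', environment: 'test')
ctl_ds = ctx.get_data_source('sql', 'dwhctl')   # control tables (strload_*)

job = StreamingLoad::Job.new(
  context: ctx,
  ctl_ds: ctl_ds,
  task_id: 11,       # a strload_tasks row id (hypothetical here)
  force: false,      # force: true re-runs an already-loaded task
  logger: ctx.logger
)

begin
  job.execute_task   # issues the COPY (and any work-table SQL) via job.data_ds
rescue StreamingLoad::JobDefered
  # target table is disabled; no strload_jobs row is written
rescue StreamingLoad::JobDuplicated
  # the task already loaded successfully and force was false
rescue StreamingLoad::JobFailure
  # the load failed; recorded as status 'failure', eligible for retry
rescue StreamingLoad::JobCancelled
  # retry limit exhausted ("retry#5 FINAL" in the tests)
rescue StreamingLoad::JobError
  # SQL error or unexpected exception, recorded as status 'error'
end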
data/test/streamingload/test_job.rb ADDED
@@ -0,0 +1,438 @@
+require 'test/unit'
+require 'bricolage/streamingload/job'
+require 'bricolage/context'
+require 'bricolage/logger'
+
+module Bricolage
+
+  # FIXME: patch
+  class NullLogger
+    def log(*args) end
+    def add(*args) end
+  end
+
+  module StreamingLoad
+
+    class TestJob < Test::Unit::TestCase
+
+      test "execute_task" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
+          db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [1, 1], [1, 2]
+          db.insert_into 'strload_objects',
+            [1, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
+            [2, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 1, force: false, logger: ctx.logger)
+          job.execute_task
+
+          copy_stmt = "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
+          assert_equal [copy_stmt], job.data_ds.sql_list
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 1, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'success', job_row['status']
+        }
+      end
+
+      test "execute_task (with work table)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.with_work_table', 'testschema', 'with_work_table', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.with_work_table/0001.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.with_work_table/0002.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
+          job.execute_task
+
+          assert_equal 'begin transaction;', job.data_ds.sql_list[0]
+          assert_equal 'delete from testschema.with_work_table_wk', job.data_ds.sql_list[1]
+          assert_equal "copy testschema.with_work_table_wk from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;", job.data_ds.sql_list[2]
+          assert_equal "insert into testschema.with_work_table select * from testschema.with_work_table_wk;\n", job.data_ds.sql_list[3]
+          assert_equal 'truncate testschema.with_work_table_wk;', job.data_ds.sql_list[4]
+
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'success', job_row['status']
+        }
+      end
+
+      test "execute_task (disabled)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, true]
+          db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 1, force: false, logger: ctx.logger)
+          assert_raise(JobDefered) {
+            job.execute_task
+          }
+          count = db.query_value("select count(*) from strload_jobs")
+          assert_equal 0, count.to_i
+        }
+      end
+
+      test "execute_task (duplicated)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
+          db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_jobs',
+            [1, 1, 'localhost-1234', 'failure', current_timestamp, current_timestamp, ''],
+            [2, 1, 'localhost-1234', 'success', current_timestamp, current_timestamp, ''],
+            [3, 1, 'localhost-1234', 'duplicated', current_timestamp, current_timestamp, '']
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 1, force: false, logger: ctx.logger)
+          assert_raise(JobDuplicated) {
+            job.execute_task
+          }
+        }
+      end
+
+      test "execute_task (duplicated but forced)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: true, logger: ctx.logger)
+          job.execute_task
+
+          copy_stmt = "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
+          assert_equal [copy_stmt], job.data_ds.sql_list
+
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'success', job_row['status']
+          assert(/forced/ =~ job_row['message'])
+        }
+      end
+
+      test "execute_task (load fails / first time)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
+          assert_raise(JobFailure) {
+            job.execute_task
+          }
+          copy_stmt = "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
+          assert_equal [copy_stmt], job.data_ds.sql_list
+
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'failure', job_row['status']
+        }
+      end
+
+      test "execute_task (load fails / nth time)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.sql_fails/0001.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.sql_fails/0002.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp]
+          db.insert_into 'strload_jobs',
+            [101, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed'],
+            [102, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed']
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
+          assert_raise(JobFailure) {
+            job.execute_task
+          }
+          copy_stmt = "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
+          assert_equal [copy_stmt], job.data_ds.sql_list
+
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'failure', job_row['status']
+          assert(/retry\#2/ =~ job_row['message'])
+        }
+      end
+
+      test "execute_task (too many retry)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.sql_fails/0001.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.sql_fails/0002.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp]
+          db.insert_into 'strload_jobs',
+            [101, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed'],
+            [102, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#1 query failed'],
+            [103, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#2 query failed'],
+            [104, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#3 query failed'],
+            [105, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#4 query failed']
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
+          assert_raise(JobCancelled) {
+            job.execute_task
+          }
+          copy_stmt = "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
+          assert_equal [copy_stmt], job.data_ds.sql_list
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'failure', job_row['status']
+          assert(/retry\#5 FINAL/ =~ job_row['message'])
+        }
+      end
+
+      test "execute_task (job error)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.job_error', 'testschema', 'job_error', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.job_error/0001.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.job_error/0002.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
+          assert_raise(JobError) {
+            job.execute_task
+          }
+          assert_equal 1, job.data_ds.sql_list.size
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'error', job_row['status']
+        }
+      end
+
+      test "execute_task (unexpected error)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.unexpected_error', 'testschema', 'unexpected_error', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, sql('current_timestamp')]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.unexpected_error/0001.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.unexpected_error/0002.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
+          assert_raise(JobError) {
+            job.execute_task
+          }
+          assert_equal 1, job.data_ds.sql_list.size
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'error', job_row['status']
+        }
+      end
+
+      def setup_context(verbose: false)
+        ctx = Context.for_application('.', environment: 'test', logger: (verbose ? nil : NullLogger.new))
+        ctl_ds = ctx.get_data_source('sql', 'dwhctl')
+        ctl_ds.open {|conn|
+          client = SQLClient.new(conn)
+          clear_all_tables(client)
+          yield ctx, ctl_ds, client
+        }
+      end
+
+      # FIXME: database cleaner
+      def clear_all_tables(client)
+        client.truncate_tables %w[
+          strload_tables
+          strload_objects
+          strload_task_objects
+          strload_tasks
+          strload_jobs
+        ]
+      end
+
+      class SQLClient
+        include SQLUtils
+
+        def initialize(conn)
+          @conn = conn
+        end
+
+        extend Forwardable
+        def_delegators '@conn',
+          :query,
+          :query_value,
+          :query_values,
+          :query_row,
+          :select,
+          :update
+
+        def insert_into(table, *records)
+          sql = "insert into #{table}"
+          sep = ' values '
+          records.each do |rec|
+            sql << sep; sep = ', '
+            sql << format_values(rec)
+          end
+          @conn.update(sql)
+        end
+
+        def truncate_tables(tables)
+          tables.each do |name|
+            @conn.update("truncate #{name}")
+          end
+        end
+
+        private
+
+        def format_values(rec)
+          '(' + rec.map {|val| format_value(val) }.join(', ') + ')'
+        end
+
+        def format_value(val)
+          case val
+          when nil then 'null'
+          when true, false then val.to_s
+          when Integer then val.to_s
+          when SQLExpr then val.to_s
+          when String then sql_string_literal(val)
+          else
+            raise ArgumentError, "unsupported value type: #{val.class}: #{val.inspect}"
+          end
+        end
+
+        module DSL
+          def null
+            nil
+          end
+
+          def current_timestamp
+            SQLExpr.new('current_timestamp')
+          end
+
+          def sql(expr)
+            SQLExpr.new(expr)
+          end
+        end
+
+        class SQLExpr
+          def initialize(expr)
+            @expr = expr
+          end
+
+          def to_s
+            @expr
+          end
+        end
+      end
+
+      include SQLClient::DSL
+
+      class PSQLDataSourceMock < DataSource
+        declare_type 'psql_mock'
+
+        def initialize(fail_pattern: nil, error_pattern: nil, exception_pattern: nil, **params)
+          @sql_list = []
+          @fail_pattern = fail_pattern ? Regexp.compile(fail_pattern) : nil
+          @error_pattern = error_pattern ? Regexp.compile(error_pattern) : nil
+          @exception_pattern = exception_pattern ? Regexp.compile(exception_pattern) : nil
+        end
+
+        attr_reader :sql_list
+
+        def open
+          conn = Connection.new(self)
+          if block_given?
+            yield conn
+          else
+            conn
+          end
+        end
+
+        def issue_sql(sql)
+          logger.info "[mock] #{sql}"
+          @sql_list.push sql
+          if @fail_pattern and @fail_pattern =~ sql
+            raise JobFailure, "query failed: #{sql}"
+          end
+          if @error_pattern and @error_pattern =~ sql
+            raise JobError, "error"
+          end
+          if @exception_pattern and @exception_pattern =~ sql
+            raise ArgumentError, "unexpected exception"
+          end
+        end
+
+        class Connection
+          def initialize(ds)
+            @ds = ds
+          end
+
+          def execute(sql)
+            @ds.issue_sql sql
+          end
+
+          def transaction
+            @ds.issue_sql "begin transaction;"
+            yield Transaction.new(@ds)
+          end
+        end
+
+        class Transaction
+          def initialize(ds)
+            @ds = ds
+          end
+
+          def commit
+            @ds.issue_sql "commit;"
+          end
+
+          def truncate_and_commit(table)
+            @ds.issue_sql "truncate #{table};"
+          end
+        end
+      end
+
+      class S3DataSourceMock < DataSource
+        declare_type 's3_mock'
+
+        def initialize(**params)
+        end
+
+        def credential_string
+          'cccc'
+        end
+
+        def url(name)
+          "s3://bucket/prefix/#{name}"
+        end
+
+        def object(name)
+          ObjectMock.new(url(name), logger)
+        end
+
+        class ObjectMock
+          def initialize(url, logger)
+            @url = url
+            @logger = logger
+          end
+
+          def put(body:)
+            @logger.info "[mock] S3 PUT #{@url} content=#{body[0,20].inspect}..."
+          end
+
+          def delete
+            @logger.info "[mock] S3 DELETE #{@url}"
+          end
+        end
+      end
+
+    end
+
+  end
+end
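One reading note on the fixtures above: the failure tests work because PSQLDataSourceMock records every statement in sql_list and raises JobFailure, JobError, or ArgumentError when a statement matches its fail_pattern, error_pattern, or exception_pattern keyword; the destination table names sql_fails, job_error, and unexpected_error are presumably matched by those patterns in the test datasource configuration, which is not part of this diff. The same record-and-inject idea in a self-contained sketch (RecordingSQLMock and everything in it is illustrative, not part of the gem):

# Minimal stand-alone version of the record-and-inject mock used above.
class RecordingSQLMock
  attr_reader :sql_list

  def initialize(fail_pattern: nil)
    @sql_list = []
    @fail_pattern = fail_pattern && Regexp.compile(fail_pattern)
  end

  def issue_sql(sql)
    @sql_list.push sql   # record first, so assertions can inspect failed statements
    if @fail_pattern && @fail_pattern =~ sql
      raise "query failed: #{sql}"
    end
  end
end

mock = RecordingSQLMock.new(fail_pattern: 'sql_fails')
mock.issue_sql "copy testschema.desttable from 's3://...' ;"   # recorded, no error
mock.issue_sql "copy testschema.sql_fails from 's3://...' ;"   # recorded, then raises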
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bricolage-streamingload
 version: !ruby/object:Gem::Version
-  version: 0.7.1
+  version: 0.8.0
 platform: ruby
 authors:
 - Minero Aoki
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-
+date: 2016-10-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bricolage
@@ -17,14 +17,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 5.
+        version: 5.19.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 5.
+        version: 5.19.0
 - !ruby/object:Gem::Dependency
   name: pg
   requirement: !ruby/object:Gem::Requirement
@@ -117,24 +117,23 @@ files:
 - bin/send-flushtable-event
 - bin/send-load-task
 - bin/send-shutdown-event
-- lib/bricolage/nulllogger.rb
-- lib/bricolage/snsdatasource.rb
 - lib/bricolage/sqsdatasource.rb
 - lib/bricolage/sqsmock.rb
 - lib/bricolage/streamingload/alertinglogger.rb
 - lib/bricolage/streamingload/dispatcher.rb
 - lib/bricolage/streamingload/event.rb
-- lib/bricolage/streamingload/loader.rb
-- lib/bricolage/streamingload/loaderparams.rb
-- lib/bricolage/streamingload/loaderservice.rb
+- lib/bricolage/streamingload/job.rb
+- lib/bricolage/streamingload/jobparams.rb
 - lib/bricolage/streamingload/manifest.rb
 - lib/bricolage/streamingload/objectbuffer.rb
 - lib/bricolage/streamingload/task.rb
+- lib/bricolage/streamingload/taskhandler.rb
 - lib/bricolage/streamingload/urlpatterns.rb
 - lib/bricolage/streamingload/version.rb
 - test/all.rb
 - test/streamingload/test_dispatcher.rb
 - test/streamingload/test_event.rb
+- test/streamingload/test_job.rb
 - test/test_sqsdatasource.rb
 homepage: https://github.com/aamine/bricolage-streamingload
 licenses:
data/lib/bricolage/nulllogger.rb DELETED
@@ -1,20 +0,0 @@
-require 'logger'
-
-module Bricolage
-  # FIXME: should be defined in the Bricolage package
-  class NullLogger
-    def debug(*args) end
-    def debug?() false end
-    def info(*args) end
-    def info?() false end
-    def warn(*args) end
-    def warn?() false end
-    def error(*args) end
-    def error?() false end
-    def exception(*args) end
-    def with_elapsed_time(*args) yield end
-    def elapsed_time(*args) yield end
-    def level() Logger::ERROR end
-    def level=(l) l end
-  end
-end
data/lib/bricolage/snsdatasource.rb DELETED
@@ -1,40 +0,0 @@
-require 'bricolage/datasource'
-require 'aws-sdk'
-require 'json'
-require 'time'
-
-module Bricolage
-
-  class SNSTopicDataSource < DataSource
-
-    declare_type 'sns'
-
-    def initialize(region: 'ap-northeast-1', topic_arn:, access_key_id:, secret_access_key:)
-      @region = region
-      @topic_arn = topic_arn
-      @access_key_id = access_key_id
-      @secret_access_key = secret_access_key
-      @client = Aws::SNS::Client.new(region: region, access_key_id: access_key_id, secret_access_key: secret_access_key)
-      @topic = Aws::SNS::Topic.new(topic_arn, client: @client)
-    end
-
-    attr_reader :region
-    attr_reader :client, :topic
-
-    def publish(message)
-      @topic.publish(build_message(message))
-    end
-
-    alias write publish
-
-    def close
-      # do nothing
-    end
-
-    def build_message(message)
-      {message: message}
-    end
-
-  end   # SNSDataSource
-
-end   # module Bricolage