bricolage-streamingload 0.7.1 → 0.8.0
- checksums.yaml +4 -4
- data/bin/bricolage-streaming-loader +2 -2
- data/lib/bricolage/sqsmock.rb +0 -1
- data/lib/bricolage/streamingload/dispatcher.rb +2 -1
- data/lib/bricolage/streamingload/job.rb +387 -0
- data/lib/bricolage/streamingload/{loaderparams.rb → jobparams.rb} +14 -39
- data/lib/bricolage/streamingload/manifest.rb +7 -1
- data/lib/bricolage/streamingload/objectbuffer.rb +0 -3
- data/lib/bricolage/streamingload/task.rb +5 -68
- data/lib/bricolage/streamingload/{loaderservice.rb → taskhandler.rb} +102 -61
- data/lib/bricolage/streamingload/version.rb +1 -1
- data/test/streamingload/test_dispatcher.rb +6 -6
- data/test/streamingload/test_job.rb +438 -0
- metadata +8 -9
- data/lib/bricolage/nulllogger.rb +0 -20
- data/lib/bricolage/snsdatasource.rb +0 -40
- data/lib/bricolage/streamingload/loader.rb +0 -158
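The headline change in 0.8.0 is that load execution moves out of `loader.rb`/`loaderservice.rb` into a new `Job` class with its own test suite. The sketch below shows the entry point as it is exercised by `test_job.rb` (the ADDED file that follows); the `dwhctl` data source name, keyword arguments, and exception classes are all taken from that test code, so treat this as an illustration rather than official API documentation.

```ruby
require 'bricolage/context'
require 'bricolage/streamingload/job'

# Usage as exercised by test_job.rb (names taken from the tests below).
ctx    = Bricolage::Context.for_application('.', environment: 'test')
ctl_ds = ctx.get_data_source('sql', 'dwhctl')   # control (task/job) database

job = Bricolage::StreamingLoad::Job.new(
  context: ctx,
  ctl_ds:  ctl_ds,
  task_id: 1,       # strload_tasks row to execute
  force:   false,   # true re-runs an already-loaded task
  logger:  ctx.logger
)

begin
  job.execute_task  # issues the Redshift COPY and records a strload_jobs row
rescue Bricolage::StreamingLoad::JobDefered
  # table is disabled; the task is left for later
rescue Bricolage::StreamingLoad::JobDuplicated
  # task already loaded successfully; nothing to do
rescue Bricolage::StreamingLoad::JobCancelled
  # retry limit exhausted
rescue Bricolage::StreamingLoad::JobFailure
  # load failed but is retriable
rescue Bricolage::StreamingLoad::JobError
  # non-retriable job error
end
```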
data/test/streamingload/test_job.rb ADDED
@@ -0,0 +1,438 @@
+require 'test/unit'
+require 'bricolage/streamingload/job'
+require 'bricolage/context'
+require 'bricolage/logger'
+
+module Bricolage
+
+  # FIXME: patch
+  class NullLogger
+    def log(*args) end
+    def add(*args) end
+  end
+
+  module StreamingLoad
+
+    class TestJob < Test::Unit::TestCase
+
+      test "execute_task" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
+          db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [1, 1], [1, 2]
+          db.insert_into 'strload_objects',
+            [1, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
+            [2, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 1, force: false, logger: ctx.logger)
+          job.execute_task
+
+          copy_stmt = "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
+          assert_equal [copy_stmt], job.data_ds.sql_list
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 1, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'success', job_row['status']
+        }
+      end
+
+      test "execute_task (with work table)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.with_work_table', 'testschema', 'with_work_table', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.with_work_table/0001.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.with_work_table/0002.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
+          job.execute_task
+
+          assert_equal 'begin transaction;', job.data_ds.sql_list[0]
+          assert_equal 'delete from testschema.with_work_table_wk', job.data_ds.sql_list[1]
+          assert_equal "copy testschema.with_work_table_wk from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;", job.data_ds.sql_list[2]
+          assert_equal "insert into testschema.with_work_table select * from testschema.with_work_table_wk;\n", job.data_ds.sql_list[3]
+          assert_equal 'truncate testschema.with_work_table_wk;', job.data_ds.sql_list[4]
+
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'success', job_row['status']
+        }
+      end
+
+      test "execute_task (disabled)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, true]
+          db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 1, force: false, logger: ctx.logger)
+          assert_raise(JobDefered) {
+            job.execute_task
+          }
+          count = db.query_value("select count(*) from strload_jobs")
+          assert_equal 0, count.to_i
+        }
+      end
+
+      test "execute_task (duplicated)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
+          db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_jobs',
+            [1, 1, 'localhost-1234', 'failure', current_timestamp, current_timestamp, ''],
+            [2, 1, 'localhost-1234', 'success', current_timestamp, current_timestamp, ''],
+            [3, 1, 'localhost-1234', 'duplicated', current_timestamp, current_timestamp, '']
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 1, force: false, logger: ctx.logger)
+          assert_raise(JobDuplicated) {
+            job.execute_task
+          }
+        }
+      end
+
+      test "execute_task (duplicated but forced)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: true, logger: ctx.logger)
+          job.execute_task
+
+          copy_stmt = "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
+          assert_equal [copy_stmt], job.data_ds.sql_list
+
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'success', job_row['status']
+          assert(/forced/ =~ job_row['message'])
+        }
+      end
+
+      test "execute_task (load fails / first time)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
+          assert_raise(JobFailure) {
+            job.execute_task
+          }
+          copy_stmt = "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
+          assert_equal [copy_stmt], job.data_ds.sql_list
+
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'failure', job_row['status']
+        }
+      end
+
+      test "execute_task (load fails / nth time)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.sql_fails/0001.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.sql_fails/0002.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp]
+          db.insert_into 'strload_jobs',
+            [101, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed'],
+            [102, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed']
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
+          assert_raise(JobFailure) {
+            job.execute_task
+          }
+          copy_stmt = "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
+          assert_equal [copy_stmt], job.data_ds.sql_list
+
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'failure', job_row['status']
+          assert(/retry\#2/ =~ job_row['message'])
+        }
+      end
+
+      test "execute_task (too many retry)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.sql_fails/0001.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.sql_fails/0002.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp]
+          db.insert_into 'strload_jobs',
+            [101, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed'],
+            [102, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#1 query failed'],
+            [103, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#2 query failed'],
+            [104, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#3 query failed'],
+            [105, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#4 query failed']
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
+          assert_raise(JobCancelled) {
+            job.execute_task
+          }
+          copy_stmt = "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;"
+          assert_equal [copy_stmt], job.data_ds.sql_list
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'failure', job_row['status']
+          assert(/retry\#5 FINAL/ =~ job_row['message'])
+        }
+      end
+
+      test "execute_task (job error)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.job_error', 'testschema', 'job_error', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.job_error/0001.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.job_error/0002.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
+          assert_raise(JobError) {
+            job.execute_task
+          }
+          assert_equal 1, job.data_ds.sql_list.size
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'error', job_row['status']
+        }
+      end
+
+      test "execute_task (unexpected error)" do
+        setup_context {|ctx, ctl_ds, db|
+          db.insert_into 'strload_tables', [1, 'testschema.unexpected_error', 'testschema', 'unexpected_error', 100, 1800, false]
+          db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, sql('current_timestamp')]
+          db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
+          db.insert_into 'strload_objects',
+            [1001, 's3://data-bucket/testschema.unexpected_error/0001.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp],
+            [1002, 's3://data-bucket/testschema.unexpected_error/0002.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp]
+
+          job = Job.new(context: ctx, ctl_ds: ctl_ds, task_id: 11, force: false, logger: ctx.logger)
+          assert_raise(JobError) {
+            job.execute_task
+          }
+          assert_equal 1, job.data_ds.sql_list.size
+          job_row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
+          assert_equal 11, job_row['task_id'].to_i
+          assert_equal job.process_id, job_row['process_id']
+          assert_equal 'error', job_row['status']
+        }
+      end
+
+      def setup_context(verbose: false)
+        ctx = Context.for_application('.', environment: 'test', logger: (verbose ? nil : NullLogger.new))
+        ctl_ds = ctx.get_data_source('sql', 'dwhctl')
+        ctl_ds.open {|conn|
+          client = SQLClient.new(conn)
+          clear_all_tables(client)
+          yield ctx, ctl_ds, client
+        }
+      end
+
+      # FIXME: database cleaner
+      def clear_all_tables(client)
+        client.truncate_tables %w[
+          strload_tables
+          strload_objects
+          strload_task_objects
+          strload_tasks
+          strload_jobs
+        ]
+      end
+
+      class SQLClient
+        include SQLUtils
+
+        def initialize(conn)
+          @conn = conn
+        end
+
+        extend Forwardable
+        def_delegators '@conn',
+          :query,
+          :query_value,
+          :query_values,
+          :query_row,
+          :select,
+          :update
+
+        def insert_into(table, *records)
+          sql = "insert into #{table}"
+          sep = ' values '
+          records.each do |rec|
+            sql << sep; sep = ', '
+            sql << format_values(rec)
+          end
+          @conn.update(sql)
+        end
+
+        def truncate_tables(tables)
+          tables.each do |name|
+            @conn.update("truncate #{name}")
+          end
+        end
+
+        private
+
+        def format_values(rec)
+          '(' + rec.map {|val| format_value(val) }.join(', ') + ')'
+        end
+
+        def format_value(val)
+          case val
+          when nil then 'null'
+          when true, false then val.to_s
+          when Integer then val.to_s
+          when SQLExpr then val.to_s
+          when String then sql_string_literal(val)
+          else
+            raise ArgumentError, "unsupported value type: #{val.class}: #{val.inspect}"
+          end
+        end
+
+        module DSL
+          def null
+            nil
+          end
+
+          def current_timestamp
+            SQLExpr.new('current_timestamp')
+          end
+
+          def sql(expr)
+            SQLExpr.new(expr)
+          end
+        end
+
+        class SQLExpr
+          def initialize(expr)
+            @expr = expr
+          end
+
+          def to_s
+            @expr
+          end
+        end
+      end
+
+      include SQLClient::DSL
+
+      class PSQLDataSourceMock < DataSource
+        declare_type 'psql_mock'
+
+        def initialize(fail_pattern: nil, error_pattern: nil, exception_pattern: nil, **params)
+          @sql_list = []
+          @fail_pattern = fail_pattern ? Regexp.compile(fail_pattern) : nil
+          @error_pattern = error_pattern ? Regexp.compile(error_pattern) : nil
+          @exception_pattern = exception_pattern ? Regexp.compile(exception_pattern) : nil
+        end
+
+        attr_reader :sql_list
+
+        def open
+          conn = Connection.new(self)
+          if block_given?
+            yield conn
+          else
+            conn
+          end
+        end
+
+        def issue_sql(sql)
+          logger.info "[mock] #{sql}"
+          @sql_list.push sql
+          if @fail_pattern and @fail_pattern =~ sql
+            raise JobFailure, "query failed: #{sql}"
+          end
+          if @error_pattern and @error_pattern =~ sql
+            raise JobError, "error"
+          end
+          if @exception_pattern and @exception_pattern =~ sql
+            raise ArgumentError, "unexpected exception"
+          end
+        end
+
+        class Connection
+          def initialize(ds)
+            @ds = ds
+          end
+
+          def execute(sql)
+            @ds.issue_sql sql
+          end
+
+          def transaction
+            @ds.issue_sql "begin transaction;"
+            yield Transaction.new(@ds)
+          end
+        end
+
+        class Transaction
+          def initialize(ds)
+            @ds = ds
+          end
+
+          def commit
+            @ds.issue_sql "commit;"
+          end
+
+          def truncate_and_commit(table)
+            @ds.issue_sql "truncate #{table};"
+          end
+        end
+      end
+
+      class S3DataSourceMock < DataSource
+        declare_type 's3_mock'
+
+        def initialize(**params)
+        end
+
+        def credential_string
+          'cccc'
+        end
+
+        def url(name)
+          "s3://bucket/prefix/#{name}"
+        end
+
+        def object(name)
+          ObjectMock.new(url(name), logger)
+        end
+
+        class ObjectMock
+          def initialize(url, logger)
+            @url = url
+            @logger = logger
+          end
+
+          def put(body:)
+            @logger.info "[mock] S3 PUT #{@url} content=#{body[0,20].inspect}..."
+          end
+
+          def delete
+            @logger.info "[mock] S3 DELETE #{@url}"
+          end
+        end
+      end
+
+    end
+
+  end
+end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bricolage-streamingload
 version: !ruby/object:Gem::Version
-  version: 0.7.1
+  version: 0.8.0
 platform: ruby
 authors:
 - Minero Aoki
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-
+date: 2016-10-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bricolage
@@ -17,14 +17,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 5.
+        version: 5.19.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
      - !ruby/object:Gem::Version
-        version: 5.
+        version: 5.19.0
 - !ruby/object:Gem::Dependency
   name: pg
   requirement: !ruby/object:Gem::Requirement
@@ -117,24 +117,23 @@ files:
 - bin/send-flushtable-event
 - bin/send-load-task
 - bin/send-shutdown-event
-- lib/bricolage/nulllogger.rb
-- lib/bricolage/snsdatasource.rb
 - lib/bricolage/sqsdatasource.rb
 - lib/bricolage/sqsmock.rb
 - lib/bricolage/streamingload/alertinglogger.rb
 - lib/bricolage/streamingload/dispatcher.rb
 - lib/bricolage/streamingload/event.rb
-- lib/bricolage/streamingload/loader.rb
-- lib/bricolage/streamingload/loaderparams.rb
-- lib/bricolage/streamingload/loaderservice.rb
+- lib/bricolage/streamingload/job.rb
+- lib/bricolage/streamingload/jobparams.rb
 - lib/bricolage/streamingload/manifest.rb
 - lib/bricolage/streamingload/objectbuffer.rb
 - lib/bricolage/streamingload/task.rb
+- lib/bricolage/streamingload/taskhandler.rb
 - lib/bricolage/streamingload/urlpatterns.rb
 - lib/bricolage/streamingload/version.rb
 - test/all.rb
 - test/streamingload/test_dispatcher.rb
 - test/streamingload/test_event.rb
+- test/streamingload/test_job.rb
 - test/test_sqsdatasource.rb
 homepage: https://github.com/aamine/bricolage-streamingload
 licenses:
data/lib/bricolage/nulllogger.rb DELETED
@@ -1,20 +0,0 @@
-require 'logger'
-
-module Bricolage
-  # FIXME: should be defined in the Bricolage package
-  class NullLogger
-    def debug(*args) end
-    def debug?() false end
-    def info(*args) end
-    def info?() false end
-    def warn(*args) end
-    def warn?() false end
-    def error(*args) end
-    def error?() false end
-    def exception(*args) end
-    def with_elapsed_time(*args) yield end
-    def elapsed_time(*args) yield end
-    def level() Logger::ERROR end
-    def level=(l) l end
-  end
-end
data/lib/bricolage/snsdatasource.rb DELETED
@@ -1,40 +0,0 @@
-require 'bricolage/datasource'
-require 'aws-sdk'
-require 'json'
-require 'time'
-
-module Bricolage
-
-  class SNSTopicDataSource < DataSource
-
-    declare_type 'sns'
-
-    def initialize(region: 'ap-northeast-1', topic_arn:, access_key_id:, secret_access_key:)
-      @region = region
-      @topic_arn = topic_arn
-      @access_key_id = access_key_id
-      @secret_access_key = secret_access_key
-      @client = Aws::SNS::Client.new(region: region, access_key_id: access_key_id, secret_access_key: secret_access_key)
-      @topic = Aws::SNS::Topic.new(topic_arn, client: @client)
-    end
-
-    attr_reader :region
-    attr_reader :client, :topic
-
-    def publish(message)
-      @topic.publish(build_message(message))
-    end
-
-    alias write publish
-
-    def close
-      # do nothing
-    end
-
-    def build_message(message)
-      {message: message}
-    end
-
-  end # SNSDataSource
-
-end # module Bricolage