bricolage-streamingload 0.15.0 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +5 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +61 -0
- data/LICENSES +21 -0
- data/RELEASE.md +135 -0
- data/Rakefile +3 -0
- data/bin/send-data-event +1 -1
- data/bricolage-streamingload.gemspec +27 -0
- data/config/production/database.yml +66 -0
- data/config/production/password.yml +5 -0
- data/config/production/streamingload.yml +20 -0
- data/config/production/variable.yml +5 -0
- data/lib/bricolage/streamingload/dispatcher.rb +8 -1
- data/lib/bricolage/streamingload/job.rb +14 -3
- data/lib/bricolage/streamingload/jobparams.rb +1 -1
- data/lib/bricolage/streamingload/taskhandler.rb +5 -2
- data/lib/bricolage/streamingload/version.rb +1 -1
- data/sample/sqs-message.txt +38 -0
- data/sample/sqs-result.txt +18 -0
- data/strload_load_logs.ct +13 -0
- data/testschema/strload_test.ct +11 -0
- data/testschema/testlog.json.gz +0 -0
- data/testschema/with_work_table.job +4 -0
- data/testschema/with_work_table.sql +1 -0
- data/utils/init_strload_tables.sql +13 -0
- data/utils/strload-stat.sql +36 -0
- metadata +46 -30
- data/test/all.rb +0 -3
- data/test/streamingload/test_dispatcher.rb +0 -241
- data/test/streamingload/test_dispatchermessage.rb +0 -31
- data/test/streamingload/test_job.rb +0 -620
- data/test/test_sqsdatasource.rb +0 -55
@@ -1,31 +0,0 @@
|
|
1
|
-
require 'test/unit'
|
2
|
-
require 'bricolage/streamingload/dispatchermessage'
|
3
|
-
|
4
|
-
module Bricolage::StreamingLoad

  # Unit tests for S3ObjectDispatcherMessage.
  class TestDispatcherMessage < Test::Unit::TestCase

    # Builds an S3ObjectDispatcherMessage. Every attribute defaults to nil so
    # that a test only spells out the fields it actually cares about.
    def new_s3event(message_id: nil, receipt_handle: nil, name: nil, time: nil, source: nil, region: nil, bucket: nil, key: nil, size: nil)
      attributes = {
        message_id: message_id,
        receipt_handle: receipt_handle,
        name: name,
        time: time,
        source: source,
        region: region,
        bucket: bucket,
        key: key,
        size: size
      }
      S3ObjectDispatcherMessage.new(**attributes)
    end

    # created_event? is expected to be true for "ObjectCreated:Put" and
    # false for "ObjectCreated:Copy".
    test "#created?" do
      put_event = new_s3event(name: "ObjectCreated:Put")
      assert_true put_event.created_event?

      copy_event = new_s3event(name: "ObjectCreated:Copy")
      assert_false copy_event.created_event?
    end

  end

end
|
@@ -1,620 +0,0 @@
|
|
1
|
-
require 'test/unit'
|
2
|
-
require 'bricolage/streamingload/job'
|
3
|
-
require 'bricolage/context'
|
4
|
-
require 'bricolage/logger'
|
5
|
-
|
6
|
-
module Bricolage
|
7
|
-
|
8
|
-
# FIXME: patch
|
9
|
-
# Logger stand-in whose #log and #add accept any arguments and do nothing,
# so the tests can run without producing log output.
class NullLogger
  # Discards its arguments; always returns nil.
  def log(*)
    nil
  end

  # Discards its arguments; always returns nil.
  def add(*)
    nil
  end
end
|
13
|
-
|
14
|
-
module StreamingLoad
|
15
|
-
|
16
|
-
class TestJob < Test::Unit::TestCase
|
17
|
-
|
18
|
-
# Plain successful load: the data source must receive begin / copy / load-log
# insert / commit, and the job must be recorded as 'success'.
test "execute_task" do
  setup_context do |db|
    # Fixtures: one target table, one task, and two objects attached to the task.
    db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
    db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [1, 1], [1, 2]
    object_rows = [
      [1, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
      [2, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
    ]
    db.insert_into 'strload_objects', *object_rows

    job = new_job(task_id: 1, force: false)
    job.execute_task

    expected_sql = [
      "begin transaction;",
      "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "insert into strload_load_logs (task_id, job_id, finish_time) values (1, #{job.job_id}, current_timestamp)",
      "commit;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    recorded = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 1, recorded['task_id'].to_i
    assert_equal job.process_id, recorded['process_id']
    assert_equal 'success', recorded['status']
  end
end
|
43
|
-
|
44
|
-
# Load through a work table: the expected SQL clears the *_wk table, copies
# into it, moves the rows to the target table, logs the load and finally
# truncates the work table.
test "execute_task (with work table)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.with_work_table', 'testschema', 'with_work_table', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    object_rows = [
      [1001, 's3://data-bucket/testschema.with_work_table/0001.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.with_work_table/0002.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp]
    ]
    db.insert_into 'strload_objects', *object_rows

    job = new_job(task_id: 11, force: false)
    job.execute_task

    expected_sql = [
      "begin transaction;",
      "delete from testschema.with_work_table_wk",
      "copy testschema.with_work_table_wk from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "insert into testschema.with_work_table select * from testschema.with_work_table_wk;\n",
      "insert into strload_load_logs (task_id, job_id, finish_time) values (11, #{job.job_id}, current_timestamp)",
      "truncate testschema.with_work_table_wk;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    recorded = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, recorded['task_id'].to_i
    assert_equal job.process_id, recorded['process_id']
    assert_equal 'success', recorded['status']
  end
end
|
71
|
-
|
72
|
-
# The strload_tables row ends with `true` here (the other tests use `false`);
# presumably that column is the "disabled" flag -- confirm against the schema.
# The job must raise JobDefered and leave strload_jobs empty.
test "execute_task (disabled)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, true]
    db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]

    job = new_job(task_id: 1, force: false)
    assert_raise(JobDefered) { job.execute_task }

    job_count = db.query_value("select count(*) from strload_jobs")
    assert_equal 0, job_count.to_i
  end
end
|
85
|
-
|
86
|
-
# The task already has job rows (failure, success, duplicated); running it
# again without force must raise JobDuplicated.
test "execute_task (duplicated)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
    db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
    earlier_jobs = [
      [1, 1, 'localhost-1234', 'failure', current_timestamp, current_timestamp, ''],
      [2, 1, 'localhost-1234', 'success', current_timestamp, current_timestamp, ''],
      [3, 1, 'localhost-1234', 'duplicated', current_timestamp, current_timestamp, '']
    ]
    db.insert_into 'strload_jobs', *earlier_jobs

    job = new_job(task_id: 1, force: false)
    assert_raise(JobDuplicated) { job.execute_task }
  end
end
|
101
|
-
|
102
|
-
# Forced run: the load is executed, recorded as 'success', and the job
# message must contain "forced".
test "execute_task (duplicated but forced)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    object_rows = [
      [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
    ]
    db.insert_into 'strload_objects', *object_rows

    job = new_job(task_id: 11, force: true)
    job.execute_task

    expected_sql = [
      "begin transaction;",
      "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "insert into strload_load_logs (task_id, job_id, finish_time) values (11, #{job.job_id}, current_timestamp)",
      "commit;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    recorded = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, recorded['task_id'].to_i
    assert_equal job.process_id, recorded['process_id']
    assert_equal 'success', recorded['status']
    assert(/forced/ =~ recorded['message'])
  end
end
|
128
|
-
|
129
|
-
# First failing load for the task: JobFailure is raised, the transaction is
# aborted right after the copy, and the job is recorded as 'failure'.
test "execute_task (load fails / first time)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    object_rows = [
      [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
    ]
    db.insert_into 'strload_objects', *object_rows

    job = new_job(task_id: 11, force: false)
    assert_raise(JobFailure) { job.execute_task }

    expected_sql = [
      "begin transaction;",
      "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "abort;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    recorded = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, recorded['task_id'].to_i
    assert_equal job.process_id, recorded['process_id']
    assert_equal 'failure', recorded['status']
  end
end
|
154
|
-
|
155
|
-
# One earlier failed job already exists for the task; the new failure must be
# recorded with a message containing "retry#1".
test "execute_task (load fails / nth time)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    object_rows = [
      [1001, 's3://data-bucket/testschema.sql_fails/0001.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.sql_fails/0002.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp]
    ]
    db.insert_into 'strload_objects', *object_rows
    db.insert_into 'strload_jobs',
      [101, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed']

    job = new_job(task_id: 11, force: false)
    assert_raise(JobFailure) { job.execute_task }

    expected_sql = [
      "begin transaction;",
      "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "abort;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    recorded = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, recorded['task_id'].to_i
    assert_equal job.process_id, recorded['process_id']
    assert_equal 'failure', recorded['status']
    assert(/retry\#1/ =~ recorded['message'])
  end
end
|
183
|
-
|
184
|
-
# Five failed jobs already exist for the task; one more failure must raise
# JobCancelled and be recorded with a message containing "retry#5 FINAL".
test "execute_task (too many retry)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    object_rows = [
      [1001, 's3://data-bucket/testschema.sql_fails/0001.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.sql_fails/0002.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp]
    ]
    db.insert_into 'strload_objects', *object_rows
    failed_jobs = [
      [101, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed'],
      [102, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#1 query failed'],
      [103, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#2 query failed'],
      [104, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#3 query failed'],
      [105, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#4 query failed']
    ]
    db.insert_into 'strload_jobs', *failed_jobs

    job = new_job(task_id: 11, force: false)
    assert_raise(JobCancelled) { job.execute_task }

    expected_sql = [
      "begin transaction;",
      "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "abort;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    recorded = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, recorded['task_id'].to_i
    assert_equal job.process_id, recorded['process_id']
    assert_equal 'failure', recorded['status']
    assert(/retry\#5 FINAL/ =~ recorded['message'])
  end
end
|
216
|
-
|
217
|
-
# Load into testschema.job_error: JobError must be raised, the transaction
# aborted, and the job recorded with status 'error'.
test "execute_task (job error)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.job_error', 'testschema', 'job_error', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    object_rows = [
      [1001, 's3://data-bucket/testschema.job_error/0001.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.job_error/0002.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp]
    ]
    db.insert_into 'strload_objects', *object_rows

    job = new_job(task_id: 11, force: false)
    assert_raise(JobError) { job.execute_task }

    expected_sql = [
      "begin transaction;",
      "copy testschema.job_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "abort;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    recorded = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, recorded['task_id'].to_i
    assert_equal job.process_id, recorded['process_id']
    assert_equal 'error', recorded['status']
  end
end
|
242
|
-
|
243
|
-
# Load into testschema.unexpected_error: the test expects the job to surface
# JobError, abort the transaction, and record status 'error'.
test "execute_task (unexpected error)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.unexpected_error', 'testschema', 'unexpected_error', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    object_rows = [
      [1001, 's3://data-bucket/testschema.unexpected_error/0001.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.unexpected_error/0002.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp]
    ]
    db.insert_into 'strload_objects', *object_rows

    job = new_job(task_id: 11, force: false)
    assert_raise(JobError) { job.execute_task }

    expected_sql = [
      "begin transaction;",
      "copy testschema.unexpected_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "abort;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    recorded = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, recorded['task_id'].to_i
    assert_equal job.process_id, recorded['process_id']
    assert_equal 'error', recorded['status']
  end
end
|
268
|
-
|
269
|
-
# Load into testschema.load_error: JobError must be raised, status recorded
# as 'error', and the job message must mention "stl_load_errors".
test "execute_task (load error)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.load_error', 'testschema', 'load_error', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    object_rows = [
      [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
    ]
    db.insert_into 'strload_objects', *object_rows

    job = new_job(task_id: 11, force: false)
    assert_raise(JobError) { job.execute_task }

    expected_sql = [
      "begin transaction;",
      "copy testschema.load_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "abort;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    recorded = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, recorded['task_id'].to_i
    assert_equal job.process_id, recorded['process_id']
    assert_equal 'error', recorded['status']
    assert(/stl_load_errors/ =~ recorded['message'])
  end
end
|
295
|
-
|
296
|
-
# Job 101 is stored with status 'unknown' while the mock data source reports
# it as succeeded: the new run must raise JobDuplicated and job 101 must end
# up with status 'success'.
test "execute_task (unknown status, really=success)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_jobs',
      [101, 11, 'localhost-1234', 'unknown', current_timestamp, current_timestamp, 'data connection failed']
    @data_ds.provide_job_status 101, true

    job = new_job(task_id: 11, force: false)
    assert_raise(JobDuplicated) { job.execute_task }

    previous_job = db.query_row("select * from strload_jobs where job_id = 101")
    assert_equal 'success', previous_job['status']
  end
end
|
313
|
-
|
314
|
-
# Job 101 is stored with status 'unknown' while the mock data source reports
# it as not succeeded: the load runs normally, job 101 ends up 'failure' and
# the new job is recorded as 'success'.
test "execute_task (unknown status, really=failure)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    object_rows = [
      [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
    ]
    db.insert_into 'strload_objects', *object_rows
    db.insert_into 'strload_jobs',
      [101, 11, 'localhost-1234', 'unknown', current_timestamp, current_timestamp, 'data connection failed']
    @data_ds.provide_job_status 101, false

    job = new_job(task_id: 11, force: false)
    job.execute_task

    expected_sql = [
      "begin transaction;",
      "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "insert into strload_load_logs (task_id, job_id, finish_time) values (11, #{job.job_id}, current_timestamp)",
      "commit;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    previous_job = db.query_row("select * from strload_jobs where job_id = 101")
    assert_equal 'failure', previous_job['status']

    recorded = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, recorded['task_id'].to_i
    assert_equal job.process_id, recorded['process_id']
    assert_equal 'success', recorded['status']
  end
end
|
345
|
-
|
346
|
-
# Prepares the per-test environment and yields a SQLClient on the control
# database.
#
# Creates the 'test' application context, looks up the 'dwhctl' and
# 'db_data_mock' data sources, truncates the strload control tables, then
# yields the client to the block.
#
# verbose:: when false (the default) a NullLogger is passed to the context;
#           when true, nil is passed as the logger instead.
def setup_context(verbose: false)
  test_logger = verbose ? nil : NullLogger.new
  @ctx = Context.for_application('.', environment: 'test', logger: test_logger)
  @ctl_ds = @ctx.get_data_source('sql', 'dwhctl')
  @data_ds = @ctx.get_data_source('sql', 'db_data_mock')
  @ctl_ds.open do |conn|
    db = SQLClient.new(conn)
    clear_all_tables(db)
    yield db
  end
end
|
356
|
-
|
357
|
-
# Builds the Job under test from the context and data sources prepared by
# setup_context.
#
# task_id:: id of the strload task the job should execute
# force::   passed through to Job.new unchanged
def new_job(task_id:, force:)
  job_params = {
    context: @ctx,
    ctl_ds: @ctl_ds,
    data_ds: @data_ds,
    logger: @ctx.logger,
    task_id: task_id,
    force: force
  }
  Job.new(**job_params)
end
|
367
|
-
|
368
|
-
# FIXME: database cleaner
|
369
|
-
# Asks +client+ to truncate every strload control table the tests write to.
# Returns whatever client.truncate_tables returns.
def clear_all_tables(client)
  control_tables = [
    'strload_tables',
    'strload_objects',
    'strload_task_objects',
    'strload_tasks',
    'strload_jobs'
  ]
  client.truncate_tables control_tables
end
|
378
|
-
|
379
|
-
# Small SQL helper used by the tests to seed and inspect the control database
# through an already opened connection.
class SQLClient
  include SQLUtils
  extend Forwardable

  # conn:: connection object that all queries and updates are sent to
  def initialize(conn)
    @conn = conn
  end

  # Read/update calls are forwarded to the connection untouched.
  def_delegators '@conn', :query, :query_value, :query_values, :query_row, :select, :update

  # Inserts +records+ (one array of column values per row) into +table+ with
  # a single multi-row INSERT statement.
  def insert_into(table, *records)
    statement = "insert into #{table}"
    unless records.empty?
      row_literals = records.map { |rec| format_values(rec) }
      statement << ' values ' << row_literals.join(', ')
    end
    @conn.update(statement)
  end

  # Truncates each of the named tables, one statement per table.
  def truncate_tables(tables)
    tables.each { |name| @conn.update("truncate #{name}") }
  end

  private

  # Renders one record as a parenthesised, comma-separated SQL value list.
  def format_values(rec)
    literals = rec.map { |val| format_value(val) }
    "(#{literals.join(', ')})"
  end

  # Renders a single Ruby value as SQL text. Raises ArgumentError for value
  # types other than nil, booleans, Integer, SQLExpr and String.
  def format_value(val)
    case val
    when nil
      'null'
    when true, false, Integer, SQLExpr
      val.to_s
    when String
      sql_string_literal(val)
    else
      raise ArgumentError, "unsupported value type: #{val.class}: #{val.inspect}"
    end
  end

  # Helpers that let fixture rows be written with SQL-looking values.
  module DSL
    # SQL NULL, represented as Ruby nil.
    def null
      nil
    end

    # The SQL expression current_timestamp (emitted verbatim, not quoted).
    def current_timestamp
      sql('current_timestamp')
    end

    # Wraps +expr+ so that it is emitted verbatim instead of as a string literal.
    def sql(expr)
      SQLExpr.new(expr)
    end
  end

  # Marker wrapper for raw SQL text; #to_s returns the text unchanged.
  class SQLExpr
    def initialize(expr)
      @expr = expr
    end

    def to_s
      @expr
    end
  end
end
|
453
|
-
|
454
|
-
include SQLClient::DSL
|
455
|
-
|
456
|
-
class PSQLDataSourceMock < DataSource
|
457
|
-
declare_type 'psql_mock'
|
458
|
-
|
459
|
-
# Sets up an empty SQL log and job-status table, and compiles each optional
# pattern source into a Regexp (a pattern that is not given stays nil).
# Any other keyword parameters are accepted and ignored.
def initialize(fail_pattern: nil, error_pattern: nil, exception_pattern: nil, load_error_pattern: nil, **params)
  to_regexp = ->(source) { source ? Regexp.compile(source) : nil }

  @sql_list = []
  @fail_pattern = to_regexp.call(fail_pattern)
  @error_pattern = to_regexp.call(error_pattern)
  @exception_pattern = to_regexp.call(exception_pattern)
  @load_error_pattern = to_regexp.call(load_error_pattern)
  @job_status = {}
end
|
467
|
-
|
468
|
-
attr_reader :sql_list
|
469
|
-
|
470
|
-
# Creates a mock Connection bound to this data source. With a block, yields
# the connection and returns the block's value; without one, returns the
# connection itself.
def open
  connection = Connection.new(self)
  return connection unless block_given?

  yield connection
end
|
478
|
-
|
479
|
-
# Records +sql+ in the statement log, then raises if it matches one of the
# configured patterns. Patterns are checked in this order: fail, error,
# exception, load error. The statement is logged before any error is raised.
def issue_sql(sql)
  logger.info "[mock] #{sql}"
  @sql_list.push sql

  raise JobFailure, "query failed: #{sql}" if @fail_pattern && @fail_pattern =~ sql
  raise JobError, "error" if @error_pattern && @error_pattern =~ sql
  raise ArgumentError, "unexpected exception" if @exception_pattern && @exception_pattern =~ sql
  raise JobError, "Load into table 'xxxx_table' failed. Check 'stl_load_errors' system table for details." if @load_error_pattern && @load_error_pattern =~ sql
end
|
495
|
-
|
496
|
-
# Registers the outcome the mock should report for +job_id+ when
# job_succeeded? is asked about it. Returns +succeeded+.
def provide_job_status(job_id, succeeded)
  @job_status.store(job_id, succeeded)
end
|
499
|
-
|
500
|
-
# Returns the outcome registered for +job_id+ via provide_job_status.
# Raises RuntimeError when no outcome was registered for that job.
def job_succeeded?(job_id)
  @job_status.fetch(job_id) do
    raise "job status unregistered: job_id=#{job_id}"
  end
end
|
504
|
-
|
505
|
-
# Mock connection: every statement is handed to the owning data source's
# issue_sql instead of being sent to a database.
class Connection
  # ds:: the owning mock data source; must respond to issue_sql and
  #      job_succeeded?
  def initialize(ds)
    @ds = ds
  end

  # Answers the one query this mock understands: a lookup in
  # strload_load_logs by job id. Returns 1 when the data source reports the
  # job as succeeded, otherwise 0. Raises RuntimeError for any other query.
  def query_value(sql)
    case sql
    when /\bstrload_load_logs where job_id = (\d+)/
      job_id = $1.to_i
      @ds.job_succeeded?(job_id) ? 1 : 0
    else
      raise "unknown query: #{sql}"
    end
  end

  # Passes +sql+ to the data source.
  def execute(sql)
    @ds.issue_sql sql
  end

  # Issues "begin transaction;", yields a Transaction and returns the block's
  # value. If the block raises a StandardError the transaction is aborted and
  # the error re-raised; otherwise it is committed unless the block already
  # finished it.
  def transaction
    # Declared up front: when the "begin transaction;" statement itself
    # raises, no Transaction exists yet, and the cleanup clauses below must
    # not call methods on nil (that would replace the real error with a
    # NoMethodError).
    txn = nil
    @ds.issue_sql "begin transaction;"
    txn = Transaction.new(@ds)
    yield txn
  rescue
    txn.abort if txn && !txn.committed?
    raise
  ensure
    # abort marks the transaction as finished, so this commits only on the
    # path where the transaction is still open.
    txn.commit if txn && !txn.committed?
  end
end
|
535
|
-
|
536
|
-
# Mock transaction handle. Each finishing operation issues its SQL through
# the data source and then marks the transaction as finished.
class Transaction
  def initialize(ds)
    @ds = ds
    @commit = false
  end

  # True once commit, abort or truncate_and_commit has completed. Note that
  # abort also sets this flag.
  def committed?
    @commit
  end

  def commit
    finish_with "commit;"
  end

  def abort
    finish_with "abort;"
  end

  # Issues "truncate <table>;" and marks the transaction as finished.
  def truncate_and_commit(table)
    finish_with "truncate #{table};"
  end

  private

  # Issues +sql+ and, if that did not raise, flags the transaction as finished.
  def finish_with(sql)
    @ds.issue_sql sql
    @commit = true
  end
end
|
561
|
-
end
|
562
|
-
|
563
|
-
# Mock S3 data source registered under the type name 's3_mock'. It returns
# canned credentials and URLs and hands out objects that only log the
# operations requested on them.
class S3DataSourceMock < DataSource
  declare_type 's3_mock'

  # Accepts and ignores any configuration parameters.
  def initialize(**_params)
  end

  # Fixed dummy credential string.
  def credential_string
    'cccc'
  end

  # URL of the object +name+ under the fixed mock bucket and prefix.
  def url(name)
    "s3://bucket/prefix/#{name}"
  end

  # Returns a mock object for +name+ that logs through this data source's logger.
  def object(name)
    ObjectMock.new(url(name), logger)
  end

  # Stand-in for an S3 object: put and delete only write a log line.
  class ObjectMock
    def initialize(url, logger)
      @url = url
      @logger = logger
    end

    # Logs the PUT together with the first 20 characters of +body+.
    def put(body:)
      preview = body[0, 20].inspect
      @logger.info "[mock] S3 PUT #{@url} content=#{preview}..."
    end

    # Logs the DELETE.
    def delete
      @logger.info "[mock] S3 DELETE #{@url}"
    end
  end
end
|
596
|
-
|
597
|
-
# Table-driven check of TaskInfo#failure_count: each case pairs a list of job
# statuses with the expected count. Judging by the cases, the count covers
# the trailing run of failure/error statuses after the last success.
test "TaskInfo#failure_count" do
  cases = [
    [%w[], 0],
    [%w[success], 0],
    [%w[failure], 1],
    [%w[error], 1],
    [%w[failure failure], 2],
    [%w[failure error], 2],
    [%w[failure success], 0],
    [%w[success success], 0],
    [%w[failure success failure], 1],
    [%w[failure success failure success failure failure], 2]
  ]
  ctl = Job::ControlConnection
  cases.each do |statuses, expected_count|
    jobs = statuses.map { |status| ctl::JobInfo.new(nil, status) }
    task = ctl::TaskInfo.new(nil, nil, nil, nil, nil, nil, jobs)
    assert_equal expected_count, task.failure_count
  end
end
|
616
|
-
|
617
|
-
end # class TestJob
|
618
|
-
|
619
|
-
end # module StreamingLoad
|
620
|
-
end # module Bricolage
|