bricolage-streamingload 0.14.2 → 0.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +5 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +54 -0
- data/LICENSES +21 -0
- data/RELEASE.md +131 -0
- data/Rakefile +3 -0
- data/bin/send-data-event +1 -1
- data/bricolage-streamingload.gemspec +26 -0
- data/config/production/database.yml +66 -0
- data/config/production/password.yml +5 -0
- data/config/production/streamingload.yml +20 -0
- data/config/production/variable.yml +5 -0
- data/lib/bricolage/sqsdatasource.rb +1 -1
- data/lib/bricolage/streamingload/dispatcher.rb +0 -1
- data/lib/bricolage/streamingload/job.rb +6 -2
- data/lib/bricolage/streamingload/jobparams.rb +1 -1
- data/lib/bricolage/streamingload/manifest.rb +2 -0
- data/lib/bricolage/streamingload/taskhandler.rb +5 -2
- data/lib/bricolage/streamingload/version.rb +1 -1
- data/sample/sqs-message.txt +38 -0
- data/sample/sqs-result.txt +18 -0
- data/strload_load_logs.ct +13 -0
- data/testschema/strload_test.ct +11 -0
- data/testschema/testlog.json.gz +0 -0
- data/testschema/with_work_table.job +4 -0
- data/testschema/with_work_table.sql +1 -0
- data/utils/init_strload_tables.sql +13 -0
- data/utils/strload-stat.sql +36 -0
- metadata +43 -27
- data/test/all.rb +0 -3
- data/test/streamingload/test_dispatcher.rb +0 -241
- data/test/streamingload/test_dispatchermessage.rb +0 -31
- data/test/streamingload/test_job.rb +0 -620
- data/test/test_sqsdatasource.rb +0 -55
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
require 'test/unit'
|
|
2
|
-
require 'bricolage/streamingload/dispatchermessage'
|
|
3
|
-
|
|
4
|
-
module Bricolage::StreamingLoad

  # Unit tests for S3ObjectDispatcherMessage.
  class TestDispatcherMessage < Test::Unit::TestCase

    # Builds an S3ObjectDispatcherMessage, passing every attribute through.
    # Attributes the caller leaves out are passed as nil.
    def new_s3event(message_id: nil, receipt_handle: nil, name: nil, time: nil, source: nil, region: nil, bucket: nil, key: nil, size: nil)
      attributes = {
        message_id: message_id,
        receipt_handle: receipt_handle,
        name: name,
        time: time,
        source: source,
        region: region,
        bucket: bucket,
        key: key,
        size: size
      }
      S3ObjectDispatcherMessage.new(**attributes)
    end

    # "ObjectCreated:Put" must count as a created event; "ObjectCreated:Copy" must not.
    test "#created?" do
      put_event = new_s3event(name: "ObjectCreated:Put")
      assert_true put_event.created_event?

      copy_event = new_s3event(name: "ObjectCreated:Copy")
      assert_false copy_event.created_event?
    end

  end

end
|
|
@@ -1,620 +0,0 @@
|
|
|
1
|
-
require 'test/unit'
|
|
2
|
-
require 'bricolage/streamingload/job'
|
|
3
|
-
require 'bricolage/context'
|
|
4
|
-
require 'bricolage/logger'
|
|
5
|
-
|
|
6
|
-
module Bricolage
|
|
7
|
-
|
|
8
|
-
# FIXME: patch
|
|
9
|
-
class NullLogger
  # No-op: accepts any arguments and returns nil.
  def log(*_ignored); end

  # No-op: accepts any arguments and returns nil.
  def add(*_ignored); end
end
|
|
13
|
-
|
|
14
|
-
module StreamingLoad
|
|
15
|
-
|
|
16
|
-
class TestJob < Test::Unit::TestCase
|
|
17
|
-
|
|
18
|
-
# Plain run: two objects are queued for task 1; the data source must receive
# begin / copy / load-log insert / commit, and the job row must read 'success'.
test "execute_task" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
    db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [1, 1], [1, 2]
    db.insert_into 'strload_objects',
      [1, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
      [2, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]

    job = new_job(task_id: 1, force: false)
    job.execute_task

    expected_sql = [
      "begin transaction;",
      "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "insert into strload_load_logs (task_id, job_id, finish_time) values (1, #{job.job_id}, current_timestamp)",
      "commit;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 1, row['task_id'].to_i
    assert_equal job.process_id, row['process_id']
    assert_equal 'success', row['status']
  end
end
|
|
43
|
-
|
|
44
|
-
# Work-table variant: the expected statements clear the _wk table, copy into it,
# insert from it into the real table, write the load log, then truncate the _wk table.
test "execute_task (with work table)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.with_work_table', 'testschema', 'with_work_table', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    db.insert_into 'strload_objects',
      [1001, 's3://data-bucket/testschema.with_work_table/0001.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.with_work_table/0002.json.gz', 1024, 'testschema.with_work_table', 'mmmm', current_timestamp, current_timestamp]

    job = new_job(task_id: 11, force: false)
    job.execute_task

    expected_sql = [
      "begin transaction;",
      "delete from testschema.with_work_table_wk",
      "copy testschema.with_work_table_wk from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "insert into testschema.with_work_table select * from testschema.with_work_table_wk;\n",
      "insert into strload_load_logs (task_id, job_id, finish_time) values (11, #{job.job_id}, current_timestamp)",
      "truncate testschema.with_work_table_wk;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, row['task_id'].to_i
    assert_equal job.process_id, row['process_id']
    assert_equal 'success', row['status']
  end
end
|
|
71
|
-
|
|
72
|
-
# The strload_tables row is inserted with its last column set to true
# (presumably the "disabled" flag -- confirm against the table definition).
# execute_task must raise JobDefered and leave strload_jobs empty.
test "execute_task (disabled)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, true]
    db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]

    job = new_job(task_id: 1, force: false)
    assert_raise(JobDefered) { job.execute_task }

    job_count = db.query_value("select count(*) from strload_jobs")
    assert_equal 0, job_count.to_i
  end
end
|
|
85
|
-
|
|
86
|
-
# Task 1 already has failure, success and duplicated job rows;
# running it again without force must raise JobDuplicated.
test "execute_task (duplicated)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
    db.insert_into 'strload_tasks', [1, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_jobs',
      [1, 1, 'localhost-1234', 'failure', current_timestamp, current_timestamp, ''],
      [2, 1, 'localhost-1234', 'success', current_timestamp, current_timestamp, ''],
      [3, 1, 'localhost-1234', 'duplicated', current_timestamp, current_timestamp, '']

    job = new_job(task_id: 1, force: false)
    assert_raise(JobDuplicated) { job.execute_task }
  end
end
|
|
101
|
-
|
|
102
|
-
# With force: true the load runs to completion; the job row must read
# 'success' and its message must mention "forced".
test "execute_task (duplicated but forced)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    db.insert_into 'strload_objects',
      [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]

    job = new_job(task_id: 11, force: true)
    job.execute_task

    expected_sql = [
      "begin transaction;",
      "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "insert into strload_load_logs (task_id, job_id, finish_time) values (11, #{job.job_id}, current_timestamp)",
      "commit;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, row['task_id'].to_i
    assert_equal job.process_id, row['process_id']
    assert_equal 'success', row['status']
    assert(/forced/ =~ row['message'])
  end
end
|
|
128
|
-
|
|
129
|
-
# First failed attempt: execute_task must raise JobFailure, the data source must
# see begin / copy / abort, and the job row must read 'failure'.
# NOTE(review): the failure is presumably triggered by a fail pattern configured
# for the mock data source that matches "sql_fails" -- confirm in the test config.
test "execute_task (load fails / first time)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    db.insert_into 'strload_objects',
      [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]

    job = new_job(task_id: 11, force: false)
    assert_raise(JobFailure) { job.execute_task }

    expected_sql = [
      "begin transaction;",
      "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "abort;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, row['task_id'].to_i
    assert_equal job.process_id, row['process_id']
    assert_equal 'failure', row['status']
  end
end
|
|
154
|
-
|
|
155
|
-
# One earlier failed job (101) exists for the task. The new attempt must fail
# again with JobFailure and record a message containing "retry#1".
test "execute_task (load fails / nth time)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    db.insert_into 'strload_objects',
      [1001, 's3://data-bucket/testschema.sql_fails/0001.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.sql_fails/0002.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp]
    db.insert_into 'strload_jobs',
      [101, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed']

    job = new_job(task_id: 11, force: false)
    assert_raise(JobFailure) { job.execute_task }

    expected_sql = [
      "begin transaction;",
      "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "abort;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, row['task_id'].to_i
    assert_equal job.process_id, row['process_id']
    assert_equal 'failure', row['status']
    assert(/retry\#1/ =~ row['message'])
  end
end
|
|
183
|
-
|
|
184
|
-
# Five failed jobs already exist for the task. The next failure must surface as
# JobCancelled and be recorded as 'failure' with a "retry#5 FINAL" message.
test "execute_task (too many retry)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.sql_fails', 'testschema', 'sql_fails', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    db.insert_into 'strload_objects',
      [1001, 's3://data-bucket/testschema.sql_fails/0001.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.sql_fails/0002.json.gz', 1024, 'testschema.sql_fails', 'mmmm', current_timestamp, current_timestamp]
    db.insert_into 'strload_jobs',
      [101, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'query failed'],
      [102, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#1 query failed'],
      [103, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#2 query failed'],
      [104, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#3 query failed'],
      [105, 11, 'localhost-1234', 'failure', current_timestamp, current_timestamp, 'retry#4 query failed']

    job = new_job(task_id: 11, force: false)
    assert_raise(JobCancelled) { job.execute_task }

    expected_sql = [
      "begin transaction;",
      "copy testschema.sql_fails from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "abort;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, row['task_id'].to_i
    assert_equal job.process_id, row['process_id']
    assert_equal 'failure', row['status']
    assert(/retry\#5 FINAL/ =~ row['message'])
  end
end
|
|
216
|
-
|
|
217
|
-
# execute_task must raise JobError, the data source must see begin / copy / abort,
# and the job row must read 'error'.
# NOTE(review): presumably the mock data source has an error pattern matching
# "job_error" -- confirm in the test config.
test "execute_task (job error)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.job_error', 'testschema', 'job_error', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    db.insert_into 'strload_objects',
      [1001, 's3://data-bucket/testschema.job_error/0001.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.job_error/0002.json.gz', 1024, 'testschema.job_error', 'mmmm', current_timestamp, current_timestamp]

    job = new_job(task_id: 11, force: false)
    assert_raise(JobError) { job.execute_task }

    expected_sql = [
      "begin transaction;",
      "copy testschema.job_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "abort;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, row['task_id'].to_i
    assert_equal job.process_id, row['process_id']
    assert_equal 'error', row['status']
  end
end
|
|
242
|
-
|
|
243
|
-
# An unexpected exception during the load must still reach the caller as JobError
# and be recorded as 'error', after begin / copy / abort.
# NOTE(review): presumably the mock data source has an exception pattern matching
# "unexpected_error" -- confirm in the test config.
test "execute_task (unexpected error)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.unexpected_error', 'testschema', 'unexpected_error', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    db.insert_into 'strload_objects',
      [1001, 's3://data-bucket/testschema.unexpected_error/0001.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.unexpected_error/0002.json.gz', 1024, 'testschema.unexpected_error', 'mmmm', current_timestamp, current_timestamp]

    job = new_job(task_id: 11, force: false)
    assert_raise(JobError) { job.execute_task }

    expected_sql = [
      "begin transaction;",
      "copy testschema.unexpected_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "abort;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, row['task_id'].to_i
    assert_equal job.process_id, row['process_id']
    assert_equal 'error', row['status']
  end
end
|
|
268
|
-
|
|
269
|
-
# A load error must raise JobError, be recorded as 'error', and leave a message
# that mentions stl_load_errors.
# NOTE(review): presumably the mock data source has a load-error pattern matching
# "load_error" -- confirm in the test config.
test "execute_task (load error)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.load_error', 'testschema', 'load_error', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    db.insert_into 'strload_objects',
      [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]

    job = new_job(task_id: 11, force: false)
    assert_raise(JobError) { job.execute_task }

    expected_sql = [
      "begin transaction;",
      "copy testschema.load_error from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "abort;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, row['task_id'].to_i
    assert_equal job.process_id, row['process_id']
    assert_equal 'error', row['status']
    assert(/stl_load_errors/ =~ row['message'])
  end
end
|
|
295
|
-
|
|
296
|
-
# Job 101 is stored with status 'unknown' and the mock data source is told that
# it actually succeeded. execute_task must raise JobDuplicated, and job 101 must
# afterwards read 'success'.
test "execute_task (unknown status, really=success)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_jobs',
      [101, 11, 'localhost-1234', 'unknown', current_timestamp, current_timestamp, 'data connection failed']
    @data_ds.provide_job_status(101, true)

    job = new_job(task_id: 11, force: false)
    assert_raise(JobDuplicated) { job.execute_task }

    previous_job = db.query_row("select * from strload_jobs where job_id = 101")
    assert_equal 'success', previous_job['status']
  end
end
|
|
313
|
-
|
|
314
|
-
# Job 101 is stored with status 'unknown' and the mock data source is told that
# it actually failed. The new run must load normally, job 101 must afterwards
# read 'failure', and the new job row must read 'success'.
test "execute_task (unknown status, really=failure)" do
  setup_context do |db|
    db.insert_into 'strload_tables', [1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false]
    db.insert_into 'strload_tasks', [11, 'streaming_load_v3', 1, current_timestamp]
    db.insert_into 'strload_task_objects', [11, 1001], [11, 1002]
    db.insert_into 'strload_objects',
      [1001, 's3://data-bucket/testschema.desttable/0001.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp],
      [1002, 's3://data-bucket/testschema.desttable/0002.json.gz', 1024, 'testschema.desttable', 'mmmm', current_timestamp, current_timestamp]
    db.insert_into 'strload_jobs',
      [101, 11, 'localhost-1234', 'unknown', current_timestamp, current_timestamp, 'data connection failed']
    @data_ds.provide_job_status(101, false)

    job = new_job(task_id: 11, force: false)
    job.execute_task

    expected_sql = [
      "begin transaction;",
      "copy testschema.desttable from '#{job.manifest.url}' credentials 'cccc' manifest statupdate false compupdate false json 'auto' gzip timeformat 'auto' dateformat 'auto' acceptanydate acceptinvchars ' ' truncatecolumns trimblanks ;",
      "insert into strload_load_logs (task_id, job_id, finish_time) values (11, #{job.job_id}, current_timestamp)",
      "commit;"
    ]
    assert_equal expected_sql, job.data_ds.sql_list

    previous_job = db.query_row("select * from strload_jobs where job_id = 101")
    assert_equal 'failure', previous_job['status']

    row = db.query_row("select * from strload_jobs where job_id = #{job.job_id}")
    assert_equal 11, row['task_id'].to_i
    assert_equal job.process_id, row['process_id']
    assert_equal 'success', row['status']
  end
end
|
|
345
|
-
|
|
346
|
-
# Prepares the fixtures shared by the tests and yields a SQL client.
#
# Creates a Context for the 'test' environment, looks up the 'dwhctl' and
# 'db_data_mock' SQL data sources, opens the control data source, truncates
# the control tables, and yields an SQLClient wrapping that connection.
#
# verbose: when true the context gets a nil logger instead of a NullLogger.
def setup_context(verbose: false)
  logger = verbose ? nil : NullLogger.new
  @ctx = Context.for_application('.', environment: 'test', logger: logger)
  @ctl_ds = @ctx.get_data_source('sql', 'dwhctl')
  @data_ds = @ctx.get_data_source('sql', 'db_data_mock')
  @ctl_ds.open do |conn|
    client = SQLClient.new(conn)
    clear_all_tables(client)
    yield client
  end
end
|
|
356
|
-
|
|
357
|
-
# Builds a Job for the given task, wired to the context, data sources and
# logger prepared by setup_context.
#
# task_id: id of the task to run.
# force:   passed through to Job.new unchanged.
def new_job(task_id:, force:)
  job_options = {
    context: @ctx,
    ctl_ds: @ctl_ds,
    data_ds: @data_ds,
    logger: @ctx.logger,
    task_id: task_id,
    force: force
  }
  Job.new(**job_options)
end
|
|
367
|
-
|
|
368
|
-
# FIXME: database cleaner
|
|
369
|
-
# Truncates every control table the tests write to, via client.truncate_tables.
# Returns whatever truncate_tables returns.
def clear_all_tables(client)
  control_tables = %w[strload_tables strload_objects strload_task_objects strload_tasks strload_jobs]
  client.truncate_tables control_tables
end
|
|
378
|
-
|
|
379
|
-
# Small SQL helper for the tests. Wraps a connection, delegates the query
# methods to it, and builds multi-row INSERT and TRUNCATE statements.
class SQLClient
  include SQLUtils
  extend Forwardable

  # conn: connection object; it must respond to the delegated query methods
  # listed below and to #update.
  def initialize(conn)
    @conn = conn
  end

  def_delegators '@conn',
    :query,
    :query_value,
    :query_values,
    :query_row,
    :select,
    :update

  # Inserts one or more rows with a single statement.
  #
  # table:   table name, interpolated into the SQL as-is.
  # records: one Array of column values per row; see #format_value for the
  #          supported value types.
  #
  # Raises ArgumentError when no records are given: the statement would
  # otherwise be just "insert into <table>", which is not valid SQL.
  # Returns the result of @conn.update.
  def insert_into(table, *records)
    raise ArgumentError, "no records given for insert into #{table}" if records.empty?

    rows = records.map { |rec| format_values(rec) }.join(', ')
    @conn.update("insert into #{table} values #{rows}")
  end

  # Issues one "truncate <name>" statement per table name.
  def truncate_tables(tables)
    tables.each do |name|
      @conn.update("truncate #{name}")
    end
  end

  private

  # Renders one record as a parenthesized, comma-separated value list.
  def format_values(rec)
    "(#{rec.map { |val| format_value(val) }.join(', ')})"
  end

  # Renders a single Ruby value as SQL text.
  # nil becomes null; booleans, Integers and SQLExpr use their to_s;
  # Strings go through sql_string_literal, which is not defined in this class
  # (presumably provided by SQLUtils -- confirm).
  # Raises ArgumentError for any other type.
  def format_value(val)
    case val
    when nil then 'null'
    when true, false then val.to_s
    when Integer then val.to_s
    when SQLExpr then val.to_s
    when String then sql_string_literal(val)
    else
      raise ArgumentError, "unsupported value type: #{val.class}: #{val.inspect}"
    end
  end

  # Helpers for writing fixture rows: values that must reach the SQL text
  # unquoted are wrapped in SQLExpr.
  module DSL
    # SQL NULL, represented as nil.
    def null
      nil
    end

    # The SQL expression current_timestamp (emitted unquoted).
    def current_timestamp
      SQLExpr.new('current_timestamp')
    end

    # Wraps an arbitrary SQL fragment so it is emitted unquoted.
    def sql(expr)
      SQLExpr.new(expr)
    end
  end

  # Marker wrapper for a raw SQL fragment; #to_s returns the fragment unchanged.
  class SQLExpr
    def initialize(expr)
      @expr = expr
    end

    def to_s
      @expr
    end
  end
end
|
|
453
|
-
|
|
454
|
-
include SQLClient::DSL
|
|
455
|
-
|
|
456
|
-
class PSQLDataSourceMock < DataSource
|
|
457
|
-
declare_type 'psql_mock'
|
|
458
|
-
|
|
459
|
-
# Sets up an empty SQL log and job-status table, and compiles each given
# pattern source into a Regexp. A pattern that is not given stays nil.
# Any other keyword parameters are accepted and ignored.
def initialize(fail_pattern: nil, error_pattern: nil, exception_pattern: nil, load_error_pattern: nil, **params)
  to_regexp = ->(source) { Regexp.compile(source) if source }

  @sql_list = []
  @fail_pattern = to_regexp.call(fail_pattern)
  @error_pattern = to_regexp.call(error_pattern)
  @exception_pattern = to_regexp.call(exception_pattern)
  @load_error_pattern = to_regexp.call(load_error_pattern)
  @job_status = {}
end
|
|
467
|
-
|
|
468
|
-
attr_reader :sql_list
|
|
469
|
-
|
|
470
|
-
# Creates a mock Connection bound to this data source.
# With a block, yields the connection and returns the block's value;
# without one, returns the connection itself.
def open
  connection = Connection.new(self)
  return connection unless block_given?

  yield connection
end
|
|
478
|
-
|
|
479
|
-
# Logs and records the statement, then simulates a failure if the SQL matches
# one of the configured patterns. Patterns are checked in this order:
#   fail pattern       -> JobFailure
#   error pattern      -> JobError
#   exception pattern  -> ArgumentError
#   load error pattern -> JobError mentioning stl_load_errors
# The statement is appended to @sql_list before any exception is raised.
def issue_sql(sql)
  logger.info "[mock] #{sql}"
  @sql_list.push sql

  raise JobFailure, "query failed: #{sql}" if @fail_pattern && @fail_pattern =~ sql
  raise JobError, "error" if @error_pattern && @error_pattern =~ sql
  raise ArgumentError, "unexpected exception" if @exception_pattern && @exception_pattern =~ sql
  raise JobError, "Load into table 'xxxx_table' failed. Check 'stl_load_errors' system table for details." if @load_error_pattern && @load_error_pattern =~ sql
end
|
|
495
|
-
|
|
496
|
-
# Registers the outcome that job_succeeded? will report for job_id.
# succeeded is stored as given and is also the return value.
def provide_job_status(job_id, succeeded)
  @job_status[job_id] = succeeded
end
|
|
499
|
-
|
|
500
|
-
# Returns the outcome registered for job_id via provide_job_status.
# Raises RuntimeError when nothing was registered for that id.
def job_succeeded?(job_id)
  return @job_status[job_id] if @job_status.key?(job_id)

  raise "job status unregistered: job_id=#{job_id}"
end
|
|
504
|
-
|
|
505
|
-
# Mock connection that never talks to a database. Statements are forwarded to
# the data source's #issue_sql, and job status lookups are answered through
# the data source's #job_succeeded?.
class Connection
  # ds: the owning data source; must respond to #issue_sql and #job_succeeded?.
  def initialize(ds)
    @ds = ds
  end

  # Answers the only query this mock understands: a lookup on
  # strload_load_logs by job_id. Returns 1 when the data source reports the
  # job as succeeded and 0 otherwise. Raises RuntimeError for any other SQL.
  def query_value(sql)
    case sql
    when /\bstrload_load_logs where job_id = (\d+)/
      job_id = Regexp.last_match(1).to_i
      @ds.job_succeeded?(job_id) ? 1 : 0
    else
      raise "unknown query: #{sql}"
    end
  end

  # Forwards the statement to the data source.
  def execute(sql)
    @ds.issue_sql sql
  end

  # Issues "begin transaction;", yields a Transaction, and finishes it:
  # abort when the block raises a StandardError (which is re-raised), commit
  # otherwise. A transaction the block already finished is left alone.
  # Returns the block's value.
  #
  # The rescue/ensure pair is scoped to the yield on purpose. If the
  # "begin transaction;" statement itself raises, no Transaction exists yet,
  # so the original error must propagate untouched rather than being replaced
  # by a NoMethodError from calling abort/commit on nil.
  def transaction
    @ds.issue_sql "begin transaction;"
    txn = Transaction.new(@ds)
    begin
      yield txn
    rescue
      txn.abort unless txn.committed?
      raise
    ensure
      txn.commit unless txn.committed?
    end
  end
end
|
|
535
|
-
|
|
536
|
-
# Mock transaction handle. It tracks only whether the transaction has been
# finished. Commit, abort and truncate-and-commit each issue one statement
# through the data source and then mark the transaction finished, so
# committed? is true after any of the three.
class Transaction
  # ds: data source; must respond to #issue_sql.
  def initialize(ds)
    @ds = ds
    @finished = false
  end

  # True once commit, abort or truncate_and_commit has run.
  def committed?
    @finished
  end

  # Issues "commit;" and marks the transaction finished. Returns true.
  def commit
    finish_with "commit;"
  end

  # Issues "abort;" and marks the transaction finished. Returns true.
  def abort
    finish_with "abort;"
  end

  # Issues "truncate <table>;" and marks the transaction finished. Returns true.
  def truncate_and_commit(table)
    finish_with "truncate #{table};"
  end

  private

  # Sends the closing statement, then flags the transaction as finished.
  def finish_with(statement)
    @ds.issue_sql statement
    @finished = true
  end
end
|
|
561
|
-
end
|
|
562
|
-
|
|
563
|
-
# Stand-in for an S3 data source. It returns a fixed credential string and
# fake s3:// URLs, and hands out objects that only log the PUT or DELETE
# they were asked to perform.
class S3DataSourceMock < DataSource
  declare_type 's3_mock'

  # Accepts and ignores any configuration parameters.
  def initialize(**_params); end

  # Fixed credential string.
  def credential_string
    'cccc'
  end

  # Fake URL for the named object.
  def url(name)
    "s3://bucket/prefix/#{name}"
  end

  # Returns an ObjectMock for the named object, sharing this data source's logger.
  def object(name)
    ObjectMock.new(url(name), logger)
  end

  # Fake S3 object: #put and #delete only write a log line.
  class ObjectMock
    def initialize(url, logger)
      @url, @logger = url, logger
    end

    # Logs the PUT together with the first 20 characters of the body.
    def put(body:)
      preview = body[0, 20].inspect
      @logger.info "[mock] S3 PUT #{@url} content=#{preview}..."
    end

    # Logs the DELETE.
    def delete
      @logger.info "[mock] S3 DELETE #{@url}"
    end
  end
end
|
|
596
|
-
|
|
597
|
-
# Table-driven check of TaskInfo#failure_count. Each case pairs a list of job
# statuses, in order, with the expected count. In the cases below the count
# equals the number of failure/error statuses after the last success.
test "TaskInfo#failure_count" do
  conn_class = Job::ControlConnection
  cases = [
    [%w[], 0],
    [%w[success], 0],
    [%w[failure], 1],
    [%w[error], 1],
    [%w[failure failure], 2],
    [%w[failure error], 2],
    [%w[failure success], 0],
    [%w[success success], 0],
    [%w[failure success failure], 1],
    [%w[failure success failure success failure failure], 2]
  ]

  cases.each do |statuses, expected|
    jobs = statuses.map { |status| conn_class::JobInfo.new(nil, status) }
    task = conn_class::TaskInfo.new(nil, nil, nil, nil, nil, nil, jobs)
    assert_equal expected, task.failure_count
  end
end
|
|
616
|
-
|
|
617
|
-
end # class TestJob
|
|
618
|
-
|
|
619
|
-
end # module StreamingLoad
|
|
620
|
-
end # module Bricolage
|