bricolage-streamingload 0.15.0 → 0.17.0
- checksums.yaml +5 -5
- data/.gitignore +5 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +61 -0
- data/LICENSES +21 -0
- data/RELEASE.md +135 -0
- data/Rakefile +3 -0
- data/bin/send-data-event +1 -1
- data/bricolage-streamingload.gemspec +27 -0
- data/config/production/database.yml +66 -0
- data/config/production/password.yml +5 -0
- data/config/production/streamingload.yml +20 -0
- data/config/production/variable.yml +5 -0
- data/lib/bricolage/streamingload/dispatcher.rb +8 -1
- data/lib/bricolage/streamingload/job.rb +14 -3
- data/lib/bricolage/streamingload/jobparams.rb +1 -1
- data/lib/bricolage/streamingload/taskhandler.rb +5 -2
- data/lib/bricolage/streamingload/version.rb +1 -1
- data/sample/sqs-message.txt +38 -0
- data/sample/sqs-result.txt +18 -0
- data/strload_load_logs.ct +13 -0
- data/testschema/strload_test.ct +11 -0
- data/testschema/testlog.json.gz +0 -0
- data/testschema/with_work_table.job +4 -0
- data/testschema/with_work_table.sql +1 -0
- data/utils/init_strload_tables.sql +13 -0
- data/utils/strload-stat.sql +36 -0
- metadata +46 -30
- data/test/all.rb +0 -3
- data/test/streamingload/test_dispatcher.rb +0 -241
- data/test/streamingload/test_dispatchermessage.rb +0 -31
- data/test/streamingload/test_job.rb +0 -620
- data/test/test_sqsdatasource.rb +0 -55
data/lib/bricolage/streamingload/taskhandler.rb
CHANGED
@@ -149,12 +149,15 @@ module Bricolage
 
     class NoopJob
 
-      def initialize(context:, ctl_ds:, task_id:, force: false, logger:)
+      def initialize(context:, ctl_ds:, data_ds:, log_table:, task_id:, force: false, logger:)
         @ctx = context
         @ctl_ds = ctl_ds
+        @data_ds = data_ds
+        @log_table = log_table
         @task_id = task_id
         @force = force
         @logger = logger
+        @working_dir = Dir.getwd
       end
 
       def execute(fail_fast: false)
@@ -164,7 +167,7 @@ module Bricolage
       end
 
       def execute_task
-        @logger.info "execute_task: task_id=#{@task_id} force=#{@force} ctx=#{@ctx.home_path} ctl_ds=#{@ctl_ds.name} dir=#{@working_dir}"
+        @logger.info "execute_task: task_id=#{@task_id} force=#{@force} ctx=#{@ctx.home_path} ctl_ds=#{@ctl_ds.name} data_ds=#{@data_ds.name} dir=#{@working_dir}"
       end
 
     end
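For orientation, this is how the widened constructor could be called after 0.17.0; a minimal sketch, not code from the gem — the context ctx, the data-source ID 'db_data', and the task id are placeholders ('dwhctl' does appear in the gem's own tests), while the log table name matches the new strload_load_logs.ct further down:

    # Sketch only: constructing the 0.17.0 NoopJob with the new keywords.
    # 'db_data' is a placeholder data-source ID; log_table matches the
    # --dest-table of strload_load_logs.ct below.
    job = Bricolage::StreamingLoad::NoopJob.new(
      context: ctx,
      ctl_ds: ctx.get_data_source('sql', 'dwhctl'),
      data_ds: ctx.get_data_source('sql', 'db_data'),
      log_table: 'bricolage.strload_load_logs',
      task_id: 1234,
      logger: ctx.logger
    )
    job.execute(fail_fast: true)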
data/sample/sqs-message.txt
ADDED
@@ -0,0 +1,38 @@
+{
+  "Records" => [
+    {
+      "eventVersion" => "2.0",
+      "eventSource" => "aws:s3",
+      "awsRegion" => "ap-northeast-1",
+      "eventTime" => "2016-02-13T11:40:08.001Z",
+      "eventName" => "ObjectCreated:Put",
+      "userIdentity" => {
+        "principalId" => "AWS:AIXXXXXXXXXXXXXXXXX6A"
+      },
+      "requestParameters" => {
+        "sourceIPAddress" => "111.222.111.90"
+      },
+      "responseElements" => {
+        "x-amz-request-id" => "1111AAAA9999AAAA",
+        "x-amz-id-2" => "6p9IZG+R+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxFyXiNMMB4="
+      },
+      "s3" => {
+        "s3SchemaVersion" => "1.0",
+        "configurationId" => "ObjectCreatedEvents",
+        "bucket" => {
+          "name" => "some-bucket",
+          "ownerIdentity" => {
+            "principalId" => "AAAAIIIIBBBB11"
+          },
+          "arn" => "arn:aws:s3:::some-bucket"
+        },
+        "object" => {
+          "key" => "development/logs/schema.table/20160125_0000_0_de37c5ad-d26a-42cc-a141-475676b65f69.gz",
+          "size" => 1302957,
+          "eTag" => "d704db7f9cb77b1ecb981c534526b542",
+          "sequencer" => "111122223333444499"
+        }
+      }
+    }
+  ]
+}
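A rough sketch (not the gem's dispatcher code) of how one record of this event maps to the s3:// URL a load task would reference:

    # Illustrative helper, hypothetical and not part of the gem's API:
    # build an object URL from one record of the S3 event sample above.
    def object_url(record)
      s3 = record["s3"]
      "s3://#{s3["bucket"]["name"]}/#{s3["object"]["key"]}"
    end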
data/sample/sqs-result.txt
ADDED
@@ -0,0 +1,18 @@
+#<struct Aws::SQS::Types::ReceiveMessageResult
+  messages = [
+    #<struct Aws::SQS::Types::Message
+      message_id = "11112222-b37f-4281-aee2-696408c482be",
+      receipt_handle = "AQEBErp4Vv159Hvx5oXSioWu7Ov1Jj1ht423LwTTUlgLohmKh02gAMAZ7kN1J4+aLzM54BRuMatnifWPH3cBh6rtdSWlllysBVmXDRKj83VCaseKPpq+Vdw/bLZrPEYRSGPuhKsHa2DFjo67KeND4AqasZO7lOCJX+YcsbqsqAtcODlDWlVbfedPkKIGM3Scra3uY9ysGjNx/zWuJ88fCtvRpu9tgk0nG7srHAm84Mxj5ArakJnKnJMbtkD/+lvm0Vqi2YCkXonS8+BP8gK4kSTbl1uv/Gp7ZfxkjKQJBB8xQiYo7e7qKLwhbImMZtO5tqDKzIKFVfMi1G2ODF+tEj4Ce+ryMRGA50GhON2ETCyofsl1T7Wdr61IEOV0NFlonGTWFf4q/1r3OPhFAchp+tYnyA==",
+      md5_of_body = "00006bf43abdff178ca0ffa96205aaaa",
+      body = "{\"Records\":[{\"eventVersion\":\"2.0\",\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-northeast-1\",\"eventTime\":\"2016-02-13T11:40:07.268Z\",\"eventName\":\"ObjectCreated:Put\",\"userIdentity\":{\"principalId\":\"AWS:AAAAJKS3A4VEF45XCAAAA\"},\"requestParameters\":{\"sourceIPAddress\":\"111.222.111.90\"},\"responseElements\":{\"x-amz-request-id\":\"10AAAA31A1EDCCCC\",\"x-amz-id-2\":\"YT+U/PxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxzfFGPjqP5AQg=\"},\"s3\":{\"s3SchemaVersion\":\"1.0\",\"configurationId\":\"LogStreamDev\",\"bucket\":{\"name\":\"redshift-copy-buffer\",\"ownerIdentity\":{\"principalId\":\"AAAAFIDWR40000\"},\"arn\":\"arn:aws:s3:::redshift-copy-buffer\"},\"object\":{\"key\":\"development/logs/schema.table/20160125_0000_0_2d0b43ad-0db9-4655-bd98-b7464b123763.gz\",\"size\":1238953,\"eTag\":\"aaaa196c3935f4957c7bb645f9780000\",\"sequencer\":\"0000BF161697C1AAAA\"}}}]}",
+      attributes = {
+        "SenderId" => "FACE0VEO02BJMF37H2JKW",
+        "ApproximateFirstReceiveTimestamp" => "1455364193429",
+        "ApproximateReceiveCount" => "1",
+        "SentTimestamp" => "1455363607341"
+      },
+      md5_of_message_attributes = nil,
+      message_attributes = {}
+    >
+  ]
+>
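A result shaped like this comes back from a plain aws-sdk-sqs receive call; a minimal sketch with a placeholder queue URL:

    # Sketch: fetch messages with aws-sdk-sqs (~> 1.3 per the gemspec below).
    require 'aws-sdk-sqs'

    sqs = Aws::SQS::Client.new(region: 'ap-northeast-1')
    result = sqs.receive_message(
      queue_url: 'https://sqs.ap-northeast-1.amazonaws.com/123456789012/example-queue',  # placeholder
      max_number_of_messages: 10,
      attribute_names: ['All']  # returns SentTimestamp etc., as in the sample
    )
    result.messages.each {|msg| puts msg.message_id }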
data/strload_load_logs.ct
ADDED
@@ -0,0 +1,13 @@
+--dest-table: bricolage.strload_load_logs
+
+/*
+Redshift-side log table
+*/
+create table $dest_table
+( job_id bigint encode raw
+, task_id bigint encode zstd
+, finish_time timestamp encode delta
+)
+distkey (job_id)
+sortkey (job_id)
+;
data/testschema/strload_test.ct
ADDED
@@ -0,0 +1,11 @@
+--dest-table: $test_schema.strload_test
+
+create table $dest_table
+( jst_time timestamp encode delta
+, full_controller varchar(128) encode lzo
+, action varchar(128) encode lzo
+, user_id integer encode lzo
+, unique_id varchar(80) encode lzo
+)
+sortkey (jst_time)
+;
data/testschema/testlog.json.gz
ADDED
Binary file
data/testschema/with_work_table.sql
ADDED
@@ -0,0 +1 @@
+insert into $dest_table select * from $work_table;
data/utils/init_strload_tables.sql
ADDED
@@ -0,0 +1,13 @@
+drop table if exists strload_objects cascade;
+drop table if exists strload_dup_objects cascade;
+drop table if exists strload_task_objects;
+drop table if exists strload_tasks cascade;
+drop table if exists strload_jobs cascade;
+
+\i schema/strload_objects.ct
+\i schema/strload_dup_objects.ct
+\i schema/strload_task_objects.ct
+\i schema/strload_tasks.ct
+\i schema/strload_jobs.ct
+\i schema/strload_stats.cv
+\i schema/strload_task_status.cv
data/utils/strload-stat.sql
ADDED
@@ -0,0 +1,36 @@
+select
+    task_seq
+    , tbl.source_id
+    , tbl.schema_name
+    , tbl.table_name
+    , submit_time
+    , object_count
+    , total_object_size
+    , job_seq
+    , loader_id
+    , start_time
+    , finish_time
+    , status
+    , substring(message, 1, 30) as err_msg
+from
+    strload_tasks t
+    inner join (
+        select
+            task_seq
+            , count(*) as object_count
+            , sum(object_size) as total_object_size
+        from
+            strload_task_objects
+            inner join strload_objects
+            using (object_seq)
+        group by 1
+    ) o
+    using (task_seq)
+    left outer join strload_jobs j
+    using (task_seq)
+    left outer join strload_tables tbl
+    using (source_id)
+order by
+    task_seq
+    , job_seq
+;
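The status query can be run ad hoc from Ruby; a sketch with the pg gem (connection settings are placeholders):

    # Sketch: run utils/strload-stat.sql against the control database.
    require 'pg'

    conn = PG.connect(host: 'localhost', dbname: 'dwhctl')  # placeholder settings
    conn.exec(File.read('utils/strload-stat.sql')).each do |row|
      puts row.values_at('task_seq', 'status', 'err_msg').join("\t")
    end
    conn.close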
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bricolage-streamingload
 version: !ruby/object:Gem::Version
-  version: 0.15.0
+  version: 0.17.0
 platform: ruby
 authors:
 - Minero Aoki
@@ -9,22 +9,22 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2020-08-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bricolage
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '5.
+        version: '5.30'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '5.
+        version: '5.30'
 - !ruby/object:Gem::Dependency
   name: pg
   requirement: !ruby/object:Gem::Requirement
@@ -45,44 +45,44 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1'
+        version: '1.8'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
      - !ruby/object:Gem::Version
-        version: '1'
+        version: '1.8'
 - !ruby/object:Gem::Dependency
   name: aws-sdk-sqs
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1'
+        version: '1.3'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1'
+        version: '1.3'
 - !ruby/object:Gem::Dependency
-  name:
+  name: sentry-raven
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
-  type: :
+        version: '3.0'
+  type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '3.0'
 - !ruby/object:Gem::Dependency
-  name:
+  name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -96,7 +96,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name:
+  name: test-unit
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -113,17 +113,23 @@ description: Bricolage Streaming Load Daemon loads S3 data files to Redshift con
 email:
 - aamine@loveruby.net
 executables:
--
-- send-shutdown-event
-- send-load-task
+- bricolage-streaming-dispatcher
 - bricolage-streaming-loader
 - send-checkpoint-event
 - send-data-event
--
+- send-flushtable-event
+- send-load-task
+- send-shutdown-event
 extensions: []
 extra_rdoc_files: []
 files:
+- ".gitignore"
+- Gemfile
+- Gemfile.lock
+- LICENSES
 - README.md
+- RELEASE.md
+- Rakefile
 - bin/bricolage-streaming-dispatcher
 - bin/bricolage-streaming-loader
 - bin/send-checkpoint-event
@@ -131,6 +137,13 @@ files:
 - bin/send-flushtable-event
 - bin/send-load-task
 - bin/send-shutdown-event
+- bricolage-streamingload.gemspec
+- config/production/database.yml
+- config/production/password.yml
+- config/production/streamingload.yml
+- config/production/variable.yml
+- config/test/database.yml
+- config/test/variable.yml
 - lib/bricolage/sqsdatasource.rb
 - lib/bricolage/sqsmock.rb
 - lib/bricolage/streamingload/alertinglogger.rb
@@ -148,11 +161,15 @@ files:
 - lib/bricolage/streamingload/manifest.rb
 - lib/bricolage/streamingload/taskhandler.rb
 - lib/bricolage/streamingload/version.rb
--
--
--
--
--
+- sample/sqs-message.txt
+- sample/sqs-result.txt
+- strload_load_logs.ct
+- testschema/strload_test.ct
+- testschema/testlog.json.gz
+- testschema/with_work_table.job
+- testschema/with_work_table.sql
+- utils/init_strload_tables.sql
+- utils/strload-stat.sql
 homepage: https://github.com/aamine/bricolage-streamingload
 licenses:
 - MIT
@@ -165,15 +182,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.
+      version: 2.3.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-
-rubygems_version: 2.6.11
+rubygems_version: 3.1.2
 signing_key:
 specification_version: 4
 summary: Bricolage Streaming Load Daemon
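Picking up the new release is then a one-line Gemfile change; illustrative:

    # Gemfile: pin the updated gem (constraint style is a matter of taste)
    gem 'bricolage-streamingload', '0.17.0'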
data/test/streamingload/test_dispatcher.rb
DELETED
@@ -1,241 +0,0 @@
-require 'test/unit'
-require 'bricolage/context'
-require 'bricolage/sqsdatasource'
-require 'bricolage/sqsmock'
-require 'bricolage/streamingload/dispatcher'
-require 'bricolage/streamingload/chunkrouter'
-require 'bricolage/streamingload/chunkbuffer'
-
-module Bricolage
-  module StreamingLoad
-
-    class TestDispatcher < Test::Unit::TestCase
-
-      test "checkpoint event" do
-        ctx = Context.for_application('.', environment: 'test', logger: NullLogger.new)
-        ctl_ds = ctx.get_data_source('sql', 'dwhctl')
-
-        event_queue = SQSDataSource.new_mock(queue: [
-          # 1st ReceiveMessage
-          [
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0001.json.gz'),
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0002.json.gz'),
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0003.json.gz'),
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0004.json.gz'),
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0005.json.gz')
-          ],
-          # 2nd ReceiveMessage
-          [
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0006.json.gz'),
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0007.json.gz'),
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0008.json.gz'),
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0009.json.gz'),
-            SQSMock::Message.new(body: {eventSource: 'bricolage:system', eventName: 'checkpoint'}),
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0010.json.gz')
-          ]
-        ])
-
-        task_queue = SQSDataSource.new_mock
-
-        chunk_buffer = ChunkBuffer.new(
-          control_data_source: ctl_ds,
-          logger: ctx.logger
-        )
-
-        chunk_router = ChunkRouter.for_config([
-          {
-            "url" => %r<\As3://test-bucket/testschema\.desttable/datafile-\d{4}\.json\.gz>.source,
-            "schema" => 'testschema',
-            "table" => 'desttable'
-          }
-        ])
-
-        task_logger = DummyLoadTaskLogger.new
-
-        dispatcher = Dispatcher.new(
-          event_queue: event_queue,
-          task_queue: task_queue,
-          chunk_buffer: chunk_buffer,
-          chunk_router: chunk_router,
-          task_logger: task_logger,
-          dispatch_interval: 600,
-          logger: ctx.logger
-        )
-
-        # FIXME: database cleaner
-        ctl_ds.open {|conn|
-          conn.update("truncate strload_tables")
-          conn.update("truncate strload_objects")
-          conn.update("truncate strload_task_objects")
-          conn.update("truncate strload_tasks")
-          conn.update("insert into strload_tables values (1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false)")
-        }
-        dispatcher.event_loop
-
-        # Event Queue Call Sequence
-        hst = event_queue.client.call_history
-        assert_equal 6, hst.size
-        assert_equal :send_message, hst[0].name   # start flush timer
-        assert_equal :receive_message, hst[1].name
-        assert_equal :delete_message_batch, hst[2].name
-        assert_equal :receive_message, hst[3].name
-        assert_equal :delete_message, hst[4].name   # delete checkpoint
-        assert_equal :delete_message_batch, hst[5].name
-
-        # Task Queue Call Sequence
-        hst = task_queue.client.call_history
-        assert_equal 1, hst.size
-        assert_equal :send_message, hst[0].name
-        assert(/streaming_load_v3/ =~ hst[0].args[:message_body])
-        task_id = JSON.load(hst[0].args[:message_body])['Records'][0]['taskId'].to_i
-        assert_not_equal 0, task_id
-
-        # Object Buffer
-        assert_equal [], unassigned_objects(ctl_ds)
-        task = Job::ControlConnection.open(ctl_ds) {|ctl| ctl.load_task(task_id) }
-        assert_equal 'testschema', task.schema_name
-        assert_equal 'desttable', task.table_name
-        assert_equal 10, task.object_urls.size
-
-        # Task Logger
-        assert_not_nil task_logger.last_task
-        assert_equal task_id, task_logger.last_task.id.to_i
-      end
-
-      def unassigned_objects(ctl_ds)
-        ctl_ds.open {|conn|
-          conn.query_values(<<-EndSQL)
-            select
-                object_url
-            from
-                strload_objects
-            where
-                object_id not in (select object_id from strload_task_objects)
-            ;
-          EndSQL
-        }
-      end
-
-      test "flushtable event" do
-        ctx = Context.for_application('.', environment: 'test', logger: NullLogger.new)
-        ctl_ds = ctx.get_data_source('sql', 'dwhctl')
-
-        event_queue = SQSDataSource.new_mock(queue: [
-          # 1st ReceiveMessage
-          [
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.aaa/datafile-0001.json.gz'),
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.bbb/datafile-0001.json.gz'),
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.ccc/datafile-0002.json.gz'),
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.aaa/datafile-0002.json.gz'),
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.bbb/datafile-0003.json.gz'),
-            SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.ccc/datafile-0003.json.gz'),
-            SQSMock::Message.new(body: {eventSource: 'bricolage:system', eventName: 'flushtable', tableName: 'testschema.bbb'}),
-            SQSMock::Message.new(body: {eventSource: 'bricolage:system', eventName: 'shutdown'})
-          ]
-        ])
-
-        task_queue = SQSDataSource.new_mock
-
-        chunk_buffer = ChunkBuffer.new(
-          control_data_source: ctl_ds,
-          logger: ctx.logger
-        )
-
-        chunk_router = ChunkRouter.for_config([
-          {
-            "url" => %r<\As3://test-bucket/testschema\.(?<table>\w+)/datafile-\d{4}\.json\.gz>.source,
-            "schema" => 'testschema',
-            "table" => '%table'
-          }
-        ])
-
-        task_logger = DummyLoadTaskLogger.new
-
-        dispatcher = Dispatcher.new(
-          event_queue: event_queue,
-          task_queue: task_queue,
-          chunk_buffer: chunk_buffer,
-          chunk_router: chunk_router,
-          task_logger: task_logger,
-          dispatch_interval: 600,
-          logger: ctx.logger
-        )
-
-        # FIXME: database cleaner
-        ctl_ds.open {|conn|
-          conn.update("truncate strload_tables")
-          conn.update("truncate strload_objects")
-          conn.update("truncate strload_task_objects")
-          conn.update("truncate strload_tasks")
-          conn.update("insert into strload_tables values (1, 'testschema.aaa', 'testschema', 'aaa', 100, 1800, false)")
-          conn.update("insert into strload_tables values (2, 'testschema.bbb', 'testschema', 'bbb', 100, 1800, false)")
-          conn.update("insert into strload_tables values (3, 'testschema.ccc', 'testschema', 'ccc', 100, 1800, false)")
-        }
-        dispatcher.event_loop
-
-        # Event Queue Call Sequence
-        hst = event_queue.client.call_history
-        assert_equal 5, hst.size
-        assert_equal :send_message, hst[0].name   # start dispatch timer
-        assert_equal :receive_message, hst[1].name
-        assert_equal :delete_message, hst[2].name   # delete flushtable event
-        assert_equal :delete_message, hst[3].name   # delete shutdown event
-        assert_equal :delete_message_batch, hst[4].name
-
-        # Task Queue Call Sequence
-        hst = task_queue.client.call_history
-        assert_equal 1, hst.size
-        assert_equal :send_message, hst[0].name
-        assert(/streaming_load_v3/ =~ hst[0].args[:message_body])
-        task_id = JSON.load(hst[0].args[:message_body])['Records'][0]['taskId'].to_i
-        assert_not_equal 0, task_id
-
-        # Object Buffer
-        assert_equal [], unassigned_table_objects(ctl_ds, 'testschema.bbb')
-        task = Job::ControlConnection.open(ctl_ds) {|ctl| ctl.load_task(task_id) }
-        assert_equal 'testschema', task.schema_name
-        assert_equal 'bbb', task.table_name
-        assert_equal 2, task.object_urls.size
-
-        # Task Logger
-        assert_not_nil task_logger.last_task
-        assert_equal task_id, task_logger.last_task.id.to_i
-      end
-
-      def unassigned_table_objects(ctl_ds, table_name)
-        ctl_ds.open {|conn|
-          conn.query_values(<<-EndSQL)
-            select
-                object_url
-            from
-                strload_objects
-            where
-                data_source_id = '#{table_name}'
-                and object_id not in (select object_id from strload_task_objects)
-            ;
-          EndSQL
-        }
-      end
-
-    end
-
-
-    class DummyLoadTaskLogger
-
-      def initialize
-        @task = nil
-      end
-
-      def log(task)
-        @task = task
-        nil
-      end
-
-      def last_task
-        @task
-      end
-
-    end
-
-  end
-end