bricolage-streamingload 0.14.2 → 0.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +5 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +54 -0
- data/LICENSES +21 -0
- data/RELEASE.md +131 -0
- data/Rakefile +3 -0
- data/bin/send-data-event +1 -1
- data/bricolage-streamingload.gemspec +26 -0
- data/config/production/database.yml +66 -0
- data/config/production/password.yml +5 -0
- data/config/production/streamingload.yml +20 -0
- data/config/production/variable.yml +5 -0
- data/lib/bricolage/sqsdatasource.rb +1 -1
- data/lib/bricolage/streamingload/dispatcher.rb +0 -1
- data/lib/bricolage/streamingload/job.rb +6 -2
- data/lib/bricolage/streamingload/jobparams.rb +1 -1
- data/lib/bricolage/streamingload/manifest.rb +2 -0
- data/lib/bricolage/streamingload/taskhandler.rb +5 -2
- data/lib/bricolage/streamingload/version.rb +1 -1
- data/sample/sqs-message.txt +38 -0
- data/sample/sqs-result.txt +18 -0
- data/strload_load_logs.ct +13 -0
- data/testschema/strload_test.ct +11 -0
- data/testschema/testlog.json.gz +0 -0
- data/testschema/with_work_table.job +4 -0
- data/testschema/with_work_table.sql +1 -0
- data/utils/init_strload_tables.sql +13 -0
- data/utils/strload-stat.sql +36 -0
- metadata +43 -27
- data/test/all.rb +0 -3
- data/test/streamingload/test_dispatcher.rb +0 -241
- data/test/streamingload/test_dispatchermessage.rb +0 -31
- data/test/streamingload/test_job.rb +0 -620
- data/test/test_sqsdatasource.rb +0 -55
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"Records" => [
|
|
3
|
+
{
|
|
4
|
+
"eventVersion" => "2.0",
|
|
5
|
+
"eventSource" => "aws:s3",
|
|
6
|
+
"awsRegion" => "ap-northeast-1",
|
|
7
|
+
"eventTime" => "2016-02-13T11:40:08.001Z",
|
|
8
|
+
"eventName" => "ObjectCreated:Put",
|
|
9
|
+
"userIdentity" => {
|
|
10
|
+
"principalId" => "AWS:AIXXXXXXXXXXXXXXXXX6A"
|
|
11
|
+
},
|
|
12
|
+
"requestParameters" => {
|
|
13
|
+
"sourceIPAddress" => "111.222.111.90"
|
|
14
|
+
},
|
|
15
|
+
"responseElements" => {
|
|
16
|
+
"x-amz-request-id" => "1111AAAA9999AAAA",
|
|
17
|
+
"x-amz-id-2" => "6p9IZG+R+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxFyXiNMMB4="
|
|
18
|
+
},
|
|
19
|
+
"s3" => {
|
|
20
|
+
"s3SchemaVersion" => "1.0",
|
|
21
|
+
"configurationId" => "ObjectCreatedEvents",
|
|
22
|
+
"bucket" => {
|
|
23
|
+
"name" => "some-bucket",
|
|
24
|
+
"ownerIdentity" => {
|
|
25
|
+
"principalId" => "AAAAIIIIBBBB11"
|
|
26
|
+
},
|
|
27
|
+
"arn" => "arn:aws:s3:::some-bucket"
|
|
28
|
+
},
|
|
29
|
+
"object" => {
|
|
30
|
+
"key" => "development/logs/schema.table/20160125_0000_0_de37c5ad-d26a-42cc-a141-475676b65f69.gz",
|
|
31
|
+
"size" => 1302957,
|
|
32
|
+
"eTag" => "d704db7f9cb77b1ecb981c534526b542",
|
|
33
|
+
"sequencer" => "111122223333444499"
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
]
|
|
38
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#<struct Aws::SQS::Types::ReceiveMessageResult
|
|
2
|
+
messages = [
|
|
3
|
+
#<struct Aws::SQS::Types::Message
|
|
4
|
+
message_id = "11112222-b37f-4281-aee2-696408c482be",
|
|
5
|
+
receipt_handle = "AQEBErp4Vv159Hvx5oXSioWu7Ov1Jj1ht423LwTTUlgLohmKh02gAMAZ7kN1J4+aLzM54BRuMatnifWPH3cBh6rtdSWlllysBVmXDRKj83VCaseKPpq+Vdw/bLZrPEYRSGPuhKsHa2DFjo67KeND4AqasZO7lOCJX+YcsbqsqAtcODlDWlVbfedPkKIGM3Scra3uY9ysGjNx/zWuJ88fCtvRpu9tgk0nG7srHAm84Mxj5ArakJnKnJMbtkD/+lvm0Vqi2YCkXonS8+BP8gK4kSTbl1uv/Gp7ZfxkjKQJBB8xQiYo7e7qKLwhbImMZtO5tqDKzIKFVfMi1G2ODF+tEj4Ce+ryMRGA50GhON2ETCyofsl1T7Wdr61IEOV0NFlonGTWFf4q/1r3OPhFAchp+tYnyA==",
|
|
6
|
+
md5_of_body = "00006bf43abdff178ca0ffa96205aaaa",
|
|
7
|
+
body = "{\"Records\":[{\"eventVersion\":\"2.0\",\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-northeast-1\",\"eventTime\":\"2016-02-13T11:40:07.268Z\",\"eventName\":\"ObjectCreated:Put\",\"userIdentity\":{\"principalId\":\"AWS:AAAAJKS3A4VEF45XCAAAA\"},\"requestParameters\":{\"sourceIPAddress\":\"111.222.111.90\"},\"responseElements\":{\"x-amz-request-id\":\"10AAAA31A1EDCCCC\",\"x-amz-id-2\":\"YT+U/PxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxzfFGPjqP5AQg=\"},\"s3\":{\"s3SchemaVersion\":\"1.0\",\"configurationId\":\"LogStreamDev\",\"bucket\":{\"name\":\"redshift-copy-buffer\",\"ownerIdentity\":{\"principalId\":\"AAAAFIDWR40000\"},\"arn\":\"arn:aws:s3:::redshift-copy-buffer\"},\"object\":{\"key\":\"development/logs/schema.table/20160125_0000_0_2d0b43ad-0db9-4655-bd98-b7464b123763.gz\",\"size\":1238953,\"eTag\":\"aaaa196c3935f4957c7bb645f9780000\",\"sequencer\":\"0000BF161697C1AAAA\"}}}]}",
|
|
8
|
+
attributes = {
|
|
9
|
+
"SenderId" => "FACE0VEO02BJMF37H2JKW",
|
|
10
|
+
"ApproximateFirstReceiveTimestamp" => "1455364193429",
|
|
11
|
+
"ApproximateReceiveCount" => "1",
|
|
12
|
+
"SentTimestamp" => "1455363607341"
|
|
13
|
+
},
|
|
14
|
+
md5_of_message_attributes = nil,
|
|
15
|
+
message_attributes = {}
|
|
16
|
+
>
|
|
17
|
+
]
|
|
18
|
+
>
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
--dest-table: bricolage.strload_load_logs
|
|
2
|
+
|
|
3
|
+
/*
|
|
4
|
+
Redshift-side log table
|
|
5
|
+
*/
|
|
6
|
+
create table $dest_table
|
|
7
|
+
( job_id bigint encode raw
|
|
8
|
+
, task_id bigint encode zstd
|
|
9
|
+
, finish_time timestamp encode delta
|
|
10
|
+
)
|
|
11
|
+
distkey (job_id)
|
|
12
|
+
sortkey (job_id)
|
|
13
|
+
;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
--dest-table: $test_schema.strload_test
|
|
2
|
+
|
|
3
|
+
create table $dest_table
|
|
4
|
+
( jst_time timestamp encode delta
|
|
5
|
+
, full_controller varchar(128) encode lzo
|
|
6
|
+
, action varchar(128) encode lzo
|
|
7
|
+
, user_id integer encode lzo
|
|
8
|
+
, unique_id varchar(80) encode lzo
|
|
9
|
+
)
|
|
10
|
+
sortkey (jst_time)
|
|
11
|
+
;
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
insert into $dest_table select * from $work_table;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
drop table if exists strload_objects cascade;
|
|
2
|
+
drop table if exists strload_dup_objects cascade;
|
|
3
|
+
drop table if exists strload_task_objects;
|
|
4
|
+
drop table if exists strload_tasks cascade;
|
|
5
|
+
drop table if exists strload_jobs cascade;
|
|
6
|
+
|
|
7
|
+
\i schema/strload_objects.ct
|
|
8
|
+
\i schema/strload_dup_objects.ct
|
|
9
|
+
\i schema/strload_task_objects.ct
|
|
10
|
+
\i schema/strload_tasks.ct
|
|
11
|
+
\i schema/strload_jobs.ct
|
|
12
|
+
\i schema/strload_stats.cv
|
|
13
|
+
\i schema/strload_task_status.cv
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
select
|
|
2
|
+
task_seq
|
|
3
|
+
, tbl.source_id
|
|
4
|
+
, tbl.schema_name
|
|
5
|
+
, tbl.table_name
|
|
6
|
+
, submit_time
|
|
7
|
+
, object_count
|
|
8
|
+
, total_object_size
|
|
9
|
+
, job_seq
|
|
10
|
+
, loader_id
|
|
11
|
+
, start_time
|
|
12
|
+
, finish_time
|
|
13
|
+
, status
|
|
14
|
+
, substring(message, 1, 30) as err_msg
|
|
15
|
+
from
|
|
16
|
+
strload_tasks t
|
|
17
|
+
inner join (
|
|
18
|
+
select
|
|
19
|
+
task_seq
|
|
20
|
+
, count(*) as object_count
|
|
21
|
+
, sum(object_size) as total_object_size
|
|
22
|
+
from
|
|
23
|
+
strload_task_objects
|
|
24
|
+
inner join strload_objects
|
|
25
|
+
using (object_seq)
|
|
26
|
+
group by 1
|
|
27
|
+
) o
|
|
28
|
+
using (task_seq)
|
|
29
|
+
left outer join strload_jobs j
|
|
30
|
+
using (task_seq)
|
|
31
|
+
left outer join strload_tables tbl
|
|
32
|
+
using (source_id)
|
|
33
|
+
order by
|
|
34
|
+
task_seq
|
|
35
|
+
, job_seq
|
|
36
|
+
;
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bricolage-streamingload
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.16.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Minero Aoki
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date:
|
|
12
|
+
date: 2020-08-04 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: bricolage
|
|
@@ -17,14 +17,14 @@ dependencies:
|
|
|
17
17
|
requirements:
|
|
18
18
|
- - ">="
|
|
19
19
|
- !ruby/object:Gem::Version
|
|
20
|
-
version: 5.
|
|
20
|
+
version: 5.29.2
|
|
21
21
|
type: :runtime
|
|
22
22
|
prerelease: false
|
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
|
24
24
|
requirements:
|
|
25
25
|
- - ">="
|
|
26
26
|
- !ruby/object:Gem::Version
|
|
27
|
-
version: 5.
|
|
27
|
+
version: 5.29.2
|
|
28
28
|
- !ruby/object:Gem::Dependency
|
|
29
29
|
name: pg
|
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -40,35 +40,35 @@ dependencies:
|
|
|
40
40
|
- !ruby/object:Gem::Version
|
|
41
41
|
version: 0.18.0
|
|
42
42
|
- !ruby/object:Gem::Dependency
|
|
43
|
-
name: aws-sdk
|
|
43
|
+
name: aws-sdk-s3
|
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
|
45
45
|
requirements:
|
|
46
46
|
- - "~>"
|
|
47
47
|
- !ruby/object:Gem::Version
|
|
48
|
-
version:
|
|
48
|
+
version: '1.8'
|
|
49
49
|
type: :runtime
|
|
50
50
|
prerelease: false
|
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
|
52
52
|
requirements:
|
|
53
53
|
- - "~>"
|
|
54
54
|
- !ruby/object:Gem::Version
|
|
55
|
-
version:
|
|
55
|
+
version: '1.8'
|
|
56
56
|
- !ruby/object:Gem::Dependency
|
|
57
|
-
name:
|
|
57
|
+
name: aws-sdk-sqs
|
|
58
58
|
requirement: !ruby/object:Gem::Requirement
|
|
59
59
|
requirements:
|
|
60
|
-
- - "
|
|
60
|
+
- - "~>"
|
|
61
61
|
- !ruby/object:Gem::Version
|
|
62
|
-
version: '
|
|
63
|
-
type: :
|
|
62
|
+
version: '1.3'
|
|
63
|
+
type: :runtime
|
|
64
64
|
prerelease: false
|
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
|
66
66
|
requirements:
|
|
67
|
-
- - "
|
|
67
|
+
- - "~>"
|
|
68
68
|
- !ruby/object:Gem::Version
|
|
69
|
-
version: '
|
|
69
|
+
version: '1.3'
|
|
70
70
|
- !ruby/object:Gem::Dependency
|
|
71
|
-
name:
|
|
71
|
+
name: rake
|
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|
|
73
73
|
requirements:
|
|
74
74
|
- - ">="
|
|
@@ -82,7 +82,7 @@ dependencies:
|
|
|
82
82
|
- !ruby/object:Gem::Version
|
|
83
83
|
version: '0'
|
|
84
84
|
- !ruby/object:Gem::Dependency
|
|
85
|
-
name:
|
|
85
|
+
name: test-unit
|
|
86
86
|
requirement: !ruby/object:Gem::Requirement
|
|
87
87
|
requirements:
|
|
88
88
|
- - ">="
|
|
@@ -99,17 +99,23 @@ description: Bricolage Streaming Load Daemon loads S3 data files to Redshift con
|
|
|
99
99
|
email:
|
|
100
100
|
- aamine@loveruby.net
|
|
101
101
|
executables:
|
|
102
|
-
-
|
|
103
|
-
- send-shutdown-event
|
|
104
|
-
- send-load-task
|
|
102
|
+
- bricolage-streaming-dispatcher
|
|
105
103
|
- bricolage-streaming-loader
|
|
106
104
|
- send-checkpoint-event
|
|
107
105
|
- send-data-event
|
|
108
|
-
-
|
|
106
|
+
- send-flushtable-event
|
|
107
|
+
- send-load-task
|
|
108
|
+
- send-shutdown-event
|
|
109
109
|
extensions: []
|
|
110
110
|
extra_rdoc_files: []
|
|
111
111
|
files:
|
|
112
|
+
- ".gitignore"
|
|
113
|
+
- Gemfile
|
|
114
|
+
- Gemfile.lock
|
|
115
|
+
- LICENSES
|
|
112
116
|
- README.md
|
|
117
|
+
- RELEASE.md
|
|
118
|
+
- Rakefile
|
|
113
119
|
- bin/bricolage-streaming-dispatcher
|
|
114
120
|
- bin/bricolage-streaming-loader
|
|
115
121
|
- bin/send-checkpoint-event
|
|
@@ -117,6 +123,13 @@ files:
|
|
|
117
123
|
- bin/send-flushtable-event
|
|
118
124
|
- bin/send-load-task
|
|
119
125
|
- bin/send-shutdown-event
|
|
126
|
+
- bricolage-streamingload.gemspec
|
|
127
|
+
- config/production/database.yml
|
|
128
|
+
- config/production/password.yml
|
|
129
|
+
- config/production/streamingload.yml
|
|
130
|
+
- config/production/variable.yml
|
|
131
|
+
- config/test/database.yml
|
|
132
|
+
- config/test/variable.yml
|
|
120
133
|
- lib/bricolage/sqsdatasource.rb
|
|
121
134
|
- lib/bricolage/sqsmock.rb
|
|
122
135
|
- lib/bricolage/streamingload/alertinglogger.rb
|
|
@@ -134,11 +147,15 @@ files:
|
|
|
134
147
|
- lib/bricolage/streamingload/manifest.rb
|
|
135
148
|
- lib/bricolage/streamingload/taskhandler.rb
|
|
136
149
|
- lib/bricolage/streamingload/version.rb
|
|
137
|
-
-
|
|
138
|
-
-
|
|
139
|
-
-
|
|
140
|
-
-
|
|
141
|
-
-
|
|
150
|
+
- sample/sqs-message.txt
|
|
151
|
+
- sample/sqs-result.txt
|
|
152
|
+
- strload_load_logs.ct
|
|
153
|
+
- testschema/strload_test.ct
|
|
154
|
+
- testschema/testlog.json.gz
|
|
155
|
+
- testschema/with_work_table.job
|
|
156
|
+
- testschema/with_work_table.sql
|
|
157
|
+
- utils/init_strload_tables.sql
|
|
158
|
+
- utils/strload-stat.sql
|
|
142
159
|
homepage: https://github.com/aamine/bricolage-streamingload
|
|
143
160
|
licenses:
|
|
144
161
|
- MIT
|
|
@@ -151,15 +168,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
151
168
|
requirements:
|
|
152
169
|
- - ">="
|
|
153
170
|
- !ruby/object:Gem::Version
|
|
154
|
-
version: 2.
|
|
171
|
+
version: 2.3.0
|
|
155
172
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
156
173
|
requirements:
|
|
157
174
|
- - ">="
|
|
158
175
|
- !ruby/object:Gem::Version
|
|
159
176
|
version: '0'
|
|
160
177
|
requirements: []
|
|
161
|
-
|
|
162
|
-
rubygems_version: 2.6.11
|
|
178
|
+
rubygems_version: 3.1.2
|
|
163
179
|
signing_key:
|
|
164
180
|
specification_version: 4
|
|
165
181
|
summary: Bricolage Streaming Load Daemon
|
data/test/all.rb
DELETED
|
@@ -1,241 +0,0 @@
|
|
|
1
|
-
require 'test/unit'
|
|
2
|
-
require 'bricolage/context'
|
|
3
|
-
require 'bricolage/sqsdatasource'
|
|
4
|
-
require 'bricolage/sqsmock'
|
|
5
|
-
require 'bricolage/streamingload/dispatcher'
|
|
6
|
-
require 'bricolage/streamingload/chunkrouter'
|
|
7
|
-
require 'bricolage/streamingload/chunkbuffer'
|
|
8
|
-
|
|
9
|
-
module Bricolage
|
|
10
|
-
module StreamingLoad
|
|
11
|
-
|
|
12
|
-
class TestDispatcher < Test::Unit::TestCase
|
|
13
|
-
|
|
14
|
-
test "checkpoint event" do
|
|
15
|
-
ctx = Context.for_application('.', environment: 'test', logger: NullLogger.new)
|
|
16
|
-
ctl_ds = ctx.get_data_source('sql', 'dwhctl')
|
|
17
|
-
|
|
18
|
-
event_queue = SQSDataSource.new_mock(queue: [
|
|
19
|
-
# 1st ReceiveMessage
|
|
20
|
-
[
|
|
21
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0001.json.gz'),
|
|
22
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0002.json.gz'),
|
|
23
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0003.json.gz'),
|
|
24
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0004.json.gz'),
|
|
25
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0005.json.gz')
|
|
26
|
-
],
|
|
27
|
-
# 2nd ReceiveMessage
|
|
28
|
-
[
|
|
29
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0006.json.gz'),
|
|
30
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0007.json.gz'),
|
|
31
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0008.json.gz'),
|
|
32
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0009.json.gz'),
|
|
33
|
-
SQSMock::Message.new(body: {eventSource: 'bricolage:system', eventName: 'checkpoint'}),
|
|
34
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0010.json.gz')
|
|
35
|
-
]
|
|
36
|
-
])
|
|
37
|
-
|
|
38
|
-
task_queue = SQSDataSource.new_mock
|
|
39
|
-
|
|
40
|
-
chunk_buffer = ChunkBuffer.new(
|
|
41
|
-
control_data_source: ctl_ds,
|
|
42
|
-
logger: ctx.logger
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
chunk_router = ChunkRouter.for_config([
|
|
46
|
-
{
|
|
47
|
-
"url" => %r<\As3://test-bucket/testschema\.desttable/datafile-\d{4}\.json\.gz>.source,
|
|
48
|
-
"schema" => 'testschema',
|
|
49
|
-
"table" => 'desttable'
|
|
50
|
-
}
|
|
51
|
-
])
|
|
52
|
-
|
|
53
|
-
task_logger = DummyLoadTaskLogger.new
|
|
54
|
-
|
|
55
|
-
dispatcher = Dispatcher.new(
|
|
56
|
-
event_queue: event_queue,
|
|
57
|
-
task_queue: task_queue,
|
|
58
|
-
chunk_buffer: chunk_buffer,
|
|
59
|
-
chunk_router: chunk_router,
|
|
60
|
-
task_logger: task_logger,
|
|
61
|
-
dispatch_interval: 600,
|
|
62
|
-
logger: ctx.logger
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
# FIXME: database cleaner
|
|
66
|
-
ctl_ds.open {|conn|
|
|
67
|
-
conn.update("truncate strload_tables")
|
|
68
|
-
conn.update("truncate strload_objects")
|
|
69
|
-
conn.update("truncate strload_task_objects")
|
|
70
|
-
conn.update("truncate strload_tasks")
|
|
71
|
-
conn.update("insert into strload_tables values (1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false)")
|
|
72
|
-
}
|
|
73
|
-
dispatcher.event_loop
|
|
74
|
-
|
|
75
|
-
# Event Queue Call Sequence
|
|
76
|
-
hst = event_queue.client.call_history
|
|
77
|
-
assert_equal 6, hst.size
|
|
78
|
-
assert_equal :send_message, hst[0].name # start flush timer
|
|
79
|
-
assert_equal :receive_message, hst[1].name
|
|
80
|
-
assert_equal :delete_message_batch, hst[2].name
|
|
81
|
-
assert_equal :receive_message, hst[3].name
|
|
82
|
-
assert_equal :delete_message, hst[4].name # delete checkpoint
|
|
83
|
-
assert_equal :delete_message_batch, hst[5].name
|
|
84
|
-
|
|
85
|
-
# Task Queue Call Sequence
|
|
86
|
-
hst = task_queue.client.call_history
|
|
87
|
-
assert_equal 1, hst.size
|
|
88
|
-
assert_equal :send_message, hst[0].name
|
|
89
|
-
assert(/streaming_load_v3/ =~ hst[0].args[:message_body])
|
|
90
|
-
task_id = JSON.load(hst[0].args[:message_body])['Records'][0]['taskId'].to_i
|
|
91
|
-
assert_not_equal 0, task_id
|
|
92
|
-
|
|
93
|
-
# Object Buffer
|
|
94
|
-
assert_equal [], unassigned_objects(ctl_ds)
|
|
95
|
-
task = Job::ControlConnection.open(ctl_ds) {|ctl| ctl.load_task(task_id) }
|
|
96
|
-
assert_equal 'testschema', task.schema_name
|
|
97
|
-
assert_equal 'desttable', task.table_name
|
|
98
|
-
assert_equal 10, task.object_urls.size
|
|
99
|
-
|
|
100
|
-
# Task Logger
|
|
101
|
-
assert_not_nil task_logger.last_task
|
|
102
|
-
assert_equal task_id, task_logger.last_task.id.to_i
|
|
103
|
-
end
|
|
104
|
-
|
|
105
|
-
def unassigned_objects(ctl_ds)
|
|
106
|
-
ctl_ds.open {|conn|
|
|
107
|
-
conn.query_values(<<-EndSQL)
|
|
108
|
-
select
|
|
109
|
-
object_url
|
|
110
|
-
from
|
|
111
|
-
strload_objects
|
|
112
|
-
where
|
|
113
|
-
object_id not in (select object_id from strload_task_objects)
|
|
114
|
-
;
|
|
115
|
-
EndSQL
|
|
116
|
-
}
|
|
117
|
-
end
|
|
118
|
-
|
|
119
|
-
test "flushtable event" do
|
|
120
|
-
ctx = Context.for_application('.', environment: 'test', logger: NullLogger.new)
|
|
121
|
-
ctl_ds = ctx.get_data_source('sql', 'dwhctl')
|
|
122
|
-
|
|
123
|
-
event_queue = SQSDataSource.new_mock(queue: [
|
|
124
|
-
# 1st ReceiveMessage
|
|
125
|
-
[
|
|
126
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.aaa/datafile-0001.json.gz'),
|
|
127
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.bbb/datafile-0001.json.gz'),
|
|
128
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.ccc/datafile-0002.json.gz'),
|
|
129
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.aaa/datafile-0002.json.gz'),
|
|
130
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.bbb/datafile-0003.json.gz'),
|
|
131
|
-
SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.ccc/datafile-0003.json.gz'),
|
|
132
|
-
SQSMock::Message.new(body: {eventSource: 'bricolage:system', eventName: 'flushtable', tableName: 'testschema.bbb'}),
|
|
133
|
-
SQSMock::Message.new(body: {eventSource: 'bricolage:system', eventName: 'shutdown'})
|
|
134
|
-
]
|
|
135
|
-
])
|
|
136
|
-
|
|
137
|
-
task_queue = SQSDataSource.new_mock
|
|
138
|
-
|
|
139
|
-
chunk_buffer = ChunkBuffer.new(
|
|
140
|
-
control_data_source: ctl_ds,
|
|
141
|
-
logger: ctx.logger
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
chunk_router = ChunkRouter.for_config([
|
|
145
|
-
{
|
|
146
|
-
"url" => %r<\As3://test-bucket/testschema\.(?<table>\w+)/datafile-\d{4}\.json\.gz>.source,
|
|
147
|
-
"schema" => 'testschema',
|
|
148
|
-
"table" => '%table'
|
|
149
|
-
}
|
|
150
|
-
])
|
|
151
|
-
|
|
152
|
-
task_logger = DummyLoadTaskLogger.new
|
|
153
|
-
|
|
154
|
-
dispatcher = Dispatcher.new(
|
|
155
|
-
event_queue: event_queue,
|
|
156
|
-
task_queue: task_queue,
|
|
157
|
-
chunk_buffer: chunk_buffer,
|
|
158
|
-
chunk_router: chunk_router,
|
|
159
|
-
task_logger: task_logger,
|
|
160
|
-
dispatch_interval: 600,
|
|
161
|
-
logger: ctx.logger
|
|
162
|
-
)
|
|
163
|
-
|
|
164
|
-
# FIXME: database cleaner
|
|
165
|
-
ctl_ds.open {|conn|
|
|
166
|
-
conn.update("truncate strload_tables")
|
|
167
|
-
conn.update("truncate strload_objects")
|
|
168
|
-
conn.update("truncate strload_task_objects")
|
|
169
|
-
conn.update("truncate strload_tasks")
|
|
170
|
-
conn.update("insert into strload_tables values (1, 'testschema.aaa', 'testschema', 'aaa', 100, 1800, false)")
|
|
171
|
-
conn.update("insert into strload_tables values (2, 'testschema.bbb', 'testschema', 'bbb', 100, 1800, false)")
|
|
172
|
-
conn.update("insert into strload_tables values (3, 'testschema.ccc', 'testschema', 'ccc', 100, 1800, false)")
|
|
173
|
-
}
|
|
174
|
-
dispatcher.event_loop
|
|
175
|
-
|
|
176
|
-
# Event Queue Call Sequence
|
|
177
|
-
hst = event_queue.client.call_history
|
|
178
|
-
assert_equal 5, hst.size
|
|
179
|
-
assert_equal :send_message, hst[0].name # start dispatch timer
|
|
180
|
-
assert_equal :receive_message, hst[1].name
|
|
181
|
-
assert_equal :delete_message, hst[2].name # delete flushtable event
|
|
182
|
-
assert_equal :delete_message, hst[3].name # delete shutdown event
|
|
183
|
-
assert_equal :delete_message_batch, hst[4].name
|
|
184
|
-
|
|
185
|
-
# Task Queue Call Sequence
|
|
186
|
-
hst = task_queue.client.call_history
|
|
187
|
-
assert_equal 1, hst.size
|
|
188
|
-
assert_equal :send_message, hst[0].name
|
|
189
|
-
assert(/streaming_load_v3/ =~ hst[0].args[:message_body])
|
|
190
|
-
task_id = JSON.load(hst[0].args[:message_body])['Records'][0]['taskId'].to_i
|
|
191
|
-
assert_not_equal 0, task_id
|
|
192
|
-
|
|
193
|
-
# Object Buffer
|
|
194
|
-
assert_equal [], unassigned_table_objects(ctl_ds, 'testschema.bbb')
|
|
195
|
-
task = Job::ControlConnection.open(ctl_ds) {|ctl| ctl.load_task(task_id) }
|
|
196
|
-
assert_equal 'testschema', task.schema_name
|
|
197
|
-
assert_equal 'bbb', task.table_name
|
|
198
|
-
assert_equal 2, task.object_urls.size
|
|
199
|
-
|
|
200
|
-
# Task Logger
|
|
201
|
-
assert_not_nil task_logger.last_task
|
|
202
|
-
assert_equal task_id, task_logger.last_task.id.to_i
|
|
203
|
-
end
|
|
204
|
-
|
|
205
|
-
def unassigned_table_objects(ctl_ds, table_name)
|
|
206
|
-
ctl_ds.open {|conn|
|
|
207
|
-
conn.query_values(<<-EndSQL)
|
|
208
|
-
select
|
|
209
|
-
object_url
|
|
210
|
-
from
|
|
211
|
-
strload_objects
|
|
212
|
-
where
|
|
213
|
-
data_source_id = '#{table_name}'
|
|
214
|
-
and object_id not in (select object_id from strload_task_objects)
|
|
215
|
-
;
|
|
216
|
-
EndSQL
|
|
217
|
-
}
|
|
218
|
-
end
|
|
219
|
-
|
|
220
|
-
end
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
class DummyLoadTaskLogger
|
|
224
|
-
|
|
225
|
-
def initialize
|
|
226
|
-
@task = nil
|
|
227
|
-
end
|
|
228
|
-
|
|
229
|
-
def log(task)
|
|
230
|
-
@task = task
|
|
231
|
-
nil
|
|
232
|
-
end
|
|
233
|
-
|
|
234
|
-
def last_task
|
|
235
|
-
@task
|
|
236
|
-
end
|
|
237
|
-
|
|
238
|
-
end
|
|
239
|
-
|
|
240
|
-
end
|
|
241
|
-
end
|