bricolage-streamingload 0.15.0 → 0.15.1

data/testschema/strload_test.ct ADDED
@@ -0,0 +1,11 @@
+ --dest-table: $test_schema.strload_test
+
+ create table $dest_table
+ ( jst_time timestamp encode delta
+ , full_controller varchar(128) encode lzo
+ , action varchar(128) encode lzo
+ , user_id integer encode lzo
+ , unique_id varchar(80) encode lzo
+ )
+ sortkey (jst_time)
+ ;
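For context: `encode` and `sortkey` are Redshift clauses, so this DDL targets the load destination rather than the PostgreSQL control database. `$test_schema` and `$dest_table` are Bricolage variables; `$dest_table` appears to be filled in from the `--dest-table:` header on the first line. A sketch of the expanded statement, assuming a hypothetical binding `test_schema: strload` in `config/test/variable.yml`:

    -- expansion sketch; 'strload' is a made-up schema name
    create table strload.strload_test
    ( jst_time timestamp encode delta
    , full_controller varchar(128) encode lzo
    , action varchar(128) encode lzo
    , user_id integer encode lzo
    , unique_id varchar(80) encode lzo
    )
    sortkey (jst_time)
    ;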
data/testschema/with_work_table.job ADDED
@@ -0,0 +1,4 @@
+ class: streaming_load_v3
+ dest-table: $testschema_schema.with_work_table
+ work-table: $testschema_schema.with_work_table_wk
+ sql-file: with_work_table.sql
data/testschema/with_work_table.sql ADDED
@@ -0,0 +1 @@
+ insert into $dest_table select * from $work_table;
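Judging from this pair of files, a `streaming_load_v3` job with a `work-table:` loads incoming objects into `$work_table` first and then executes the `sql-file:` to move rows into `$dest_table`; the shipped SQL is a plain copy, but the indirection leaves room for in-flight transforms. A sketch of a deduplicating variant, hypothetical and keyed on the `unique_id` column from `strload_test.ct`:

    -- skip rows whose unique_id already exists in the destination
    insert into $dest_table
    select w.*
    from $work_table w
    where not exists
      (select 1 from $dest_table d where d.unique_id = w.unique_id)
    ;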
data/utils/init_strload_tables.sql ADDED
@@ -0,0 +1,13 @@
+ drop table if exists strload_objects cascade;
+ drop table if exists strload_dup_objects cascade;
+ drop table if exists strload_task_objects;
+ drop table if exists strload_tasks cascade;
+ drop table if exists strload_jobs cascade;
+
+ \i schema/strload_objects.ct
+ \i schema/strload_dup_objects.ct
+ \i schema/strload_task_objects.ct
+ \i schema/strload_tasks.ct
+ \i schema/strload_jobs.ct
+ \i schema/strload_stats.cv
+ \i schema/strload_task_status.cv
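`\i` is a psql meta-command, so this script is meant to run through psql against the control PostgreSQL database, from a directory containing the referenced `schema/` files (they are not in this gem's files list, so they presumably ship with the core bricolage gem). A hypothetical invocation, with a placeholder host and the `dwhctl` database name used by the tests:

    psql -h ctl-db.example.com -d dwhctl -f utils/init_strload_tables.sql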
data/utils/strload-stat.sql ADDED
@@ -0,0 +1,36 @@
+ select
+ task_seq
+ , tbl.source_id
+ , tbl.schema_name
+ , tbl.table_name
+ , submit_time
+ , object_count
+ , total_object_size
+ , job_seq
+ , loader_id
+ , start_time
+ , finish_time
+ , status
+ , substring(message, 1, 30) as err_msg
+ from
+ strload_tasks t
+ inner join (
+ select
+ task_seq
+ , count(*) as object_count
+ , sum(object_size) as total_object_size
+ from
+ strload_task_objects
+ inner join strload_objects
+ using (object_seq)
+ group by 1
+ ) o
+ using (task_seq)
+ left outer join strload_jobs j
+ using (task_seq)
+ left outer join strload_tables tbl
+ using (source_id)
+ order by
+ task_seq
+ , job_seq
+ ;
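This monitoring query reports one row per load task, repeated per job attempt when a task was retried: the object count and total bytes of the buffered objects, plus each job's loader, timing, status, and the first 30 characters of any error message. To run it from a psql session connected to the control database:

    \i utils/strload-stat.sql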
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: bricolage-streamingload
  version: !ruby/object:Gem::Version
- version: 0.15.0
+ version: 0.15.1
  platform: ruby
  authors:
  - Minero Aoki
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-02-20 00:00:00.000000000 Z
+ date: 2018-08-21 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: bricolage
@@ -17,14 +17,14 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '5.26'
+ version: 5.29.2
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '5.26'
+ version: 5.29.2
  - !ruby/object:Gem::Dependency
  name: pg
  requirement: !ruby/object:Gem::Requirement
@@ -45,28 +45,28 @@ dependencies:
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '1'
+ version: '1.8'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '1'
+ version: '1.8'
  - !ruby/object:Gem::Dependency
  name: aws-sdk-sqs
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '1'
+ version: '1.3'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '1'
+ version: '1.3'
  - !ruby/object:Gem::Dependency
  name: rake
  requirement: !ruby/object:Gem::Requirement
@@ -95,35 +95,27 @@ dependencies:
  - - ">="
  - !ruby/object:Gem::Version
  version: '0'
- - !ruby/object:Gem::Dependency
- name: pry
- requirement: !ruby/object:Gem::Requirement
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- version: '0'
- type: :development
- prerelease: false
- version_requirements: !ruby/object:Gem::Requirement
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- version: '0'
  description: Bricolage Streaming Load Daemon loads S3 data files to Redshift continuously.
  email:
  - aamine@loveruby.net
  executables:
- - send-flushtable-event
- - send-shutdown-event
- - send-load-task
+ - bricolage-streaming-dispatcher
  - bricolage-streaming-loader
  - send-checkpoint-event
  - send-data-event
- - bricolage-streaming-dispatcher
+ - send-flushtable-event
+ - send-load-task
+ - send-shutdown-event
  extensions: []
  extra_rdoc_files: []
  files:
+ - ".gitignore"
+ - Gemfile
+ - Gemfile.lock
+ - LICENSES
  - README.md
+ - RELEASE.md
+ - Rakefile
  - bin/bricolage-streaming-dispatcher
  - bin/bricolage-streaming-loader
  - bin/send-checkpoint-event
@@ -131,6 +123,13 @@ files:
  - bin/send-flushtable-event
  - bin/send-load-task
  - bin/send-shutdown-event
+ - bricolage-streamingload.gemspec
+ - config/production/database.yml
+ - config/production/password.yml
+ - config/production/streamingload.yml
+ - config/production/variable.yml
+ - config/test/database.yml
+ - config/test/variable.yml
  - lib/bricolage/sqsdatasource.rb
  - lib/bricolage/sqsmock.rb
  - lib/bricolage/streamingload/alertinglogger.rb
@@ -148,11 +147,14 @@ files:
  - lib/bricolage/streamingload/manifest.rb
  - lib/bricolage/streamingload/taskhandler.rb
  - lib/bricolage/streamingload/version.rb
- - test/all.rb
- - test/streamingload/test_dispatcher.rb
- - test/streamingload/test_dispatchermessage.rb
- - test/streamingload/test_job.rb
- - test/test_sqsdatasource.rb
+ - sample/sqs-message.txt
+ - sample/sqs-result.txt
+ - strload_load_logs.ct
+ - testschema/strload_test.ct
+ - testschema/with_work_table.job
+ - testschema/with_work_table.sql
+ - utils/init_strload_tables.sql
+ - utils/strload-stat.sql
  homepage: https://github.com/aamine/bricolage-streamingload
  licenses:
  - MIT
@@ -165,7 +167,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 2.1.0
+ version: 2.3.0
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
@@ -173,7 +175,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.6.11
+ rubygems_version: 2.7.6
  signing_key:
  specification_version: 4
  summary: Bricolage Streaming Load Daemon
data/test/all.rb DELETED
@@ -1,3 +0,0 @@
- Dir.glob("#{__dir__}/**/test_*.rb").each do |path|
- load path
- end
data/test/streamingload/test_dispatcher.rb DELETED
@@ -1,241 +0,0 @@
- require 'test/unit'
- require 'bricolage/context'
- require 'bricolage/sqsdatasource'
- require 'bricolage/sqsmock'
- require 'bricolage/streamingload/dispatcher'
- require 'bricolage/streamingload/chunkrouter'
- require 'bricolage/streamingload/chunkbuffer'
-
- module Bricolage
- module StreamingLoad
-
- class TestDispatcher < Test::Unit::TestCase
-
- test "checkpoint event" do
- ctx = Context.for_application('.', environment: 'test', logger: NullLogger.new)
- ctl_ds = ctx.get_data_source('sql', 'dwhctl')
-
- event_queue = SQSDataSource.new_mock(queue: [
- # 1st ReceiveMessage
- [
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0001.json.gz'),
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0002.json.gz'),
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0003.json.gz'),
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0004.json.gz'),
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0005.json.gz')
- ],
- # 2nd ReceiveMessage
- [
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0006.json.gz'),
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0007.json.gz'),
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0008.json.gz'),
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0009.json.gz'),
- SQSMock::Message.new(body: {eventSource: 'bricolage:system', eventName: 'checkpoint'}),
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.desttable/datafile-0010.json.gz')
- ]
- ])
-
- task_queue = SQSDataSource.new_mock
-
- chunk_buffer = ChunkBuffer.new(
- control_data_source: ctl_ds,
- logger: ctx.logger
- )
-
- chunk_router = ChunkRouter.for_config([
- {
- "url" => %r<\As3://test-bucket/testschema\.desttable/datafile-\d{4}\.json\.gz>.source,
- "schema" => 'testschema',
- "table" => 'desttable'
- }
- ])
-
- task_logger = DummyLoadTaskLogger.new
-
- dispatcher = Dispatcher.new(
- event_queue: event_queue,
- task_queue: task_queue,
- chunk_buffer: chunk_buffer,
- chunk_router: chunk_router,
- task_logger: task_logger,
- dispatch_interval: 600,
- logger: ctx.logger
- )
-
- # FIXME: database cleaner
- ctl_ds.open {|conn|
- conn.update("truncate strload_tables")
- conn.update("truncate strload_objects")
- conn.update("truncate strload_task_objects")
- conn.update("truncate strload_tasks")
- conn.update("insert into strload_tables values (1, 'testschema.desttable', 'testschema', 'desttable', 100, 1800, false)")
- }
- dispatcher.event_loop
-
- # Event Queue Call Sequence
- hst = event_queue.client.call_history
- assert_equal 6, hst.size
- assert_equal :send_message, hst[0].name # start flush timer
- assert_equal :receive_message, hst[1].name
- assert_equal :delete_message_batch, hst[2].name
- assert_equal :receive_message, hst[3].name
- assert_equal :delete_message, hst[4].name # delete checkpoint
- assert_equal :delete_message_batch, hst[5].name
-
- # Task Queue Call Sequence
- hst = task_queue.client.call_history
- assert_equal 1, hst.size
- assert_equal :send_message, hst[0].name
- assert(/streaming_load_v3/ =~ hst[0].args[:message_body])
- task_id = JSON.load(hst[0].args[:message_body])['Records'][0]['taskId'].to_i
- assert_not_equal 0, task_id
-
- # Object Buffer
- assert_equal [], unassigned_objects(ctl_ds)
- task = Job::ControlConnection.open(ctl_ds) {|ctl| ctl.load_task(task_id) }
- assert_equal 'testschema', task.schema_name
- assert_equal 'desttable', task.table_name
- assert_equal 10, task.object_urls.size
-
- # Task Logger
- assert_not_nil task_logger.last_task
- assert_equal task_id, task_logger.last_task.id.to_i
- end
-
- def unassigned_objects(ctl_ds)
- ctl_ds.open {|conn|
- conn.query_values(<<-EndSQL)
- select
- object_url
- from
- strload_objects
- where
- object_id not in (select object_id from strload_task_objects)
- ;
- EndSQL
- }
- end
-
- test "flushtable event" do
- ctx = Context.for_application('.', environment: 'test', logger: NullLogger.new)
- ctl_ds = ctx.get_data_source('sql', 'dwhctl')
-
- event_queue = SQSDataSource.new_mock(queue: [
- # 1st ReceiveMessage
- [
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.aaa/datafile-0001.json.gz'),
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.bbb/datafile-0001.json.gz'),
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.ccc/datafile-0002.json.gz'),
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.aaa/datafile-0002.json.gz'),
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.bbb/datafile-0003.json.gz'),
- SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.ccc/datafile-0003.json.gz'),
- SQSMock::Message.new(body: {eventSource: 'bricolage:system', eventName: 'flushtable', tableName: 'testschema.bbb'}),
- SQSMock::Message.new(body: {eventSource: 'bricolage:system', eventName: 'shutdown'})
- ]
- ])
-
- task_queue = SQSDataSource.new_mock
-
- chunk_buffer = ChunkBuffer.new(
- control_data_source: ctl_ds,
- logger: ctx.logger
- )
-
- chunk_router = ChunkRouter.for_config([
- {
- "url" => %r<\As3://test-bucket/testschema\.(?<table>\w+)/datafile-\d{4}\.json\.gz>.source,
- "schema" => 'testschema',
- "table" => '%table'
- }
- ])
-
- task_logger = DummyLoadTaskLogger.new
-
- dispatcher = Dispatcher.new(
- event_queue: event_queue,
- task_queue: task_queue,
- chunk_buffer: chunk_buffer,
- chunk_router: chunk_router,
- task_logger: task_logger,
- dispatch_interval: 600,
- logger: ctx.logger
- )
-
- # FIXME: database cleaner
- ctl_ds.open {|conn|
- conn.update("truncate strload_tables")
- conn.update("truncate strload_objects")
- conn.update("truncate strload_task_objects")
- conn.update("truncate strload_tasks")
- conn.update("insert into strload_tables values (1, 'testschema.aaa', 'testschema', 'aaa', 100, 1800, false)")
- conn.update("insert into strload_tables values (2, 'testschema.bbb', 'testschema', 'bbb', 100, 1800, false)")
- conn.update("insert into strload_tables values (3, 'testschema.ccc', 'testschema', 'ccc', 100, 1800, false)")
- }
- dispatcher.event_loop
-
- # Event Queue Call Sequence
- hst = event_queue.client.call_history
- assert_equal 5, hst.size
- assert_equal :send_message, hst[0].name # start dispatch timer
- assert_equal :receive_message, hst[1].name
- assert_equal :delete_message, hst[2].name # delete flushtable event
- assert_equal :delete_message, hst[3].name # delete shutdown event
- assert_equal :delete_message_batch, hst[4].name
-
- # Task Queue Call Sequence
- hst = task_queue.client.call_history
- assert_equal 1, hst.size
- assert_equal :send_message, hst[0].name
- assert(/streaming_load_v3/ =~ hst[0].args[:message_body])
- task_id = JSON.load(hst[0].args[:message_body])['Records'][0]['taskId'].to_i
- assert_not_equal 0, task_id
-
- # Object Buffer
- assert_equal [], unassigned_table_objects(ctl_ds, 'testschema.bbb')
- task = Job::ControlConnection.open(ctl_ds) {|ctl| ctl.load_task(task_id) }
- assert_equal 'testschema', task.schema_name
- assert_equal 'bbb', task.table_name
- assert_equal 2, task.object_urls.size
-
- # Task Logger
- assert_not_nil task_logger.last_task
- assert_equal task_id, task_logger.last_task.id.to_i
- end
-
- def unassigned_table_objects(ctl_ds, table_name)
- ctl_ds.open {|conn|
- conn.query_values(<<-EndSQL)
- select
- object_url
- from
- strload_objects
- where
- data_source_id = '#{table_name}'
- and object_id not in (select object_id from strload_task_objects)
- ;
- EndSQL
- }
- end
-
- end
-
-
- class DummyLoadTaskLogger
-
- def initialize
- @task = nil
- end
-
- def log(task)
- @task = task
- nil
- end
-
- def last_task
- @task
- end
-
- end
-
- end
- end