bricolage-streamingload 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 848b815669c6580505119917a72a4e97833064c2
4
- data.tar.gz: 1809410699822e2a60a21407cad1b5814551adcd
3
+ metadata.gz: 41d99ce0dce7affca77b305ce527c5ec53e733b1
4
+ data.tar.gz: bd2254dab228018edf50debcf4627deef3bbdbd0
5
5
  SHA512:
6
- metadata.gz: 84ecbe1e548953cc1f4889eb07e9289101e1ade28e89370dae39462b61d193521ceda25f59d8a0b0f1760b8555cf254abab50cf7ed3d11e70d81c422af2f8b82
7
- data.tar.gz: b367bb4faa24e9755bee5dbeaaaf937a317826e8e562466e3264a19563bbfcc877ddba9801ffda4875d21e39feddfcc8e87df81a6433b9cd1c3a2cabcefc0754
6
+ metadata.gz: 66855f5839f86deb3c6af0500090bed718bd6860b50a83e6a11f1c79e822b81b3d7888ff5417391f4f1b0a669242ed5de3d9a9f4c3fe4e500d88ad77acfbe7fa
7
+ data.tar.gz: c18b88f15e4cd75095f5cf72863921a43ee467bb1b03f5b3481bf63a09e1a0e319f3afcd4e79e5902bdb6f62695528edd962ed561c29bfc5f6dc173ed029eb89
@@ -9,11 +9,14 @@ module Bricolage
9
9
  @sns_logger.level = Kernel.const_get("Logger").const_get(alert_level.upcase)
10
10
  end
11
11
 
12
+ def_delegators '@logger', :level, :level=, :debug?, :info?, :warn?, :error?, :fatal?, :unknown?
13
+
12
14
  %w(log debug info warn error fatal unknown).each do |m|
13
15
  define_method(m) do |*args|
14
16
  [@logger, @sns_logger].map {|t| t.send(m, *args) }
15
17
  end
16
18
  end
19
+
17
20
  end
18
21
  end
19
22
  end
@@ -56,6 +56,9 @@ module Bricolage
56
56
  Process.daemon(true) if opts.daemon?
57
57
  create_pid_file opts.pid_file_path if opts.pid_file_path
58
58
  dispatcher.event_loop
59
+ rescue Exception => e
60
+ alert_logger.error e.message
61
+ raise
59
62
  end
60
63
 
61
64
  def Dispatcher.new_logger(path, config)
@@ -119,18 +122,12 @@ module Bricolage
119
122
  def create_checkpoint
120
123
  logger.info "*** Creating checkpoint requested ***"
121
124
  logger.info "Force-flushing all objects..."
122
- flush_all_tasks_immediately
125
+ tasks = @object_buffer.flush_tasks_force
126
+ send_tasks tasks
123
127
  logger.info "All objects flushed; shutting down..."
124
128
  @event_queue.initiate_terminate
125
129
  end
126
130
 
127
- def flush_all_tasks_immediately
128
- tasks = @object_buffer.flush_tasks_force
129
- tasks.each do |task|
130
- @task_queue.put task
131
- end
132
- end
133
-
134
131
  def handle_data(e)
135
132
  unless e.created?
136
133
  @event_queue.delete_message_async(e)
@@ -144,7 +141,7 @@ module Bricolage
144
141
  def handle_dispatch(e)
145
142
  if @dispatch_message_id == e.message_id
146
143
  tasks = @object_buffer.flush_tasks
147
- tasks.each {|task| @task_queue.put task }
144
+ send_tasks tasks
148
145
  set_dispatch_timer
149
146
  end
150
147
  # Delete this event immediately
@@ -156,6 +153,20 @@ module Bricolage
156
153
  @dispatch_message_id = res.message_id
157
154
  end
158
155
 
156
+ def handle_flushtable(e)
157
+ logger.info "flushing #{e.table_name} requested"
158
+ tasks = @object_buffer.flush_table_force(e.table_name)
159
+ send_tasks tasks
160
+ # Delete this event immediately
161
+ @event_queue.delete_message(e)
162
+ end
163
+
164
+ def send_tasks(tasks)
165
+ tasks.each do |task|
166
+ @task_queue.put task
167
+ end
168
+ end
169
+
159
170
  end
160
171
 
161
172
 
@@ -10,7 +10,7 @@ module Bricolage
10
10
  case
11
11
  when rec['eventName'] == 'shutdown' then ShutdownEvent
12
12
  when rec['eventName'] == 'dispatch' then DispatchEvent
13
- when rec['eventName'] == 'flush' then FlushEvent
13
+ when rec['eventName'] == 'flushtable' then FlushTableEvent
14
14
  when rec['eventName'] == 'checkpoint' then CheckPointEvent
15
15
  when rec['eventSource'] == 'aws:s3'
16
16
  S3ObjectEvent
@@ -67,13 +67,13 @@ module Bricolage
67
67
  end
68
68
 
69
69
 
70
- class FlushEvent < Event
70
+ class FlushTableEvent < Event
71
71
 
72
- def FlushEvent.create(delay_seconds:, table_name:)
73
- super name: 'flush', delay_seconds: delay_seconds, table_name: table_name
72
+ def FlushTableEvent.create(table_name:)
73
+ super name: 'flushtable', table_name: table_name
74
74
  end
75
75
 
76
- def FlushEvent.parse_sqs_record(msg, rec)
76
+ def FlushTableEvent.parse_sqs_record(msg, rec)
77
77
  {
78
78
  table_name: rec['tableName']
79
79
  }
@@ -120,6 +120,7 @@ module Bricolage
120
120
 
121
121
  def write_job_error(status, message)
122
122
  @end_time = Time.now
123
+ @logger.warn message.lines.first
123
124
  write_job_result status, message.lines.first.strip[0, MAX_MESSAGE_LENGTH]
124
125
  end
125
126
 
@@ -49,6 +49,9 @@ module Bricolage
49
49
  create_pid_file opts.pid_file_path if opts.pid_file_path
50
50
  service.event_loop
51
51
  end
52
+ rescue Exception => e
53
+ alert_logger.error e.message
54
+ raise
52
55
  end
53
56
 
54
57
  def LoaderService.new_logger(path, config)
@@ -75,6 +78,8 @@ module Bricolage
75
78
  @logger = logger
76
79
  end
77
80
 
81
+ attr_reader :logger
82
+
78
83
  def event_loop
79
84
  @task_queue.handle_messages(handler: self, message_class: Task)
80
85
  @logger.info "shutdown gracefully"
@@ -55,6 +55,7 @@ module Bricolage
55
55
  }
56
56
  end
57
57
 
58
+ # Flushes multiple tables periodically
58
59
  def flush_tasks
59
60
  task_ids = nil
60
61
  @ctl_ds.open {|conn|
@@ -84,6 +85,24 @@ module Bricolage
84
85
  return task_ids.map {|id| LoadTask.create(task_id: id) }
85
86
  end
86
87
 
88
+ # Flushes all the objects of the specified table immediately
89
+ # with no additional conditions, to create "table checkpoint".
90
+ def flush_table_force(table_name)
91
+ task_ids = []
92
+ @ctl_ds.open {|conn|
93
+ conn.transaction {|txn|
94
+ # insert_task_object_mappings may not consume all saved objects
95
+ # (e.g. there are too many objects for one table), so we must create
96
+ # tasks repeatedly until there are no unassigned objects.
97
+ until (ids = insert_table_task_force(conn, table_name)).empty?
98
+ insert_task_object_mappings(conn)
99
+ task_ids.concat ids
100
+ end
101
+ }
102
+ }
103
+ return task_ids.map {|id| LoadTask.create(task_id: id) }
104
+ end
105
+
87
106
  private
88
107
 
89
108
  def insert_object(conn, obj)
@@ -185,6 +204,60 @@ module Bricolage
185
204
  task_ids
186
205
  end
187
206
 
207
+ def insert_table_task_force(conn, table_name)
208
+ task_ids = conn.query_values(<<-EndSQL)
209
+ insert into strload_tasks
210
+ ( task_class
211
+ , schema_name
212
+ , table_name
213
+ , submit_time
214
+ )
215
+ select
216
+ 'streaming_load_v3'
217
+ , tbl.schema_name
218
+ , tbl.table_name
219
+ , current_timestamp
220
+ from
221
+ strload_tables tbl
222
+
223
+ -- The number of objects for each table which are not assigned to any task (> 0).
224
+ -- This subquery is covered by the index.
225
+ inner join (
226
+ select
227
+ data_source_id
228
+ , count(*) as object_count
229
+ from
230
+ (
231
+ select
232
+ min(object_id) as object_id
233
+ , object_url
234
+ from
235
+ strload_objects
236
+ where
237
+ data_source_id = #{s table_name}
238
+ group by
239
+ object_url
240
+ ) uniq_objects
241
+ inner join strload_objects using (object_id)
242
+ left outer join strload_task_objects using (object_id)
243
+ where
244
+ task_id is null -- not assigned to a task
245
+ group by
246
+ data_source_id
247
+ ) obj
248
+ using (data_source_id)
249
+ where
250
+ -- does not check disabled
251
+ data_source_id = #{s table_name}
252
+ returning task_id
253
+ ;
254
+ EndSQL
255
+
256
+ # It must be 1
257
+ @logger.info "Number of task created: #{task_ids.size}"
258
+ task_ids
259
+ end
260
+
188
261
  def insert_task_object_mappings(conn)
189
262
  conn.update(<<-EndSQL)
190
263
  insert into strload_task_objects
@@ -1,5 +1,5 @@
1
1
  module Bricolage
2
2
  module StreamingLoad
3
- VERSION = '0.4.0'
3
+ VERSION = '0.5.0'
4
4
  end
5
5
  end
@@ -69,6 +69,7 @@ module Bricolage
69
69
 
70
70
  # Event Queue Call Sequence
71
71
  hst = event_queue.client.call_history
72
+ assert_equal 6, hst.size
72
73
  assert_equal :send_message, hst[0].name # start flush timer
73
74
  assert_equal :receive_message, hst[1].name
74
75
  assert_equal :delete_message_batch, hst[2].name
@@ -78,6 +79,7 @@ module Bricolage
78
79
 
79
80
  # Task Queue Call Sequence
80
81
  hst = task_queue.client.call_history
82
+ assert_equal 1, hst.size
81
83
  assert_equal :send_message, hst[0].name
82
84
  assert(/streaming_load_v3/ =~ hst[0].args[:message_body])
83
85
  task_id = JSON.load(hst[0].args[:message_body])['Records'][0]['taskId'].to_i
@@ -105,6 +107,100 @@ module Bricolage
105
107
  }
106
108
  end
107
109
 
110
+ test "flushtable event" do
111
+ ctx = Context.for_application('.', environment: 'test', logger: NullLogger.new)
112
+ ctl_ds = ctx.get_data_source('sql', 'dwhctl')
113
+
114
+ event_queue = SQSDataSource.new_mock(queue: [
115
+ # 1st ReceiveMessage
116
+ [
117
+ SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.aaa/datafile-0001.json.gz'),
118
+ SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.bbb/datafile-0001.json.gz'),
119
+ SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.ccc/datafile-0002.json.gz'),
120
+ SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.aaa/datafile-0002.json.gz'),
121
+ SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.bbb/datafile-0003.json.gz'),
122
+ SQSMock::Message.s3_object_created_event('s3://test-bucket/testschema.ccc/datafile-0003.json.gz'),
123
+ SQSMock::Message.new(body: {eventSource: 'bricolage:system', eventName: 'flushtable', tableName: 'testschema.bbb'}),
124
+ SQSMock::Message.new(body: {eventSource: 'bricolage:system', eventName: 'shutdown'})
125
+ ]
126
+ ])
127
+
128
+ task_queue = SQSDataSource.new_mock
129
+
130
+ object_buffer = ObjectBuffer.new(
131
+ control_data_source: ctl_ds,
132
+ logger: ctx.logger
133
+ )
134
+
135
+ url_patterns = URLPatterns.for_config([
136
+ {
137
+ "url" => %r<\As3://test-bucket/testschema\.(?<table>\w+)/datafile-\d{4}\.json\.gz>.source,
138
+ "schema" => 'testschema',
139
+ "table" => '%table'
140
+ }
141
+ ])
142
+
143
+ dispatcher = Dispatcher.new(
144
+ event_queue: event_queue,
145
+ task_queue: task_queue,
146
+ object_buffer: object_buffer,
147
+ url_patterns: url_patterns,
148
+ dispatch_interval: 600,
149
+ logger: ctx.logger
150
+ )
151
+
152
+ # FIXME: database cleaner
153
+ ctl_ds.open {|conn|
154
+ conn.update("truncate strload_tables")
155
+ conn.update("truncate strload_objects")
156
+ conn.update("truncate strload_task_objects")
157
+ conn.update("truncate strload_tasks")
158
+ conn.update("insert into strload_tables values ('testschema', 'aaa', 'testschema.aaa', 100, 1800, false)")
159
+ conn.update("insert into strload_tables values ('testschema', 'bbb', 'testschema.bbb', 100, 1800, false)")
160
+ conn.update("insert into strload_tables values ('testschema', 'ccc', 'testschema.ccc', 100, 1800, false)")
161
+ }
162
+ dispatcher.event_loop
163
+
164
+ # Event Queue Call Sequence
165
+ hst = event_queue.client.call_history
166
+ assert_equal 5, hst.size
167
+ assert_equal :send_message, hst[0].name # start dispatch timer
168
+ assert_equal :receive_message, hst[1].name
169
+ assert_equal :delete_message, hst[2].name # delete flushtable event
170
+ assert_equal :delete_message, hst[3].name # delete shutdown event
171
+ assert_equal :delete_message_batch, hst[4].name
172
+
173
+ # Task Queue Call Sequence
174
+ hst = task_queue.client.call_history
175
+ assert_equal 1, hst.size
176
+ assert_equal :send_message, hst[0].name
177
+ assert(/streaming_load_v3/ =~ hst[0].args[:message_body])
178
+ task_id = JSON.load(hst[0].args[:message_body])['Records'][0]['taskId'].to_i
179
+ assert_not_equal 0, task_id
180
+
181
+ # Object Buffer
182
+ assert_equal [], unassigned_table_objects(ctl_ds, 'testschema.bbb')
183
+ task = ctl_ds.open {|conn| LoadTask.load(conn, task_id) }
184
+ assert_equal 'testschema', task.schema
185
+ assert_equal 'bbb', task.table
186
+ assert_equal 2, task.object_urls.size
187
+ end
188
+
189
+ def unassigned_table_objects(ctl_ds, table_name)
190
+ ctl_ds.open {|conn|
191
+ conn.query_values(<<-EndSQL)
192
+ select
193
+ object_url
194
+ from
195
+ strload_objects
196
+ where
197
+ data_source_id = '#{table_name}'
198
+ and object_id not in (select object_id from strload_task_objects)
199
+ ;
200
+ EndSQL
201
+ }
202
+ end
203
+
108
204
  end
109
205
 
110
206
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bricolage-streamingload
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-08-12 00:00:00.000000000 Z
12
+ date: 2016-08-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bricolage