bricolage-streamingload 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ed2342a1e0f9db4cbe53fbb97be7b72dcd362def
4
+ data.tar.gz: 1d1705ad9811bb4becee2de4168bb7f40f9a0b63
5
+ SHA512:
6
+ metadata.gz: 9a09e1ec19569e03a13e01e8d2aa767de54bd21ac22cb455c9ccb92798a410f655f3435b4b0e7ff4e8bde429dcf2fd904a75f5f3d303f7bf815cfa0ca08b5631
7
+ data.tar.gz: 3cfb076a0144c04592db35309236e2bfce5021525e34fe26b32e5f61ef000572d8d6641bdbee078f741a0f79a85d9ef9fd4cc47ce167338cec1ad4577c051bd2
@@ -0,0 +1,19 @@
1
+ # Bricolage Streaming Load
2
+
3
+ Streaming load daemon based on Bricolage.
4
+
5
+ This software was developed during work time at Cookpad, Inc.
6
+
7
+ ## License
8
+
9
+ MIT license.
10
+ See LICENSES file for details.
11
+
12
+ ## Running Test
13
+
14
+ % rake test
15
+
16
+ ## Author
17
+
18
+ - Minero Aoki
19
+ - Shimpei Kodama
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby

# Entry point for the streaming-load dispatcher daemon.
# Load the bundled gem set first when executed under Bundler.
Bundler.require(:default) if defined?(Bundler)

require 'bricolage/streamingload/dispatcher'

Bricolage::StreamingLoad::Dispatcher.main
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby

# Entry point for the streaming-load loader service.
# Load the bundled gem set first when executed under Bundler.
Bundler.require(:default) if defined?(Bundler)

require 'bricolage/streamingload/loaderservice'

Bricolage::StreamingLoad::LoaderService.main
@@ -0,0 +1,299 @@
1
+ require 'bricolage/datasource'
2
+ require 'bricolage/sqswrapper'
3
+ require 'securerandom'
4
+ require 'aws-sdk'
5
+ require 'json'
6
+ require 'time'
7
+
8
+ module Bricolage
9
+
10
+ class SQSDataSource < DataSource
11
+
12
+ declare_type 'sqs'
13
+
14
+ def initialize(region: 'ap-northeast-1', url:, access_key_id:, secret_access_key:,
15
+ visibility_timeout:, max_number_of_messages: 10, max_delete_batch_size: 10, wait_time_seconds: 20, noop: false)
16
+ @region = region
17
+ @url = url
18
+ @access_key_id = access_key_id
19
+ @secret_access_key = secret_access_key
20
+ @visibility_timeout = visibility_timeout
21
+ @max_number_of_messages = max_number_of_messages
22
+ @max_delete_batch_size = max_delete_batch_size
23
+ @wait_time_seconds = wait_time_seconds
24
+ @noop = noop
25
+ end
26
+
27
+ attr_reader :region
28
+ attr_reader :url
29
+ attr_reader :access_key_id
30
+ attr_reader :secret_access_key
31
+
32
+ def client
33
+ @client ||= begin
34
+ c = @noop ? DummySQSClient.new : Aws::SQS::Client.new(region: @region, access_key_id: @access_key_id, secret_access_key: @secret_access_key)
35
+ SQSClientWrapper.new(c, logger: logger)
36
+ end
37
+ end
38
+
39
+ #
40
+ # High-Level Polling Interface
41
+ #
42
+
43
+ def main_handler_loop(handlers)
44
+ trap_signals
45
+
46
+ n_zero = 0
47
+ until terminating?
48
+ insert_handler_wait(n_zero)
49
+ n_msg = handle_messages(handlers)
50
+ if n_msg == 0
51
+ n_zero += 1
52
+ else
53
+ n_zero = 0
54
+ end
55
+ end
56
+ @delete_message_buffer.flush if @delete_message_buffer
57
+ logger.info "shutdown gracefully"
58
+ end
59
+
60
+ def trap_signals
61
+ # Allows graceful stop
62
+ Signal.trap(:TERM) {
63
+ initiate_terminate
64
+ }
65
+ Signal.trap(:INT) {
66
+ initiate_terminate
67
+ }
68
+ end
69
+
70
+ def initiate_terminate
71
+ # No I/O allowed in this method
72
+ @terminating = true
73
+ end
74
+
75
+ def terminating?
76
+ @terminating
77
+ end
78
+
79
+ def insert_handler_wait(n_zero)
80
+ sec = 2 ** [n_zero, 6].min # max 64s
81
+ logger.info "queue wait: sleep #{sec}" if n_zero > 0
82
+ sleep sec
83
+ end
84
+
85
+ def handle_messages(handlers:, message_class:)
86
+ n_msg = foreach_message(message_class) do |msg|
87
+ logger.debug "handling message: #{msg.inspect}" if logger.debug?
88
+ mid = "handle_#{msg.message_type}"
89
+ # just ignore unknown event to make app migration easy
90
+ if handlers.respond_to?(mid, true)
91
+ handlers.__send__(mid, msg)
92
+ else
93
+ logger.error "unknown SQS message type: #{msg.message_type.inspect} (message-id: #{msg.message_id})"
94
+ end
95
+ end
96
+ n_msg
97
+ end
98
+
99
+ def foreach_message(message_class, &block)
100
+ result = receive_messages()
101
+ unless result and result.successful?
102
+ logger.error "ReceiveMessage failed: #{result ? result.error.message : '(result=nil)'}"
103
+ return nil
104
+ end
105
+ logger.info "receive #{result.messages.size} messages" unless result.messages.empty?
106
+ msgs = message_class.for_sqs_result(result)
107
+ msgs.each(&block)
108
+ msgs.size
109
+ end
110
+
111
+ #
112
+ # API-Level Interface
113
+ #
114
+
115
+ def receive_messages
116
+ result = client.receive_message(
117
+ queue_url: @url,
118
+ attribute_names: ["All"],
119
+ message_attribute_names: ["All"],
120
+ max_number_of_messages: @max_number_of_messages,
121
+ visibility_timeout: @visibility_timeout,
122
+ wait_time_seconds: @wait_time_seconds
123
+ )
124
+ result
125
+ end
126
+
127
+ def delete_message(msg)
128
+ # TODO: use batch request?
129
+ client.delete_message(
130
+ queue_url: @url,
131
+ receipt_handle: msg.receipt_handle
132
+ )
133
+ end
134
+
135
+ def buffered_delete_message(msg)
136
+ delete_message_buffer.put(msg)
137
+ end
138
+
139
+ def delete_message_buffer
140
+ @delete_message_buffer ||= DeleteMessageBuffer.new(client, @url, @max_delete_batch_size, logger)
141
+ end
142
+
143
+ def put(msg)
144
+ send_message(msg)
145
+ end
146
+
147
+ def send_message(msg)
148
+ client.send_message(
149
+ queue_url: @url,
150
+ message_body: { 'Records' => [msg.body] }.to_json,
151
+ delay_seconds: msg.delay_seconds
152
+ )
153
+ end
154
+
155
+ class DeleteMessageBuffer
156
+
157
+ def initialize(sqs_client, url, max_buffer_size, logger)
158
+ @sqs_client = sqs_client
159
+ @url = url
160
+ @max_buffer_size = max_buffer_size
161
+ @logger = logger
162
+ @buf = {}
163
+ @retry_counts = Hash.new(0)
164
+ end
165
+
166
+ MAX_RETRY_COUNT = 3
167
+
168
+ def put(msg)
169
+ @buf[SecureRandom.uuid] = msg
170
+ flush if size >= @max_buffer_size
171
+ end
172
+
173
+ def size
174
+ @buf.size
175
+ end
176
+
177
+ def flush
178
+ return unless size > 0
179
+ response = @sqs_client.delete_message_batch({
180
+ queue_url: @url,
181
+ entries: @buf.to_a.map {|item| {id: item[0], receipt_handle: item[1].receipt_handle} }
182
+ })
183
+ clear_successes(response.successful)
184
+ retry_failures(response.failed)
185
+ @logger.debug "DeleteMessageBatch executed: #{response.successful.size} succeeded, #{response.failed.size} failed."
186
+ end
187
+
188
+ private
189
+
190
+ def clear_successes(successes)
191
+ successes.each do |s|
192
+ @buf.delete s.id
193
+ end
194
+ end
195
+
196
+ def retry_failures(failures)
197
+ return unless failures.size > 0
198
+ failures.each do |f|
199
+ @logger.info "DeleteMessageBatch failed to retry for: id=#{f.id}, sender_fault=#{f.sender_fault}, code=#{f.code}, message=#{f.message}"
200
+ end
201
+ flush
202
+ @buf.keys.map {|k| @retry_counts[k] += 1 }
203
+ @retry_counts.select {|k, v| v >= MAX_RETRY_COUNT }.each do |k, v|
204
+ @logger.warn "DeleteMessageBatch failed #{MAX_RETRY_COUNT} times for: message_id=#{@buf[k].message_id}, receipt_handle=#{@buf[k].receipt_handle}"
205
+ @buf.delete k
206
+ @retry_counts.delete k
207
+ end
208
+ end
209
+
210
+ end # DeleteMessageBuffer
211
+
212
+ end # class SQSDataSource
213
+
214
+
215
+ class SQSMessage
216
+
217
+ SQS_EVENT_SOURCE = 'bricolage:system'
218
+
219
+ # Writer interface
220
+ def SQSMessage.create(
221
+ name:,
222
+ time: Time.now.getutc,
223
+ source: SQS_EVENT_SOURCE,
224
+ delay_seconds: 0,
225
+ **message_params)
226
+ new(name: name, time: time, source: source, delay_seconds: delay_seconds, **message_params)
227
+ end
228
+
229
+ def SQSMessage.for_sqs_result(result)
230
+ result.messages.flat_map {|msg|
231
+ body = JSON.parse(msg.body)
232
+ records = body['Records'] or next []
233
+ records.map {|rec| get_concrete_class(msg, rec).for_sqs_record(msg, rec) }
234
+ }
235
+ end
236
+
237
+ # abstract SQSMessage.get_concrete_class(msg, rec)
238
+
239
+ def SQSMessage.for_sqs_record(msg, rec)
240
+ new(** SQSMessage.parse_sqs_record(msg, rec).merge(parse_sqs_record(msg, rec)))
241
+ end
242
+
243
+ def SQSMessage.parse_sqs_record(msg, rec)
244
+ time_str = rec['eventTime']
245
+ tm = time_str ? (Time.parse(time_str) rescue nil) : nil
246
+ {
247
+ message_id: msg.message_id,
248
+ receipt_handle: msg.receipt_handle,
249
+ name: rec['eventName'],
250
+ time: tm,
251
+ source: rec['eventSource']
252
+ }
253
+ end
254
+
255
+ def initialize(name:, time:, source:,
256
+ message_id: nil, receipt_handle: nil, delay_seconds: nil,
257
+ **message_params)
258
+ @name = name
259
+ @time = time
260
+ @source = source
261
+
262
+ @message_id = message_id
263
+ @receipt_handle = receipt_handle
264
+
265
+ @delay_seconds = delay_seconds
266
+
267
+ init_message(**message_params)
268
+ end
269
+
270
+ # abstract init_message(**message_params)
271
+
272
+ attr_reader :name
273
+ attr_reader :time
274
+ attr_reader :source
275
+
276
+ # Valid only for received messages
277
+
278
+ attr_reader :message_id
279
+ attr_reader :receipt_handle
280
+
281
+ # Valid only for sending messages
282
+
283
+ attr_reader :delay_seconds
284
+
285
+ def body
286
+ obj = {}
287
+ [
288
+ ['eventName', @name],
289
+ ['eventTime', (@time ? @time.iso8601 : nil)],
290
+ ['eventSource', @source]
291
+ ].each do |name, value|
292
+ obj[name] = value if value
293
+ end
294
+ obj
295
+ end
296
+
297
+ end # class SQSMessage
298
+
299
+ end # module Bricolage
@@ -0,0 +1,77 @@
1
+ require 'json'
2
+
3
+ module Bricolage
4
+
5
+ class SQSClientWrapper
6
+ def initialize(sqs, logger:)
7
+ @sqs = sqs
8
+ @logger = logger
9
+ end
10
+
11
+ def receive_message(**args)
12
+ @logger.debug "receive_message(#{args.inspect})"
13
+ @sqs.receive_message(**args)
14
+ end
15
+
16
+ def send_message(**args)
17
+ @logger.debug "send_message(#{args.inspect})"
18
+ @sqs.send_message(**args)
19
+ end
20
+
21
+ def delete_message(**args)
22
+ @logger.debug "delete_message(#{args.inspect})"
23
+ @sqs.delete_message(**args)
24
+ end
25
+
26
+ def delete_message_batch(**args)
27
+ @logger.debug "delete_message_batch(#{args.inspect})"
28
+ @sqs.delete_message_batch(**args)
29
+ end
30
+ end
31
+
32
+
33
+ class DummySQSClient
34
+ def initialize(queue = [])
35
+ @queue = queue
36
+ end
37
+
38
+ def receive_message(**args)
39
+ msg_recs = @queue.shift or return EMPTY_RESULT
40
+ msgs = msg_recs.map {|recs| Message.new({'Records' => recs}.to_json) }
41
+ Result.new(true, msgs)
42
+ end
43
+
44
+ def send_message(**args)
45
+ SUCCESS_RESULT
46
+ end
47
+
48
+ def delete_message(**args)
49
+ SUCCESS_RESULT
50
+ end
51
+
52
+ class Result
53
+ def initialize(successful, messages = nil)
54
+ @successful = successful
55
+ @messages = messages
56
+ end
57
+
58
+ def successful?
59
+ @successful
60
+ end
61
+
62
+ attr_reader :messages
63
+ end
64
+
65
+ SUCCESS_RESULT = Result.new(true)
66
+ EMPTY_RESULT = Result.new(true, [])
67
+
68
+ class Message
69
+ def initialize(body)
70
+ @body = body
71
+ end
72
+
73
+ attr_reader :body
74
+ end
75
+ end
76
+
77
+ end # module Bricolage
@@ -0,0 +1,181 @@
1
+ require 'bricolage/exception'
2
+ require 'bricolage/version'
3
+ require 'bricolage/sqsdatasource'
4
+ require 'bricolage/logger'
5
+ require 'bricolage/streamingload/event'
6
+ require 'bricolage/streamingload/objectbuffer'
7
+ require 'bricolage/streamingload/urlpatterns'
8
+ require 'aws-sdk'
9
+ require 'yaml'
10
+ require 'optparse'
11
+ require 'fileutils'
12
+
13
+ module Bricolage
14
+
15
+ module StreamingLoad
16
+
17
+ class Dispatcher
18
+
19
+ def Dispatcher.main
20
+ opts = DispatcherOptions.new(ARGV)
21
+ opts.parse
22
+ unless opts.rest_arguments.size == 1
23
+ $stderr.puts opts.usage
24
+ exit 1
25
+ end
26
+ config_path, * = opts.rest_arguments
27
+ config = YAML.load(File.read(config_path))
28
+ logger = opts.log_file_path ? new_logger(opts.log_file_path, config) : nil
29
+ ctx = Context.for_application('.', environment: opts.environment, logger: logger)
30
+ event_queue = ctx.get_data_source('sqs', config.fetch('event-queue-ds'))
31
+ task_queue = ctx.get_data_source('sqs', config.fetch('task-queue-ds'))
32
+
33
+ object_buffer = ObjectBuffer.new(
34
+ control_data_source: ctx.get_data_source('sql', config.fetch('ctl-postgres-ds')),
35
+ logger: ctx.logger
36
+ )
37
+
38
+ url_patterns = URLPatterns.for_config(config.fetch('url_patterns'))
39
+
40
+ dispatcher = Dispatcher.new(
41
+ event_queue: event_queue,
42
+ task_queue: task_queue,
43
+ object_buffer: object_buffer,
44
+ url_patterns: url_patterns,
45
+ dispatch_interval: 60,
46
+ logger: ctx.logger
47
+ )
48
+
49
+ Process.daemon(true) if opts.daemon?
50
+ create_pid_file opts.pid_file_path if opts.pid_file_path
51
+ dispatcher.set_dispatch_timer
52
+ dispatcher.event_loop
53
+ end
54
+
55
+ def Dispatcher.new_logger(path, config)
56
+ Logger.new(
57
+ device: path,
58
+ rotation_period: config.fetch('log-rotation-period', 'daily'),
59
+ rotation_size: config.fetch('log-rotation-size', nil)
60
+ )
61
+ end
62
+
63
+ def Dispatcher.create_pid_file(path)
64
+ File.open(path, 'w') {|f|
65
+ f.puts $$
66
+ }
67
+ rescue
68
+ # ignore
69
+ end
70
+
71
+ def initialize(event_queue:, task_queue:, object_buffer:, url_patterns:, dispatch_interval:, logger:)
72
+ @event_queue = event_queue
73
+ @task_queue = task_queue
74
+ @object_buffer = object_buffer
75
+ @url_patterns = url_patterns
76
+ @dispatch_interval = dispatch_interval
77
+ @dispatch_message_id = nil
78
+ @logger = logger
79
+ end
80
+
81
+ def event_loop
82
+ @event_queue.main_handler_loop(handlers: self, message_class: Event)
83
+ end
84
+
85
+ def handle_shutdown(e)
86
+ @event_queue.initiate_terminate
87
+ @event_queue.delete_message(e)
88
+ end
89
+
90
+ def handle_data(e)
91
+ unless e.created?
92
+ @event_queue.delete_message(e)
93
+ return
94
+ end
95
+ obj = e.loadable_object(@url_patterns)
96
+ @object_buffer.put(obj)
97
+ @event_queue.buffered_delete_message(e)
98
+ end
99
+
100
+ def handle_dispatch(e)
101
+ if @dispatch_message_id == e.message_id
102
+ tasks = @object_buffer.flush_tasks
103
+ tasks.each {|task| @task_queue.put task }
104
+ set_dispatch_timer
105
+ end
106
+ @event_queue.delete_message(e)
107
+ end
108
+
109
+ def set_dispatch_timer
110
+ resp = @event_queue.send_message DispatchEvent.create(delay_seconds: @dispatch_interval)
111
+ @dispatch_message_id = resp.message_id
112
+ end
113
+
114
+ def delete_events(events)
115
+ events.each do |e|
116
+ @event_queue.delete_message(e)
117
+ end
118
+ end
119
+
120
+ end
121
+
122
+
123
+ class DispatcherOptions
124
+
125
+ def initialize(argv)
126
+ @argv = argv
127
+ @daemon = false
128
+ @log_file_path = nil
129
+ @pid_file_path = nil
130
+ @rest_arguments = nil
131
+
132
+ @opts = opts = OptionParser.new("Usage: #{$0} CONFIG_PATH")
133
+ opts.on('--task-id=id', 'Execute oneshot load task (implicitly disables daemon mode).') {|task_id|
134
+ @task_id = task_id
135
+ }
136
+ opts.on('-e', '--environment=NAME', "Sets execution environment [default: #{Context::DEFAULT_ENV}]") {|env|
137
+ @environment = env
138
+ }
139
+ opts.on('--daemon', 'Becomes daemon in server mode.') {
140
+ @daemon = true
141
+ }
142
+ opts.on('--log-file=PATH', 'Log file path') {|path|
143
+ @log_file_path = path
144
+ }
145
+ opts.on('--pid-file=PATH', 'Creates PID file.') {|path|
146
+ @pid_file_path = path
147
+ }
148
+ opts.on('--help', 'Prints this message and quit.') {
149
+ puts opts.help
150
+ exit 0
151
+ }
152
+ opts.on('--version', 'Prints version and quit.') {
153
+ puts "#{File.basename($0)} version #{VERSION}"
154
+ exit 0
155
+ }
156
+ end
157
+
158
+ def usage
159
+ @opts.help
160
+ end
161
+
162
+ def parse
163
+ @opts.parse!(@argv)
164
+ @rest_arguments = @argv.dup
165
+ rescue OptionParser::ParseError => err
166
+ raise OptionError, err.message
167
+ end
168
+
169
+ attr_reader :rest_arguments, :environment, :log_file_path
170
+
171
+ def daemon?
172
+ @daemon
173
+ end
174
+
175
+ attr_reader :pid_file_path
176
+
177
+ end
178
+
179
+ end # module StreamingLoad
180
+
181
+ end # module Bricolage