bricolage-streamingload 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ed2342a1e0f9db4cbe53fbb97be7b72dcd362def
4
+ data.tar.gz: 1d1705ad9811bb4becee2de4168bb7f40f9a0b63
5
+ SHA512:
6
+ metadata.gz: 9a09e1ec19569e03a13e01e8d2aa767de54bd21ac22cb455c9ccb92798a410f655f3435b4b0e7ff4e8bde429dcf2fd904a75f5f3d303f7bf815cfa0ca08b5631
7
+ data.tar.gz: 3cfb076a0144c04592db35309236e2bfce5021525e34fe26b32e5f61ef000572d8d6641bdbee078f741a0f79a85d9ef9fd4cc47ce167338cec1ad4577c051bd2
@@ -0,0 +1,19 @@
1
+ # Bricolage Streaming Load
2
+
3
+ Streaming load daemon based on Bricolage.
4
+
5
+ This software was written during working hours at Cookpad, Inc.
6
+
7
+ ## License
8
+
9
+ MIT license.
10
+ See LICENSES file for details.
11
+
12
+ ## Running Test
13
+
14
+ % rake test
15
+
16
+ ## Author
17
+
18
+ - Minero Aoki
19
+ - Shimpei Kodama
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby

# Executable entry point for the streaming-load dispatcher daemon.
# Loads the bundled gems when running under Bundler, then hands
# control to the Dispatcher's command-line driver.
Bundler.require(:default) if defined?(Bundler)
require 'bricolage/streamingload/dispatcher'

Bricolage::StreamingLoad::Dispatcher.main
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby

# Executable entry point for the streaming-load loader service.
# Loads the bundled gems when running under Bundler, then hands
# control to the LoaderService's command-line driver.
Bundler.require(:default) if defined?(Bundler)
require 'bricolage/streamingload/loaderservice'

Bricolage::StreamingLoad::LoaderService.main
@@ -0,0 +1,299 @@
1
+ require 'bricolage/datasource'
2
+ require 'bricolage/sqswrapper'
3
+ require 'securerandom'
4
+ require 'aws-sdk'
5
+ require 'json'
6
+ require 'time'
7
+
8
+ module Bricolage
9
+
10
+ class SQSDataSource < DataSource
11
+
12
+ declare_type 'sqs'
13
+
14
+ def initialize(region: 'ap-northeast-1', url:, access_key_id:, secret_access_key:,
15
+ visibility_timeout:, max_number_of_messages: 10, max_delete_batch_size: 10, wait_time_seconds: 20, noop: false)
16
+ @region = region
17
+ @url = url
18
+ @access_key_id = access_key_id
19
+ @secret_access_key = secret_access_key
20
+ @visibility_timeout = visibility_timeout
21
+ @max_number_of_messages = max_number_of_messages
22
+ @max_delete_batch_size = max_delete_batch_size
23
+ @wait_time_seconds = wait_time_seconds
24
+ @noop = noop
25
+ end
26
+
27
+ attr_reader :region
28
+ attr_reader :url
29
+ attr_reader :access_key_id
30
+ attr_reader :secret_access_key
31
+
32
+ def client
33
+ @client ||= begin
34
+ c = @noop ? DummySQSClient.new : Aws::SQS::Client.new(region: @region, access_key_id: @access_key_id, secret_access_key: @secret_access_key)
35
+ SQSClientWrapper.new(c, logger: logger)
36
+ end
37
+ end
38
+
39
+ #
40
+ # High-Level Polling Interface
41
+ #
42
+
43
+ def main_handler_loop(handlers)
44
+ trap_signals
45
+
46
+ n_zero = 0
47
+ until terminating?
48
+ insert_handler_wait(n_zero)
49
+ n_msg = handle_messages(handlers)
50
+ if n_msg == 0
51
+ n_zero += 1
52
+ else
53
+ n_zero = 0
54
+ end
55
+ end
56
+ @delete_message_buffer.flush if @delete_message_buffer
57
+ logger.info "shutdown gracefully"
58
+ end
59
+
60
+ def trap_signals
61
+ # Allows graceful stop
62
+ Signal.trap(:TERM) {
63
+ initiate_terminate
64
+ }
65
+ Signal.trap(:INT) {
66
+ initiate_terminate
67
+ }
68
+ end
69
+
70
+ def initiate_terminate
71
+ # No I/O allowed in this method
72
+ @terminating = true
73
+ end
74
+
75
+ def terminating?
76
+ @terminating
77
+ end
78
+
79
+ def insert_handler_wait(n_zero)
80
+ sec = 2 ** [n_zero, 6].min # max 64s
81
+ logger.info "queue wait: sleep #{sec}" if n_zero > 0
82
+ sleep sec
83
+ end
84
+
85
+ def handle_messages(handlers:, message_class:)
86
+ n_msg = foreach_message(message_class) do |msg|
87
+ logger.debug "handling message: #{msg.inspect}" if logger.debug?
88
+ mid = "handle_#{msg.message_type}"
89
+ # just ignore unknown event to make app migration easy
90
+ if handlers.respond_to?(mid, true)
91
+ handlers.__send__(mid, msg)
92
+ else
93
+ logger.error "unknown SQS message type: #{msg.message_type.inspect} (message-id: #{msg.message_id})"
94
+ end
95
+ end
96
+ n_msg
97
+ end
98
+
99
+ def foreach_message(message_class, &block)
100
+ result = receive_messages()
101
+ unless result and result.successful?
102
+ logger.error "ReceiveMessage failed: #{result ? result.error.message : '(result=nil)'}"
103
+ return nil
104
+ end
105
+ logger.info "receive #{result.messages.size} messages" unless result.messages.empty?
106
+ msgs = message_class.for_sqs_result(result)
107
+ msgs.each(&block)
108
+ msgs.size
109
+ end
110
+
111
+ #
112
+ # API-Level Interface
113
+ #
114
+
115
+ def receive_messages
116
+ result = client.receive_message(
117
+ queue_url: @url,
118
+ attribute_names: ["All"],
119
+ message_attribute_names: ["All"],
120
+ max_number_of_messages: @max_number_of_messages,
121
+ visibility_timeout: @visibility_timeout,
122
+ wait_time_seconds: @wait_time_seconds
123
+ )
124
+ result
125
+ end
126
+
127
+ def delete_message(msg)
128
+ # TODO: use batch request?
129
+ client.delete_message(
130
+ queue_url: @url,
131
+ receipt_handle: msg.receipt_handle
132
+ )
133
+ end
134
+
135
+ def buffered_delete_message(msg)
136
+ delete_message_buffer.put(msg)
137
+ end
138
+
139
+ def delete_message_buffer
140
+ @delete_message_buffer ||= DeleteMessageBuffer.new(client, @url, @max_delete_batch_size, logger)
141
+ end
142
+
143
+ def put(msg)
144
+ send_message(msg)
145
+ end
146
+
147
+ def send_message(msg)
148
+ client.send_message(
149
+ queue_url: @url,
150
+ message_body: { 'Records' => [msg.body] }.to_json,
151
+ delay_seconds: msg.delay_seconds
152
+ )
153
+ end
154
+
155
+ class DeleteMessageBuffer
156
+
157
+ def initialize(sqs_client, url, max_buffer_size, logger)
158
+ @sqs_client = sqs_client
159
+ @url = url
160
+ @max_buffer_size = max_buffer_size
161
+ @logger = logger
162
+ @buf = {}
163
+ @retry_counts = Hash.new(0)
164
+ end
165
+
166
+ MAX_RETRY_COUNT = 3
167
+
168
+ def put(msg)
169
+ @buf[SecureRandom.uuid] = msg
170
+ flush if size >= @max_buffer_size
171
+ end
172
+
173
+ def size
174
+ @buf.size
175
+ end
176
+
177
+ def flush
178
+ return unless size > 0
179
+ response = @sqs_client.delete_message_batch({
180
+ queue_url: @url,
181
+ entries: @buf.to_a.map {|item| {id: item[0], receipt_handle: item[1].receipt_handle} }
182
+ })
183
+ clear_successes(response.successful)
184
+ retry_failures(response.failed)
185
+ @logger.debug "DeleteMessageBatch executed: #{response.successful.size} succeeded, #{response.failed.size} failed."
186
+ end
187
+
188
+ private
189
+
190
+ def clear_successes(successes)
191
+ successes.each do |s|
192
+ @buf.delete s.id
193
+ end
194
+ end
195
+
196
+ def retry_failures(failures)
197
+ return unless failures.size > 0
198
+ failures.each do |f|
199
+ @logger.info "DeleteMessageBatch failed to retry for: id=#{f.id}, sender_fault=#{f.sender_fault}, code=#{f.code}, message=#{f.message}"
200
+ end
201
+ flush
202
+ @buf.keys.map {|k| @retry_counts[k] += 1 }
203
+ @retry_counts.select {|k, v| v >= MAX_RETRY_COUNT }.each do |k, v|
204
+ @logger.warn "DeleteMessageBatch failed #{MAX_RETRY_COUNT} times for: message_id=#{@buf[k].message_id}, receipt_handle=#{@buf[k].receipt_handle}"
205
+ @buf.delete k
206
+ @retry_counts.delete k
207
+ end
208
+ end
209
+
210
+ end # DeleteMessageBuffer
211
+
212
+ end # class SQSDataSource
213
+
214
+
215
+ class SQSMessage
216
+
217
+ SQS_EVENT_SOURCE = 'bricolage:system'
218
+
219
+ # Writer interface
220
+ def SQSMessage.create(
221
+ name:,
222
+ time: Time.now.getutc,
223
+ source: SQS_EVENT_SOURCE,
224
+ delay_seconds: 0,
225
+ **message_params)
226
+ new(name: name, time: time, source: source, delay_seconds: delay_seconds, **message_params)
227
+ end
228
+
229
+ def SQSMessage.for_sqs_result(result)
230
+ result.messages.flat_map {|msg|
231
+ body = JSON.parse(msg.body)
232
+ records = body['Records'] or next []
233
+ records.map {|rec| get_concrete_class(msg, rec).for_sqs_record(msg, rec) }
234
+ }
235
+ end
236
+
237
+ # abstract SQSMessage.get_concrete_class(msg, rec)
238
+
239
+ def SQSMessage.for_sqs_record(msg, rec)
240
+ new(** SQSMessage.parse_sqs_record(msg, rec).merge(parse_sqs_record(msg, rec)))
241
+ end
242
+
243
+ def SQSMessage.parse_sqs_record(msg, rec)
244
+ time_str = rec['eventTime']
245
+ tm = time_str ? (Time.parse(time_str) rescue nil) : nil
246
+ {
247
+ message_id: msg.message_id,
248
+ receipt_handle: msg.receipt_handle,
249
+ name: rec['eventName'],
250
+ time: tm,
251
+ source: rec['eventSource']
252
+ }
253
+ end
254
+
255
+ def initialize(name:, time:, source:,
256
+ message_id: nil, receipt_handle: nil, delay_seconds: nil,
257
+ **message_params)
258
+ @name = name
259
+ @time = time
260
+ @source = source
261
+
262
+ @message_id = message_id
263
+ @receipt_handle = receipt_handle
264
+
265
+ @delay_seconds = delay_seconds
266
+
267
+ init_message(**message_params)
268
+ end
269
+
270
+ # abstract init_message(**message_params)
271
+
272
+ attr_reader :name
273
+ attr_reader :time
274
+ attr_reader :source
275
+
276
+ # Valid only for received messages
277
+
278
+ attr_reader :message_id
279
+ attr_reader :receipt_handle
280
+
281
+ # Valid only for sending messages
282
+
283
+ attr_reader :delay_seconds
284
+
285
+ def body
286
+ obj = {}
287
+ [
288
+ ['eventName', @name],
289
+ ['eventTime', (@time ? @time.iso8601 : nil)],
290
+ ['eventSource', @source]
291
+ ].each do |name, value|
292
+ obj[name] = value if value
293
+ end
294
+ obj
295
+ end
296
+
297
+ end # class SQSMessage
298
+
299
+ end # module Bricolage
@@ -0,0 +1,77 @@
1
+ require 'json'
2
+
3
+ module Bricolage
4
+
5
+ class SQSClientWrapper
6
+ def initialize(sqs, logger:)
7
+ @sqs = sqs
8
+ @logger = logger
9
+ end
10
+
11
+ def receive_message(**args)
12
+ @logger.debug "receive_message(#{args.inspect})"
13
+ @sqs.receive_message(**args)
14
+ end
15
+
16
+ def send_message(**args)
17
+ @logger.debug "send_message(#{args.inspect})"
18
+ @sqs.send_message(**args)
19
+ end
20
+
21
+ def delete_message(**args)
22
+ @logger.debug "delete_message(#{args.inspect})"
23
+ @sqs.delete_message(**args)
24
+ end
25
+
26
+ def delete_message_batch(**args)
27
+ @logger.debug "delete_message_batch(#{args.inspect})"
28
+ @sqs.delete_message_batch(**args)
29
+ end
30
+ end
31
+
32
+
33
+ class DummySQSClient
34
+ def initialize(queue = [])
35
+ @queue = queue
36
+ end
37
+
38
+ def receive_message(**args)
39
+ msg_recs = @queue.shift or return EMPTY_RESULT
40
+ msgs = msg_recs.map {|recs| Message.new({'Records' => recs}.to_json) }
41
+ Result.new(true, msgs)
42
+ end
43
+
44
+ def send_message(**args)
45
+ SUCCESS_RESULT
46
+ end
47
+
48
+ def delete_message(**args)
49
+ SUCCESS_RESULT
50
+ end
51
+
52
+ class Result
53
+ def initialize(successful, messages = nil)
54
+ @successful = successful
55
+ @messages = messages
56
+ end
57
+
58
+ def successful?
59
+ @successful
60
+ end
61
+
62
+ attr_reader :messages
63
+ end
64
+
65
+ SUCCESS_RESULT = Result.new(true)
66
+ EMPTY_RESULT = Result.new(true, [])
67
+
68
+ class Message
69
+ def initialize(body)
70
+ @body = body
71
+ end
72
+
73
+ attr_reader :body
74
+ end
75
+ end
76
+
77
+ end # module Bricolage
@@ -0,0 +1,181 @@
1
+ require 'bricolage/exception'
2
+ require 'bricolage/version'
3
+ require 'bricolage/sqsdatasource'
4
+ require 'bricolage/logger'
5
+ require 'bricolage/streamingload/event'
6
+ require 'bricolage/streamingload/objectbuffer'
7
+ require 'bricolage/streamingload/urlpatterns'
8
+ require 'aws-sdk'
9
+ require 'yaml'
10
+ require 'optparse'
11
+ require 'fileutils'
12
+
13
+ module Bricolage
14
+
15
+ module StreamingLoad
16
+
17
+ class Dispatcher
18
+
19
+ def Dispatcher.main
20
+ opts = DispatcherOptions.new(ARGV)
21
+ opts.parse
22
+ unless opts.rest_arguments.size == 1
23
+ $stderr.puts opts.usage
24
+ exit 1
25
+ end
26
+ config_path, * = opts.rest_arguments
27
+ config = YAML.load(File.read(config_path))
28
+ logger = opts.log_file_path ? new_logger(opts.log_file_path, config) : nil
29
+ ctx = Context.for_application('.', environment: opts.environment, logger: logger)
30
+ event_queue = ctx.get_data_source('sqs', config.fetch('event-queue-ds'))
31
+ task_queue = ctx.get_data_source('sqs', config.fetch('task-queue-ds'))
32
+
33
+ object_buffer = ObjectBuffer.new(
34
+ control_data_source: ctx.get_data_source('sql', config.fetch('ctl-postgres-ds')),
35
+ logger: ctx.logger
36
+ )
37
+
38
+ url_patterns = URLPatterns.for_config(config.fetch('url_patterns'))
39
+
40
+ dispatcher = Dispatcher.new(
41
+ event_queue: event_queue,
42
+ task_queue: task_queue,
43
+ object_buffer: object_buffer,
44
+ url_patterns: url_patterns,
45
+ dispatch_interval: 60,
46
+ logger: ctx.logger
47
+ )
48
+
49
+ Process.daemon(true) if opts.daemon?
50
+ create_pid_file opts.pid_file_path if opts.pid_file_path
51
+ dispatcher.set_dispatch_timer
52
+ dispatcher.event_loop
53
+ end
54
+
55
+ def Dispatcher.new_logger(path, config)
56
+ Logger.new(
57
+ device: path,
58
+ rotation_period: config.fetch('log-rotation-period', 'daily'),
59
+ rotation_size: config.fetch('log-rotation-size', nil)
60
+ )
61
+ end
62
+
63
+ def Dispatcher.create_pid_file(path)
64
+ File.open(path, 'w') {|f|
65
+ f.puts $$
66
+ }
67
+ rescue
68
+ # ignore
69
+ end
70
+
71
+ def initialize(event_queue:, task_queue:, object_buffer:, url_patterns:, dispatch_interval:, logger:)
72
+ @event_queue = event_queue
73
+ @task_queue = task_queue
74
+ @object_buffer = object_buffer
75
+ @url_patterns = url_patterns
76
+ @dispatch_interval = dispatch_interval
77
+ @dispatch_message_id = nil
78
+ @logger = logger
79
+ end
80
+
81
+ def event_loop
82
+ @event_queue.main_handler_loop(handlers: self, message_class: Event)
83
+ end
84
+
85
+ def handle_shutdown(e)
86
+ @event_queue.initiate_terminate
87
+ @event_queue.delete_message(e)
88
+ end
89
+
90
+ def handle_data(e)
91
+ unless e.created?
92
+ @event_queue.delete_message(e)
93
+ return
94
+ end
95
+ obj = e.loadable_object(@url_patterns)
96
+ @object_buffer.put(obj)
97
+ @event_queue.buffered_delete_message(e)
98
+ end
99
+
100
+ def handle_dispatch(e)
101
+ if @dispatch_message_id == e.message_id
102
+ tasks = @object_buffer.flush_tasks
103
+ tasks.each {|task| @task_queue.put task }
104
+ set_dispatch_timer
105
+ end
106
+ @event_queue.delete_message(e)
107
+ end
108
+
109
+ def set_dispatch_timer
110
+ resp = @event_queue.send_message DispatchEvent.create(delay_seconds: @dispatch_interval)
111
+ @dispatch_message_id = resp.message_id
112
+ end
113
+
114
+ def delete_events(events)
115
+ events.each do |e|
116
+ @event_queue.delete_message(e)
117
+ end
118
+ end
119
+
120
+ end
121
+
122
+
123
+ class DispatcherOptions
124
+
125
+ def initialize(argv)
126
+ @argv = argv
127
+ @daemon = false
128
+ @log_file_path = nil
129
+ @pid_file_path = nil
130
+ @rest_arguments = nil
131
+
132
+ @opts = opts = OptionParser.new("Usage: #{$0} CONFIG_PATH")
133
+ opts.on('--task-id=id', 'Execute oneshot load task (implicitly disables daemon mode).') {|task_id|
134
+ @task_id = task_id
135
+ }
136
+ opts.on('-e', '--environment=NAME', "Sets execution environment [default: #{Context::DEFAULT_ENV}]") {|env|
137
+ @environment = env
138
+ }
139
+ opts.on('--daemon', 'Becomes daemon in server mode.') {
140
+ @daemon = true
141
+ }
142
+ opts.on('--log-file=PATH', 'Log file path') {|path|
143
+ @log_file_path = path
144
+ }
145
+ opts.on('--pid-file=PATH', 'Creates PID file.') {|path|
146
+ @pid_file_path = path
147
+ }
148
+ opts.on('--help', 'Prints this message and quit.') {
149
+ puts opts.help
150
+ exit 0
151
+ }
152
+ opts.on('--version', 'Prints version and quit.') {
153
+ puts "#{File.basename($0)} version #{VERSION}"
154
+ exit 0
155
+ }
156
+ end
157
+
158
+ def usage
159
+ @opts.help
160
+ end
161
+
162
+ def parse
163
+ @opts.parse!(@argv)
164
+ @rest_arguments = @argv.dup
165
+ rescue OptionParser::ParseError => err
166
+ raise OptionError, err.message
167
+ end
168
+
169
+ attr_reader :rest_arguments, :environment, :log_file_path
170
+
171
+ def daemon?
172
+ @daemon
173
+ end
174
+
175
+ attr_reader :pid_file_path
176
+
177
+ end
178
+
179
+ end # module StreamingLoad
180
+
181
+ end # module Bricolage