bricolage-streamingload 0.1.0 → 0.3.0

Unified diff of the bricolage-streamingload gem between versions 0.1.0 and 0.3.0.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ed2342a1e0f9db4cbe53fbb97be7b72dcd362def
4
- data.tar.gz: 1d1705ad9811bb4becee2de4168bb7f40f9a0b63
3
+ metadata.gz: d81ff86cb9addccb7ca9db4d240218679b1f72f9
4
+ data.tar.gz: f2ec045c994f1c6b619695f74a13aebcc9318722
5
5
  SHA512:
6
- metadata.gz: 9a09e1ec19569e03a13e01e8d2aa767de54bd21ac22cb455c9ccb92798a410f655f3435b4b0e7ff4e8bde429dcf2fd904a75f5f3d303f7bf815cfa0ca08b5631
7
- data.tar.gz: 3cfb076a0144c04592db35309236e2bfce5021525e34fe26b32e5f61ef000572d8d6641bdbee078f741a0f79a85d9ef9fd4cc47ce167338cec1ad4577c051bd2
6
+ metadata.gz: a78c3b8f35f8d10cbc3da301667ea811874ccadb7653b02388990d63afc2183ee3d0c4e357dccc7799721e1dcd8e33b88e9b3d66147903c32ff109595bced0f6
7
+ data.tar.gz: a3632f13d3ea039aa690deca646659e0d89ad636b2af55abf5f7011f54cfecddcef53d57152de513bcbaf5c03671af7a512a10781e7846efc60af9c752b8c364
data/README.md CHANGED
@@ -11,7 +11,7 @@ See LICENSES file for details.
11
11
 
12
12
  ## Running Test
13
13
 
14
- % rake test
14
+ % bundle exec rake test
15
15
 
16
16
  ## Author
17
17
 
@@ -0,0 +1,40 @@
1
+ require 'bricolage/datasource'
2
+ require 'aws-sdk'
3
+ require 'json'
4
+ require 'time'
5
+
6
+ module Bricolage
7
+
8
+ class SNSTopicDataSource < DataSource
9
+
10
+ declare_type 'sns'
11
+
12
+ def initialize(region: 'ap-northeast-1', topic_arn:, access_key_id:, secret_access_key:)
13
+ @region = region
14
+ @topic_arn = topic_arn
15
+ @access_key_id = access_key_id
16
+ @secret_access_key = secret_access_key
17
+ @client = Aws::SNS::Client.new(region: region, access_key_id: access_key_id, secret_access_key: secret_access_key)
18
+ @topic = Aws::SNS::Topic.new(topic_arn, client: @client)
19
+ end
20
+
21
+ attr_reader :region
22
+ attr_reader :client, :topic
23
+
24
+ def publish(message)
25
+ @topic.publish(build_message(message))
26
+ end
27
+
28
+ alias write publish
29
+
30
+ def close
31
+ # do nothing
32
+ end
33
+
34
+ def build_message(message)
35
+ {message: message}
36
+ end
37
+
38
+ end # class SNSTopicDataSource
39
+
40
+ end # module Bricolage
@@ -12,14 +12,13 @@ module Bricolage
12
12
  declare_type 'sqs'
13
13
 
14
14
  def initialize(region: 'ap-northeast-1', url:, access_key_id:, secret_access_key:,
15
- visibility_timeout:, max_number_of_messages: 10, max_delete_batch_size: 10, wait_time_seconds: 20, noop: false)
15
+ visibility_timeout:, max_number_of_messages: 10, wait_time_seconds: 20, noop: false)
16
16
  @region = region
17
17
  @url = url
18
18
  @access_key_id = access_key_id
19
19
  @secret_access_key = secret_access_key
20
20
  @visibility_timeout = visibility_timeout
21
21
  @max_number_of_messages = max_number_of_messages
22
- @max_delete_batch_size = max_delete_batch_size
23
22
  @wait_time_seconds = wait_time_seconds
24
23
  @noop = noop
25
24
  end
@@ -40,20 +39,21 @@ module Bricolage
40
39
  # High-Level Polling Interface
41
40
  #
42
41
 
43
- def main_handler_loop(handlers)
42
+ def main_handler_loop(handlers:, message_class:)
44
43
  trap_signals
45
44
 
46
45
  n_zero = 0
47
46
  until terminating?
48
47
  insert_handler_wait(n_zero)
49
- n_msg = handle_messages(handlers)
48
+ n_msg = handle_messages(handlers: handlers, message_class: message_class)
50
49
  if n_msg == 0
51
50
  n_zero += 1
52
51
  else
53
52
  n_zero = 0
54
53
  end
54
+ delete_message_buffer.flush
55
55
  end
56
- @delete_message_buffer.flush if @delete_message_buffer
56
+ delete_message_buffer.flush_force
57
57
  logger.info "shutdown gracefully"
58
58
  end
59
59
 
@@ -115,8 +115,6 @@ module Bricolage
115
115
  def receive_messages
116
116
  result = client.receive_message(
117
117
  queue_url: @url,
118
- attribute_names: ["All"],
119
- message_attribute_names: ["All"],
120
118
  max_number_of_messages: @max_number_of_messages,
121
119
  visibility_timeout: @visibility_timeout,
122
120
  wait_time_seconds: @wait_time_seconds
@@ -125,19 +123,18 @@ module Bricolage
125
123
  end
126
124
 
127
125
  def delete_message(msg)
128
- # TODO: use batch request?
129
126
  client.delete_message(
130
127
  queue_url: @url,
131
128
  receipt_handle: msg.receipt_handle
132
129
  )
133
130
  end
134
131
 
135
- def buffered_delete_message(msg)
132
+ def delete_message_async(msg)
136
133
  delete_message_buffer.put(msg)
137
134
  end
138
135
 
139
136
  def delete_message_buffer
140
- @delete_message_buffer ||= DeleteMessageBuffer.new(client, @url, @max_delete_batch_size, logger)
137
+ @delete_message_buffer ||= DeleteMessageBuffer.new(client, @url, logger)
141
138
  end
142
139
 
143
140
  def put(msg)
@@ -154,60 +151,107 @@ module Bricolage
154
151
 
155
152
  class DeleteMessageBuffer
156
153
 
157
- def initialize(sqs_client, url, max_buffer_size, logger)
154
+ BATCH_SIZE_MAX = 10 # SQS system limit
155
+ MAX_RETRY_COUNT = 3
156
+
157
+ def initialize(sqs_client, url, logger)
158
158
  @sqs_client = sqs_client
159
159
  @url = url
160
- @max_buffer_size = max_buffer_size
161
160
  @logger = logger
162
161
  @buf = {}
163
- @retry_counts = Hash.new(0)
164
162
  end
165
163
 
166
- MAX_RETRY_COUNT = 3
167
-
168
164
  def put(msg)
169
- @buf[SecureRandom.uuid] = msg
170
- flush if size >= @max_buffer_size
165
+ ent = Entry.new(msg)
166
+ @buf[ent.id] = ent
167
+ flush if full?
168
+ end
169
+
170
+ def empty?
171
+ @buf.empty?
172
+ end
173
+
174
+ def full?
175
+ @buf.size >= BATCH_SIZE_MAX
171
176
  end
172
177
 
173
178
  def size
174
179
  @buf.size
175
180
  end
176
181
 
177
- def flush
178
- return unless size > 0
179
- response = @sqs_client.delete_message_batch({
180
- queue_url: @url,
181
- entries: @buf.to_a.map {|item| {id: item[0], receipt_handle: item[1].receipt_handle} }
182
- })
183
- clear_successes(response.successful)
184
- retry_failures(response.failed)
185
- @logger.debug "DeleteMessageBatch executed: #{response.successful.size} succeeded, #{response.failed.size} failed."
182
+ # Flushes all delayed delete requests, including pending requests
183
+ def flush_force
184
+ # Retries continue for only ~2 minutes (16s+32s+64s), so now+1h is guaranteed to be later than every @next_issue_time
185
+ flush(Time.now + 3600)
186
186
  end
187
187
 
188
- private
189
-
190
- def clear_successes(successes)
191
- successes.each do |s|
192
- @buf.delete s.id
188
+ def flush(now = Time.now)
189
+ entries = @buf.values.select {|ent| ent.issuable?(now) }
190
+ return if entries.empty?
191
+ @logger.info "flushing async delete requests"
192
+ entries.each_slice(BATCH_SIZE_MAX) do |ents|
193
+ res = @sqs_client.delete_message_batch(queue_url: @url, entries: ents.map(&:request_params))
194
+ @logger.info "DeleteMessageBatch executed: #{res.successful.size} succeeded, #{res.failed.size} failed"
195
+ issued_time = Time.now
196
+ res.successful.each do |s|
197
+ @buf.delete s.id
198
+ end
199
+ res.failed.each do |f|
200
+ ent = @buf[f.id]
201
+ unless ent
202
+ @logger.error "[BUG] no corresponding DeleteMessageBuffer entry: id=#{f.id}"
203
+ next
204
+ end
205
+ ent.failed!(issued_time)
206
+ if ent.too_many_failure?
207
+ @logger.warn "DeleteMessage failure count exceeded the limit; give up: message_id=#{ent.message.message_id}, receipt_handle=#{ent.message.receipt_handle}"
208
+ @buf.delete f.id
209
+ next
210
+ end
211
+ @logger.info "DeleteMessageBatch partially failed (#{ent.n_failure} times): sender_fault=#{f.sender_fault}, code=#{f.code}, message=#{f.message}"
212
+ end
193
213
  end
194
214
  end
195
215
 
196
- def retry_failures(failures)
197
- return unless failures.size > 0
198
- failures.each do |f|
199
- @logger.info "DeleteMessageBatch failed to retry for: id=#{f.id}, sender_fault=#{f.sender_fault}, code=#{f.code}, message=#{f.message}"
216
+ class Entry
217
+ def initialize(msg)
218
+ @message = msg
219
+ @id = SecureRandom.uuid
220
+ @n_failure = 0
221
+ @last_issued_time = nil
222
+ @next_issue_time = nil
200
223
  end
201
- flush
202
- @buf.keys.map {|k| @retry_counts[k] += 1 }
203
- @retry_counts.select {|k, v| v >= MAX_RETRY_COUNT }.each do |k, v|
204
- @logger.warn "DeleteMessageBatch failed #{MAX_RETRY_COUNT} times for: message_id=#{@buf[k].message_id}, receipt_handle=#{@buf[k].receipt_handle}"
205
- @buf.delete k
206
- @retry_counts.delete k
224
+
225
+ attr_reader :id
226
+ attr_reader :message
227
+ attr_reader :n_failure
228
+
229
+ def issuable?(now)
230
+ @n_failure == 0 or now > @next_issue_time
231
+ end
232
+
233
+ def failed!(issued_time = Time.now)
234
+ @n_failure += 1
235
+ @last_issued_time = issued_time
236
+ @next_issue_time = @last_issued_time + next_retry_interval
237
+ end
238
+
239
+ def next_retry_interval
240
+ # 16s, 32s, 64s -> total 2m
241
+ 2 ** (3 + @n_failure)
242
+ end
243
+
244
+ def too_many_failure?
245
+ # (first request) + (3 retry requests) = (4 requests)
246
+ @n_failure > MAX_RETRY_COUNT
247
+ end
248
+
249
+ def request_params
250
+ { id: @id, receipt_handle: @message.receipt_handle }
207
251
  end
208
252
  end
209
253
 
210
- end # DeleteMessageBuffer
254
+ end # class DeleteMessageBuffer
211
255
 
212
256
  end # class SQSDataSource
213
257
 
@@ -0,0 +1,19 @@
1
+ module Bricolage
2
+ module StreamingLoad
3
+ class AlertingLogger
4
+ extend Forwardable
5
+
6
+ def initialize(logger: , sns_datasource: , alert_level: 'warn')
7
+ @logger = logger
8
+ @sns_logger = Bricolage::Logger.new(device: sns_datasource)
9
+ @sns_logger.level = Kernel.const_get("Logger").const_get(alert_level.upcase)
10
+ end
11
+
12
+ %w(log debug info warn error fatal unknown).each do |m|
13
+ define_method(m) do |*args|
14
+ [@logger, @sns_logger].map {|t| t.send(m, *args) }
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -5,6 +5,7 @@ require 'bricolage/logger'
5
5
  require 'bricolage/streamingload/event'
6
6
  require 'bricolage/streamingload/objectbuffer'
7
7
  require 'bricolage/streamingload/urlpatterns'
8
+ require 'bricolage/streamingload/alertinglogger'
8
9
  require 'aws-sdk'
9
10
  require 'yaml'
10
11
  require 'optparse'
@@ -29,10 +30,15 @@ module Bricolage
29
30
  ctx = Context.for_application('.', environment: opts.environment, logger: logger)
30
31
  event_queue = ctx.get_data_source('sqs', config.fetch('event-queue-ds'))
31
32
  task_queue = ctx.get_data_source('sqs', config.fetch('task-queue-ds'))
33
+ alert_logger = AlertingLogger.new(
34
+ logger: ctx.logger,
35
+ sns_datasource: ctx.get_data_source('sns', config.fetch('sns-ds')),
36
+ alert_level: config.fetch('alert-level', 'warn')
37
+ )
32
38
 
33
39
  object_buffer = ObjectBuffer.new(
34
40
  control_data_source: ctx.get_data_source('sql', config.fetch('ctl-postgres-ds')),
35
- logger: ctx.logger
41
+ logger: alert_logger
36
42
  )
37
43
 
38
44
  url_patterns = URLPatterns.for_config(config.fetch('url_patterns'))
@@ -43,7 +49,7 @@ module Bricolage
43
49
  object_buffer: object_buffer,
44
50
  url_patterns: url_patterns,
45
51
  dispatch_interval: 60,
46
- logger: ctx.logger
52
+ logger: alert_logger
47
53
  )
48
54
 
49
55
  Process.daemon(true) if opts.daemon?
@@ -84,17 +90,18 @@ module Bricolage
84
90
 
85
91
  def handle_shutdown(e)
86
92
  @event_queue.initiate_terminate
93
+ # Delete this event immediately
87
94
  @event_queue.delete_message(e)
88
95
  end
89
96
 
90
97
  def handle_data(e)
91
98
  unless e.created?
92
- @event_queue.delete_message(e)
99
+ @event_queue.delete_message_async(e)
93
100
  return
94
101
  end
95
102
  obj = e.loadable_object(@url_patterns)
96
103
  @object_buffer.put(obj)
97
- @event_queue.buffered_delete_message(e)
104
+ @event_queue.delete_message_async(e)
98
105
  end
99
106
 
100
107
  def handle_dispatch(e)
@@ -103,18 +110,13 @@ module Bricolage
103
110
  tasks.each {|task| @task_queue.put task }
104
111
  set_dispatch_timer
105
112
  end
113
+ # Delete this event immediately
106
114
  @event_queue.delete_message(e)
107
115
  end
108
116
 
109
117
  def set_dispatch_timer
110
- resp = @event_queue.send_message DispatchEvent.create(delay_seconds: @dispatch_interval)
111
- @dispatch_message_id = resp.message_id
112
- end
113
-
114
- def delete_events(events)
115
- events.each do |e|
116
- @event_queue.delete_message(e)
117
- end
118
+ res = @event_queue.send_message(DispatchEvent.create(delay_seconds: @dispatch_interval))
119
+ @dispatch_message_id = res.message_id
118
120
  end
119
121
 
120
122
  end
@@ -1,6 +1,7 @@
1
1
  require 'bricolage/sqsdatasource'
2
2
  require 'bricolage/streamingload/task'
3
3
  require 'bricolage/streamingload/loader'
4
+ require 'bricolage/streamingload/alertinglogger'
4
5
  require 'bricolage/logger'
5
6
  require 'bricolage/exception'
6
7
  require 'bricolage/version'
@@ -25,13 +26,18 @@ module Bricolage
25
26
  ctx = Context.for_application('.', environment: opts.environment, logger: logger)
26
27
  redshift_ds = ctx.get_data_source('sql', config.fetch('redshift-ds'))
27
28
  task_queue = ctx.get_data_source('sqs', config.fetch('task-queue-ds'))
29
+ alert_logger = AlertingLogger.new(
30
+ logger: ctx.logger,
31
+ sns_datasource: ctx.get_data_source('sns', config.fetch('sns-ds')),
32
+ alert_level: config.fetch('alert-level', 'warn')
33
+ )
28
34
 
29
35
  service = new(
30
36
  context: ctx,
31
37
  control_data_source: ctx.get_data_source('sql', config.fetch('ctl-postgres-ds')),
32
38
  data_source: redshift_ds,
33
39
  task_queue: task_queue,
34
- logger: ctx.logger
40
+ logger: alert_logger
35
41
  )
36
42
 
37
43
  if opts.task_id
@@ -89,6 +95,7 @@ module Bricolage
89
95
  loadtask = load_task(task.id, force: task.force)
90
96
  return if loadtask.disabled # skip if disabled, but don't delete sqs msg
91
97
  execute_task(loadtask)
98
+ # Delete load task immediately (do not use async delete)
92
99
  @task_queue.delete_message(task)
93
100
  end
94
101
 
@@ -1,5 +1,5 @@
1
1
  module Bricolage
2
2
  module StreamingLoad
3
- VERSION = '0.1.0'
3
+ VERSION = '0.3.0'
4
4
  end
5
5
  end
@@ -5,12 +5,13 @@ module Bricolage::StreamingLoad
5
5
 
6
6
  class TestEvent < Test::Unit::TestCase
7
7
 
8
- def new_s3event(message_id: nil, receipt_handle: nil, name: nil, time: nil, region: nil, bucket: nil, key: nil, size: nil)
8
+ def new_s3event(message_id: nil, receipt_handle: nil, name: nil, time: nil, source: nil, region: nil, bucket: nil, key: nil, size: nil)
9
9
  S3ObjectEvent.new(
10
10
  message_id: message_id,
11
11
  receipt_handle: receipt_handle,
12
12
  name: name,
13
13
  time: time,
14
+ source: source,
14
15
  region: region,
15
16
  bucket: bucket,
16
17
  key: key,
@@ -0,0 +1,113 @@
1
+ require 'test/unit'
2
+ require 'bricolage/streamingload/event'
3
+ require 'bricolage/logger'
4
+
5
+ module Bricolage
6
+
7
+ class TestSQSDataSource < Test::Unit::TestCase
8
+
9
+ def new_sqs_ds(mock_client = nil)
10
+ SQSDataSource.new(
11
+ url: 'http://sqs/000000000000/queue-name',
12
+ access_key_id: 'access_key_id_1',
13
+ secret_access_key: 'secret_access_key_1',
14
+ visibility_timeout: 30
15
+ ).tap {|ds|
16
+ logger = NullLogger.new
17
+ #logger = Bricolage::Logger.default
18
+ ds.__send__(:initialize_base, 'name', nil, logger)
19
+ ds.instance_variable_set(:@client, mock_client) if mock_client
20
+ }
21
+ end
22
+
23
+ class MockSQSClient
24
+ def initialize(&block)
25
+ @handler = block
26
+ end
27
+
28
+ def delete_message_batch(**args)
29
+ @handler.call(args)
30
+ end
31
+ end
32
+
33
+ class NullLogger
34
+ def debug(*args) end
35
+ def info(*args) end
36
+ def warn(*args) end
37
+ def error(*args) end
38
+ def exception(*args) end
39
+ def with_elapsed_time(*args) yield end
40
+ def elapsed_time(*args) yield end
41
+ end
42
+
43
+ def sqs_message(seq)
44
+ MockSQSMessage.new("message_id_#{seq}", "receipt_handle_#{seq}")
45
+ end
46
+
47
+ MockSQSMessage = Struct.new(:message_id, :receipt_handle)
48
+
49
+ class MockSQSResponse
50
+ def initialize(successful: [], failed: [])
51
+ @successful = successful
52
+ @failed = failed
53
+ end
54
+
55
+ attr_reader :successful
56
+ attr_reader :failed
57
+
58
+ Success = Struct.new(:id)
59
+ Failure = Struct.new(:id, :sender_fault, :code, :message)
60
+
61
+ def add_success_for(ent)
62
+ @successful.push Success.new(ent[:id])
63
+ end
64
+
65
+ def add_failure_for(ent)
66
+ @failed.push Failure.new(ent[:id], true, '400', 'some reason')
67
+ end
68
+ end
69
+
70
+ test "#delete_message_async" do
71
+ messages = [sqs_message(0), sqs_message(1), sqs_message(2)]
72
+ mock = MockSQSClient.new {|args|
73
+ entries = args[:entries]
74
+ if entries.size == 3
75
+ # first time
76
+ assert_equal messages[0].receipt_handle, entries[0][:receipt_handle]
77
+ assert_equal messages[1].receipt_handle, entries[1][:receipt_handle]
78
+ assert_equal messages[2].receipt_handle, entries[2][:receipt_handle]
79
+ MockSQSResponse.new.tap {|res|
80
+ res.add_success_for(entries[0])
81
+ res.add_failure_for(entries[1])
82
+ res.add_success_for(entries[2])
83
+ }
84
+ else
85
+ # second time
86
+ MockSQSResponse.new.tap {|res|
87
+ res.add_success_for(entries[0])
88
+ }
89
+ end
90
+ }
91
+ ds = new_sqs_ds(mock)
92
+ ds.delete_message_async(messages[0])
93
+ ds.delete_message_async(messages[1])
94
+ ds.delete_message_async(messages[2])
95
+
96
+ # first flush
97
+ flush_time = Time.now
98
+ ds.delete_message_buffer.flush(flush_time)
99
+ assert_equal 1, ds.delete_message_buffer.size
100
+ bufent = ds.delete_message_buffer.instance_variable_get(:@buf).values.first
101
+ assert_equal 'receipt_handle_1', bufent.message.receipt_handle
102
+ assert_equal 1, bufent.n_failure
103
+ assert_false bufent.issuable?(flush_time)
104
+ assert_true bufent.issuable?(flush_time + 180)
105
+
106
+ # second flush
107
+ ds.delete_message_buffer.flush(flush_time + 180)
108
+ assert_true ds.delete_message_buffer.empty?
109
+ end
110
+
111
+ end
112
+
113
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bricolage-streamingload
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
@@ -107,8 +107,10 @@ files:
107
107
  - README.md
108
108
  - bin/bricolage-streaming-dispatcher
109
109
  - bin/bricolage-streaming-loader
110
+ - lib/bricolage/snsdatasource.rb
110
111
  - lib/bricolage/sqsdatasource.rb
111
112
  - lib/bricolage/sqswrapper.rb
113
+ - lib/bricolage/streamingload/alertinglogger.rb
112
114
  - lib/bricolage/streamingload/dispatcher.rb
113
115
  - lib/bricolage/streamingload/event.rb
114
116
  - lib/bricolage/streamingload/loader.rb
@@ -121,6 +123,7 @@ files:
121
123
  - lib/bricolage/streamingload/version.rb
122
124
  - test/all.rb
123
125
  - test/streamingload/test_event.rb
126
+ - test/test_sqsdatasource.rb
124
127
  homepage: https://github.com/aamine/bricolage-streamingload
125
128
  licenses:
126
129
  - MIT