bricolage-streamingload 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ed2342a1e0f9db4cbe53fbb97be7b72dcd362def
4
- data.tar.gz: 1d1705ad9811bb4becee2de4168bb7f40f9a0b63
3
+ metadata.gz: d81ff86cb9addccb7ca9db4d240218679b1f72f9
4
+ data.tar.gz: f2ec045c994f1c6b619695f74a13aebcc9318722
5
5
  SHA512:
6
- metadata.gz: 9a09e1ec19569e03a13e01e8d2aa767de54bd21ac22cb455c9ccb92798a410f655f3435b4b0e7ff4e8bde429dcf2fd904a75f5f3d303f7bf815cfa0ca08b5631
7
- data.tar.gz: 3cfb076a0144c04592db35309236e2bfce5021525e34fe26b32e5f61ef000572d8d6641bdbee078f741a0f79a85d9ef9fd4cc47ce167338cec1ad4577c051bd2
6
+ metadata.gz: a78c3b8f35f8d10cbc3da301667ea811874ccadb7653b02388990d63afc2183ee3d0c4e357dccc7799721e1dcd8e33b88e9b3d66147903c32ff109595bced0f6
7
+ data.tar.gz: a3632f13d3ea039aa690deca646659e0d89ad636b2af55abf5f7011f54cfecddcef53d57152de513bcbaf5c03671af7a512a10781e7846efc60af9c752b8c364
data/README.md CHANGED
@@ -11,7 +11,7 @@ See LICENSES file for details.
11
11
 
12
12
  ## Running Test
13
13
 
14
- % rake test
14
+ % bundle exec rake test
15
15
 
16
16
  ## Author
17
17
 
@@ -0,0 +1,40 @@
1
+ require 'bricolage/datasource'
2
+ require 'aws-sdk'
3
+ require 'json'
4
+ require 'time'
5
+
6
+ module Bricolage
7
+
8
+ class SNSTopicDataSource < DataSource
9
+
10
+ declare_type 'sns'
11
+
12
+ def initialize(region: 'ap-northeast-1', topic_arn:, access_key_id:, secret_access_key:)
13
+ @region = region
14
+ @topic_arn = topic_arn
15
+ @access_key_id = access_key_id
16
+ @secret_access_key = secret_access_key
17
+ @client = Aws::SNS::Client.new(region: region, access_key_id: access_key_id, secret_access_key: secret_access_key)
18
+ @topic = Aws::SNS::Topic.new(topic_arn, client: @client)
19
+ end
20
+
21
+ attr_reader :region
22
+ attr_reader :client, :topic
23
+
24
+ def publish(message)
25
+ @topic.publish(build_message(message))
26
+ end
27
+
28
+ alias write publish
29
+
30
+ def close
31
+ # do nothing
32
+ end
33
+
34
+ def build_message(message)
35
+ {message: message}
36
+ end
37
+
38
+ end # class SNSTopicDataSource
39
+
40
+ end # module Bricolage
@@ -12,14 +12,13 @@ module Bricolage
12
12
  declare_type 'sqs'
13
13
 
14
14
  def initialize(region: 'ap-northeast-1', url:, access_key_id:, secret_access_key:,
15
- visibility_timeout:, max_number_of_messages: 10, max_delete_batch_size: 10, wait_time_seconds: 20, noop: false)
15
+ visibility_timeout:, max_number_of_messages: 10, wait_time_seconds: 20, noop: false)
16
16
  @region = region
17
17
  @url = url
18
18
  @access_key_id = access_key_id
19
19
  @secret_access_key = secret_access_key
20
20
  @visibility_timeout = visibility_timeout
21
21
  @max_number_of_messages = max_number_of_messages
22
- @max_delete_batch_size = max_delete_batch_size
23
22
  @wait_time_seconds = wait_time_seconds
24
23
  @noop = noop
25
24
  end
@@ -40,20 +39,21 @@ module Bricolage
40
39
  # High-Level Polling Interface
41
40
  #
42
41
 
43
- def main_handler_loop(handlers)
42
+ def main_handler_loop(handlers:, message_class:)
44
43
  trap_signals
45
44
 
46
45
  n_zero = 0
47
46
  until terminating?
48
47
  insert_handler_wait(n_zero)
49
- n_msg = handle_messages(handlers)
48
+ n_msg = handle_messages(handlers: handlers, message_class: message_class)
50
49
  if n_msg == 0
51
50
  n_zero += 1
52
51
  else
53
52
  n_zero = 0
54
53
  end
54
+ delete_message_buffer.flush
55
55
  end
56
- @delete_message_buffer.flush if @delete_message_buffer
56
+ delete_message_buffer.flush_force
57
57
  logger.info "shutdown gracefully"
58
58
  end
59
59
 
@@ -115,8 +115,6 @@ module Bricolage
115
115
  def receive_messages
116
116
  result = client.receive_message(
117
117
  queue_url: @url,
118
- attribute_names: ["All"],
119
- message_attribute_names: ["All"],
120
118
  max_number_of_messages: @max_number_of_messages,
121
119
  visibility_timeout: @visibility_timeout,
122
120
  wait_time_seconds: @wait_time_seconds
@@ -125,19 +123,18 @@ module Bricolage
125
123
  end
126
124
 
127
125
  def delete_message(msg)
128
- # TODO: use batch request?
129
126
  client.delete_message(
130
127
  queue_url: @url,
131
128
  receipt_handle: msg.receipt_handle
132
129
  )
133
130
  end
134
131
 
135
- def buffered_delete_message(msg)
132
+ def delete_message_async(msg)
136
133
  delete_message_buffer.put(msg)
137
134
  end
138
135
 
139
136
  def delete_message_buffer
140
- @delete_message_buffer ||= DeleteMessageBuffer.new(client, @url, @max_delete_batch_size, logger)
137
+ @delete_message_buffer ||= DeleteMessageBuffer.new(client, @url, logger)
141
138
  end
142
139
 
143
140
  def put(msg)
@@ -154,60 +151,107 @@ module Bricolage
154
151
 
155
152
  class DeleteMessageBuffer
156
153
 
157
- def initialize(sqs_client, url, max_buffer_size, logger)
154
+ BATCH_SIZE_MAX = 10 # SQS system limit
155
+ MAX_RETRY_COUNT = 3
156
+
157
+ def initialize(sqs_client, url, logger)
158
158
  @sqs_client = sqs_client
159
159
  @url = url
160
- @max_buffer_size = max_buffer_size
161
160
  @logger = logger
162
161
  @buf = {}
163
- @retry_counts = Hash.new(0)
164
162
  end
165
163
 
166
- MAX_RETRY_COUNT = 3
167
-
168
164
  def put(msg)
169
- @buf[SecureRandom.uuid] = msg
170
- flush if size >= @max_buffer_size
165
+ ent = Entry.new(msg)
166
+ @buf[ent.id] = ent
167
+ flush if full?
168
+ end
169
+
170
+ def empty?
171
+ @buf.empty?
172
+ end
173
+
174
+ def full?
175
+ @buf.size >= BATCH_SIZE_MAX
171
176
  end
172
177
 
173
178
  def size
174
179
  @buf.size
175
180
  end
176
181
 
177
- def flush
178
- return unless size > 0
179
- response = @sqs_client.delete_message_batch({
180
- queue_url: @url,
181
- entries: @buf.to_a.map {|item| {id: item[0], receipt_handle: item[1].receipt_handle} }
182
- })
183
- clear_successes(response.successful)
184
- retry_failures(response.failed)
185
- @logger.debug "DeleteMessageBatch executed: #{response.successful.size} succeeded, #{response.failed.size} failed."
182
+ # Flushes all delayed delete requests, including pending requests
183
+ def flush_force
184
+ # retry continues in only 2m, now+1h must be after than all @next_issue_time
185
+ flush(Time.now + 3600)
186
186
  end
187
187
 
188
- private
189
-
190
- def clear_successes(successes)
191
- successes.each do |s|
192
- @buf.delete s.id
188
+ def flush(now = Time.now)
189
+ entries = @buf.values.select {|ent| ent.issuable?(now) }
190
+ return if entries.empty?
191
+ @logger.info "flushing async delete requests"
192
+ entries.each_slice(BATCH_SIZE_MAX) do |ents|
193
+ res = @sqs_client.delete_message_batch(queue_url: @url, entries: ents.map(&:request_params))
194
+ @logger.info "DeleteMessageBatch executed: #{res.successful.size} succeeded, #{res.failed.size} failed"
195
+ issued_time = Time.now
196
+ res.successful.each do |s|
197
+ @buf.delete s.id
198
+ end
199
+ res.failed.each do |f|
200
+ ent = @buf[f.id]
201
+ unless ent
202
+ @logger.error "[BUG] no corrensponding DeleteMessageBuffer entry: id=#{f.id}"
203
+ next
204
+ end
205
+ ent.failed!(issued_time)
206
+ if ent.too_many_failure?
207
+ @logger.warn "DeleteMessage failure count exceeded the limit; give up: message_id=#{ent.message.message_id}, receipt_handle=#{ent.message.receipt_handle}"
208
+ @buf.delete f.id
209
+ next
210
+ end
211
+ @logger.info "DeleteMessageBatch partially failed (#{ent.n_failure} times): sender_fault=#{f.sender_fault}, code=#{f.code}, message=#{f.message}"
212
+ end
193
213
  end
194
214
  end
195
215
 
196
- def retry_failures(failures)
197
- return unless failures.size > 0
198
- failures.each do |f|
199
- @logger.info "DeleteMessageBatch failed to retry for: id=#{f.id}, sender_fault=#{f.sender_fault}, code=#{f.code}, message=#{f.message}"
216
+ class Entry
217
+ def initialize(msg)
218
+ @message = msg
219
+ @id = SecureRandom.uuid
220
+ @n_failure = 0
221
+ @last_issued_time = nil
222
+ @next_issue_time = nil
200
223
  end
201
- flush
202
- @buf.keys.map {|k| @retry_counts[k] += 1 }
203
- @retry_counts.select {|k, v| v >= MAX_RETRY_COUNT }.each do |k, v|
204
- @logger.warn "DeleteMessageBatch failed #{MAX_RETRY_COUNT} times for: message_id=#{@buf[k].message_id}, receipt_handle=#{@buf[k].receipt_handle}"
205
- @buf.delete k
206
- @retry_counts.delete k
224
+
225
+ attr_reader :id
226
+ attr_reader :message
227
+ attr_reader :n_failure
228
+
229
+ def issuable?(now)
230
+ @n_failure == 0 or now > @next_issue_time
231
+ end
232
+
233
+ def failed!(issued_time = Time.now)
234
+ @n_failure += 1
235
+ @last_issued_time = issued_time
236
+ @next_issue_time = @last_issued_time + next_retry_interval
237
+ end
238
+
239
+ def next_retry_interval
240
+ # 16s, 32s, 64s -> total 2m
241
+ 2 ** (3 + @n_failure)
242
+ end
243
+
244
+ def too_many_failure?
245
+ # (first request) + (3 retry requests) = (4 requests)
246
+ @n_failure > MAX_RETRY_COUNT
247
+ end
248
+
249
+ def request_params
250
+ { id: @id, receipt_handle: @message.receipt_handle }
207
251
  end
208
252
  end
209
253
 
210
- end # DeleteMessageBuffer
254
+ end # class DeleteMessageBuffer
211
255
 
212
256
  end # class SQSDataSource
213
257
 
@@ -0,0 +1,19 @@
1
+ module Bricolage
2
+ module StreamingLoad
3
+ class AlertingLogger
4
+ extend Forwardable
5
+
6
+ def initialize(logger: , sns_datasource: , alert_level: 'warn')
7
+ @logger = logger
8
+ @sns_logger = Bricolage::Logger.new(device: sns_datasource)
9
+ @sns_logger.level = Kernel.const_get("Logger").const_get(alert_level.upcase)
10
+ end
11
+
12
+ %w(log debug info warn error fatal unknown).each do |m|
13
+ define_method(m) do |*args|
14
+ [@logger, @sns_logger].map {|t| t.send(m, *args) }
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -5,6 +5,7 @@ require 'bricolage/logger'
5
5
  require 'bricolage/streamingload/event'
6
6
  require 'bricolage/streamingload/objectbuffer'
7
7
  require 'bricolage/streamingload/urlpatterns'
8
+ require 'bricolage/streamingload/alertinglogger'
8
9
  require 'aws-sdk'
9
10
  require 'yaml'
10
11
  require 'optparse'
@@ -29,10 +30,15 @@ module Bricolage
29
30
  ctx = Context.for_application('.', environment: opts.environment, logger: logger)
30
31
  event_queue = ctx.get_data_source('sqs', config.fetch('event-queue-ds'))
31
32
  task_queue = ctx.get_data_source('sqs', config.fetch('task-queue-ds'))
33
+ alert_logger = AlertingLogger.new(
34
+ logger: ctx.logger,
35
+ sns_datasource: ctx.get_data_source('sns', config.fetch('sns-ds')),
36
+ alert_level: config.fetch('alert-level', 'warn')
37
+ )
32
38
 
33
39
  object_buffer = ObjectBuffer.new(
34
40
  control_data_source: ctx.get_data_source('sql', config.fetch('ctl-postgres-ds')),
35
- logger: ctx.logger
41
+ logger: alert_logger
36
42
  )
37
43
 
38
44
  url_patterns = URLPatterns.for_config(config.fetch('url_patterns'))
@@ -43,7 +49,7 @@ module Bricolage
43
49
  object_buffer: object_buffer,
44
50
  url_patterns: url_patterns,
45
51
  dispatch_interval: 60,
46
- logger: ctx.logger
52
+ logger: alert_logger
47
53
  )
48
54
 
49
55
  Process.daemon(true) if opts.daemon?
@@ -84,17 +90,18 @@ module Bricolage
84
90
 
85
91
  def handle_shutdown(e)
86
92
  @event_queue.initiate_terminate
93
+ # Delete this event immediately
87
94
  @event_queue.delete_message(e)
88
95
  end
89
96
 
90
97
  def handle_data(e)
91
98
  unless e.created?
92
- @event_queue.delete_message(e)
99
+ @event_queue.delete_message_async(e)
93
100
  return
94
101
  end
95
102
  obj = e.loadable_object(@url_patterns)
96
103
  @object_buffer.put(obj)
97
- @event_queue.buffered_delete_message(e)
104
+ @event_queue.delete_message_async(e)
98
105
  end
99
106
 
100
107
  def handle_dispatch(e)
@@ -103,18 +110,13 @@ module Bricolage
103
110
  tasks.each {|task| @task_queue.put task }
104
111
  set_dispatch_timer
105
112
  end
113
+ # Delete this event immediately
106
114
  @event_queue.delete_message(e)
107
115
  end
108
116
 
109
117
  def set_dispatch_timer
110
- resp = @event_queue.send_message DispatchEvent.create(delay_seconds: @dispatch_interval)
111
- @dispatch_message_id = resp.message_id
112
- end
113
-
114
- def delete_events(events)
115
- events.each do |e|
116
- @event_queue.delete_message(e)
117
- end
118
+ res = @event_queue.send_message(DispatchEvent.create(delay_seconds: @dispatch_interval))
119
+ @dispatch_message_id = res.message_id
118
120
  end
119
121
 
120
122
  end
@@ -1,6 +1,7 @@
1
1
  require 'bricolage/sqsdatasource'
2
2
  require 'bricolage/streamingload/task'
3
3
  require 'bricolage/streamingload/loader'
4
+ require 'bricolage/streamingload/alertinglogger'
4
5
  require 'bricolage/logger'
5
6
  require 'bricolage/exception'
6
7
  require 'bricolage/version'
@@ -25,13 +26,18 @@ module Bricolage
25
26
  ctx = Context.for_application('.', environment: opts.environment, logger: logger)
26
27
  redshift_ds = ctx.get_data_source('sql', config.fetch('redshift-ds'))
27
28
  task_queue = ctx.get_data_source('sqs', config.fetch('task-queue-ds'))
29
+ alert_logger = AlertingLogger.new(
30
+ logger: ctx.logger,
31
+ sns_datasource: ctx.get_data_source('sns', config.fetch('sns-ds')),
32
+ alert_level: config.fetch('alert-level', 'warn')
33
+ )
28
34
 
29
35
  service = new(
30
36
  context: ctx,
31
37
  control_data_source: ctx.get_data_source('sql', config.fetch('ctl-postgres-ds')),
32
38
  data_source: redshift_ds,
33
39
  task_queue: task_queue,
34
- logger: ctx.logger
40
+ logger: alert_logger
35
41
  )
36
42
 
37
43
  if opts.task_id
@@ -89,6 +95,7 @@ module Bricolage
89
95
  loadtask = load_task(task.id, force: task.force)
90
96
  return if loadtask.disabled # skip if disabled, but don't delete sqs msg
91
97
  execute_task(loadtask)
98
+ # Delete load task immediately (do not use async delete)
92
99
  @task_queue.delete_message(task)
93
100
  end
94
101
 
@@ -1,5 +1,5 @@
1
1
  module Bricolage
2
2
  module StreamingLoad
3
- VERSION = '0.1.0'
3
+ VERSION = '0.3.0'
4
4
  end
5
5
  end
@@ -5,12 +5,13 @@ module Bricolage::StreamingLoad
5
5
 
6
6
  class TestEvent < Test::Unit::TestCase
7
7
 
8
- def new_s3event(message_id: nil, receipt_handle: nil, name: nil, time: nil, region: nil, bucket: nil, key: nil, size: nil)
8
+ def new_s3event(message_id: nil, receipt_handle: nil, name: nil, time: nil, source: nil, region: nil, bucket: nil, key: nil, size: nil)
9
9
  S3ObjectEvent.new(
10
10
  message_id: message_id,
11
11
  receipt_handle: receipt_handle,
12
12
  name: name,
13
13
  time: time,
14
+ source: source,
14
15
  region: region,
15
16
  bucket: bucket,
16
17
  key: key,
@@ -0,0 +1,113 @@
1
+ require 'test/unit'
2
+ require 'bricolage/streamingload/event'
3
+ require 'bricolage/logger'
4
+
5
+ module Bricolage
6
+
7
+ class TestSQSDataSource < Test::Unit::TestCase
8
+
9
+ def new_sqs_ds(mock_client = nil)
10
+ SQSDataSource.new(
11
+ url: 'http://sqs/000000000000/queue-name',
12
+ access_key_id: 'access_key_id_1',
13
+ secret_access_key: 'secret_access_key_1',
14
+ visibility_timeout: 30
15
+ ).tap {|ds|
16
+ logger = NullLogger.new
17
+ #logger = Bricolage::Logger.default
18
+ ds.__send__(:initialize_base, 'name', nil, logger)
19
+ ds.instance_variable_set(:@client, mock_client) if mock_client
20
+ }
21
+ end
22
+
23
+ class MockSQSClient
24
+ def initialize(&block)
25
+ @handler = block
26
+ end
27
+
28
+ def delete_message_batch(**args)
29
+ @handler.call(args)
30
+ end
31
+ end
32
+
33
+ class NullLogger
34
+ def debug(*args) end
35
+ def info(*args) end
36
+ def warn(*args) end
37
+ def error(*args) end
38
+ def exception(*args) end
39
+ def with_elapsed_time(*args) yield end
40
+ def elapsed_time(*args) yield end
41
+ end
42
+
43
+ def sqs_message(seq)
44
+ MockSQSMessage.new("message_id_#{seq}", "receipt_handle_#{seq}")
45
+ end
46
+
47
+ MockSQSMessage = Struct.new(:message_id, :receipt_handle)
48
+
49
+ class MockSQSResponse
50
+ def initialize(successful: [], failed: [])
51
+ @successful = successful
52
+ @failed = failed
53
+ end
54
+
55
+ attr_reader :successful
56
+ attr_reader :failed
57
+
58
+ Success = Struct.new(:id)
59
+ Failure = Struct.new(:id, :sender_fault, :code, :message)
60
+
61
+ def add_success_for(ent)
62
+ @successful.push Success.new(ent[:id])
63
+ end
64
+
65
+ def add_failure_for(ent)
66
+ @failed.push Failure.new(ent[:id], true, '400', 'some reason')
67
+ end
68
+ end
69
+
70
+ test "#delete_message_async" do
71
+ messages = [sqs_message(0), sqs_message(1), sqs_message(2)]
72
+ mock = MockSQSClient.new {|args|
73
+ entries = args[:entries]
74
+ if entries.size == 3
75
+ # first time
76
+ assert_equal messages[0].receipt_handle, entries[0][:receipt_handle]
77
+ assert_equal messages[1].receipt_handle, entries[1][:receipt_handle]
78
+ assert_equal messages[2].receipt_handle, entries[2][:receipt_handle]
79
+ MockSQSResponse.new.tap {|res|
80
+ res.add_success_for(entries[0])
81
+ res.add_failure_for(entries[1])
82
+ res.add_success_for(entries[2])
83
+ }
84
+ else
85
+ # second time
86
+ MockSQSResponse.new.tap {|res|
87
+ res.add_success_for(entries[0])
88
+ }
89
+ end
90
+ }
91
+ ds = new_sqs_ds(mock)
92
+ ds.delete_message_async(messages[0])
93
+ ds.delete_message_async(messages[1])
94
+ ds.delete_message_async(messages[2])
95
+
96
+ # first flush
97
+ flush_time = Time.now
98
+ ds.delete_message_buffer.flush(flush_time)
99
+ assert_equal 1, ds.delete_message_buffer.size
100
+ bufent = ds.delete_message_buffer.instance_variable_get(:@buf).values.first
101
+ assert_equal 'receipt_handle_1', bufent.message.receipt_handle
102
+ assert_equal 1, bufent.n_failure
103
+ assert_false bufent.issuable?(flush_time)
104
+ assert_true bufent.issuable?(flush_time + 180)
105
+
106
+ # second flush
107
+ ds.delete_message_buffer.flush(flush_time + 180)
108
+ assert_true ds.delete_message_buffer.empty?
109
+ end
110
+
111
+ end
112
+
113
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bricolage-streamingload
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
@@ -107,8 +107,10 @@ files:
107
107
  - README.md
108
108
  - bin/bricolage-streaming-dispatcher
109
109
  - bin/bricolage-streaming-loader
110
+ - lib/bricolage/snsdatasource.rb
110
111
  - lib/bricolage/sqsdatasource.rb
111
112
  - lib/bricolage/sqswrapper.rb
113
+ - lib/bricolage/streamingload/alertinglogger.rb
112
114
  - lib/bricolage/streamingload/dispatcher.rb
113
115
  - lib/bricolage/streamingload/event.rb
114
116
  - lib/bricolage/streamingload/loader.rb
@@ -121,6 +123,7 @@ files:
121
123
  - lib/bricolage/streamingload/version.rb
122
124
  - test/all.rb
123
125
  - test/streamingload/test_event.rb
126
+ - test/test_sqsdatasource.rb
124
127
  homepage: https://github.com/aamine/bricolage-streamingload
125
128
  licenses:
126
129
  - MIT