nulogy_message_bus_consumer 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c47fb7fab95cd7d64e9654c0ce43fec64f77f8718331f8028afc6c18ab993f81
4
- data.tar.gz: 3c656a7dea18d149f04ccfbc3dc4db04e9aa7b3cd6183be7b0a1a45337c3a975
3
+ metadata.gz: c7a2f6f3118c35e9f6b0f4cfebd87b96d0f17fbd391130953b560d61d642f73e
4
+ data.tar.gz: 6ff5ee3e40fd44c96592fdba4c78b47b3c87aa4c16f753ef8e9acbf5af1b429e
5
5
  SHA512:
6
- metadata.gz: 04320f62a752bcfcc4bad93bd0611ba0f8945018dfb1680016ab739b70ba66cb7ebcf5062fb12f316745e59386714613c44f17f7feea4006a56b8625da119e23
7
- data.tar.gz: c480ced5fb50feeb8d655cb8c0b45c19a12bd733088db6fe4bf78ce33d08a7051594eb86a011227d2cc1363ea058e1544435c318824a6c9549be2e32fb6f65ef
6
+ metadata.gz: 80ee9d751672ab78511a130e22926f0d9f2e6abeb5ed5447baa5866180a75387bad43b7e4b292d300a2853176d6ed24df701b268c43c6c4495d3397fb5b62415
7
+ data.tar.gz: 5aae3dc5540949b92e086c39b038dab2d2c2959535c777557279e70023e0ce76a6585881dfb293783aa55ebd9c1910b897f1f609ff257526bd7ebb4a8f393a18
data/Rakefile CHANGED
@@ -21,9 +21,8 @@ load "rails/tasks/statistics.rake"
21
21
  require "rspec/core"
22
22
  require "rspec/core/rake_task"
23
23
  RSpec::Core::RakeTask.new(:spec)
24
- require "rubocop/rake_task"
25
- RuboCop::RakeTask.new
26
- task default: %i[spec rubocop]
24
+ require "standard/rake"
25
+ task default: %i[spec standard]
27
26
 
28
27
  require "rake/release"
29
28
 
@@ -1,10 +1,14 @@
1
+ require "active_record/railtie"
2
+ require "active_support/core_ext/time/zones"
1
3
  require "rdkafka"
2
4
 
3
5
  require "nulogy_message_bus_consumer/engine"
4
-
6
+ require "nulogy_message_bus_consumer/clock"
5
7
  require "nulogy_message_bus_consumer/config"
8
+ require "nulogy_message_bus_consumer/deployment/ecs"
6
9
  require "nulogy_message_bus_consumer/handlers/log_unprocessed_messages"
7
10
  require "nulogy_message_bus_consumer/kafka_utils"
11
+ require "nulogy_message_bus_consumer/lag_tracker"
8
12
  require "nulogy_message_bus_consumer/message"
9
13
  require "nulogy_message_bus_consumer/null_logger"
10
14
  require "nulogy_message_bus_consumer/pipeline"
@@ -12,11 +16,12 @@ require "nulogy_message_bus_consumer/processed_message"
12
16
  require "nulogy_message_bus_consumer/steps/commit_on_success"
13
17
  require "nulogy_message_bus_consumer/steps/connect_to_message_bus"
14
18
  require "nulogy_message_bus_consumer/steps/deduplicate_messages"
19
+ require "nulogy_message_bus_consumer/steps/log_consumer_lag"
15
20
  require "nulogy_message_bus_consumer/steps/log_messages"
16
- require "nulogy_message_bus_consumer/steps/monitor_replication_lag"
17
21
  require "nulogy_message_bus_consumer/steps/seek_beginning_of_topic"
18
22
  require "nulogy_message_bus_consumer/steps/stream_messages"
19
23
  require "nulogy_message_bus_consumer/steps/stream_messages_until_none_are_left"
24
+ require "nulogy_message_bus_consumer/steps/supervise_consumer_lag"
20
25
 
21
26
  module NulogyMessageBusConsumer
22
27
  module_function
@@ -40,14 +45,21 @@ module NulogyMessageBusConsumer
40
45
 
41
46
  def recommended_consumer_pipeline(config: self.config, logger: self.logger)
42
47
  Pipeline.new([
43
- # The first three are really system processing steps
48
+ # System processing/health steps.
49
+ # Note that since they are before `StreamMessages`, they will only
50
+ # be called once, without any messages.
44
51
  Steps::ConnectToMessageBus.new(config, logger),
45
- Steps::MonitorReplicationLag.new(logger),
52
+ Steps::LogConsumerLag.new(logger),
53
+ Steps::SuperviseConsumerLag.new(
54
+ logger,
55
+ check_interval_seconds: config.lag_check_interval_seconds,
56
+ tracker: LagTracker.new(failing_checks: config.lag_checks)
57
+ ),
46
58
  Steps::StreamMessages.new(logger),
47
59
  # Message processing steps start here.
48
60
  Steps::LogMessages.new(logger),
49
61
  Steps::CommitOnSuccess.new,
50
- Steps::DeduplicateMessages.new(logger),
62
+ Steps::DeduplicateMessages.new(logger)
51
63
  ])
52
64
  end
53
65
 
@@ -56,7 +68,7 @@ module NulogyMessageBusConsumer
56
68
  Steps::ConnectToMessageBus.new(config, logger),
57
69
  Steps::SeekBeginningOfTopic.new,
58
70
  Steps::StreamMessagesUntilNoneAreLeft.new(logger),
59
- Handlers::LogUnprocessedMessages.new(logger),
71
+ Handlers::LogUnprocessedMessages.new(logger)
60
72
  ])
61
73
  end
62
74
  end
@@ -0,0 +1,13 @@
1
+ module NulogyMessageBusConsumer
2
+ # Note: Since this calls Time.zone, it is NOT thread-safe
3
+ class Clock
4
+ def now
5
+ Time.zone.now.to_datetime
6
+ end
7
+
8
+ # milliseconds since epoch
9
+ def ms
10
+ now.strftime("%Q").to_i
11
+ end
12
+ end
13
+ end
@@ -1,11 +1,19 @@
1
1
  module NulogyMessageBusConsumer
2
2
  class Config
3
- attr_accessor :consumer_group_id
4
- attr_accessor :bootstrap_servers
5
- attr_accessor :topic_name
3
+ attr_accessor :bootstrap_servers,
4
+ :client_id,
5
+ :consumer_group_id,
6
+ :lag_check_interval_seconds,
7
+ :lag_checks,
8
+ :topic_name
6
9
 
7
10
  def initialize(options = {})
8
- update(options)
11
+ defaults = {
12
+ lag_check_interval_seconds: 20,
13
+ lag_checks: 6
14
+ }
15
+
16
+ update(defaults.merge(options))
9
17
  end
10
18
 
11
19
  def update(options = {})
@@ -0,0 +1,23 @@
1
+ module NulogyMessageBusConsumer
2
+ module Deployment
3
+ module ECS
4
+ module_function
5
+
6
+ # Try to get the TaskID from metadata server:
7
+ # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v4.html
8
+ # Otherwise, return nil
9
+ def task_id
10
+ data = `curl --silent "$ECS_CONTAINER_METADATA_URI_V4/task"`
11
+
12
+ return if data.empty?
13
+
14
+ json = JSON.parse(data)
15
+ arn = json["TaskARN"]
16
+
17
+ return unless arn
18
+
19
+ arn.split("/").last
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,53 @@
1
+ require "set"
2
+
3
+ module NulogyMessageBusConsumer
4
+ # Keeps track of how many consecutive update calls a topic partition's (non-zero) lag has remained unchanged.
5
+ class LagTracker
6
+ attr_reader :failing_checks
7
+
8
+ def initialize(failing_checks: 3)
9
+ @failing_checks = failing_checks
10
+ @tracked = Hash.new { |h, topic| h[topic] = {} }
11
+ @failed = Hash.new { |h, topic| h[topic] = Set.new }
12
+ end
13
+
14
+ def update(topic_partitions)
15
+ topic_partitions.each_pair do |topic, partitions|
16
+ partitions.each_pair do |partition, value|
17
+ update_topic_partition(topic, partition, value)
18
+ end
19
+ end
20
+ end
21
+
22
+ def failing?
23
+ @failed.any?
24
+ end
25
+
26
+ def failed
27
+ @failed.transform_values { |v| v.to_a.sort }
28
+ end
29
+
30
+ private
31
+
32
+ def update_topic_partition(topic, partition, value)
33
+ current_value, count = @tracked.dig(topic, partition)
34
+
35
+ new_value, new_count =
36
+ if current_value == value && !value.zero?
37
+ [current_value, count + 1]
38
+ else
39
+ [value, 0]
40
+ end
41
+
42
+ @tracked[topic][partition] = [new_value, new_count]
43
+
44
+ if new_count >= @failing_checks
45
+ @failed[topic] << partition
46
+ end
47
+ end
48
+
49
+ def exists?(topic, partition)
50
+ @tracked.dig(topic, partition)
51
+ end
52
+ end
53
+ end
@@ -21,7 +21,7 @@ module NulogyMessageBusConsumer
21
21
  event_data =
22
22
  begin
23
23
  JSON.parse(envelope_data[:event_json], symbolize_names: true)
24
- rescue StandardError
24
+ rescue
25
25
  {}
26
26
  end
27
27
 
@@ -1,9 +1,12 @@
1
1
  module NulogyMessageBusConsumer
2
2
  class NullLogger
3
- def info(*_) end
3
+ def info(*_)
4
+ end
4
5
 
5
- def error(*_) end
6
+ def error(*_)
7
+ end
6
8
 
7
- def warn(*_) end
9
+ def warn(*_)
10
+ end
8
11
  end
9
12
  end
@@ -34,7 +34,7 @@ module NulogyMessageBusConsumer
34
34
  lambda do |**yielded_args|
35
35
  args_to_be_overridden = existing_args.keys & yielded_args.keys
36
36
  if args_to_be_overridden.any?
37
- raise "Cannot override existing argument(s): #{args_to_be_overridden.join(', ')}"
37
+ raise "Cannot override existing argument(s): #{args_to_be_overridden.join(", ")}"
38
38
  end
39
39
 
40
40
  func.call(**existing_args.merge(yielded_args))
@@ -9,31 +9,44 @@ module NulogyMessageBusConsumer
9
9
 
10
10
  def call(**_)
11
11
  @logger.info("Connecting to the MessageBus")
12
- consumer = build_consumer
13
12
  @logger.info("Using consumer group id: #{@config.consumer_group_id}")
14
13
 
15
- consumer.subscribe(@config.topic_name)
16
- @logger.info("Listening for kafka messages on topic #{@config.topic_name}")
14
+ subscribe
15
+
16
+ trap("TERM") { kafka_consumer.close }
17
17
 
18
- trap("TERM") { consumer.close }
18
+ wait_for_assignment
19
19
 
20
- KafkaUtils.wait_for_assignment(consumer)
21
- yield(kafka_consumer: consumer)
20
+ yield(kafka_consumer: kafka_consumer)
22
21
  end
23
22
 
24
23
  private
25
24
 
26
- def build_consumer
27
- @kafka_consumer || Rdkafka::Config.new(consumer_config).consumer
25
+ def kafka_consumer
26
+ @kafka_consumer ||= Rdkafka::Config.new(consumer_config).consumer
28
27
  end
29
28
 
30
29
  def consumer_config
31
- {
30
+ config = {
32
31
  "bootstrap.servers": @config.bootstrap_servers,
33
32
  "enable.auto.commit": false,
34
33
  "group.id": @config.consumer_group_id,
35
- "enable.auto.offset.store": false,
34
+ "enable.auto.offset.store": false
36
35
  }
36
+
37
+ config["client.id"] = @config.client_id if @config.client_id
38
+
39
+ config
40
+ end
41
+
42
+ def subscribe
43
+ kafka_consumer.subscribe(@config.topic_name)
44
+ @logger.info("Listening for kafka messages on topic #{@config.topic_name}")
45
+ end
46
+
47
+ def wait_for_assignment
48
+ KafkaUtils.wait_for_assignment(kafka_consumer)
49
+ @logger.info("Connected as client: #{kafka_consumer.member_id}")
37
50
  end
38
51
  end
39
52
  end
@@ -41,7 +41,7 @@ module NulogyMessageBusConsumer
41
41
  def log_duplicate(message)
42
42
  @logger.warn(JSON.dump({
43
43
  event: "duplicate_message_detected",
44
- kafka_message_id: message.id,
44
+ kafka_message_id: message.id
45
45
  }))
46
46
  end
47
47
  end
@@ -1,6 +1,6 @@
1
1
  module NulogyMessageBusConsumer
2
2
  module Steps
3
- class MonitorReplicationLag
3
+ class LogConsumerLag
4
4
  def initialize(logger)
5
5
  @logger = logger
6
6
  end
@@ -22,9 +22,9 @@ module NulogyMessageBusConsumer
22
22
 
23
23
  @logger.info(JSON.dump({
24
24
  event: "consumer_lag",
25
- topics: Calculator.add_max_lag(lag_per_topic),
25
+ topics: Calculator.add_max_lag(lag_per_topic)
26
26
  }))
27
- STDOUT.flush
27
+ $stdout.flush
28
28
 
29
29
  sleep 60
30
30
  end
@@ -1,12 +1,5 @@
1
1
  module NulogyMessageBusConsumer
2
2
  module Steps
3
- class Clock
4
- # milliseconds since epoch
5
- def now
6
- Time.zone.now.to_datetime.strftime("%Q").to_i
7
- end
8
- end
9
-
10
3
  class LogMessages
11
4
  def initialize(logger, clock: Clock.new)
12
5
  @logger = logger
@@ -17,18 +10,18 @@ module NulogyMessageBusConsumer
17
10
  @logger.info(JSON.dump({
18
11
  event: "message_received",
19
12
  kafka_message_id: message.id,
20
- message: "Received #{message.id}",
13
+ message: "Received #{message.id}"
21
14
  }))
22
15
 
23
16
  result = yield
24
17
 
25
- millis = diff_millis(message.created_at, @clock.now)
18
+ millis = diff_millis(message.created_at, @clock.ms)
26
19
  @logger.info(JSON.dump({
27
20
  event: "message_processed",
28
21
  kafka_message_id: message.id,
29
- message: "Processed #{message.id}",
22
+ message: "Processed #{message.id} (#{message.topic}##{message.partition}@#{message.offset})",
30
23
  result: result,
31
- time_to_processed: millis,
24
+ time_to_processed: millis
32
25
  }))
33
26
 
34
27
  result
@@ -12,11 +12,11 @@ module NulogyMessageBusConsumer
12
12
  kafka_message: kafka_message
13
13
  )
14
14
  end
15
- rescue StandardError => e
15
+ rescue => e
16
16
  @logger.error(JSON.dump({
17
17
  event: "message_processing_errored",
18
18
  class: e.class,
19
- message: e.message,
19
+ message: e.message
20
20
  }))
21
21
 
22
22
  raise
@@ -12,11 +12,11 @@ module NulogyMessageBusConsumer
12
12
  kafka_message: kafka_message
13
13
  )
14
14
  end
15
- rescue StandardError => e
15
+ rescue => e
16
16
  @logger.error(JSON.dump({
17
17
  event: "message_processing_errored",
18
18
  class: e.class,
19
- message: e.message,
19
+ message: e.message
20
20
  }))
21
21
 
22
22
  raise
@@ -0,0 +1,76 @@
1
+ module NulogyMessageBusConsumer
2
+ module Steps
3
+ # Supervises the consumer's lag.
4
+ #
5
+ # If a partition's lag is non-zero and does not change for an extended period
6
+ # of time, then kill the main thread.
7
+ #
8
+ # That period of time is check_interval_seconds * LagTracker#failing_checks
9
+ # With the defaults, that would be 20 * 6 ~ 120 seconds = 2 minutes.
10
+ #
11
+ # Note that this strategy may not work for a busy integration.
12
+ # Consumer lag monitoring should alert in that case.
13
+ # However, this strategy may help alleviate alerts for low traffic or off-peak
14
+ # environments.
15
+ #
16
+ # We've come across cases where the consumer lag is still being logged,
17
+ # messages are being processed, but the consumer is not consuming messages
18
+ # in particular topics.
19
+ #
20
+ # Killing the main thread causes ECS to restart the task.
21
+ class SuperviseConsumerLag
22
+ def initialize(logger, tracker: NulogyMessageBusConsumer::LagTracker.new(failing_checks: 6), killable: nil, check_interval_seconds: 20)
23
+ @logger = logger
24
+ @tracker = tracker
25
+ @killable = killable
26
+ @check_interval_seconds = check_interval_seconds
27
+ end
28
+
29
+ def call(kafka_consumer:, **_)
30
+ @consumer = kafka_consumer
31
+ @killable ||= Thread.current
32
+
33
+ run
34
+
35
+ yield
36
+ end
37
+
38
+ private
39
+
40
+ def run
41
+ Thread.abort_on_exception = true
42
+
43
+ Thread.new do
44
+ NulogyMessageBusConsumer::KafkaUtils.wait_for_assignment(@consumer)
45
+
46
+ loop do
47
+ @tracker.update(@consumer.lag(@consumer.committed))
48
+
49
+ if @tracker.failing?
50
+ log_failed_partitions
51
+
52
+ @killable.kill
53
+ Thread.current.exit
54
+ end
55
+
56
+ sleep @check_interval_seconds
57
+ end
58
+ end
59
+ end
60
+
61
+ def log_failed_partitions
62
+ seconds = @check_interval_seconds * @tracker.failing_checks
63
+ failed = @tracker
64
+ .failed
65
+ .map { |topic, partitions| "#{topic}: #{partitions.join(",")}" }
66
+ .join(", ")
67
+
68
+ @logger.warn(JSON.dump({
69
+ event: "message_processing_warning",
70
+ message: "Assigned partition lag has not changed in #{seconds} seconds: #{failed}"
71
+ }))
72
+ $stdout.flush
73
+ end
74
+ end
75
+ end
76
+ end
@@ -1,3 +1,3 @@
1
1
  module NulogyMessageBusConsumer
2
- VERSION = "0.3.3"
2
+ VERSION = "0.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nulogy_message_bus_consumer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nulogy
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-02 00:00:00.000000000 Z
11
+ date: 2021-03-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -142,14 +142,14 @@ dependencies:
142
142
  requirements:
143
143
  - - '='
144
144
  - !ruby/object:Gem::Version
145
- version: 1.2.1
145
+ version: 1.3.0
146
146
  type: :development
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - '='
151
151
  - !ruby/object:Gem::Version
152
- version: 1.2.1
152
+ version: 1.3.0
153
153
  - !ruby/object:Gem::Dependency
154
154
  name: rspec
155
155
  requirement: !ruby/object:Gem::Requirement
@@ -193,19 +193,19 @@ dependencies:
193
193
  - !ruby/object:Gem::Version
194
194
  version: 4.0.1
195
195
  - !ruby/object:Gem::Dependency
196
- name: rubocop
196
+ name: standard
197
197
  requirement: !ruby/object:Gem::Requirement
198
198
  requirements:
199
199
  - - '='
200
200
  - !ruby/object:Gem::Version
201
- version: 0.81.0
201
+ version: 0.11.0
202
202
  type: :development
203
203
  prerelease: false
204
204
  version_requirements: !ruby/object:Gem::Requirement
205
205
  requirements:
206
206
  - - '='
207
207
  - !ruby/object:Gem::Version
208
- version: 0.81.0
208
+ version: 0.11.0
209
209
  - !ruby/object:Gem::Dependency
210
210
  name: rubocop-rails
211
211
  requirement: !ruby/object:Gem::Requirement
@@ -234,7 +234,7 @@ dependencies:
234
234
  - - '='
235
235
  - !ruby/object:Gem::Version
236
236
  version: 1.38.1
237
- description:
237
+ description:
238
238
  email:
239
239
  - tass@nulogy.com
240
240
  executables: []
@@ -247,10 +247,13 @@ files:
247
247
  - config/routes.rb
248
248
  - db/migrate/20200509095105_create_message_bus_processed_messages.rb
249
249
  - lib/nulogy_message_bus_consumer.rb
250
+ - lib/nulogy_message_bus_consumer/clock.rb
250
251
  - lib/nulogy_message_bus_consumer/config.rb
252
+ - lib/nulogy_message_bus_consumer/deployment/ecs.rb
251
253
  - lib/nulogy_message_bus_consumer/engine.rb
252
254
  - lib/nulogy_message_bus_consumer/handlers/log_unprocessed_messages.rb
253
255
  - lib/nulogy_message_bus_consumer/kafka_utils.rb
256
+ - lib/nulogy_message_bus_consumer/lag_tracker.rb
254
257
  - lib/nulogy_message_bus_consumer/message.rb
255
258
  - lib/nulogy_message_bus_consumer/null_logger.rb
256
259
  - lib/nulogy_message_bus_consumer/pipeline.rb
@@ -258,18 +261,19 @@ files:
258
261
  - lib/nulogy_message_bus_consumer/steps/commit_on_success.rb
259
262
  - lib/nulogy_message_bus_consumer/steps/connect_to_message_bus.rb
260
263
  - lib/nulogy_message_bus_consumer/steps/deduplicate_messages.rb
264
+ - lib/nulogy_message_bus_consumer/steps/log_consumer_lag.rb
261
265
  - lib/nulogy_message_bus_consumer/steps/log_messages.rb
262
- - lib/nulogy_message_bus_consumer/steps/monitor_replication_lag.rb
263
266
  - lib/nulogy_message_bus_consumer/steps/seek_beginning_of_topic.rb
264
267
  - lib/nulogy_message_bus_consumer/steps/stream_messages.rb
265
268
  - lib/nulogy_message_bus_consumer/steps/stream_messages_until_none_are_left.rb
269
+ - lib/nulogy_message_bus_consumer/steps/supervise_consumer_lag.rb
266
270
  - lib/nulogy_message_bus_consumer/version.rb
267
271
  - lib/tasks/engine/message_bus_consumer.rake
268
272
  homepage: https://github.com/nulogy/message-bus/tree/master/gems/nulogy_message_bus_consumer
269
273
  licenses: []
270
274
  metadata:
271
275
  allowed_push_host: https://rubygems.org/
272
- post_install_message:
276
+ post_install_message:
273
277
  rdoc_options: []
274
278
  require_paths:
275
279
  - lib
@@ -285,7 +289,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
285
289
  version: '0'
286
290
  requirements: []
287
291
  rubygems_version: 3.0.3
288
- signing_key:
292
+ signing_key:
289
293
  specification_version: 4
290
294
  summary: Code for accessing the Nulogy Message Bus
291
295
  test_files: []