waterdrop 2.0.7 → 2.6.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/FUNDING.yml +1 -0
  4. data/.github/workflows/ci.yml +22 -11
  5. data/.ruby-version +1 -1
  6. data/CHANGELOG.md +200 -0
  7. data/Gemfile +0 -2
  8. data/Gemfile.lock +32 -75
  9. data/README.md +22 -275
  10. data/certs/cert_chain.pem +26 -0
  11. data/config/locales/errors.yml +33 -0
  12. data/docker-compose.yml +19 -12
  13. data/lib/waterdrop/clients/buffered.rb +90 -0
  14. data/lib/waterdrop/clients/dummy.rb +69 -0
  15. data/lib/waterdrop/clients/rdkafka.rb +34 -0
  16. data/lib/{water_drop → waterdrop}/config.rb +39 -16
  17. data/lib/waterdrop/contracts/config.rb +43 -0
  18. data/lib/waterdrop/contracts/message.rb +64 -0
  19. data/lib/{water_drop → waterdrop}/errors.rb +14 -7
  20. data/lib/waterdrop/instrumentation/callbacks/delivery.rb +102 -0
  21. data/lib/{water_drop → waterdrop}/instrumentation/callbacks/error.rb +6 -2
  22. data/lib/{water_drop → waterdrop}/instrumentation/callbacks/statistics.rb +1 -1
  23. data/lib/{water_drop/instrumentation/stdout_listener.rb → waterdrop/instrumentation/logger_listener.rb} +66 -21
  24. data/lib/waterdrop/instrumentation/monitor.rb +20 -0
  25. data/lib/{water_drop/instrumentation/monitor.rb → waterdrop/instrumentation/notifications.rb} +12 -14
  26. data/lib/waterdrop/instrumentation/vendors/datadog/dashboard.json +1 -0
  27. data/lib/waterdrop/instrumentation/vendors/datadog/metrics_listener.rb +210 -0
  28. data/lib/waterdrop/middleware.rb +50 -0
  29. data/lib/{water_drop → waterdrop}/producer/async.rb +40 -4
  30. data/lib/{water_drop → waterdrop}/producer/buffer.rb +12 -30
  31. data/lib/{water_drop → waterdrop}/producer/builder.rb +6 -11
  32. data/lib/{water_drop → waterdrop}/producer/sync.rb +44 -15
  33. data/lib/waterdrop/producer/transactions.rb +170 -0
  34. data/lib/waterdrop/producer.rb +308 -0
  35. data/lib/{water_drop → waterdrop}/version.rb +1 -1
  36. data/lib/waterdrop.rb +28 -2
  37. data/renovate.json +6 -0
  38. data/waterdrop.gemspec +14 -11
  39. data.tar.gz.sig +0 -0
  40. metadata +71 -111
  41. metadata.gz.sig +0 -0
  42. data/certs/mensfeld.pem +0 -25
  43. data/config/errors.yml +0 -6
  44. data/lib/water_drop/contracts/config.rb +0 -26
  45. data/lib/water_drop/contracts/message.rb +0 -42
  46. data/lib/water_drop/instrumentation/callbacks/delivery.rb +0 -30
  47. data/lib/water_drop/instrumentation/callbacks/statistics_decorator.rb +0 -77
  48. data/lib/water_drop/instrumentation/callbacks_manager.rb +0 -39
  49. data/lib/water_drop/instrumentation.rb +0 -20
  50. data/lib/water_drop/patches/rdkafka/bindings.rb +0 -42
  51. data/lib/water_drop/patches/rdkafka/producer.rb +0 -20
  52. data/lib/water_drop/producer/dummy_client.rb +0 -32
  53. data/lib/water_drop/producer.rb +0 -162
  54. data/lib/water_drop.rb +0 -36
  55. /data/lib/{water_drop → waterdrop}/contracts.rb +0 -0
  56. /data/lib/{water_drop → waterdrop}/producer/status.rb +0 -0
@@ -5,11 +5,14 @@
5
5
  module WaterDrop
6
6
  # Configuration object for setting up all options required by WaterDrop
7
7
  class Config
8
- include Dry::Configurable
8
+ include ::Karafka::Core::Configurable
9
9
 
10
10
  # Defaults for kafka settings, that will be overwritten only if not present already
11
11
  KAFKA_DEFAULTS = {
12
- 'client.id' => 'waterdrop'
12
+ 'client.id': 'waterdrop',
13
+ # emit librdkafka statistics every five seconds. This is used in instrumentation.
14
+ # When disabled, part of metrics will not be published and available.
15
+ 'statistics.interval.ms': 5_000
13
16
  }.freeze
14
17
 
15
18
  private_constant :KAFKA_DEFAULTS
@@ -22,7 +25,7 @@ module WaterDrop
22
25
  setting(
23
26
  :id,
24
27
  default: false,
25
- constructor: ->(id) { id || SecureRandom.uuid }
28
+ constructor: ->(id) { id || SecureRandom.hex(6) }
26
29
  )
27
30
  # option [Instance] logger that we want to use
28
31
  # @note Due to how rdkafka works, this setting is global for all the producers
@@ -47,13 +50,41 @@ module WaterDrop
47
50
  # delivery report. In really robust systems, this describes the min-delivery time
48
51
  # for a single sync message when produced in isolation
49
52
  setting :wait_timeout, default: 0.005 # 5 milliseconds
53
+ # option [Boolean] should we upon detecting full librdkafka queue backoff and retry or should
54
+ # we raise an exception.
55
+ # When this is set to `true`, upon full queue, we won't raise an error. There will be error
56
+ # in the `error.occurred` notification pipeline with a proper type as while this is
57
+ # recoverable, in a high number it still may mean issues.
58
+ # Waiting is one of the recommended strategies.
59
+ setting :wait_on_queue_full, default: true
60
+ # option [Numeric] how long (in seconds) should we backoff before a retry when queue is full
61
+ # The retry will happen with the same message and backoff should give us some time to
62
+ # dispatch previously buffered messages.
63
+ setting :wait_backoff_on_queue_full, default: 0.1
64
+ # option [Numeric] how many seconds should we wait with the backoff on queue having space for
65
+ # more messages before re-raising the error.
66
+ setting :wait_timeout_on_queue_full, default: 10
67
+ # option [Numeric] How long to wait before retrying a retryable transaction related error
68
+ setting :wait_backoff_on_transaction_command, default: 0.5
69
+ # option [Numeric] How many times to retry a retryable transaction related error before
70
+ # giving up
71
+ setting :max_attempts_on_transaction_command, default: 5
72
+
50
73
  # option [Boolean] should we send messages. Setting this to false can be really useful when
51
74
  # testing and/or developing because when set to false, it won't actually ping Kafka but will
52
75
  # run all the validations, etc
53
76
  setting :deliver, default: true
77
+ # option [Class] class for usage when creating the underlying client used to dispatch messages
78
+ setting :client_class, default: Clients::Rdkafka
54
79
  # rdkafka options
55
80
  # @see https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
56
81
  setting :kafka, default: {}
82
+ # Middleware chain that can be expanded with useful middleware steps
83
+ setting(
84
+ :middleware,
85
+ default: false,
86
+ constructor: ->(middleware) { middleware || WaterDrop::Middleware.new }
87
+ )
57
88
 
58
89
  # Configuration method
59
90
  # @yield Runs a block of code providing a config singleton instance to it
@@ -63,10 +94,13 @@ module WaterDrop
63
94
  yield(config)
64
95
 
65
96
  merge_kafka_defaults!(config)
66
- validate!(config.to_h)
97
+
98
+ Contracts::Config.new.validate!(config.to_h, Errors::ConfigurationInvalidError)
67
99
 
68
100
  ::Rdkafka::Config.logger = config.logger
69
101
  end
102
+
103
+ self
70
104
  end
71
105
 
72
106
  private
@@ -74,7 +108,7 @@ module WaterDrop
74
108
  # Propagates the kafka setting defaults unless they are already present
75
109
  # This makes it easier to set some values that users usually don't change but still allows them
76
110
  # to overwrite the whole hash if they want to
77
- # @param config [Dry::Configurable::Config] dry config of this producer
111
+ # @param config [Karafka::Core::Configurable::Node] config of this producer
78
112
  def merge_kafka_defaults!(config)
79
113
  KAFKA_DEFAULTS.each do |key, value|
80
114
  next if config.kafka.key?(key)
@@ -82,16 +116,5 @@ module WaterDrop
82
116
  config.kafka[key] = value
83
117
  end
84
118
  end
85
-
86
- # Validates the configuration and if anything is wrong, will raise an exception
87
- # @param config_hash [Hash] config hash with setup details
88
- # @raise [WaterDrop::Errors::ConfigurationInvalidError] raised when something is wrong with
89
- # the configuration
90
- def validate!(config_hash)
91
- result = Contracts::Config.new.call(config_hash)
92
- return true if result.success?
93
-
94
- raise Errors::ConfigurationInvalidError, result.errors.to_h
95
- end
96
119
  end
97
120
  end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Contracts
5
+ # Contract with validation rules for WaterDrop configuration details
6
+ class Config < ::Karafka::Core::Contractable::Contract
7
+ configure do |config|
8
+ config.error_messages = YAML.safe_load(
9
+ File.read(
10
+ File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
11
+ )
12
+ ).fetch('en').fetch('validations').fetch('config')
13
+ end
14
+
15
+ required(:id) { |val| val.is_a?(String) && !val.empty? }
16
+ required(:logger) { |val| !val.nil? }
17
+ required(:deliver) { |val| [true, false].include?(val) }
18
+ required(:max_payload_size) { |val| val.is_a?(Integer) && val >= 1 }
19
+ required(:max_wait_timeout) { |val| val.is_a?(Numeric) && val >= 0 }
20
+ required(:wait_timeout) { |val| val.is_a?(Numeric) && val.positive? }
21
+ required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
22
+ required(:wait_on_queue_full) { |val| [true, false].include?(val) }
23
+ required(:wait_backoff_on_queue_full) { |val| val.is_a?(Numeric) && val >= 0 }
24
+ required(:wait_timeout_on_queue_full) { |val| val.is_a?(Numeric) && val >= 0 }
25
+
26
+ # rdkafka allows both symbols and strings as keys for config but then casts them to strings
27
+ # This can be confusing, so we expect all keys to be symbolized
28
+ virtual do |config, errors|
29
+ next true unless errors.empty?
30
+
31
+ errors = []
32
+
33
+ config
34
+ .fetch(:kafka)
35
+ .keys
36
+ .reject { |key| key.is_a?(Symbol) }
37
+ .each { |key| errors << [[:kafka, key], :kafka_key_must_be_a_symbol] }
38
+
39
+ errors
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Contracts
5
+ # Contract with validation rules for validating that all the message options that
6
+ # we provide to producer are valid and usable
7
+ class Message < ::Karafka::Core::Contractable::Contract
8
+ configure do |config|
9
+ config.error_messages = YAML.safe_load(
10
+ File.read(
11
+ File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
12
+ )
13
+ ).fetch('en').fetch('validations').fetch('message')
14
+ end
15
+
16
+ # Regex to check that topic has a valid format
17
+ TOPIC_REGEXP = /\A(\w|-|\.)+\z/
18
+
19
+ private_constant :TOPIC_REGEXP
20
+
21
+ attr_reader :max_payload_size
22
+
23
+ # @param max_payload_size [Integer] max payload size
24
+ def initialize(max_payload_size:)
25
+ super()
26
+ @max_payload_size = max_payload_size
27
+ end
28
+
29
+ required(:topic) do |val|
30
+ (val.is_a?(String) || val.is_a?(Symbol)) && TOPIC_REGEXP.match?(val.to_s)
31
+ end
32
+
33
+ required(:payload) { |val| val.nil? || val.is_a?(String) }
34
+ optional(:key) { |val| val.nil? || (val.is_a?(String) && !val.empty?) }
35
+ optional(:partition) { |val| val.is_a?(Integer) && val >= -1 }
36
+ optional(:partition_key) { |val| val.nil? || (val.is_a?(String) && !val.empty?) }
37
+ optional(:timestamp) { |val| val.nil? || (val.is_a?(Time) || val.is_a?(Integer)) }
38
+ optional(:headers) { |val| val.nil? || val.is_a?(Hash) }
39
+
40
+ virtual do |message, errors|
41
+ next true unless errors.empty?
42
+ next true unless message.key?(:headers)
43
+ next true if message[:headers].nil?
44
+
45
+ errors = []
46
+
47
+ message.fetch(:headers).each do |key, value|
48
+ errors << [%i[headers], :invalid_key_type] unless key.is_a?(String)
49
+ errors << [%i[headers], :invalid_value_type] unless value.is_a?(String)
50
+ end
51
+
52
+ errors
53
+ end
54
+
55
+ virtual do |message, errors, validator|
56
+ next true unless errors.empty?
57
+ next if message[:payload].nil? # tombstone payload
58
+ next true if message[:payload].bytesize <= validator.max_payload_size
59
+
60
+ [[%i[payload], :max_size]]
61
+ end
62
+ end
63
+ end
64
+ end
@@ -29,15 +29,22 @@ module WaterDrop
29
29
  # contact us as it is an error.
30
30
  StatusInvalidError = Class.new(BaseError)
31
31
 
32
- # Raised when during messages flushing something bad happened
33
- class FlushFailureError < BaseError
34
- attr_reader :dispatched_messages
32
+ # Raised when there is an inline error during single message produce operations
33
+ ProduceError = Class.new(BaseError)
35
34
 
36
- # @param dispatched_messages [Array<Rdkafka::Producer::DeliveryHandle>] handlers of the
35
+ # Raise it within a transaction to abort it
36
+ AbortTransaction = Class.new(BaseError)
37
+
38
+ # Raised when during messages producing something bad happened inline
39
+ class ProduceManyError < ProduceError
40
+ attr_reader :dispatched
41
+
42
+ # @param dispatched [Array<Rdkafka::Producer::DeliveryHandle>] handlers of the
37
43
  # messages that we've dispatched
38
- def initialize(dispatched_messages)
39
- super()
40
- @dispatched_messages = dispatched_messages
44
+ # @param message [String] error message
45
+ def initialize(dispatched, message)
46
+ super(message)
47
+ @dispatched = dispatched
41
48
  end
42
49
  end
43
50
  end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Instrumentation
5
+ module Callbacks
6
+ # Creates a callable that we want to run upon each message delivery or failure
7
+ #
8
+ # @note We don't have to provide client_name here as this callback is per client instance
9
+ #
10
+ # @note We do not consider `message.purge` as an error for transactional producers, because
11
+ # this is a standard behaviour for not yet dispatched messages on aborted transactions.
12
+ # We do however still want to instrument it for traceability.
13
+ class Delivery
14
+ # Error emitted when a message was not yet dispatched and was purged from the queue
15
+ RD_KAFKA_RESP_PURGE_QUEUE = -152
16
+
17
+ # Error emitted when a message was purged while it was dispatched
18
+ RD_KAFKA_RESP_PURGE_INFLIGHT = -151
19
+
20
+ # Errors related to queue purging that is expected in transactions
21
+ PURGE_ERRORS = [RD_KAFKA_RESP_PURGE_INFLIGHT, RD_KAFKA_RESP_PURGE_QUEUE].freeze
22
+
23
+ private_constant :RD_KAFKA_RESP_PURGE_QUEUE, :RD_KAFKA_RESP_PURGE_INFLIGHT, :PURGE_ERRORS
24
+
25
+ # @param producer_id [String] id of the current producer
26
+ # @param transactional [Boolean] is this handle for a transactional or regular producer
27
+ # @param monitor [WaterDrop::Instrumentation::Monitor] monitor we are using
28
+ def initialize(producer_id, transactional, monitor)
29
+ @producer_id = producer_id
30
+ @transactional = transactional
31
+ @monitor = monitor
32
+ end
33
+
34
+ # Emits delivery details to the monitor
35
+ # @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
36
+ def call(delivery_report)
37
+ error_code = delivery_report.error.to_i
38
+
39
+ if error_code.zero?
40
+ instrument_acknowledged(delivery_report)
41
+
42
+ elsif @transactional && PURGE_ERRORS.include?(error_code)
43
+ instrument_purged(delivery_report)
44
+ else
45
+ instrument_error(delivery_report)
46
+ end
47
+ end
48
+
49
+ private
50
+
51
+ # @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
52
+ def instrument_acknowledged(delivery_report)
53
+ @monitor.instrument(
54
+ 'message.acknowledged',
55
+ caller: self,
56
+ producer_id: @producer_id,
57
+ offset: delivery_report.offset,
58
+ partition: delivery_report.partition,
59
+ topic: delivery_report.topic_name,
60
+ delivery_report: delivery_report
61
+ )
62
+ end
63
+
64
+ # @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
65
+ def instrument_purged(delivery_report)
66
+ @monitor.instrument(
67
+ 'message.purged',
68
+ caller: self,
69
+ error: build_error(delivery_report),
70
+ producer_id: @producer_id,
71
+ offset: delivery_report.offset,
72
+ partition: delivery_report.partition,
73
+ topic: delivery_report.topic_name,
74
+ delivery_report: delivery_report
75
+ )
76
+ end
77
+
78
+ # @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
79
+ def instrument_error(delivery_report)
80
+ @monitor.instrument(
81
+ 'error.occurred',
82
+ caller: self,
83
+ error: build_error(delivery_report),
84
+ producer_id: @producer_id,
85
+ offset: delivery_report.offset,
86
+ partition: delivery_report.partition,
87
+ topic: delivery_report.topic_name,
88
+ delivery_report: delivery_report,
89
+ type: 'librdkafka.dispatch_error'
90
+ )
91
+ end
92
+
93
+ # Builds appropriate rdkafka error
94
+ # @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
95
+ # @return [::Rdkafka::RdkafkaError]
96
+ def build_error(delivery_report)
97
+ ::Rdkafka::RdkafkaError.new(delivery_report.error)
98
+ end
99
+ end
100
+ end
101
+ end
102
+ end
@@ -18,15 +18,19 @@ module WaterDrop
18
18
  # @param client_name [String] rdkafka client name
19
19
  # @param error [Rdkafka::Error] error that occurred
20
20
  # @note It will only instrument on errors of the client of our producer
21
+ # @note When there is a particular message produce error (not internal error), the error
22
+ # is shipped via the delivery callback, not via error callback.
21
23
  def call(client_name, error)
22
24
  # Emit only errors related to our client
23
25
  # Same as with statistics (mor explanation there)
24
26
  return unless @client_name == client_name
25
27
 
26
28
  @monitor.instrument(
27
- 'error.emitted',
29
+ 'error.occurred',
30
+ caller: self,
31
+ error: error,
28
32
  producer_id: @producer_id,
29
- error: error
33
+ type: 'librdkafka.error'
30
34
  )
31
35
  end
32
36
  end
@@ -17,7 +17,7 @@ module WaterDrop
17
17
  @producer_id = producer_id
18
18
  @client_name = client_name
19
19
  @monitor = monitor
20
- @statistics_decorator = StatisticsDecorator.new
20
+ @statistics_decorator = ::Karafka::Core::Monitoring::StatisticsDecorator.new
21
21
  end
22
22
 
23
23
  # Emits decorated statistics to the monitor
@@ -1,15 +1,25 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module WaterDrop
4
+ # WaterDrop instrumentation related module
4
5
  module Instrumentation
5
6
  # Default listener that hooks up to our instrumentation and uses its events for logging
6
7
  # It can be removed/replaced or anything without any harm to the Waterdrop flow
7
8
  # @note It is a module as we can use it then as a part of the Karafka framework listener
8
9
  # as well as we can use it standalone
9
- class StdoutListener
10
- # @param logger [Object] stdout logger we want to use
11
- def initialize(logger)
10
+ class LoggerListener
11
+ # @param logger [Object] logger we want to use
12
+ # @param log_messages [Boolean] Should we report the messages content (payload and metadata)
13
+ # with each message operation.
14
+ #
15
+ # This can be extensive, especially when producing a lot of messages. We provide this
16
+ # despite the fact that we only report payloads in debug, because Rails by default operates
17
+ # with debug level. This means, that when working with Rails in development, every single
18
+ # payload dispatched will go to logs. In majority of the cases this is extensive and simply
19
+ # floods the end user.
20
+ def initialize(logger, log_messages: true)
12
21
  @logger = logger
22
+ @log_messages = log_messages
13
23
  end
14
24
 
15
25
  # @param event [Dry::Events::Event] event that happened with the details
@@ -17,6 +27,9 @@ module WaterDrop
17
27
  message = event[:message]
18
28
 
19
29
  info(event, "Async producing of a message to '#{message[:topic]}' topic")
30
+
31
+ return unless log_messages?
32
+
20
33
  debug(event, message)
21
34
  end
22
35
 
@@ -25,6 +38,9 @@ module WaterDrop
25
38
  message = event[:message]
26
39
 
27
40
  info(event, "Sync producing of a message to '#{message[:topic]}' topic")
41
+
42
+ return unless log_messages?
43
+
28
44
  debug(event, message)
29
45
  end
30
46
 
@@ -34,6 +50,9 @@ module WaterDrop
34
50
  topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
35
51
 
36
52
  info(event, "Async producing of #{messages.size} messages to #{topics_count} topics")
53
+
54
+ return unless log_messages?
55
+
37
56
  debug(event, messages)
38
57
  end
39
58
 
@@ -43,6 +62,9 @@ module WaterDrop
43
62
  topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
44
63
 
45
64
  info(event, "Sync producing of #{messages.size} messages to #{topics_count} topics")
65
+
66
+ return unless log_messages?
67
+
46
68
  debug(event, messages)
47
69
  end
48
70
 
@@ -51,6 +73,9 @@ module WaterDrop
51
73
  message = event[:message]
52
74
 
53
75
  info(event, "Buffering of a message to '#{message[:topic]}' topic")
76
+
77
+ return unless log_messages?
78
+
54
79
  debug(event, [message])
55
80
  end
56
81
 
@@ -59,6 +84,9 @@ module WaterDrop
59
84
  messages = event[:messages]
60
85
 
61
86
  info(event, "Buffering of #{messages.size} messages")
87
+
88
+ return unless log_messages?
89
+
62
90
  debug(event, [messages, messages.size])
63
91
  end
64
92
 
@@ -67,15 +95,9 @@ module WaterDrop
67
95
  messages = event[:messages]
68
96
 
69
97
  info(event, "Async flushing of #{messages.size} messages from the buffer")
70
- debug(event, messages)
71
- end
72
98
 
73
- # @param event [Dry::Events::Event] event that happened with the details
74
- def on_buffer_flushed_async_error(event)
75
- messages = event[:messages]
76
- error = event[:error]
99
+ return unless log_messages?
77
100
 
78
- error(event, "Async flushing of #{messages.size} failed due to: #{error}")
79
101
  debug(event, messages)
80
102
  end
81
103
 
@@ -84,34 +106,57 @@ module WaterDrop
84
106
  messages = event[:messages]
85
107
 
86
108
  info(event, "Sync flushing of #{messages.size} messages from the buffer")
109
+
110
+ return unless log_messages?
111
+
87
112
  debug(event, messages)
88
113
  end
89
114
 
90
115
  # @param event [Dry::Events::Event] event that happened with the details
91
- def on_buffer_flushed_sync_error(event)
92
- messages = event[:dispatched]
93
- error = event[:error]
94
-
95
- error(event, "Sync flushing of #{messages.size} failed due to: #{error}")
96
- debug(event, messages)
116
+ def on_buffer_purged(event)
117
+ info(event, 'Successfully purging buffer')
97
118
  end
98
119
 
99
120
  # @param event [Dry::Events::Event] event that happened with the details
100
121
  def on_producer_closed(event)
101
- info event, 'Closing producer'
102
- debug event, ''
122
+ info(event, 'Closing producer')
103
123
  end
104
124
 
105
125
  # @param event [Dry::Events::Event] event that happened with the error details
106
- def on_error_emitted(event)
126
+ def on_error_occurred(event)
107
127
  error = event[:error]
128
+ type = event[:type]
129
+
130
+ error(event, "Error occurred: #{error} - #{type}")
131
+ end
132
+
133
+ # @param event [Dry::Events::Event] event that happened with the details
134
+ def on_transaction_started(event)
135
+ info(event, 'Starting transaction')
136
+ end
108
137
 
109
- error(event, "Background thread error emitted: #{error}")
110
- debug(event, '')
138
+ # @param event [Dry::Events::Event] event that happened with the details
139
+ def on_transaction_aborted(event)
140
+ info(event, 'Aborting transaction')
141
+ end
142
+
143
+ # @param event [Dry::Events::Event] event that happened with the details
144
+ def on_transaction_committed(event)
145
+ info(event, 'Committing transaction')
146
+ end
147
+
148
+ # @param event [Dry::Events::Event] event that happened with the details
149
+ def on_transaction_finished(event)
150
+ info(event, 'Processing transaction')
111
151
  end
112
152
 
113
153
  private
114
154
 
155
+ # @return [Boolean] should we report the messages details in the debug mode.
156
+ def log_messages?
157
+ @log_messages
158
+ end
159
+
115
160
  # @param event [Dry::Events::Event] event that happened with the details
116
161
  # @param log_message [String] message we want to publish
117
162
  def debug(event, log_message)
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Instrumentation
5
+ # WaterDrop instrumentation monitor that we use to publish events
6
+ # By default uses our internal notifications bus but can be used with
7
+ # `ActiveSupport::Notifications` as well
8
+ class Monitor < ::Karafka::Core::Monitoring::Monitor
9
+ # @param notifications_bus [Object] either our internal notifications bus or
10
+ # `ActiveSupport::Notifications`
11
+ # @param namespace [String, nil] namespace for events or nil if no namespace
12
+ def initialize(
13
+ notifications_bus = WaterDrop::Instrumentation::Notifications.new,
14
+ namespace = nil
15
+ )
16
+ super(notifications_bus, namespace)
17
+ end
18
+ end
19
+ end
20
+ end
@@ -2,13 +2,8 @@
2
2
 
3
3
  module WaterDrop
4
4
  module Instrumentation
5
- # Monitor is used to hookup external monitoring services to monitor how WaterDrop works
6
- # Since it is a pub-sub based on dry-monitor, you can use as many subscribers/loggers at the
7
- # same time, which means that you might have for example file logging and NewRelic at the same
8
- # time
9
- # @note This class acts as a singleton because we are only permitted to have single monitor
10
- # per running process (just as logger)
11
- class Monitor < Dry::Monitor::Notifications
5
+ # Notifications are used to hook up external monitoring services to monitor how WaterDrop works
6
+ class Notifications < ::Karafka::Core::Monitoring::Notifications
12
7
  # List of events that we support in the system and to which a monitor client can hook up
13
8
  # @note The non-error ones support timestamp benchmarking
14
9
  EVENTS = %w[
@@ -17,28 +12,31 @@ module WaterDrop
17
12
  message.produced_async
18
13
  message.produced_sync
19
14
  message.acknowledged
15
+ message.purged
20
16
  message.buffered
21
17
 
22
18
  messages.produced_async
23
19
  messages.produced_sync
24
20
  messages.buffered
25
21
 
22
+ transaction.started
23
+ transaction.committed
24
+ transaction.aborted
25
+ transaction.finished
26
+
26
27
  buffer.flushed_async
27
- buffer.flushed_async.error
28
28
  buffer.flushed_sync
29
- buffer.flushed_sync.error
29
+ buffer.purged
30
30
 
31
31
  statistics.emitted
32
32
 
33
- error.emitted
33
+ error.occurred
34
34
  ].freeze
35
35
 
36
- private_constant :EVENTS
37
-
38
36
  # @return [WaterDrop::Instrumentation::Notifications] notifications bus instance for system instrumentation
39
37
  def initialize
40
- super(:waterdrop)
41
- EVENTS.each(&method(:register_event))
38
+ super
39
+ EVENTS.each { |event| register_event(event) }
42
40
  end
43
41
  end
44
42
  end
@@ -0,0 +1 @@
1
+ {"title":"WaterDrop producer example dashboard","description":"This dashboard include example setup for monitoring activity of your WaterDrop producer","widgets":[{"id":243951318,"definition":{"title":"Messages produced","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"produced sync","formula":"query1"},{"alias":"produced async","formula":"query2"},{"alias":"flushed sync","formula":"query3"},{"alias":"flushed async","formula":"query4"},{"alias":"acknowledged","formula":"query5"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.produced_sync{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:waterdrop.produced_async{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:waterdrop.flushed_sync{*}.as_count()","data_source":"metrics","name":"query3"},{"query":"sum:waterdrop.flushed_async{*}.as_count()","data_source":"metrics","name":"query4"},{"query":"sum:waterdrop.acknowledged{*}.as_count()","data_source":"metrics","name":"query5"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":1979626566852990,"definition":{"title":"Messages buffer size","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"max","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.buffer.size.max{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]}},{"id":243951221,"definition":{"title":"Kafka broker API 
calls","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"API calls","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.calls{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951952,"definition":{"title":"Producer queue size","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Queue size average","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"max:waterdrop.queue.size.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Queue size max","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"max:waterdrop.queue.size.max{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951263,"definition":{"title":"Producer queue latency","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Average latency","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency 
p95","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.p95{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p99","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.p99{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951276,"definition":{"title":"Producer network latency","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Average latency","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.request_size.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p95","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.network.latency.p95{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p99","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.network.latency.p99{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243954928,"definition":{"title":"Producer 
errors","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.error_occurred{*}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"auto","id":"rnr-kgh-dna"}