waterdrop 2.0.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +33 -6
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +80 -0
  6. data/Gemfile +0 -2
  7. data/Gemfile.lock +36 -87
  8. data/MIT-LICENSE +18 -0
  9. data/README.md +180 -46
  10. data/certs/mensfeld.pem +21 -21
  11. data/config/errors.yml +29 -5
  12. data/docker-compose.yml +2 -1
  13. data/lib/{water_drop → waterdrop}/config.rb +47 -19
  14. data/lib/waterdrop/contracts/config.rb +40 -0
  15. data/lib/waterdrop/contracts/message.rb +60 -0
  16. data/lib/waterdrop/instrumentation/callbacks/delivery.rb +30 -0
  17. data/lib/waterdrop/instrumentation/callbacks/error.rb +36 -0
  18. data/lib/waterdrop/instrumentation/callbacks/statistics.rb +41 -0
  19. data/lib/waterdrop/instrumentation/callbacks/statistics_decorator.rb +77 -0
  20. data/lib/waterdrop/instrumentation/callbacks_manager.rb +39 -0
  21. data/lib/{water_drop/instrumentation/stdout_listener.rb → waterdrop/instrumentation/logger_listener.rb} +17 -26
  22. data/lib/waterdrop/instrumentation/monitor.rb +20 -0
  23. data/lib/{water_drop/instrumentation/monitor.rb → waterdrop/instrumentation/notifications.rb} +12 -13
  24. data/lib/waterdrop/instrumentation/vendors/datadog/dashboard.json +1 -0
  25. data/lib/waterdrop/instrumentation/vendors/datadog/listener.rb +210 -0
  26. data/lib/waterdrop/instrumentation.rb +20 -0
  27. data/lib/waterdrop/patches/rdkafka/bindings.rb +42 -0
  28. data/lib/waterdrop/patches/rdkafka/producer.rb +28 -0
  29. data/lib/{water_drop → waterdrop}/producer/async.rb +2 -2
  30. data/lib/{water_drop → waterdrop}/producer/buffer.rb +15 -8
  31. data/lib/waterdrop/producer/builder.rb +28 -0
  32. data/lib/{water_drop → waterdrop}/producer/sync.rb +2 -2
  33. data/lib/{water_drop → waterdrop}/producer.rb +29 -15
  34. data/lib/{water_drop → waterdrop}/version.rb +1 -1
  35. data/lib/waterdrop.rb +33 -2
  36. data/waterdrop.gemspec +12 -10
  37. data.tar.gz.sig +0 -0
  38. metadata +64 -97
  39. metadata.gz.sig +0 -0
  40. data/.github/FUNDING.yml +0 -1
  41. data/LICENSE +0 -165
  42. data/lib/water_drop/contracts/config.rb +0 -26
  43. data/lib/water_drop/contracts/message.rb +0 -41
  44. data/lib/water_drop/instrumentation.rb +0 -7
  45. data/lib/water_drop/producer/builder.rb +0 -63
  46. data/lib/water_drop/producer/statistics_decorator.rb +0 -71
  47. data/lib/water_drop.rb +0 -30
  48. /data/lib/{water_drop → waterdrop}/contracts.rb +0 -0
  49. /data/lib/{water_drop → waterdrop}/errors.rb +0 -0
  50. /data/lib/{water_drop → waterdrop}/producer/dummy_client.rb +0 -0
  51. /data/lib/{water_drop → waterdrop}/producer/status.rb +0 -0
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Instrumentation
5
+ module Callbacks
6
+ # Callback that kicks in when error occurs and is published in a background thread
7
+ class Error
8
+ # @param producer_id [String] id of the current producer
9
+ # @param client_name [String] rdkafka client name
10
+ # @param monitor [WaterDrop::Instrumentation::Monitor] monitor we are using
11
+ def initialize(producer_id, client_name, monitor)
12
+ @producer_id = producer_id
13
+ @client_name = client_name
14
+ @monitor = monitor
15
+ end
16
+
17
+ # Runs the instrumentation monitor with error
18
+ # @param client_name [String] rdkafka client name
19
+ # @param error [Rdkafka::Error] error that occurred
20
+ # @note It will only instrument on errors of the client of our producer
21
+ def call(client_name, error)
22
+ # Emit only errors related to our client
23
+ # Same as with statistics (more explanation there)
24
+ return unless @client_name == client_name
25
+
26
+ @monitor.instrument(
27
+ 'error.occurred',
28
+ error: error,
29
+ producer_id: @producer_id,
30
+ type: 'librdkafka.error'
31
+ )
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Instrumentation
5
+ # Namespace for handlers of callbacks emitted by the kafka client lib
6
+ module Callbacks
7
+ # Statistics callback handler
8
+ # @note We decorate the statistics with our own decorator because some of the metrics from
9
+ # rdkafka are absolute. For example number of sent messages increases not in reference to
10
+ # previous statistics emit but from the beginning of the process. We decorate it with diff
11
+ # of all the numeric values against the data from the previous callback emit
12
+ class Statistics
13
+ # @param producer_id [String] id of the current producer
14
+ # @param client_name [String] rdkafka client name
15
+ # @param monitor [WaterDrop::Instrumentation::Monitor] monitor we are using
16
+ def initialize(producer_id, client_name, monitor)
17
+ @producer_id = producer_id
18
+ @client_name = client_name
19
+ @monitor = monitor
20
+ @statistics_decorator = StatisticsDecorator.new
21
+ end
22
+
23
+ # Emits decorated statistics to the monitor
24
+ # @param statistics [Hash] rdkafka statistics
25
+ def call(statistics)
26
+ # Emit only statistics related to our client
27
+ # rdkafka does not have per-instance statistics hook, thus we need to make sure that we
28
+ # emit only stats that are related to current producer. Otherwise we would emit all of
29
+ # them, all the time.
30
+ return unless @client_name == statistics['name']
31
+
32
+ @monitor.instrument(
33
+ 'statistics.emitted',
34
+ producer_id: @producer_id,
35
+ statistics: @statistics_decorator.call(statistics)
36
+ )
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Instrumentation
5
+ module Callbacks
6
+ # Many of the librdkafka statistics are absolute values instead of a gauge.
7
+ # This means, that for example number of messages sent is an absolute growing value
8
+ # instead of being a value of messages sent from the last statistics report.
9
+ # This decorator calculates the diff against previously emitted stats, so we also get
10
+ # the diff together with the original values
11
+ class StatisticsDecorator
12
+ def initialize
13
+ @previous = {}.freeze
14
+ end
15
+
16
+ # @param emited_stats [Hash] original emited statistics
17
+ # @return [Hash] emited statistics extended with the diff data
18
+ # @note We modify the emited statistics, instead of creating new. Since we don't expose
19
+ # any API to get raw data, users can just assume that the result of this decoration is
20
+ # the proper raw stats that they can use
21
+ def call(emited_stats)
22
+ diff(
23
+ @previous,
24
+ emited_stats
25
+ )
26
+
27
+ @previous = emited_stats
28
+
29
+ emited_stats.freeze
30
+ end
31
+
32
+ private
33
+
34
+ # Calculates the diff of the provided values and modifies in place the emited statistics
35
+ #
36
+ # @param previous [Object] previous value from the given scope in which
37
+ # we are
38
+ # @param current [Object] current scope from emitted statistics
39
+ # @return [Object] the diff if the values were numerics or the current scope
40
+ def diff(previous, current)
41
+ if current.is_a?(Hash)
42
+ # @note We cannot use #each_key as we modify the content of the current scope
43
+ # in place (in case it's a hash)
44
+ current.keys.each do |key|
45
+ append(
46
+ current,
47
+ key,
48
+ diff((previous || {})[key], (current || {})[key])
49
+ )
50
+ end
51
+ end
52
+
53
+ # Diff can be computed only for numerics
54
+ return current unless current.is_a?(Numeric)
55
+ # If there was no previous value, delta is always zero
56
+ return 0 unless previous
57
+ # Should never happen but just in case, a type changed in between stats
58
+ return current unless previous.is_a?(Numeric)
59
+
60
+ current - previous
61
+ end
62
+
63
+ # Appends the result of the diff to a given key as long as the result is numeric
64
+ #
65
+ # @param current [Hash] current scope
66
+ # @param key [Symbol] key based on which we were diffing
67
+ # @param result [Object] diff result
68
+ def append(current, key, result)
69
+ return unless result.is_a?(Numeric)
70
+ return if current.frozen?
71
+
72
+ current["#{key}_d"] = result
73
+ end
74
+ end
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Instrumentation
5
+ # This manager allows us to register multiple callbacks into a hook that is supposed to support
6
+ # a single callback
7
+ class CallbacksManager
8
+ # @return [::WaterDrop::Instrumentation::CallbacksManager]
9
+ def initialize
10
+ @callbacks = Concurrent::Hash.new
11
+ end
12
+
13
+ # Invokes all the callbacks registered one after another
14
+ #
15
+ # @param args [Object] any args that should go to the callbacks
16
+ # @note We do not use `#each_value` here on purpose. With it being used, we cannot dispatch
17
+ # callbacks and add new at the same time. Since we don't know when and in what thread
18
+ # things are going to be added to the manager, we need to extract values into an array and
19
+ # run it. That way we can add new things at the same time.
20
+ def call(*args)
21
+ @callbacks.values.each { |callback| callback.call(*args) }
22
+ end
23
+
24
+ # Adds a callback to the manager
25
+ #
26
+ # @param id [String] id of the callback (used when deleting it)
27
+ # @param callable [#call] object that responds to a `#call` method
28
+ def add(id, callable)
29
+ @callbacks[id] = callable
30
+ end
31
+
32
+ # Removes the callback from the manager
33
+ # @param id [String] id of the callback we want to remove
34
+ def delete(id)
35
+ @callbacks.delete(id)
36
+ end
37
+ end
38
+ end
39
+ end
@@ -6,8 +6,8 @@ module WaterDrop
6
6
  # It can be removed/replaced or anything without any harm to the Waterdrop flow
7
7
  # @note It is a module as we can use it then as a part of the Karafka framework listener
8
8
  # as well as we can use it standalone
9
- class StdoutListener
10
- # @param logger [Object] stdout logger we want to use
9
+ class LoggerListener
10
+ # @param logger [Object] logger we want to use
11
11
  def initialize(logger)
12
12
  @logger = logger
13
13
  end
@@ -51,7 +51,7 @@ module WaterDrop
51
51
  message = event[:message]
52
52
 
53
53
  info(event, "Buffering of a message to '#{message[:topic]}' topic")
54
- debug(event, [message, event[:producer].messages.size])
54
+ debug(event, [message])
55
55
  end
56
56
 
57
57
  # @param event [Dry::Events::Event] event that happened with the details
@@ -59,7 +59,7 @@ module WaterDrop
59
59
  messages = event[:messages]
60
60
 
61
61
  info(event, "Buffering of #{messages.size} messages")
62
- debug(event, [messages, event[:producer].messages.size])
62
+ debug(event, [messages, messages.size])
63
63
  end
64
64
 
65
65
  # @param event [Dry::Events::Event] event that happened with the details
@@ -70,15 +70,6 @@ module WaterDrop
70
70
  debug(event, messages)
71
71
  end
72
72
 
73
- # @param event [Dry::Events::Event] event that happened with the details
74
- def on_buffer_flushed_async_error(event)
75
- messages = event[:messages]
76
- error = event[:error]
77
-
78
- error(event, "Async flushing of #{messages.size} failed due to: #{error}")
79
- debug(event, messages)
80
- end
81
-
82
73
  # @param event [Dry::Events::Event] event that happened with the details
83
74
  def on_buffer_flushed_sync(event)
84
75
  messages = event[:messages]
@@ -87,19 +78,19 @@ module WaterDrop
87
78
  debug(event, messages)
88
79
  end
89
80
 
90
- # @param event [Dry::Events::Event] event that happened with the details
91
- def on_buffer_flushed_sync_error(event)
92
- messages = event[:dispatched]
93
- error = event[:error]
94
-
95
- error(event, "Sync flushing of #{messages.size} failed due to: #{error}")
96
- debug(event, messages)
97
- end
98
-
99
81
  # @param event [Dry::Events::Event] event that happened with the details
100
82
  def on_producer_closed(event)
101
83
  info event, 'Closing producer'
102
- debug event, event[:producer].messages.size
84
+ debug event, ''
85
+ end
86
+
87
+ # @param event [Dry::Events::Event] event that happened with the error details
88
+ def on_error_occurred(event)
89
+ error = event[:error]
90
+ type = event[:type]
91
+
92
+ error(event, "Error occurred: #{error} - #{type}")
93
+ debug(event, '')
103
94
  end
104
95
 
105
96
  private
@@ -107,19 +98,19 @@ module WaterDrop
107
98
  # @param event [Dry::Events::Event] event that happened with the details
108
99
  # @param log_message [String] message we want to publish
109
100
  def debug(event, log_message)
110
- @logger.debug("[#{event[:producer].id}] #{log_message}")
101
+ @logger.debug("[#{event[:producer_id]}] #{log_message}")
111
102
  end
112
103
 
113
104
  # @param event [Dry::Events::Event] event that happened with the details
114
105
  # @param log_message [String] message we want to publish
115
106
  def info(event, log_message)
116
- @logger.info("[#{event[:producer].id}] #{log_message} took #{event[:time]} ms")
107
+ @logger.info("[#{event[:producer_id]}] #{log_message} took #{event[:time]} ms")
117
108
  end
118
109
 
119
110
  # @param event [Dry::Events::Event] event that happened with the details
120
111
  # @param log_message [String] message we want to publish
121
112
  def error(event, log_message)
122
- @logger.error("[#{event[:producer].id}] #{log_message}")
113
+ @logger.error("[#{event[:producer_id]}] #{log_message}")
123
114
  end
124
115
  end
125
116
  end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Instrumentation
5
+ # WaterDrop instrumentation monitor that we use to publish events
6
+ # By default uses our internal notifications bus but can be used with
7
+ # `ActiveSupport::Notifications` as well
8
+ class Monitor < ::Karafka::Core::Monitoring::Monitor
9
+ # @param notifications_bus [Object] either our internal notifications bus or
10
+ # `ActiveSupport::Notifications`
11
+ # @param namespace [String, nil] namespace for events or nil if no namespace
12
+ def initialize(
13
+ notifications_bus = WaterDrop::Instrumentation::Notifications.new,
14
+ namespace = nil
15
+ )
16
+ super(notifications_bus, namespace)
17
+ end
18
+ end
19
+ end
20
+ end
@@ -2,37 +2,36 @@
2
2
 
3
3
  module WaterDrop
4
4
  module Instrumentation
5
- # Monitor is used to hookup external monitoring services to monitor how WaterDrop works
6
- # Since it is a pub-sub based on dry-monitor, you can use as many subscribers/loggers at the
7
- # same time, which means that you might have for example file logging and NewRelic at the same
8
- # time
9
- # @note This class acts as a singleton because we are only permitted to have single monitor
10
- # per running process (just as logger)
11
- class Monitor < Dry::Monitor::Notifications
5
+ # Instrumented is used to hook up external monitoring services to monitor how WaterDrop works
6
+ class Notifications < ::Karafka::Core::Monitoring::Notifications
12
7
  # List of events that we support in the system and to which a monitor client can hook up
13
8
  # @note The non-error ones support timestamp benchmarking
14
9
  EVENTS = %w[
15
10
  producer.closed
11
+
16
12
  message.produced_async
17
13
  message.produced_sync
14
+ message.acknowledged
15
+ message.buffered
16
+
18
17
  messages.produced_async
19
18
  messages.produced_sync
20
- message.buffered
21
19
  messages.buffered
22
- message.acknowledged
20
+
23
21
  buffer.flushed_async
24
- buffer.flushed_async.error
25
22
  buffer.flushed_sync
26
- buffer.flushed_sync.error
23
+
27
24
  statistics.emitted
25
+
26
+ error.occurred
28
27
  ].freeze
29
28
 
30
29
  private_constant :EVENTS
31
30
 
32
31
  # @return [WaterDrop::Instrumentation::Monitor] monitor instance for system instrumentation
33
32
  def initialize
34
- super(:waterdrop)
35
- EVENTS.each(&method(:register_event))
33
+ super
34
+ EVENTS.each { |event| register_event(event) }
36
35
  end
37
36
  end
38
37
  end
@@ -0,0 +1 @@
1
+ {"title":"WaterDrop producer example dashboard","description":"This dashboard include example setup for monitoring activity of your WaterDrop producer","widgets":[{"id":243951318,"definition":{"title":"Messages produced","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"produced sync","formula":"query1"},{"alias":"produced async","formula":"query2"},{"alias":"flushed sync","formula":"query3"},{"alias":"flushed async","formula":"query4"},{"alias":"acknowledged","formula":"query5"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.produced_sync{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:waterdrop.produced_async{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:waterdrop.flushed_sync{*}.as_count()","data_source":"metrics","name":"query3"},{"query":"sum:waterdrop.flushed_async{*}.as_count()","data_source":"metrics","name":"query4"},{"query":"sum:waterdrop.acknowledged{*}.as_count()","data_source":"metrics","name":"query5"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":1979626566852990,"definition":{"title":"Messages buffer size","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"max","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.buffer.size.max{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]}},{"id":243951221,"definition":{"title":"Kafka broker API 
calls","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"API calls","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.calls{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951952,"definition":{"title":"Producer queue size","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Queue size average","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"max:waterdrop.queue.size.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Queue size max","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"max:waterdrop.queue.size.max{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951263,"definition":{"title":"Producer queue latency","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Average latency","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency 
p95","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.p95{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p99","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.p99{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951276,"definition":{"title":"Producer network latency","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Average latency","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.request_size.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p95","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.network.latency.p95{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p99","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.network.latency.p99{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243954928,"definition":{"title":"Producer 
errors","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.error_occurred{*}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"auto","id":"rnr-kgh-dna"}
@@ -0,0 +1,210 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Instrumentation
5
+ # Namespace for vendor specific instrumentation
6
+ module Vendors
7
+ # Datadog specific instrumentation
8
+ module Datadog
9
+ # Listener that can be used to subscribe to WaterDrop producer to receive stats via StatsD
10
+ # and/or Datadog
11
+ #
12
+ # @note You need to setup the `dogstatsd-ruby` client and assign it
13
+ class Listener
14
+ include ::Karafka::Core::Configurable
15
+ extend Forwardable
16
+
17
+ def_delegators :config, :client, :rd_kafka_metrics, :namespace, :default_tags
18
+
19
+ # Value object for storing a single rdkafka metric publishing details
20
+ RdKafkaMetric = Struct.new(:type, :scope, :name, :key_location)
21
+
22
+ # Namespace under which the DD metrics should be published
23
+ setting :namespace, default: 'waterdrop'
24
+
25
+ # Datadog client that we should use to publish the metrics
26
+ setting :client
27
+
28
+ # Default tags we want to publish (for example hostname)
29
+ # Format as followed (example for hostname): `["host:#{Socket.gethostname}"]`
30
+ setting :default_tags, default: []
31
+
32
+ # All the rdkafka metrics we want to publish
33
+ #
34
+ # By default we publish quite a lot so this can be tuned
35
+ # Note, that the ones with `_d` come from WaterDrop, not rdkafka or Kafka
36
+ setting :rd_kafka_metrics, default: [
37
+ # Client metrics
38
+ RdKafkaMetric.new(:count, :root, 'calls', 'tx_d'),
39
+ RdKafkaMetric.new(:histogram, :root, 'queue.size', 'msg_cnt_d'),
40
+
41
+ # Broker metrics
42
+ RdKafkaMetric.new(:count, :brokers, 'deliver.attempts', 'txretries_d'),
43
+ RdKafkaMetric.new(:count, :brokers, 'deliver.errors', 'txerrs_d'),
44
+ RdKafkaMetric.new(:count, :brokers, 'receive.errors', 'rxerrs_d'),
45
+ RdKafkaMetric.new(:gauge, :brokers, 'queue.latency.avg', %w[outbuf_latency avg]),
46
+ RdKafkaMetric.new(:gauge, :brokers, 'queue.latency.p95', %w[outbuf_latency p95]),
47
+ RdKafkaMetric.new(:gauge, :brokers, 'queue.latency.p99', %w[outbuf_latency p99]),
48
+ RdKafkaMetric.new(:gauge, :brokers, 'network.latency.avg', %w[rtt avg]),
49
+ RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p95', %w[rtt p95]),
50
+ RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p99', %w[rtt p99])
51
+ ].freeze
52
+
53
+ configure
54
+
55
+ # @param block [Proc] configuration block
56
+ def initialize(&block)
57
+ configure
58
+ setup(&block) if block
59
+ end
60
+
61
+ # @param block [Proc] configuration block
62
+ # @note We define this alias to be consistent with `WaterDrop#setup`
63
+ def setup(&block)
64
+ configure(&block)
65
+ end
66
+
67
+ # Hooks up to WaterDrop instrumentation for emitted statistics
68
+ #
69
+ # @param event [WaterDrop::Monitor::Event]
70
+ def on_statistics_emitted(event)
71
+ statistics = event[:statistics]
72
+
73
+ rd_kafka_metrics.each do |metric|
74
+ report_metric(metric, statistics)
75
+ end
76
+ end
77
+
78
+ # Increases the errors count by 1
79
+ #
80
+ # @param _event [WaterDrop::Monitor::Event]
81
+ def on_error_occurred(_event)
82
+ count('error_occurred', 1, tags: default_tags)
83
+ end
84
+
85
+ # Increases acknowledged messages counter
86
+ # @param _event [WaterDrop::Monitor::Event]
87
+ def on_message_acknowledged(_event)
88
+ increment('acknowledged', tags: default_tags)
89
+ end
90
+
91
+ %i[
92
+ produced_sync
93
+ produced_async
94
+ ].each do |event_scope|
95
+ class_eval <<~METHODS, __FILE__, __LINE__ + 1
96
+ # @param event [WaterDrop::Monitor::Event]
97
+ def on_message_#{event_scope}(event)
98
+ report_message(event[:message][:topic], :#{event_scope})
99
+ end
100
+
101
+ # @param event [WaterDrop::Monitor::Event]
102
+ def on_messages_#{event_scope}(event)
103
+ event[:messages].each do |message|
104
+ report_message(message[:topic], :#{event_scope})
105
+ end
106
+ end
107
+ METHODS
108
+ end
109
+
110
+ # Reports the buffer usage when anything is added to the buffer
111
+ %i[
112
+ message_buffered
113
+ messages_buffered
114
+ ].each do |event_scope|
115
+ class_eval <<~METHODS, __FILE__, __LINE__ + 1
116
+ # @param event [WaterDrop::Monitor::Event]
117
+ def on_#{event_scope}(event)
118
+ histogram(
119
+ 'buffer.size',
120
+ event[:buffer].size,
121
+ tags: default_tags
122
+ )
123
+ end
124
+ METHODS
125
+ end
126
+
127
+ # Events that support many messages only
128
+ # Reports data flushing operation (production from the buffer)
129
+ %i[
130
+ flushed_sync
131
+ flushed_async
132
+ ].each do |event_scope|
133
+ class_eval <<~METHODS, __FILE__, __LINE__ + 1
134
+ # @param event [WaterDrop::Monitor::Event]
135
+ def on_buffer_#{event_scope}(event)
136
+ event[:messages].each do |message|
137
+ report_message(message[:topic], :#{event_scope})
138
+ end
139
+ end
140
+ METHODS
141
+ end
142
+
143
+ private
144
+
145
+ %i[
146
+ count
147
+ gauge
148
+ histogram
149
+ increment
150
+ decrement
151
+ ].each do |metric_type|
152
+ class_eval <<~METHODS, __FILE__, __LINE__ + 1
153
+ def #{metric_type}(key, *args)
154
+ client.#{metric_type}(
155
+ namespaced_metric(key),
156
+ *args
157
+ )
158
+ end
159
+ METHODS
160
+ end
161
+
162
+ # Report that a message has been produced to a topic.
163
+ # @param topic [String] Kafka topic
164
+ # @param method_name [Symbol] method from which this message operation comes
165
+ def report_message(topic, method_name)
166
+ increment(method_name, tags: default_tags + ["topic:#{topic}"])
167
+ end
168
+
169
+ # Wraps metric name in listener's namespace
170
+ # @param metric_name [String] RdKafkaMetric name
171
+ # @return [String]
172
+ def namespaced_metric(metric_name)
173
+ "#{namespace}.#{metric_name}"
174
+ end
175
+
176
+ # Reports a given metric statistics to Datadog
177
+ # @param metric [RdKafkaMetric] metric value object
178
+ # @param statistics [Hash] hash with all the statistics emitted
179
+ def report_metric(metric, statistics)
180
+ case metric.scope
181
+ when :root
182
+ public_send(
183
+ metric.type,
184
+ metric.name,
185
+ statistics.fetch(*metric.key_location),
186
+ tags: default_tags
187
+ )
188
+ when :brokers
189
+ statistics.fetch('brokers').each_value do |broker_statistics|
190
+ # Skip bootstrap nodes
191
+ # Bootstrap nodes have nodeid -1, other nodes have positive
192
+ # node ids
193
+ next if broker_statistics['nodeid'] == -1
194
+
195
+ public_send(
196
+ metric.type,
197
+ metric.name,
198
+ broker_statistics.dig(*metric.key_location),
199
+ tags: default_tags + ["broker:#{broker_statistics['nodename']}"]
200
+ )
201
+ end
202
+ else
203
+ raise ArgumentError, metric.scope
204
+ end
205
+ end
206
+ end
207
+ end
208
+ end
209
+ end
210
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ # Namespace for all the things related to the WaterDrop instrumentation process
5
+ module Instrumentation
6
+ class << self
7
+ # Builds a manager for statistics callbacks
8
+ # @return [WaterDrop::Instrumentation::CallbacksManager]
9
+ def statistics_callbacks
10
+ @statistics_callbacks ||= CallbacksManager.new
11
+ end
12
+
13
+ # Builds a manager for error callbacks
14
+ # @return [WaterDrop::Instrumentation::CallbacksManager]
15
+ def error_callbacks
16
+ @error_callbacks ||= CallbacksManager.new
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Patches
5
+ module Rdkafka
6
+ # Extends `Rdkafka::Bindings` with some extra methods and updates callbacks that we intend
7
+ # to work with in a bit different way than rdkafka itself
8
+ module Bindings
9
+ class << self
10
+ # Add extra methods that we need
11
+ # @param mod [::Rdkafka::Bindings] rdkafka bindings module
12
+ def included(mod)
13
+ mod.attach_function :rd_kafka_name, [:pointer], :string
14
+
15
+ # Default rdkafka setup for errors does not propagate client details, thus it always
16
+ # publishes all the stuff for all rdkafka instances. We change that by providing
17
+ # function that fetches the instance name, allowing us to have better notifications
18
+ mod.send(:remove_const, :ErrorCallback)
19
+ mod.const_set(:ErrorCallback, build_error_callback)
20
+ end
21
+
22
+ # @return [FFI::Function] overwritten callback function
23
+ def build_error_callback
24
+ FFI::Function.new(
25
+ :void, %i[pointer int string pointer]
26
+ ) do |client_prr, err_code, reason, _opaque|
27
+ return nil unless ::Rdkafka::Config.error_callback
28
+
29
+ name = ::Rdkafka::Bindings.rd_kafka_name(client_prr)
30
+
31
+ error = ::Rdkafka::RdkafkaError.new(err_code, broker_message: reason)
32
+
33
+ ::Rdkafka::Config.error_callback.call(name, error)
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+
42
+ ::Rdkafka::Bindings.include(::WaterDrop::Patches::Rdkafka::Bindings)