waterdrop 2.0.0 → 2.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +33 -6
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +80 -0
  6. data/Gemfile +0 -2
  7. data/Gemfile.lock +36 -87
  8. data/MIT-LICENSE +18 -0
  9. data/README.md +180 -46
  10. data/certs/mensfeld.pem +21 -21
  11. data/config/errors.yml +29 -5
  12. data/docker-compose.yml +2 -1
  13. data/lib/{water_drop → waterdrop}/config.rb +47 -19
  14. data/lib/waterdrop/contracts/config.rb +40 -0
  15. data/lib/waterdrop/contracts/message.rb +60 -0
  16. data/lib/waterdrop/instrumentation/callbacks/delivery.rb +30 -0
  17. data/lib/waterdrop/instrumentation/callbacks/error.rb +36 -0
  18. data/lib/waterdrop/instrumentation/callbacks/statistics.rb +41 -0
  19. data/lib/waterdrop/instrumentation/callbacks/statistics_decorator.rb +77 -0
  20. data/lib/waterdrop/instrumentation/callbacks_manager.rb +39 -0
  21. data/lib/{water_drop/instrumentation/stdout_listener.rb → waterdrop/instrumentation/logger_listener.rb} +17 -26
  22. data/lib/waterdrop/instrumentation/monitor.rb +20 -0
  23. data/lib/{water_drop/instrumentation/monitor.rb → waterdrop/instrumentation/notifications.rb} +12 -13
  24. data/lib/waterdrop/instrumentation/vendors/datadog/dashboard.json +1 -0
  25. data/lib/waterdrop/instrumentation/vendors/datadog/listener.rb +210 -0
  26. data/lib/waterdrop/instrumentation.rb +20 -0
  27. data/lib/waterdrop/patches/rdkafka/bindings.rb +42 -0
  28. data/lib/waterdrop/patches/rdkafka/producer.rb +28 -0
  29. data/lib/{water_drop → waterdrop}/producer/async.rb +2 -2
  30. data/lib/{water_drop → waterdrop}/producer/buffer.rb +15 -8
  31. data/lib/waterdrop/producer/builder.rb +28 -0
  32. data/lib/{water_drop → waterdrop}/producer/sync.rb +2 -2
  33. data/lib/{water_drop → waterdrop}/producer.rb +29 -15
  34. data/lib/{water_drop → waterdrop}/version.rb +1 -1
  35. data/lib/waterdrop.rb +33 -2
  36. data/waterdrop.gemspec +12 -10
  37. data.tar.gz.sig +0 -0
  38. metadata +64 -97
  39. metadata.gz.sig +0 -0
  40. data/.github/FUNDING.yml +0 -1
  41. data/LICENSE +0 -165
  42. data/lib/water_drop/contracts/config.rb +0 -26
  43. data/lib/water_drop/contracts/message.rb +0 -41
  44. data/lib/water_drop/instrumentation.rb +0 -7
  45. data/lib/water_drop/producer/builder.rb +0 -63
  46. data/lib/water_drop/producer/statistics_decorator.rb +0 -71
  47. data/lib/water_drop.rb +0 -30
  48. /data/lib/{water_drop → waterdrop}/contracts.rb +0 -0
  49. /data/lib/{water_drop → waterdrop}/errors.rb +0 -0
  50. /data/lib/{water_drop → waterdrop}/producer/dummy_client.rb +0 -0
  51. /data/lib/{water_drop → waterdrop}/producer/status.rb +0 -0
# frozen_string_literal: true

module WaterDrop
  module Instrumentation
    module Callbacks
      # Callback that kicks in when an error occurs and is published in a background thread
      class Error
        # @param producer_id [String] id of the current producer
        # @param client_name [String] rdkafka client name
        # @param monitor [WaterDrop::Instrumentation::Monitor] monitor we are using
        def initialize(producer_id, client_name, monitor)
          @producer_id = producer_id
          @client_name = client_name
          @monitor = monitor
        end

        # Runs the instrumentation monitor with the error details
        # @param client_name [String] rdkafka client name
        # @param error [Rdkafka::Error] error that occurred
        # @note It will only instrument on errors of the client of our producer
        def call(client_name, error)
          # Ignore errors that belong to other producers' rdkafka clients
          # (same filtering rationale as in the statistics callback - more explanation there)
          return if client_name != @client_name

          @monitor.instrument(
            'error.occurred',
            error: error,
            producer_id: @producer_id,
            type: 'librdkafka.error'
          )
        end
      end
    end
  end
end
# frozen_string_literal: true

module WaterDrop
  module Instrumentation
    # Namespace for handlers of callbacks emitted by the kafka client lib
    module Callbacks
      # Statistics callback handler
      # @note Raw rdkafka statistics are decorated before being emitted because some of the
      #   metrics are absolute. For example, the number of sent messages grows from the
      #   beginning of the process, not from the previous statistics emit. The decorator adds
      #   a diff of all the numeric values against the data from the previous callback emit.
      class Statistics
        # @param producer_id [String] id of the current producer
        # @param client_name [String] rdkafka client name
        # @param monitor [WaterDrop::Instrumentation::Monitor] monitor we are using
        def initialize(producer_id, client_name, monitor)
          @producer_id = producer_id
          @client_name = client_name
          @monitor = monitor
          @statistics_decorator = StatisticsDecorator.new
        end

        # Emits decorated statistics to the monitor
        # @param statistics [Hash] rdkafka statistics
        def call(statistics)
          # rdkafka has no per-instance statistics hook, so this callback receives stats of
          # every client in the process. Emit only those that belong to our own client,
          # otherwise we would publish other producers' stats all the time.
          return if statistics['name'] != @client_name

          decorated = @statistics_decorator.call(statistics)

          @monitor.instrument(
            'statistics.emitted',
            producer_id: @producer_id,
            statistics: decorated
          )
        end
      end
    end
  end
end
# frozen_string_literal: true

module WaterDrop
  module Instrumentation
    module Callbacks
      # Many of the librdkafka statistics are absolute values instead of a gauge.
      # This means, that for example number of messages sent is an absolute growing value
      # instead of being a value of messages sent from the last statistics report.
      # This decorator calculates the diff against previously emitted stats, so we get also
      # the diff together with the original values
      class StatisticsDecorator
        def initialize
          # Frozen on purpose: the very first diff pass must not mutate this baseline
          @previous = {}.freeze
        end

        # @param emitted_stats [Hash] original emitted statistics
        # @return [Hash] emitted statistics extended with the diff data
        # @note We modify the emitted statistics, instead of creating new. Since we don't
        #   expose any API to get raw data, users can just assume that the result of this
        #   decoration is the proper raw stats that they can use
        def call(emitted_stats)
          diff(@previous, emitted_stats)

          @previous = emitted_stats

          # Freeze so subscribers cannot mutate the emitted payload after the fact
          emitted_stats.freeze
        end

        private

        # Calculates the diff of the provided values and modifies in place the emitted
        # statistics
        #
        # @param previous [Object] previous value from the given scope in which we are
        # @param current [Object] current scope from emitted statistics
        # @return [Object] the diff if the values were numerics or the current scope
        def diff(previous, current)
          if current.is_a?(Hash)
            # @note We cannot use #each_key as we modify the content of the current scope
            #   in place (in case it's a hash)
            current.keys.each do |key|
              append(
                current,
                key,
                # `current` is known to be a Hash in this branch, only `previous` may be nil
                diff((previous || {})[key], current[key])
              )
            end
          end

          # Diff can be computed only for numerics
          return current unless current.is_a?(Numeric)
          # If there was no previous value, delta is always zero
          return 0 unless previous
          # Should never happen but just in case, a type changed in between stats
          return current unless previous.is_a?(Numeric)

          current - previous
        end

        # Appends the result of the diff to a given key as long as the result is numeric
        #
        # @param current [Hash] current scope
        # @param key [Symbol] key based on which we were diffing
        # @param result [Object] diff result
        def append(current, key, result)
          return unless result.is_a?(Numeric)
          # The frozen baseline (and any frozen scope) must never be written to
          return if current.frozen?

          current["#{key}_d"] = result
        end
      end
    end
  end
end
# frozen_string_literal: true

module WaterDrop
  module Instrumentation
    # This manager allows us to register multiple callbacks into a hook that is supposed to
    # support only a single callback
    class CallbacksManager
      # @return [::WaterDrop::Instrumentation::CallbacksManager]
      def initialize
        @callbacks = Concurrent::Hash.new
      end

      # Invokes all the registered callbacks one after another
      #
      # @param args [Object] any args that should go to the callbacks
      # @note `#each_value` is deliberately avoided here. With it, we could not dispatch
      #   callbacks and register new ones at the same time. Since we don't know when and in
      #   what thread callbacks get added, we snapshot the values into an array and iterate
      #   over that, so new registrations during dispatch stay safe.
      def call(*args)
        snapshot = @callbacks.values
        snapshot.each { |registered| registered.call(*args) }
      end

      # Registers a callback under a given id
      #
      # @param id [String] id of the callback (used when deleting it)
      # @param callable [#call] object that responds to a `#call` method
      def add(id, callable)
        @callbacks[id] = callable
      end

      # Removes the callback with a given id from the manager
      # @param id [String] id of the callback we want to remove
      def delete(id)
        @callbacks.delete(id)
      end
    end
  end
end
@@ -6,8 +6,8 @@ module WaterDrop
6
6
  # It can be removed/replaced or anything without any harm to the Waterdrop flow
7
7
  # @note It is a module as we can use it then as a part of the Karafka framework listener
8
8
  # as well as we can use it standalone
9
- class StdoutListener
10
- # @param logger [Object] stdout logger we want to use
9
+ class LoggerListener
10
+ # @param logger [Object] logger we want to use
11
11
  def initialize(logger)
12
12
  @logger = logger
13
13
  end
@@ -51,7 +51,7 @@ module WaterDrop
51
51
  message = event[:message]
52
52
 
53
53
  info(event, "Buffering of a message to '#{message[:topic]}' topic")
54
- debug(event, [message, event[:producer].messages.size])
54
+ debug(event, [message])
55
55
  end
56
56
 
57
57
  # @param event [Dry::Events::Event] event that happened with the details
@@ -59,7 +59,7 @@ module WaterDrop
59
59
  messages = event[:messages]
60
60
 
61
61
  info(event, "Buffering of #{messages.size} messages")
62
- debug(event, [messages, event[:producer].messages.size])
62
+ debug(event, [messages, messages.size])
63
63
  end
64
64
 
65
65
  # @param event [Dry::Events::Event] event that happened with the details
@@ -70,15 +70,6 @@ module WaterDrop
70
70
  debug(event, messages)
71
71
  end
72
72
 
73
- # @param event [Dry::Events::Event] event that happened with the details
74
- def on_buffer_flushed_async_error(event)
75
- messages = event[:messages]
76
- error = event[:error]
77
-
78
- error(event, "Async flushing of #{messages.size} failed due to: #{error}")
79
- debug(event, messages)
80
- end
81
-
82
73
  # @param event [Dry::Events::Event] event that happened with the details
83
74
  def on_buffer_flushed_sync(event)
84
75
  messages = event[:messages]
@@ -87,19 +78,19 @@ module WaterDrop
87
78
  debug(event, messages)
88
79
  end
89
80
 
90
- # @param event [Dry::Events::Event] event that happened with the details
91
- def on_buffer_flushed_sync_error(event)
92
- messages = event[:dispatched]
93
- error = event[:error]
94
-
95
- error(event, "Sync flushing of #{messages.size} failed due to: #{error}")
96
- debug(event, messages)
97
- end
98
-
99
81
  # @param event [Dry::Events::Event] event that happened with the details
100
82
  def on_producer_closed(event)
101
83
  info event, 'Closing producer'
102
- debug event, event[:producer].messages.size
84
+ debug event, ''
85
+ end
86
+
87
+ # @param event [Dry::Events::Event] event that happened with the error details
88
+ def on_error_occurred(event)
89
+ error = event[:error]
90
+ type = event[:type]
91
+
92
+ error(event, "Error occurred: #{error} - #{type}")
93
+ debug(event, '')
103
94
  end
104
95
 
105
96
  private
@@ -107,19 +98,19 @@ module WaterDrop
107
98
  # @param event [Dry::Events::Event] event that happened with the details
108
99
  # @param log_message [String] message we want to publish
109
100
  def debug(event, log_message)
110
- @logger.debug("[#{event[:producer].id}] #{log_message}")
101
+ @logger.debug("[#{event[:producer_id]}] #{log_message}")
111
102
  end
112
103
 
113
104
  # @param event [Dry::Events::Event] event that happened with the details
114
105
  # @param log_message [String] message we want to publish
115
106
  def info(event, log_message)
116
- @logger.info("[#{event[:producer].id}] #{log_message} took #{event[:time]} ms")
107
+ @logger.info("[#{event[:producer_id]}] #{log_message} took #{event[:time]} ms")
117
108
  end
118
109
 
119
110
  # @param event [Dry::Events::Event] event that happened with the details
120
111
  # @param log_message [String] message we want to publish
121
112
  def error(event, log_message)
122
- @logger.error("[#{event[:producer].id}] #{log_message}")
113
+ @logger.error("[#{event[:producer_id]}] #{log_message}")
123
114
  end
124
115
  end
125
116
  end
# frozen_string_literal: true

module WaterDrop
  module Instrumentation
    # WaterDrop instrumentation monitor that we use to publish events.
    # By default it runs on top of our internal notifications bus, but it can operate with
    # `ActiveSupport::Notifications` as well.
    class Monitor < ::Karafka::Core::Monitoring::Monitor
      # @param notifications_bus [Object] either our internal notifications bus or
      #   `ActiveSupport::Notifications`
      # @param namespace [String, nil] namespace for events or nil if no namespace
      def initialize(
        notifications_bus = WaterDrop::Instrumentation::Notifications.new,
        namespace = nil
      )
        # Delegates all the monitoring work to the Karafka core monitor
        super(notifications_bus, namespace)
      end
    end
  end
end
@@ -2,37 +2,36 @@
2
2
 
3
3
  module WaterDrop
4
4
  module Instrumentation
5
- # Monitor is used to hookup external monitoring services to monitor how WaterDrop works
6
- # Since it is a pub-sub based on dry-monitor, you can use as many subscribers/loggers at the
7
- # same time, which means that you might have for example file logging and NewRelic at the same
8
- # time
9
- # @note This class acts as a singleton because we are only permitted to have single monitor
10
- # per running process (just as logger)
11
- class Monitor < Dry::Monitor::Notifications
5
+ # Notifications is used to hook up external monitoring services to monitor how WaterDrop works
6
+ class Notifications < ::Karafka::Core::Monitoring::Notifications
12
7
  # List of events that we support in the system and to which a monitor client can hook up
13
8
  # @note The non-error ones support timestamp benchmarking
14
9
  EVENTS = %w[
15
10
  producer.closed
11
+
16
12
  message.produced_async
17
13
  message.produced_sync
14
+ message.acknowledged
15
+ message.buffered
16
+
18
17
  messages.produced_async
19
18
  messages.produced_sync
20
- message.buffered
21
19
  messages.buffered
22
- message.acknowledged
20
+
23
21
  buffer.flushed_async
24
- buffer.flushed_async.error
25
22
  buffer.flushed_sync
26
- buffer.flushed_sync.error
23
+
27
24
  statistics.emitted
25
+
26
+ error.occurred
28
27
  ].freeze
29
28
 
30
29
  private_constant :EVENTS
31
30
 
32
31
  # @return [WaterDrop::Instrumentation::Monitor] monitor instance for system instrumentation
33
32
  def initialize
34
- super(:waterdrop)
35
- EVENTS.each(&method(:register_event))
33
+ super
34
+ EVENTS.each { |event| register_event(event) }
36
35
  end
37
36
  end
38
37
  end
@@ -0,0 +1 @@
1
+ {"title":"WaterDrop producer example dashboard","description":"This dashboard include example setup for monitoring activity of your WaterDrop producer","widgets":[{"id":243951318,"definition":{"title":"Messages produced","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"produced sync","formula":"query1"},{"alias":"produced async","formula":"query2"},{"alias":"flushed sync","formula":"query3"},{"alias":"flushed async","formula":"query4"},{"alias":"acknowledged","formula":"query5"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.produced_sync{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:waterdrop.produced_async{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:waterdrop.flushed_sync{*}.as_count()","data_source":"metrics","name":"query3"},{"query":"sum:waterdrop.flushed_async{*}.as_count()","data_source":"metrics","name":"query4"},{"query":"sum:waterdrop.acknowledged{*}.as_count()","data_source":"metrics","name":"query5"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":1979626566852990,"definition":{"title":"Messages buffer size","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"max","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.buffer.size.max{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]}},{"id":243951221,"definition":{"title":"Kafka broker API 
calls","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"API calls","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.calls{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951952,"definition":{"title":"Producer queue size","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Queue size average","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"max:waterdrop.queue.size.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Queue size max","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"max:waterdrop.queue.size.max{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951263,"definition":{"title":"Producer queue latency","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Average latency","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency 
p95","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.p95{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p99","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.p99{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951276,"definition":{"title":"Producer network latency","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Average latency","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.request_size.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p95","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.network.latency.p95{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p99","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.network.latency.p99{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243954928,"definition":{"title":"Producer 
errors","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.error_occurred{*}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"auto","id":"rnr-kgh-dna"}
# frozen_string_literal: true

module WaterDrop
  module Instrumentation
    # Namespace for vendor specific instrumentation
    module Vendors
      # Datadog specific instrumentation
      module Datadog
        # Listener that can be used to subscribe to WaterDrop producer to receive stats via StatsD
        # and/or Datadog
        #
        # @note You need to setup the `dogstatsd-ruby` client and assign it via `setting :client`
        class Listener
          include ::Karafka::Core::Configurable
          extend Forwardable

          # Expose the configured settings as reader methods on the listener itself
          def_delegators :config, :client, :rd_kafka_metrics, :namespace, :default_tags

          # Value object for storing a single rdkafka metric publishing details
          RdKafkaMetric = Struct.new(:type, :scope, :name, :key_location)

          # Namespace under which the DD metrics should be published
          setting :namespace, default: 'waterdrop'

          # Datadog client that we should use to publish the metrics
          setting :client

          # Default tags we want to publish (for example hostname)
          # Format as follows (example for hostname): `["host:#{Socket.gethostname}"]`
          setting :default_tags, default: []

          # All the rdkafka metrics we want to publish
          #
          # By default we publish quite a lot so this can be tuned
          # Note that the ones with `_d` come from WaterDrop (diffs added by the statistics
          # decorator), not rdkafka or Kafka
          setting :rd_kafka_metrics, default: [
            # Client metrics
            RdKafkaMetric.new(:count, :root, 'calls', 'tx_d'),
            RdKafkaMetric.new(:histogram, :root, 'queue.size', 'msg_cnt_d'),

            # Broker metrics
            RdKafkaMetric.new(:count, :brokers, 'deliver.attempts', 'txretries_d'),
            RdKafkaMetric.new(:count, :brokers, 'deliver.errors', 'txerrs_d'),
            RdKafkaMetric.new(:count, :brokers, 'receive.errors', 'rxerrs_d'),
            RdKafkaMetric.new(:gauge, :brokers, 'queue.latency.avg', %w[outbuf_latency avg]),
            RdKafkaMetric.new(:gauge, :brokers, 'queue.latency.p95', %w[outbuf_latency p95]),
            RdKafkaMetric.new(:gauge, :brokers, 'queue.latency.p99', %w[outbuf_latency p99]),
            RdKafkaMetric.new(:gauge, :brokers, 'network.latency.avg', %w[rtt avg]),
            RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p95', %w[rtt p95]),
            RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p99', %w[rtt p99])
          ].freeze

          # Materialize the class-level defaults declared above
          configure

          # @param block [Proc] configuration block
          def initialize(&block)
            configure
            setup(&block) if block
          end

          # @param block [Proc] configuration block
          # @note We define this alias to be consistent with `WaterDrop#setup`
          def setup(&block)
            configure(&block)
          end

          # Hooks up to WaterDrop instrumentation for emitted statistics
          #
          # @param event [WaterDrop::Monitor::Event]
          def on_statistics_emitted(event)
            statistics = event[:statistics]

            rd_kafka_metrics.each do |metric|
              report_metric(metric, statistics)
            end
          end

          # Increases the errors count by 1
          #
          # @param _event [WaterDrop::Monitor::Event]
          def on_error_occurred(_event)
            count('error_occurred', 1, tags: default_tags)
          end

          # Increases acknowledged messages counter
          # @param _event [WaterDrop::Monitor::Event]
          def on_message_acknowledged(_event)
            increment('acknowledged', tags: default_tags)
          end

          # Generates per-topic message counters for both the single-message and the
          # many-messages variants of the produce events
          %i[
            produced_sync
            produced_async
          ].each do |event_scope|
            class_eval <<~METHODS, __FILE__, __LINE__ + 1
              # @param event [WaterDrop::Monitor::Event]
              def on_message_#{event_scope}(event)
                report_message(event[:message][:topic], :#{event_scope})
              end

              # @param event [WaterDrop::Monitor::Event]
              def on_messages_#{event_scope}(event)
                event[:messages].each do |message|
                  report_message(message[:topic], :#{event_scope})
                end
              end
            METHODS
          end

          # Reports the buffer usage when anything is added to the buffer
          %i[
            message_buffered
            messages_buffered
          ].each do |event_scope|
            class_eval <<~METHODS, __FILE__, __LINE__ + 1
              # @param event [WaterDrop::Monitor::Event]
              def on_#{event_scope}(event)
                histogram(
                  'buffer.size',
                  event[:buffer].size,
                  tags: default_tags
                )
              end
            METHODS
          end

          # Events that support many messages only
          # Reports data flushing operation (production from the buffer)
          %i[
            flushed_sync
            flushed_async
          ].each do |event_scope|
            class_eval <<~METHODS, __FILE__, __LINE__ + 1
              # @param event [WaterDrop::Monitor::Event]
              def on_buffer_#{event_scope}(event)
                event[:messages].each do |message|
                  report_message(message[:topic], :#{event_scope})
                end
              end
            METHODS
          end

          private

          # Private thin wrappers around the Datadog client that prefix every metric name
          # with the configured namespace before publishing
          %i[
            count
            gauge
            histogram
            increment
            decrement
          ].each do |metric_type|
            class_eval <<~METHODS, __FILE__, __LINE__ + 1
              def #{metric_type}(key, *args)
                client.#{metric_type}(
                  namespaced_metric(key),
                  *args
                )
              end
            METHODS
          end

          # Report that a message has been produced to a topic.
          # @param topic [String] Kafka topic
          # @param method_name [Symbol] method from which this message operation comes
          def report_message(topic, method_name)
            increment(method_name, tags: default_tags + ["topic:#{topic}"])
          end

          # Wraps metric name in listener's namespace
          # @param metric_name [String] RdKafkaMetric name
          # @return [String]
          def namespaced_metric(metric_name)
            "#{namespace}.#{metric_name}"
          end

          # Reports a given metric statistics to Datadog
          # @param metric [RdKafkaMetric] metric value object
          # @param statistics [Hash] hash with all the statistics emitted
          # @raise [ArgumentError] when the metric has an unsupported scope
          def report_metric(metric, statistics)
            case metric.scope
            when :root
              # key_location is a single String for root metrics, the splat yields one arg
              public_send(
                metric.type,
                metric.name,
                statistics.fetch(*metric.key_location),
                tags: default_tags
              )
            when :brokers
              statistics.fetch('brokers').each_value do |broker_statistics|
                # Skip bootstrap nodes
                # Bootstrap nodes have nodeid -1, other nodes have positive
                # node ids
                next if broker_statistics['nodeid'] == -1

                # key_location is a path array for broker metrics, hence #dig
                public_send(
                  metric.type,
                  metric.name,
                  broker_statistics.dig(*metric.key_location),
                  tags: default_tags + ["broker:#{broker_statistics['nodename']}"]
                )
              end
            else
              raise ArgumentError, metric.scope
            end
          end
        end
      end
    end
  end
end
# frozen_string_literal: true

module WaterDrop
  # Namespace for all the things related with WaterDrop instrumentation process
  module Instrumentation
    # Builds (once) and returns the manager for statistics callbacks
    # @return [WaterDrop::Instrumentation::CallbacksManager]
    def self.statistics_callbacks
      @statistics_callbacks ||= CallbacksManager.new
    end

    # Builds (once) and returns the manager for error callbacks
    # @return [WaterDrop::Instrumentation::CallbacksManager]
    def self.error_callbacks
      @error_callbacks ||= CallbacksManager.new
    end
  end
end
# frozen_string_literal: true

module WaterDrop
  module Patches
    module Rdkafka
      # Extends `Rdkafka::Bindings` with some extra methods and updates callbacks that we intend
      # to work with in a bit different way than rdkafka itself
      module Bindings
        class << self
          # Add extra methods that we need
          # @param mod [::Rdkafka::Bindings] rdkafka bindings module
          def included(mod)
            # Exposes librdkafka's rd_kafka_name so the client instance name can be read
            mod.attach_function :rd_kafka_name, [:pointer], :string

            # Default rdkafka setup for errors does not propagate client details, thus it always
            # publishes all the stuff for all rdkafka instances. We change that by providing
            # function that fetches the instance name, allowing us to have better notifications
            # NOTE(review): remove_const + const_set swaps the FFI callback constant in place;
            # this assumes nothing captured the original ErrorCallback before this patch loads
            mod.send(:remove_const, :ErrorCallback)
            mod.const_set(:ErrorCallback, build_error_callback)
          end

          # @return [FFI::Function] overwritten callback function
          def build_error_callback
            FFI::Function.new(
              :void, %i[pointer int string pointer]
            ) do |client_prr, err_code, reason, _opaque|
              # Nothing to do when no error callback was configured by the user
              return nil unless ::Rdkafka::Config.error_callback

              # Resolve which rdkafka client this error belongs to, so listeners can filter
              name = ::Rdkafka::Bindings.rd_kafka_name(client_prr)

              error = ::Rdkafka::RdkafkaError.new(err_code, broker_message: reason)

              # Invokes the configured callback with (client name, error) - presumably all
              # registered WaterDrop error handlers fan out from here; verify against
              # Callbacks::Error#call which expects exactly this (name, error) signature
              ::Rdkafka::Config.error_callback.call(name, error)
            end
          end
        end
      end
    end
  end
end

# Applying the patch at load time mutates the global Rdkafka bindings for the whole process
::Rdkafka::Bindings.include(::WaterDrop::Patches::Rdkafka::Bindings)