waterdrop 2.8.14 → 2.8.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +215 -36
  3. data/.github/workflows/push.yml +3 -3
  4. data/.github/workflows/trigger-wiki-refresh.yml +1 -1
  5. data/.github/workflows/verify-action-pins.yml +1 -1
  6. data/.gitignore +0 -1
  7. data/.rubocop.yml +87 -0
  8. data/.ruby-version +1 -1
  9. data/.yard-lint.yml +172 -72
  10. data/CHANGELOG.md +13 -0
  11. data/Gemfile +8 -9
  12. data/Gemfile.lint +14 -0
  13. data/Gemfile.lint.lock +123 -0
  14. data/Gemfile.lock +27 -28
  15. data/README.md +1 -1
  16. data/Rakefile +2 -2
  17. data/bin/integrations +28 -29
  18. data/bin/verify_topics_naming +8 -8
  19. data/config/locales/errors.yml +12 -0
  20. data/docker-compose.oauth.yml +56 -0
  21. data/docker-compose.yml +1 -1
  22. data/lib/waterdrop/clients/dummy.rb +9 -0
  23. data/lib/waterdrop/clients/rdkafka.rb +13 -2
  24. data/lib/waterdrop/config.rb +32 -5
  25. data/lib/waterdrop/connection_pool.rb +13 -11
  26. data/lib/waterdrop/contracts/config.rb +30 -6
  27. data/lib/waterdrop/contracts/message.rb +2 -2
  28. data/lib/waterdrop/contracts/poller_config.rb +26 -0
  29. data/lib/waterdrop/contracts/transactional_offset.rb +2 -2
  30. data/lib/waterdrop/contracts/variant.rb +18 -18
  31. data/lib/waterdrop/errors.rb +3 -0
  32. data/lib/waterdrop/instrumentation/callbacks/delivery.rb +8 -8
  33. data/lib/waterdrop/instrumentation/callbacks/error.rb +5 -5
  34. data/lib/waterdrop/instrumentation/callbacks/oauthbearer_token_refresh.rb +4 -4
  35. data/lib/waterdrop/instrumentation/callbacks/statistics.rb +18 -5
  36. data/lib/waterdrop/instrumentation/idle_disconnector_listener.rb +4 -4
  37. data/lib/waterdrop/instrumentation/logger_listener.rb +10 -10
  38. data/lib/waterdrop/instrumentation/notifications.rb +3 -0
  39. data/lib/waterdrop/instrumentation/vendors/datadog/metrics_listener.rb +19 -19
  40. data/lib/waterdrop/polling/config.rb +52 -0
  41. data/lib/waterdrop/polling/latch.rb +49 -0
  42. data/lib/waterdrop/polling/poller.rb +415 -0
  43. data/lib/waterdrop/polling/queue_pipe.rb +63 -0
  44. data/lib/waterdrop/polling/state.rb +151 -0
  45. data/lib/waterdrop/polling.rb +22 -0
  46. data/lib/waterdrop/producer/async.rb +6 -6
  47. data/lib/waterdrop/producer/buffer.rb +8 -8
  48. data/lib/waterdrop/producer/idempotence.rb +3 -3
  49. data/lib/waterdrop/producer/sync.rb +15 -8
  50. data/lib/waterdrop/producer/testing.rb +1 -1
  51. data/lib/waterdrop/producer/transactions.rb +6 -6
  52. data/lib/waterdrop/producer.rb +113 -30
  53. data/lib/waterdrop/version.rb +1 -1
  54. data/lib/waterdrop.rb +15 -10
  55. data/package-lock.json +331 -0
  56. data/package.json +9 -0
  57. data/renovate.json +25 -6
  58. data/waterdrop.gemspec +23 -23
  59. metadata +17 -5
  60. data/.coditsu/ci.yml +0 -3
@@ -56,6 +56,9 @@ module WaterDrop
56
56
  # Do not use `break`, `return` or `throw` inside of the transaction blocks
57
57
  EarlyTransactionExitNotAllowedError = Class.new(BaseError)
58
58
 
59
+ # Raised when an error occurs in the polling loop
60
+ PollerError = Class.new(BaseError)
61
+
59
62
  # Raised when during messages producing something bad happened inline
60
63
  class ProduceManyError < ProduceError
61
64
  attr_reader :dispatched
@@ -47,13 +47,13 @@ module WaterDrop
47
47
  # This runs from the rdkafka thread, thus we want to safe-guard it and prevent absolute
48
48
  # crashes even if the instrumentation code fails. If it would bubble-up, it could crash
49
49
  # the rdkafka background thread
50
- rescue StandardError => e
50
+ rescue => e
51
51
  @monitor.instrument(
52
- 'error.occurred',
52
+ "error.occurred",
53
53
  caller: self,
54
54
  error: e,
55
55
  producer_id: @producer_id,
56
- type: 'callbacks.delivery.error'
56
+ type: "callbacks.delivery.error"
57
57
  )
58
58
  end
59
59
 
@@ -62,7 +62,7 @@ module WaterDrop
62
62
  # @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
63
63
  def instrument_acknowledged(delivery_report)
64
64
  @monitor.instrument(
65
- 'message.acknowledged',
65
+ "message.acknowledged",
66
66
  caller: self,
67
67
  producer_id: @producer_id,
68
68
  offset: delivery_report.offset,
@@ -76,7 +76,7 @@ module WaterDrop
76
76
  # @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
77
77
  def instrument_purged(delivery_report)
78
78
  @monitor.instrument(
79
- 'message.purged',
79
+ "message.purged",
80
80
  caller: self,
81
81
  error: build_error(delivery_report),
82
82
  producer_id: @producer_id,
@@ -85,14 +85,14 @@ module WaterDrop
85
85
  topic: delivery_report.topic_name,
86
86
  delivery_report: delivery_report,
87
87
  label: delivery_report.label,
88
- type: 'librdkafka.dispatch_error'
88
+ type: "librdkafka.dispatch_error"
89
89
  )
90
90
  end
91
91
 
92
92
  # @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
93
93
  def instrument_error(delivery_report)
94
94
  @monitor.instrument(
95
- 'error.occurred',
95
+ "error.occurred",
96
96
  caller: self,
97
97
  error: build_error(delivery_report),
98
98
  producer_id: @producer_id,
@@ -101,7 +101,7 @@ module WaterDrop
101
101
  topic: delivery_report.topic_name,
102
102
  delivery_report: delivery_report,
103
103
  label: delivery_report.label,
104
- type: 'librdkafka.dispatch_error'
104
+ type: "librdkafka.dispatch_error"
105
105
  )
106
106
  end
107
107
 
@@ -26,22 +26,22 @@ module WaterDrop
26
26
  return unless @client_name == client_name
27
27
 
28
28
  @monitor.instrument(
29
- 'error.occurred',
29
+ "error.occurred",
30
30
  caller: self,
31
31
  error: error,
32
32
  producer_id: @producer_id,
33
- type: 'librdkafka.error'
33
+ type: "librdkafka.error"
34
34
  )
35
35
  # This runs from the rdkafka thread, thus we want to safe-guard it and prevent absolute
36
36
  # crashes even if the instrumentation code fails. If it would bubble-up, it could crash
37
37
  # the rdkafka background thread
38
- rescue StandardError => e
38
+ rescue => e
39
39
  @monitor.instrument(
40
- 'error.occurred',
40
+ "error.occurred",
41
41
  caller: self,
42
42
  error: e,
43
43
  producer_id: @producer_id,
44
- type: 'callbacks.error.error'
44
+ type: "callbacks.error.error"
45
45
  )
46
46
  end
47
47
  end
@@ -27,20 +27,20 @@ module WaterDrop
27
27
  return unless @bearer.name == bearer_name
28
28
 
29
29
  @monitor.instrument(
30
- 'oauthbearer.token_refresh',
30
+ "oauthbearer.token_refresh",
31
31
  bearer: @bearer,
32
32
  caller: self
33
33
  )
34
34
  # This runs from the rdkafka thread, thus we want to safe-guard it and prevent absolute
35
35
  # crashes even if the instrumentation code fails. If it would bubble-up, it could crash
36
36
  # the rdkafka background thread
37
- rescue StandardError => e
37
+ rescue => e
38
38
  @monitor.instrument(
39
- 'error.occurred',
39
+ "error.occurred",
40
40
  caller: self,
41
41
  error: e,
42
42
  producer_id: @producer_id,
43
- type: 'callbacks.oauthbearer_token_refresh.error'
43
+ type: "callbacks.oauthbearer_token_refresh.error"
44
44
  )
45
45
  end
46
46
  end
@@ -27,25 +27,38 @@ module WaterDrop
27
27
  # rdkafka does not have per-instance statistics hook, thus we need to make sure that we
28
28
  # emit only stats that are related to current producer. Otherwise we would emit all of
29
29
  # them all the time.
30
- return unless @client_name == statistics['name']
30
+ return unless @client_name == statistics["name"]
31
+
32
+ # Skip if no one is listening. We check on each emission to support late subscribers.
33
+ # Since statistics are emitted every 5 seconds, this check is cheap enough.
34
+ return unless listening?
31
35
 
32
36
  @monitor.instrument(
33
- 'statistics.emitted',
37
+ "statistics.emitted",
34
38
  producer_id: @producer_id,
35
39
  statistics: @statistics_decorator.call(statistics)
36
40
  )
37
41
  # This runs from the rdkafka thread, thus we want to safe-guard it and prevent absolute
38
42
  # crashes even if the instrumentation code fails. If it would bubble-up, it could crash
39
43
  # the rdkafka background thread
40
- rescue StandardError => e
44
+ rescue => e
41
45
  @monitor.instrument(
42
- 'error.occurred',
46
+ "error.occurred",
43
47
  caller: self,
44
48
  error: e,
45
49
  producer_id: @producer_id,
46
- type: 'callbacks.statistics.error'
50
+ type: "callbacks.statistics.error"
47
51
  )
48
52
  end
53
+
54
+ private
55
+
56
+ # Check if anyone is listening to statistics events
57
+ # @return [Boolean] true if there are listeners
58
+ def listening?
59
+ listeners = @monitor.listeners["statistics.emitted"]
60
+ listeners && !listeners.empty?
61
+ end
49
62
  end
50
63
  end
51
64
  end
@@ -63,7 +63,7 @@ module WaterDrop
63
63
  # Handles statistics.emitted events to monitor message transmission activity
64
64
  # @param statistics [Hash] producer librdkafka statistics
65
65
  def call(statistics)
66
- current_txmsgs = statistics.fetch('txmsgs', 0)
66
+ current_txmsgs = statistics.fetch("txmsgs", 0)
67
67
  current_time = monotonic_now
68
68
 
69
69
  # Update activity if messages changed
@@ -86,12 +86,12 @@ module WaterDrop
86
86
  # thread creation
87
87
  Thread.new do
88
88
  @producer.disconnect
89
- rescue StandardError => e
89
+ rescue => e
90
90
  @producer.monitor.instrument(
91
- 'error.occurred',
91
+ "error.occurred",
92
92
  producer_id: @producer.id,
93
93
  error: e,
94
- type: 'producer.disconnect.error'
94
+ type: "producer.disconnect.error"
95
95
  )
96
96
  end
97
97
  end
@@ -117,36 +117,36 @@ module WaterDrop
117
117
 
118
118
  # @param event [Dry::Events::Event] event that happened with the details
119
119
  def on_buffer_purged(event)
120
- info(event, 'Successfully purging buffer')
120
+ info(event, "Successfully purging buffer")
121
121
  end
122
122
 
123
123
  # @param event [Dry::Events::Event] event that happened with the details
124
124
  def on_producer_closing(event)
125
- info(event, 'Closing producer')
125
+ info(event, "Closing producer")
126
126
  end
127
127
 
128
128
  # @param event [Dry::Events::Event] event that happened with the details
129
129
  # @note While this says "Closing producer", it produces a nice message with time taken:
130
130
  # "Closing producer took 12 ms" indicating it happened in the past.
131
131
  def on_producer_closed(event)
132
- info(event, 'Closing producer')
132
+ info(event, "Closing producer")
133
133
  end
134
134
 
135
135
  # @param event [Dry::Events::Event] event that happened with the details
136
136
  def on_producer_disconnecting(event)
137
- info(event, 'Disconnecting producer')
137
+ info(event, "Disconnecting producer")
138
138
  end
139
139
 
140
140
  # @param event [Dry::Events::Event] event that happened with the details
141
141
  # @note This logs "Disconnected producer" and produces a nice message with time taken:
142
142
  # "Disconnected producer took 5 ms" indicating it happened in the past.
143
143
  def on_producer_disconnected(event)
144
- info(event, 'Disconnected producer')
144
+ info(event, "Disconnected producer")
145
145
  end
146
146
 
147
147
  # @param event [Dry::Events::Event] event that happened with the details
148
148
  def on_producer_reloaded(event)
149
- info(event, 'Producer successfully reloaded')
149
+ info(event, "Producer successfully reloaded")
150
150
  end
151
151
 
152
152
  # @param event [Dry::Events::Event] event that happened with the error details
@@ -159,17 +159,17 @@ module WaterDrop
159
159
 
160
160
  # @param event [Dry::Events::Event] event that happened with the details
161
161
  def on_transaction_started(event)
162
- info(event, 'Starting transaction')
162
+ info(event, "Starting transaction")
163
163
  end
164
164
 
165
165
  # @param event [Dry::Events::Event] event that happened with the details
166
166
  def on_transaction_aborted(event)
167
- info(event, 'Aborting transaction')
167
+ info(event, "Aborting transaction")
168
168
  end
169
169
 
170
170
  # @param event [Dry::Events::Event] event that happened with the details
171
171
  def on_transaction_committed(event)
172
- info(event, 'Committing transaction')
172
+ info(event, "Committing transaction")
173
173
  end
174
174
 
175
175
  # @param event [Dry::Events::Event] event that happened with the details
@@ -188,7 +188,7 @@ module WaterDrop
188
188
 
189
189
  # @param event [Dry::Events::Event] event that happened with the details
190
190
  def on_transaction_finished(event)
191
- info(event, 'Processing transaction')
191
+ info(event, "Processing transaction")
192
192
  end
193
193
 
194
194
  private
@@ -37,6 +37,9 @@ module WaterDrop
37
37
  buffer.flushed_sync
38
38
  buffer.purged
39
39
 
40
+ poller.producer_registered
41
+ poller.producer_unregistered
42
+
40
43
  statistics.emitted
41
44
 
42
45
  error.occurred
@@ -20,7 +20,7 @@ module WaterDrop
20
20
  RdKafkaMetric = Struct.new(:type, :scope, :name, :key_location)
21
21
 
22
22
  # Namespace under which the DD metrics should be published
23
- setting :namespace, default: 'waterdrop'
23
+ setting :namespace, default: "waterdrop"
24
24
 
25
25
  # Datadog client that we should use to publish the metrics
26
26
  setting :client
@@ -35,19 +35,19 @@ module WaterDrop
35
35
  # Note that the ones with `_d` come from WaterDrop, not rdkafka or Kafka
36
36
  setting :rd_kafka_metrics, default: [
37
37
  # Client metrics
38
- RdKafkaMetric.new(:count, :root, 'calls', 'tx_d'),
39
- RdKafkaMetric.new(:histogram, :root, 'queue.size', 'msg_cnt'),
38
+ RdKafkaMetric.new(:count, :root, "calls", "tx_d"),
39
+ RdKafkaMetric.new(:histogram, :root, "queue.size", "msg_cnt"),
40
40
 
41
41
  # Broker metrics
42
- RdKafkaMetric.new(:count, :brokers, 'deliver.attempts', 'txretries_d'),
43
- RdKafkaMetric.new(:count, :brokers, 'deliver.errors', 'txerrs_d'),
44
- RdKafkaMetric.new(:count, :brokers, 'receive.errors', 'rxerrs_d'),
45
- RdKafkaMetric.new(:gauge, :brokers, 'queue.latency.avg', %w[outbuf_latency avg]),
46
- RdKafkaMetric.new(:gauge, :brokers, 'queue.latency.p95', %w[outbuf_latency p95]),
47
- RdKafkaMetric.new(:gauge, :brokers, 'queue.latency.p99', %w[outbuf_latency p99]),
48
- RdKafkaMetric.new(:gauge, :brokers, 'network.latency.avg', %w[rtt avg]),
49
- RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p95', %w[rtt p95]),
50
- RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p99', %w[rtt p99])
42
+ RdKafkaMetric.new(:count, :brokers, "deliver.attempts", "txretries_d"),
43
+ RdKafkaMetric.new(:count, :brokers, "deliver.errors", "txerrs_d"),
44
+ RdKafkaMetric.new(:count, :brokers, "receive.errors", "rxerrs_d"),
45
+ RdKafkaMetric.new(:gauge, :brokers, "queue.latency.avg", %w[outbuf_latency avg]),
46
+ RdKafkaMetric.new(:gauge, :brokers, "queue.latency.p95", %w[outbuf_latency p95]),
47
+ RdKafkaMetric.new(:gauge, :brokers, "queue.latency.p99", %w[outbuf_latency p99]),
48
+ RdKafkaMetric.new(:gauge, :brokers, "network.latency.avg", %w[rtt avg]),
49
+ RdKafkaMetric.new(:gauge, :brokers, "network.latency.p95", %w[rtt p95]),
50
+ RdKafkaMetric.new(:gauge, :brokers, "network.latency.p99", %w[rtt p99])
51
51
  ].freeze
52
52
 
53
53
  configure
@@ -78,13 +78,13 @@ module WaterDrop
78
78
  #
79
79
  # @param _event [Karafka::Core::Monitoring::Event]
80
80
  def on_error_occurred(_event)
81
- count('error_occurred', 1, tags: default_tags)
81
+ count("error_occurred", 1, tags: default_tags)
82
82
  end
83
83
 
84
84
  # Increases acknowledged messages counter
85
85
  # @param _event [Karafka::Core::Monitoring::Event]
86
86
  def on_message_acknowledged(_event)
87
- increment('acknowledged', tags: default_tags)
87
+ increment("acknowledged", tags: default_tags)
88
88
  end
89
89
 
90
90
  %i[
@@ -216,26 +216,26 @@ module WaterDrop
216
216
  tags: default_tags
217
217
  )
218
218
  when :brokers
219
- statistics.fetch('brokers').each_value do |broker_statistics|
219
+ statistics.fetch("brokers").each_value do |broker_statistics|
220
220
  # Skip bootstrap nodes
221
221
  # Bootstrap nodes have nodeid -1, other nodes have positive
222
222
  # node ids
223
- next if broker_statistics['nodeid'] == -1
223
+ next if broker_statistics["nodeid"] == -1
224
224
 
225
225
  public_send(
226
226
  metric.type,
227
227
  metric.name,
228
228
  broker_statistics.dig(*metric.key_location),
229
- tags: default_tags + ["broker:#{broker_statistics['nodename']}"]
229
+ tags: default_tags + ["broker:#{broker_statistics["nodename"]}"]
230
230
  )
231
231
  end
232
232
  when :topics
233
- statistics.fetch('topics').each_value do |topic_statistics|
233
+ statistics.fetch("topics").each_value do |topic_statistics|
234
234
  public_send(
235
235
  metric.type,
236
236
  metric.name,
237
237
  topic_statistics.dig(*metric.key_location),
238
- tags: default_tags + ["topic:#{topic_statistics['topic']}"]
238
+ tags: default_tags + ["topic:#{topic_statistics["topic"]}"]
239
239
  )
240
240
  end
241
241
  else
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ # WaterDrop main module
4
+ module WaterDrop
5
+ # Namespace for FD-based polling components
6
+ module Polling
7
+ # Configuration for the global FD poller singleton
8
+ # These settings apply to all producers using FD polling mode
9
+ #
10
+ # @example Configure before creating any producers
11
+ # WaterDrop::Polling::Config.setup do |config|
12
+ # config.thread_priority = -1
13
+ # config.poll_timeout = 500
14
+ # end
15
+ class Config
16
+ extend ::Karafka::Core::Configurable
17
+
18
+ # Ruby thread priority for the poller thread
19
+ # Valid range: -3 to 3 (Ruby's thread priority range)
20
+ # Higher values = higher priority
21
+ setting :thread_priority, default: 0
22
+
23
+ # IO.select timeout in milliseconds
24
+ # Controls how often periodic polling happens when no FD events occur
25
+ # Lower values = more responsive OAuth/stats callbacks but higher CPU
26
+ setting :poll_timeout, default: 1_000
27
+
28
+ # Initial backoff delay in milliseconds after a polling error
29
+ setting :backoff_min, default: 100
30
+
31
+ # Maximum backoff delay in milliseconds after repeated errors
32
+ # Backoff doubles on each consecutive error up to this limit
33
+ setting :backoff_max, default: 30_000
34
+
35
+ class << self
36
+ # Configures the poller settings
37
+ # @yield [config] Configuration block
38
+ # @yieldparam config [Karafka::Core::Configurable::Node] config node
39
+ def setup
40
+ configure do |config|
41
+ yield(config)
42
+ end
43
+
44
+ Contracts::PollerConfig.new.validate!(
45
+ self.config.to_h,
46
+ Errors::ConfigurationInvalidError
47
+ )
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Polling
5
+ # A thread-safe latch for synchronizing producer close operations
6
+ #
7
+ # When a producer is closed, two threads are involved:
8
+ # 1. The caller thread (user code calling producer.close)
9
+ # 2. The poller thread (background thread running IO.select)
10
+ #
11
+ # The close sequence:
12
+ # 1. Caller calls producer.close -> unregister_from_poller -> Poller#unregister
13
+ # 2. Poller#unregister signals via control pipe and calls state.wait_for_close (blocks on latch)
14
+ # 3. Poller thread receives control signal, drains queue, calls state.close
15
+ # 4. state.close releases the latch via release!
16
+ # 5. Caller's wait_for_close returns, unregister completes
17
+ #
18
+ # This ensures the producer is fully drained and removed from the poller
19
+ # before returning control to the caller, preventing race conditions.
20
+ class Latch
21
+ def initialize
22
+ @mutex = Mutex.new
23
+ @cv = ConditionVariable.new
24
+ @released = false
25
+ end
26
+
27
+ # Releases the latch and wakes any waiting threads
28
+ def release!
29
+ @mutex.synchronize do
30
+ @released = true
31
+ @cv.broadcast
32
+ end
33
+ end
34
+
35
+ # Waits until the latch is released
36
+ # Returns immediately if already released
37
+ def wait
38
+ @mutex.synchronize do
39
+ @cv.wait(@mutex) until @released
40
+ end
41
+ end
42
+
43
+ # @return [Boolean] whether the latch has been released
44
+ def released?
45
+ @released
46
+ end
47
+ end
48
+ end
49
+ end