waterdrop 2.6.7 → 2.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,26 @@ module WaterDrop
25
25
  true
26
26
  end
27
27
 
28
+ # Yields the code pretending it is in a transaction
29
+ # Supports our aborting transaction flow
30
+ def transaction
31
+ result = nil
32
+ commit = false
33
+
34
+ catch(:abort) do
35
+ result = yield
36
+ commit = true
37
+ end
38
+
39
+ commit || raise(WaterDrop::Errors::AbortTransaction)
40
+
41
+ result
42
+ rescue StandardError => e
43
+ return if e.is_a?(WaterDrop::Errors::AbortTransaction)
44
+
45
+ raise
46
+ end
47
+
28
48
  # @param _args [Object] anything really, this dummy is supposed to support anything
29
49
  # @return [self] returns self for chaining cases
30
50
  def method_missing(*_args)
@@ -11,7 +11,9 @@ module WaterDrop
11
11
  # @param producer [WaterDrop::Producer] producer instance with its config, etc
12
12
  # @note We overwrite this that way, because we do not care
13
13
  def new(producer)
14
- client = ::Rdkafka::Config.new(producer.config.kafka.to_h).producer
14
+ config = producer.config.kafka.to_h
15
+
16
+ client = ::Rdkafka::Config.new(config).producer
15
17
 
16
18
  # This callback is not global and is per client, thus we do not have to wrap it with a
17
19
  # callbacks manager to make it work
@@ -20,6 +22,9 @@ module WaterDrop
20
22
  producer.config.monitor
21
23
  )
22
24
 
25
+ # Switch to the transactional mode if user provided the transactional id
26
+ client.init_transactions if config.key?(:'transactional.id')
27
+
23
28
  client
24
29
  end
25
30
  end
@@ -64,6 +64,11 @@ module WaterDrop
64
64
  # option [Numeric] how many seconds should we wait with the backoff on queue having space for
65
65
  # more messages before re-raising the error.
66
66
  setting :wait_timeout_on_queue_full, default: 10
67
+
68
+ setting :wait_backoff_on_transaction_command, default: 0.5
69
+
70
+ setting :max_attempts_on_transaction_command, default: 5
71
+
67
72
  # option [Boolean] should we send messages. Setting this to false can be really useful when
68
73
  # testing and or developing because when set to false, won't actually ping Kafka but will
69
74
  # run all the validations, etc
@@ -26,7 +26,10 @@ module WaterDrop
26
26
  @max_payload_size = max_payload_size
27
27
  end
28
28
 
29
- required(:topic) { |val| val.is_a?(String) && TOPIC_REGEXP.match?(val) }
29
+ required(:topic) do |val|
30
+ (val.is_a?(String) || val.is_a?(Symbol)) && TOPIC_REGEXP.match?(val.to_s)
31
+ end
32
+
30
33
  required(:payload) { |val| val.nil? || val.is_a?(String) }
31
34
  optional(:key) { |val| val.nil? || (val.is_a?(String) && !val.empty?) }
32
35
  optional(:partition) { |val| val.is_a?(Integer) && val >= -1 }
@@ -32,6 +32,9 @@ module WaterDrop
32
32
  # Raised when there is an inline error during single message produce operations
33
33
  ProduceError = Class.new(BaseError)
34
34
 
35
+ # Raise it within a transaction to abort it
36
+ AbortTransaction = Class.new(BaseError)
37
+
35
38
  # Raised when during messages producing something bad happened inline
36
39
  class ProduceManyError < ProduceError
37
40
  attr_reader :dispatched
@@ -17,10 +17,10 @@ module WaterDrop
17
17
  # Emits delivery details to the monitor
18
18
  # @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
19
19
  def call(delivery_report)
20
- if delivery_report.error.to_i.positive?
21
- instrument_error(delivery_report)
22
- else
20
+ if delivery_report.error.to_i.zero?
23
21
  instrument_acknowledged(delivery_report)
22
+ else
23
+ instrument_error(delivery_report)
24
24
  end
25
25
  end
26
26
 
@@ -36,6 +36,7 @@ module WaterDrop
36
36
  offset: delivery_report.offset,
37
37
  partition: delivery_report.partition,
38
38
  topic: delivery_report.topic_name,
39
+ delivery_report: delivery_report,
39
40
  type: 'librdkafka.dispatch_error'
40
41
  )
41
42
  end
@@ -47,7 +48,8 @@ module WaterDrop
47
48
  producer_id: @producer_id,
48
49
  offset: delivery_report.offset,
49
50
  partition: delivery_report.partition,
50
- topic: delivery_report.topic_name
51
+ topic: delivery_report.topic_name,
52
+ delivery_report: delivery_report
51
53
  )
52
54
  end
53
55
  end
@@ -112,9 +112,14 @@ module WaterDrop
112
112
  debug(event, messages)
113
113
  end
114
114
 
115
+ # @param event [Dry::Events::Event] event that happened with the details
116
+ def on_buffer_purged(event)
117
+ info(event, 'Successfully purging buffer')
118
+ end
119
+
115
120
  # @param event [Dry::Events::Event] event that happened with the details
116
121
  def on_producer_closed(event)
117
- info event, 'Closing producer'
122
+ info(event, 'Closing producer')
118
123
  end
119
124
 
120
125
  # @param event [Dry::Events::Event] event that happened with the error details
@@ -125,6 +130,21 @@ module WaterDrop
125
130
  error(event, "Error occurred: #{error} - #{type}")
126
131
  end
127
132
 
133
+ # @param event [Dry::Events::Event] event that happened with the details
134
+ def on_transaction_started(event)
135
+ info(event, 'Starting transaction')
136
+ end
137
+
138
+ # @param event [Dry::Events::Event] event that happened with the details
139
+ def on_transaction_aborted(event)
140
+ info(event, 'Aborting transaction')
141
+ end
142
+
143
+ # @param event [Dry::Events::Event] event that happened with the details
144
+ def on_transaction_committed(event)
145
+ info(event, 'Committing transaction')
146
+ end
147
+
128
148
  private
129
149
 
130
150
  # @return [Boolean] should we report the messages details in the debug mode.
@@ -18,8 +18,13 @@ module WaterDrop
18
18
  messages.produced_sync
19
19
  messages.buffered
20
20
 
21
+ transaction.started
22
+ transaction.committed
23
+ transaction.aborted
24
+
21
25
  buffer.flushed_async
22
26
  buffer.flushed_sync
27
+ buffer.purged
23
28
 
24
29
  statistics.emitted
25
30
 
@@ -60,8 +60,10 @@ module WaterDrop
60
60
  producer_id: id,
61
61
  messages: messages
62
62
  ) do
63
- messages.each do |message|
64
- dispatched << produce(message)
63
+ with_transaction_if_transactional do
64
+ messages.each do |message|
65
+ dispatched << produce(message)
66
+ end
65
67
  end
66
68
 
67
69
  dispatched
@@ -63,8 +63,10 @@ module WaterDrop
63
63
  dispatched = []
64
64
 
65
65
  @monitor.instrument('messages.produced_sync', producer_id: id, messages: messages) do
66
- messages.each do |message|
67
- dispatched << produce(message)
66
+ with_transaction_if_transactional do
67
+ messages.each do |message|
68
+ dispatched << produce(message)
69
+ end
68
70
  end
69
71
 
70
72
  dispatched.map! do |handler|
@@ -0,0 +1,165 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ class Producer
5
+ # Transactions related producer functionalities
6
+ module Transactions
7
+ # Creates a transaction.
8
+ #
9
+ # Karafka transactions work in a similar manner to SQL db transactions though there are some
10
+ # crucial differences. When you start a transaction, all messages produced during it will
11
+ # be delivered together or will fail together. The difference is, that messages from within
12
+ # a single transaction can be delivered and will have a delivery handle but will be then
13
+ # compacted prior to moving the LSO forward. This means, that not every delivery handle for
14
+ # async dispatches will emit a queue purge error. None for sync as the delivery has happened
15
+ # but they will never be visible by the transactional consumers.
16
+ #
17
+ # Transactions **are** thread-safe however they lock a mutex. This means, that for
18
+ # high-throughput transactional messages production in multiple threads
19
+ # (for example in Karafka), it may be much better to use few instances that can work in
20
+ # parallel.
21
+ #
22
+ # Please note, that if a producer is configured as transactional, it **cannot** produce
23
+ # messages outside of transactions, that is why by default all dispatches will be wrapped
24
+ # with a transaction. One transaction per single dispatch and for `produce_many` it will be
25
+ # a single transaction wrapping all messages dispatches (not one per message).
26
+ #
27
+ # @return Block result
28
+ #
29
+ # @example Simple transaction
30
+ # producer.transaction do
31
+ # producer.produce_async(topic: 'topic', payload: 'data')
32
+ # end
33
+ #
34
+ # @example Aborted transaction - messages produced won't be visible by consumers
35
+ # producer.transaction do
36
+ # producer.produce_sync(topic: 'topic', payload: 'data')
37
+ # throw(:abort)
38
+ # end
39
+ #
40
+ # @example Use block result last handler to wait on all messages ack
41
+ # handler = producer.transaction do
42
+ # producer.produce_async(topic: 'topic', payload: 'data')
43
+ # end
44
+ #
45
+ # handler.wait
46
+ def transaction
47
+ # This will safely allow us to support one operation transactions so a transactional
48
+ # producer can work without the transactional block if needed
49
+ return yield if @transaction_mutex.owned?
50
+
51
+ @transaction_mutex.synchronize do
52
+ transactional_instrument(:committed) do
53
+ with_transactional_error_handling(:begin) do
54
+ transactional_instrument(:started) { client.begin_transaction }
55
+ end
56
+
57
+ result = nil
58
+ commit = false
59
+
60
+ catch(:abort) do
61
+ result = yield
62
+ commit = true
63
+ end
64
+
65
+ commit || raise(WaterDrop::Errors::AbortTransaction)
66
+
67
+ with_transactional_error_handling(:commit) do
68
+ client.commit_transaction
69
+ end
70
+
71
+ result
72
+ rescue StandardError => e
73
+ with_transactional_error_handling(:abort) do
74
+ transactional_instrument(:aborted) { client.abort_transaction }
75
+ end
76
+
77
+ raise unless e.is_a?(WaterDrop::Errors::AbortTransaction)
78
+ end
79
+ end
80
+ end
81
+
82
+ # @return [Boolean] Is this producer a transactional one
83
+ def transactional?
84
+ return @transactional if instance_variable_defined?(:'@transactional')
85
+
86
+ @transactional = config.kafka.to_h.key?(:'transactional.id')
87
+ end
88
+
89
+ private
90
+
91
+ # Runs provided code with a transaction wrapper if transactions are enabled.
92
+ # This allows us to simplify the async and sync batch dispatchers because we can ensure that
93
+ # their internal dispatches will be wrapped only with a single transaction and not
94
+ # a transaction per message
95
+ # @param block [Proc] code we want to run
96
+ def with_transaction_if_transactional(&block)
97
+ transactional? ? transaction(&block) : yield
98
+ end
99
+
100
+ # Instruments the transactional operation with producer id
101
+ #
102
+ # @param key [Symbol] transaction operation key
103
+ # @param block [Proc] block to run inside the instrumentation or nothing if not given
104
+ def transactional_instrument(key, &block)
105
+ @monitor.instrument("transaction.#{key}", producer_id: id, &block)
106
+ end
107
+
108
+ # Error handling for transactional operations is a bit special. There are three types of
109
+ # errors coming from librdkafka:
110
+ # - retryable - indicates that a given operation (like offset commit) can be retried after
111
+ # a backoff and that it should be operating later as expected. We try to retry those
112
+ # few times before finally failing.
113
+ # - fatal - errors that will not recover no matter what (for example being fenced out)
114
+ # - abortable - error from which we cannot recover but for which we should abort the
115
+ # current transaction.
116
+ #
117
+ # The code below handles this logic also publishing the appropriate notifications via our
118
+ # notifications pipeline.
119
+ #
120
+ # @param action [Symbol] action type
121
+ # @param allow_abortable [Boolean] should we allow for the abortable flow. This is set to
122
+ # false internally to prevent attempts to abort from failed abort operations
123
+ def with_transactional_error_handling(action, allow_abortable: true)
124
+ attempt ||= 0
125
+ attempt += 1
126
+
127
+ yield
128
+ rescue ::Rdkafka::RdkafkaError => e
129
+ # Decide if there is a chance to retry given error
130
+ do_retry = e.retryable? && attempt < config.max_attempts_on_transaction_command
131
+
132
+ @monitor.instrument(
133
+ 'error.occurred',
134
+ producer_id: id,
135
+ caller: self,
136
+ error: e,
137
+ type: "transaction.#{action}",
138
+ retry: do_retry,
139
+ attempt: attempt
140
+ )
141
+
142
+ raise if e.fatal?
143
+
144
+ if do_retry
145
+ # Backoff more and more before retries
146
+ sleep(config.wait_backoff_on_transaction_command * attempt)
147
+
148
+ retry
149
+ end
150
+
151
+ if e.abortable? && allow_abortable
152
+ # Always attempt to abort but if aborting fails with an abortable error, do not attempt
153
+ # to abort from abort as this could create an infinite loop
154
+ with_transactional_error_handling(:abort, allow_abortable: false) do
155
+ transactional_instrument(:aborted) { @client.abort_transaction }
156
+ end
157
+
158
+ raise
159
+ end
160
+
161
+ raise
162
+ end
163
+ end
164
+ end
165
+ end
@@ -7,6 +7,7 @@ module WaterDrop
7
7
  include Sync
8
8
  include Async
9
9
  include Buffer
10
+ include Transactions
10
11
  include ::Karafka::Core::Helpers::Time
11
12
 
12
13
  # Which of the inline flow errors do we want to intercept and re-bind
@@ -38,6 +39,7 @@ module WaterDrop
38
39
  @buffer_mutex = Mutex.new
39
40
  @connecting_mutex = Mutex.new
40
41
  @operating_mutex = Mutex.new
42
+ @transaction_mutex = Mutex.new
41
43
 
42
44
  @status = Status.new
43
45
  @messages = Concurrent::Array.new
@@ -117,8 +119,25 @@ module WaterDrop
117
119
  @client
118
120
  end
119
121
 
122
+ # Purges data from both the buffer queue as well as the librdkafka queue.
123
+ #
124
+ # @note This is an operation that can cause data loss. Keep that in mind. It will not only
125
+ # purge the internal WaterDrop buffer but will also purge the librdkafka queue as well as
126
+ # will cancel any outgoing messages dispatches.
127
+ def purge
128
+ @monitor.instrument('buffer.purged', producer_id: id) do
129
+ @buffer_mutex.synchronize do
130
+ @messages = Concurrent::Array.new
131
+ end
132
+
133
+ @client.purge
134
+ end
135
+ end
136
+
120
137
  # Flushes the buffers in a sync way and closes the producer
121
- def close
138
+ # @param force [Boolean] should we force closing even with outstanding messages after the
139
+ # max wait timeout
140
+ def close(force: false)
122
141
  @operating_mutex.synchronize do
123
142
  return unless @status.active?
124
143
 
@@ -156,12 +175,19 @@ module WaterDrop
156
175
  # `max_wait_timeout` is in seconds at the moment
157
176
  @client.flush(@config.max_wait_timeout * 1_000) unless @client.closed?
158
177
  # We can safely ignore timeouts here because any left outstanding requests
159
- # will anyhow force wait on close
178
+ # will anyhow force wait on close if not forced.
179
+ # If forced, we will purge the queue and just close
160
180
  rescue ::Rdkafka::RdkafkaError, Rdkafka::AbstractHandle::WaitTimeoutError
161
181
  nil
182
+ ensure
183
+ # Purge fully the local queue in case of a forceful shutdown just to be sure, that
184
+ # there are no dangling messages. In case flush was successful, there should be
185
+ # none but we do it just in case it timed out
186
+ purge if force
162
187
  end
163
188
 
164
189
  @client.close
190
+
165
191
  @client = nil
166
192
  end
167
193
 
@@ -174,6 +200,11 @@ module WaterDrop
174
200
  end
175
201
  end
176
202
 
203
+ # Closes the producer with forced close after timeout, purging any outgoing data
204
+ def close!
205
+ close(force: true)
206
+ end
207
+
177
208
  private
178
209
 
179
210
  # Ensures that we don't run any operations when the producer is not configured or when it
@@ -223,7 +254,15 @@ module WaterDrop
223
254
  ensure_active!
224
255
  end
225
256
 
226
- client.produce(**message)
257
+ # In case someone defines topic as a symbol, we need to convert it into a string as
258
+ # librdkafka does not accept symbols
259
+ message = message.merge(topic: message[:topic].to_s) if message[:topic].is_a?(Symbol)
260
+
261
+ if transactional?
262
+ transaction { client.produce(**message) }
263
+ else
264
+ client.produce(**message)
265
+ end
227
266
  rescue SUPPORTED_FLOW_ERRORS.first => e
228
267
  # Unless we want to wait and retry and it's a full queue, we raise normally
229
268
  raise unless @config.wait_on_queue_full
@@ -3,5 +3,5 @@
3
3
  # WaterDrop library
4
4
  module WaterDrop
5
5
  # Current WaterDrop version
6
- VERSION = '2.6.7'
6
+ VERSION = '2.6.8'
7
7
  end
data/waterdrop.gemspec CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
16
16
  spec.description = spec.summary
17
17
  spec.license = 'MIT'
18
18
 
19
- spec.add_dependency 'karafka-core', '>= 2.1.1', '< 3.0.0'
19
+ spec.add_dependency 'karafka-core', '>= 2.2.3', '< 3.0.0'
20
20
  spec.add_dependency 'zeitwerk', '~> 2.3'
21
21
 
22
22
  if $PROGRAM_NAME.end_with?('gem')
@@ -31,10 +31,10 @@ Gem::Specification.new do |spec|
31
31
  spec.metadata = {
32
32
  'funding_uri' => 'https://karafka.io/#become-pro',
33
33
  'homepage_uri' => 'https://karafka.io',
34
- 'changelog_uri' => 'https://github.com/karafka/waterdrop/blob/master/CHANGELOG.md',
34
+ 'changelog_uri' => 'https://karafka.io/docs/Changelog-WaterDrop',
35
35
  'bug_tracker_uri' => 'https://github.com/karafka/waterdrop/issues',
36
36
  'source_code_uri' => 'https://github.com/karafka/waterdrop',
37
- 'documentation_uri' => 'https://github.com/karafka/waterdrop#readme',
37
+ 'documentation_uri' => 'https://karafka.io/docs/#waterdrop',
38
38
  'rubygems_mfa_required' => 'true'
39
39
  }
40
40
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: waterdrop
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.6.7
4
+ version: 2.6.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
36
36
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
37
37
  -----END CERTIFICATE-----
38
- date: 2023-09-01 00:00:00.000000000 Z
38
+ date: 2023-10-20 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
@@ -43,7 +43,7 @@ dependencies:
43
43
  requirements:
44
44
  - - ">="
45
45
  - !ruby/object:Gem::Version
46
- version: 2.1.1
46
+ version: 2.2.3
47
47
  - - "<"
48
48
  - !ruby/object:Gem::Version
49
49
  version: 3.0.0
@@ -53,7 +53,7 @@ dependencies:
53
53
  requirements:
54
54
  - - ">="
55
55
  - !ruby/object:Gem::Version
56
- version: 2.1.1
56
+ version: 2.2.3
57
57
  - - "<"
58
58
  - !ruby/object:Gem::Version
59
59
  version: 3.0.0
@@ -118,6 +118,7 @@ files:
118
118
  - lib/waterdrop/producer/builder.rb
119
119
  - lib/waterdrop/producer/status.rb
120
120
  - lib/waterdrop/producer/sync.rb
121
+ - lib/waterdrop/producer/transactions.rb
121
122
  - lib/waterdrop/version.rb
122
123
  - log/.gitkeep
123
124
  - renovate.json
@@ -128,10 +129,10 @@ licenses:
128
129
  metadata:
129
130
  funding_uri: https://karafka.io/#become-pro
130
131
  homepage_uri: https://karafka.io
131
- changelog_uri: https://github.com/karafka/waterdrop/blob/master/CHANGELOG.md
132
+ changelog_uri: https://karafka.io/docs/Changelog-WaterDrop
132
133
  bug_tracker_uri: https://github.com/karafka/waterdrop/issues
133
134
  source_code_uri: https://github.com/karafka/waterdrop
134
- documentation_uri: https://github.com/karafka/waterdrop#readme
135
+ documentation_uri: https://karafka.io/docs/#waterdrop
135
136
  rubygems_mfa_required: 'true'
136
137
  post_install_message:
137
138
  rdoc_options: []
metadata.gz.sig CHANGED
Binary file