waterdrop 2.0.7 → 2.6.14
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/FUNDING.yml +1 -0
- data/.github/workflows/ci.yml +39 -13
- data/.ruby-version +1 -1
- data/CHANGELOG.md +212 -0
- data/Gemfile +0 -2
- data/Gemfile.lock +45 -75
- data/README.md +22 -275
- data/certs/cert_chain.pem +26 -0
- data/config/locales/errors.yml +39 -0
- data/docker-compose.yml +21 -12
- data/lib/waterdrop/clients/buffered.rb +95 -0
- data/lib/waterdrop/clients/dummy.rb +69 -0
- data/lib/waterdrop/clients/rdkafka.rb +34 -0
- data/lib/{water_drop → waterdrop}/config.rb +39 -16
- data/lib/waterdrop/contracts/config.rb +43 -0
- data/lib/waterdrop/contracts/message.rb +64 -0
- data/lib/waterdrop/contracts/transactional_offset.rb +21 -0
- data/lib/{water_drop → waterdrop}/errors.rb +23 -7
- data/lib/waterdrop/helpers/counter.rb +27 -0
- data/lib/waterdrop/instrumentation/callbacks/delivery.rb +106 -0
- data/lib/{water_drop → waterdrop}/instrumentation/callbacks/error.rb +6 -2
- data/lib/{water_drop → waterdrop}/instrumentation/callbacks/statistics.rb +1 -1
- data/lib/{water_drop/instrumentation/stdout_listener.rb → waterdrop/instrumentation/logger_listener.rb} +91 -21
- data/lib/waterdrop/instrumentation/monitor.rb +20 -0
- data/lib/{water_drop/instrumentation/monitor.rb → waterdrop/instrumentation/notifications.rb} +15 -14
- data/lib/waterdrop/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/waterdrop/instrumentation/vendors/datadog/metrics_listener.rb +210 -0
- data/lib/waterdrop/middleware.rb +50 -0
- data/lib/{water_drop → waterdrop}/producer/async.rb +40 -4
- data/lib/{water_drop → waterdrop}/producer/buffer.rb +13 -31
- data/lib/{water_drop → waterdrop}/producer/builder.rb +6 -11
- data/lib/{water_drop → waterdrop}/producer/sync.rb +44 -15
- data/lib/waterdrop/producer/transactions.rb +219 -0
- data/lib/waterdrop/producer.rb +324 -0
- data/lib/{water_drop → waterdrop}/version.rb +1 -1
- data/lib/waterdrop.rb +27 -2
- data/renovate.json +6 -0
- data/waterdrop.gemspec +14 -11
- data.tar.gz.sig +0 -0
- metadata +73 -111
- metadata.gz.sig +0 -0
- data/certs/mensfeld.pem +0 -25
- data/config/errors.yml +0 -6
- data/lib/water_drop/contracts/config.rb +0 -26
- data/lib/water_drop/contracts/message.rb +0 -42
- data/lib/water_drop/instrumentation/callbacks/delivery.rb +0 -30
- data/lib/water_drop/instrumentation/callbacks/statistics_decorator.rb +0 -77
- data/lib/water_drop/instrumentation/callbacks_manager.rb +0 -39
- data/lib/water_drop/instrumentation.rb +0 -20
- data/lib/water_drop/patches/rdkafka/bindings.rb +0 -42
- data/lib/water_drop/patches/rdkafka/producer.rb +0 -20
- data/lib/water_drop/producer/dummy_client.rb +0 -32
- data/lib/water_drop/producer.rb +0 -162
- data/lib/water_drop.rb +0 -36
- /data/lib/{water_drop → waterdrop}/contracts.rb +0 -0
- /data/lib/{water_drop → waterdrop}/producer/status.rb +0 -0
data/lib/waterdrop/producer/transactions.rb
ADDED
@@ -0,0 +1,219 @@
+# frozen_string_literal: true
+
+module WaterDrop
+  class Producer
+    # Transactions related producer functionalities
+    module Transactions
+      # Contract to validate that input for transactional offset storage is correct
+      CONTRACT = Contracts::TransactionalOffset.new
+
+      private_constant :CONTRACT
+
+      # Creates a transaction.
+      #
+      # Karafka transactions work in a similar manner to SQL db transactions though there are some
+      # crucial differences. When you start a transaction, all messages produced during it will
+      # be delivered together or will fail together. The difference is, that messages from within
+      # a single transaction can be delivered and will have a delivery handle but will be then
+      # compacted prior to moving the LSO forward. This means, that not every delivery handle for
+      # async dispatches will emit a queue purge error. None for sync as the delivery has happened
+      # but they will never be visible by the transactional consumers.
+      #
+      # Transactions **are** thread-safe however they lock a mutex. This means, that for
+      # high-throughput transactional messages production in multiple threads
+      # (for example in Karafka), it may be much better to use few instances that can work in
+      # parallel.
+      #
+      # Please note, that if a producer is configured as transactional, it **cannot** produce
+      # messages outside of transactions, that is why by default all dispatches will be wrapped
+      # with a transaction. One transaction per single dispatch and for `produce_many` it will be
+      # a single transaction wrapping all messages dispatches (not one per message).
+      #
+      # @return Block result
+      #
+      # @example Simple transaction
+      #   producer.transaction do
+      #     producer.produce_async(topic: 'topic', payload: 'data')
+      #   end
+      #
+      # @example Aborted transaction - messages producer won't be visible by consumers
+      #   producer.transaction do
+      #     producer.produce_sync(topic: 'topic', payload: 'data')
+      #     throw(:abort)
+      #   end
+      #
+      # @example Use block result last handler to wait on all messages ack
+      #   handler = producer.transaction do
+      #     producer.produce_async(topic: 'topic', payload: 'data')
+      #   end
+      #
+      #   handler.wait
+      def transaction
+        # This will safely allow us to support one operation transactions so a transactional
+        # producer can work without the transactional block if needed
+        return yield if @transaction_mutex.owned?
+
+        @transaction_mutex.synchronize do
+          transactional_instrument(:finished) do
+            with_transactional_error_handling(:begin) do
+              transactional_instrument(:started) { client.begin_transaction }
+            end
+
+            result = nil
+            commit = false
+
+            catch(:abort) do
+              result = yield
+              commit = true
+            end
+
+            commit || raise(WaterDrop::Errors::AbortTransaction)
+
+            with_transactional_error_handling(:commit) do
+              transactional_instrument(:committed) { client.commit_transaction }
+            end
+
+            result
+          # We need to handle any interrupt including critical in order not to have the transaction
+          # running. This will also handle things like `IRB::Abort`
+          #
+          # rubocop:disable Lint/RescueException
+          rescue Exception => e
+            # rubocop:enable Lint/RescueException
+            with_transactional_error_handling(:abort) do
+              transactional_instrument(:aborted) { client.abort_transaction }
+            end
+
+            raise unless e.is_a?(WaterDrop::Errors::AbortTransaction)
+          end
+        end
+      end
+
+      # @return [Boolean] Is this producer a transactional one
+      def transactional?
+        return @transactional if instance_variable_defined?(:'@transactional')
+
+        @transactional = config.kafka.to_h.key?(:'transactional.id')
+      end
+
+      # Marks given message as consumed inside of a transaction.
+      #
+      # @param consumer [#consumer_group_metadata_pointer] any consumer from which we can obtain
+      #   the librdkafka consumer group metadata pointer
+      # @param message [Karafka::Messages::Message] karafka message
+      # @param offset_metadata [String] offset metadata or nil if none
+      def transaction_mark_as_consumed(consumer, message, offset_metadata = nil)
+        raise Errors::TransactionRequiredError unless @transaction_mutex.owned?
+
+        CONTRACT.validate!(
+          {
+            consumer: consumer,
+            message: message,
+            offset_metadata: offset_metadata
+          },
+          Errors::TransactionalOffsetInvalidError
+        )
+
+        details = { message: message, offset_metadata: offset_metadata }
+
+        transactional_instrument(:marked_as_consumed, details) do
+          tpl = Rdkafka::Consumer::TopicPartitionList.new
+          partition = Rdkafka::Consumer::Partition.new(
+            message.partition,
+            # +1 because this is next offset from which we will start processing from
+            message.offset + 1,
+            0,
+            offset_metadata
+          )
+
+          tpl.add_topic_and_partitions_with_offsets(message.topic, [partition])
+
+          with_transactional_error_handling(:store_offset) do
+            client.send_offsets_to_transaction(
+              consumer,
+              tpl,
+              # This setting is at the moment in seconds and we require ms
+              @config.max_wait_timeout * 1_000
+            )
+          end
+        end
+      end
+
+      private
+
+      # Runs provided code with a transaction wrapper if transactions are enabled.
+      # This allows us to simplify the async and sync batch dispatchers because we can ensure that
+      # their internal dispatches will be wrapped only with a single transaction and not
+      # a transaction per message
+      # @param block [Proc] code we want to run
+      def with_transaction_if_transactional(&block)
+        transactional? ? transaction(&block) : yield
+      end
+
+      # Instruments the transactional operation with producer id
+      #
+      # @param key [Symbol] transaction operation key
+      # @param details [Hash] additional instrumentation details
+      # @param block [Proc] block to run inside the instrumentation or nothing if not given
+      def transactional_instrument(key, details = EMPTY_HASH, &block)
+        @monitor.instrument("transaction.#{key}", details.merge(producer_id: id), &block)
+      end
+
+      # Error handling for transactional operations is a bit special. There are three types of
+      # errors coming from librdkafka:
+      # - retryable - indicates that a given operation (like offset commit) can be retried after
+      #   a backoff and that is should be operating later as expected. We try to retry those
+      #   few times before finally failing.
+      # - fatal - errors that will not recover no matter what (for example being fenced out)
+      # - abortable - error from which we cannot recover but for which we should abort the
+      #   current transaction.
+      #
+      # The code below handles this logic also publishing the appropriate notifications via our
+      # notifications pipeline.
+      #
+      # @param action [Symbol] action type
+      # @param allow_abortable [Boolean] should we allow for the abortable flow. This is set to
+      #   false internally to prevent attempts to abort from failed abort operations
+      def with_transactional_error_handling(action, allow_abortable: true)
+        attempt ||= 0
+        attempt += 1
+
+        yield
+      rescue ::Rdkafka::RdkafkaError => e
+        # Decide if there is a chance to retry given error
+        do_retry = e.retryable? && attempt < config.max_attempts_on_transaction_command
+
+        @monitor.instrument(
+          'error.occurred',
+          producer_id: id,
+          caller: self,
+          error: e,
+          type: "transaction.#{action}",
+          retry: do_retry,
+          attempt: attempt
+        )
+
+        raise if e.fatal?
+
+        if do_retry
+          # Backoff more and more before retries
+          sleep(config.wait_backoff_on_transaction_command * attempt)
+
+          retry
+        end
+
+        if e.abortable? && allow_abortable
+          # Always attempt to abort but if aborting fails with an abortable error, do not attempt
+          # to abort from abort as this could create an infinite loop
+          with_transactional_error_handling(:abort, allow_abortable: false) do
+            transactional_instrument(:aborted) { client.abort_transaction }
+          end
+
+          raise
+        end
+
+        raise
+      end
+    end
+  end
+end
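The transactional API above is easiest to see end to end with a short usage sketch. The snippet below is illustrative and not part of the diff: it assumes a producer configured with a `'transactional.id'` in its librdkafka settings (which is exactly what `#transactional?` checks for), and the broker address and topic name are placeholders.

```ruby
require 'waterdrop'

# Hypothetical transactional producer; the 'transactional.id' key in the kafka
# hash is what makes #transactional? return true for this instance
producer = WaterDrop::Producer.new do |config|
  config.kafka = {
    'bootstrap.servers': 'localhost:9092',
    'transactional.id': 'orders-producer-1'
  }
end

# All dispatches inside the block are committed (or aborted) together;
# the block result is returned, so the last delivery handle can be awaited
handler = producer.transaction do
  producer.produce_async(topic: 'events', payload: 'data')
end

handler.wait

# throw(:abort) rolls the whole transaction back without raising to the caller
producer.transaction do
  producer.produce_sync(topic: 'events', payload: 'data')
  throw(:abort)
end
```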
data/lib/waterdrop/producer.rb
ADDED
@@ -0,0 +1,324 @@
+# frozen_string_literal: true
+
+module WaterDrop
+  # Main WaterDrop messages producer
+  class Producer
+    extend Forwardable
+    include Sync
+    include Async
+    include Buffer
+    include Transactions
+    include ::Karafka::Core::Helpers::Time
+
+    # Which of the inline flow errors do we want to intercept and re-bind
+    SUPPORTED_FLOW_ERRORS = [
+      Rdkafka::RdkafkaError,
+      Rdkafka::Producer::DeliveryHandle::WaitTimeoutError
+    ].freeze
+
+    # Empty has to save on memory allocations
+    EMPTY_HASH = {}.freeze
+
+    private_constant :SUPPORTED_FLOW_ERRORS, :EMPTY_HASH
+
+    def_delegators :config, :middleware
+
+    # @return [String] uuid of the current producer
+    attr_reader :id
+    # @return [Status] producer status object
+    attr_reader :status
+    # @return [Array] internal messages buffer
+    attr_reader :messages
+    # @return [Object] monitor we want to use
+    attr_reader :monitor
+    # @return [Object] dry-configurable config object
+    attr_reader :config
+
+    # Creates a not-yet-configured instance of the producer
+    # @param block [Proc] configuration block
+    # @return [Producer] producer instance
+    def initialize(&block)
+      @operations_in_progress = Helpers::Counter.new
+      @buffer_mutex = Mutex.new
+      @connecting_mutex = Mutex.new
+      @operating_mutex = Mutex.new
+      @transaction_mutex = Mutex.new
+
+      @status = Status.new
+      @messages = []
+
+      return unless block
+
+      setup(&block)
+    end
+
+    # Sets up the whole configuration and initializes all that is needed
+    # @param block [Block] configuration block
+    def setup(&block)
+      raise Errors::ProducerAlreadyConfiguredError, id unless @status.initial?
+
+      @config = Config
+                .new
+                .setup(&block)
+                .config
+
+      @id = @config.id
+      @monitor = @config.monitor
+      @contract = Contracts::Message.new(max_payload_size: @config.max_payload_size)
+      @status.configured!
+    end
+
+    # @return [Rdkafka::Producer] raw rdkafka producer
+    # @note Client is lazy initialized, keeping in mind also the fact of a potential fork that
+    #   can happen any time.
+    # @note It is not recommended to fork a producer that is already in use so in case of
+    #   bootstrapping a cluster, it's much better to fork configured but not used producers
+    def client
+      return @client if @client && @pid == Process.pid
+
+      # Don't allow to obtain a client reference for a producer that was not configured
+      raise Errors::ProducerNotConfiguredError, id if @status.initial?
+
+      @connecting_mutex.synchronize do
+        return @client if @client && @pid == Process.pid
+
+        # We undefine all the finalizers, in case it was a fork, so the finalizers from the parent
+        # process don't leak
+        ObjectSpace.undefine_finalizer(id)
+
+        # We should raise an error when trying to use a producer with client from a fork. Always.
+        if @client
+          # We need to reset the client, otherwise there might be attempt to close the parent
+          # client
+          @client = nil
+          raise Errors::ProducerUsedInParentProcess, Process.pid
+        end
+
+        # Finalizer tracking is needed for handling shutdowns gracefully.
+        # I don't expect everyone to remember about closing all the producers all the time, thus
+        # this approach is better. Although it is still worth keeping in mind, that this will
+        # block GC from removing a no longer used producer unless closed properly but at least
+        # won't crash the VM upon closing the process
+        ObjectSpace.define_finalizer(id, proc { close })
+
+        @pid = Process.pid
+        @client = Builder.new.call(self, @config)
+
+        # Register statistics runner for this particular type of callbacks
+        ::Karafka::Core::Instrumentation.statistics_callbacks.add(
+          @id,
+          Instrumentation::Callbacks::Statistics.new(@id, @client.name, @config.monitor)
+        )
+
+        # Register error tracking callback
+        ::Karafka::Core::Instrumentation.error_callbacks.add(
+          @id,
+          Instrumentation::Callbacks::Error.new(@id, @client.name, @config.monitor)
+        )
+
+        @status.connected!
+        @monitor.instrument('producer.connected', producer_id: id)
+      end
+
+      @client
+    end
+
+    # Fetches and caches the partition count of a topic
+    #
+    # @param topic [String] topic for which we want to get the number of partitions
+    # @return [Integer] number of partitions of the requested topic or -1 if number could not be
+    #   retrieved.
+    #
+    # @note It uses the underlying `rdkafka-ruby` partition count fetch and cache.
+    def partition_count(topic)
+      client.partition_count(topic.to_s)
+    end
+
+    # Purges data from both the buffer queue as well as the librdkafka queue.
+    #
+    # @note This is an operation that can cause data loss. Keep that in mind. It will not only
+    #   purge the internal WaterDrop buffer but will also purge the librdkafka queue as well as
+    #   will cancel any outgoing messages dispatches.
+    def purge
+      @monitor.instrument('buffer.purged', producer_id: id) do
+        @buffer_mutex.synchronize do
+          @messages = []
+        end
+
+        @client.purge
+      end
+    end
+
+    # Flushes the buffers in a sync way and closes the producer
+    # @param force [Boolean] should we force closing even with outstanding messages after the
+    #   max wait timeout
+    def close(force: false)
+      @operating_mutex.synchronize do
+        return unless @status.active?
+
+        @monitor.instrument(
+          'producer.closed',
+          producer_id: id
+        ) do
+          @status.closing!
+          @monitor.instrument('producer.closing', producer_id: id)
+
+          # No need for auto-gc if everything got closed by us
+          # This should be used only in case a producer was not closed properly and forgotten
+          ObjectSpace.undefine_finalizer(id)
+
+          # We save this thread id because we need to bypass the activity verification on the
+          # producer for final flush of buffers.
+          @closing_thread_id = Thread.current.object_id
+
+          # Wait until all the outgoing operations are done. Only when no one is using the
+          # underlying client running operations we can close
+          sleep(0.001) until @operations_in_progress.value.zero?
+
+          # Flush has its own buffer mutex but even if it is blocked, flushing can still happen
+          # as we close the client after the flushing (even if blocked by the mutex)
+          flush(true)
+
+          # We should not close the client in several threads the same time
+          # It is safe to run it several times but not exactly the same moment
+          # We also mark it as closed only if it was connected, if not, it would trigger a new
+          # connection that anyhow would be immediately closed
+          if @client
+            # Why do we trigger it early instead of just having `#close` do it?
+            # The linger.ms time will be ignored for the duration of the call,
+            # queued messages will be sent to the broker as soon as possible.
+            begin
+              # `max_wait_timeout` is in seconds at the moment
+              @client.flush(@config.max_wait_timeout * 1_000) unless @client.closed?
+            # We can safely ignore timeouts here because any left outstanding requests
+            # will anyhow force wait on close if not forced.
+            # If forced, we will purge the queue and just close
+            rescue ::Rdkafka::RdkafkaError, Rdkafka::AbstractHandle::WaitTimeoutError
+              nil
+            ensure
+              # Purge fully the local queue in case of a forceful shutdown just to be sure, that
+              # there are no dangling messages. In case flush was successful, there should be
+              # none but we do it just in case it timed out
+              purge if force
+            end
+
+            @client.close
+
+            @client = nil
+          end
+
+          # Remove callbacks runners that were registered
+          ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@id)
+          ::Karafka::Core::Instrumentation.error_callbacks.delete(@id)
+
+          @status.closed!
+        end
+      end
+    end
+
+    # Closes the producer with forced close after timeout, purging any outgoing data
+    def close!
+      close(force: true)
+    end
+
+    private
+
+    # Ensures that we don't run any operations when the producer is not configured or when it
+    # was already closed
+    def ensure_active!
+      return if @status.active?
+      return if @status.closing? && @operating_mutex.owned?
+
+      raise Errors::ProducerNotConfiguredError, id if @status.initial?
+      raise Errors::ProducerClosedError, id if @status.closing?
+      raise Errors::ProducerClosedError, id if @status.closed?
+
+      # This should never happen
+      raise Errors::StatusInvalidError, [id, @status.to_s]
+    end
+
+    # Ensures that the message we want to send out to Kafka is actually valid and that it can be
+    # sent there
+    # @param message [Hash] message we want to send
+    # @raise [Karafka::Errors::MessageInvalidError]
+    def validate_message!(message)
+      @contract.validate!(message, Errors::MessageInvalidError)
+    end
+
+    # Waits on a given handler
+    #
+    # @param handler [Rdkafka::Producer::DeliveryHandle]
+    def wait(handler)
+      handler.wait(
+        max_wait_timeout: @config.max_wait_timeout,
+        wait_timeout: @config.wait_timeout
+      )
+    end
+
+    # Runs the client produce method with a given message
+    #
+    # @param message [Hash] message we want to send
+    def produce(message)
+      produce_time ||= monotonic_now
+
+      # This can happen only during flushing on closing, in case like this we don't have to
+      # synchronize because we already own the lock
+      if @operating_mutex.owned?
+        @operations_in_progress.increment
+      else
+        @operating_mutex.synchronize { @operations_in_progress.increment }
+        ensure_active!
+      end
+
+      # In case someone defines topic as a symbol, we need to convert it into a string as
+      # librdkafka does not accept symbols
+      message = message.merge(topic: message[:topic].to_s) if message[:topic].is_a?(Symbol)
+
+      if transactional?
+        transaction { client.produce(**message) }
+      else
+        client.produce(**message)
+      end
+    rescue SUPPORTED_FLOW_ERRORS.first => e
+      # Unless we want to wait and retry and it's a full queue, we raise normally
+      raise unless @config.wait_on_queue_full
+      raise unless e.code == :queue_full
+      # If we're running for longer than the timeout, we need to re-raise the queue full.
+      # This will prevent from situation where cluster is down forever and we just retry and retry
+      # in an infinite loop, effectively hanging the processing
+      raise unless monotonic_now - produce_time < @config.wait_timeout_on_queue_full * 1_000
+
+      label = caller_locations(2, 1)[0].label.split(' ').last
+
+      # We use this syntax here because we want to preserve the original `#cause` when we
+      # instrument the error and there is no way to manually assign `#cause` value. We want to keep
+      # the original cause to maintain the same API across all the errors dispatched to the
+      # notifications pipeline.
+      begin
+        raise Errors::ProduceError, e.inspect
+      rescue Errors::ProduceError => e
+        # We want to instrument on this event even when we restart it.
+        # The reason is simple: instrumentation and visibility.
+        # We can recover from this, but despite that we should be able to instrument this.
+        # If this type of event happens too often, it may indicate that the buffer settings are not
+        # well configured.
+        @monitor.instrument(
+          'error.occurred',
+          producer_id: id,
+          message: message,
+          error: e,
+          type: "message.#{label}"
+        )

+        # We do not poll the producer because polling happens in a background thread
+        # It also should not be a frequent case (queue full), hence it's ok to just throttle.
+        sleep @config.wait_backoff_on_queue_full
+      end
+
+      @operations_in_progress.decrement
+      retry
+    ensure
+      @operations_in_progress.decrement
+    end
+  end
+end
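For orientation, here is a hedged sketch of the producer lifecycle this class implements: configure via the setup block, dispatch (the rdkafka client is built lazily on first use, as `#client` above shows), then close explicitly. The snippet is not part of the diff; `produce_sync`/`produce_async` come from the included `Sync`/`Async` modules, and the broker address and topic are placeholders.

```ruby
require 'waterdrop'

producer = WaterDrop::Producer.new do |config|
  config.kafka = { 'bootstrap.servers': 'localhost:9092' }
end

# First dispatch triggers the lazy client connection
producer.produce_sync(topic: 'events', payload: 'data')

# Async dispatch returns a delivery handle that can be awaited later
handler = producer.produce_async(topic: 'events', payload: 'more data')
handler.wait

# Close producers explicitly; otherwise the finalizer registered in #client has to do it
producer.close
```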
data/lib/waterdrop.rb
CHANGED
@@ -1,4 +1,29 @@
 # frozen_string_literal: true
 
-#
-
+# External components
+# delegate should be removed because we don't need it, we just add it because of ruby-kafka
+%w[
+  forwardable
+  json
+  zeitwerk
+  securerandom
+  karafka-core
+  pathname
+].each { |lib| require lib }
+
+# WaterDrop library
+module WaterDrop
+  class << self
+    # @return [String] root path of this gem
+    def gem_root
+      Pathname.new(File.expand_path('..', __dir__))
+    end
+  end
+end
+
+loader = Zeitwerk::Loader.for_gem
+loader.inflector.inflect('waterdrop' => 'WaterDrop')
+# Do not load vendors instrumentation components. Those need to be required manually if needed
+loader.ignore("#{__dir__}/waterdrop/instrumentation/vendors/**/*.rb")
+loader.setup
+loader.eager_load
data/renovate.json
ADDED
data/waterdrop.gemspec
CHANGED
@@ -3,35 +3,38 @@
 lib = File.expand_path('lib', __dir__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 
-require '
+require 'waterdrop/version'
 
 Gem::Specification.new do |spec|
   spec.name = 'waterdrop'
   spec.version = ::WaterDrop::VERSION
   spec.platform = Gem::Platform::RUBY
   spec.authors = ['Maciej Mensfeld']
-  spec.email = %w[
+  spec.email = %w[contact@karafka.io]
   spec.homepage = 'https://karafka.io'
   spec.summary = 'Kafka messaging made easy!'
   spec.description = spec.summary
   spec.license = 'MIT'
 
-  spec.add_dependency '
-  spec.add_dependency 'dry-configurable', '~> 0.13'
-  spec.add_dependency 'dry-monitor', '~> 0.5'
-  spec.add_dependency 'dry-validation', '~> 1.7'
-  spec.add_dependency 'rdkafka', '>= 0.10'
+  spec.add_dependency 'karafka-core', '>= 2.2.3', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'
 
-  spec.required_ruby_version = '>= 2.6.0'
-
   if $PROGRAM_NAME.end_with?('gem')
     spec.signing_key = File.expand_path('~/.ssh/gem-private_key.pem')
   end
 
-  spec.cert_chain = %w[certs/
+  spec.cert_chain = %w[certs/cert_chain.pem]
   spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec)/}) }
   spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
   spec.require_paths = %w[lib]
-
+
+  spec.metadata = {
+    'funding_uri' => 'https://karafka.io/#become-pro',
+    'homepage_uri' => 'https://karafka.io',
+    'changelog_uri' => 'https://karafka.io/docs/Changelog-WaterDrop',
+    'bug_tracker_uri' => 'https://github.com/karafka/waterdrop/issues',
+    'source_code_uri' => 'https://github.com/karafka/waterdrop',
+    'documentation_uri' => 'https://karafka.io/docs/#waterdrop',
+    'rubygems_mfa_required' => 'true'
+  }
 end
data.tar.gz.sig
CHANGED
Binary file