waterdrop 1.4.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/.diffend.yml +3 -0
- data/.github/workflows/ci.yml +75 -0
- data/.gitignore +2 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +13 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +67 -54
- data/LICENSE +165 -0
- data/README.md +194 -56
- data/config/errors.yml +3 -16
- data/docker-compose.yml +17 -0
- data/lib/water_drop.rb +4 -24
- data/lib/water_drop/config.rb +41 -142
- data/lib/water_drop/contracts.rb +0 -2
- data/lib/water_drop/contracts/config.rb +8 -121
- data/lib/water_drop/contracts/message.rb +41 -0
- data/lib/water_drop/errors.rb +31 -5
- data/lib/water_drop/instrumentation.rb +7 -0
- data/lib/water_drop/instrumentation/monitor.rb +16 -23
- data/lib/water_drop/instrumentation/stdout_listener.rb +113 -32
- data/lib/water_drop/producer.rb +143 -0
- data/lib/water_drop/producer/async.rb +51 -0
- data/lib/water_drop/producer/buffer.rb +113 -0
- data/lib/water_drop/producer/builder.rb +63 -0
- data/lib/water_drop/producer/dummy_client.rb +32 -0
- data/lib/water_drop/producer/statistics_decorator.rb +71 -0
- data/lib/water_drop/producer/status.rb +52 -0
- data/lib/water_drop/producer/sync.rb +65 -0
- data/lib/water_drop/version.rb +1 -1
- data/waterdrop.gemspec +5 -5
- metadata +27 -26
- metadata.gz.sig +0 -0
- data/.travis.yml +0 -35
- data/MIT-LICENCE +0 -18
- data/lib/water_drop/async_producer.rb +0 -26
- data/lib/water_drop/base_producer.rb +0 -57
- data/lib/water_drop/config_applier.rb +0 -52
- data/lib/water_drop/contracts/message_options.rb +0 -19
- data/lib/water_drop/sync_producer.rb +0 -24
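
The removed `async_producer.rb` / `sync_producer.rb` and the new `producer/*.rb` files mark the core API change: the class-level `WaterDrop::SyncProducer` / `WaterDrop::AsyncProducer` of 1.4 give way to an instance-based `WaterDrop::Producer`. A hedged migration sketch (the 1.4 call style and the `kafka` setting are assumptions inferred from the file list, not part of this diff):

```ruby
# WaterDrop 1.4 style: class-level producer, globally configured (assumed 1.4 API)
WaterDrop::SyncProducer.call('payload', topic: 'events')

# WaterDrop 2.0 style: explicit producer instance configured with a block
producer = WaterDrop::Producer.new do |config|
  # 'bootstrap.servers' is an illustrative librdkafka setting
  config.kafka = { 'bootstrap.servers': 'localhost:9092' }
end

producer.produce_sync(topic: 'events', payload: 'payload')
producer.close # producers now own a client and must be closed
```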
`data/lib/water_drop/producer.rb` (new file) @@ -0,0 +1,143 @@

```ruby
# frozen_string_literal: true

module WaterDrop
  # Main WaterDrop messages producer
  class Producer
    include Sync
    include Async
    include Buffer

    # @return [String] uuid of the current producer
    attr_reader :id
    # @return [Status] producer status object
    attr_reader :status
    # @return [Concurrent::Array] internal messages buffer
    attr_reader :messages
    # @return [Object] monitor we want to use
    attr_reader :monitor
    # @return [Object] dry-configurable config object
    attr_reader :config

    # Creates a not-yet-configured instance of the producer
    # @param block [Proc] configuration block
    # @return [Producer] producer instance
    def initialize(&block)
      @buffer_mutex = Mutex.new
      @connecting_mutex = Mutex.new
      @closing_mutex = Mutex.new

      @status = Status.new
      @messages = Concurrent::Array.new

      return unless block

      setup(&block)
    end

    # Sets up the whole configuration and initializes all that is needed
    # @param block [Block] configuration block
    def setup(&block)
      raise Errors::ProducerAlreadyConfiguredError, id unless @status.initial?

      @config = Config
                .new
                .setup(&block)
                .config

      @id = @config.id
      @monitor = @config.monitor
      @contract = Contracts::Message.new(max_payload_size: @config.max_payload_size)
      @status.configured!
    end

    # @return [Rdkafka::Producer] raw rdkafka producer
    # @note Client is lazy initialized, keeping in mind also the fact of a potential fork that
    #   can happen any time.
    # @note It is not recommended to fork a producer that is already in use, so in case of
    #   bootstrapping a cluster, it's much better to fork configured but not yet used producers
    def client
      return @client if @client && @pid == Process.pid

      # Don't allow to obtain a client reference for a producer that was not configured
      raise Errors::ProducerNotConfiguredError, id if @status.initial?

      @connecting_mutex.synchronize do
        return @client if @client && @pid == Process.pid

        # We should raise an error when trying to use a producer from a fork that is already
        # connected to Kafka. We allow forking producers only before they are used
        raise Errors::ProducerUsedInParentProcess, Process.pid if @status.connected?

        # We undefine all the finalizers, in case it was a fork, so the finalizers from the
        # parent process don't leak
        ObjectSpace.undefine_finalizer(id)
        # Finalizer tracking is needed for handling shutdowns gracefully.
        # I don't expect everyone to remember about closing all the producers all the time, thus
        # this approach is better. Although it is still worth keeping in mind that this will
        # block GC from removing a no longer used producer unless closed properly, at least it
        # won't crash the VM upon closing the process
        ObjectSpace.define_finalizer(id, proc { close })

        @pid = Process.pid
        @client = Builder.new.call(self, @config)
        @status.connected!
      end

      @client
    end

    # Flushes the buffers in a sync way and closes the producer
    def close
      @closing_mutex.synchronize do
        return unless @status.active?

        @monitor.instrument(
          'producer.closed',
          producer: self
        ) do
          @status.closing!

          # No need for auto-gc if everything got closed by us
          # This should be used only in case a producer was not closed properly and forgotten
          ObjectSpace.undefine_finalizer(id)

          # Flush has its own buffer mutex, but even if it is blocked, flushing can still happen
          # as we close the client after the flushing (even if blocked by the mutex)
          flush(false)

          # We should not close the client in several threads at the same time
          # It is safe to run it several times but not at exactly the same moment
          client.close

          @status.closed!
        end
      end
    end

    # Ensures that we don't run any operations when the producer is not configured or when it
    # was already closed
    def ensure_active!
      return if @status.active?

      raise Errors::ProducerNotConfiguredError, id if @status.initial?
      raise Errors::ProducerClosedError, id if @status.closing? || @status.closed?

      # This should never happen
      raise Errors::StatusInvalidError, [id, @status.to_s]
    end

    # Ensures that the message we want to send out to Kafka is actually valid and that it can be
    # sent there
    # @param message [Hash] message we want to send
    # @raise [Karafka::Errors::MessageInvalidError]
    def validate_message!(message)
      result = @contract.call(message)
      return if result.success?

      raise Errors::MessageInvalidError, [
        result.errors.to_h,
        message
      ]
    end
  end
end
```
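
The `client` method above makes the producer fork-aware: a configured but not yet used producer can be forked, and each child lazily builds its own rdkafka client on first use. A minimal sketch of that flow on POSIX systems, assuming the illustrative `kafka` setting and `produce_sync` from the `Sync` mixin listed in this diff:

```ruby
producer = WaterDrop::Producer.new do |config|
  config.kafka = { 'bootstrap.servers': 'localhost:9092' } # illustrative
end

# The producer is configured but not connected, so forking is still allowed.
2.times do
  fork do
    # First use in the child runs Builder and records the child's PID,
    # so each process ends up with its own Rdkafka::Producer instance.
    producer.produce_sync(topic: 'events', payload: "from #{Process.pid}")
    producer.close
  end
end

Process.waitall
```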
`data/lib/water_drop/producer/async.rb` (new file) @@ -0,0 +1,51 @@

```ruby
# frozen_string_literal: true

module WaterDrop
  class Producer
    # Component for asynchronous producer operations
    module Async
      # Produces a message to Kafka and does not wait for results
      #
      # @param message [Hash] hash that complies with the {Contracts::Message} contract
      #
      # @return [Rdkafka::Producer::DeliveryHandle] delivery handle that might return the report
      #
      # @raise [Rdkafka::RdkafkaError] When adding the message to rdkafka's queue failed
      # @raise [Errors::MessageInvalidError] When provided message details are invalid and the
      #   message could not be sent to Kafka
      def produce_async(message)
        ensure_active!
        validate_message!(message)

        @monitor.instrument(
          'message.produced_async',
          producer: self,
          message: message
        ) { client.produce(**message) }
      end

      # Produces many messages to Kafka and does not wait for them to be delivered
      #
      # @param messages [Array<Hash>] array with messages that comply with the
      #   {Contracts::Message} contract
      #
      # @return [Array<Rdkafka::Producer::DeliveryHandle>] delivery handles
      #
      # @raise [Rdkafka::RdkafkaError] When adding the messages to rdkafka's queue failed
      # @raise [Errors::MessageInvalidError] When any of the provided message details are invalid
      #   and the messages could not be sent to Kafka
      def produce_many_async(messages)
        ensure_active!
        messages.each { |message| validate_message!(message) }

        @monitor.instrument(
          'messages.produced_async',
          producer: self,
          messages: messages
        ) do
          messages.map { |message| client.produce(**message) }
        end
      end
    end
  end
end
```
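
Since `produce_async` returns the rdkafka delivery handle without waiting, the report can be collected later. A short sketch, reusing the configured producer from the earlier example (the `wait` keywords match those used by `Buffer#flush` below):

```ruby
handle = producer.produce_async(topic: 'events', payload: 'fire-and-forget')

# ... other work happens while librdkafka delivers in the background ...

report = handle.wait(max_wait_timeout: 60, wait_timeout: 0.1)
puts "delivered to partition #{report.partition} at offset #{report.offset}"
```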
`data/lib/water_drop/producer/buffer.rb` (new file) @@ -0,0 +1,113 @@

```ruby
# frozen_string_literal: true

module WaterDrop
  class Producer
    # Component for buffered operations
    module Buffer
      # Exceptions we catch when dispatching messages from a buffer
      RESCUED_ERRORS = [
        Rdkafka::RdkafkaError,
        Rdkafka::Producer::DeliveryHandle::WaitTimeoutError
      ].freeze

      private_constant :RESCUED_ERRORS

      # Adds given message into the internal producer buffer without flushing it to Kafka
      #
      # @param message [Hash] hash that complies with the {Contracts::Message} contract
      # @raise [Errors::MessageInvalidError] When provided message details are invalid and the
      #   message could not be sent to Kafka
      def buffer(message)
        ensure_active!
        validate_message!(message)

        @monitor.instrument(
          'message.buffered',
          producer: self,
          message: message
        ) { @messages << message }
      end

      # Adds given messages into the internal producer buffer without flushing them to Kafka
      #
      # @param messages [Array<Hash>] array with messages that comply with the
      #   {Contracts::Message} contract
      # @raise [Errors::MessageInvalidError] When any of the provided messages details are invalid
      #   and the message could not be sent to Kafka
      def buffer_many(messages)
        ensure_active!
        messages.each { |message| validate_message!(message) }

        @monitor.instrument(
          'messages.buffered',
          producer: self,
          messages: messages
        ) do
          messages.each { |message| @messages << message }
          messages
        end
      end

      # Flushes the internal buffer to Kafka in an async way
      # @return [Array<Rdkafka::Producer::DeliveryHandle>] delivery handles for messages that were
      #   flushed
      def flush_async
        ensure_active!

        @monitor.instrument(
          'buffer.flushed_async',
          producer: self,
          messages: @messages
        ) { flush(false) }
      end

      # Flushes the internal buffer to Kafka in a sync way
      # @return [Array<Rdkafka::Producer::DeliveryReport>] delivery reports for messages that were
      #   flushed
      def flush_sync
        ensure_active!

        @monitor.instrument(
          'buffer.flushed_sync',
          producer: self,
          messages: @messages
        ) { flush(true) }
      end

      private

      # Method for triggering the buffer
      # @param sync [Boolean] should it flush in a sync way
      # @return [Array<Rdkafka::Producer::DeliveryHandle, Rdkafka::Producer::DeliveryReport>]
      #   delivery handles for async or delivery reports for sync
      # @raise [Errors::FlushFailureError] when there was a failure in flushing
      # @note We use this method underneath to provide a different instrumentation for sync and
      #   async flushing within the public API
      def flush(sync)
        data_for_dispatch = nil
        dispatched = []

        @buffer_mutex.synchronize do
          data_for_dispatch = @messages
          @messages = Concurrent::Array.new
        end

        dispatched = data_for_dispatch.map { |message| client.produce(**message) }

        return dispatched unless sync

        dispatched.map do |handler|
          handler.wait(
            max_wait_timeout: @config.max_wait_timeout,
            wait_timeout: @config.wait_timeout
          )
        end
      rescue *RESCUED_ERRORS => e
        key = sync ? 'buffer.flushed_sync.error' : 'buffer.flush_async.error'
        @monitor.instrument(key, producer: self, error: e, dispatched: dispatched)

        raise Errors::FlushFailureError.new(dispatched)
      end
    end
  end
end
```
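
Buffered messages only reach Kafka on one of the flush calls. A usage sketch with illustrative topics and payloads:

```ruby
producer.buffer(topic: 'metrics', payload: 'cpu=42')
producer.buffer_many(
  [
    { topic: 'metrics', payload: 'mem=71' },
    { topic: 'metrics', payload: 'io=3' }
  ]
)

# Sync flush waits on every delivery handle and returns delivery reports.
reports = producer.flush_sync
reports.each { |r| puts "partition=#{r.partition} offset=#{r.offset}" }

# The buffer was swapped out under @buffer_mutex, so it is empty again here.
producer.messages.empty? # => true
```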
`data/lib/water_drop/producer/builder.rb` (new file) @@ -0,0 +1,63 @@

```ruby
# frozen_string_literal: true

module WaterDrop
  class Producer
    # Class used to construct the rdkafka producer client
    class Builder
      # @param producer [Producer] not yet configured producer for which we want to
      #   build the client
      # @param config [Object] dry-configurable based configuration object
      # @return [Rdkafka::Producer, Producer::DummyClient] raw rdkafka producer or a dummy
      #   producer when we don't want to dispatch any messages
      def call(producer, config)
        return DummyClient.new unless config.deliver

        Rdkafka::Config.logger = config.logger
        Rdkafka::Config.statistics_callback = build_statistics_callback(producer, config.monitor)

        client = Rdkafka::Config.new(config.kafka.to_h).producer
        client.delivery_callback = build_delivery_callback(producer, config.monitor)
        client
      end

      private

      # Creates a proc that we want to run upon each successful message delivery
      #
      # @param producer [Producer]
      # @param monitor [Object] monitor we want to use
      # @return [Proc] delivery callback
      def build_delivery_callback(producer, monitor)
        lambda do |delivery_report|
          monitor.instrument(
            'message.acknowledged',
            producer: producer,
            offset: delivery_report.offset,
            partition: delivery_report.partition
          )
        end
      end

      # Creates a proc that we want to run upon each statistics callback execution
      #
      # @param producer [Producer]
      # @param monitor [Object] monitor we want to use
      # @return [Proc] statistics callback
      # @note We decorate the statistics with our own decorator because some of the metrics from
      #   rdkafka are absolute. For example number of sent messages increases not in reference to
      #   previous statistics emit but from the beginning of the process. We decorate it with diff
      #   of all the numeric values against the data from the previous callback emit
      def build_statistics_callback(producer, monitor)
        statistics_decorator = StatisticsDecorator.new

        lambda do |statistics|
          monitor.instrument(
            'statistics.emitted',
            producer: producer,
            statistics: statistics_decorator.call(statistics)
          )
        end
      end
    end
  end
end
```
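
The callbacks that `Builder` wires up surface as `message.acknowledged` and `statistics.emitted` events on the producer monitor. Assuming the monitor follows the `#subscribe` convention of WaterDrop's instrumentation (not shown in this hunk), listening looks roughly like:

```ruby
producer.monitor.subscribe('message.acknowledged') do |event|
  puts "ack: partition=#{event[:partition]} offset=#{event[:offset]}"
end

producer.monitor.subscribe('statistics.emitted') do |event|
  stats = event[:statistics]
  # 'txmsgs' is a standard librdkafka counter; the '_d' keys are added by the decorator
  puts "messages sent since last emit: #{stats['txmsgs_d']}"
end
```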
`data/lib/water_drop/producer/dummy_client.rb` (new file) @@ -0,0 +1,32 @@

```ruby
# frozen_string_literal: true

module WaterDrop
  class Producer
    # A dummy client that is supposed to be used instead of Rdkafka::Producer in case we don't
    # want to dispatch anything to Kafka
    class DummyClient
      # @return [DummyClient] dummy instance
      def initialize
        @counter = -1
      end

      # Dummy method for returning the delivery report
      # @param _args [Object] anything that the delivery handle accepts
      # @return [::Rdkafka::Producer::DeliveryReport]
      def wait(*_args)
        ::Rdkafka::Producer::DeliveryReport.new(0, @counter += 1)
      end

      # @param _args [Object] anything really, this dummy is supposed to support anything
      def respond_to_missing?(*_args)
        true
      end

      # @param _args [Object] anything really, this dummy is supposed to support anything
      # @return [self] returns self for chaining cases
      def method_missing(*_args)
        self || super
      end
    end
  end
end
```
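
With `deliver` disabled, `Builder` returns this dummy instead of a real client, so the full producer API can be exercised without a Kafka cluster, e.g. in tests. A sketch:

```ruby
test_producer = WaterDrop::Producer.new do |config|
  config.deliver = false
end

# method_missing makes the dummy absorb #produce and return itself...
handle = test_producer.produce_async(topic: 'events', payload: 'noop')

# ...and #wait then fakes a delivery report with an incrementing offset
report = handle.wait
report.offset # => 0 on the first call, 1 on the next, and so on
```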
`data/lib/water_drop/producer/statistics_decorator.rb` (new file) @@ -0,0 +1,71 @@

```ruby
# frozen_string_literal: true

module WaterDrop
  class Producer
    # Many of the librdkafka statistics are absolute values instead of a gauge.
    # This means that, for example, the number of messages sent is an absolute, growing value
    # rather than the number of messages sent since the last statistics report.
    # This decorator calculates the diff against the previously emitted stats, so we get
    # the diff together with the original values
    class StatisticsDecorator
      def initialize
        @previous = {}.freeze
      end

      # @param emited_stats [Hash] original emitted statistics
      # @return [Hash] emitted statistics extended with the diff data
      # @note We modify the emitted statistics in place instead of creating new ones. Since we
      #   don't expose any API to get raw data, users can just assume that the result of this
      #   decoration is the proper raw stats that they can use
      def call(emited_stats)
        diff(
          @previous,
          emited_stats
        )

        @previous = emited_stats

        emited_stats.freeze
      end

      private

      # Calculates the diff of the provided values and modifies the emitted statistics in place
      #
      # @param previous [Object] previous value from the given scope
      # @param current [Object] current scope from emitted statistics
      # @return [Object] the diff if the values were numeric, or the current scope otherwise
      def diff(previous, current)
        if current.is_a?(Hash)
          # @note We cannot use #each_key as we modify the content of the current scope
          #   in place (in case it's a hash)
          current.keys.each do |key|
            append(
              current,
              key,
              diff((previous || {})[key], (current || {})[key])
            )
          end
        end

        if current.is_a?(Numeric) && previous.is_a?(Numeric)
          current - previous
        else
          current
        end
      end

      # Appends the result of the diff to a given key as long as the result is numeric
      #
      # @param current [Hash] current scope
      # @param key [Symbol] key based on which we were diffing
      # @param result [Object] diff result
      def append(current, key, result)
        return unless result.is_a?(Numeric)

        current["#{key}_d"] = result
      end
    end
  end
end
```
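
A worked example of the decoration with an illustrative `txmsgs` counter: every numeric value gains a `_d` sibling holding the delta against the previous emit (on the first emit there is no previous numeric value, so the delta equals the value itself):

```ruby
decorator = WaterDrop::Producer::StatisticsDecorator.new

decorator.call('txmsgs' => 10)
# => { 'txmsgs' => 10, 'txmsgs_d' => 10 }   (frozen; first emit)

decorator.call('txmsgs' => 25)
# => { 'txmsgs' => 25, 'txmsgs_d' => 15 }   (25 - 10 from the previous emit)
```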