waterdrop 1.4.2 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/.github/workflows/ci.yml +1 -2
- data/.gitignore +2 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +17 -5
- data/Gemfile +9 -0
- data/Gemfile.lock +42 -29
- data/{MIT-LICENCE → MIT-LICENSE} +0 -0
- data/README.md +244 -57
- data/certs/mensfeld.pem +21 -21
- data/config/errors.yml +3 -16
- data/docker-compose.yml +1 -1
- data/lib/water_drop.rb +4 -24
- data/lib/water_drop/config.rb +41 -142
- data/lib/water_drop/contracts.rb +0 -2
- data/lib/water_drop/contracts/config.rb +8 -121
- data/lib/water_drop/contracts/message.rb +42 -0
- data/lib/water_drop/errors.rb +31 -5
- data/lib/water_drop/instrumentation/monitor.rb +16 -22
- data/lib/water_drop/instrumentation/stdout_listener.rb +113 -32
- data/lib/water_drop/patches/rdkafka_producer.rb +49 -0
- data/lib/water_drop/producer.rb +143 -0
- data/lib/water_drop/producer/async.rb +51 -0
- data/lib/water_drop/producer/buffer.rb +113 -0
- data/lib/water_drop/producer/builder.rb +63 -0
- data/lib/water_drop/producer/dummy_client.rb +32 -0
- data/lib/water_drop/producer/statistics_decorator.rb +71 -0
- data/lib/water_drop/producer/status.rb +52 -0
- data/lib/water_drop/producer/sync.rb +65 -0
- data/lib/water_drop/version.rb +1 -1
- data/waterdrop.gemspec +4 -4
- metadata +44 -45
- metadata.gz.sig +0 -0
- data/lib/water_drop/async_producer.rb +0 -26
- data/lib/water_drop/base_producer.rb +0 -57
- data/lib/water_drop/config_applier.rb +0 -52
- data/lib/water_drop/contracts/message_options.rb +0 -19
- data/lib/water_drop/sync_producer.rb +0 -24
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true

module WaterDrop
  class Producer
    # Component for asynchronous producer operations
    module Async
      # Dispatches a single message to Kafka without waiting for its delivery report
      #
      # @param message [Hash] hash that complies with the {Contracts::Message} contract
      # @return [Rdkafka::Producer::DeliveryHandle] delivery handle that might return the report
      # @raise [Rdkafka::RdkafkaError] when adding the message to rdkafka's queue failed
      # @raise [Errors::MessageInvalidError] when the provided message details are invalid and
      #   the message could not be sent to Kafka
      def produce_async(message)
        ensure_active!
        validate_message!(message)

        @monitor.instrument('message.produced_async', producer: self, message: message) do
          client.produce(**message)
        end
      end

      # Dispatches many messages to Kafka without waiting for their delivery reports
      #
      # @param messages [Array<Hash>] array with messages that comply with the
      #   {Contracts::Message} contract
      # @return [Array<Rdkafka::Producer::DeliveryHandle>] deliveries handles
      # @raise [Rdkafka::RdkafkaError] when adding the messages to rdkafka's queue failed
      # @raise [Errors::MessageInvalidError] when any of the provided messages details are
      #   invalid and the messages could not be sent to Kafka
      def produce_many_async(messages)
        ensure_active!
        messages.each { |candidate| validate_message!(candidate) }

        @monitor.instrument('messages.produced_async', producer: self, messages: messages) do
          messages.map { |candidate| client.produce(**candidate) }
        end
      end
    end
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
# frozen_string_literal: true

module WaterDrop
  class Producer
    # Component for buffered operations
    module Buffer
      # Exceptions we catch when dispatching messages from a buffer
      RESCUED_ERRORS = [
        Rdkafka::RdkafkaError,
        Rdkafka::Producer::DeliveryHandle::WaitTimeoutError
      ].freeze

      private_constant :RESCUED_ERRORS

      # Adds given message into the internal producer buffer without flushing it to Kafka
      #
      # @param message [Hash] hash that complies with the {Contracts::Message} contract
      # @raise [Errors::MessageInvalidError] When provided message details are invalid and the
      #   message could not be sent to Kafka
      def buffer(message)
        ensure_active!
        validate_message!(message)

        @monitor.instrument(
          'message.buffered',
          producer: self,
          message: message
        ) { @messages << message }
      end

      # Adds given messages into the internal producer buffer without flushing them to Kafka
      #
      # @param messages [Array<Hash>] array with messages that comply with the
      #   {Contracts::Message} contract
      # @raise [Errors::MessageInvalidError] When any of the provided messages details are invalid
      #   and the message could not be sent to Kafka
      def buffer_many(messages)
        ensure_active!
        messages.each { |message| validate_message!(message) }

        @monitor.instrument(
          'messages.buffered',
          producer: self,
          messages: messages
        ) do
          messages.each { |message| @messages << message }
          messages
        end
      end

      # Flushes the internal buffer to Kafka in an async way
      # @return [Array<Rdkafka::Producer::DeliveryHandle>] delivery handles for messages that were
      #   flushed
      def flush_async
        ensure_active!

        @monitor.instrument(
          'buffer.flushed_async',
          producer: self,
          messages: @messages
        ) { flush(false) }
      end

      # Flushes the internal buffer to Kafka in a sync way
      # @return [Array<Rdkafka::Producer::DeliveryReport>] delivery reports for messages that were
      #   flushed
      def flush_sync
        ensure_active!

        @monitor.instrument(
          'buffer.flushed_sync',
          producer: self,
          messages: @messages
        ) { flush(true) }
      end

      private

      # Method for triggering the buffer
      # @param sync [Boolean] should it flush in a sync way
      # @return [Array<Rdkafka::Producer::DeliveryHandle, Rdkafka::Producer::DeliveryReport>]
      #   delivery handles for async or delivery reports for sync
      # @raise [Errors::FlushFailureError] when there was a failure in flushing
      # @note We use this method underneath to provide a different instrumentation for sync and
      #   async flushing within the public API
      def flush(sync)
        data_for_dispatch = nil
        # Initialized upfront so the rescue below can always reference it, even when
        # client.produce raises before any handle was collected
        dispatched = []

        # Swap the buffer under the mutex so concurrent #buffer calls cannot append to the
        # array we are about to dispatch
        @buffer_mutex.synchronize do
          data_for_dispatch = @messages
          @messages = Concurrent::Array.new
        end

        dispatched = data_for_dispatch.map { |message| client.produce(**message) }

        return dispatched unless sync

        dispatched.map do |handler|
          handler.wait(
            max_wait_timeout: @config.max_wait_timeout,
            wait_timeout: @config.wait_timeout
          )
        end
      rescue *RESCUED_ERRORS => e
        # Fix: the async error event used to be emitted as 'buffer.flush_async.error', which was
        # inconsistent with the 'buffer.flushed_async' success event and with the sync
        # counterpart 'buffer.flushed_sync.error'. Subscribers can now rely on a uniform
        # 'buffer.flushed_*' naming scheme.
        key = sync ? 'buffer.flushed_sync.error' : 'buffer.flushed_async.error'
        @monitor.instrument(key, producer: self, error: e, dispatched: dispatched)

        raise Errors::FlushFailureError, dispatched
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true

module WaterDrop
  class Producer
    # Class used to construct the rdkafka producer client
    class Builder
      # Builds the client a given producer will use for dispatching messages
      #
      # @param producer [Producer] not yet configured producer for which the client is built
      # @param config [Object] dry-configurable based configuration object
      # @return [Rdkafka::Producer, Producer::DummyClient] raw rdkafka producer or a dummy
      #   client when we don't want to dispatch any messages
      def call(producer, config)
        return DummyClient.new unless config.deliver

        monitor = config.monitor

        # The statistics callback is a class level setting of Rdkafka::Config, so it needs to
        # be assigned before the producer instance is created below
        Rdkafka::Config.logger = config.logger
        Rdkafka::Config.statistics_callback = build_statistics_callback(producer, monitor)

        Rdkafka::Config.new(config.kafka.to_h).producer.tap do |client|
          client.delivery_callback = build_delivery_callback(producer, monitor)
        end
      end

      private

      # Creates the proc executed upon each successful message delivery
      #
      # @param producer [Producer]
      # @param monitor [Object] monitor we want to use
      # @return [Proc] delivery callback
      def build_delivery_callback(producer, monitor)
        lambda do |delivery_report|
          monitor.instrument(
            'message.acknowledged',
            producer: producer,
            offset: delivery_report.offset,
            partition: delivery_report.partition
          )
        end
      end

      # Creates the proc executed upon each statistics callback emission
      #
      # @param producer [Producer]
      # @param monitor [Object] monitor we want to use
      # @return [Proc] statistics callback
      # @note Some of the metrics emitted by rdkafka are absolute (e.g. the number of sent
      #   messages grows from the beginning of the process, not from the previous emit), so we
      #   decorate the payload with diffs of all the numeric values against the previous emit
      def build_statistics_callback(producer, monitor)
        decorator = StatisticsDecorator.new

        lambda do |statistics|
          monitor.instrument(
            'statistics.emitted',
            producer: producer,
            statistics: decorator.call(statistics)
          )
        end
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true

module WaterDrop
  class Producer
    # A no-op stand-in used instead of Rdkafka::Producer when we don't want to dispatch
    # anything to Kafka
    class DummyClient
      # @return [DummyClient] dummy instance
      def initialize
        # Starts at -1 so the very first #wait reports offset 0
        @counter = -1
      end

      # Fakes waiting on a delivery handle
      # @param _args [Object] anything that a real delivery handle would accept
      # @return [::Rdkafka::Producer::DeliveryReport] fabricated report with an incremented offset
      def wait(*_args)
        ::Rdkafka::Producer::DeliveryReport.new(0, @counter += 1)
      end

      # Swallows any method call so the dummy can be used (and chained) like a real client
      # @param _args [Object] anything really, this dummy is supposed to support anything
      # @return [self] self for chaining cases
      def method_missing(*_args)
        self || super
      end

      # @param _args [Object] anything really, this dummy is supposed to support anything
      # @return [true] always claims to respond
      def respond_to_missing?(*_args)
        true
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true

module WaterDrop
  class Producer
    # Many of the librdkafka statistics are absolute, monotonically growing values instead of
    # per-window gauges. For example, the number of messages sent grows from the beginning of
    # the process instead of counting messages sent since the last statistics report.
    # This decorator enriches the emitted statistics in place with `<key>_d` entries holding
    # the diff of every numeric value against the previously emitted payload.
    class StatisticsDecorator
      def initialize
        # Frozen empty hash so the very first diff run has a valid "previous" payload
        @previous = {}.freeze
      end

      # @param raw_stats [Hash] original emitted statistics
      # @return [Hash] the same (frozen) statistics hash, extended with the diff data
      # @note The emitted statistics are modified in place instead of copied. Since we don't
      #   expose any API to get the raw data, users can treat the decoration result as the
      #   proper raw stats
      def call(raw_stats)
        diff(@previous, raw_stats)

        @previous = raw_stats

        raw_stats.freeze
      end

      private

      # Recursively computes diffs, mutating the current scope in place
      #
      # @param previous [Object] value from the previous emit at the same location
      # @param current [Object] value from the current emit at the same location
      # @return [Object] the numeric difference when both values are numeric, otherwise current
      def diff(previous, current)
        if current.is_a?(Hash)
          # We cannot use #each_key here: #append may add new `<key>_d` entries and Ruby
          # forbids adding keys to a hash while iterating it, so we snapshot the keys first
          current.keys.each do |field|
            append(
              current,
              field,
              diff((previous || {})[field], current[field])
            )
          end
        end

        return current unless current.is_a?(Numeric) && previous.is_a?(Numeric)

        current - previous
      end

      # Stores the diff result under a `<key>_d` entry, but only for numeric results
      #
      # @param current [Hash] current scope that gets mutated
      # @param key [Symbol] key based on which we were diffing
      # @param result [Object] diff result
      def append(current, key, result)
        return unless result.is_a?(Numeric)

        current["#{key}_d"] = result
      end
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true

module WaterDrop
  class Producer
    # Producer lifecycle status object representation
    class Status
      # All the states the producer can go through during its lifecycle
      LIFECYCLE = %i[
        initial
        configured
        connected
        closing
        closed
      ].freeze

      private_constant :LIFECYCLE

      # Creates a new instance of status with the initial state
      # @return [Status]
      def initialize
        @current = LIFECYCLE.first
      end

      # @return [Boolean] true if the producer is in an active state, meaning we can start
      #   sending messages. Active states are connected (connection with Kafka established)
      #   and configured (producer configured but connection not yet established)
      def active?
        connected? || configured?
      end

      # @return [String] current status as a string
      def to_s
        @current.to_s
      end

      # Defines a predicate and a bang setter per lifecycle state,
      # e.g. #configured? / #configured!
      LIFECYCLE.each do |state|
        # @return [Boolean] true if the given state is the current one, otherwise false
        define_method(:"#{state}?") { @current == state }

        # Sets the given state as the current one
        define_method(:"#{state}!") { @current = state }
      end
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true

module WaterDrop
  class Producer
    # Component for synchronous producer operations
    module Sync
      # Produces a message to Kafka and waits until its delivery report arrives
      #
      # @param message [Hash] hash that complies with the {Contracts::Message} contract
      # @return [Rdkafka::Producer::DeliveryReport] delivery report
      # @raise [Rdkafka::RdkafkaError] when adding the message to rdkafka's queue failed
      # @raise [Rdkafka::Producer::WaitTimeoutError] when the timeout has been reached while the
      #   handle is still pending
      # @raise [Errors::MessageInvalidError] when the provided message details are invalid and
      #   the message could not be sent to Kafka
      def produce_sync(message)
        ensure_active!
        validate_message!(message)

        @monitor.instrument(
          'message.produced_sync',
          producer: self,
          message: message
        ) do
          handler = client.produce(**message)

          handler.wait(
            max_wait_timeout: @config.max_wait_timeout,
            wait_timeout: @config.wait_timeout
          )
        end
      end

      # Produces many messages to Kafka and waits for all of their delivery reports
      #
      # @param messages [Array<Hash>] array with messages that comply with the
      #   {Contracts::Message} contract
      # @return [Array<Rdkafka::Producer::DeliveryReport>] delivery reports
      # @raise [Rdkafka::RdkafkaError] when adding the messages to rdkafka's queue failed
      # @raise [Rdkafka::Producer::WaitTimeoutError] when the timeout has been reached while
      #   some handles are still pending
      # @raise [Errors::MessageInvalidError] when any of the provided messages details are
      #   invalid and the messages could not be sent to Kafka
      def produce_many_sync(messages)
        ensure_active!
        messages.each { |candidate| validate_message!(candidate) }

        @monitor.instrument('messages.produced_sync', producer: self, messages: messages) do
          # Enqueue everything first and only then wait, so deliveries can progress in parallel
          handlers = messages.map { |candidate| client.produce(**candidate) }

          handlers.map do |handler|
            handler.wait(
              max_wait_timeout: @config.max_wait_timeout,
              wait_timeout: @config.wait_timeout
            )
          end
        end
      end
    end
  end
end
|