waterdrop 1.4.2 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/.github/workflows/ci.yml +1 -2
  5. data/.gitignore +2 -0
  6. data/.ruby-version +1 -1
  7. data/CHANGELOG.md +17 -5
  8. data/Gemfile +9 -0
  9. data/Gemfile.lock +42 -29
  10. data/{MIT-LICENCE → MIT-LICENSE} +0 -0
  11. data/README.md +244 -57
  12. data/certs/mensfeld.pem +21 -21
  13. data/config/errors.yml +3 -16
  14. data/docker-compose.yml +1 -1
  15. data/lib/water_drop.rb +4 -24
  16. data/lib/water_drop/config.rb +41 -142
  17. data/lib/water_drop/contracts.rb +0 -2
  18. data/lib/water_drop/contracts/config.rb +8 -121
  19. data/lib/water_drop/contracts/message.rb +42 -0
  20. data/lib/water_drop/errors.rb +31 -5
  21. data/lib/water_drop/instrumentation/monitor.rb +16 -22
  22. data/lib/water_drop/instrumentation/stdout_listener.rb +113 -32
  23. data/lib/water_drop/patches/rdkafka_producer.rb +49 -0
  24. data/lib/water_drop/producer.rb +143 -0
  25. data/lib/water_drop/producer/async.rb +51 -0
  26. data/lib/water_drop/producer/buffer.rb +113 -0
  27. data/lib/water_drop/producer/builder.rb +63 -0
  28. data/lib/water_drop/producer/dummy_client.rb +32 -0
  29. data/lib/water_drop/producer/statistics_decorator.rb +71 -0
  30. data/lib/water_drop/producer/status.rb +52 -0
  31. data/lib/water_drop/producer/sync.rb +65 -0
  32. data/lib/water_drop/version.rb +1 -1
  33. data/waterdrop.gemspec +4 -4
  34. metadata +44 -45
  35. metadata.gz.sig +0 -0
  36. data/lib/water_drop/async_producer.rb +0 -26
  37. data/lib/water_drop/base_producer.rb +0 -57
  38. data/lib/water_drop/config_applier.rb +0 -52
  39. data/lib/water_drop/contracts/message_options.rb +0 -19
  40. data/lib/water_drop/sync_producer.rb +0 -24
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
module WaterDrop
  class Producer
    # Non-blocking part of the producer API: messages are handed over to the client and the
    # value returned by the client is given back without waiting on it
    module Async
      # Dispatches a single message to Kafka without waiting for the delivery result
      #
      # @param message [Hash] hash that complies with the {Contracts::Message} contract
      #
      # @return [Rdkafka::Producer::DeliveryHandle] delivery handle that might return the report
      #
      # @raise [Rdkafka::RdkafkaError] When adding the message to rdkafka's queue failed
      # @raise [Errors::MessageInvalidError] When provided message details are invalid and the
      #   message could not be sent to Kafka
      def produce_async(message)
        ensure_active!
        validate_message!(message)

        @monitor.instrument('message.produced_async', producer: self, message: message) do
          client.produce(**message)
        end
      end

      # Dispatches a batch of messages to Kafka without waiting for the delivery results
      #
      # @param messages [Array<Hash>] array with messages that comply with the
      #   {Contracts::Message} contract
      #
      # @return [Array<Rdkafka::Producer::DeliveryHandle>] deliveries handles
      #
      # @raise [Rdkafka::RdkafkaError] When adding the messages to rdkafka's queue failed
      # @raise [Errors::MessageInvalidError] When any of the provided messages details are invalid
      #   and the message could not be sent to Kafka
      #
      # @note All the messages are validated before the first one is handed to the client
      def produce_many_async(messages)
        ensure_active!
        messages.each { |candidate| validate_message!(candidate) }

        @monitor.instrument('messages.produced_async', producer: self, messages: messages) do
          messages.map { |candidate| client.produce(**candidate) }
        end
      end
    end
  end
end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
module WaterDrop
  class Producer
    # Component for buffered operations: messages are collected in the producer's internal
    # buffer and dispatched only when one of the flush methods is invoked
    module Buffer
      # Exceptions we catch when dispatching messages from a buffer
      RESCUED_ERRORS = [
        Rdkafka::RdkafkaError,
        Rdkafka::Producer::DeliveryHandle::WaitTimeoutError
      ].freeze

      private_constant :RESCUED_ERRORS

      # Adds given message into the internal producer buffer without flushing it to Kafka
      #
      # @param message [Hash] hash that complies with the {Contracts::Message} contract
      # @raise [Errors::MessageInvalidError] When provided message details are invalid and the
      #   message could not be sent to Kafka
      def buffer(message)
        ensure_active!
        validate_message!(message)

        @monitor.instrument('message.buffered', producer: self, message: message) do
          @messages << message
        end
      end

      # Adds given messages into the internal producer buffer without flushing them to Kafka
      #
      # @param messages [Array<Hash>] array with messages that comply with the
      #   {Contracts::Message} contract
      # @raise [Errors::MessageInvalidError] When any of the provided messages details are invalid
      #   and the message could not be sent to Kafka
      # @note All the messages are validated before any of them is added to the buffer
      def buffer_many(messages)
        ensure_active!
        messages.each { |message| validate_message!(message) }

        @monitor.instrument('messages.buffered', producer: self, messages: messages) do
          messages.each { |message| @messages << message }
          # The instrumented block evaluates to the messages that were passed in
          messages
        end
      end

      # Flushes the internal buffer to Kafka in an async way
      # @return [Array<Rdkafka::Producer::DeliveryHandle>] delivery handles for messages that were
      #   flushed
      # @raise [Errors::FlushFailureError] when there was a failure in flushing
      def flush_async
        ensure_active!

        @monitor.instrument('buffer.flushed_async', producer: self, messages: @messages) do
          flush(false)
        end
      end

      # Flushes the internal buffer to Kafka in a sync way
      # @return [Array<Rdkafka::Producer::DeliveryReport>] delivery reports for messages that were
      #   flushed
      # @raise [Errors::FlushFailureError] when there was a failure in flushing
      def flush_sync
        ensure_active!

        @monitor.instrument('buffer.flushed_sync', producer: self, messages: @messages) do
          flush(true)
        end
      end

      private

      # Method for triggering the buffer
      # @param sync [Boolean] should it flush in a sync way
      # @return [Array<Rdkafka::Producer::DeliveryHandle, Rdkafka::Producer::DeliveryReport>]
      #   delivery handles for async or delivery reports for sync
      # @raise [Errors::FlushFailureError] when there was a failure in flushing
      # @note We use this method underneath to provide a different instrumentation for sync and
      #   async flushing within the public API
      def flush(sync)
        # Needs to exist before anything can raise, as the rescue section reports it
        dispatched = []

        # Replace the buffer with a fresh one while holding the mutex; the dispatch itself
        # happens outside of the synchronized section
        data_for_dispatch = @buffer_mutex.synchronize do
          pending = @messages
          @messages = Concurrent::Array.new
          pending
        end

        dispatched = data_for_dispatch.map { |message| client.produce(**message) }

        return dispatched unless sync

        dispatched.map do |handler|
          handler.wait(
            max_wait_timeout: @config.max_wait_timeout,
            wait_timeout: @config.wait_timeout
          )
        end
      rescue *RESCUED_ERRORS => e
        # Error events reuse the names of the flush events ('buffer.flushed_async' and
        # 'buffer.flushed_sync') with an '.error' suffix
        key = sync ? 'buffer.flushed_sync.error' : 'buffer.flushed_async.error'
        @monitor.instrument(key, producer: self, error: e, dispatched: dispatched)

        raise Errors::FlushFailureError.new(dispatched)
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
module WaterDrop
  class Producer
    # Factory that turns a producer configuration into a client instance
    class Builder
      # Builds the client for the given producer
      #
      # @param producer [Producer] not yet configured producer for which we want to
      #   build the client
      # @param config [Object] dry-configurable based configuration object
      # @return [Rdkafka::Producer, Producer::DummyClient] raw rdkafka producer or a dummy producer
      #   when we don't want to dispatch any messages
      # @note When messages are to be delivered, the logger and the statistics callback are
      #   assigned on `Rdkafka::Config` itself before the client is created
      def call(producer, config)
        return DummyClient.new unless config.deliver

        monitor = config.monitor

        Rdkafka::Config.logger = config.logger
        Rdkafka::Config.statistics_callback = build_statistics_callback(producer, monitor)

        Rdkafka::Config.new(config.kafka.to_h).producer.tap do |client|
          client.delivery_callback = build_delivery_callback(producer, monitor)
        end
      end

      private

      # Creates a lambda that publishes the `message.acknowledged` event with the offset and the
      # partition taken from the delivery report it receives
      #
      # @param producer [Producer]
      # @param monitor [Object] monitor we want to use
      # @return [Proc] delivery callback
      def build_delivery_callback(producer, monitor)
        ->(delivery_report) do
          monitor.instrument(
            'message.acknowledged',
            producer: producer,
            offset: delivery_report.offset,
            partition: delivery_report.partition
          )
        end
      end

      # Creates a lambda that publishes the `statistics.emitted` event upon each statistics
      # callback execution
      #
      # @param producer [Producer]
      # @param monitor [Object] monitor we want to use
      # @return [Proc] statistics callback
      # @note We decorate the statistics with our own decorator because some of the metrics from
      #   rdkafka are absolute. For example number of sent messages increases not in reference to
      #   previous statistics emit but from the beginning of the process. We decorate it with diff
      #   of all the numeric values against the data from the previous callback emit
      # @note One decorator instance is created per built callback and reused on every emit
      def build_statistics_callback(producer, monitor)
        decorator = StatisticsDecorator.new

        ->(statistics) do
          monitor.instrument(
            'statistics.emitted',
            producer: producer,
            statistics: decorator.call(statistics)
          )
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
module WaterDrop
  class Producer
    # Stand-in used instead of Rdkafka::Producer in case we don't want to dispatch anything
    # to Kafka. It accepts any method call and returns itself, so it can also play the role
    # of the object on which `#wait` is later invoked.
    class DummyClient
      # @return [DummyClient] dummy instance
      def initialize
        # Starts below zero, so the first report built by #wait carries 0
        @counter = -1
      end

      # Dummy method for returning the delivery report
      # @param _args [Object] anything that the delivery handle accepts
      # @return [::Rdkafka::Producer::DeliveryReport] report built from 0 and a counter that
      #   grows by one with every call
      def wait(*_args)
        report_class = ::Rdkafka::Producer::DeliveryReport
        @counter += 1
        report_class.new(0, @counter)
      end

      # @param _args [Object] anything really, this dummy is supposed to support anything
      # @return [Boolean] always true
      def respond_to_missing?(*_args)
        true
      end

      # @param _args [Object] anything really, this dummy is supposed to support anything
      # @return [self] returns self for chaining cases
      # @note `self` is always truthy, so the `super` branch is never evaluated
      def method_missing(*_args)
        self || super
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module WaterDrop
  class Producer
    # Many of the librdkafka statistics are absolute values instead of a gauge.
    # This means, that for example number of messages sent is an absolute growing value
    # instead of being a value of messages sent from the last statistics report.
    # This decorator calculates the diff against previously emitted stats, so we get also
    # the diff together with the original values
    class StatisticsDecorator
      # Starts with an empty, frozen set of previous statistics
      def initialize
        @previous = {}.freeze
      end

      # Extends the statistics with `<key>_d` entries and remembers them for the next call
      #
      # @param emited_stats [Hash] original emitted statistics
      # @return [Hash] the same hash, extended with the diff data and frozen
      # @note We modify the emitted statistics, instead of creating new. Since we don't expose
      #   any API to get raw data, users can just assume that the result of this decoration is the
      #   proper raw stats that they can use
      # @note When there is no previous numeric value for a key (for example on the first call),
      #   the diff stored for it is the current value itself
      def call(emited_stats)
        diff(@previous, emited_stats)

        @previous = emited_stats

        emited_stats.freeze
      end

      private

      # Calculates the diff of the provided values and modifies in place the emitted statistics
      #
      # @param previous [Object] previous value from the given scope in which
      #   we are
      # @param current [Object] current scope from emitted statistics
      # @return [Object] the diff if the values were numerics or the current scope
      def diff(previous, current)
        if current.is_a?(Hash)
          # Only a hash can act as the previous scope. Indexing anything else (a number, a
          # boolean, an array) with a statistics key would raise or return unrelated data
          # when the shape of the statistics differs between two emits
          previous_scope = previous.is_a?(Hash) ? previous : {}

          # We cannot use #each_key as we modify the content of the current scope in place
          current.keys.each do |key|
            append(current, key, diff(previous_scope[key], current[key]))
          end

          return current
        end

        return current - previous if current.is_a?(Numeric) && previous.is_a?(Numeric)

        current
      end

      # Appends the result of the diff to a given key as long as the result is numeric
      #
      # @param current [Hash] current scope
      # @param key [Object] key based on which we were diffing
      # @param result [Object] diff result
      def append(current, key, result)
        return unless result.is_a?(Numeric)

        current["#{key}_d"] = result
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
module WaterDrop
  class Producer
    # Holds the lifecycle state of a producer and exposes a predicate (`state?`) and
    # a setter (`state!`) for each of the known states
    class Status
      # States in which the producer can be
      LIFECYCLE = %i[
        initial
        configured
        connected
        closing
        closed
      ].freeze

      private_constant :LIFECYCLE

      # Creates a new instance of status with the initial state
      # @return [Status]
      def initialize
        @current = LIFECYCLE.first
      end

      # @return [Boolean] true if producer is in an active state. Active means, that we can start
      #   sending messages. Active states are connected (connection established) or configured,
      #   which means, that producer is configured, but connection with Kafka is
      #   not yet established.
      def active?
        configured? || connected?
      end

      # @return [String] current status as a string
      def to_s
        @current.to_s
      end

      LIFECYCLE.each do |state|
        # @return [Boolean] true if current status is as we want, otherwise false
        define_method(:"#{state}?") { @current == state }

        # Sets a given state as current
        define_method(:"#{state}!") { @current = state }
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
module WaterDrop
  class Producer
    # Blocking part of the producer API: every dispatched message is waited on, using the
    # timeouts taken from the producer config
    module Sync
      # Dispatches a single message to Kafka and waits until it is delivered
      #
      # @param message [Hash] hash that complies with the {Contracts::Message} contract
      #
      # @return [Rdkafka::Producer::DeliveryReport] delivery report
      #
      # @raise [Rdkafka::RdkafkaError] When adding the message to rdkafka's queue failed
      # @raise [Rdkafka::Producer::DeliveryHandle::WaitTimeoutError] When the timeout has been
      #   reached and the handle is still pending
      # @raise [Errors::MessageInvalidError] When provided message details are invalid and the
      #   message could not be sent to Kafka
      def produce_sync(message)
        ensure_active!
        validate_message!(message)

        @monitor.instrument('message.produced_sync', producer: self, message: message) do
          handle = client.produce(**message)

          handle.wait(
            max_wait_timeout: @config.max_wait_timeout,
            wait_timeout: @config.wait_timeout
          )
        end
      end

      # Dispatches a batch of messages to Kafka and waits until all of them are delivered
      #
      # @param messages [Array<Hash>] array with messages that comply with the
      #   {Contracts::Message} contract
      #
      # @return [Array<Rdkafka::Producer::DeliveryReport>] delivery reports
      #
      # @raise [Rdkafka::RdkafkaError] When adding the messages to rdkafka's queue failed
      # @raise [Rdkafka::Producer::DeliveryHandle::WaitTimeoutError] When the timeout has been
      #   reached and some handles are still pending
      # @raise [Errors::MessageInvalidError] When any of the provided messages details are invalid
      #   and the message could not be sent to Kafka
      #
      # @note Every message is handed to the client first; waiting starts only afterwards
      def produce_many_sync(messages)
        ensure_active!
        messages.each { |candidate| validate_message!(candidate) }

        @monitor.instrument('messages.produced_sync', producer: self, messages: messages) do
          handles = messages.map { |candidate| client.produce(**candidate) }

          handles.map do |handle|
            handle.wait(
              max_wait_timeout: @config.max_wait_timeout,
              wait_timeout: @config.wait_timeout
            )
          end
        end
      end
    end
  end
end