waterdrop 1.4.4 → 2.0.0.rc1
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/FUNDING.yml +1 -0
- data/.github/workflows/ci.yml +3 -25
- data/.gitignore +2 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +8 -13
- data/Gemfile +9 -0
- data/Gemfile.lock +81 -60
- data/LICENSE +165 -0
- data/README.md +200 -57
- data/certs/mensfeld.pem +21 -21
- data/config/errors.yml +3 -16
- data/lib/water_drop/config.rb +42 -143
- data/lib/water_drop/contracts/config.rb +8 -121
- data/lib/water_drop/contracts/message.rb +41 -0
- data/lib/water_drop/contracts.rb +0 -2
- data/lib/water_drop/errors.rb +30 -5
- data/lib/water_drop/instrumentation/monitor.rb +16 -22
- data/lib/water_drop/instrumentation/stdout_listener.rb +113 -32
- data/lib/water_drop/producer/async.rb +51 -0
- data/lib/water_drop/producer/buffer.rb +113 -0
- data/lib/water_drop/producer/builder.rb +63 -0
- data/lib/water_drop/producer/dummy_client.rb +32 -0
- data/lib/water_drop/producer/statistics_decorator.rb +71 -0
- data/lib/water_drop/producer/status.rb +52 -0
- data/lib/water_drop/producer/sync.rb +65 -0
- data/lib/water_drop/producer.rb +142 -0
- data/lib/water_drop/version.rb +1 -1
- data/lib/water_drop.rb +4 -24
- data/waterdrop.gemspec +8 -8
- data.tar.gz.sig +0 -0
- metadata +53 -54
- metadata.gz.sig +0 -0
- data/MIT-LICENCE +0 -18
- data/lib/water_drop/async_producer.rb +0 -26
- data/lib/water_drop/base_producer.rb +0 -57
- data/lib/water_drop/config_applier.rb +0 -52
- data/lib/water_drop/contracts/message_options.rb +0 -19
- data/lib/water_drop/sync_producer.rb +0 -24
data/lib/water_drop/contracts/config.rb CHANGED

```diff
@@ -4,134 +4,21 @@ module WaterDrop
   module Contracts
     # Contract with validation rules for WaterDrop configuration details
     class Config < Dry::Validation::Contract
-      #
-
+      # Ensure valid format of each seed broker so that rdkafka doesn't fail silently
+      SEED_BROKER_FORMAT_REGEXP = %r{\A([^\:\/,]+:[0-9]+)(,[^\:\/,]+:[0-9]+)*\z}.freeze
 
-
-      SASL_SCRAM_MECHANISMS = %w[sha256 sha512].freeze
-
-      # Supported compression codecs
-      COMPRESSION_CODECS = %i[snappy gzip lz4 zstd].freeze
-
-      config.messages.load_paths << File.join(WaterDrop.gem_root, 'config', 'errors.yml')
-
-      class << self
-        private
-
-        # Builder for kafka scoped data custom rules
-        # @param keys [Symbol, Hash] the keys names
-        # @param block [Proc] block we want to run with validations within the kafka scope
-        def kafka_scope_rule(*keys, &block)
-          rule(*[:kafka].product(keys)) do
-            instance_exec(values[:kafka], &block)
-          end
-        end
-      end
-
-      private
-
-      # Uri validator to check if uri is in a Kafka acceptable format
-      # @param uri [String] uri we want to validate
-      # @return [Boolean] true if it is a valid uri, otherwise false
-      def broker_schema?(uri)
-        uri = URI.parse(uri)
-        URI_SCHEMES.include?(uri.scheme) && uri.port
-      rescue URI::InvalidURIError
-        false
-      end
+      private_constant :SEED_BROKER_FORMAT_REGEXP
 
       params do
-        required(:
+        required(:id).filled(:str?)
         required(:logger).filled
         required(:deliver).filled(:bool?)
-        required(:
+        required(:max_payload_size).filled(:int?, gteq?: 1)
+        required(:max_wait_timeout).filled(:number?, gteq?: 0)
+        required(:wait_timeout).filled(:number?, gt?: 0)
 
         required(:kafka).schema do
-          required(:
-          required(:connect_timeout).filled(:int?, gt?: 0)
-          required(:socket_timeout).filled(:int?, gt?: 0)
-          required(:compression_threshold).filled(:int?, gteq?: 1)
-          optional(:compression_codec).maybe(included_in?: COMPRESSION_CODECS)
-
-          required(:max_buffer_bytesize).filled(:int?, gt?: 0)
-          required(:max_buffer_size).filled(:int?, gt?: 0)
-          required(:max_queue_size).filled(:int?, gt?: 0)
-
-          required(:ack_timeout).filled(:int?, gt?: 0)
-          required(:delivery_interval).filled(:int?, gteq?: 0)
-          required(:delivery_threshold).filled(:int?, gteq?: 0)
-
-          required(:max_retries).filled(:int?, gteq?: 0)
-          required(:retry_backoff).filled(:int?, gteq?: 0)
-          required(:required_acks).filled(included_in?: [1, 0, -1, :all])
-
-          %i[
-            ssl_ca_cert
-            ssl_ca_cert_file_path
-            ssl_client_cert
-            ssl_client_cert_key
-            ssl_client_cert_chain
-            ssl_client_cert_key_password
-            sasl_gssapi_principal
-            sasl_gssapi_keytab
-            sasl_plain_authzid
-            sasl_plain_username
-            sasl_plain_password
-            sasl_scram_username
-            sasl_scram_password
-          ].each do |encryption_attribute|
-            optional(encryption_attribute).maybe(:str?)
-          end
-
-          optional(:ssl_verify_hostname).maybe(:bool?)
-          optional(:ssl_ca_certs_from_system).maybe(:bool?)
-          optional(:sasl_over_ssl).maybe(:bool?)
-          optional(:sasl_oauth_token_provider).value(:any)
-
-          # It's not with other encryptions as it has some more rules
-          optional(:sasl_scram_mechanism)
-            .maybe(:str?, included_in?: SASL_SCRAM_MECHANISMS)
-        end
-      end
-
-      kafka_scope_rule(:seed_brokers) do |kafka|
-        unless kafka[:seed_brokers].all?(&method(:broker_schema?))
-          key(%i[kafka seed_brokers]).failure(:broker_schema)
-        end
-      end
-
-      kafka_scope_rule(:ssl_client_cert, :ssl_client_cert_key) do |kafka|
-        if kafka[:ssl_client_cert] &&
-           kafka[:ssl_client_cert_key].nil?
-          key(%i[kafka ssl_client_cert_key]).failure(:ssl_client_cert_with_ssl_client_cert_key)
-        end
-      end
-
-      kafka_scope_rule(:ssl_client_cert_key, :ssl_client_cert) do |kafka|
-        if kafka[:ssl_client_cert_key] &&
-           kafka[:ssl_client_cert].nil?
-          key.failure(:ssl_client_cert_key_with_ssl_client_cert)
-        end
-      end
-
-      kafka_scope_rule(:ssl_client_cert_chain, :ssl_client_cert) do |kafka|
-        if kafka[:ssl_client_cert_chain] &&
-           kafka[:ssl_client_cert].nil?
-          key.failure(:ssl_client_cert_chain_with_ssl_client_cert)
-        end
-      end
-
-      kafka_scope_rule(:ssl_client_cert_key_password, :ssl_client_cert_key) do |kafka|
-        if kafka[:ssl_client_cert_key_password] &&
-           kafka[:ssl_client_cert_key].nil?
-          key.failure(:ssl_client_cert_key_password_with_ssl_client_cert_key)
-        end
-      end
-
-      kafka_scope_rule(:sasl_oauth_token_provider) do |kafka|
-        if kafka[:sasl_oauth_token_provider] &&
-           !kafka[:sasl_oauth_token_provider].respond_to?(:token)
-          key.failure(:sasl_oauth_token_provider_respond_to_token)
+          required(:'bootstrap.servers').filled(:str?, format?: SEED_BROKER_FORMAT_REGEXP)
         end
       end
     end
```
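The rewritten contract validates only WaterDrop's own producer settings plus a `kafka` hash that is handed straight to rdkafka, with `'bootstrap.servers'` checked against `SEED_BROKER_FORMAT_REGEXP`. Below is a minimal sketch of a hash that satisfies it, run through dry-validation's standard contract API; the concrete values (id, sizes, timeouts, hosts) are illustrative, not defaults taken from this diff.

```ruby
require 'logger'

config = {
  id: 'my-producer',           # any non-empty string
  logger: Logger.new($stdout),
  deliver: true,               # false would skip real dispatch
  max_payload_size: 1_000_012, # bytes, must be >= 1
  max_wait_timeout: 5,         # how long sync dispatches may wait
  wait_timeout: 0.005,         # re-check interval while waiting
  kafka: {
    # comma-separated host:port pairs, per SEED_BROKER_FORMAT_REGEXP
    'bootstrap.servers': 'localhost:9092,localhost:9093'
  }
}

WaterDrop::Contracts::Config.new.call(config).success? # => true
```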
data/lib/water_drop/contracts/message.rb ADDED

```diff
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module WaterDrop
+  module Contracts
+    # Contract with validation rules for validating that all the message options that
+    # we provide to producer are valid and usable
+    class Message < Dry::Validation::Contract
+      # Regex to check that topic has a valid format
+      TOPIC_REGEXP = /\A(\w|\-|\.)+\z/.freeze
+
+      # Checks that the given value is a string
+      STRING_ASSERTION = ->(value) { value.is_a?(String) }.to_proc
+
+      private_constant :TOPIC_REGEXP, :STRING_ASSERTION
+
+      config.messages.load_paths << File.join(WaterDrop.gem_root, 'config', 'errors.yml')
+
+      option :max_payload_size
+
+      params do
+        required(:topic).filled(:str?, format?: TOPIC_REGEXP)
+        required(:payload).filled(:str?)
+        optional(:key).maybe(:str?, :filled?)
+        optional(:partition).filled(:int?, gteq?: -1)
+        optional(:timestamp).maybe { time? | int? }
+        optional(:headers).maybe(:hash?)
+      end
+
+      rule(:headers) do
+        next unless value.is_a?(Hash)
+
+        key.failure(:invalid_key_type) unless value.keys.all?(&STRING_ASSERTION)
+        key.failure(:invalid_value_type) unless value.values.all?(&STRING_ASSERTION)
+      end
+
+      rule(:payload) do
+        key.failure(:max_payload_size) if value.bytesize > max_payload_size
+      end
+    end
+  end
+end
```
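The producer instantiates this contract with its `max_payload_size` and runs it against every outgoing message. A sketch of a hash it accepts, exercising the contract directly (the size value is illustrative):

```ruby
contract = WaterDrop::Contracts::Message.new(max_payload_size: 1_000_012)

result = contract.call(
  topic: 'user.events',          # must match TOPIC_REGEXP
  payload: 'event data',         # required; checked against max_payload_size
  key: 'user-1',                 # optional partitioning key
  partition: -1,                 # optional; -1 lets the partitioner decide
  headers: { 'source' => 'web' } # optional; string keys and values only
)

result.success? # => true
```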
data/lib/water_drop/contracts.rb CHANGED
data/lib/water_drop/errors.rb CHANGED

```diff
@@ -7,12 +7,37 @@ module WaterDrop
     BaseError = Class.new(StandardError)
 
     # Raised when configuration doesn't match with validation contract
-
+    ConfigurationInvalidError = Class.new(BaseError)
 
-    # Raised when we
-
+    # Raised when we want to use a producer that was not configured
+    ProducerNotConfiguredError = Class.new(BaseError)
 
-    # Raised when want to
-
+    # Raised when we want to reconfigure a producer that was already configured
+    ProducerAlreadyConfiguredError = Class.new(BaseError)
+
+    # Raised when trying to use connected producer from a forked child process
+    # Producers cannot be used in forks if they were already used in the parent process
+    ProducerUsedInParentProcess = Class.new(BaseError)
+
+    # Raised when there was an attempt to use a closed producer
+    ProducerClosedError = Class.new(BaseError)
+
+    # Raised when we want to send a message that is invalid (impossible topic, etc)
+    MessageInvalidError = Class.new(BaseError)
+
+    # Raised when we've got an unexpected status. This should never happen. If it does, please
+    # contact us as it is an error.
+    StatusInvalidError = Class.new(BaseError)
+
+    # Raised when during messages flushing something bad happened
+    class FlushFailureError < BaseError
+      attr_reader :dispatched_messages
+
+      # @param dispatched_messages [Array<Rdkafka::Producer::DeliveryHandle>] handlers of the
+      #   messages that we've dispatched
+      def initialize(dispatched_messages)
+        @dispatched_messages = dispatched_messages
+      end
+    end
   end
 end
```
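Most of these are plain marker classes; `FlushFailureError` is the one that carries state, exposing the delivery handles of messages that left the buffer before the failure. A sketch of handling it, with the `producer` instance assumed:

```ruby
begin
  producer.flush_sync
rescue WaterDrop::Errors::FlushFailureError => e
  # Each entry is an Rdkafka::Producer::DeliveryHandle for a message that was
  # already handed over to rdkafka before the flush failed
  warn "Flush failed, #{e.dispatched_messages.size} messages already dispatched"
end
```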
data/lib/water_drop/instrumentation/monitor.rb CHANGED

```diff
@@ -11,34 +11,28 @@ module WaterDrop
     class Monitor < Dry::Monitor::Notifications
       # List of events that we support in the system and to which a monitor client can hook up
       # @note The non-error ones support timestamp benchmarking
-
-
-
-
-
+      EVENTS = %w[
+        producer.closed
+        message.produced_async
+        message.produced_sync
+        messages.produced_async
+        messages.produced_sync
+        message.buffered
+        messages.buffered
+        message.acknowledged
+        buffer.flushed_async
+        buffer.flushed_async.error
+        buffer.flushed_sync
+        buffer.flushed_sync.error
+        statistics.emitted
       ].freeze
 
-      private_constant :
+      private_constant :EVENTS
 
       # @return [WaterDrop::Instrumentation::Monitor] monitor instance for system instrumentation
       def initialize
         super(:waterdrop)
-
-      end
-
-      # Allows us to subscribe to events with a code that will be yielded upon events
-      # @param event_name_or_listener [String, Object] name of the event we want to subscribe to
-      #   or a listener if we decide to go with object listener
-      def subscribe(event_name_or_listener)
-        return super unless event_name_or_listener.is_a?(String)
-        return super if available_events.include?(event_name_or_listener)
-
-        raise Errors::UnregisteredMonitorEvent, event_name_or_listener
-      end
-
-      # @return [Array<String>] names of available events to which we can subscribe
-      def available_events
-        __bus__.events.keys
+        EVENTS.each(&method(:register_event))
       end
     end
   end
```
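With every event pre-registered in `initialize`, subscribing becomes ordinary `Dry::Monitor::Notifications` usage. A sketch, assuming the producer exposes its monitor as `producer.monitor` (that accessor lives in `producer.rb`, which this extract doesn't show); the payload keys match what the builder attaches to `message.acknowledged` further down:

```ruby
producer.monitor.subscribe('message.acknowledged') do |event|
  # The delivery callback publishes the producer, partition and offset
  puts "Delivered: partition #{event[:partition]}, offset #{event[:offset]}"
end
```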
data/lib/water_drop/instrumentation/stdout_listener.rb CHANGED

```diff
@@ -7,38 +7,119 @@ module WaterDrop
   # @note It is a module as we can use it then as a part of the Karafka framework listener
   #   as well as we can use it standalone
   class StdoutListener
-      #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      # @param logger [Object] stdout logger we want to use
+      def initialize(logger)
+        @logger = logger
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_message_produced_async(event)
+        message = event[:message]
+
+        info(event, "Async producing of a message to '#{message[:topic]}' topic")
+        debug(event, message)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_message_produced_sync(event)
+        message = event[:message]
+
+        info(event, "Sync producing of a message to '#{message[:topic]}' topic")
+        debug(event, message)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_messages_produced_async(event)
+        messages = event[:messages]
+        topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
+
+        info(event, "Async producing of #{messages.size} messages to #{topics_count} topics")
+        debug(event, messages)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_messages_produced_sync(event)
+        messages = event[:messages]
+        topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
+
+        info(event, "Sync producing of #{messages.size} messages to #{topics_count} topics")
+        debug(event, messages)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_message_buffered(event)
+        message = event[:message]
+
+        info(event, "Buffering of a message to '#{message[:topic]}' topic")
+        debug(event, [message, event[:producer].messages.size])
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_messages_buffered(event)
+        messages = event[:messages]
+
+        info(event, "Buffering of #{messages.size} messages")
+        debug(event, [messages, event[:producer].messages.size])
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_buffer_flushed_async(event)
+        messages = event[:messages]
+
+        info(event, "Async flushing of #{messages.size} messages from the buffer")
+        debug(event, messages)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_buffer_flushed_async_error(event)
+        messages = event[:messages]
+        error = event[:error]
+
+        error(event, "Async flushing of #{messages.size} failed due to: #{error}")
+        debug(event, messages)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_buffer_flushed_sync(event)
+        messages = event[:messages]
+
+        info(event, "Sync flushing of #{messages.size} messages from the buffer")
+        debug(event, messages)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_buffer_flushed_sync_error(event)
+        messages = event[:dispatched]
+        error = event[:error]
+
+        error(event, "Sync flushing of #{messages.size} failed due to: #{error}")
+        debug(event, messages)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_producer_closed(event)
+        info event, 'Closing producer'
+        debug event, event[:producer].messages.size
+      end
+
+      private
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      # @param log_message [String] message we want to publish
+      def debug(event, log_message)
+        @logger.debug("[#{event[:producer].id}] #{log_message}")
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      # @param log_message [String] message we want to publish
+      def info(event, log_message)
+        @logger.info("[#{event[:producer].id}] #{log_message} took #{event[:time]} ms")
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      # @param log_message [String] message we want to publish
+      def error(event, log_message)
+        @logger.error("[#{event[:producer].id}] #{log_message}")
       end
     end
   end
```
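The listener is now a plain object: it takes its logger through the constructor and exposes one `on_<event name>` method per registered event, so it can be subscribed as a whole; `Dry::Monitor::Notifications` routes each event to the matching `on_*` method when given an object listener. A sketch, again assuming the `producer.monitor` accessor:

```ruby
require 'logger'

listener = WaterDrop::Instrumentation::StdoutListener.new(Logger.new($stdout))
producer.monitor.subscribe(listener)

# From now on each instrumented action is logged, e.g.:
# [producer-id] Sync producing of a message to 'events' topic took 1.2 ms
```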
data/lib/water_drop/producer/async.rb ADDED

```diff
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+module WaterDrop
+  class Producer
+    # Component for asynchronous producer operations
+    module Async
+      # Produces a message to Kafka and does not wait for results
+      #
+      # @param message [Hash] hash that complies with the {Contracts::Message} contract
+      #
+      # @return [Rdkafka::Producer::DeliveryHandle] delivery handle that might return the report
+      #
+      # @raise [Rdkafka::RdkafkaError] When adding the message to rdkafka's queue failed
+      # @raise [Errors::MessageInvalidError] When provided message details are invalid and the
+      #   message could not be sent to Kafka
+      def produce_async(message)
+        ensure_active!
+        validate_message!(message)
+
+        @monitor.instrument(
+          'message.produced_async',
+          producer: self,
+          message: message
+        ) { client.produce(**message) }
+      end
+
+      # Produces many messages to Kafka and does not wait for them to be delivered
+      #
+      # @param messages [Array<Hash>] array with messages that comply with the
+      #   {Contracts::Message} contract
+      #
+      # @return [Array<Rdkafka::Producer::DeliveryHandle>] deliveries handles
+      #
+      # @raise [Rdkafka::RdkafkaError] When adding the messages to rdkafka's queue failed
+      # @raise [Errors::MessageInvalidError] When any of the provided messages details are invalid
+      #   and the message could not be sent to Kafka
+      def produce_many_async(messages)
+        ensure_active!
+        messages.each { |message| validate_message!(message) }
+
+        @monitor.instrument(
+          'messages.produced_async',
+          producer: self,
+          messages: messages
+        ) do
+          messages.map { |message| client.produce(**message) }
+        end
+      end
+    end
+  end
+end
```
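From the caller's side, `produce_async` returns as soon as the message is queued inside rdkafka; the handles can optionally be waited on later when delivery confirmation matters. A short sketch:

```ruby
handle = producer.produce_async(topic: 'user.events', payload: 'registered')

handles = producer.produce_many_async(
  [
    { topic: 'user.events', payload: 'registered' },
    { topic: 'user.events', payload: 'confirmed' }
  ]
)

# Waiting is optional; each handle is an Rdkafka::Producer::DeliveryHandle
handles.each(&:wait)
```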
data/lib/water_drop/producer/buffer.rb ADDED

```diff
@@ -0,0 +1,113 @@
+# frozen_string_literal: true
+
+module WaterDrop
+  class Producer
+    # Component for buffered operations
+    module Buffer
+      # Exceptions we catch when dispatching messages from a buffer
+      RESCUED_ERRORS = [
+        Rdkafka::RdkafkaError,
+        Rdkafka::Producer::DeliveryHandle::WaitTimeoutError
+      ].freeze
+
+      private_constant :RESCUED_ERRORS
+
+      # Adds given message into the internal producer buffer without flushing it to Kafka
+      #
+      # @param message [Hash] hash that complies with the {Contracts::Message} contract
+      # @raise [Errors::MessageInvalidError] When provided message details are invalid and the
+      #   message could not be sent to Kafka
+      def buffer(message)
+        ensure_active!
+        validate_message!(message)
+
+        @monitor.instrument(
+          'message.buffered',
+          producer: self,
+          message: message
+        ) { @messages << message }
+      end
+
+      # Adds given messages into the internal producer buffer without flushing them to Kafka
+      #
+      # @param messages [Array<Hash>] array with messages that comply with the
+      #   {Contracts::Message} contract
+      # @raise [Errors::MessageInvalidError] When any of the provided messages details are invalid
+      #   and the message could not be sent to Kafka
+      def buffer_many(messages)
+        ensure_active!
+        messages.each { |message| validate_message!(message) }
+
+        @monitor.instrument(
+          'messages.buffered',
+          producer: self,
+          messages: messages
+        ) do
+          messages.each { |message| @messages << message }
+          messages
+        end
+      end
+
+      # Flushes the internal buffer to Kafka in an async way
+      # @return [Array<Rdkafka::Producer::DeliveryHandle>] delivery handles for messages that were
+      #   flushed
+      def flush_async
+        ensure_active!
+
+        @monitor.instrument(
+          'buffer.flushed_async',
+          producer: self,
+          messages: @messages
+        ) { flush(false) }
+      end
+
+      # Flushes the internal buffer to Kafka in a sync way
+      # @return [Array<Rdkafka::Producer::DeliveryReport>] delivery reports for messages that were
+      #   flushed
+      def flush_sync
+        ensure_active!
+
+        @monitor.instrument(
+          'buffer.flushed_sync',
+          producer: self,
+          messages: @messages
+        ) { flush(true) }
+      end
+
+      private
+
+      # Method for triggering the buffer
+      # @param sync [Boolean] should it flush in a sync way
+      # @return [Array<Rdkafka::Producer::DeliveryHandle, Rdkafka::Producer::DeliveryReport>]
+      #   delivery handles for async or delivery reports for sync
+      # @raise [Errors::FlushFailureError] when there was a failure in flushing
+      # @note We use this method underneath to provide a different instrumentation for sync and
+      #   async flushing within the public API
+      def flush(sync)
+        data_for_dispatch = nil
+        dispatched = []
+
+        @buffer_mutex.synchronize do
+          data_for_dispatch = @messages
+          @messages = Concurrent::Array.new
+        end
+
+        dispatched = data_for_dispatch.map { |message| client.produce(**message) }
+
+        return dispatched unless sync
+
+        dispatched.map do |handler|
+          handler.wait(
+            max_wait_timeout: @config.max_wait_timeout,
+            wait_timeout: @config.wait_timeout
+          )
+        end
+      rescue *RESCUED_ERRORS => e
+        key = sync ? 'buffer.flushed_sync.error' : 'buffer.flushed_async.error'
+        @monitor.instrument(key, producer: self, error: e, dispatched: dispatched)
+
+        raise Errors::FlushFailureError.new(dispatched)
+      end
+    end
+  end
+end
```
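Nothing in the buffering flow touches Kafka until a flush; `flush_sync` then waits on every handle using the `max_wait_timeout` and `wait_timeout` values the config contract validates. A usage sketch:

```ruby
producer.buffer(topic: 'metrics', payload: 'cpu=42')
producer.buffer_many(
  [
    { topic: 'metrics', payload: 'mem=17' },
    { topic: 'metrics', payload: 'io=3' }
  ]
)

reports = producer.flush_sync  # Array<Rdkafka::Producer::DeliveryReport>
# or, without waiting for delivery:
# handles = producer.flush_async
```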
data/lib/water_drop/producer/builder.rb ADDED

```diff
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module WaterDrop
+  class Producer
+    # Class used to construct the rdkafka producer client
+    class Builder
+      # @param producer [Producer] not yet configured producer for which we want to
+      #   build the client
+      # @param config [Object] dry-configurable based configuration object
+      # @return [Rdkafka::Producer, Producer::DummyClient] raw rdkafka producer or a dummy producer
+      #   when we don't want to dispatch any messages
+      def call(producer, config)
+        return DummyClient.new unless config.deliver
+
+        Rdkafka::Config.logger = config.logger
+        Rdkafka::Config.statistics_callback = build_statistics_callback(producer, config.monitor)
+
+        client = Rdkafka::Config.new(config.kafka.to_h).producer
+        client.delivery_callback = build_delivery_callback(producer, config.monitor)
+        client
+      end
+
+      private
+
+      # Creates a proc that we want to run upon each successful message delivery
+      #
+      # @param producer [Producer]
+      # @param monitor [Object] monitor we want to use
+      # @return [Proc] delivery callback
+      def build_delivery_callback(producer, monitor)
+        lambda do |delivery_report|
+          monitor.instrument(
+            'message.acknowledged',
+            producer: producer,
+            offset: delivery_report.offset,
+            partition: delivery_report.partition
+          )
+        end
+      end
+
+      # Creates a proc that we want to run upon each statistics callback execution
+      #
+      # @param producer [Producer]
+      # @param monitor [Object] monitor we want to use
+      # @return [Proc] statistics callback
+      # @note We decorate the statistics with our own decorator because some of the metrics from
+      #   rdkafka are absolute. For example number of sent messages increases not in reference to
+      #   previous statistics emit but from the beginning of the process. We decorate it with diff
+      #   of all the numeric values against the data from the previous callback emit
+      def build_statistics_callback(producer, monitor)
+        statistics_decorator = StatisticsDecorator.new
+
+        lambda do |statistics|
+          monitor.instrument(
+            'statistics.emitted',
+            producer: producer,
+            statistics: statistics_decorator.call(statistics)
+          )
+        end
+      end
+    end
+  end
+end
```
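Because `call` short-circuits to a `DummyClient` before any rdkafka setup when `deliver` is false, a producer can be exercised in tests without a broker. A sketch, assuming the block-style setup of `WaterDrop::Producer.new` (the producer class itself sits in `producer.rb`, outside this extract):

```ruby
test_producer = WaterDrop::Producer.new do |config|
  config.deliver = false # Builder returns a DummyClient
  config.kafka = { 'bootstrap.servers': 'localhost:9092' }
end

# Accepted and validated, but never dispatched to a real cluster
test_producer.produce_sync(topic: 'events', payload: 'noop')
```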