waterdrop 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/.diffend.yml +3 -0
- data/.github/workflows/ci.yml +53 -0
- data/.gitignore +2 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +9 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +51 -33
- data/LICENSE +165 -0
- data/README.md +192 -53
- data/config/errors.yml +3 -16
- data/docker-compose.yml +17 -0
- data/lib/water_drop.rb +4 -24
- data/lib/water_drop/config.rb +41 -142
- data/lib/water_drop/contracts.rb +0 -2
- data/lib/water_drop/contracts/config.rb +8 -121
- data/lib/water_drop/contracts/message.rb +41 -0
- data/lib/water_drop/errors.rb +31 -5
- data/lib/water_drop/instrumentation.rb +7 -0
- data/lib/water_drop/instrumentation/monitor.rb +16 -23
- data/lib/water_drop/instrumentation/stdout_listener.rb +113 -32
- data/lib/water_drop/producer.rb +142 -0
- data/lib/water_drop/producer/async.rb +51 -0
- data/lib/water_drop/producer/buffer.rb +113 -0
- data/lib/water_drop/producer/builder.rb +63 -0
- data/lib/water_drop/producer/dummy_client.rb +32 -0
- data/lib/water_drop/producer/statistics_decorator.rb +71 -0
- data/lib/water_drop/producer/status.rb +52 -0
- data/lib/water_drop/producer/sync.rb +65 -0
- data/lib/water_drop/version.rb +1 -1
- data/waterdrop.gemspec +4 -4
- metadata +25 -24
- metadata.gz.sig +0 -0
- data/.travis.yml +0 -35
- data/MIT-LICENCE +0 -18
- data/lib/water_drop/async_producer.rb +0 -26
- data/lib/water_drop/base_producer.rb +0 -57
- data/lib/water_drop/config_applier.rb +0 -52
- data/lib/water_drop/contracts/message_options.rb +0 -19
- data/lib/water_drop/sync_producer.rb +0 -24
data/lib/water_drop/errors.rb
CHANGED
@@ -7,12 +7,38 @@ module WaterDrop
     BaseError = Class.new(StandardError)
 
     # Raised when configuration doesn't match with validation contract
-
+    ConfigurationInvalidError = Class.new(BaseError)
 
-    # Raised when we
-
+    # Raised when we want to use a producer that was not configured
+    ProducerNotConfiguredError = Class.new(BaseError)
 
-    # Raised when want to
-
+    # Raised when we want to reconfigure a producer that was already configured
+    ProducerAlreadyConfiguredError = Class.new(BaseError)
+
+    # Raised when trying to use connected producer from a forked child process
+    # Producers cannot be used in forks if they were already used in the child processes
+    ProducerUsedInParentProcess = Class.new(BaseError)
+
+    # Raised when there was an attempt to use a closed producer
+    ProducerClosedError = Class.new(BaseError)
+
+    # Raised when we want to send a message that is invalid (impossible topic, etc)
+    MessageInvalidError = Class.new(BaseError)
+
+    # Raised when we've got an unexpected status. This should never happen. If it does, please
+    # contact us as it is an error.
+    StatusInvalidError = Class.new(BaseError)
+
+    # Raised when during messages flushing something bad happened
+    class FlushFailureError < BaseError
+      attr_reader :dispatched_messages
+
+      # @param dispatched_messages [Array<Rdkafka::Producer::DeliveryHandle>] handlers of the
+      #   messages that we've dispatched
+      def initialize(dispatched_messages)
+        super()
+        @dispatched_messages = dispatched_messages
+      end
+    end
   end
 end
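
The 2.0 errors namespace replaces the 1.4 constants with lifecycle-specific errors and adds FlushFailureError, which carries the delivery handles of the messages that were dispatched before the failure. The sketch below is not part of the diff: the kafka config key and the buffer/flush_sync methods come from config.rb and producer/buffer.rb (listed above but not shown in this section), and the broker address is a placeholder.

    require 'water_drop'

    # Sketch only: rescuing the new 2.0 error classes around a buffered flush.
    producer = WaterDrop::Producer.new do |config|
      config.kafka = { 'bootstrap.servers': 'localhost:9092' } # example broker address
    end

    begin
      producer.buffer(topic: 'events', payload: 'payload')
      producer.flush_sync
    rescue WaterDrop::Errors::FlushFailureError => e
      # Delivery handles of the messages that were dispatched before the failure
      e.dispatched_messages.each(&:wait)
    rescue WaterDrop::Errors::MessageInvalidError => e
      # Wraps the contract errors hash together with the offending message
      warn e.message
    ensure
      producer.close
    end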
data/lib/water_drop/instrumentation/monitor.rb
CHANGED
@@ -1,7 +1,6 @@
 # frozen_string_literal: true
 
 module WaterDrop
-  # Namespace for all the things related with WaterDrop instrumentation process
   module Instrumentation
     # Monitor is used to hookup external monitoring services to monitor how WaterDrop works
     # Since it is a pub-sub based on dry-monitor, you can use as many subscribers/loggers at the
@@ -12,34 +11,28 @@ module WaterDrop
     class Monitor < Dry::Monitor::Notifications
       # List of events that we support in the system and to which a monitor client can hook up
       # @note The non-error once support timestamp benchmarking
-
-
-
-
-
+      EVENTS = %w[
+        producer.closed
+        message.produced_async
+        message.produced_sync
+        messages.produced_async
+        messages.produced_sync
+        message.buffered
+        messages.buffered
+        message.acknowledged
+        buffer.flushed_async
+        buffer.flushed_async.error
+        buffer.flushed_sync
+        buffer.flushed_sync.error
+        statistics.emitted
       ].freeze
 
-      private_constant :
+      private_constant :EVENTS
 
       # @return [WaterDrop::Instrumentation::Monitor] monitor instance for system instrumentation
       def initialize
         super(:waterdrop)
-
-      end
-
-      # Allows us to subscribe to events with a code that will be yielded upon events
-      # @param event_name_or_listener [String, Object] name of the event we want to subscribe to
-      #   or a listener if we decide to go with object listener
-      def subscribe(event_name_or_listener)
-        return super unless event_name_or_listener.is_a?(String)
-        return super if available_events.include?(event_name_or_listener)
-
-        raise Errors::UnregisteredMonitorEvent, event_name_or_listener
-      end
-
-      # @return [Array<String>] names of available events to which we can subscribe
-      def available_events
-        __bus__.events.keys
+        EVENTS.each(&method(:register_event))
       end
     end
   end
 end
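
The monitor now registers a fixed, private EVENTS list at initialization and drops the custom subscribe override, so subscriptions go through the regular dry-monitor API. A sketch of hooking into one of the registered events follows; the producer setup and the kafka key are assumptions taken from other files in this release.

    require 'water_drop'

    # Sketch only: subscribing a block to one of the events registered above.
    producer = WaterDrop::Producer.new do |config|
      config.kafka = { 'bootstrap.servers': 'localhost:9092' } # example broker address
    end

    producer.monitor.subscribe('message.produced_async') do |event|
      # The payload keys (:producer, :message, :time) match what the stdout listener below reads
      puts "[#{event[:producer].id}] queued a message for '#{event[:message][:topic]}'"
    end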
data/lib/water_drop/instrumentation/stdout_listener.rb
CHANGED
@@ -7,38 +7,119 @@ module WaterDrop
     # @note It is a module as we can use it then as a part of the Karafka framework listener
     #   as well as we can use it standalone
     class StdoutListener
-      # ... (32 lines of the previous listener implementation; content not shown in this diff view)
+      # @param logger [Object] stdout logger we want to use
+      def initialize(logger)
+        @logger = logger
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_message_produced_async(event)
+        message = event[:message]
+
+        info(event, "Async producing of a message to '#{message[:topic]}' topic")
+        debug(event, message)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_message_produced_sync(event)
+        message = event[:message]
+
+        info(event, "Sync producing of a message to '#{message[:topic]}' topic")
+        debug(event, message)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_messages_produced_async(event)
+        messages = event[:messages]
+        topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
+
+        info(event, "Async producing of #{messages.size} messages to #{topics_count} topics")
+        debug(event, messages)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_messages_produced_sync(event)
+        messages = event[:messages]
+        topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
+
+        info(event, "Sync producing of #{messages.size} messages to #{topics_count} topics")
+        debug(event, messages)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_message_buffered(event)
+        message = event[:message]
+
+        info(event, "Buffering of a message to '#{message[:topic]}' topic")
+        debug(event, [message, event[:producer].messages.size])
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_messages_buffered(event)
+        messages = event[:messages]
+
+        info(event, "Buffering of #{messages.size} messages")
+        debug(event, [messages, event[:producer].messages.size])
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_buffer_flushed_async(event)
+        messages = event[:messages]
+
+        info(event, "Async flushing of #{messages.size} messages from the buffer")
+        debug(event, messages)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_buffer_flushed_async_error(event)
+        messages = event[:messages]
+        error = event[:error]
+
+        error(event, "Async flushing of #{messages.size} failed due to: #{error}")
+        debug(event, messages)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_buffer_flushed_sync(event)
+        messages = event[:messages]
+
+        info(event, "Sync flushing of #{messages.size} messages from the buffer")
+        debug(event, messages)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_buffer_flushed_sync_error(event)
+        messages = event[:dispatched]
+        error = event[:error]
+
+        error(event, "Sync flushing of #{messages.size} failed due to: #{error}")
+        debug(event, messages)
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      def on_producer_closed(event)
+        info event, 'Closing producer'
+        debug event, event[:producer].messages.size
+      end
+
+      private
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      # @param log_message [String] message we want to publish
+      def debug(event, log_message)
+        @logger.debug("[#{event[:producer].id}] #{log_message}")
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      # @param log_message [String] message we want to publish
+      def info(event, log_message)
+        @logger.info("[#{event[:producer].id}] #{log_message} took #{event[:time]} ms")
+      end
+
+      # @param event [Dry::Events::Event] event that happened with the details
+      # @param log_message [String] message we want to publish
+      def error(event, log_message)
+        @logger.error("[#{event[:producer].id}] #{log_message}")
       end
     end
   end
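
The listener is now built with an explicit logger and prefixes every line with the producer id, so a single listener can serve several producers. The sketch below attaches it as an object listener via dry-monitor's standard subscribe; the producer setup and the kafka key are assumptions.

    require 'water_drop'
    require 'logger'

    # Sketch only: attaching the rewritten listener to a producer's monitor.
    producer = WaterDrop::Producer.new do |config|
      config.kafka = { 'bootstrap.servers': 'localhost:9092' } # example broker address
    end

    listener = WaterDrop::Instrumentation::StdoutListener.new(Logger.new($stdout))
    producer.monitor.subscribe(listener)

    # Each produced/buffered/flushed event now gets logged, e.g.
    # "[<producer id>] Async producing of a message to 'events' topic took 0.3 ms"
    producer.produce_async(topic: 'events', payload: 'payload')
    producer.close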
data/lib/water_drop/producer.rb
ADDED
@@ -0,0 +1,142 @@
+# frozen_string_literal: true
+
+module WaterDrop
+  # Main WaterDrop messages producer
+  class Producer
+    include Sync
+    include Async
+    include Buffer
+
+    # @return [String] uuid of the current producer
+    attr_reader :id
+    # @return [Status] producer status object
+    attr_reader :status
+    # @return [Concurrent::Array] internal messages buffer
+    attr_reader :messages
+    # @return [Object] monitor we want to use
+    attr_reader :monitor
+    # @return [Object] dry-configurable config object
+    attr_reader :config
+
+    # Creates a not-yet-configured instance of the producer
+    # @param block [Proc] configuration block
+    # @return [Producer] producer instance
+    def initialize(&block)
+      @buffer_mutex = Mutex.new
+      @connecting_mutex = Mutex.new
+      @closing_mutex = Mutex.new
+
+      @status = Status.new
+      @messages = Concurrent::Array.new
+
+      return unless block
+
+      setup(&block)
+    end
+
+    # Sets up the whole configuration and initializes all that is needed
+    # @param block [Block] configuration block
+    def setup(&block)
+      raise Errors::ProducerAlreadyConfiguredError, id unless @status.initial?
+
+      @config = Config
+                .new
+                .setup(&block)
+                .config
+
+      @id = @config.id
+      @monitor = @config.monitor
+      @contract = Contracts::Message.new(max_payload_size: @config.max_payload_size)
+      @status.configured!
+    end
+
+    # @return [Rdkafka::Producer] raw rdkafka producer
+    # @note Client is lazy initialized, keeping in mind also the fact of a potential fork that
+    #   can happen any time.
+    # @note It is not recommended to fork a producer that is already in use so in case of
+    #   bootstrapping a cluster, it's much better to fork configured but not used producers
+    def client
+      return @client if @client && @pid == Process.pid
+
+      # Don't allow to obtain a client reference for a producer that was not configured
+      raise Errors::ProducerNotConfiguredError, id if @status.initial?
+
+      @connecting_mutex.synchronize do
+        return @client if @client && @pid == Process.pid
+
+        # We should raise an error when trying to use a producer from a fork, that is already
+        # connected to Kafka. We allow forking producers only before they are used
+        raise Errors::ProducerUsedInParentProcess, Process.pid if @status.connected?
+
+        # We undefine all the finalizers, in case it was a fork, so the finalizers from the parent
+        # process don't leak
+        ObjectSpace.undefine_finalizer(self)
+        # Finalizer tracking is needed for handling shutdowns gracefully.
+        # I don't expect everyone to remember about closing all the producers all the time, thus
+        # this approach is better. Although it is still worth keeping in mind, that this will
+        # block GC from removing a no longer used producer unless closed properly
+        ObjectSpace.define_finalizer(self, proc { close })
+
+        @pid = Process.pid
+        @client = Builder.new.call(self, @config)
+        @status.connected!
+      end
+
+      @client
+    end
+
+    # Flushes the buffers in a sync way and closes the producer
+    def close
+      @closing_mutex.synchronize do
+        return unless @status.active?
+
+        @monitor.instrument(
+          'producer.closed',
+          producer: self
+        ) do
+          @status.closing!
+
+          # No need for auto-gc if everything got closed by us
+          # This should be used only in case a producer was not closed properly and forgotten
+          ObjectSpace.undefine_finalizer(self)
+
+          # Flush has it's own buffer mutex but even if it is blocked, flushing can still happen
+          # as we close the client after the flushing (even if blocked by the mutex)
+          flush(false)
+
+          # We should not close the client in several threads the same time
+          # It is safe to run it several times but not exactly the same moment
+          client.close
+
+          @status.closed!
+        end
+      end
+    end
+
+    # Ensures that we don't run any operations when the producer is not configured or when it
+    #   was already closed
+    def ensure_active!
+      return if @status.active?
+
+      raise Errors::ProducerNotConfiguredError, id if @status.initial?
+      raise Errors::ProducerClosedError, id if @status.closing? || @status.closed?
+
+      # This should never happen
+      raise Errors::StatusInvalidError, [id, @status.to_s]
+    end
+
+    # Ensures that the message we want to send out to Kafka is actually valid and that it can be
+    #   sent there
+    # @param message [Hash] message we want to send
+    # @raise [Karafka::Errors::MessageInvalidError]
+    def validate_message!(message)
+      result = @contract.call(message)
+      return if result.success?
+
+      raise Errors::MessageInvalidError, [
+        result.errors.to_h,
+        message
+      ]
+    end
+  end
+end
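
The new Producer is created unconfigured, configured through setup (directly or via the constructor block), builds its rdkafka client lazily on first use, refuses to be reused across forks, and must be closed to flush buffers and release the client. The lifecycle sketch below is illustrative: the kafka config key and produce_sync (from the Sync component, not shown in this section) are assumptions.

    require 'water_drop'

    # Sketch only: the configure -> use -> close lifecycle of the 2.0 producer.
    producer = WaterDrop::Producer.new

    producer.setup do |config|
      config.id = 'my-producer'                                # used in logs and instrumentation
      config.kafka = { 'bootstrap.servers': 'localhost:9092' } # rdkafka options (example address)
    end
    # Calling setup again raises Errors::ProducerAlreadyConfiguredError;
    # using a producer without setup raises Errors::ProducerNotConfiguredError.

    # The rdkafka client is built lazily, inside a mutex, on first real use:
    producer.produce_sync(topic: 'events', payload: 'payload')

    # close flushes the buffer synchronously, closes the client and flips the status to closed;
    # any later produce call raises Errors::ProducerClosedError.
    producer.close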
data/lib/water_drop/producer/async.rb
ADDED
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+module WaterDrop
+  class Producer
+    # Component for asynchronous producer operations
+    module Async
+      # Produces a message to Kafka and does not wait for results
+      #
+      # @param message [Hash] hash that complies with the {Contracts::Message} contract
+      #
+      # @return [Rdkafka::Producer::DeliveryHandle] delivery handle that might return the report
+      #
+      # @raise [Rdkafka::RdkafkaError] When adding the message to rdkafka's queue failed
+      # @raise [Errors::MessageInvalidError] When provided message details are invalid and the
+      #   message could not be sent to Kafka
+      def produce_async(message)
+        ensure_active!
+        validate_message!(message)
+
+        @monitor.instrument(
+          'message.produced_async',
+          producer: self,
+          message: message
+        ) { client.produce(**message) }
+      end
+
+      # Produces many messages to Kafka and does not wait for them to be delivered
+      #
+      # @param messages [Array<Hash>] array with messages that comply with the
+      #   {Contracts::Message} contract
+      #
+      # @return [Array<Rdkafka::Producer::DeliveryHandle>] deliveries handles
+      #
+      # @raise [Rdkafka::RdkafkaError] When adding the messages to rdkafka's queue failed
+      # @raise [Errors::MessageInvalidError] When any of the provided messages details are invalid
+      #   and the message could not be sent to Kafka
+      def produce_many_async(messages)
+        ensure_active!
+        messages.each { |message| validate_message!(message) }
+
+        @monitor.instrument(
+          'messages.produced_async',
+          producer: self,
+          messages: messages
+        ) do
+          messages.map { |message| client.produce(**message) }
+        end
+      end
+    end
+  end
+end
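
produce_async and produce_many_async validate the messages, hand them to rdkafka and return delivery handles without waiting for delivery; waiting on a handle is optional and makes the call effectively synchronous. A short sketch, with the producer setup and the kafka key assumed as in the earlier examples:

    require 'water_drop'

    # Sketch only: fire-and-forget dispatch with the Async component above.
    producer = WaterDrop::Producer.new do |config|
      config.kafka = { 'bootstrap.servers': 'localhost:9092' } # example broker address
    end

    handle = producer.produce_async(topic: 'events', payload: 'one')
    handles = producer.produce_many_async(
      [
        { topic: 'events', payload: 'two' },
        { topic: 'events', payload: 'three' }
      ]
    )

    # Each handle is an Rdkafka::Producer::DeliveryHandle; waiting is optional and turns the
    # dispatch into an effectively synchronous operation from the caller's point of view.
    ([handle] + handles).each(&:wait)
    producer.close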