waterdrop 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/.diffend.yml +3 -0
  5. data/.github/workflows/ci.yml +53 -0
  6. data/.gitignore +2 -0
  7. data/.ruby-version +1 -1
  8. data/CHANGELOG.md +9 -0
  9. data/Gemfile +9 -0
  10. data/Gemfile.lock +51 -33
  11. data/LICENSE +165 -0
  12. data/README.md +192 -53
  13. data/config/errors.yml +3 -16
  14. data/docker-compose.yml +17 -0
  15. data/lib/water_drop.rb +4 -24
  16. data/lib/water_drop/config.rb +41 -142
  17. data/lib/water_drop/contracts.rb +0 -2
  18. data/lib/water_drop/contracts/config.rb +8 -121
  19. data/lib/water_drop/contracts/message.rb +41 -0
  20. data/lib/water_drop/errors.rb +31 -5
  21. data/lib/water_drop/instrumentation.rb +7 -0
  22. data/lib/water_drop/instrumentation/monitor.rb +16 -23
  23. data/lib/water_drop/instrumentation/stdout_listener.rb +113 -32
  24. data/lib/water_drop/producer.rb +142 -0
  25. data/lib/water_drop/producer/async.rb +51 -0
  26. data/lib/water_drop/producer/buffer.rb +113 -0
  27. data/lib/water_drop/producer/builder.rb +63 -0
  28. data/lib/water_drop/producer/dummy_client.rb +32 -0
  29. data/lib/water_drop/producer/statistics_decorator.rb +71 -0
  30. data/lib/water_drop/producer/status.rb +52 -0
  31. data/lib/water_drop/producer/sync.rb +65 -0
  32. data/lib/water_drop/version.rb +1 -1
  33. data/waterdrop.gemspec +4 -4
  34. metadata +25 -24
  35. metadata.gz.sig +0 -0
  36. data/.travis.yml +0 -35
  37. data/MIT-LICENCE +0 -18
  38. data/lib/water_drop/async_producer.rb +0 -26
  39. data/lib/water_drop/base_producer.rb +0 -57
  40. data/lib/water_drop/config_applier.rb +0 -52
  41. data/lib/water_drop/contracts/message_options.rb +0 -19
  42. data/lib/water_drop/sync_producer.rb +0 -24
@@ -7,12 +7,38 @@ module WaterDrop
7
7
  BaseError = Class.new(StandardError)
8
8
 
9
9
  # Raised when configuration doesn't match with validation contract
10
- InvalidConfiguration = Class.new(BaseError)
10
+ ConfigurationInvalidError = Class.new(BaseError)
11
11
 
12
- # Raised when we try to send message with invalid options
13
- InvalidMessageOptions = Class.new(BaseError)
12
+ # Raised when we want to use a producer that was not configured
13
+ ProducerNotConfiguredError = Class.new(BaseError)
14
14
 
15
- # Raised when want to hook up to an event that is not registered and supported
16
- UnregisteredMonitorEvent = Class.new(BaseError)
15
+ # Raised when we want to reconfigure a producer that was already configured
16
+ ProducerAlreadyConfiguredError = Class.new(BaseError)
17
+
18
+ # Raised when trying to use connected producer from a forked child process
19
+ # Producers cannot be used in forks if they were already used in the child processes
20
+ ProducerUsedInParentProcess = Class.new(BaseError)
21
+
22
+ # Raised when there was an attempt to use a closed producer
23
+ ProducerClosedError = Class.new(BaseError)
24
+
25
+ # Raised when we want to send a message that is invalid (impossible topic, etc)
26
+ MessageInvalidError = Class.new(BaseError)
27
+
28
+ # Raised when we've got an unexpected status. This should never happen. If it does, please
29
+ # contact us as it is an error.
30
+ StatusInvalidError = Class.new(BaseError)
31
+
32
+ # Raised when during messages flushing something bad happened
33
+ class FlushFailureError < BaseError
34
+ attr_reader :dispatched_messages
35
+
36
+ # @param dispatched_messages [Array<Rdkafka::Producer::DeliveryHandle>] handlers of the
37
+ # messages that we've dispatched
38
+ def initialize(dispatched_messages)
39
+ super()
40
+ @dispatched_messages = dispatched_messages
41
+ end
42
+ end
17
43
  end
18
44
  end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ # Namespace for all the things related with WaterDrop instrumentation process
5
+ module Instrumentation
6
+ end
7
+ end
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module WaterDrop
4
- # Namespace for all the things related with WaterDrop instrumentation process
5
4
  module Instrumentation
6
5
  # Monitor is used to hookup external monitoring services to monitor how WaterDrop works
7
6
  # Since it is a pub-sub based on dry-monitor, you can use as many subscribers/loggers at the
@@ -12,34 +11,28 @@ module WaterDrop
12
11
  class Monitor < Dry::Monitor::Notifications
13
12
  # List of events that we support in the system and to which a monitor client can hook up
14
13
  # @note The non-error once support timestamp benchmarking
15
- BASE_EVENTS = %w[
16
- async_producer.call.error
17
- async_producer.call.retry
18
- sync_producer.call.error
19
- sync_producer.call.retry
14
+ EVENTS = %w[
15
+ producer.closed
16
+ message.produced_async
17
+ message.produced_sync
18
+ messages.produced_async
19
+ messages.produced_sync
20
+ message.buffered
21
+ messages.buffered
22
+ message.acknowledged
23
+ buffer.flushed_async
24
+ buffer.flushed_async.error
25
+ buffer.flushed_sync
26
+ buffer.flushed_sync.error
27
+ statistics.emitted
20
28
  ].freeze
21
29
 
22
- private_constant :BASE_EVENTS
30
+ private_constant :EVENTS
23
31
 
24
32
  # @return [WaterDrop::Instrumentation::Monitor] monitor instance for system instrumentation
25
33
  def initialize
26
34
  super(:waterdrop)
27
- BASE_EVENTS.each(&method(:register_event))
28
- end
29
-
30
- # Allows us to subscribe to events with a code that will be yielded upon events
31
- # @param event_name_or_listener [String, Object] name of the event we want to subscribe to
32
- # or a listener if we decide to go with object listener
33
- def subscribe(event_name_or_listener)
34
- return super unless event_name_or_listener.is_a?(String)
35
- return super if available_events.include?(event_name_or_listener)
36
-
37
- raise Errors::UnregisteredMonitorEvent, event_name_or_listener
38
- end
39
-
40
- # @return [Array<String>] names of available events to which we can subscribe
41
- def available_events
42
- __bus__.events.keys
35
+ EVENTS.each(&method(:register_event))
43
36
  end
44
37
  end
45
38
  end
@@ -7,38 +7,119 @@ module WaterDrop
7
7
  # @note It is a module as we can use it then as a part of the Karafka framework listener
8
8
  # as well as we can use it standalone
9
9
  class StdoutListener
10
- # Log levels that we use in this particular listener
11
- USED_LOG_LEVELS = %i[
12
- info
13
- error
14
- ].freeze
15
-
16
- %i[
17
- sync_producer
18
- async_producer
19
- ].each do |producer_type|
20
- error_name = :"on_#{producer_type}_call_error"
21
- retry_name = :"on_#{producer_type}_call_retry"
22
-
23
- define_method error_name do |event|
24
- options = event[:options]
25
- error = event[:error]
26
- error "Delivery failure to: #{options} because of #{error}"
27
- end
28
-
29
- define_method retry_name do |event|
30
- attempts_count = event[:attempts_count]
31
- options = event[:options]
32
- error = event[:error]
33
-
34
- info "Attempt #{attempts_count} of delivery to: #{options} because of #{error}"
35
- end
36
- end
37
-
38
- USED_LOG_LEVELS.each do |log_level|
39
- define_method log_level do |*args|
40
- WaterDrop.logger.send(log_level, *args)
41
- end
10
+ # @param logger [Object] stdout logger we want to use
11
+ def initialize(logger)
12
+ @logger = logger
13
+ end
14
+
15
+ # @param event [Dry::Events::Event] event that happened with the details
16
+ def on_message_produced_async(event)
17
+ message = event[:message]
18
+
19
+ info(event, "Async producing of a message to '#{message[:topic]}' topic")
20
+ debug(event, message)
21
+ end
22
+
23
+ # @param event [Dry::Events::Event] event that happened with the details
24
+ def on_message_produced_sync(event)
25
+ message = event[:message]
26
+
27
+ info(event, "Sync producing of a message to '#{message[:topic]}' topic")
28
+ debug(event, message)
29
+ end
30
+
31
+ # @param event [Dry::Events::Event] event that happened with the details
32
+ def on_messages_produced_async(event)
33
+ messages = event[:messages]
34
+ topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
35
+
36
+ info(event, "Async producing of #{messages.size} messages to #{topics_count} topics")
37
+ debug(event, messages)
38
+ end
39
+
40
+ # @param event [Dry::Events::Event] event that happened with the details
41
+ def on_messages_produced_sync(event)
42
+ messages = event[:messages]
43
+ topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
44
+
45
+ info(event, "Sync producing of #{messages.size} messages to #{topics_count} topics")
46
+ debug(event, messages)
47
+ end
48
+
49
+ # @param event [Dry::Events::Event] event that happened with the details
50
+ def on_message_buffered(event)
51
+ message = event[:message]
52
+
53
+ info(event, "Buffering of a message to '#{message[:topic]}' topic")
54
+ debug(event, [message, event[:producer].messages.size])
55
+ end
56
+
57
+ # @param event [Dry::Events::Event] event that happened with the details
58
+ def on_messages_buffered(event)
59
+ messages = event[:messages]
60
+
61
+ info(event, "Buffering of #{messages.size} messages")
62
+ debug(event, [messages, event[:producer].messages.size])
63
+ end
64
+
65
+ # @param event [Dry::Events::Event] event that happened with the details
66
+ def on_buffer_flushed_async(event)
67
+ messages = event[:messages]
68
+
69
+ info(event, "Async flushing of #{messages.size} messages from the buffer")
70
+ debug(event, messages)
71
+ end
72
+
73
+ # @param event [Dry::Events::Event] event that happened with the details
74
+ def on_buffer_flushed_async_error(event)
75
+ messages = event[:messages]
76
+ error = event[:error]
77
+
78
+ error(event, "Async flushing of #{messages.size} failed due to: #{error}")
79
+ debug(event, messages)
80
+ end
81
+
82
+ # @param event [Dry::Events::Event] event that happened with the details
83
+ def on_buffer_flushed_sync(event)
84
+ messages = event[:messages]
85
+
86
+ info(event, "Sync flushing of #{messages.size} messages from the buffer")
87
+ debug(event, messages)
88
+ end
89
+
90
+ # @param event [Dry::Events::Event] event that happened with the details
91
+ def on_buffer_flushed_sync_error(event)
92
+ messages = event[:dispatched]
93
+ error = event[:error]
94
+
95
+ error(event, "Sync flushing of #{messages.size} failed due to: #{error}")
96
+ debug(event, messages)
97
+ end
98
+
99
+ # @param event [Dry::Events::Event] event that happened with the details
100
+ def on_producer_closed(event)
101
+ info event, 'Closing producer'
102
+ debug event, event[:producer].messages.size
103
+ end
104
+
105
+ private
106
+
107
+ # @param event [Dry::Events::Event] event that happened with the details
108
+ # @param log_message [String] message we want to publish
109
+ def debug(event, log_message)
110
+ @logger.debug("[#{event[:producer].id}] #{log_message}")
111
+ end
112
+
113
+ # @param event [Dry::Events::Event] event that happened with the details
114
+ # @param log_message [String] message we want to publish
115
+ def info(event, log_message)
116
+ @logger.info("[#{event[:producer].id}] #{log_message} took #{event[:time]} ms")
117
+ end
118
+
119
+ # @param event [Dry::Events::Event] event that happened with the details
120
+ # @param log_message [String] message we want to publish
121
+ def error(event, log_message)
122
+ @logger.error("[#{event[:producer].id}] #{log_message}")
42
123
  end
43
124
  end
44
125
  end
@@ -0,0 +1,142 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ # Main WaterDrop messages producer
5
+ class Producer
6
+ include Sync
7
+ include Async
8
+ include Buffer
9
+
10
+ # @return [String] uuid of the current producer
11
+ attr_reader :id
12
+ # @return [Status] producer status object
13
+ attr_reader :status
14
+ # @return [Concurrent::Array] internal messages buffer
15
+ attr_reader :messages
16
+ # @return [Object] monitor we want to use
17
+ attr_reader :monitor
18
+ # @return [Object] dry-configurable config object
19
+ attr_reader :config
20
+
21
+ # Creates a not-yet-configured instance of the producer
22
+ # @param block [Proc] configuration block
23
+ # @return [Producer] producer instance
24
+ def initialize(&block)
25
+ @buffer_mutex = Mutex.new
26
+ @connecting_mutex = Mutex.new
27
+ @closing_mutex = Mutex.new
28
+
29
+ @status = Status.new
30
+ @messages = Concurrent::Array.new
31
+
32
+ return unless block
33
+
34
+ setup(&block)
35
+ end
36
+
37
+ # Sets up the whole configuration and initializes all that is needed
38
+ # @param block [Block] configuration block
39
+ def setup(&block)
40
+ raise Errors::ProducerAlreadyConfiguredError, id unless @status.initial?
41
+
42
+ @config = Config
43
+ .new
44
+ .setup(&block)
45
+ .config
46
+
47
+ @id = @config.id
48
+ @monitor = @config.monitor
49
+ @contract = Contracts::Message.new(max_payload_size: @config.max_payload_size)
50
+ @status.configured!
51
+ end
52
+
53
+ # @return [Rdkafka::Producer] raw rdkafka producer
54
+ # @note Client is lazy initialized, keeping in mind also the fact of a potential fork that
55
+ # can happen any time.
56
+ # @note It is not recommended to fork a producer that is already in use so in case of
57
+ # bootstrapping a cluster, it's much better to fork configured but not used producers
58
+ def client
59
+ return @client if @client && @pid == Process.pid
60
+
61
+ # Don't allow to obtain a client reference for a producer that was not configured
62
+ raise Errors::ProducerNotConfiguredError, id if @status.initial?
63
+
64
+ @connecting_mutex.synchronize do
65
+ return @client if @client && @pid == Process.pid
66
+
67
+ # We should raise an error when trying to use a producer from a fork, that is already
68
+ # connected to Kafka. We allow forking producers only before they are used
69
+ raise Errors::ProducerUsedInParentProcess, Process.pid if @status.connected?
70
+
71
+ # We undefine all the finalizers, in case it was a fork, so the finalizers from the parent
72
+ # process don't leak
73
+ ObjectSpace.undefine_finalizer(self)
74
+ # Finalizer tracking is needed for handling shutdowns gracefully.
75
+ # I don't expect everyone to remember about closing all the producers all the time, thus
76
+ # this approach is better. Although it is still worth keeping in mind, that this will
77
+ # block GC from removing a no longer used producer unless closed properly
78
+ ObjectSpace.define_finalizer(self, proc { close })
79
+
80
+ @pid = Process.pid
81
+ @client = Builder.new.call(self, @config)
82
+ @status.connected!
83
+ end
84
+
85
+ @client
86
+ end
87
+
88
+ # Flushes the buffers in a sync way and closes the producer
89
+ def close
90
+ @closing_mutex.synchronize do
91
+ return unless @status.active?
92
+
93
+ @monitor.instrument(
94
+ 'producer.closed',
95
+ producer: self
96
+ ) do
97
+ @status.closing!
98
+
99
+ # No need for auto-gc if everything got closed by us
100
+ # This should be used only in case a producer was not closed properly and forgotten
101
+ ObjectSpace.undefine_finalizer(self)
102
+
103
+ # Flush has it's own buffer mutex but even if it is blocked, flushing can still happen
104
+ # as we close the client after the flushing (even if blocked by the mutex)
105
+ flush(false)
106
+
107
+ # We should not close the client in several threads the same time
108
+ # It is safe to run it several times but not exactly the same moment
109
+ client.close
110
+
111
+ @status.closed!
112
+ end
113
+ end
114
+ end
115
+
116
+ # Ensures that we don't run any operations when the producer is not configured or when it
117
+ # was already closed
118
+ def ensure_active!
119
+ return if @status.active?
120
+
121
+ raise Errors::ProducerNotConfiguredError, id if @status.initial?
122
+ raise Errors::ProducerClosedError, id if @status.closing? || @status.closed?
123
+
124
+ # This should never happen
125
+ raise Errors::StatusInvalidError, [id, @status.to_s]
126
+ end
127
+
128
+ # Ensures that the message we want to send out to Kafka is actually valid and that it can be
129
+ # sent there
130
+ # @param message [Hash] message we want to send
131
+ # @raise [Karafka::Errors::MessageInvalidError]
132
+ def validate_message!(message)
133
+ result = @contract.call(message)
134
+ return if result.success?
135
+
136
+ raise Errors::MessageInvalidError, [
137
+ result.errors.to_h,
138
+ message
139
+ ]
140
+ end
141
+ end
142
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ class Producer
5
+ # Component for asynchronous producer operations
6
+ module Async
7
+ # Produces a message to Kafka and does not wait for results
8
+ #
9
+ # @param message [Hash] hash that complies with the {Contracts::Message} contract
10
+ #
11
+ # @return [Rdkafka::Producer::DeliveryHandle] delivery handle that might return the report
12
+ #
13
+ # @raise [Rdkafka::RdkafkaError] When adding the message to rdkafka's queue failed
14
+ # @raise [Errors::MessageInvalidError] When provided message details are invalid and the
15
+ # message could not be sent to Kafka
16
+ def produce_async(message)
17
+ ensure_active!
18
+ validate_message!(message)
19
+
20
+ @monitor.instrument(
21
+ 'message.produced_async',
22
+ producer: self,
23
+ message: message
24
+ ) { client.produce(**message) }
25
+ end
26
+
27
+ # Produces many messages to Kafka and does not wait for them to be delivered
28
+ #
29
+ # @param messages [Array<Hash>] array with messages that comply with the
30
+ # {Contracts::Message} contract
31
+ #
32
+ # @return [Array<Rdkafka::Producer::DeliveryHandle>] deliveries handles
33
+ #
34
+ # @raise [Rdkafka::RdkafkaError] When adding the messages to rdkafka's queue failed
35
+ # @raise [Errors::MessageInvalidError] When any of the provided messages details are invalid
36
+ # and the message could not be sent to Kafka
37
+ def produce_many_async(messages)
38
+ ensure_active!
39
+ messages.each { |message| validate_message!(message) }
40
+
41
+ @monitor.instrument(
42
+ 'messages.produced_async',
43
+ producer: self,
44
+ messages: messages
45
+ ) do
46
+ messages.map { |message| client.produce(**message) }
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end