waterdrop 1.4.0 → 2.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/.diffend.yml +3 -0
  5. data/.github/workflows/ci.yml +53 -0
  6. data/.gitignore +2 -0
  7. data/CHANGELOG.md +9 -0
  8. data/Gemfile +9 -0
  9. data/Gemfile.lock +30 -14
  10. data/LICENSE +165 -0
  11. data/README.md +192 -53
  12. data/config/errors.yml +3 -16
  13. data/docker-compose.yml +17 -0
  14. data/lib/water_drop.rb +4 -24
  15. data/lib/water_drop/config.rb +41 -142
  16. data/lib/water_drop/contracts.rb +0 -2
  17. data/lib/water_drop/contracts/config.rb +8 -121
  18. data/lib/water_drop/contracts/message.rb +41 -0
  19. data/lib/water_drop/errors.rb +30 -5
  20. data/lib/water_drop/instrumentation.rb +7 -0
  21. data/lib/water_drop/instrumentation/monitor.rb +16 -23
  22. data/lib/water_drop/instrumentation/stdout_listener.rb +113 -32
  23. data/lib/water_drop/producer.rb +142 -0
  24. data/lib/water_drop/producer/async.rb +51 -0
  25. data/lib/water_drop/producer/buffer.rb +113 -0
  26. data/lib/water_drop/producer/builder.rb +63 -0
  27. data/lib/water_drop/producer/dummy_client.rb +32 -0
  28. data/lib/water_drop/producer/statistics_decorator.rb +71 -0
  29. data/lib/water_drop/producer/status.rb +52 -0
  30. data/lib/water_drop/producer/sync.rb +65 -0
  31. data/lib/water_drop/version.rb +1 -1
  32. data/waterdrop.gemspec +4 -4
  33. metadata +27 -26
  34. metadata.gz.sig +0 -0
  35. data/.travis.yml +0 -35
  36. data/MIT-LICENCE +0 -18
  37. data/lib/water_drop/async_producer.rb +0 -26
  38. data/lib/water_drop/base_producer.rb +0 -57
  39. data/lib/water_drop/config_applier.rb +0 -52
  40. data/lib/water_drop/contracts/message_options.rb +0 -19
  41. data/lib/water_drop/sync_producer.rb +0 -24
@@ -7,12 +7,37 @@ module WaterDrop
7
7
  BaseError = Class.new(StandardError)
8
8
 
9
9
  # Raised when configuration doesn't match with validation contract
10
- InvalidConfiguration = Class.new(BaseError)
10
+ ConfigurationInvalidError = Class.new(BaseError)
11
11
 
12
- # Raised when we try to send message with invalid options
13
- InvalidMessageOptions = Class.new(BaseError)
12
+ # Raised when we want to use a producer that was not configured
13
+ ProducerNotConfiguredError = Class.new(BaseError)
14
14
 
15
- # Raised when want to hook up to an event that is not registered and supported
16
- UnregisteredMonitorEvent = Class.new(BaseError)
15
+ # Raised when we want to reconfigure a producer that was already configured
16
+ ProducerAlreadyConfiguredError = Class.new(BaseError)
17
+
18
+ # Raised when trying to use connected producer from a forked child process
19
+ # Producers cannot be used in forks if they were already used in the parent process
20
+ ProducerUsedInParentProcess = Class.new(BaseError)
21
+
22
+ # Raised when there was an attempt to use a closed producer
23
+ ProducerClosedError = Class.new(BaseError)
24
+
25
+ # Raised when we want to send a message that is invalid (impossible topic, etc)
26
+ MessageInvalidError = Class.new(BaseError)
27
+
28
+ # Raised when we've got an unexpected status. This should never happen. If it does, please
29
+ # contact us as it is an error.
30
+ StatusInvalidError = Class.new(BaseError)
31
+
32
+ # Raised when during messages flushing something bad happened
33
+ class FlushFailureError < BaseError
34
+ attr_reader :dispatched_messages
35
+
36
+ # @param dispatched_messages [Array<Rdkafka::Producer::DeliveryHandle>] handlers of the
37
+ # messages that we've dispatched
38
+ def initialize(dispatched_messages)
39
+ @dispatched_messages = dispatched_messages
40
+ end
41
+ end
17
42
  end
18
43
  end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ # Namespace for all the things related with WaterDrop instrumentation process
5
+ module Instrumentation
6
+ end
7
+ end
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module WaterDrop
4
- # Namespace for all the things related with WaterDrop instrumentation process
5
4
  module Instrumentation
6
5
  # Monitor is used to hookup external monitoring services to monitor how WaterDrop works
7
6
  # Since it is a pub-sub based on dry-monitor, you can use as many subscribers/loggers at the
@@ -12,34 +11,28 @@ module WaterDrop
12
11
  class Monitor < Dry::Monitor::Notifications
13
12
  # List of events that we support in the system and to which a monitor client can hook up
14
13
  # @note The non-error ones support timestamp benchmarking
15
- BASE_EVENTS = %w[
16
- async_producer.call.error
17
- async_producer.call.retry
18
- sync_producer.call.error
19
- sync_producer.call.retry
14
+ EVENTS = %w[
15
+ producer.closed
16
+ message.produced_async
17
+ message.produced_sync
18
+ messages.produced_async
19
+ messages.produced_sync
20
+ message.buffered
21
+ messages.buffered
22
+ message.acknowledged
23
+ buffer.flushed_async
24
+ buffer.flushed_async.error
25
+ buffer.flushed_sync
26
+ buffer.flushed_sync.error
27
+ statistics.emitted
20
28
  ].freeze
21
29
 
22
- private_constant :BASE_EVENTS
30
+ private_constant :EVENTS
23
31
 
24
32
  # @return [WaterDrop::Instrumentation::Monitor] monitor instance for system instrumentation
25
33
  def initialize
26
34
  super(:waterdrop)
27
- BASE_EVENTS.each(&method(:register_event))
28
- end
29
-
30
- # Allows us to subscribe to events with a code that will be yielded upon events
31
- # @param event_name_or_listener [String, Object] name of the event we want to subscribe to
32
- # or a listener if we decide to go with object listener
33
- def subscribe(event_name_or_listener)
34
- return super unless event_name_or_listener.is_a?(String)
35
- return super if available_events.include?(event_name_or_listener)
36
-
37
- raise Errors::UnregisteredMonitorEvent, event_name_or_listener
38
- end
39
-
40
- # @return [Array<String>] names of available events to which we can subscribe
41
- def available_events
42
- __bus__.events.keys
35
+ EVENTS.each(&method(:register_event))
43
36
  end
44
37
  end
45
38
  end
@@ -7,38 +7,119 @@ module WaterDrop
7
7
  # @note It is a module as we can use it then as a part of the Karafka framework listener
8
8
  # as well as used standalone
9
9
  class StdoutListener
10
- # Log levels that we use in this particular listener
11
- USED_LOG_LEVELS = %i[
12
- info
13
- error
14
- ].freeze
15
-
16
- %i[
17
- sync_producer
18
- async_producer
19
- ].each do |producer_type|
20
- error_name = :"on_#{producer_type}_call_error"
21
- retry_name = :"on_#{producer_type}_call_retry"
22
-
23
- define_method error_name do |event|
24
- options = event[:options]
25
- error = event[:error]
26
- error "Delivery failure to: #{options} because of #{error}"
27
- end
28
-
29
- define_method retry_name do |event|
30
- attempts_count = event[:attempts_count]
31
- options = event[:options]
32
- error = event[:error]
33
-
34
- info "Attempt #{attempts_count} of delivery to: #{options} because of #{error}"
35
- end
36
- end
37
-
38
- USED_LOG_LEVELS.each do |log_level|
39
- define_method log_level do |*args|
40
- WaterDrop.logger.send(log_level, *args)
41
- end
10
+ # @param logger [Object] stdout logger we want to use
11
+ def initialize(logger)
12
+ @logger = logger
13
+ end
14
+
15
+ # @param event [Dry::Events::Event] event that happened with the details
16
+ def on_message_produced_async(event)
17
+ message = event[:message]
18
+
19
+ info(event, "Async producing of a message to '#{message[:topic]}' topic")
20
+ debug(event, message)
21
+ end
22
+
23
+ # @param event [Dry::Events::Event] event that happened with the details
24
+ def on_message_produced_sync(event)
25
+ message = event[:message]
26
+
27
+ info(event, "Sync producing of a message to '#{message[:topic]}' topic")
28
+ debug(event, message)
29
+ end
30
+
31
+ # @param event [Dry::Events::Event] event that happened with the details
32
+ def on_messages_produced_async(event)
33
+ messages = event[:messages]
34
+ topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
35
+
36
+ info(event, "Async producing of #{messages.size} messages to #{topics_count} topics")
37
+ debug(event, messages)
38
+ end
39
+
40
+ # @param event [Dry::Events::Event] event that happened with the details
41
+ def on_messages_produced_sync(event)
42
+ messages = event[:messages]
43
+ topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
44
+
45
+ info(event, "Sync producing of #{messages.size} messages to #{topics_count} topics")
46
+ debug(event, messages)
47
+ end
48
+
49
+ # @param event [Dry::Events::Event] event that happened with the details
50
+ def on_message_buffered(event)
51
+ message = event[:message]
52
+
53
+ info(event, "Buffering of a message to '#{message[:topic]}' topic")
54
+ debug(event, [message, event[:producer].messages.size])
55
+ end
56
+
57
+ # @param event [Dry::Events::Event] event that happened with the details
58
+ def on_messages_buffered(event)
59
+ messages = event[:messages]
60
+
61
+ info(event, "Buffering of #{messages.size} messages")
62
+ debug(event, [messages, event[:producer].messages.size])
63
+ end
64
+
65
+ # @param event [Dry::Events::Event] event that happened with the details
66
+ def on_buffer_flushed_async(event)
67
+ messages = event[:messages]
68
+
69
+ info(event, "Async flushing of #{messages.size} messages from the buffer")
70
+ debug(event, messages)
71
+ end
72
+
73
+ # @param event [Dry::Events::Event] event that happened with the details
74
+ def on_buffer_flushed_async_error(event)
75
+ messages = event[:messages]
76
+ error = event[:error]
77
+
78
+ error(event, "Async flushing of #{messages.size} failed due to: #{error}")
79
+ debug(event, messages)
80
+ end
81
+
82
+ # @param event [Dry::Events::Event] event that happened with the details
83
+ def on_buffer_flushed_sync(event)
84
+ messages = event[:messages]
85
+
86
+ info(event, "Sync flushing of #{messages.size} messages from the buffer")
87
+ debug(event, messages)
88
+ end
89
+
90
+ # @param event [Dry::Events::Event] event that happened with the details
91
+ def on_buffer_flushed_sync_error(event)
92
+ messages = event[:dispatched]
93
+ error = event[:error]
94
+
95
+ error(event, "Sync flushing of #{messages.size} failed due to: #{error}")
96
+ debug(event, messages)
97
+ end
98
+
99
+ # @param event [Dry::Events::Event] event that happened with the details
100
+ def on_producer_closed(event)
101
+ info event, 'Closing producer'
102
+ debug event, event[:producer].messages.size
103
+ end
104
+
105
+ private
106
+
107
+ # @param event [Dry::Events::Event] event that happened with the details
108
+ # @param log_message [String] message we want to publish
109
+ def debug(event, log_message)
110
+ @logger.debug("[#{event[:producer].id}] #{log_message}")
111
+ end
112
+
113
+ # @param event [Dry::Events::Event] event that happened with the details
114
+ # @param log_message [String] message we want to publish
115
+ def info(event, log_message)
116
+ @logger.info("[#{event[:producer].id}] #{log_message} took #{event[:time]} ms")
117
+ end
118
+
119
+ # @param event [Dry::Events::Event] event that happened with the details
120
+ # @param log_message [String] message we want to publish
121
+ def error(event, log_message)
122
+ @logger.error("[#{event[:producer].id}] #{log_message}")
42
123
  end
43
124
  end
44
125
  end
@@ -0,0 +1,142 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ # Main WaterDrop messages producer
5
+ class Producer
6
+ include Sync
7
+ include Async
8
+ include Buffer
9
+
10
+ # @return [String] uuid of the current producer
11
+ attr_reader :id
12
+ # @return [Status] producer status object
13
+ attr_reader :status
14
+ # @return [Concurrent::Array] internal messages buffer
15
+ attr_reader :messages
16
+ # @return [Object] monitor we want to use
17
+ attr_reader :monitor
18
+ # @return [Object] dry-configurable config object
19
+ attr_reader :config
20
+
21
+ # Creates a not-yet-configured instance of the producer
22
+ # @param block [Proc] configuration block
23
+ # @return [Producer] producer instance
24
+ def initialize(&block)
25
+ @buffer_mutex = Mutex.new
26
+ @connecting_mutex = Mutex.new
27
+ @closing_mutex = Mutex.new
28
+
29
+ @status = Status.new
30
+ @messages = Concurrent::Array.new
31
+
32
+ return unless block
33
+
34
+ setup(&block)
35
+ end
36
+
37
+ # Sets up the whole configuration and initializes all that is needed
38
+ # @param block [Block] configuration block
39
+ def setup(&block)
40
+ raise Errors::ProducerAlreadyConfiguredError, id unless @status.initial?
41
+
42
+ @config = Config
43
+ .new
44
+ .setup(&block)
45
+ .config
46
+
47
+ @id = @config.id
48
+ @monitor = @config.monitor
49
+ @contract = Contracts::Message.new(max_payload_size: @config.max_payload_size)
50
+ @status.configured!
51
+ end
52
+
53
+ # @return [Rdkafka::Producer] raw rdkafka producer
54
+ # @note Client is lazy initialized, keeping in mind also the fact of a potential fork that
55
+ # can happen any time.
56
+ # @note It is not recommended to fork a producer that is already in use so in case of
57
+ # bootstrapping a cluster, it's much better to fork configured but not used producers
58
+ def client
59
+ return @client if @client && @pid == Process.pid
60
+
61
+ # Don't allow to obtain a client reference for a producer that was not configured
62
+ raise Errors::ProducerNotConfiguredError, id if @status.initial?
63
+
64
+ @connecting_mutex.synchronize do
65
+ return @client if @client && @pid == Process.pid
66
+
67
+ # We should raise an error when trying to use a producer from a fork, that is already
68
+ # connected to Kafka. We allow forking producers only before they are used
69
+ raise Errors::ProducerUsedInParentProcess, Process.pid if @status.connected?
70
+
71
+ # We undefine all the finalizers, in case it was a fork, so the finalizers from the parent
72
+ # process don't leak
73
+ ObjectSpace.undefine_finalizer(self)
74
+ # Finalizer tracking is needed for handling shutdowns gracefully.
75
+ # I don't expect everyone to remember to close all the producers all the time, thus
76
+ # this approach is better. Although it is still worth keeping in mind, that this will
77
+ # block GC from removing a no longer used producer unless closed properly
78
+ ObjectSpace.define_finalizer(self, proc { close })
79
+
80
+ @pid = Process.pid
81
+ @client = Builder.new.call(self, @config)
82
+ @status.connected!
83
+ end
84
+
85
+ @client
86
+ end
87
+
88
+ # Flushes the buffers in a sync way and closes the producer
89
+ def close
90
+ @closing_mutex.synchronize do
91
+ return unless @status.active?
92
+
93
+ @monitor.instrument(
94
+ 'producer.closed',
95
+ producer: self
96
+ ) do
97
+ @status.closing!
98
+
99
+ # No need for auto-gc if everything got closed by us
100
+ # This should be used only in case a producer was not closed properly and forgotten
101
+ ObjectSpace.undefine_finalizer(self)
102
+
103
+ # Flush has its own buffer mutex but even if it is blocked, flushing can still happen
104
+ # as we close the client after the flushing (even if blocked by the mutex)
105
+ flush(false)
106
+
107
+ # We should not close the client in several threads the same time
108
+ # It is safe to run it several times but not exactly the same moment
109
+ client.close
110
+
111
+ @status.closed!
112
+ end
113
+ end
114
+ end
115
+
116
+ # Ensures that we don't run any operations when the producer is not configured or when it
117
+ # was already closed
118
+ def ensure_active!
119
+ return if @status.active?
120
+
121
+ raise Errors::ProducerNotConfiguredError, id if @status.initial?
122
+ raise Errors::ProducerClosedError, id if @status.closing? || @status.closed?
123
+
124
+ # This should never happen
125
+ raise Errors::StatusInvalidError, [id, @status.to_s]
126
+ end
127
+
128
+ # Ensures that the message we want to send out to Kafka is actually valid and that it can be
129
+ # sent there
130
+ # @param message [Hash] message we want to send
131
+ # @raise [Karafka::Errors::MessageInvalidError]
132
+ def validate_message!(message)
133
+ result = @contract.call(message)
134
+ return if result.success?
135
+
136
+ raise Errors::MessageInvalidError, [
137
+ result.errors.to_h,
138
+ message
139
+ ]
140
+ end
141
+ end
142
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ class Producer
5
+ # Component for asynchronous producer operations
6
+ module Async
7
+ # Produces a message to Kafka and does not wait for results
8
+ #
9
+ # @param message [Hash] hash that complies with the {Contracts::Message} contract
10
+ #
11
+ # @return [Rdkafka::Producer::DeliveryHandle] delivery handle that might return the report
12
+ #
13
+ # @raise [Rdkafka::RdkafkaError] When adding the message to rdkafka's queue failed
14
+ # @raise [Errors::MessageInvalidError] When provided message details are invalid and the
15
+ # message could not be sent to Kafka
16
+ def produce_async(message)
17
+ ensure_active!
18
+ validate_message!(message)
19
+
20
+ @monitor.instrument(
21
+ 'message.produced_async',
22
+ producer: self,
23
+ message: message
24
+ ) { client.produce(**message) }
25
+ end
26
+
27
+ # Produces many messages to Kafka and does not wait for them to be delivered
28
+ #
29
+ # @param messages [Array<Hash>] array with messages that comply with the
30
+ # {Contracts::Message} contract
31
+ #
32
+ # @return [Array<Rdkafka::Producer::DeliveryHandle>] deliveries handles
33
+ #
34
+ # @raise [Rdkafka::RdkafkaError] When adding the messages to rdkafka's queue failed
35
+ # @raise [Errors::MessageInvalidError] When any of the provided messages details are invalid
36
+ # and the message could not be sent to Kafka
37
+ def produce_many_async(messages)
38
+ ensure_active!
39
+ messages.each { |message| validate_message!(message) }
40
+
41
+ @monitor.instrument(
42
+ 'messages.produced_async',
43
+ producer: self,
44
+ messages: messages
45
+ ) do
46
+ messages.map { |message| client.produce(**message) }
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end