waterdrop 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/.diffend.yml +3 -0
  5. data/.github/workflows/ci.yml +75 -0
  6. data/.gitignore +2 -0
  7. data/.ruby-version +1 -1
  8. data/CHANGELOG.md +13 -0
  9. data/Gemfile +9 -0
  10. data/Gemfile.lock +67 -54
  11. data/LICENSE +165 -0
  12. data/README.md +194 -56
  13. data/config/errors.yml +3 -16
  14. data/docker-compose.yml +17 -0
  15. data/lib/water_drop.rb +4 -24
  16. data/lib/water_drop/config.rb +41 -142
  17. data/lib/water_drop/contracts.rb +0 -2
  18. data/lib/water_drop/contracts/config.rb +8 -121
  19. data/lib/water_drop/contracts/message.rb +41 -0
  20. data/lib/water_drop/errors.rb +31 -5
  21. data/lib/water_drop/instrumentation.rb +7 -0
  22. data/lib/water_drop/instrumentation/monitor.rb +16 -23
  23. data/lib/water_drop/instrumentation/stdout_listener.rb +113 -32
  24. data/lib/water_drop/producer.rb +143 -0
  25. data/lib/water_drop/producer/async.rb +51 -0
  26. data/lib/water_drop/producer/buffer.rb +113 -0
  27. data/lib/water_drop/producer/builder.rb +63 -0
  28. data/lib/water_drop/producer/dummy_client.rb +32 -0
  29. data/lib/water_drop/producer/statistics_decorator.rb +71 -0
  30. data/lib/water_drop/producer/status.rb +52 -0
  31. data/lib/water_drop/producer/sync.rb +65 -0
  32. data/lib/water_drop/version.rb +1 -1
  33. data/waterdrop.gemspec +5 -5
  34. metadata +27 -26
  35. metadata.gz.sig +0 -0
  36. data/.travis.yml +0 -35
  37. data/MIT-LICENCE +0 -18
  38. data/lib/water_drop/async_producer.rb +0 -26
  39. data/lib/water_drop/base_producer.rb +0 -57
  40. data/lib/water_drop/config_applier.rb +0 -52
  41. data/lib/water_drop/contracts/message_options.rb +0 -19
  42. data/lib/water_drop/sync_producer.rb +0 -24
@@ -3,7 +3,5 @@
3
3
  module WaterDrop
4
4
  # Namespace for all the contracts for config validations
5
5
  module Contracts
6
- # Regex to check that topic has a valid format
7
- TOPIC_REGEXP = /\A(\w|\-|\.)+\z/.freeze
8
6
  end
9
7
  end
@@ -4,134 +4,21 @@ module WaterDrop
4
4
  module Contracts
5
5
  # Contract with validation rules for WaterDrop configuration details
6
6
  class Config < Dry::Validation::Contract
7
- # Valid uri schemas of Kafka broker url
8
- URI_SCHEMES = %w[kafka kafka+ssl plaintext ssl].freeze
7
+ # Ensure valid format of each seed broker so that rdkafka doesn't fail silently
8
+ SEED_BROKER_FORMAT_REGEXP = %r{\A([^:/,]+:[0-9]+)(,[^:/,]+:[0-9]+)*\z}.freeze
9
9
 
10
- # Available sasl scram mechanism of authentication (plus nil)
11
- SASL_SCRAM_MECHANISMS = %w[sha256 sha512].freeze
12
-
13
- # Supported compression codecs
14
- COMPRESSION_CODECS = %i[snappy gzip lz4 zstd].freeze
15
-
16
- config.messages.load_paths << File.join(WaterDrop.gem_root, 'config', 'errors.yml')
17
-
18
- class << self
19
- private
20
-
21
- # Builder for kafka scoped data custom rules
22
- # @param keys [Symbol, Hash] the keys names
23
- # @param block [Proc] block we want to run with validations within the kafka scope
24
- def kafka_scope_rule(*keys, &block)
25
- rule(*[:kafka].product(keys)) do
26
- instance_exec(values[:kafka], &block)
27
- end
28
- end
29
- end
30
-
31
- private
32
-
33
- # Uri validator to check if uri is in a Kafka acceptable format
34
- # @param uri [String] uri we want to validate
35
- # @return [Boolean] true if it is a valid uri, otherwise false
36
- def broker_schema?(uri)
37
- uri = URI.parse(uri)
38
- URI_SCHEMES.include?(uri.scheme) && uri.port
39
- rescue URI::InvalidURIError
40
- false
41
- end
10
+ private_constant :SEED_BROKER_FORMAT_REGEXP
42
11
 
43
12
  params do
44
- required(:client_id).filled(:str?, format?: Contracts::TOPIC_REGEXP)
13
+ required(:id).filled(:str?)
45
14
  required(:logger).filled
46
15
  required(:deliver).filled(:bool?)
47
- required(:raise_on_buffer_overflow).filled(:bool?)
16
+ required(:max_payload_size).filled(:int?, gteq?: 1)
17
+ required(:max_wait_timeout).filled(:number?, gteq?: 0)
18
+ required(:wait_timeout).filled(:number?, gt?: 0)
48
19
 
49
20
  required(:kafka).schema do
50
- required(:seed_brokers).value(:array, :filled?).each(:str?)
51
- required(:connect_timeout).filled(:int?, gt?: 0)
52
- required(:socket_timeout).filled(:int?, gt?: 0)
53
- required(:compression_threshold).filled(:int?, gteq?: 1)
54
- optional(:compression_codec).maybe(included_in?: COMPRESSION_CODECS)
55
-
56
- required(:max_buffer_bytesize).filled(:int?, gt?: 0)
57
- required(:max_buffer_size).filled(:int?, gt?: 0)
58
- required(:max_queue_size).filled(:int?, gt?: 0)
59
-
60
- required(:ack_timeout).filled(:int?, gt?: 0)
61
- required(:delivery_interval).filled(:int?, gteq?: 0)
62
- required(:delivery_threshold).filled(:int?, gteq?: 0)
63
-
64
- required(:max_retries).filled(:int?, gteq?: 0)
65
- required(:retry_backoff).filled(:int?, gteq?: 0)
66
- required(:required_acks).filled(included_in?: [1, 0, -1, :all])
67
-
68
- %i[
69
- ssl_ca_cert
70
- ssl_ca_cert_file_path
71
- ssl_client_cert
72
- ssl_client_cert_key
73
- ssl_client_cert_chain
74
- ssl_client_cert_key_password
75
- sasl_gssapi_principal
76
- sasl_gssapi_keytab
77
- sasl_plain_authzid
78
- sasl_plain_username
79
- sasl_plain_password
80
- sasl_scram_username
81
- sasl_scram_password
82
- ].each do |encryption_attribute|
83
- optional(encryption_attribute).maybe(:str?)
84
- end
85
-
86
- optional(:ssl_verify_hostname).maybe(:bool?)
87
- optional(:ssl_ca_certs_from_system).maybe(:bool?)
88
- optional(:sasl_over_ssl).maybe(:bool?)
89
- optional(:sasl_oauth_token_provider).value(:any)
90
-
91
- # It's not with other encryptions as it has some more rules
92
- optional(:sasl_scram_mechanism)
93
- .maybe(:str?, included_in?: SASL_SCRAM_MECHANISMS)
94
- end
95
- end
96
-
97
- kafka_scope_rule(:seed_brokers) do |kafka|
98
- unless kafka[:seed_brokers].all?(&method(:broker_schema?))
99
- key(%i[kafka seed_brokers]).failure(:broker_schema)
100
- end
101
- end
102
-
103
- kafka_scope_rule(:ssl_client_cert, :ssl_client_cert_key) do |kafka|
104
- if kafka[:ssl_client_cert] &&
105
- kafka[:ssl_client_cert_key].nil?
106
- key(%i[kafka ssl_client_cert_key]).failure(:ssl_client_cert_with_ssl_client_cert_key)
107
- end
108
- end
109
-
110
- kafka_scope_rule(:ssl_client_cert_key, :ssl_client_cert) do |kafka|
111
- if kafka[:ssl_client_cert_key] &&
112
- kafka[:ssl_client_cert].nil?
113
- key.failure(:ssl_client_cert_key_with_ssl_client_cert)
114
- end
115
- end
116
-
117
- kafka_scope_rule(:ssl_client_cert_chain, :ssl_client_cert) do |kafka|
118
- if kafka[:ssl_client_cert_chain] &&
119
- kafka[:ssl_client_cert].nil?
120
- key.failure(:ssl_client_cert_chain_with_ssl_client_cert)
121
- end
122
- end
123
-
124
- kafka_scope_rule(:ssl_client_cert_key_password, :ssl_client_cert_key) do |kafka|
125
- if kafka[:ssl_client_cert_key_password] &&
126
- kafka[:ssl_client_cert_key].nil?
127
- key.failure(:ssl_client_cert_key_password_with_ssl_client_cert_key)
128
- end
129
- end
130
-
131
- kafka_scope_rule(:sasl_oauth_token_provider) do |kafka|
132
- if kafka[:sasl_oauth_token_provider] &&
133
- !kafka[:sasl_oauth_token_provider].respond_to?(:token)
134
- key.failure(:sasl_oauth_token_provider_respond_to_token)
21
+ required(:'bootstrap.servers').filled(:str?, format?: SEED_BROKER_FORMAT_REGEXP)
135
22
  end
136
23
  end
137
24
  end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Contracts
5
+ # Contract with validation rules for validating that all the message options that
6
+ # we provide to producer are valid and usable
7
+ class Message < Dry::Validation::Contract
8
+ # Regex to check that topic has a valid format
9
+ TOPIC_REGEXP = /\A(\w|-|\.)+\z/.freeze
10
+
11
+ # Checks, that the given value is a string
12
+ STRING_ASSERTION = ->(value) { value.is_a?(String) }.to_proc
13
+
14
+ private_constant :TOPIC_REGEXP, :STRING_ASSERTION
15
+
16
+ config.messages.load_paths << File.join(WaterDrop.gem_root, 'config', 'errors.yml')
17
+
18
+ option :max_payload_size
19
+
20
+ params do
21
+ required(:topic).filled(:str?, format?: TOPIC_REGEXP)
22
+ required(:payload).filled(:str?)
23
+ optional(:key).maybe(:str?, :filled?)
24
+ optional(:partition).filled(:int?, gteq?: -1)
25
+ optional(:timestamp).maybe { time? | int? }
26
+ optional(:headers).maybe(:hash?)
27
+ end
28
+
29
+ rule(:headers) do
30
+ next unless value.is_a?(Hash)
31
+
32
+ key.failure(:invalid_key_type) unless value.keys.all?(&STRING_ASSERTION)
33
+ key.failure(:invalid_value_type) unless value.values.all?(&STRING_ASSERTION)
34
+ end
35
+
36
+ rule(:payload) do
37
+ key.failure(:max_payload_size) if value.bytesize > max_payload_size
38
+ end
39
+ end
40
+ end
41
+ end
@@ -7,12 +7,38 @@ module WaterDrop
7
7
  BaseError = Class.new(StandardError)
8
8
 
9
9
  # Raised when configuration doesn't match with validation contract
10
- InvalidConfiguration = Class.new(BaseError)
10
+ ConfigurationInvalidError = Class.new(BaseError)
11
11
 
12
- # Raised when we try to send message with invalid options
13
- InvalidMessageOptions = Class.new(BaseError)
12
+ # Raised when we want to use a producer that was not configured
13
+ ProducerNotConfiguredError = Class.new(BaseError)
14
14
 
15
- # Raised when want to hook up to an event that is not registered and supported
16
- UnregisteredMonitorEvent = Class.new(BaseError)
15
+ # Raised when we want to reconfigure a producer that was already configured
16
+ ProducerAlreadyConfiguredError = Class.new(BaseError)
17
+
18
+ # Raised when trying to use connected producer from a forked child process
19
+ # Producers cannot be used in forks if they were already used in the parent process
20
+ ProducerUsedInParentProcess = Class.new(BaseError)
21
+
22
+ # Raised when there was an attempt to use a closed producer
23
+ ProducerClosedError = Class.new(BaseError)
24
+
25
+ # Raised when we want to send a message that is invalid (impossible topic, etc)
26
+ MessageInvalidError = Class.new(BaseError)
27
+
28
+ # Raised when we've got an unexpected status. This should never happen. If it does, please
29
+ # contact us as it is an error.
30
+ StatusInvalidError = Class.new(BaseError)
31
+
32
+ # Raised when during messages flushing something bad happened
33
+ class FlushFailureError < BaseError
34
+ attr_reader :dispatched_messages
35
+
36
+ # @param dispatched_messages [Array<Rdkafka::Producer::DeliveryHandle>] handlers of the
37
+ # messages that we've dispatched
38
+ def initialize(dispatched_messages)
39
+ super()
40
+ @dispatched_messages = dispatched_messages
41
+ end
42
+ end
17
43
  end
18
44
  end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ # Namespace for all the things related with WaterDrop instrumentation process
5
+ module Instrumentation
6
+ end
7
+ end
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module WaterDrop
4
- # Namespace for all the things related with WaterDrop instrumentation process
5
4
  module Instrumentation
6
5
  # Monitor is used to hookup external monitoring services to monitor how WaterDrop works
7
6
  # Since it is a pub-sub based on dry-monitor, you can use as many subscribers/loggers at the
@@ -12,34 +11,28 @@ module WaterDrop
12
11
  class Monitor < Dry::Monitor::Notifications
13
12
  # List of events that we support in the system and to which a monitor client can hook up
14
13
  # @note The non-error ones support timestamp benchmarking
15
- BASE_EVENTS = %w[
16
- async_producer.call.error
17
- async_producer.call.retry
18
- sync_producer.call.error
19
- sync_producer.call.retry
14
+ EVENTS = %w[
15
+ producer.closed
16
+ message.produced_async
17
+ message.produced_sync
18
+ messages.produced_async
19
+ messages.produced_sync
20
+ message.buffered
21
+ messages.buffered
22
+ message.acknowledged
23
+ buffer.flushed_async
24
+ buffer.flushed_async.error
25
+ buffer.flushed_sync
26
+ buffer.flushed_sync.error
27
+ statistics.emitted
20
28
  ].freeze
21
29
 
22
- private_constant :BASE_EVENTS
30
+ private_constant :EVENTS
23
31
 
24
32
  # @return [WaterDrop::Instrumentation::Monitor] monitor instance for system instrumentation
25
33
  def initialize
26
34
  super(:waterdrop)
27
- BASE_EVENTS.each(&method(:register_event))
28
- end
29
-
30
- # Allows us to subscribe to events with a code that will be yielded upon events
31
- # @param event_name_or_listener [String, Object] name of the event we want to subscribe to
32
- # or a listener if we decide to go with object listener
33
- def subscribe(event_name_or_listener)
34
- return super unless event_name_or_listener.is_a?(String)
35
- return super if available_events.include?(event_name_or_listener)
36
-
37
- raise Errors::UnregisteredMonitorEvent, event_name_or_listener
38
- end
39
-
40
- # @return [Array<String>] names of available events to which we can subscribe
41
- def available_events
42
- __bus__.events.keys
35
+ EVENTS.each(&method(:register_event))
43
36
  end
44
37
  end
45
38
  end
@@ -7,38 +7,119 @@ module WaterDrop
7
7
  # @note It is a module as we can use it then as a part of the Karafka framework listener
8
8
  # as well as we can use it standalone
9
9
  class StdoutListener
10
- # Log levels that we use in this particular listener
11
- USED_LOG_LEVELS = %i[
12
- info
13
- error
14
- ].freeze
15
-
16
- %i[
17
- sync_producer
18
- async_producer
19
- ].each do |producer_type|
20
- error_name = :"on_#{producer_type}_call_error"
21
- retry_name = :"on_#{producer_type}_call_retry"
22
-
23
- define_method error_name do |event|
24
- options = event[:options]
25
- error = event[:error]
26
- error "Delivery failure to: #{options} because of #{error}"
27
- end
28
-
29
- define_method retry_name do |event|
30
- attempts_count = event[:attempts_count]
31
- options = event[:options]
32
- error = event[:error]
33
-
34
- info "Attempt #{attempts_count} of delivery to: #{options} because of #{error}"
35
- end
36
- end
37
-
38
- USED_LOG_LEVELS.each do |log_level|
39
- define_method log_level do |*args|
40
- WaterDrop.logger.send(log_level, *args)
41
- end
10
+ # @param logger [Object] stdout logger we want to use
11
+ def initialize(logger)
12
+ @logger = logger
13
+ end
14
+
15
+ # @param event [Dry::Events::Event] event that happened with the details
16
+ def on_message_produced_async(event)
17
+ message = event[:message]
18
+
19
+ info(event, "Async producing of a message to '#{message[:topic]}' topic")
20
+ debug(event, message)
21
+ end
22
+
23
+ # @param event [Dry::Events::Event] event that happened with the details
24
+ def on_message_produced_sync(event)
25
+ message = event[:message]
26
+
27
+ info(event, "Sync producing of a message to '#{message[:topic]}' topic")
28
+ debug(event, message)
29
+ end
30
+
31
+ # @param event [Dry::Events::Event] event that happened with the details
32
+ def on_messages_produced_async(event)
33
+ messages = event[:messages]
34
+ topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
35
+
36
+ info(event, "Async producing of #{messages.size} messages to #{topics_count} topics")
37
+ debug(event, messages)
38
+ end
39
+
40
+ # @param event [Dry::Events::Event] event that happened with the details
41
+ def on_messages_produced_sync(event)
42
+ messages = event[:messages]
43
+ topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
44
+
45
+ info(event, "Sync producing of #{messages.size} messages to #{topics_count} topics")
46
+ debug(event, messages)
47
+ end
48
+
49
+ # @param event [Dry::Events::Event] event that happened with the details
50
+ def on_message_buffered(event)
51
+ message = event[:message]
52
+
53
+ info(event, "Buffering of a message to '#{message[:topic]}' topic")
54
+ debug(event, [message, event[:producer].messages.size])
55
+ end
56
+
57
+ # @param event [Dry::Events::Event] event that happened with the details
58
+ def on_messages_buffered(event)
59
+ messages = event[:messages]
60
+
61
+ info(event, "Buffering of #{messages.size} messages")
62
+ debug(event, [messages, event[:producer].messages.size])
63
+ end
64
+
65
+ # @param event [Dry::Events::Event] event that happened with the details
66
+ def on_buffer_flushed_async(event)
67
+ messages = event[:messages]
68
+
69
+ info(event, "Async flushing of #{messages.size} messages from the buffer")
70
+ debug(event, messages)
71
+ end
72
+
73
+ # @param event [Dry::Events::Event] event that happened with the details
74
+ def on_buffer_flushed_async_error(event)
75
+ messages = event[:messages]
76
+ error = event[:error]
77
+
78
+ error(event, "Async flushing of #{messages.size} failed due to: #{error}")
79
+ debug(event, messages)
80
+ end
81
+
82
+ # @param event [Dry::Events::Event] event that happened with the details
83
+ def on_buffer_flushed_sync(event)
84
+ messages = event[:messages]
85
+
86
+ info(event, "Sync flushing of #{messages.size} messages from the buffer")
87
+ debug(event, messages)
88
+ end
89
+
90
+ # @param event [Dry::Events::Event] event that happened with the details
91
+ def on_buffer_flushed_sync_error(event)
92
+ messages = event[:dispatched]
93
+ error = event[:error]
94
+
95
+ error(event, "Sync flushing of #{messages.size} failed due to: #{error}")
96
+ debug(event, messages)
97
+ end
98
+
99
+ # @param event [Dry::Events::Event] event that happened with the details
100
+ def on_producer_closed(event)
101
+ info event, 'Closing producer'
102
+ debug event, event[:producer].messages.size
103
+ end
104
+
105
+ private
106
+
107
+ # @param event [Dry::Events::Event] event that happened with the details
108
+ # @param log_message [String] message we want to publish
109
+ def debug(event, log_message)
110
+ @logger.debug("[#{event[:producer].id}] #{log_message}")
111
+ end
112
+
113
+ # @param event [Dry::Events::Event] event that happened with the details
114
+ # @param log_message [String] message we want to publish
115
+ def info(event, log_message)
116
+ @logger.info("[#{event[:producer].id}] #{log_message} took #{event[:time]} ms")
117
+ end
118
+
119
+ # @param event [Dry::Events::Event] event that happened with the details
120
+ # @param log_message [String] message we want to publish
121
+ def error(event, log_message)
122
+ @logger.error("[#{event[:producer].id}] #{log_message}")
42
123
  end
43
124
  end
44
125
  end