waterdrop 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/.diffend.yml +3 -0
  5. data/.github/workflows/ci.yml +53 -0
  6. data/.gitignore +2 -0
  7. data/.ruby-version +1 -1
  8. data/CHANGELOG.md +9 -0
  9. data/Gemfile +9 -0
  10. data/Gemfile.lock +51 -33
  11. data/LICENSE +165 -0
  12. data/README.md +192 -53
  13. data/config/errors.yml +3 -16
  14. data/docker-compose.yml +17 -0
  15. data/lib/water_drop.rb +4 -24
  16. data/lib/water_drop/config.rb +41 -142
  17. data/lib/water_drop/contracts.rb +0 -2
  18. data/lib/water_drop/contracts/config.rb +8 -121
  19. data/lib/water_drop/contracts/message.rb +41 -0
  20. data/lib/water_drop/errors.rb +31 -5
  21. data/lib/water_drop/instrumentation.rb +7 -0
  22. data/lib/water_drop/instrumentation/monitor.rb +16 -23
  23. data/lib/water_drop/instrumentation/stdout_listener.rb +113 -32
  24. data/lib/water_drop/producer.rb +142 -0
  25. data/lib/water_drop/producer/async.rb +51 -0
  26. data/lib/water_drop/producer/buffer.rb +113 -0
  27. data/lib/water_drop/producer/builder.rb +63 -0
  28. data/lib/water_drop/producer/dummy_client.rb +32 -0
  29. data/lib/water_drop/producer/statistics_decorator.rb +71 -0
  30. data/lib/water_drop/producer/status.rb +52 -0
  31. data/lib/water_drop/producer/sync.rb +65 -0
  32. data/lib/water_drop/version.rb +1 -1
  33. data/waterdrop.gemspec +4 -4
  34. metadata +25 -24
  35. metadata.gz.sig +0 -0
  36. data/.travis.yml +0 -35
  37. data/MIT-LICENCE +0 -18
  38. data/lib/water_drop/async_producer.rb +0 -26
  39. data/lib/water_drop/base_producer.rb +0 -57
  40. data/lib/water_drop/config_applier.rb +0 -52
  41. data/lib/water_drop/contracts/message_options.rb +0 -19
  42. data/lib/water_drop/sync_producer.rb +0 -24
@@ -1,19 +1,6 @@
1
1
  en:
2
2
  dry_validation:
3
3
  errors:
4
- broker_schema: >
5
- has an invalid format.
6
- Expected schema, host and port number.
7
- Example: kafka://127.0.0.1:9092 or kafka+ssl://127.0.0.1:9092
8
- ssl_client_cert_with_ssl_client_cert_key: >
9
- Both ssl_client_cert and ssl_client_cert_key need to be provided.
10
- ssl_client_cert_key_with_ssl_client_cert: >
11
- Both ssl_client_cert_key and ssl_client_cert need to be provided.
12
- ssl_client_cert_chain_with_ssl_client_cert: >
13
- Both ssl_client_cert_chain and ssl_client_cert need to be provided.
14
- ssl_client_cert_chain_with_ssl_client_cert_key: >
15
- Both ssl_client_cert_chain and ssl_client_cert_key need to be provided.
16
- ssl_client_cert_key_password_with_ssl_client_cert_key: >
17
- Both ssl_client_cert_key_password and ssl_client_cert_key need to be provided.
18
- sasl_oauth_token_provider_respond_to_token: >
19
- sasl_oauth_token_provider needs to respond to a #token method.
4
+ invalid_key_type: all keys need to be of type String
5
+ invalid_value_type: all values need to be of type String
6
+ max_payload_size: is more than `max_payload_size` config value
@@ -0,0 +1,17 @@
1
+ version: '2'
2
+ services:
3
+ zookeeper:
4
+ image: wurstmeister/zookeeper
5
+ ports:
6
+ - "2181:2181"
7
+ kafka:
8
+ image: wurstmeister/kafka:1.0.1
9
+ ports:
10
+ - "9092:9092"
11
+ environment:
12
+ KAFKA_ADVERTISED_HOST_NAME: localhost
13
+ KAFKA_ADVERTISED_PORT: 9092
14
+ KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
15
+ KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
16
+ volumes:
17
+ - /var/run/docker.sock:/var/run/docker.sock
@@ -3,39 +3,19 @@
3
3
  # External components
4
4
  # delegate should be removed because we don't need it, we just add it because of ruby-kafka
5
5
  %w[
6
- delegate
7
- json
8
- delivery_boy
9
- singleton
6
+ concurrent/array
10
7
  dry-configurable
11
8
  dry/monitor/notifications
12
9
  dry-validation
10
+ rdkafka
11
+ json
13
12
  zeitwerk
13
+ securerandom
14
14
  ].each { |lib| require lib }
15
15
 
16
16
  # WaterDrop library
17
17
  module WaterDrop
18
18
  class << self
19
- attr_accessor :logger
20
-
21
- # Sets up the whole configuration
22
- # @param [Block] block configuration block
23
- def setup(&block)
24
- Config.setup(&block)
25
- DeliveryBoy.logger = self.logger = config.logger
26
- ConfigApplier.call(DeliveryBoy.config, Config.config.to_h)
27
- end
28
-
29
- # @return [WaterDrop::Config] config instance
30
- def config
31
- Config.config
32
- end
33
-
34
- # @return [::WaterDrop::Monitor] monitor that we want to use
35
- def monitor
36
- config.monitor
37
- end
38
-
39
19
  # @return [String] root path of this gem
40
20
  def gem_root
41
21
  Pathname.new(File.expand_path('..', __dir__))
@@ -5,158 +5,57 @@
5
5
  module WaterDrop
6
6
  # Configuration object for setting up all options required by WaterDrop
7
7
  class Config
8
- extend Dry::Configurable
9
-
10
- # Config schema definition
11
- # @note We use a single instance not to create new one upon each usage
12
- SCHEMA = Contracts::Config.new.freeze
13
-
14
- private_constant :SCHEMA
8
+ include Dry::Configurable
15
9
 
16
10
  # WaterDrop options
17
- # option client_id [String] identifier of this producer
18
- setting :client_id, 'waterdrop'
19
- # option [Instance, nil] logger that we want to use or nil to fallback to ruby-kafka logger
20
- setting :logger, Logger.new($stdout, level: Logger::WARN)
11
+ #
12
+ # option [String] id of the producer. This can be helpful when building producer specific
13
+ # instrumentation or loggers. It is not the kafka producer id
14
+ setting(:id, false) { |id| id || SecureRandom.uuid }
15
+ # option [Instance] logger that we want to use
16
+ # @note Due to how rdkafka works, this setting is global for all the producers
17
+ setting(:logger, false) { |logger| logger || Logger.new($stdout, level: Logger::WARN) }
21
18
  # option [Instance] monitor that we want to use. See instrumentation part of the README for
22
19
  # more details
23
- setting :monitor, WaterDrop::Instrumentation::Monitor.new
20
+ setting(:monitor, false) { |monitor| monitor || WaterDrop::Instrumentation::Monitor.new }
21
+ # option [Integer] max payload size allowed for delivery to Kafka
22
+ setting :max_payload_size, 1_000_012
23
+ # option [Integer] Wait that long for the delivery report or raise an error if this takes
24
+ # longer than the timeout.
25
+ setting :max_wait_timeout, 5
26
+ # option [Numeric] how long should we wait between re-checks on the availability of the
27
+ # delivery report. In really robust systems, this describes the min-delivery time
28
+ # for a single sync message when produced in isolation
29
+ setting :wait_timeout, 0.005 # 5 milliseconds
24
30
  # option [Boolean] should we send messages. Setting this to false can be really useful when
25
- # testing and or developing because when set to false, won't actually ping Kafka
31
+ # testing and or developing because when set to false, won't actually ping Kafka but will
32
+ # run all the validations, etc
26
33
  setting :deliver, true
27
- # option [Boolean] if you're producing messages faster than the framework or the network can
28
- # send them off, ruby-kafka might reject them. If that happens, WaterDrop will either raise
29
- # or ignore - this setting manages that behavior. This only applies to async producer as
30
- # sync producer will always raise upon problems
31
- setting :raise_on_buffer_overflow, true
32
-
33
- # Settings directly related to the Kafka driver
34
- setting :kafka do
35
- # option [Array<String>] Array that contains Kafka seed broker hosts with ports
36
- setting :seed_brokers
37
-
38
- # Network timeouts
39
- # option connect_timeout [Integer] Sets the number of seconds to wait while connecting to
40
- # a broker for the first time. When ruby-kafka initializes, it needs to connect to at
41
- # least one host.
42
- setting :connect_timeout, 10
43
- # option socket_timeout [Integer] Sets the number of seconds to wait when reading from or
44
- # writing to a socket connection to a broker. After this timeout expires the connection
45
- # will be killed. Note that some Kafka operations are by definition long-running, such as
46
- # waiting for new messages to arrive in a partition, so don't set this value too low
47
- setting :socket_timeout, 30
48
-
49
- # Buffering for async producer
50
- # @option [Integer] The maximum number of bytes allowed in the buffer before new messages
51
- # are rejected.
52
- setting :max_buffer_bytesize, 10_000_000
53
- # @option [Integer] The maximum number of messages allowed in the buffer before new messages
54
- # are rejected.
55
- setting :max_buffer_size, 1000
56
- # @option [Integer] The maximum number of messages allowed in the queue before new messages
57
- # are rejected. The queue is used to ferry messages from the foreground threads of your
58
- # application to the background thread that buffers and delivers messages.
59
- setting :max_queue_size, 1000
60
-
61
- # option [Integer] A timeout executed by a broker when the client is sending messages to it.
62
- # It defines the number of seconds the broker should wait for replicas to acknowledge the
63
- # write before responding to the client with an error. As such, it relates to the
64
- # required_acks setting. It should be set lower than socket_timeout.
65
- setting :ack_timeout, 5
66
- # option [Integer] The number of seconds between background message
67
- # deliveries. Default is 10 seconds. Disable timer-based background deliveries by
68
- # setting this to 0.
69
- setting :delivery_interval, 10
70
- # option [Integer] The number of buffered messages that will trigger a background message
71
- # delivery. Default is 100 messages. Disable buffer size based background deliveries by
72
- # setting this to 0.
73
- setting :delivery_threshold, 100
74
- # option [Boolean]
75
- setting :idempotent, false
76
- # option [Boolean]
77
- setting :transactional, false
78
- # option [Integer]
79
- setting :transactional_timeout, 60
80
-
81
- # option [Integer] The number of retries when attempting to deliver messages.
82
- setting :max_retries, 2
83
- # option [Integer]
84
- setting :required_acks, -1
85
- # option [Integer]
86
- setting :retry_backoff, 1
87
-
88
- # option [Integer] The minimum number of messages that must be buffered before compression is
89
- # attempted. By default only one message is required. Only relevant if compression_codec
90
- # is set.
91
- setting :compression_threshold, 1
92
- # option [Symbol] The codec used to compress messages. Must be either snappy or gzip.
93
- setting :compression_codec, nil
94
-
95
- # SSL authentication related settings
96
- # option ca_cert [String, nil] SSL CA certificate
97
- setting :ssl_ca_cert, nil
98
- # option ssl_ca_cert_file_path [String, nil] SSL CA certificate file path
99
- setting :ssl_ca_cert_file_path, nil
100
- # option ssl_ca_certs_from_system [Boolean] Use the CA certs from your system's default
101
- # certificate store
102
- setting :ssl_ca_certs_from_system, false
103
- # option ssl_verify_hostname [Boolean] Verify the hostname for client certs
104
- setting :ssl_verify_hostname, true
105
- # option ssl_client_cert [String, nil] SSL client certificate
106
- setting :ssl_client_cert, nil
107
- # option ssl_client_cert_key [String, nil] SSL client certificate password
108
- setting :ssl_client_cert_key, nil
109
- # option sasl_gssapi_principal [String, nil] sasl principal
110
- setting :sasl_gssapi_principal, nil
111
- # option sasl_gssapi_keytab [String, nil] sasl keytab
112
- setting :sasl_gssapi_keytab, nil
113
- # option sasl_plain_authzid [String] The authorization identity to use
114
- setting :sasl_plain_authzid, ''
115
- # option sasl_plain_username [String, nil] The username used to authenticate
116
- setting :sasl_plain_username, nil
117
- # option sasl_plain_password [String, nil] The password used to authenticate
118
- setting :sasl_plain_password, nil
119
- # option sasl_scram_username [String, nil] The username used to authenticate
120
- setting :sasl_scram_username, nil
121
- # option sasl_scram_password [String, nil] The password used to authenticate
122
- setting :sasl_scram_password, nil
123
- # option sasl_scram_mechanism [String, nil] Scram mechanism, either 'sha256' or 'sha512'
124
- setting :sasl_scram_mechanism, nil
125
- # option sasl_over_ssl [Boolean] whether to enforce SSL with SASL
126
- setting :sasl_over_ssl, true
127
- # option ssl_client_cert_chain [String, nil] client cert chain or nil if not used
128
- setting :ssl_client_cert_chain, nil
129
- # option ssl_client_cert_key_password [String, nil] the password required to read
130
- # the ssl_client_cert_key
131
- setting :ssl_client_cert_key_password, nil
132
- # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
133
- # implements method token.
134
- setting :sasl_oauth_token_provider, nil
135
- end
136
-
137
- class << self
138
- # Configuration method
139
- # @yield Runs a block of code providing a config singleton instance to it
140
- # @yieldparam [WaterDrop::Config] WaterDrop config instance
141
- def setup
142
- configure do |config|
143
- yield(config)
144
- validate!(config.to_h)
145
- end
34
+ # rdkafka options
35
+ # @see https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
36
+ setting :kafka, {}
37
+
38
+ # Configuration method
39
+ # @yield Runs a block of code providing a config singleton instance to it
40
+ # @yieldparam [WaterDrop::Config] WaterDrop config instance
41
+ def setup
42
+ configure do |config|
43
+ yield(config)
44
+ validate!(config.to_h)
146
45
  end
46
+ end
147
47
 
148
- private
48
+ private
149
49
 
150
- # Validates the configuration and if anything is wrong, will raise an exception
151
- # @param config_hash [Hash] config hash with setup details
152
- # @raise [WaterDrop::Errors::InvalidConfiguration] raised when something is wrong with
153
- # the configuration
154
- def validate!(config_hash)
155
- validation_result = SCHEMA.call(config_hash)
156
- return true if validation_result.success?
50
+ # Validates the configuration and if anything is wrong, will raise an exception
51
+ # @param config_hash [Hash] config hash with setup details
52
+ # @raise [WaterDrop::Errors::ConfigurationInvalidError] raised when something is wrong with
53
+ # the configuration
54
+ def validate!(config_hash)
55
+ result = Contracts::Config.new.call(config_hash)
56
+ return true if result.success?
157
57
 
158
- raise Errors::InvalidConfiguration, validation_result.errors.to_h
159
- end
58
+ raise Errors::ConfigurationInvalidError, result.errors.to_h
160
59
  end
161
60
  end
162
61
  end
@@ -3,7 +3,5 @@
3
3
  module WaterDrop
4
4
  # Namespace for all the contracts for config validations
5
5
  module Contracts
6
- # Regex to check that topic has a valid format
7
- TOPIC_REGEXP = /\A(\w|\-|\.)+\z/.freeze
8
6
  end
9
7
  end
@@ -4,134 +4,21 @@ module WaterDrop
4
4
  module Contracts
5
5
  # Contract with validation rules for WaterDrop configuration details
6
6
  class Config < Dry::Validation::Contract
7
- # Valid uri schemas of Kafka broker url
8
- URI_SCHEMES = %w[kafka kafka+ssl plaintext ssl].freeze
7
+ # Ensure valid format of each seed broker so that rdkafka doesn't fail silently
8
+ SEED_BROKER_FORMAT_REGEXP = %r{\A([^:/,]+:[0-9]+)(,[^:/,]+:[0-9]+)*\z}.freeze
9
9
 
10
- # Available sasl scram mechanism of authentication (plus nil)
11
- SASL_SCRAM_MECHANISMS = %w[sha256 sha512].freeze
12
-
13
- # Supported compression codecs
14
- COMPRESSION_CODECS = %i[snappy gzip lz4 zstd].freeze
15
-
16
- config.messages.load_paths << File.join(WaterDrop.gem_root, 'config', 'errors.yml')
17
-
18
- class << self
19
- private
20
-
21
- # Builder for kafka scoped data custom rules
22
- # @param keys [Symbol, Hash] the keys names
23
- # @param block [Proc] block we want to run with validations within the kafka scope
24
- def kafka_scope_rule(*keys, &block)
25
- rule(*[:kafka].product(keys)) do
26
- instance_exec(values[:kafka], &block)
27
- end
28
- end
29
- end
30
-
31
- private
32
-
33
- # Uri validator to check if uri is in a Kafka acceptable format
34
- # @param uri [String] uri we want to validate
35
- # @return [Boolean] true if it is a valid uri, otherwise false
36
- def broker_schema?(uri)
37
- uri = URI.parse(uri)
38
- URI_SCHEMES.include?(uri.scheme) && uri.port
39
- rescue URI::InvalidURIError
40
- false
41
- end
10
+ private_constant :SEED_BROKER_FORMAT_REGEXP
42
11
 
43
12
  params do
44
- required(:client_id).filled(:str?, format?: Contracts::TOPIC_REGEXP)
13
+ required(:id).filled(:str?)
45
14
  required(:logger).filled
46
15
  required(:deliver).filled(:bool?)
47
- required(:raise_on_buffer_overflow).filled(:bool?)
16
+ required(:max_payload_size).filled(:int?, gteq?: 1)
17
+ required(:max_wait_timeout).filled(:number?, gteq?: 0)
18
+ required(:wait_timeout).filled(:number?, gt?: 0)
48
19
 
49
20
  required(:kafka).schema do
50
- required(:seed_brokers).value(:array, :filled?).each(:str?)
51
- required(:connect_timeout).filled(:int?, gt?: 0)
52
- required(:socket_timeout).filled(:int?, gt?: 0)
53
- required(:compression_threshold).filled(:int?, gteq?: 1)
54
- optional(:compression_codec).maybe(included_in?: COMPRESSION_CODECS)
55
-
56
- required(:max_buffer_bytesize).filled(:int?, gt?: 0)
57
- required(:max_buffer_size).filled(:int?, gt?: 0)
58
- required(:max_queue_size).filled(:int?, gt?: 0)
59
-
60
- required(:ack_timeout).filled(:int?, gt?: 0)
61
- required(:delivery_interval).filled(:int?, gteq?: 0)
62
- required(:delivery_threshold).filled(:int?, gteq?: 0)
63
-
64
- required(:max_retries).filled(:int?, gteq?: 0)
65
- required(:retry_backoff).filled(:int?, gteq?: 0)
66
- required(:required_acks).filled(included_in?: [1, 0, -1, :all])
67
-
68
- %i[
69
- ssl_ca_cert
70
- ssl_ca_cert_file_path
71
- ssl_client_cert
72
- ssl_client_cert_key
73
- ssl_client_cert_chain
74
- ssl_client_cert_key_password
75
- sasl_gssapi_principal
76
- sasl_gssapi_keytab
77
- sasl_plain_authzid
78
- sasl_plain_username
79
- sasl_plain_password
80
- sasl_scram_username
81
- sasl_scram_password
82
- ].each do |encryption_attribute|
83
- optional(encryption_attribute).maybe(:str?)
84
- end
85
-
86
- optional(:ssl_verify_hostname).maybe(:bool?)
87
- optional(:ssl_ca_certs_from_system).maybe(:bool?)
88
- optional(:sasl_over_ssl).maybe(:bool?)
89
- optional(:sasl_oauth_token_provider).value(:any)
90
-
91
- # It's not with other encryptions as it has some more rules
92
- optional(:sasl_scram_mechanism)
93
- .maybe(:str?, included_in?: SASL_SCRAM_MECHANISMS)
94
- end
95
- end
96
-
97
- kafka_scope_rule(:seed_brokers) do |kafka|
98
- unless kafka[:seed_brokers].all?(&method(:broker_schema?))
99
- key(%i[kafka seed_brokers]).failure(:broker_schema)
100
- end
101
- end
102
-
103
- kafka_scope_rule(:ssl_client_cert, :ssl_client_cert_key) do |kafka|
104
- if kafka[:ssl_client_cert] &&
105
- kafka[:ssl_client_cert_key].nil?
106
- key(%i[kafka ssl_client_cert_key]).failure(:ssl_client_cert_with_ssl_client_cert_key)
107
- end
108
- end
109
-
110
- kafka_scope_rule(:ssl_client_cert_key, :ssl_client_cert) do |kafka|
111
- if kafka[:ssl_client_cert_key] &&
112
- kafka[:ssl_client_cert].nil?
113
- key.failure(:ssl_client_cert_key_with_ssl_client_cert)
114
- end
115
- end
116
-
117
- kafka_scope_rule(:ssl_client_cert_chain, :ssl_client_cert) do |kafka|
118
- if kafka[:ssl_client_cert_chain] &&
119
- kafka[:ssl_client_cert].nil?
120
- key.failure(:ssl_client_cert_chain_with_ssl_client_cert)
121
- end
122
- end
123
-
124
- kafka_scope_rule(:ssl_client_cert_key_password, :ssl_client_cert_key) do |kafka|
125
- if kafka[:ssl_client_cert_key_password] &&
126
- kafka[:ssl_client_cert_key].nil?
127
- key.failure(:ssl_client_cert_key_password_with_ssl_client_cert_key)
128
- end
129
- end
130
-
131
- kafka_scope_rule(:sasl_oauth_token_provider) do |kafka|
132
- if kafka[:sasl_oauth_token_provider] &&
133
- !kafka[:sasl_oauth_token_provider].respond_to?(:token)
134
- key.failure(:sasl_oauth_token_provider_respond_to_token)
21
+ required(:'bootstrap.servers').filled(:str?, format?: SEED_BROKER_FORMAT_REGEXP)
135
22
  end
136
23
  end
137
24
  end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WaterDrop
4
+ module Contracts
5
+ # Contract with validation rules for validating that all the message options that
6
+ # we provide to producer are valid and usable
7
+ class Message < Dry::Validation::Contract
8
+ # Regex to check that topic has a valid format
9
+ TOPIC_REGEXP = /\A(\w|-|\.)+\z/.freeze
10
+
11
+ # Checks, that the given value is a string
12
+ STRING_ASSERTION = ->(value) { value.is_a?(String) }.to_proc
13
+
14
+ private_constant :TOPIC_REGEXP, :STRING_ASSERTION
15
+
16
+ config.messages.load_paths << File.join(WaterDrop.gem_root, 'config', 'errors.yml')
17
+
18
+ option :max_payload_size
19
+
20
+ params do
21
+ required(:topic).filled(:str?, format?: TOPIC_REGEXP)
22
+ required(:payload).filled(:str?)
23
+ optional(:key).maybe(:str?, :filled?)
24
+ optional(:partition).filled(:int?, gteq?: -1)
25
+ optional(:timestamp).maybe { time? | int? }
26
+ optional(:headers).maybe(:hash?)
27
+ end
28
+
29
+ rule(:headers) do
30
+ next unless value.is_a?(Hash)
31
+
32
+ key.failure(:invalid_key_type) unless value.keys.all?(&STRING_ASSERTION)
33
+ key.failure(:invalid_value_type) unless value.values.all?(&STRING_ASSERTION)
34
+ end
35
+
36
+ rule(:payload) do
37
+ key.failure(:max_payload_size) if value.bytesize > max_payload_size
38
+ end
39
+ end
40
+ end
41
+ end