waterdrop 1.4.0 → 2.0.0.rc1

Files changed (41)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/.diffend.yml +3 -0
  5. data/.github/workflows/ci.yml +53 -0
  6. data/.gitignore +2 -0
  7. data/CHANGELOG.md +9 -0
  8. data/Gemfile +9 -0
  9. data/Gemfile.lock +30 -14
  10. data/LICENSE +165 -0
  11. data/README.md +192 -53
  12. data/config/errors.yml +3 -16
  13. data/docker-compose.yml +17 -0
  14. data/lib/water_drop.rb +4 -24
  15. data/lib/water_drop/config.rb +41 -142
  16. data/lib/water_drop/contracts.rb +0 -2
  17. data/lib/water_drop/contracts/config.rb +8 -121
  18. data/lib/water_drop/contracts/message.rb +41 -0
  19. data/lib/water_drop/errors.rb +30 -5
  20. data/lib/water_drop/instrumentation.rb +7 -0
  21. data/lib/water_drop/instrumentation/monitor.rb +16 -23
  22. data/lib/water_drop/instrumentation/stdout_listener.rb +113 -32
  23. data/lib/water_drop/producer.rb +142 -0
  24. data/lib/water_drop/producer/async.rb +51 -0
  25. data/lib/water_drop/producer/buffer.rb +113 -0
  26. data/lib/water_drop/producer/builder.rb +63 -0
  27. data/lib/water_drop/producer/dummy_client.rb +32 -0
  28. data/lib/water_drop/producer/statistics_decorator.rb +71 -0
  29. data/lib/water_drop/producer/status.rb +52 -0
  30. data/lib/water_drop/producer/sync.rb +65 -0
  31. data/lib/water_drop/version.rb +1 -1
  32. data/waterdrop.gemspec +4 -4
  33. metadata +27 -26
  34. metadata.gz.sig +0 -0
  35. data/.travis.yml +0 -35
  36. data/MIT-LICENCE +0 -18
  37. data/lib/water_drop/async_producer.rb +0 -26
  38. data/lib/water_drop/base_producer.rb +0 -57
  39. data/lib/water_drop/config_applier.rb +0 -52
  40. data/lib/water_drop/contracts/message_options.rb +0 -19
  41. data/lib/water_drop/sync_producer.rb +0 -24

data/config/errors.yml
@@ -1,19 +1,6 @@
 en:
   dry_validation:
     errors:
-      broker_schema: >
-        has an invalid format.
-        Expected schema, host and port number.
-        Example: kafka://127.0.0.1:9092 or kafka+ssl://127.0.0.1:9092
-      ssl_client_cert_with_ssl_client_cert_key: >
-        Both ssl_client_cert and ssl_client_cert_key need to be provided.
-      ssl_client_cert_key_with_ssl_client_cert: >
-        Both ssl_client_cert_key and ssl_client_cert need to be provided.
-      ssl_client_cert_chain_with_ssl_client_cert: >
-        Both ssl_client_cert_chain and ssl_client_cert need to be provided.
-      ssl_client_cert_chain_with_ssl_client_cert_key: >
-        Both ssl_client_cert_chain and ssl_client_cert_key need to be provided.
-      ssl_client_cert_key_password_with_ssl_client_cert_key: >
-        Both ssl_client_cert_key_password and ssl_client_cert_key need to be provided.
-      sasl_oauth_token_provider_respond_to_token: >
-        sasl_oauth_token_provider needs to respond to a #token method.
+      invalid_key_type: all keys need to be of type String
+      invalid_value_type: all values need to be of type String
+      max_payload_size: is more than `max_payload_size` config value

data/docker-compose.yml
@@ -0,0 +1,17 @@
+version: '2'
+services:
+  zookeeper:
+    image: wurstmeister/zookeeper
+    ports:
+      - "2181:2181"
+  kafka:
+    image: wurstmeister/kafka:1.0.1
+    ports:
+      - "9092:9092"
+    environment:
+      KAFKA_ADVERTISED_HOST_NAME: localhost
+      KAFKA_ADVERTISED_PORT: 9092
+      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+      KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
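
The new docker-compose.yml provides a local single-broker Kafka (plus Zookeeper) on localhost:9092 for development and the integration tests. As a quick smoke test against it, here is a minimal sketch using the rdkafka gem that 2.0 now builds on; the topic name is only illustrative and topic auto-creation is enabled above.

# Assumes `docker-compose up -d` has been run and the rdkafka gem is installed.
require 'rdkafka'

producer = Rdkafka::Config.new('bootstrap.servers': 'localhost:9092').producer
handle = producer.produce(topic: 'waterdrop_smoke_test', payload: 'ping')
handle.wait(max_wait_timeout: 5) # raises if no delivery report arrives in time
producer.close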

data/lib/water_drop.rb
@@ -3,39 +3,19 @@
 # External components
 # delegate should be removed because we don't need it, we just add it because of ruby-kafka
 %w[
-  delegate
-  json
-  delivery_boy
-  singleton
+  concurrent/array
   dry-configurable
   dry/monitor/notifications
   dry-validation
+  rdkafka
+  json
   zeitwerk
+  securerandom
 ].each { |lib| require lib }

 # WaterDrop library
 module WaterDrop
   class << self
-    attr_accessor :logger
-
-    # Sets up the whole configuration
-    # @param [Block] block configuration block
-    def setup(&block)
-      Config.setup(&block)
-      DeliveryBoy.logger = self.logger = config.logger
-      ConfigApplier.call(DeliveryBoy.config, Config.config.to_h)
-    end
-
-    # @return [WaterDrop::Config] config instance
-    def config
-      Config.config
-    end
-
-    # @return [::WaterDrop::Monitor] monitor that we want to use
-    def monitor
-      config.monitor
-    end
-
     # @return [String] root path of this gem
     def gem_root
       Pathname.new(File.expand_path('..', __dir__))
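
With delivery_boy and the global state gone, WaterDrop.setup, WaterDrop.config and WaterDrop.monitor no longer exist; configuration moves onto producer instances (see the new lib/water_drop/producer files in the list above). A rough sketch of the 2.0 style, assuming the instance-based producer API introduced by those files:

# Each producer carries its own configuration instead of mutating a singleton
producer = WaterDrop::Producer.new do |config|
  config.deliver = true
  config.kafka = { 'bootstrap.servers': 'localhost:9092' }
end

producer.produce_sync(topic: 'events', payload: 'hello world')
producer.close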

data/lib/water_drop/config.rb
@@ -5,158 +5,57 @@
 module WaterDrop
   # Configuration object for setting up all options required by WaterDrop
   class Config
-    extend Dry::Configurable
-
-    # Config schema definition
-    # @note We use a single instance not to create new one upon each usage
-    SCHEMA = Contracts::Config.new.freeze
-
-    private_constant :SCHEMA
+    include Dry::Configurable

     # WaterDrop options
-    # option client_id [String] identifier of this producer
-    setting :client_id, 'waterdrop'
-    # option [Instance, nil] logger that we want to use or nil to fallback to ruby-kafka logger
-    setting :logger, Logger.new($stdout, level: Logger::WARN)
+    #
+    # option [String] id of the producer. This can be helpful when building producer specific
+    # instrumentation or loggers. It is not the kafka producer id
+    setting(:id, false) { |id| id || SecureRandom.uuid }
+    # option [Instance] logger that we want to use
+    # @note Due to how rdkafka works, this setting is global for all the producers
+    setting(:logger, false) { |logger| logger || Logger.new($stdout, level: Logger::WARN) }
     # option [Instance] monitor that we want to use. See instrumentation part of the README for
     # more details
-    setting :monitor, WaterDrop::Instrumentation::Monitor.new
+    setting(:monitor, false) { |monitor| monitor || WaterDrop::Instrumentation::Monitor.new }
+    # option [Integer] max payload size allowed for delivery to Kafka
+    setting :max_payload_size, 1_000_012
+    # option [Integer] Wait that long for the delivery report or raise an error if this takes
+    # longer than the timeout.
+    setting :max_wait_timeout, 5
+    # option [Numeric] how long should we wait between re-checks on the availability of the
+    # delivery report. In a really robust systems, this describes the min-delivery time
+    # for a single sync message when produced in isolation
+    setting :wait_timeout, 0.005 # 5 milliseconds
     # option [Boolean] should we send messages. Setting this to false can be really useful when
-    # testing and or developing because when set to false, won't actually ping Kafka
+    # testing and or developing because when set to false, won't actually ping Kafka but will
+    # run all the validations, etc
     setting :deliver, true
-    # option [Boolean] if you're producing messages faster than the framework or the network can
-    # send them off, ruby-kafka might reject them. If that happens, WaterDrop will either raise
-    # or ignore - this setting manages that behavior. This only applies to async producer as
-    # sync producer will always raise upon problems
-    setting :raise_on_buffer_overflow, true
-
-    # Settings directly related to the Kafka driver
-    setting :kafka do
-      # option [Array<String>] Array that contains Kafka seed broker hosts with ports
-      setting :seed_brokers
-
-      # Network timeouts
-      # option connect_timeout [Integer] Sets the number of seconds to wait while connecting to
-      # a broker for the first time. When ruby-kafka initializes, it needs to connect to at
-      # least one host.
-      setting :connect_timeout, 10
-      # option socket_timeout [Integer] Sets the number of seconds to wait when reading from or
-      # writing to a socket connection to a broker. After this timeout expires the connection
-      # will be killed. Note that some Kafka operations are by definition long-running, such as
-      # waiting for new messages to arrive in a partition, so don't set this value too low
-      setting :socket_timeout, 30
-
-      # Buffering for async producer
-      # @option [Integer] The maximum number of bytes allowed in the buffer before new messages
-      # are rejected.
-      setting :max_buffer_bytesize, 10_000_000
-      # @option [Integer] The maximum number of messages allowed in the buffer before new messages
-      # are rejected.
-      setting :max_buffer_size, 1000
-      # @option [Integer] The maximum number of messages allowed in the queue before new messages
-      # are rejected. The queue is used to ferry messages from the foreground threads of your
-      # application to the background thread that buffers and delivers messages.
-      setting :max_queue_size, 1000
-
-      # option [Integer] A timeout executed by a broker when the client is sending messages to it.
-      # It defines the number of seconds the broker should wait for replicas to acknowledge the
-      # write before responding to the client with an error. As such, it relates to the
-      # required_acks setting. It should be set lower than socket_timeout.
-      setting :ack_timeout, 5
-      # option [Integer] The number of seconds between background message
-      # deliveries. Default is 10 seconds. Disable timer-based background deliveries by
-      # setting this to 0.
-      setting :delivery_interval, 10
-      # option [Integer] The number of buffered messages that will trigger a background message
-      # delivery. Default is 100 messages. Disable buffer size based background deliveries by
-      # setting this to 0.
-      setting :delivery_threshold, 100
-      # option [Boolean]
-      setting :idempotent, false
-      # option [Boolean]
-      setting :transactional, false
-      # option [Integer]
-      setting :transactional_timeout, 60
-
-      # option [Integer] The number of retries when attempting to deliver messages.
-      setting :max_retries, 2
-      # option [Integer]
-      setting :required_acks, -1
-      # option [Integer]
-      setting :retry_backoff, 1
-
-      # option [Integer] The minimum number of messages that must be buffered before compression is
-      # attempted. By default only one message is required. Only relevant if compression_codec
-      # is set.
-      setting :compression_threshold, 1
-      # option [Symbol] The codec used to compress messages. Must be either snappy or gzip.
-      setting :compression_codec, nil
-
-      # SSL authentication related settings
-      # option ca_cert [String, nil] SSL CA certificate
-      setting :ssl_ca_cert, nil
-      # option ssl_ca_cert_file_path [String, nil] SSL CA certificate file path
-      setting :ssl_ca_cert_file_path, nil
-      # option ssl_ca_certs_from_system [Boolean] Use the CA certs from your system's default
-      # certificate store
-      setting :ssl_ca_certs_from_system, false
-      # option ssl_verify_hostname [Boolean] Verify the hostname for client certs
-      setting :ssl_verify_hostname, true
-      # option ssl_client_cert [String, nil] SSL client certificate
-      setting :ssl_client_cert, nil
-      # option ssl_client_cert_key [String, nil] SSL client certificate password
-      setting :ssl_client_cert_key, nil
-      # option sasl_gssapi_principal [String, nil] sasl principal
-      setting :sasl_gssapi_principal, nil
-      # option sasl_gssapi_keytab [String, nil] sasl keytab
-      setting :sasl_gssapi_keytab, nil
-      # option sasl_plain_authzid [String] The authorization identity to use
-      setting :sasl_plain_authzid, ''
-      # option sasl_plain_username [String, nil] The username used to authenticate
-      setting :sasl_plain_username, nil
-      # option sasl_plain_password [String, nil] The password used to authenticate
-      setting :sasl_plain_password, nil
-      # option sasl_scram_username [String, nil] The username used to authenticate
-      setting :sasl_scram_username, nil
-      # option sasl_scram_password [String, nil] The password used to authenticate
-      setting :sasl_scram_password, nil
-      # option sasl_scram_mechanism [String, nil] Scram mechanism, either 'sha256' or 'sha512'
-      setting :sasl_scram_mechanism, nil
-      # option sasl_over_ssl [Boolean] whether to enforce SSL with SASL
-      setting :sasl_over_ssl, true
-      # option ssl_client_cert_chain [String, nil] client cert chain or nil if not used
-      setting :ssl_client_cert_chain, nil
-      # option ssl_client_cert_key_password [String, nil] the password required to read
-      # the ssl_client_cert_key
-      setting :ssl_client_cert_key_password, nil
-      # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
-      # implements method token.
-      setting :sasl_oauth_token_provider, nil
-    end
-
-    class << self
-      # Configuration method
-      # @yield Runs a block of code providing a config singleton instance to it
-      # @yieldparam [WaterDrop::Config] WaterDrop config instance
-      def setup
-        configure do |config|
-          yield(config)
-          validate!(config.to_h)
-        end
+    # rdkafka options
+    # @see https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
+    setting :kafka, {}
+
+    # Configuration method
+    # @yield Runs a block of code providing a config singleton instance to it
+    # @yieldparam [WaterDrop::Config] WaterDrop config instance
+    def setup
+      configure do |config|
+        yield(config)
+        validate!(config.to_h)
       end
+    end

-      private
+    private

-      # Validates the configuration and if anything is wrong, will raise an exception
-      # @param config_hash [Hash] config hash with setup details
-      # @raise [WaterDrop::Errors::InvalidConfiguration] raised when something is wrong with
-      # the configuration
-      def validate!(config_hash)
-        validation_result = SCHEMA.call(config_hash)
-        return true if validation_result.success?
+    # Validates the configuration and if anything is wrong, will raise an exception
+    # @param config_hash [Hash] config hash with setup details
+    # @raise [WaterDrop::Errors::ConfigurationInvalidError] raised when something is wrong with
+    # the configuration
+    def validate!(config_hash)
+      result = Contracts::Config.new.call(config_hash)
+      return true if result.success?

-        raise Errors::InvalidConfiguration, validation_result.errors.to_h
-      end
+      raise Errors::ConfigurationInvalidError, result.errors.to_h
     end
   end
 end
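
Validation now happens through a fresh Contracts::Config instance on every setup call, and the raised error is renamed from Errors::InvalidConfiguration to Errors::ConfigurationInvalidError. A small sketch of a failing setup, assuming the contract shown further below that requires kafka's 'bootstrap.servers':

config = WaterDrop::Config.new

begin
  # kafka stays an empty hash, so 'bootstrap.servers' is missing
  config.setup { |c| c.deliver = true }
rescue WaterDrop::Errors::ConfigurationInvalidError => e
  # The error carries the dry-validation errors hash, e.g.
  # {:kafka=>{:"bootstrap.servers"=>["is missing"]}}
  puts e.message
end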

data/lib/water_drop/contracts.rb
@@ -3,7 +3,5 @@
 module WaterDrop
   # Namespace for all the contracts for config validations
   module Contracts
-    # Regex to check that topic has a valid format
-    TOPIC_REGEXP = /\A(\w|\-|\.)+\z/.freeze
   end
 end

data/lib/water_drop/contracts/config.rb
@@ -4,134 +4,21 @@ module WaterDrop
   module Contracts
     # Contract with validation rules for WaterDrop configuration details
     class Config < Dry::Validation::Contract
-      # Valid uri schemas of Kafka broker url
-      URI_SCHEMES = %w[kafka kafka+ssl plaintext ssl].freeze
+      # Ensure valid format of each seed broker so that rdkafka doesn't fail silently
+      SEED_BROKER_FORMAT_REGEXP = %r{\A([^\:\/,]+:[0-9]+)(,[^\:\/,]+:[0-9]+)*\z}.freeze

-      # Available sasl scram mechanism of authentication (plus nil)
-      SASL_SCRAM_MECHANISMS = %w[sha256 sha512].freeze
-
-      # Supported compression codecs
-      COMPRESSION_CODECS = %i[snappy gzip lz4 zstd].freeze
-
-      config.messages.load_paths << File.join(WaterDrop.gem_root, 'config', 'errors.yml')
-
-      class << self
-        private
-
-        # Builder for kafka scoped data custom rules
-        # @param keys [Symbol, Hash] the keys names
-        # @param block [Proc] block we want to run with validations within the kafka scope
-        def kafka_scope_rule(*keys, &block)
-          rule(*[:kafka].product(keys)) do
-            instance_exec(values[:kafka], &block)
-          end
-        end
-      end
-
-      private
-
-      # Uri validator to check if uri is in a Kafka acceptable format
-      # @param uri [String] uri we want to validate
-      # @return [Boolean] true if it is a valid uri, otherwise false
-      def broker_schema?(uri)
-        uri = URI.parse(uri)
-        URI_SCHEMES.include?(uri.scheme) && uri.port
-      rescue URI::InvalidURIError
-        false
-      end
+      private_constant :SEED_BROKER_FORMAT_REGEXP

       params do
-        required(:client_id).filled(:str?, format?: Contracts::TOPIC_REGEXP)
+        required(:id).filled(:str?)
         required(:logger).filled
         required(:deliver).filled(:bool?)
-        required(:raise_on_buffer_overflow).filled(:bool?)
+        required(:max_payload_size).filled(:int?, gteq?: 1)
+        required(:max_wait_timeout).filled(:number?, gteq?: 0)
+        required(:wait_timeout).filled(:number?, gt?: 0)

         required(:kafka).schema do
-          required(:seed_brokers).value(:array, :filled?).each(:str?)
-          required(:connect_timeout).filled(:int?, gt?: 0)
-          required(:socket_timeout).filled(:int?, gt?: 0)
-          required(:compression_threshold).filled(:int?, gteq?: 1)
-          optional(:compression_codec).maybe(included_in?: COMPRESSION_CODECS)
-
-          required(:max_buffer_bytesize).filled(:int?, gt?: 0)
-          required(:max_buffer_size).filled(:int?, gt?: 0)
-          required(:max_queue_size).filled(:int?, gt?: 0)
-
-          required(:ack_timeout).filled(:int?, gt?: 0)
-          required(:delivery_interval).filled(:int?, gteq?: 0)
-          required(:delivery_threshold).filled(:int?, gteq?: 0)
-
-          required(:max_retries).filled(:int?, gteq?: 0)
-          required(:retry_backoff).filled(:int?, gteq?: 0)
-          required(:required_acks).filled(included_in?: [1, 0, -1, :all])
-
-          %i[
-            ssl_ca_cert
-            ssl_ca_cert_file_path
-            ssl_client_cert
-            ssl_client_cert_key
-            ssl_client_cert_chain
-            ssl_client_cert_key_password
-            sasl_gssapi_principal
-            sasl_gssapi_keytab
-            sasl_plain_authzid
-            sasl_plain_username
-            sasl_plain_password
-            sasl_scram_username
-            sasl_scram_password
-          ].each do |encryption_attribute|
-            optional(encryption_attribute).maybe(:str?)
-          end
-
-          optional(:ssl_verify_hostname).maybe(:bool?)
-          optional(:ssl_ca_certs_from_system).maybe(:bool?)
-          optional(:sasl_over_ssl).maybe(:bool?)
-          optional(:sasl_oauth_token_provider).value(:any)
-
-          # It's not with other encryptions as it has some more rules
-          optional(:sasl_scram_mechanism)
-            .maybe(:str?, included_in?: SASL_SCRAM_MECHANISMS)
-        end
-      end
-
-      kafka_scope_rule(:seed_brokers) do |kafka|
-        unless kafka[:seed_brokers].all?(&method(:broker_schema?))
-          key(%i[kafka seed_brokers]).failure(:broker_schema)
-        end
-      end
-
-      kafka_scope_rule(:ssl_client_cert, :ssl_client_cert_key) do |kafka|
-        if kafka[:ssl_client_cert] &&
-           kafka[:ssl_client_cert_key].nil?
-          key(%i[kafka ssl_client_cert_key]).failure(:ssl_client_cert_with_ssl_client_cert_key)
-        end
-      end
-
-      kafka_scope_rule(:ssl_client_cert_key, :ssl_client_cert) do |kafka|
-        if kafka[:ssl_client_cert_key] &&
-           kafka[:ssl_client_cert].nil?
-          key.failure(:ssl_client_cert_key_with_ssl_client_cert)
-        end
-      end
-
-      kafka_scope_rule(:ssl_client_cert_chain, :ssl_client_cert) do |kafka|
-        if kafka[:ssl_client_cert_chain] &&
-           kafka[:ssl_client_cert].nil?
-          key.failure(:ssl_client_cert_chain_with_ssl_client_cert)
-        end
-      end
-
-      kafka_scope_rule(:ssl_client_cert_key_password, :ssl_client_cert_key) do |kafka|
-        if kafka[:ssl_client_cert_key_password] &&
-           kafka[:ssl_client_cert_key].nil?
-          key.failure(:ssl_client_cert_key_password_with_ssl_client_cert_key)
-        end
-      end
-
-      kafka_scope_rule(:sasl_oauth_token_provider) do |kafka|
-        if kafka[:sasl_oauth_token_provider] &&
-           !kafka[:sasl_oauth_token_provider].respond_to?(:token)
-          key.failure(:sasl_oauth_token_provider_respond_to_token)
+          required(:'bootstrap.servers').filled(:str?, format?: SEED_BROKER_FORMAT_REGEXP)
         end
       end
     end
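
Seed brokers are no longer per-entry kafka:// URIs; the contract now validates a single comma-separated host:port list under kafka's 'bootstrap.servers'. For illustration (the constant itself is private), the same pattern accepts and rejects values like this:

format = %r{\A([^\:\/,]+:[0-9]+)(,[^\:\/,]+:[0-9]+)*\z}

format.match?('localhost:9092')            # => true
format.match?('broker1:9092,broker2:9092') # => true
format.match?('kafka://localhost:9092')    # => false, URI schemes are rejected
format.match?('localhost')                 # => false, the port is required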

data/lib/water_drop/contracts/message.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module WaterDrop
+  module Contracts
+    # Contract with validation rules for validating that all the message options that
+    # we provide to producer ale valid and usable
+    class Message < Dry::Validation::Contract
+      # Regex to check that topic has a valid format
+      TOPIC_REGEXP = /\A(\w|\-|\.)+\z/.freeze
+
+      # Checks, that the given value is a string
+      STRING_ASSERTION = ->(value) { value.is_a?(String) }.to_proc
+
+      private_constant :TOPIC_REGEXP, :STRING_ASSERTION
+
+      config.messages.load_paths << File.join(WaterDrop.gem_root, 'config', 'errors.yml')
+
+      option :max_payload_size
+
+      params do
+        required(:topic).filled(:str?, format?: TOPIC_REGEXP)
+        required(:payload).filled(:str?)
+        optional(:key).maybe(:str?, :filled?)
+        optional(:partition).filled(:int?, gteq?: -1)
+        optional(:timestamp).maybe { time? | int? }
+        optional(:headers).maybe(:hash?)
+      end
+
+      rule(:headers) do
+        next unless value.is_a?(Hash)
+
+        key.failure(:invalid_key_type) unless value.keys.all?(&STRING_ASSERTION)
+        key.failure(:invalid_value_type) unless value.values.all?(&STRING_ASSERTION)
+      end
+
+      rule(:payload) do
+        key.failure(:max_payload_size) if value.bytesize > max_payload_size
+      end
+    end
+  end
+end
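
This new Message contract replaces the old message_options contract and is what the new errors.yml keys at the top of this diff feed: header keys and values must be Strings and the payload must fit within max_payload_size. A hedged example of calling it directly, with made-up values:

contract = WaterDrop::Contracts::Message.new(max_payload_size: 100)

result = contract.call(
  topic: 'events',
  payload: 'a' * 200,            # 200 bytes, above the 100 byte limit
  headers: { 'trace-id' => 123 } # non-String header value
)

result.success?     # => false
result.errors.to_h  # => e.g. { payload: ["is more than `max_payload_size` config value"],
                    #           headers: ["all values need to be of type String"] }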