waterdrop 2.0.7 → 2.6.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/FUNDING.yml +1 -0
- data/.github/workflows/ci.yml +22 -11
- data/.ruby-version +1 -1
- data/CHANGELOG.md +200 -0
- data/Gemfile +0 -2
- data/Gemfile.lock +32 -75
- data/README.md +22 -275
- data/certs/cert_chain.pem +26 -0
- data/config/locales/errors.yml +33 -0
- data/docker-compose.yml +19 -12
- data/lib/waterdrop/clients/buffered.rb +90 -0
- data/lib/waterdrop/clients/dummy.rb +69 -0
- data/lib/waterdrop/clients/rdkafka.rb +34 -0
- data/lib/{water_drop → waterdrop}/config.rb +39 -16
- data/lib/waterdrop/contracts/config.rb +43 -0
- data/lib/waterdrop/contracts/message.rb +64 -0
- data/lib/{water_drop → waterdrop}/errors.rb +14 -7
- data/lib/waterdrop/instrumentation/callbacks/delivery.rb +102 -0
- data/lib/{water_drop → waterdrop}/instrumentation/callbacks/error.rb +6 -2
- data/lib/{water_drop → waterdrop}/instrumentation/callbacks/statistics.rb +1 -1
- data/lib/{water_drop/instrumentation/stdout_listener.rb → waterdrop/instrumentation/logger_listener.rb} +66 -21
- data/lib/waterdrop/instrumentation/monitor.rb +20 -0
- data/lib/{water_drop/instrumentation/monitor.rb → waterdrop/instrumentation/notifications.rb} +12 -14
- data/lib/waterdrop/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/waterdrop/instrumentation/vendors/datadog/metrics_listener.rb +210 -0
- data/lib/waterdrop/middleware.rb +50 -0
- data/lib/{water_drop → waterdrop}/producer/async.rb +40 -4
- data/lib/{water_drop → waterdrop}/producer/buffer.rb +12 -30
- data/lib/{water_drop → waterdrop}/producer/builder.rb +6 -11
- data/lib/{water_drop → waterdrop}/producer/sync.rb +44 -15
- data/lib/waterdrop/producer/transactions.rb +170 -0
- data/lib/waterdrop/producer.rb +308 -0
- data/lib/{water_drop → waterdrop}/version.rb +1 -1
- data/lib/waterdrop.rb +28 -2
- data/renovate.json +6 -0
- data/waterdrop.gemspec +14 -11
- data.tar.gz.sig +0 -0
- metadata +71 -111
- metadata.gz.sig +0 -0
- data/certs/mensfeld.pem +0 -25
- data/config/errors.yml +0 -6
- data/lib/water_drop/contracts/config.rb +0 -26
- data/lib/water_drop/contracts/message.rb +0 -42
- data/lib/water_drop/instrumentation/callbacks/delivery.rb +0 -30
- data/lib/water_drop/instrumentation/callbacks/statistics_decorator.rb +0 -77
- data/lib/water_drop/instrumentation/callbacks_manager.rb +0 -39
- data/lib/water_drop/instrumentation.rb +0 -20
- data/lib/water_drop/patches/rdkafka/bindings.rb +0 -42
- data/lib/water_drop/patches/rdkafka/producer.rb +0 -20
- data/lib/water_drop/producer/dummy_client.rb +0 -32
- data/lib/water_drop/producer.rb +0 -162
- data/lib/water_drop.rb +0 -36
- /data/lib/{water_drop → waterdrop}/contracts.rb +0 -0
- /data/lib/{water_drop → waterdrop}/producer/status.rb +0 -0
@@ -5,11 +5,14 @@
|
|
5
5
|
module WaterDrop
|
6
6
|
# Configuration object for setting up all options required by WaterDrop
|
7
7
|
class Config
|
8
|
-
include
|
8
|
+
include ::Karafka::Core::Configurable
|
9
9
|
|
10
10
|
# Defaults for kafka settings, that will be overwritten only if not present already
|
11
11
|
KAFKA_DEFAULTS = {
|
12
|
-
'client.id'
|
12
|
+
'client.id': 'waterdrop',
|
13
|
+
# emit librdkafka statistics every five seconds. This is used in instrumentation.
|
14
|
+
# When disabled, part of metrics will not be published and available.
|
15
|
+
'statistics.interval.ms': 5_000
|
13
16
|
}.freeze
|
14
17
|
|
15
18
|
private_constant :KAFKA_DEFAULTS
|
@@ -22,7 +25,7 @@ module WaterDrop
|
|
22
25
|
setting(
|
23
26
|
:id,
|
24
27
|
default: false,
|
25
|
-
constructor: ->(id) { id || SecureRandom.
|
28
|
+
constructor: ->(id) { id || SecureRandom.hex(6) }
|
26
29
|
)
|
27
30
|
# option [Instance] logger that we want to use
|
28
31
|
# @note Due to how rdkafka works, this setting is global for all the producers
|
@@ -47,13 +50,41 @@ module WaterDrop
|
|
47
50
|
# delivery report. In really robust systems, this describes the min-delivery time
|
48
51
|
# for a single sync message when produced in isolation
|
49
52
|
setting :wait_timeout, default: 0.005 # 5 milliseconds
|
53
|
+
# option [Boolean] should we, upon detecting a full librdkafka queue, back off and retry or should
|
54
|
+
# we raise an exception.
|
55
|
+
# When this is set to `true`, upon full queue, we won't raise an error. There will be error
|
56
|
+
# in the `error.occurred` notification pipeline with a proper type as while this is
|
57
|
+
# recoverable, in a high number it still may mean issues.
|
58
|
+
# Waiting is one of the recommended strategies.
|
59
|
+
setting :wait_on_queue_full, default: true
|
60
|
+
# option [Numeric] how long (in seconds) should we back off before a retry when queue is full
|
61
|
+
# The retry will happen with the same message and backoff should give us some time to
|
62
|
+
# dispatch previously buffered messages.
|
63
|
+
setting :wait_backoff_on_queue_full, default: 0.1
|
64
|
+
# option [Numeric] how many seconds should we wait with the backoff on queue having space for
|
65
|
+
# more messages before re-raising the error.
|
66
|
+
setting :wait_timeout_on_queue_full, default: 10
|
67
|
+
# option [Numeric] How long to wait before retrying a retryable transaction related error
|
68
|
+
setting :wait_backoff_on_transaction_command, default: 0.5
|
69
|
+
# option [Numeric] How many times to retry a retryable transaction related error before
|
70
|
+
# giving up
|
71
|
+
setting :max_attempts_on_transaction_command, default: 5
|
72
|
+
|
50
73
|
# option [Boolean] should we send messages. Setting this to false can be really useful when
|
51
74
|
# testing and or developing because when set to false, won't actually ping Kafka but will
|
52
75
|
# run all the validations, etc
|
53
76
|
setting :deliver, default: true
|
77
|
+
# option [Class] class for usage when creating the underlying client used to dispatch messages
|
78
|
+
setting :client_class, default: Clients::Rdkafka
|
54
79
|
# rdkafka options
|
55
80
|
# @see https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
|
56
81
|
setting :kafka, default: {}
|
82
|
+
# Middleware chain that can be expanded with useful middleware steps
|
83
|
+
setting(
|
84
|
+
:middleware,
|
85
|
+
default: false,
|
86
|
+
constructor: ->(middleware) { middleware || WaterDrop::Middleware.new }
|
87
|
+
)
|
57
88
|
|
58
89
|
# Configuration method
|
59
90
|
# @yield Runs a block of code providing a config singleton instance to it
|
@@ -63,10 +94,13 @@ module WaterDrop
|
|
63
94
|
yield(config)
|
64
95
|
|
65
96
|
merge_kafka_defaults!(config)
|
66
|
-
|
97
|
+
|
98
|
+
Contracts::Config.new.validate!(config.to_h, Errors::ConfigurationInvalidError)
|
67
99
|
|
68
100
|
::Rdkafka::Config.logger = config.logger
|
69
101
|
end
|
102
|
+
|
103
|
+
self
|
70
104
|
end
|
71
105
|
|
72
106
|
private
|
@@ -74,7 +108,7 @@ module WaterDrop
|
|
74
108
|
# Propagates the kafka setting defaults unless they are already present
|
75
109
|
# This makes it easier to set some values that users usually don't change but still allows them
|
76
110
|
# to overwrite the whole hash if they want to
|
77
|
-
# @param config [
|
111
|
+
# @param config [Karafka::Core::Configurable::Node] config of this producer
|
78
112
|
def merge_kafka_defaults!(config)
|
79
113
|
KAFKA_DEFAULTS.each do |key, value|
|
80
114
|
next if config.kafka.key?(key)
|
@@ -82,16 +116,5 @@ module WaterDrop
|
|
82
116
|
config.kafka[key] = value
|
83
117
|
end
|
84
118
|
end
|
85
|
-
|
86
|
-
# Validates the configuration and if anything is wrong, will raise an exception
|
87
|
-
# @param config_hash [Hash] config hash with setup details
|
88
|
-
# @raise [WaterDrop::Errors::ConfigurationInvalidError] raised when something is wrong with
|
89
|
-
# the configuration
|
90
|
-
def validate!(config_hash)
|
91
|
-
result = Contracts::Config.new.call(config_hash)
|
92
|
-
return true if result.success?
|
93
|
-
|
94
|
-
raise Errors::ConfigurationInvalidError, result.errors.to_h
|
95
|
-
end
|
96
119
|
end
|
97
120
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module WaterDrop
|
4
|
+
module Contracts
|
5
|
+
# Contract with validation rules for WaterDrop configuration details
|
6
|
+
class Config < ::Karafka::Core::Contractable::Contract
|
7
|
+
configure do |config|
|
8
|
+
config.error_messages = YAML.safe_load(
|
9
|
+
File.read(
|
10
|
+
File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
|
11
|
+
)
|
12
|
+
).fetch('en').fetch('validations').fetch('config')
|
13
|
+
end
|
14
|
+
|
15
|
+
required(:id) { |val| val.is_a?(String) && !val.empty? }
|
16
|
+
required(:logger) { |val| !val.nil? }
|
17
|
+
required(:deliver) { |val| [true, false].include?(val) }
|
18
|
+
required(:max_payload_size) { |val| val.is_a?(Integer) && val >= 1 }
|
19
|
+
required(:max_wait_timeout) { |val| val.is_a?(Numeric) && val >= 0 }
|
20
|
+
required(:wait_timeout) { |val| val.is_a?(Numeric) && val.positive? }
|
21
|
+
required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
|
22
|
+
required(:wait_on_queue_full) { |val| [true, false].include?(val) }
|
23
|
+
required(:wait_backoff_on_queue_full) { |val| val.is_a?(Numeric) && val >= 0 }
|
24
|
+
required(:wait_timeout_on_queue_full) { |val| val.is_a?(Numeric) && val >= 0 }
|
25
|
+
|
26
|
+
# rdkafka allows both symbols and strings as keys for config but then casts them to strings
|
27
|
+
# This can be confusing, so we expect all keys to be symbolized
|
28
|
+
virtual do |config, errors|
|
29
|
+
next true unless errors.empty?
|
30
|
+
|
31
|
+
errors = []
|
32
|
+
|
33
|
+
config
|
34
|
+
.fetch(:kafka)
|
35
|
+
.keys
|
36
|
+
.reject { |key| key.is_a?(Symbol) }
|
37
|
+
.each { |key| errors << [[:kafka, key], :kafka_key_must_be_a_symbol] }
|
38
|
+
|
39
|
+
errors
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module WaterDrop
|
4
|
+
module Contracts
|
5
|
+
# Contract with validation rules for validating that all the message options that
|
6
|
+
# we provide to producer are valid and usable
|
7
|
+
class Message < ::Karafka::Core::Contractable::Contract
|
8
|
+
configure do |config|
|
9
|
+
config.error_messages = YAML.safe_load(
|
10
|
+
File.read(
|
11
|
+
File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
|
12
|
+
)
|
13
|
+
).fetch('en').fetch('validations').fetch('message')
|
14
|
+
end
|
15
|
+
|
16
|
+
# Regex to check that topic has a valid format
|
17
|
+
TOPIC_REGEXP = /\A(\w|-|\.)+\z/
|
18
|
+
|
19
|
+
private_constant :TOPIC_REGEXP
|
20
|
+
|
21
|
+
attr_reader :max_payload_size
|
22
|
+
|
23
|
+
# @param max_payload_size [Integer] max payload size
|
24
|
+
def initialize(max_payload_size:)
|
25
|
+
super()
|
26
|
+
@max_payload_size = max_payload_size
|
27
|
+
end
|
28
|
+
|
29
|
+
required(:topic) do |val|
|
30
|
+
(val.is_a?(String) || val.is_a?(Symbol)) && TOPIC_REGEXP.match?(val.to_s)
|
31
|
+
end
|
32
|
+
|
33
|
+
required(:payload) { |val| val.nil? || val.is_a?(String) }
|
34
|
+
optional(:key) { |val| val.nil? || (val.is_a?(String) && !val.empty?) }
|
35
|
+
optional(:partition) { |val| val.is_a?(Integer) && val >= -1 }
|
36
|
+
optional(:partition_key) { |val| val.nil? || (val.is_a?(String) && !val.empty?) }
|
37
|
+
optional(:timestamp) { |val| val.nil? || (val.is_a?(Time) || val.is_a?(Integer)) }
|
38
|
+
optional(:headers) { |val| val.nil? || val.is_a?(Hash) }
|
39
|
+
|
40
|
+
virtual do |message, errors|
|
41
|
+
next true unless errors.empty?
|
42
|
+
next true unless message.key?(:headers)
|
43
|
+
next true if message[:headers].nil?
|
44
|
+
|
45
|
+
errors = []
|
46
|
+
|
47
|
+
message.fetch(:headers).each do |key, value|
|
48
|
+
errors << [%i[headers], :invalid_key_type] unless key.is_a?(String)
|
49
|
+
errors << [%i[headers], :invalid_value_type] unless value.is_a?(String)
|
50
|
+
end
|
51
|
+
|
52
|
+
errors
|
53
|
+
end
|
54
|
+
|
55
|
+
virtual do |message, errors, validator|
|
56
|
+
next true unless errors.empty?
|
57
|
+
next if message[:payload].nil? # tombstone payload
|
58
|
+
next true if message[:payload].bytesize <= validator.max_payload_size
|
59
|
+
|
60
|
+
[[%i[payload], :max_size]]
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -29,15 +29,22 @@ module WaterDrop
|
|
29
29
|
# contact us as it is an error.
|
30
30
|
StatusInvalidError = Class.new(BaseError)
|
31
31
|
|
32
|
-
# Raised when during
|
33
|
-
|
34
|
-
attr_reader :dispatched_messages
|
32
|
+
# Raised when there is an inline error during single message produce operations
|
33
|
+
ProduceError = Class.new(BaseError)
|
35
34
|
|
36
|
-
|
35
|
+
# Raise it within a transaction to abort it
|
36
|
+
AbortTransaction = Class.new(BaseError)
|
37
|
+
|
38
|
+
# Raised when during messages producing something bad happened inline
|
39
|
+
class ProduceManyError < ProduceError
|
40
|
+
attr_reader :dispatched
|
41
|
+
|
42
|
+
# @param dispatched [Array<Rdkafka::Producer::DeliveryHandle>] handlers of the
|
37
43
|
# messages that we've dispatched
|
38
|
-
|
39
|
-
|
40
|
-
|
44
|
+
# @param message [String] error message
|
45
|
+
def initialize(dispatched, message)
|
46
|
+
super(message)
|
47
|
+
@dispatched = dispatched
|
41
48
|
end
|
42
49
|
end
|
43
50
|
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module WaterDrop
|
4
|
+
module Instrumentation
|
5
|
+
module Callbacks
|
6
|
+
# Creates a callable that we want to run upon each message delivery or failure
|
7
|
+
#
|
8
|
+
# @note We don't have to provide client_name here as this callback is per client instance
|
9
|
+
#
|
10
|
+
# @note We do not consider `message.purge` as an error for transactional producers, because
|
11
|
+
# this is a standard behaviour for not yet dispatched messages on aborted transactions.
|
12
|
+
# We do however still want to instrument it for traceability.
|
13
|
+
class Delivery
|
14
|
+
# Error emitted when a message was not yet dispatched and was purged from the queue
|
15
|
+
RD_KAFKA_RESP_PURGE_QUEUE = -152
|
16
|
+
|
17
|
+
# Error emitted when a message was purged while it was dispatched
|
18
|
+
RD_KAFKA_RESP_PURGE_INFLIGHT = -151
|
19
|
+
|
20
|
+
# Errors related to queue purging that is expected in transactions
|
21
|
+
PURGE_ERRORS = [RD_KAFKA_RESP_PURGE_INFLIGHT, RD_KAFKA_RESP_PURGE_QUEUE].freeze
|
22
|
+
|
23
|
+
private_constant :RD_KAFKA_RESP_PURGE_QUEUE, :RD_KAFKA_RESP_PURGE_INFLIGHT, :PURGE_ERRORS
|
24
|
+
|
25
|
+
# @param producer_id [String] id of the current producer
|
26
|
+
# @param transactional [Boolean] is this handle for a transactional or regular producer
|
27
|
+
# @param monitor [WaterDrop::Instrumentation::Monitor] monitor we are using
|
28
|
+
def initialize(producer_id, transactional, monitor)
|
29
|
+
@producer_id = producer_id
|
30
|
+
@transactional = transactional
|
31
|
+
@monitor = monitor
|
32
|
+
end
|
33
|
+
|
34
|
+
# Emits delivery details to the monitor
|
35
|
+
# @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
|
36
|
+
def call(delivery_report)
|
37
|
+
error_code = delivery_report.error.to_i
|
38
|
+
|
39
|
+
if error_code.zero?
|
40
|
+
instrument_acknowledged(delivery_report)
|
41
|
+
|
42
|
+
elsif @transactional && PURGE_ERRORS.include?(error_code)
|
43
|
+
instrument_purged(delivery_report)
|
44
|
+
else
|
45
|
+
instrument_error(delivery_report)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
# @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
|
52
|
+
def instrument_acknowledged(delivery_report)
|
53
|
+
@monitor.instrument(
|
54
|
+
'message.acknowledged',
|
55
|
+
caller: self,
|
56
|
+
producer_id: @producer_id,
|
57
|
+
offset: delivery_report.offset,
|
58
|
+
partition: delivery_report.partition,
|
59
|
+
topic: delivery_report.topic_name,
|
60
|
+
delivery_report: delivery_report
|
61
|
+
)
|
62
|
+
end
|
63
|
+
|
64
|
+
# @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
|
65
|
+
def instrument_purged(delivery_report)
|
66
|
+
@monitor.instrument(
|
67
|
+
'message.purged',
|
68
|
+
caller: self,
|
69
|
+
error: build_error(delivery_report),
|
70
|
+
producer_id: @producer_id,
|
71
|
+
offset: delivery_report.offset,
|
72
|
+
partition: delivery_report.partition,
|
73
|
+
topic: delivery_report.topic_name,
|
74
|
+
delivery_report: delivery_report
|
75
|
+
)
|
76
|
+
end
|
77
|
+
|
78
|
+
# @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
|
79
|
+
def instrument_error(delivery_report)
|
80
|
+
@monitor.instrument(
|
81
|
+
'error.occurred',
|
82
|
+
caller: self,
|
83
|
+
error: build_error(delivery_report),
|
84
|
+
producer_id: @producer_id,
|
85
|
+
offset: delivery_report.offset,
|
86
|
+
partition: delivery_report.partition,
|
87
|
+
topic: delivery_report.topic_name,
|
88
|
+
delivery_report: delivery_report,
|
89
|
+
type: 'librdkafka.dispatch_error'
|
90
|
+
)
|
91
|
+
end
|
92
|
+
|
93
|
+
# Builds appropriate rdkafka error
|
94
|
+
# @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
|
95
|
+
# @return [::Rdkafka::RdkafkaError]
|
96
|
+
def build_error(delivery_report)
|
97
|
+
::Rdkafka::RdkafkaError.new(delivery_report.error)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
@@ -18,15 +18,19 @@ module WaterDrop
|
|
18
18
|
# @param client_name [String] rdkafka client name
|
19
19
|
# @param error [Rdkafka::Error] error that occurred
|
20
20
|
# @note It will only instrument on errors of the client of our producer
|
21
|
+
# @note When there is a particular message produce error (not internal error), the error
|
22
|
+
# is shipped via the delivery callback, not via error callback.
|
21
23
|
def call(client_name, error)
|
22
24
|
# Emit only errors related to our client
|
23
25
|
# Same as with statistics (more explanation there)
|
24
26
|
return unless @client_name == client_name
|
25
27
|
|
26
28
|
@monitor.instrument(
|
27
|
-
'error.
|
29
|
+
'error.occurred',
|
30
|
+
caller: self,
|
31
|
+
error: error,
|
28
32
|
producer_id: @producer_id,
|
29
|
-
|
33
|
+
type: 'librdkafka.error'
|
30
34
|
)
|
31
35
|
end
|
32
36
|
end
|
@@ -17,7 +17,7 @@ module WaterDrop
|
|
17
17
|
@producer_id = producer_id
|
18
18
|
@client_name = client_name
|
19
19
|
@monitor = monitor
|
20
|
-
@statistics_decorator = StatisticsDecorator.new
|
20
|
+
@statistics_decorator = ::Karafka::Core::Monitoring::StatisticsDecorator.new
|
21
21
|
end
|
22
22
|
|
23
23
|
# Emits decorated statistics to the monitor
|
@@ -1,15 +1,25 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module WaterDrop
|
4
|
+
# WaterDrop instrumentation related module
|
4
5
|
module Instrumentation
|
5
6
|
# Default listener that hooks up to our instrumentation and uses its events for logging
|
6
7
|
# It can be removed/replaced or anything without any harm to the Waterdrop flow
|
7
8
|
# @note It is a module as we can use it then as a part of the Karafka framework listener
|
8
9
|
# as well as we can use it standalone
|
9
|
-
class
|
10
|
-
# @param logger [Object]
|
11
|
-
|
10
|
+
class LoggerListener
|
11
|
+
# @param logger [Object] logger we want to use
|
12
|
+
# @param log_messages [Boolean] Should we report the messages content (payload and metadata)
|
13
|
+
# with each message operation.
|
14
|
+
#
|
15
|
+
# This can be extensive, especially when producing a lot of messages. We provide this
|
16
|
+
# despite the fact that we only report payloads in debug, because Rails by default operates
|
17
|
+
# with debug level. This means, that when working with Rails in development, every single
|
18
|
+
# payload dispatched will go to logs. In majority of the cases this is extensive and simply
|
19
|
+
# floods the end user.
|
20
|
+
def initialize(logger, log_messages: true)
|
12
21
|
@logger = logger
|
22
|
+
@log_messages = log_messages
|
13
23
|
end
|
14
24
|
|
15
25
|
# @param event [Dry::Events::Event] event that happened with the details
|
@@ -17,6 +27,9 @@ module WaterDrop
|
|
17
27
|
message = event[:message]
|
18
28
|
|
19
29
|
info(event, "Async producing of a message to '#{message[:topic]}' topic")
|
30
|
+
|
31
|
+
return unless log_messages?
|
32
|
+
|
20
33
|
debug(event, message)
|
21
34
|
end
|
22
35
|
|
@@ -25,6 +38,9 @@ module WaterDrop
|
|
25
38
|
message = event[:message]
|
26
39
|
|
27
40
|
info(event, "Sync producing of a message to '#{message[:topic]}' topic")
|
41
|
+
|
42
|
+
return unless log_messages?
|
43
|
+
|
28
44
|
debug(event, message)
|
29
45
|
end
|
30
46
|
|
@@ -34,6 +50,9 @@ module WaterDrop
|
|
34
50
|
topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
|
35
51
|
|
36
52
|
info(event, "Async producing of #{messages.size} messages to #{topics_count} topics")
|
53
|
+
|
54
|
+
return unless log_messages?
|
55
|
+
|
37
56
|
debug(event, messages)
|
38
57
|
end
|
39
58
|
|
@@ -43,6 +62,9 @@ module WaterDrop
|
|
43
62
|
topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
|
44
63
|
|
45
64
|
info(event, "Sync producing of #{messages.size} messages to #{topics_count} topics")
|
65
|
+
|
66
|
+
return unless log_messages?
|
67
|
+
|
46
68
|
debug(event, messages)
|
47
69
|
end
|
48
70
|
|
@@ -51,6 +73,9 @@ module WaterDrop
|
|
51
73
|
message = event[:message]
|
52
74
|
|
53
75
|
info(event, "Buffering of a message to '#{message[:topic]}' topic")
|
76
|
+
|
77
|
+
return unless log_messages?
|
78
|
+
|
54
79
|
debug(event, [message])
|
55
80
|
end
|
56
81
|
|
@@ -59,6 +84,9 @@ module WaterDrop
|
|
59
84
|
messages = event[:messages]
|
60
85
|
|
61
86
|
info(event, "Buffering of #{messages.size} messages")
|
87
|
+
|
88
|
+
return unless log_messages?
|
89
|
+
|
62
90
|
debug(event, [messages, messages.size])
|
63
91
|
end
|
64
92
|
|
@@ -67,15 +95,9 @@ module WaterDrop
|
|
67
95
|
messages = event[:messages]
|
68
96
|
|
69
97
|
info(event, "Async flushing of #{messages.size} messages from the buffer")
|
70
|
-
debug(event, messages)
|
71
|
-
end
|
72
98
|
|
73
|
-
|
74
|
-
def on_buffer_flushed_async_error(event)
|
75
|
-
messages = event[:messages]
|
76
|
-
error = event[:error]
|
99
|
+
return unless log_messages?
|
77
100
|
|
78
|
-
error(event, "Async flushing of #{messages.size} failed due to: #{error}")
|
79
101
|
debug(event, messages)
|
80
102
|
end
|
81
103
|
|
@@ -84,34 +106,57 @@ module WaterDrop
|
|
84
106
|
messages = event[:messages]
|
85
107
|
|
86
108
|
info(event, "Sync flushing of #{messages.size} messages from the buffer")
|
109
|
+
|
110
|
+
return unless log_messages?
|
111
|
+
|
87
112
|
debug(event, messages)
|
88
113
|
end
|
89
114
|
|
90
115
|
# @param event [Dry::Events::Event] event that happened with the details
|
91
|
-
def
|
92
|
-
|
93
|
-
error = event[:error]
|
94
|
-
|
95
|
-
error(event, "Sync flushing of #{messages.size} failed due to: #{error}")
|
96
|
-
debug(event, messages)
|
116
|
+
def on_buffer_purged(event)
|
117
|
+
info(event, 'Successfully purging buffer')
|
97
118
|
end
|
98
119
|
|
99
120
|
# @param event [Dry::Events::Event] event that happened with the details
|
100
121
|
def on_producer_closed(event)
|
101
|
-
info
|
102
|
-
debug event, ''
|
122
|
+
info(event, 'Closing producer')
|
103
123
|
end
|
104
124
|
|
105
125
|
# @param event [Dry::Events::Event] event that happened with the error details
|
106
|
-
def
|
126
|
+
def on_error_occurred(event)
|
107
127
|
error = event[:error]
|
128
|
+
type = event[:type]
|
129
|
+
|
130
|
+
error(event, "Error occurred: #{error} - #{type}")
|
131
|
+
end
|
132
|
+
|
133
|
+
# @param event [Dry::Events::Event] event that happened with the details
|
134
|
+
def on_transaction_started(event)
|
135
|
+
info(event, 'Starting transaction')
|
136
|
+
end
|
108
137
|
|
109
|
-
|
110
|
-
|
138
|
+
# @param event [Dry::Events::Event] event that happened with the details
|
139
|
+
def on_transaction_aborted(event)
|
140
|
+
info(event, 'Aborting transaction')
|
141
|
+
end
|
142
|
+
|
143
|
+
# @param event [Dry::Events::Event] event that happened with the details
|
144
|
+
def on_transaction_committed(event)
|
145
|
+
info(event, 'Committing transaction')
|
146
|
+
end
|
147
|
+
|
148
|
+
# @param event [Dry::Events::Event] event that happened with the details
|
149
|
+
def on_transaction_finished(event)
|
150
|
+
info(event, 'Processing transaction')
|
111
151
|
end
|
112
152
|
|
113
153
|
private
|
114
154
|
|
155
|
+
# @return [Boolean] should we report the messages details in the debug mode.
|
156
|
+
def log_messages?
|
157
|
+
@log_messages
|
158
|
+
end
|
159
|
+
|
115
160
|
# @param event [Dry::Events::Event] event that happened with the details
|
116
161
|
# @param log_message [String] message we want to publish
|
117
162
|
def debug(event, log_message)
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module WaterDrop
|
4
|
+
module Instrumentation
|
5
|
+
# WaterDrop instrumentation monitor that we use to publish events
|
6
|
+
# By default uses our internal notifications bus but can be used with
|
7
|
+
# `ActiveSupport::Notifications` as well
|
8
|
+
class Monitor < ::Karafka::Core::Monitoring::Monitor
|
9
|
+
# @param notifications_bus [Object] either our internal notifications bus or
|
10
|
+
# `ActiveSupport::Notifications`
|
11
|
+
# @param namespace [String, nil] namespace for events or nil if no namespace
|
12
|
+
def initialize(
|
13
|
+
notifications_bus = WaterDrop::Instrumentation::Notifications.new,
|
14
|
+
namespace = nil
|
15
|
+
)
|
16
|
+
super(notifications_bus, namespace)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/{water_drop/instrumentation/monitor.rb → waterdrop/instrumentation/notifications.rb}
RENAMED
@@ -2,13 +2,8 @@
|
|
2
2
|
|
3
3
|
module WaterDrop
|
4
4
|
module Instrumentation
|
5
|
-
#
|
6
|
-
|
7
|
-
# same time, which means that you might have for example file logging and NewRelic at the same
|
8
|
-
# time
|
9
|
-
# @note This class acts as a singleton because we are only permitted to have single monitor
|
10
|
-
# per running process (just as logger)
|
11
|
-
class Monitor < Dry::Monitor::Notifications
|
5
|
+
# Notifications bus used to hook up external monitoring services to monitor how WaterDrop works
|
6
|
+
class Notifications < ::Karafka::Core::Monitoring::Notifications
|
12
7
|
# List of events that we support in the system and to which a monitor client can hook up
|
13
8
|
# @note The non-error ones support timestamp benchmarking
|
14
9
|
EVENTS = %w[
|
@@ -17,28 +12,31 @@ module WaterDrop
|
|
17
12
|
message.produced_async
|
18
13
|
message.produced_sync
|
19
14
|
message.acknowledged
|
15
|
+
message.purged
|
20
16
|
message.buffered
|
21
17
|
|
22
18
|
messages.produced_async
|
23
19
|
messages.produced_sync
|
24
20
|
messages.buffered
|
25
21
|
|
22
|
+
transaction.started
|
23
|
+
transaction.committed
|
24
|
+
transaction.aborted
|
25
|
+
transaction.finished
|
26
|
+
|
26
27
|
buffer.flushed_async
|
27
|
-
buffer.flushed_async.error
|
28
28
|
buffer.flushed_sync
|
29
|
-
buffer.
|
29
|
+
buffer.purged
|
30
30
|
|
31
31
|
statistics.emitted
|
32
32
|
|
33
|
-
error.
|
33
|
+
error.occurred
|
34
34
|
].freeze
|
35
35
|
|
36
|
-
private_constant :EVENTS
|
37
|
-
|
38
36
|
# @return [WaterDrop::Instrumentation::Notifications] notifications bus with all supported events registered
|
39
37
|
def initialize
|
40
|
-
super
|
41
|
-
EVENTS.each(
|
38
|
+
super
|
39
|
+
EVENTS.each { |event| register_event(event) }
|
42
40
|
end
|
43
41
|
end
|
44
42
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
{"title":"WaterDrop producer example dashboard","description":"This dashboard include example setup for monitoring activity of your WaterDrop producer","widgets":[{"id":243951318,"definition":{"title":"Messages produced","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"produced sync","formula":"query1"},{"alias":"produced async","formula":"query2"},{"alias":"flushed sync","formula":"query3"},{"alias":"flushed async","formula":"query4"},{"alias":"acknowledged","formula":"query5"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.produced_sync{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:waterdrop.produced_async{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:waterdrop.flushed_sync{*}.as_count()","data_source":"metrics","name":"query3"},{"query":"sum:waterdrop.flushed_async{*}.as_count()","data_source":"metrics","name":"query4"},{"query":"sum:waterdrop.acknowledged{*}.as_count()","data_source":"metrics","name":"query5"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":1979626566852990,"definition":{"title":"Messages buffer size","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"max","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.buffer.size.max{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]}},{"id":243951221,"definition":{"title":"Kafka broker API 
calls","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"API calls","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.calls{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951952,"definition":{"title":"Producer queue size","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Queue size average","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"max:waterdrop.queue.size.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Queue size max","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"max:waterdrop.queue.size.max{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951263,"definition":{"title":"Producer queue latency","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Average latency","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency 
p95","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.p95{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p99","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.p99{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951276,"definition":{"title":"Producer network latency","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Average latency","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.request_size.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p95","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.network.latency.p95{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p99","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.network.latency.p99{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243954928,"definition":{"title":"Producer 
errors","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.error_occurred{*}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"auto","id":"rnr-kgh-dna"}
|