waterdrop 2.0.7 → 2.6.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/FUNDING.yml +1 -0
- data/.github/workflows/ci.yml +22 -11
- data/.ruby-version +1 -1
- data/CHANGELOG.md +200 -0
- data/Gemfile +0 -2
- data/Gemfile.lock +32 -75
- data/README.md +22 -275
- data/certs/cert_chain.pem +26 -0
- data/config/locales/errors.yml +33 -0
- data/docker-compose.yml +19 -12
- data/lib/waterdrop/clients/buffered.rb +90 -0
- data/lib/waterdrop/clients/dummy.rb +69 -0
- data/lib/waterdrop/clients/rdkafka.rb +34 -0
- data/lib/{water_drop → waterdrop}/config.rb +39 -16
- data/lib/waterdrop/contracts/config.rb +43 -0
- data/lib/waterdrop/contracts/message.rb +64 -0
- data/lib/{water_drop → waterdrop}/errors.rb +14 -7
- data/lib/waterdrop/instrumentation/callbacks/delivery.rb +102 -0
- data/lib/{water_drop → waterdrop}/instrumentation/callbacks/error.rb +6 -2
- data/lib/{water_drop → waterdrop}/instrumentation/callbacks/statistics.rb +1 -1
- data/lib/{water_drop/instrumentation/stdout_listener.rb → waterdrop/instrumentation/logger_listener.rb} +66 -21
- data/lib/waterdrop/instrumentation/monitor.rb +20 -0
- data/lib/{water_drop/instrumentation/monitor.rb → waterdrop/instrumentation/notifications.rb} +12 -14
- data/lib/waterdrop/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/waterdrop/instrumentation/vendors/datadog/metrics_listener.rb +210 -0
- data/lib/waterdrop/middleware.rb +50 -0
- data/lib/{water_drop → waterdrop}/producer/async.rb +40 -4
- data/lib/{water_drop → waterdrop}/producer/buffer.rb +12 -30
- data/lib/{water_drop → waterdrop}/producer/builder.rb +6 -11
- data/lib/{water_drop → waterdrop}/producer/sync.rb +44 -15
- data/lib/waterdrop/producer/transactions.rb +170 -0
- data/lib/waterdrop/producer.rb +308 -0
- data/lib/{water_drop → waterdrop}/version.rb +1 -1
- data/lib/waterdrop.rb +28 -2
- data/renovate.json +6 -0
- data/waterdrop.gemspec +14 -11
- data.tar.gz.sig +0 -0
- metadata +71 -111
- metadata.gz.sig +0 -0
- data/certs/mensfeld.pem +0 -25
- data/config/errors.yml +0 -6
- data/lib/water_drop/contracts/config.rb +0 -26
- data/lib/water_drop/contracts/message.rb +0 -42
- data/lib/water_drop/instrumentation/callbacks/delivery.rb +0 -30
- data/lib/water_drop/instrumentation/callbacks/statistics_decorator.rb +0 -77
- data/lib/water_drop/instrumentation/callbacks_manager.rb +0 -39
- data/lib/water_drop/instrumentation.rb +0 -20
- data/lib/water_drop/patches/rdkafka/bindings.rb +0 -42
- data/lib/water_drop/patches/rdkafka/producer.rb +0 -20
- data/lib/water_drop/producer/dummy_client.rb +0 -32
- data/lib/water_drop/producer.rb +0 -162
- data/lib/water_drop.rb +0 -36
- /data/lib/{water_drop → waterdrop}/contracts.rb +0 -0
- /data/lib/{water_drop → waterdrop}/producer/status.rb +0 -0
@@ -5,11 +5,14 @@
|
|
5
5
|
module WaterDrop
|
6
6
|
# Configuration object for setting up all options required by WaterDrop
|
7
7
|
class Config
|
8
|
-
include
|
8
|
+
include ::Karafka::Core::Configurable
|
9
9
|
|
10
10
|
# Defaults for kafka settings, that will be overwritten only if not present already
|
11
11
|
KAFKA_DEFAULTS = {
|
12
|
-
'client.id'
|
12
|
+
'client.id': 'waterdrop',
|
13
|
+
# emit librdkafka statistics every five seconds. This is used in instrumentation.
|
14
|
+
# When disabled, part of metrics will not be published and available.
|
15
|
+
'statistics.interval.ms': 5_000
|
13
16
|
}.freeze
|
14
17
|
|
15
18
|
private_constant :KAFKA_DEFAULTS
|
@@ -22,7 +25,7 @@ module WaterDrop
|
|
22
25
|
setting(
|
23
26
|
:id,
|
24
27
|
default: false,
|
25
|
-
constructor: ->(id) { id || SecureRandom.
|
28
|
+
constructor: ->(id) { id || SecureRandom.hex(6) }
|
26
29
|
)
|
27
30
|
# option [Instance] logger that we want to use
|
28
31
|
# @note Due to how rdkafka works, this setting is global for all the producers
|
@@ -47,13 +50,41 @@ module WaterDrop
|
|
47
50
|
# delivery report. In a really robust systems, this describes the min-delivery time
|
48
51
|
# for a single sync message when produced in isolation
|
49
52
|
setting :wait_timeout, default: 0.005 # 5 milliseconds
|
53
|
+
# option [Boolean] should we upon detecting full librdkafka queue backoff and retry or should
|
54
|
+
# we raise an exception.
|
55
|
+
# When this is set to `true`, upon full queue, we won't raise an error. There will be error
|
56
|
+
# in the `error.occurred` notification pipeline with a proper type as while this is
|
57
|
+
# recoverable, in a high number it still may mean issues.
|
58
|
+
# Waiting is one of the recommended strategies.
|
59
|
+
setting :wait_on_queue_full, default: true
|
60
|
+
# option [Integer] how long (in seconds) should we backoff before a retry when queue is full
|
61
|
+
# The retry will happen with the same message and backoff should give us some time to
|
62
|
+
# dispatch previously buffered messages.
|
63
|
+
setting :wait_backoff_on_queue_full, default: 0.1
|
64
|
+
# option [Numeric] how many seconds should we wait with the backoff on queue having space for
|
65
|
+
# more messages before re-raising the error.
|
66
|
+
setting :wait_timeout_on_queue_full, default: 10
|
67
|
+
# option [Numeric] How long to wait before retrying a retryable transaction related error
|
68
|
+
setting :wait_backoff_on_transaction_command, default: 0.5
|
69
|
+
# option [Numeric] How many times to retry a retryable transaction related error before
|
70
|
+
# giving up
|
71
|
+
setting :max_attempts_on_transaction_command, default: 5
|
72
|
+
|
50
73
|
# option [Boolean] should we send messages. Setting this to false can be really useful when
|
51
74
|
# testing and or developing because when set to false, won't actually ping Kafka but will
|
52
75
|
# run all the validations, etc
|
53
76
|
setting :deliver, default: true
|
77
|
+
# option [Class] class for usage when creating the underlying client used to dispatch messages
|
78
|
+
setting :client_class, default: Clients::Rdkafka
|
54
79
|
# rdkafka options
|
55
80
|
# @see https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
|
56
81
|
setting :kafka, default: {}
|
82
|
+
# Middleware chain that can be expanded with useful middleware steps
|
83
|
+
setting(
|
84
|
+
:middleware,
|
85
|
+
default: false,
|
86
|
+
constructor: ->(middleware) { middleware || WaterDrop::Middleware.new }
|
87
|
+
)
|
57
88
|
|
58
89
|
# Configuration method
|
59
90
|
# @yield Runs a block of code providing a config singleton instance to it
|
@@ -63,10 +94,13 @@ module WaterDrop
|
|
63
94
|
yield(config)
|
64
95
|
|
65
96
|
merge_kafka_defaults!(config)
|
66
|
-
|
97
|
+
|
98
|
+
Contracts::Config.new.validate!(config.to_h, Errors::ConfigurationInvalidError)
|
67
99
|
|
68
100
|
::Rdkafka::Config.logger = config.logger
|
69
101
|
end
|
102
|
+
|
103
|
+
self
|
70
104
|
end
|
71
105
|
|
72
106
|
private
|
@@ -74,7 +108,7 @@ module WaterDrop
|
|
74
108
|
# Propagates the kafka setting defaults unless they are already present
|
75
109
|
# This makes it easier to set some values that users usually don't change but still allows them
|
76
110
|
# to overwrite the whole hash if they want to
|
77
|
-
# @param config [
|
111
|
+
# @param config [Karafka::Core::Configurable::Node] config of this producer
|
78
112
|
def merge_kafka_defaults!(config)
|
79
113
|
KAFKA_DEFAULTS.each do |key, value|
|
80
114
|
next if config.kafka.key?(key)
|
@@ -82,16 +116,5 @@ module WaterDrop
|
|
82
116
|
config.kafka[key] = value
|
83
117
|
end
|
84
118
|
end
|
85
|
-
|
86
|
-
# Validates the configuration and if anything is wrong, will raise an exception
|
87
|
-
# @param config_hash [Hash] config hash with setup details
|
88
|
-
# @raise [WaterDrop::Errors::ConfigurationInvalidError] raised when something is wrong with
|
89
|
-
# the configuration
|
90
|
-
def validate!(config_hash)
|
91
|
-
result = Contracts::Config.new.call(config_hash)
|
92
|
-
return true if result.success?
|
93
|
-
|
94
|
-
raise Errors::ConfigurationInvalidError, result.errors.to_h
|
95
|
-
end
|
96
119
|
end
|
97
120
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module WaterDrop
|
4
|
+
module Contracts
|
5
|
+
# Contract with validation rules for WaterDrop configuration details
|
6
|
+
class Config < ::Karafka::Core::Contractable::Contract
|
7
|
+
configure do |config|
|
8
|
+
config.error_messages = YAML.safe_load(
|
9
|
+
File.read(
|
10
|
+
File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
|
11
|
+
)
|
12
|
+
).fetch('en').fetch('validations').fetch('config')
|
13
|
+
end
|
14
|
+
|
15
|
+
required(:id) { |val| val.is_a?(String) && !val.empty? }
|
16
|
+
required(:logger) { |val| !val.nil? }
|
17
|
+
required(:deliver) { |val| [true, false].include?(val) }
|
18
|
+
required(:max_payload_size) { |val| val.is_a?(Integer) && val >= 1 }
|
19
|
+
required(:max_wait_timeout) { |val| val.is_a?(Numeric) && val >= 0 }
|
20
|
+
required(:wait_timeout) { |val| val.is_a?(Numeric) && val.positive? }
|
21
|
+
required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
|
22
|
+
required(:wait_on_queue_full) { |val| [true, false].include?(val) }
|
23
|
+
required(:wait_backoff_on_queue_full) { |val| val.is_a?(Numeric) && val >= 0 }
|
24
|
+
required(:wait_timeout_on_queue_full) { |val| val.is_a?(Numeric) && val >= 0 }
|
25
|
+
|
26
|
+
# rdkafka allows both symbols and strings as keys for config but then casts them to strings
|
27
|
+
# This can be confusing, so we expect all keys to be symbolized
|
28
|
+
virtual do |config, errors|
|
29
|
+
next true unless errors.empty?
|
30
|
+
|
31
|
+
errors = []
|
32
|
+
|
33
|
+
config
|
34
|
+
.fetch(:kafka)
|
35
|
+
.keys
|
36
|
+
.reject { |key| key.is_a?(Symbol) }
|
37
|
+
.each { |key| errors << [[:kafka, key], :kafka_key_must_be_a_symbol] }
|
38
|
+
|
39
|
+
errors
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module WaterDrop
|
4
|
+
module Contracts
|
5
|
+
# Contract with validation rules for validating that all the message options that
|
6
|
+
# we provide to producer are valid and usable
|
7
|
+
class Message < ::Karafka::Core::Contractable::Contract
|
8
|
+
configure do |config|
|
9
|
+
config.error_messages = YAML.safe_load(
|
10
|
+
File.read(
|
11
|
+
File.join(WaterDrop.gem_root, 'config', 'locales', 'errors.yml')
|
12
|
+
)
|
13
|
+
).fetch('en').fetch('validations').fetch('message')
|
14
|
+
end
|
15
|
+
|
16
|
+
# Regex to check that topic has a valid format
|
17
|
+
TOPIC_REGEXP = /\A(\w|-|\.)+\z/
|
18
|
+
|
19
|
+
private_constant :TOPIC_REGEXP
|
20
|
+
|
21
|
+
attr_reader :max_payload_size
|
22
|
+
|
23
|
+
# @param max_payload_size [Integer] max payload size
|
24
|
+
def initialize(max_payload_size:)
|
25
|
+
super()
|
26
|
+
@max_payload_size = max_payload_size
|
27
|
+
end
|
28
|
+
|
29
|
+
required(:topic) do |val|
|
30
|
+
(val.is_a?(String) || val.is_a?(Symbol)) && TOPIC_REGEXP.match?(val.to_s)
|
31
|
+
end
|
32
|
+
|
33
|
+
required(:payload) { |val| val.nil? || val.is_a?(String) }
|
34
|
+
optional(:key) { |val| val.nil? || (val.is_a?(String) && !val.empty?) }
|
35
|
+
optional(:partition) { |val| val.is_a?(Integer) && val >= -1 }
|
36
|
+
optional(:partition_key) { |val| val.nil? || (val.is_a?(String) && !val.empty?) }
|
37
|
+
optional(:timestamp) { |val| val.nil? || (val.is_a?(Time) || val.is_a?(Integer)) }
|
38
|
+
optional(:headers) { |val| val.nil? || val.is_a?(Hash) }
|
39
|
+
|
40
|
+
virtual do |message, errors|
|
41
|
+
next true unless errors.empty?
|
42
|
+
next true unless message.key?(:headers)
|
43
|
+
next true if message[:headers].nil?
|
44
|
+
|
45
|
+
errors = []
|
46
|
+
|
47
|
+
message.fetch(:headers).each do |key, value|
|
48
|
+
errors << [%i[headers], :invalid_key_type] unless key.is_a?(String)
|
49
|
+
errors << [%i[headers], :invalid_value_type] unless value.is_a?(String)
|
50
|
+
end
|
51
|
+
|
52
|
+
errors
|
53
|
+
end
|
54
|
+
|
55
|
+
virtual do |message, errors, validator|
|
56
|
+
next true unless errors.empty?
|
57
|
+
next if message[:payload].nil? # tombstone payload
|
58
|
+
next true if message[:payload].bytesize <= validator.max_payload_size
|
59
|
+
|
60
|
+
[[%i[payload], :max_size]]
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -29,15 +29,22 @@ module WaterDrop
|
|
29
29
|
# contact us as it is an error.
|
30
30
|
StatusInvalidError = Class.new(BaseError)
|
31
31
|
|
32
|
-
# Raised when during
|
33
|
-
|
34
|
-
attr_reader :dispatched_messages
|
32
|
+
# Raised when there is an inline error during single message produce operations
|
33
|
+
ProduceError = Class.new(BaseError)
|
35
34
|
|
36
|
-
|
35
|
+
# Raise it within a transaction to abort it
|
36
|
+
AbortTransaction = Class.new(BaseError)
|
37
|
+
|
38
|
+
# Raised when during messages producing something bad happened inline
|
39
|
+
class ProduceManyError < ProduceError
|
40
|
+
attr_reader :dispatched
|
41
|
+
|
42
|
+
# @param dispatched [Array<Rdkafka::Producer::DeliveryHandle>] handlers of the
|
37
43
|
# messages that we've dispatched
|
38
|
-
|
39
|
-
|
40
|
-
|
44
|
+
# @param message [String] error message
|
45
|
+
def initialize(dispatched, message)
|
46
|
+
super(message)
|
47
|
+
@dispatched = dispatched
|
41
48
|
end
|
42
49
|
end
|
43
50
|
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module WaterDrop
|
4
|
+
module Instrumentation
|
5
|
+
module Callbacks
|
6
|
+
# Creates a callable that we want to run upon each message delivery or failure
|
7
|
+
#
|
8
|
+
# @note We don't have to provide client_name here as this callback is per client instance
|
9
|
+
#
|
10
|
+
# @note We do not consider `message.purge` as an error for transactional producers, because
|
11
|
+
# this is a standard behaviour for not yet dispatched messages on aborted transactions.
|
12
|
+
# We do however still want to instrument it for traceability.
|
13
|
+
class Delivery
|
14
|
+
# Error emitted when a message was not yet dispatched and was purged from the queue
|
15
|
+
RD_KAFKA_RESP_PURGE_QUEUE = -152
|
16
|
+
|
17
|
+
# Error emitted when a message was purged while it was dispatched
|
18
|
+
RD_KAFKA_RESP_PURGE_INFLIGHT = -151
|
19
|
+
|
20
|
+
# Errors related to queue purging that is expected in transactions
|
21
|
+
PURGE_ERRORS = [RD_KAFKA_RESP_PURGE_INFLIGHT, RD_KAFKA_RESP_PURGE_QUEUE].freeze
|
22
|
+
|
23
|
+
private_constant :RD_KAFKA_RESP_PURGE_QUEUE, :RD_KAFKA_RESP_PURGE_INFLIGHT, :PURGE_ERRORS
|
24
|
+
|
25
|
+
# @param producer_id [String] id of the current producer
|
26
|
+
# @param transactional [Boolean] is this handle for a transactional or regular producer
|
27
|
+
# @param monitor [WaterDrop::Instrumentation::Monitor] monitor we are using
|
28
|
+
def initialize(producer_id, transactional, monitor)
|
29
|
+
@producer_id = producer_id
|
30
|
+
@transactional = transactional
|
31
|
+
@monitor = monitor
|
32
|
+
end
|
33
|
+
|
34
|
+
# Emits delivery details to the monitor
|
35
|
+
# @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
|
36
|
+
def call(delivery_report)
|
37
|
+
error_code = delivery_report.error.to_i
|
38
|
+
|
39
|
+
if error_code.zero?
|
40
|
+
instrument_acknowledged(delivery_report)
|
41
|
+
|
42
|
+
elsif @transactional && PURGE_ERRORS.include?(error_code)
|
43
|
+
instrument_purged(delivery_report)
|
44
|
+
else
|
45
|
+
instrument_error(delivery_report)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
# @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
|
52
|
+
def instrument_acknowledged(delivery_report)
|
53
|
+
@monitor.instrument(
|
54
|
+
'message.acknowledged',
|
55
|
+
caller: self,
|
56
|
+
producer_id: @producer_id,
|
57
|
+
offset: delivery_report.offset,
|
58
|
+
partition: delivery_report.partition,
|
59
|
+
topic: delivery_report.topic_name,
|
60
|
+
delivery_report: delivery_report
|
61
|
+
)
|
62
|
+
end
|
63
|
+
|
64
|
+
# @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
|
65
|
+
def instrument_purged(delivery_report)
|
66
|
+
@monitor.instrument(
|
67
|
+
'message.purged',
|
68
|
+
caller: self,
|
69
|
+
error: build_error(delivery_report),
|
70
|
+
producer_id: @producer_id,
|
71
|
+
offset: delivery_report.offset,
|
72
|
+
partition: delivery_report.partition,
|
73
|
+
topic: delivery_report.topic_name,
|
74
|
+
delivery_report: delivery_report
|
75
|
+
)
|
76
|
+
end
|
77
|
+
|
78
|
+
# @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
|
79
|
+
def instrument_error(delivery_report)
|
80
|
+
@monitor.instrument(
|
81
|
+
'error.occurred',
|
82
|
+
caller: self,
|
83
|
+
error: build_error(delivery_report),
|
84
|
+
producer_id: @producer_id,
|
85
|
+
offset: delivery_report.offset,
|
86
|
+
partition: delivery_report.partition,
|
87
|
+
topic: delivery_report.topic_name,
|
88
|
+
delivery_report: delivery_report,
|
89
|
+
type: 'librdkafka.dispatch_error'
|
90
|
+
)
|
91
|
+
end
|
92
|
+
|
93
|
+
# Builds appropriate rdkafka error
|
94
|
+
# @param delivery_report [Rdkafka::Producer::DeliveryReport] delivery report
|
95
|
+
# @return [::Rdkafka::RdkafkaError]
|
96
|
+
def build_error(delivery_report)
|
97
|
+
::Rdkafka::RdkafkaError.new(delivery_report.error)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
@@ -18,15 +18,19 @@ module WaterDrop
|
|
18
18
|
# @param client_name [String] rdkafka client name
|
19
19
|
# @param error [Rdkafka::Error] error that occurred
|
20
20
|
# @note It will only instrument on errors of the client of our producer
|
21
|
+
# @note When there is a particular message produce error (not internal error), the error
|
22
|
+
# is shipped via the delivery callback, not via error callback.
|
21
23
|
def call(client_name, error)
|
22
24
|
# Emit only errors related to our client
|
23
25
|
# Same as with statistics (mor explanation there)
|
24
26
|
return unless @client_name == client_name
|
25
27
|
|
26
28
|
@monitor.instrument(
|
27
|
-
'error.
|
29
|
+
'error.occurred',
|
30
|
+
caller: self,
|
31
|
+
error: error,
|
28
32
|
producer_id: @producer_id,
|
29
|
-
|
33
|
+
type: 'librdkafka.error'
|
30
34
|
)
|
31
35
|
end
|
32
36
|
end
|
@@ -17,7 +17,7 @@ module WaterDrop
|
|
17
17
|
@producer_id = producer_id
|
18
18
|
@client_name = client_name
|
19
19
|
@monitor = monitor
|
20
|
-
@statistics_decorator = StatisticsDecorator.new
|
20
|
+
@statistics_decorator = ::Karafka::Core::Monitoring::StatisticsDecorator.new
|
21
21
|
end
|
22
22
|
|
23
23
|
# Emits decorated statistics to the monitor
|
@@ -1,15 +1,25 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module WaterDrop
|
4
|
+
# WaterDrop instrumentation related module
|
4
5
|
module Instrumentation
|
5
6
|
# Default listener that hooks up to our instrumentation and uses its events for logging
|
6
7
|
# It can be removed/replaced or anything without any harm to the Waterdrop flow
|
7
8
|
# @note It is a module as we can use it then as a part of the Karafka framework listener
|
8
9
|
# as well as we can use it standalone
|
9
|
-
class StdoutListener
|
10
|
-
# @param logger [Object]
|
11
|
-
|
10
|
+
class LoggerListener
|
11
|
+
# @param logger [Object] logger we want to use
|
12
|
+
# @param log_messages [Boolean] Should we report the messages content (payload and metadata)
|
13
|
+
# with each message operation.
|
14
|
+
#
|
15
|
+
# This can be extensive, especially when producing a lot of messages. We provide this
|
16
|
+
# despite the fact that we only report payloads in debug, because Rails by default operates
|
17
|
+
# with debug level. This means, that when working with Rails in development, every single
|
18
|
+
# payload dispatched will go to logs. In majority of the cases this is extensive and simply
|
19
|
+
# floods the end user.
|
20
|
+
def initialize(logger, log_messages: true)
|
12
21
|
@logger = logger
|
22
|
+
@log_messages = log_messages
|
13
23
|
end
|
14
24
|
|
15
25
|
# @param event [Dry::Events::Event] event that happened with the details
|
@@ -17,6 +27,9 @@ module WaterDrop
|
|
17
27
|
message = event[:message]
|
18
28
|
|
19
29
|
info(event, "Async producing of a message to '#{message[:topic]}' topic")
|
30
|
+
|
31
|
+
return unless log_messages?
|
32
|
+
|
20
33
|
debug(event, message)
|
21
34
|
end
|
22
35
|
|
@@ -25,6 +38,9 @@ module WaterDrop
|
|
25
38
|
message = event[:message]
|
26
39
|
|
27
40
|
info(event, "Sync producing of a message to '#{message[:topic]}' topic")
|
41
|
+
|
42
|
+
return unless log_messages?
|
43
|
+
|
28
44
|
debug(event, message)
|
29
45
|
end
|
30
46
|
|
@@ -34,6 +50,9 @@ module WaterDrop
|
|
34
50
|
topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
|
35
51
|
|
36
52
|
info(event, "Async producing of #{messages.size} messages to #{topics_count} topics")
|
53
|
+
|
54
|
+
return unless log_messages?
|
55
|
+
|
37
56
|
debug(event, messages)
|
38
57
|
end
|
39
58
|
|
@@ -43,6 +62,9 @@ module WaterDrop
|
|
43
62
|
topics_count = messages.map { |message| "'#{message[:topic]}'" }.uniq.count
|
44
63
|
|
45
64
|
info(event, "Sync producing of #{messages.size} messages to #{topics_count} topics")
|
65
|
+
|
66
|
+
return unless log_messages?
|
67
|
+
|
46
68
|
debug(event, messages)
|
47
69
|
end
|
48
70
|
|
@@ -51,6 +73,9 @@ module WaterDrop
|
|
51
73
|
message = event[:message]
|
52
74
|
|
53
75
|
info(event, "Buffering of a message to '#{message[:topic]}' topic")
|
76
|
+
|
77
|
+
return unless log_messages?
|
78
|
+
|
54
79
|
debug(event, [message])
|
55
80
|
end
|
56
81
|
|
@@ -59,6 +84,9 @@ module WaterDrop
|
|
59
84
|
messages = event[:messages]
|
60
85
|
|
61
86
|
info(event, "Buffering of #{messages.size} messages")
|
87
|
+
|
88
|
+
return unless log_messages?
|
89
|
+
|
62
90
|
debug(event, [messages, messages.size])
|
63
91
|
end
|
64
92
|
|
@@ -67,15 +95,9 @@ module WaterDrop
|
|
67
95
|
messages = event[:messages]
|
68
96
|
|
69
97
|
info(event, "Async flushing of #{messages.size} messages from the buffer")
|
70
|
-
debug(event, messages)
|
71
|
-
end
|
72
98
|
|
73
|
-
|
74
|
-
def on_buffer_flushed_async_error(event)
|
75
|
-
messages = event[:messages]
|
76
|
-
error = event[:error]
|
99
|
+
return unless log_messages?
|
77
100
|
|
78
|
-
error(event, "Async flushing of #{messages.size} failed due to: #{error}")
|
79
101
|
debug(event, messages)
|
80
102
|
end
|
81
103
|
|
@@ -84,34 +106,57 @@ module WaterDrop
|
|
84
106
|
messages = event[:messages]
|
85
107
|
|
86
108
|
info(event, "Sync flushing of #{messages.size} messages from the buffer")
|
109
|
+
|
110
|
+
return unless log_messages?
|
111
|
+
|
87
112
|
debug(event, messages)
|
88
113
|
end
|
89
114
|
|
90
115
|
# @param event [Dry::Events::Event] event that happened with the details
|
91
|
-
def on_buffer_flushed_sync_error(event)
|
92
|
-
|
93
|
-
error = event[:error]
|
94
|
-
|
95
|
-
error(event, "Sync flushing of #{messages.size} failed due to: #{error}")
|
96
|
-
debug(event, messages)
|
116
|
+
def on_buffer_purged(event)
|
117
|
+
info(event, 'Successfully purging buffer')
|
97
118
|
end
|
98
119
|
|
99
120
|
# @param event [Dry::Events::Event] event that happened with the details
|
100
121
|
def on_producer_closed(event)
|
101
|
-
info
|
102
|
-
debug event, ''
|
122
|
+
info(event, 'Closing producer')
|
103
123
|
end
|
104
124
|
|
105
125
|
# @param event [Dry::Events::Event] event that happened with the error details
|
106
|
-
def
|
126
|
+
def on_error_occurred(event)
|
107
127
|
error = event[:error]
|
128
|
+
type = event[:type]
|
129
|
+
|
130
|
+
error(event, "Error occurred: #{error} - #{type}")
|
131
|
+
end
|
132
|
+
|
133
|
+
# @param event [Dry::Events::Event] event that happened with the details
|
134
|
+
def on_transaction_started(event)
|
135
|
+
info(event, 'Starting transaction')
|
136
|
+
end
|
108
137
|
|
109
|
-
|
110
|
-
|
138
|
+
# @param event [Dry::Events::Event] event that happened with the details
|
139
|
+
def on_transaction_aborted(event)
|
140
|
+
info(event, 'Aborting transaction')
|
141
|
+
end
|
142
|
+
|
143
|
+
# @param event [Dry::Events::Event] event that happened with the details
|
144
|
+
def on_transaction_committed(event)
|
145
|
+
info(event, 'Committing transaction')
|
146
|
+
end
|
147
|
+
|
148
|
+
# @param event [Dry::Events::Event] event that happened with the details
|
149
|
+
def on_transaction_finished(event)
|
150
|
+
info(event, 'Processing transaction')
|
111
151
|
end
|
112
152
|
|
113
153
|
private
|
114
154
|
|
155
|
+
# @return [Boolean] should we report the messages details in the debug mode.
|
156
|
+
def log_messages?
|
157
|
+
@log_messages
|
158
|
+
end
|
159
|
+
|
115
160
|
# @param event [Dry::Events::Event] event that happened with the details
|
116
161
|
# @param log_message [String] message we want to publish
|
117
162
|
def debug(event, log_message)
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module WaterDrop
|
4
|
+
module Instrumentation
|
5
|
+
# WaterDrop instrumentation monitor that we use to publish events
|
6
|
+
# By default uses our internal notifications bus but can be used with
|
7
|
+
# `ActiveSupport::Notifications` as well
|
8
|
+
class Monitor < ::Karafka::Core::Monitoring::Monitor
|
9
|
+
# @param notifications_bus [Object] either our internal notifications bus or
|
10
|
+
# `ActiveSupport::Notifications`
|
11
|
+
# @param namespace [String, nil] namespace for events or nil if no namespace
|
12
|
+
def initialize(
|
13
|
+
notifications_bus = WaterDrop::Instrumentation::Notifications.new,
|
14
|
+
namespace = nil
|
15
|
+
)
|
16
|
+
super(notifications_bus, namespace)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/{water_drop/instrumentation/monitor.rb → waterdrop/instrumentation/notifications.rb}
RENAMED
@@ -2,13 +2,8 @@
|
|
2
2
|
|
3
3
|
module WaterDrop
|
4
4
|
module Instrumentation
|
5
|
-
#
|
6
|
-
|
7
|
-
# same time, which means that you might have for example file logging and NewRelic at the same
|
8
|
-
# time
|
9
|
-
# @note This class acts as a singleton because we are only permitted to have single monitor
|
10
|
-
# per running process (just as logger)
|
11
|
-
class Monitor < Dry::Monitor::Notifications
|
5
|
+
# Instrumented is used to hookup external monitoring services to monitor how WaterDrop works
|
6
|
+
class Notifications < ::Karafka::Core::Monitoring::Notifications
|
12
7
|
# List of events that we support in the system and to which a monitor client can hook up
|
13
8
|
# @note The non-error once support timestamp benchmarking
|
14
9
|
EVENTS = %w[
|
@@ -17,28 +12,31 @@ module WaterDrop
|
|
17
12
|
message.produced_async
|
18
13
|
message.produced_sync
|
19
14
|
message.acknowledged
|
15
|
+
message.purged
|
20
16
|
message.buffered
|
21
17
|
|
22
18
|
messages.produced_async
|
23
19
|
messages.produced_sync
|
24
20
|
messages.buffered
|
25
21
|
|
22
|
+
transaction.started
|
23
|
+
transaction.committed
|
24
|
+
transaction.aborted
|
25
|
+
transaction.finished
|
26
|
+
|
26
27
|
buffer.flushed_async
|
27
|
-
buffer.flushed_async.error
|
28
28
|
buffer.flushed_sync
|
29
|
-
buffer.flushed_sync.error
|
29
|
+
buffer.purged
|
30
30
|
|
31
31
|
statistics.emitted
|
32
32
|
|
33
|
-
error.
|
33
|
+
error.occurred
|
34
34
|
].freeze
|
35
35
|
|
36
|
-
private_constant :EVENTS
|
37
|
-
|
38
36
|
# @return [WaterDrop::Instrumentation::Monitor] monitor instance for system instrumentation
|
39
37
|
def initialize
|
40
|
-
super
|
41
|
-
EVENTS.each(
|
38
|
+
super
|
39
|
+
EVENTS.each { |event| register_event(event) }
|
42
40
|
end
|
43
41
|
end
|
44
42
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
{"title":"WaterDrop producer example dashboard","description":"This dashboard include example setup for monitoring activity of your WaterDrop producer","widgets":[{"id":243951318,"definition":{"title":"Messages produced","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"produced sync","formula":"query1"},{"alias":"produced async","formula":"query2"},{"alias":"flushed sync","formula":"query3"},{"alias":"flushed async","formula":"query4"},{"alias":"acknowledged","formula":"query5"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.produced_sync{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:waterdrop.produced_async{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:waterdrop.flushed_sync{*}.as_count()","data_source":"metrics","name":"query3"},{"query":"sum:waterdrop.flushed_async{*}.as_count()","data_source":"metrics","name":"query4"},{"query":"sum:waterdrop.acknowledged{*}.as_count()","data_source":"metrics","name":"query5"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":1979626566852990,"definition":{"title":"Messages buffer size","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"max","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.buffer.size.max{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]}},{"id":243951221,"definition":{"title":"Kafka broker API 
calls","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"API calls","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.calls{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951952,"definition":{"title":"Producer queue size","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Queue size average","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"max:waterdrop.queue.size.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Queue size max","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"max:waterdrop.queue.size.max{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951263,"definition":{"title":"Producer queue latency","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Average latency","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency 
p95","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.p95{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p99","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.queue.latency.p99{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243951276,"definition":{"title":"Producer network latency","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Average latency","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.request_size.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p95","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.network.latency.p95{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"alias":"Latency p99","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"avg:waterdrop.network.latency.p99{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}},{"id":243954928,"definition":{"title":"Producer 
errors","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:waterdrop.error_occurred{*}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","label":"","min":"auto","max":"auto"}}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"auto","id":"rnr-kgh-dna"}
|