karafka 1.4.0 → 2.0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +89 -18
- data/.ruby-version +1 -1
- data/CHANGELOG.md +365 -1
- data/CONTRIBUTING.md +10 -19
- data/Gemfile +6 -0
- data/Gemfile.lock +56 -112
- data/LICENSE +17 -0
- data/LICENSE-COMM +89 -0
- data/LICENSE-LGPL +165 -0
- data/README.md +61 -68
- data/bin/benchmarks +85 -0
- data/bin/create_token +22 -0
- data/bin/integrations +272 -0
- data/bin/karafka +10 -0
- data/bin/scenario +29 -0
- data/bin/stress_many +13 -0
- data/bin/stress_one +13 -0
- data/certs/cert_chain.pem +26 -0
- data/certs/karafka-pro.pem +11 -0
- data/config/errors.yml +59 -38
- data/docker-compose.yml +10 -3
- data/karafka.gemspec +18 -21
- data/lib/active_job/karafka.rb +21 -0
- data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
- data/lib/karafka/active_job/consumer.rb +26 -0
- data/lib/karafka/active_job/dispatcher.rb +38 -0
- data/lib/karafka/active_job/job_extensions.rb +34 -0
- data/lib/karafka/active_job/job_options_contract.rb +21 -0
- data/lib/karafka/active_job/routing/extensions.rb +33 -0
- data/lib/karafka/admin.rb +63 -0
- data/lib/karafka/app.rb +15 -20
- data/lib/karafka/base_consumer.rb +197 -31
- data/lib/karafka/cli/info.rb +44 -10
- data/lib/karafka/cli/install.rb +22 -12
- data/lib/karafka/cli/server.rb +17 -42
- data/lib/karafka/cli.rb +4 -3
- data/lib/karafka/connection/client.rb +379 -89
- data/lib/karafka/connection/listener.rb +250 -38
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +84 -0
- data/lib/karafka/connection/pauses_manager.rb +46 -0
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/connection/rebalance_manager.rb +78 -0
- data/lib/karafka/contracts/base.rb +17 -0
- data/lib/karafka/contracts/config.rb +88 -11
- data/lib/karafka/contracts/consumer_group.rb +21 -184
- data/lib/karafka/contracts/consumer_group_topic.rb +35 -11
- data/lib/karafka/contracts/server_cli_options.rb +19 -18
- data/lib/karafka/contracts.rb +1 -1
- data/lib/karafka/env.rb +46 -0
- data/lib/karafka/errors.rb +21 -21
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/helpers/colorize.rb +20 -0
- data/lib/karafka/helpers/multi_delegator.rb +2 -2
- data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
- data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
- data/lib/karafka/instrumentation/logger.rb +6 -10
- data/lib/karafka/instrumentation/logger_listener.rb +174 -0
- data/lib/karafka/instrumentation/monitor.rb +13 -61
- data/lib/karafka/instrumentation/notifications.rb +53 -0
- data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
- data/lib/karafka/instrumentation.rb +21 -0
- data/lib/karafka/licenser.rb +75 -0
- data/lib/karafka/messages/batch_metadata.rb +45 -0
- data/lib/karafka/messages/builders/batch_metadata.rb +39 -0
- data/lib/karafka/messages/builders/message.rb +39 -0
- data/lib/karafka/messages/builders/messages.rb +34 -0
- data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
- data/lib/karafka/messages/messages.rb +64 -0
- data/lib/karafka/{params → messages}/metadata.rb +4 -6
- data/lib/karafka/messages/seek.rb +9 -0
- data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
- data/lib/karafka/pro/active_job/consumer.rb +46 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
- data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
- data/lib/karafka/pro/base_consumer.rb +107 -0
- data/lib/karafka/pro/contracts/base.rb +21 -0
- data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +69 -0
- data/lib/karafka/pro/loader.rb +76 -0
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/pro/processing/coordinator.rb +85 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
- data/lib/karafka/pro/processing/partitioner.rb +58 -0
- data/lib/karafka/pro/processing/scheduler.rb +56 -0
- data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
- data/lib/karafka/pro/routing/topic_extensions.rb +74 -0
- data/lib/karafka/pro.rb +13 -0
- data/lib/karafka/process.rb +1 -0
- data/lib/karafka/processing/coordinator.rb +103 -0
- data/lib/karafka/processing/coordinators_buffer.rb +54 -0
- data/lib/karafka/processing/executor.rb +126 -0
- data/lib/karafka/processing/executors_buffer.rb +88 -0
- data/lib/karafka/processing/jobs/base.rb +55 -0
- data/lib/karafka/processing/jobs/consume.rb +47 -0
- data/lib/karafka/processing/jobs/revoked.rb +22 -0
- data/lib/karafka/processing/jobs/shutdown.rb +23 -0
- data/lib/karafka/processing/jobs_builder.rb +29 -0
- data/lib/karafka/processing/jobs_queue.rb +144 -0
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/result.rb +37 -0
- data/lib/karafka/processing/scheduler.rb +22 -0
- data/lib/karafka/processing/worker.rb +91 -0
- data/lib/karafka/processing/workers_batch.rb +27 -0
- data/lib/karafka/railtie.rb +127 -0
- data/lib/karafka/routing/builder.rb +26 -23
- data/lib/karafka/routing/consumer_group.rb +37 -17
- data/lib/karafka/routing/consumer_mapper.rb +1 -2
- data/lib/karafka/routing/proxy.rb +9 -16
- data/lib/karafka/routing/router.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +53 -0
- data/lib/karafka/routing/subscription_groups_builder.rb +54 -0
- data/lib/karafka/routing/topic.rb +65 -24
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +51 -0
- data/lib/karafka/serialization/json/deserializer.rb +6 -15
- data/lib/karafka/server.rb +67 -26
- data/lib/karafka/setup/config.rb +153 -175
- data/lib/karafka/status.rb +14 -5
- data/lib/karafka/templates/example_consumer.rb.erb +16 -0
- data/lib/karafka/templates/karafka.rb.erb +17 -55
- data/lib/karafka/time_trackers/base.rb +19 -0
- data/lib/karafka/time_trackers/pause.rb +92 -0
- data/lib/karafka/time_trackers/poll.rb +65 -0
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +46 -16
- data.tar.gz.sig +0 -0
- metadata +145 -171
- metadata.gz.sig +0 -0
- data/.github/FUNDING.yml +0 -3
- data/MIT-LICENCE +0 -18
- data/certs/mensfeld.pem +0 -25
- data/lib/karafka/attributes_map.rb +0 -62
- data/lib/karafka/backends/inline.rb +0 -16
- data/lib/karafka/base_responder.rb +0 -226
- data/lib/karafka/cli/flow.rb +0 -48
- data/lib/karafka/code_reloader.rb +0 -67
- data/lib/karafka/connection/api_adapter.rb +0 -161
- data/lib/karafka/connection/batch_delegator.rb +0 -55
- data/lib/karafka/connection/builder.rb +0 -18
- data/lib/karafka/connection/message_delegator.rb +0 -36
- data/lib/karafka/consumers/batch_metadata.rb +0 -10
- data/lib/karafka/consumers/callbacks.rb +0 -71
- data/lib/karafka/consumers/includer.rb +0 -64
- data/lib/karafka/consumers/responders.rb +0 -24
- data/lib/karafka/consumers/single_params.rb +0 -15
- data/lib/karafka/contracts/responder_usage.rb +0 -54
- data/lib/karafka/fetcher.rb +0 -42
- data/lib/karafka/helpers/class_matcher.rb +0 -88
- data/lib/karafka/helpers/config_retriever.rb +0 -46
- data/lib/karafka/helpers/inflector.rb +0 -26
- data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
- data/lib/karafka/params/batch_metadata.rb +0 -26
- data/lib/karafka/params/builders/batch_metadata.rb +0 -30
- data/lib/karafka/params/builders/params.rb +0 -38
- data/lib/karafka/params/builders/params_batch.rb +0 -25
- data/lib/karafka/params/params_batch.rb +0 -60
- data/lib/karafka/patches/ruby_kafka.rb +0 -47
- data/lib/karafka/persistence/client.rb +0 -29
- data/lib/karafka/persistence/consumers.rb +0 -45
- data/lib/karafka/persistence/topics.rb +0 -48
- data/lib/karafka/responders/builder.rb +0 -36
- data/lib/karafka/responders/topic.rb +0 -55
- data/lib/karafka/routing/topic_mapper.rb +0 -53
- data/lib/karafka/serialization/json/serializer.rb +0 -31
- data/lib/karafka/setup/configurators/water_drop.rb +0 -36
- data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Instrumentation
|
5
|
+
module Callbacks
|
6
|
+
# Statistics callback handler
|
7
|
+
# @see `WaterDrop::Instrumentation::Callbacks::Statistics` for details on why we decorate
|
8
|
+
# those statistics
|
9
|
+
class Statistics
|
10
|
+
# @param subscription_group_id [String] id of the current subscription group
|
11
|
+
# @param consumer_group_id [String] id of the current consumer group
|
12
|
+
# @param client_name [String] rdkafka client name
|
13
|
+
# @param monitor [WaterDrop::Instrumentation::Monitor] monitor we are using
|
14
|
+
def initialize(subscription_group_id, consumer_group_id, client_name, monitor)
|
15
|
+
@subscription_group_id = subscription_group_id
|
16
|
+
@consumer_group_id = consumer_group_id
|
17
|
+
@client_name = client_name
|
18
|
+
@monitor = monitor
|
19
|
+
@statistics_decorator = ::Karafka::Core::Monitoring::StatisticsDecorator.new
|
20
|
+
end
|
21
|
+
|
22
|
+
# Emits decorated statistics to the monitor
|
23
|
+
# @param statistics [Hash] rdkafka statistics
|
24
|
+
def call(statistics)
|
25
|
+
# Emit only statistics related to our client
|
26
|
+
# rdkafka does not have per-instance statistics hook, thus we need to make sure that we
|
27
|
+
# emit only stats that are related to the current client. Otherwise we would emit all of
|
28
|
+
# them all the time.
|
29
|
+
return unless @client_name == statistics['name']
|
30
|
+
|
31
|
+
@monitor.instrument(
|
32
|
+
'statistics.emitted',
|
33
|
+
subscription_group_id: @subscription_group_id,
|
34
|
+
consumer_group_id: @consumer_group_id,
|
35
|
+
statistics: @statistics_decorator.call(statistics)
|
36
|
+
)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -20,7 +20,6 @@ module Karafka
|
|
20
20
|
# @param _args Any arguments that we don't care about but that are needed in order to
|
21
21
|
# make this logger compatible with the default Ruby one
|
22
22
|
def initialize(*_args)
|
23
|
-
ensure_dir_exists
|
24
23
|
super(target)
|
25
24
|
self.level = ENV_MAP[Karafka.env] || ENV_MAP['default']
|
26
25
|
end
|
@@ -29,18 +28,11 @@ module Karafka
|
|
29
28
|
|
30
29
|
# @return [Karafka::Helpers::MultiDelegator] multi delegator instance
|
31
30
|
# to which we will be writing logs
|
32
|
-
# We use this approach to log stuff to file and to the STDOUT at the same time
|
31
|
+
# We use this approach to log stuff to file and to the $stdout at the same time
|
33
32
|
def target
|
34
33
|
Karafka::Helpers::MultiDelegator
|
35
34
|
.delegate(:write, :close)
|
36
|
-
.to(
|
37
|
-
end
|
38
|
-
|
39
|
-
# Makes sure the log directory exists as long as we can write to it
|
40
|
-
def ensure_dir_exists
|
41
|
-
FileUtils.mkdir_p(File.dirname(log_path))
|
42
|
-
rescue Errno::EACCES
|
43
|
-
nil
|
35
|
+
.to(*[$stdout, file].compact)
|
44
36
|
end
|
45
37
|
|
46
38
|
# @return [Pathname] Path to a file to which we should log
|
@@ -51,7 +43,11 @@ module Karafka
|
|
51
43
|
# @return [File] file to which we want to write our logs
|
52
44
|
# @note File is being opened in append mode ('a')
|
53
45
|
def file
|
46
|
+
FileUtils.mkdir_p(File.dirname(log_path))
|
47
|
+
|
54
48
|
@file ||= File.open(log_path, 'a')
|
49
|
+
rescue Errno::EACCES, Errno::EROFS
|
50
|
+
nil
|
55
51
|
end
|
56
52
|
end
|
57
53
|
end
|
@@ -0,0 +1,174 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Instrumentation
|
5
|
+
# Default listener that hooks up to our instrumentation and uses its events for logging
|
6
|
+
# It can be removed/replaced or anything without any harm to the Karafka app flow.
|
7
|
+
class LoggerListener
|
8
|
+
# Log levels that we use in this particular listener
|
9
|
+
USED_LOG_LEVELS = %i[
|
10
|
+
debug
|
11
|
+
info
|
12
|
+
warn
|
13
|
+
error
|
14
|
+
fatal
|
15
|
+
].freeze
|
16
|
+
|
17
|
+
# Logs each message fetching attempt
|
18
|
+
#
|
19
|
+
# @param event [Dry::Events::Event] event details including payload
|
20
|
+
def on_connection_listener_fetch_loop(event)
|
21
|
+
listener = event[:caller]
|
22
|
+
debug "[#{listener.id}] Polling messages..."
|
23
|
+
end
|
24
|
+
|
25
|
+
# Logs about messages that we've received from Kafka
|
26
|
+
#
|
27
|
+
# @param event [Dry::Events::Event] event details including payload
|
28
|
+
def on_connection_listener_fetch_loop_received(event)
|
29
|
+
listener = event[:caller]
|
30
|
+
time = event[:time]
|
31
|
+
messages_count = event[:messages_buffer].size
|
32
|
+
|
33
|
+
message = "[#{listener.id}] Polled #{messages_count} messages in #{time}ms"
|
34
|
+
|
35
|
+
# We don't want the "polled 0" in dev as it would spam the log
|
36
|
+
# Instead we publish only info when there was anything we could poll and fail over to the
|
37
|
+
# zero notifications when in debug mode
|
38
|
+
messages_count.zero? ? debug(message) : info(message)
|
39
|
+
end
|
40
|
+
|
41
|
+
# Prints info about the fact that a given job has started
|
42
|
+
#
|
43
|
+
# @param event [Dry::Events::Event] event details including payload
|
44
|
+
def on_worker_process(event)
|
45
|
+
job = event[:job]
|
46
|
+
job_type = job.class.to_s.split('::').last
|
47
|
+
consumer = job.executor.topic.consumer
|
48
|
+
topic = job.executor.topic.name
|
49
|
+
info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} started"
|
50
|
+
end
|
51
|
+
|
52
|
+
# Prints info about the fact that a given job has finished
|
53
|
+
#
|
54
|
+
# @param event [Dry::Events::Event] event details including payload
|
55
|
+
def on_worker_processed(event)
|
56
|
+
job = event[:job]
|
57
|
+
time = event[:time]
|
58
|
+
job_type = job.class.to_s.split('::').last
|
59
|
+
consumer = job.executor.topic.consumer
|
60
|
+
topic = job.executor.topic.name
|
61
|
+
info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} finished in #{time}ms"
|
62
|
+
end
|
63
|
+
|
64
|
+
# Logs info about system signals that Karafka received and prints backtrace for threads in
|
65
|
+
# case of ttin
|
66
|
+
#
|
67
|
+
# @param event [Dry::Events::Event] event details including payload
|
68
|
+
def on_process_notice_signal(event)
|
69
|
+
info "Received #{event[:signal]} system signal"
|
70
|
+
|
71
|
+
# We print backtrace only for ttin
|
72
|
+
return unless event[:signal] == :SIGTTIN
|
73
|
+
|
74
|
+
# Inspired by Sidekiq
|
75
|
+
Thread.list.each do |thread|
|
76
|
+
tid = (thread.object_id ^ ::Process.pid).to_s(36)
|
77
|
+
|
78
|
+
warn "Thread TID-#{tid} #{thread['label']}"
|
79
|
+
|
80
|
+
if thread.backtrace
|
81
|
+
warn thread.backtrace.join("\n")
|
82
|
+
else
|
83
|
+
warn '<no backtrace available>'
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
# Logs info that we're initializing Karafka app.
|
89
|
+
#
|
90
|
+
# @param _event [Dry::Events::Event] event details including payload
|
91
|
+
def on_app_initializing(_event)
|
92
|
+
info 'Initializing Karafka framework'
|
93
|
+
end
|
94
|
+
|
95
|
+
# Logs info that we're running Karafka app.
|
96
|
+
#
|
97
|
+
# @param _event [Dry::Events::Event] event details including payload
|
98
|
+
def on_app_running(_event)
|
99
|
+
info "Running in #{RUBY_DESCRIPTION}"
|
100
|
+
info "Running Karafka #{Karafka::VERSION} server"
|
101
|
+
|
102
|
+
return if Karafka.pro?
|
103
|
+
|
104
|
+
info 'See LICENSE and the LGPL-3.0 for licensing details.'
|
105
|
+
end
|
106
|
+
|
107
|
+
# Logs info that we're going to stop the Karafka server.
|
108
|
+
#
|
109
|
+
# @param _event [Dry::Events::Event] event details including payload
|
110
|
+
def on_app_stopping(_event)
|
111
|
+
info 'Stopping Karafka server'
|
112
|
+
end
|
113
|
+
|
114
|
+
# Logs info that we stopped the Karafka server.
|
115
|
+
#
|
116
|
+
# @param _event [Dry::Events::Event] event details including payload
|
117
|
+
def on_app_stopped(_event)
|
118
|
+
info 'Stopped Karafka server'
|
119
|
+
end
|
120
|
+
|
121
|
+
# There are many types of errors that can occur in many places, but we provide a single
|
122
|
+
# handler for all of them to simplify error instrumentation.
|
123
|
+
# @param event [Dry::Events::Event] event details including payload
|
124
|
+
def on_error_occurred(event)
|
125
|
+
type = event[:type]
|
126
|
+
error = event[:error]
|
127
|
+
details = (error.backtrace || []).join("\n")
|
128
|
+
|
129
|
+
case type
|
130
|
+
when 'consumer.consume.error'
|
131
|
+
error "Consumer consuming error: #{error}"
|
132
|
+
error details
|
133
|
+
when 'consumer.revoked.error'
|
134
|
+
error "Consumer on revoked failed due to an error: #{error}"
|
135
|
+
error details
|
136
|
+
when 'consumer.shutdown.error'
|
137
|
+
error "Consumer on shutdown failed due to an error: #{error}"
|
138
|
+
error details
|
139
|
+
when 'worker.process.error'
|
140
|
+
fatal "Worker processing failed due to an error: #{error}"
|
141
|
+
fatal details
|
142
|
+
when 'connection.listener.fetch_loop.error'
|
143
|
+
error "Listener fetch loop error: #{error}"
|
144
|
+
error details
|
145
|
+
when 'licenser.expired'
|
146
|
+
error error
|
147
|
+
error details
|
148
|
+
when 'runner.call.error'
|
149
|
+
fatal "Runner crashed due to an error: #{error}"
|
150
|
+
fatal details
|
151
|
+
when 'app.stopping.error'
|
152
|
+
error 'Forceful Karafka server stop'
|
153
|
+
when 'librdkafka.error'
|
154
|
+
error "librdkafka internal error occurred: #{error}"
|
155
|
+
error details
|
156
|
+
# Those will only occur when retries in the client fail and when they did not stop after
|
157
|
+
# backoffs
|
158
|
+
when 'connection.client.poll.error'
|
159
|
+
error "Data polling error occurred: #{error}"
|
160
|
+
error details
|
161
|
+
else
|
162
|
+
# This should never happen. Please contact the maintainers
|
163
|
+
raise Errors::UnsupportedCaseError, event
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
USED_LOG_LEVELS.each do |log_level|
|
168
|
+
define_method log_level do |*args|
|
169
|
+
Karafka.logger.send(log_level, *args)
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
174
|
+
end
|
@@ -1,69 +1,21 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Karafka
|
4
|
-
# Namespace for all the things related with Karafka instrumentation process
|
5
4
|
module Instrumentation
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
|
10
|
-
|
11
|
-
# @note This class acts as a singleton because we are only permitted to have single monitor
|
12
|
-
# per running process (just as logger)
|
13
|
-
class Monitor < Dry::Monitor::Notifications
|
14
|
-
# List of events that we support in the system and to which a monitor client can hook up
|
15
|
-
# @note The non-error ones support timestamp benchmarking
|
16
|
-
# @note Depending on Karafka extensions and additional engines, this might not be the
|
17
|
-
# complete list of all the events. Please use the #available_events on fully loaded
|
18
|
-
# Karafka system to determine all of the events you can use.
|
19
|
-
# Last 4 events are from WaterDrop but for convenience we use the same monitor for the
|
20
|
-
# whole karafka ecosystem
|
21
|
-
BASE_EVENTS = %w[
|
22
|
-
params.params.deserialize
|
23
|
-
params.params.deserialize.error
|
24
|
-
connection.listener.before_fetch_loop
|
25
|
-
connection.listener.fetch_loop
|
26
|
-
connection.listener.fetch_loop.error
|
27
|
-
connection.client.fetch_loop.error
|
28
|
-
connection.batch_delegator.call
|
29
|
-
connection.message_delegator.call
|
30
|
-
fetcher.call.error
|
31
|
-
backends.inline.process
|
32
|
-
process.notice_signal
|
33
|
-
consumers.responders.respond_with
|
34
|
-
async_producer.call.error
|
35
|
-
async_producer.call.retry
|
36
|
-
sync_producer.call.error
|
37
|
-
sync_producer.call.retry
|
38
|
-
app.initializing
|
39
|
-
app.initialized
|
40
|
-
app.running
|
41
|
-
app.stopping
|
42
|
-
app.stopping.error
|
43
|
-
app.stopped
|
44
|
-
].freeze
|
5
|
+
# Karafka instrumentation monitor that we use to publish events
|
6
|
+
# By default uses our internal notifications bus but can be used with
|
7
|
+
# `ActiveSupport::Notifications` as well
|
8
|
+
class Monitor < ::Karafka::Core::Monitoring::Monitor
|
9
|
+
attr_reader :notifications_bus
|
45
10
|
|
46
|
-
|
47
|
-
|
48
|
-
# @return [Karafka::Instrumentation::Monitor] monitor instance for system instrumentation
|
49
|
-
def initialize
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
# Allows us to subscribe to events with a code that will be yielded upon events
|
55
|
-
# @param event_name_or_listener [String, Object] name of the event we want to subscribe to
|
56
|
-
# or a listener if we decide to go with object listener
|
57
|
-
def subscribe(event_name_or_listener)
|
58
|
-
return super unless event_name_or_listener.is_a?(String)
|
59
|
-
return super if available_events.include?(event_name_or_listener)
|
60
|
-
|
61
|
-
raise Errors::UnregisteredMonitorEventError, event_name_or_listener
|
62
|
-
end
|
63
|
-
|
64
|
-
# @return [Array<String>] names of available events to which we can subscribe
|
65
|
-
def available_events
|
66
|
-
__bus__.events.keys
|
11
|
+
# @param notifications_bus [Object] either our internal notifications bus or
|
12
|
+
# `ActiveSupport::Notifications`
|
13
|
+
# @param namespace [String, nil] namespace for events or nil if no namespace
|
14
|
+
def initialize(
|
15
|
+
notifications_bus = ::Karafka::Instrumentation::Notifications.new,
|
16
|
+
namespace = nil
|
17
|
+
)
|
18
|
+
super(notifications_bus, namespace)
|
67
19
|
end
|
68
20
|
end
|
69
21
|
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
# Namespace for all the things related with Karafka instrumentation process
|
5
|
+
module Instrumentation
|
6
|
+
# Monitor is used to hookup external monitoring services to monitor how Karafka works
|
7
|
+
# It provides a standardized API for checking incoming messages/enqueueing etc
|
8
|
+
# Since it is a pub-sub based on dry-monitor, you can use as many subscribers/loggers at the
|
9
|
+
# same time, which means that you might have for example file logging and NewRelic at the same
|
10
|
+
# time
|
11
|
+
# @note This class acts as a singleton because we are only permitted to have single monitor
|
12
|
+
# per running process (just as logger)
|
13
|
+
class Notifications < Karafka::Core::Monitoring::Notifications
|
14
|
+
# List of events that we support in the system and to which a monitor client can hook up
|
15
|
+
# @note The non-error ones support timestamp benchmarking
|
16
|
+
# @note Depending on Karafka extensions and additional engines, this might not be the
|
17
|
+
# complete list of all the events. Please use the #available_events on fully loaded
|
18
|
+
# Karafka system to determine all of the events you can use.
|
19
|
+
EVENTS = %w[
|
20
|
+
app.initialized
|
21
|
+
app.running
|
22
|
+
app.stopping
|
23
|
+
app.stopped
|
24
|
+
|
25
|
+
consumer.consumed
|
26
|
+
consumer.revoked
|
27
|
+
consumer.shutdown
|
28
|
+
|
29
|
+
process.notice_signal
|
30
|
+
|
31
|
+
connection.listener.before_fetch_loop
|
32
|
+
connection.listener.fetch_loop
|
33
|
+
connection.listener.fetch_loop.received
|
34
|
+
|
35
|
+
worker.process
|
36
|
+
worker.processed
|
37
|
+
worker.completed
|
38
|
+
|
39
|
+
statistics.emitted
|
40
|
+
|
41
|
+
error.occurred
|
42
|
+
].freeze
|
43
|
+
|
44
|
+
private_constant :EVENTS
|
45
|
+
|
46
|
+
# @return [Karafka::Instrumentation::Notifications] notifications bus with all events registered
|
47
|
+
def initialize
|
48
|
+
super
|
49
|
+
EVENTS.each { |event| register_event(event) }
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -5,19 +5,19 @@ module Karafka
|
|
5
5
|
# Listener that sets a proc title with a nice descriptive value
|
6
6
|
class ProctitleListener
|
7
7
|
# Updates proc title to an initializing one
|
8
|
-
# @param _event [Dry::Events::Event] event details including payload
|
8
|
+
# @param _event [Karafka::Core::Monitoring::Event] event details including payload
|
9
9
|
def on_app_initializing(_event)
|
10
10
|
setproctitle('initializing')
|
11
11
|
end
|
12
12
|
|
13
13
|
# Updates proc title to a running one
|
14
|
-
# @param _event [Dry::Events::Event] event details including payload
|
14
|
+
# @param _event [Karafka::Core::Monitoring::Event] event details including payload
|
15
15
|
def on_app_running(_event)
|
16
16
|
setproctitle('running')
|
17
17
|
end
|
18
18
|
|
19
19
|
# Updates proc title to a stopping one
|
20
|
-
# @param _event [Dry::Events::Event] event details including payload
|
20
|
+
# @param _event [Karafka::Core::Monitoring::Event] event details including payload
|
21
21
|
def on_app_stopping(_event)
|
22
22
|
setproctitle('stopping')
|
23
23
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
{"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.connection.connects{*} by {host}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{*} by {type}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*}.as_count()","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing 
errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by 
{partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revocations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.revoked{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":5}},{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers 
(p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":5,"width":12,"height":3}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per 
topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 
1000"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumption_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Processing lag in ms (p95)","formula":"query1"},{"alias":"Processing lag in ms (max)","formula":"query2"},{"alias":"Processing lag in ms (avg)","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"},{"formula":"query2"},{"formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by 
{topic,partition}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / 
query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms polling time","formula":"query1"},{"alias":"max ms polling time","formula":"query2"},{"alias":"average ms polling time","formula":"query3"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.95percentile{*}"},{"name":"query2","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.max{*}"},{"name":"query3","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.avg{*}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":7,"is_column_break":true}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
|