karafka 2.0.0.beta5 → 2.0.0.rc3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +28 -0
- data/CONTRIBUTING.md +0 -5
- data/Gemfile.lock +12 -42
- data/LICENSE-COMM +1 -1
- data/README.md +44 -16
- data/bin/stress_many +1 -1
- data/bin/stress_one +1 -1
- data/config/errors.yml +52 -5
- data/docker-compose.yml +7 -0
- data/karafka.gemspec +2 -4
- data/lib/karafka/active_job/consumer.rb +2 -0
- data/lib/karafka/active_job/job_options_contract.rb +8 -2
- data/lib/karafka/base_consumer.rb +4 -6
- data/lib/karafka/cli/install.rb +15 -2
- data/lib/karafka/cli/server.rb +4 -2
- data/lib/karafka/connection/client.rb +20 -17
- data/lib/karafka/connection/listener.rb +12 -24
- data/lib/karafka/connection/pauses_manager.rb +0 -8
- data/lib/karafka/contracts/base.rb +2 -8
- data/lib/karafka/contracts/config.rb +71 -51
- data/lib/karafka/contracts/consumer_group.rb +25 -18
- data/lib/karafka/contracts/consumer_group_topic.rb +30 -16
- data/lib/karafka/contracts/server_cli_options.rb +18 -7
- data/lib/karafka/helpers/colorize.rb +20 -0
- data/lib/karafka/instrumentation/logger_listener.rb +8 -2
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +5 -2
- data/lib/karafka/pro/active_job/job_options_contract.rb +11 -6
- data/lib/karafka/pro/base_consumer.rb +21 -12
- data/lib/karafka/pro/contracts/base.rb +21 -0
- data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
- data/lib/karafka/pro/loader.rb +23 -3
- data/lib/karafka/pro/processing/coordinator.rb +51 -0
- data/lib/karafka/pro/processing/partitioner.rb +60 -0
- data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
- data/lib/karafka/pro/routing/{extensions.rb → topic_extensions.rb} +7 -1
- data/lib/karafka/processing/coordinator.rb +6 -2
- data/lib/karafka/processing/coordinators_buffer.rb +3 -7
- data/lib/karafka/processing/executor.rb +1 -1
- data/lib/karafka/processing/jobs_queue.rb +11 -0
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/worker.rb +4 -2
- data/lib/karafka/setup/config.rb +9 -3
- data/lib/karafka/templates/example_consumer.rb.erb +2 -2
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +2 -2
- data.tar.gz.sig +0 -0
- metadata +15 -34
- metadata.gz.sig +0 -0
@@ -18,15 +18,18 @@ module Karafka
|
|
18
18
|
# @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
|
19
19
|
# @return [Karafka::Connection::Listener] listener instance
|
20
20
|
def initialize(subscription_group, jobs_queue)
|
21
|
+
proc_config = ::Karafka::App.config.internal.processing
|
22
|
+
|
21
23
|
@id = SecureRandom.uuid
|
22
24
|
@subscription_group = subscription_group
|
23
25
|
@jobs_queue = jobs_queue
|
24
|
-
@jobs_builder = ::Karafka::App.config.internal.processing.jobs_builder
|
25
26
|
@coordinators = Processing::CoordinatorsBuffer.new
|
26
27
|
@client = Client.new(@subscription_group)
|
27
28
|
@executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
|
29
|
+
@jobs_builder = proc_config.jobs_builder
|
30
|
+
@partitioner = proc_config.partitioner_class.new(subscription_group)
|
28
31
|
# We reference scheduler here as it is much faster than fetching this each time
|
29
|
-
@scheduler =
|
32
|
+
@scheduler = proc_config.scheduler
|
30
33
|
# We keep one buffer for messages to preserve memory and not allocate extra objects
|
31
34
|
# We can do this that way because we always first schedule jobs using messages before we
|
32
35
|
# fetch another batch.
|
@@ -79,10 +82,6 @@ module Karafka
|
|
79
82
|
poll_and_remap_messages
|
80
83
|
end
|
81
84
|
|
82
|
-
# This will ensure, that in the next poll, we continue processing (if we get them back)
|
83
|
-
# partitions that we have paused
|
84
|
-
resume_assigned_partitions
|
85
|
-
|
86
85
|
# If there were revoked partitions, we need to wait on their jobs to finish before
|
87
86
|
# distributing consuming jobs as upon revoking, we might get assigned to the same
|
88
87
|
# partitions, thus getting their jobs. The revoking jobs need to finish before
|
@@ -159,8 +158,6 @@ module Karafka
|
|
159
158
|
|
160
159
|
revoked_partitions.each do |topic, partitions|
|
161
160
|
partitions.each do |partition|
|
162
|
-
# We revoke the coordinator here, so we do not have to revoke it in the revoke job
|
163
|
-
# itself (this happens prior to scheduling those jobs)
|
164
161
|
@coordinators.revoke(topic, partition)
|
165
162
|
|
166
163
|
# There may be a case where we have lost partition of which data we have never
|
@@ -204,17 +201,6 @@ module Karafka
|
|
204
201
|
)
|
205
202
|
end
|
206
203
|
|
207
|
-
# Revoked partition needs to be resumed if we were processing them earlier. This will do
|
208
|
-
# nothing to things that we are planning to process. Without this, things we get
|
209
|
-
# re-assigned would not be polled.
|
210
|
-
def resume_assigned_partitions
|
211
|
-
@client.rebalance_manager.assigned_partitions.each do |topic, partitions|
|
212
|
-
partitions.each do |partition|
|
213
|
-
@client.resume(topic, partition)
|
214
|
-
end
|
215
|
-
end
|
216
|
-
end
|
217
|
-
|
218
204
|
# Takes the messages per topic partition and enqueues processing jobs in threads using
|
219
205
|
# given scheduler.
|
220
206
|
def build_and_schedule_consumption_jobs
|
@@ -226,14 +212,16 @@ module Karafka
|
|
226
212
|
coordinator = @coordinators.find_or_create(topic, partition)
|
227
213
|
|
228
214
|
# Start work coordination for this topic partition
|
229
|
-
coordinator.start
|
215
|
+
coordinator.start(messages)
|
230
216
|
|
231
|
-
|
232
|
-
|
217
|
+
@partitioner.call(topic, messages) do |group_id, partition_messages|
|
218
|
+
# Count the job we're going to create here
|
219
|
+
coordinator.increment
|
233
220
|
|
234
|
-
|
221
|
+
executor = @executors.find_or_create(topic, partition, group_id)
|
235
222
|
|
236
|
-
|
223
|
+
jobs << @jobs_builder.consume(executor, partition_messages, coordinator)
|
224
|
+
end
|
237
225
|
end
|
238
226
|
|
239
227
|
@scheduler.schedule_consumption(@jobs_queue, jobs)
|
@@ -25,14 +25,6 @@ module Karafka
|
|
25
25
|
)
|
26
26
|
end
|
27
27
|
|
28
|
-
# Revokes pause tracker for a given topic partition
|
29
|
-
#
|
30
|
-
# @param topic [String] topic name
|
31
|
-
# @param partition [Integer] partition number
|
32
|
-
def revoke(topic, partition)
|
33
|
-
@pauses[topic].delete(partition)
|
34
|
-
end
|
35
|
-
|
36
28
|
# Resumes processing of partitions for which pause time has ended.
|
37
29
|
#
|
38
30
|
# @yieldparam [String] topic name
|
@@ -3,20 +3,14 @@
|
|
3
3
|
module Karafka
|
4
4
|
module Contracts
|
5
5
|
# Base contract for all Karafka contracts
|
6
|
-
class Base <
|
7
|
-
config.messages.load_paths << File.join(Karafka.gem_root, 'config', 'errors.yml')
|
8
|
-
|
6
|
+
class Base < ::WaterDrop::Contractable::Contract
|
9
7
|
# @param data [Hash] data for validation
|
10
8
|
# @return [Boolean] true if all good
|
11
9
|
# @raise [Errors::InvalidConfigurationError] invalid configuration error
|
12
10
|
# @note We use contracts only in the config validation context, so no need to add support
|
13
11
|
# for multiple error classes. It will be added when it will be needed.
|
14
12
|
def validate!(data)
|
15
|
-
|
16
|
-
|
17
|
-
return true if result.success?
|
18
|
-
|
19
|
-
raise Errors::InvalidConfigurationError, result.errors.to_h
|
13
|
+
super(data, Errors::InvalidConfigurationError)
|
20
14
|
end
|
21
15
|
end
|
22
16
|
end
|
@@ -9,70 +9,90 @@ module Karafka
|
|
9
9
|
# validated per each route (topic + consumer_group) because they can be overwritten,
|
10
10
|
# so we validate all of that once all the routes are defined and ready.
|
11
11
|
class Config < Base
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
configure do |config|
|
13
|
+
config.error_messages = YAML.safe_load(
|
14
|
+
File.read(
|
15
|
+
File.join(Karafka.gem_root, 'config', 'errors.yml')
|
16
|
+
)
|
17
|
+
).fetch('en').fetch('validations').fetch('config')
|
18
|
+
end
|
19
|
+
|
20
|
+
# License validity happens in the licenser. Here we do only the simple consistency checks
|
21
|
+
nested(:license) do
|
22
|
+
required(:token) { |val| [true, false].include?(val) || val.is_a?(String) }
|
23
|
+
required(:entity) { |val| val.is_a?(String) }
|
24
|
+
required(:expires_on) { |val| val.is_a?(Date) }
|
25
|
+
end
|
26
|
+
|
27
|
+
required(:client_id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
|
28
|
+
required(:concurrency) { |val| val.is_a?(Integer) && val.positive? }
|
29
|
+
required(:consumer_mapper) { |val| !val.nil? }
|
30
|
+
required(:consumer_persistence) { |val| [true, false].include?(val) }
|
31
|
+
required(:pause_timeout) { |val| val.is_a?(Integer) && val.positive? }
|
32
|
+
required(:pause_max_timeout) { |val| val.is_a?(Integer) && val.positive? }
|
33
|
+
required(:pause_with_exponential_backoff) { |val| [true, false].include?(val) }
|
34
|
+
required(:shutdown_timeout) { |val| val.is_a?(Integer) && val.positive? }
|
35
|
+
required(:max_wait_time) { |val| val.is_a?(Integer) && val.positive? }
|
36
|
+
required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
|
37
|
+
|
38
|
+
# We validate internals just to be sure, that they are present and working
|
39
|
+
nested(:internal) do
|
40
|
+
required(:status) { |val| !val.nil? }
|
41
|
+
required(:process) { |val| !val.nil? }
|
42
|
+
|
43
|
+
nested(:routing) do
|
44
|
+
required(:builder) { |val| !val.nil? }
|
45
|
+
required(:subscription_groups_builder) { |val| !val.nil? }
|
46
|
+
end
|
47
|
+
|
48
|
+
nested(:processing) do
|
49
|
+
required(:jobs_builder) { |val| !val.nil? }
|
50
|
+
required(:scheduler) { |val| !val.nil? }
|
51
|
+
required(:coordinator_class) { |val| !val.nil? }
|
52
|
+
required(:partitioner_class) { |val| !val.nil? }
|
18
53
|
end
|
19
54
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
required(:pause_timeout) { int? & gt?(0) }
|
25
|
-
required(:pause_max_timeout) { int? & gt?(0) }
|
26
|
-
required(:pause_with_exponential_backoff).filled(:bool?)
|
27
|
-
required(:shutdown_timeout) { int? & gt?(0) }
|
28
|
-
required(:max_wait_time) { int? & gt?(0) }
|
29
|
-
required(:kafka).filled(:hash)
|
30
|
-
|
31
|
-
# We validate internals just to be sure, that they are present and working
|
32
|
-
required(:internal).schema do
|
33
|
-
required(:status)
|
34
|
-
required(:process)
|
35
|
-
|
36
|
-
required(:routing).schema do
|
37
|
-
required(:builder)
|
38
|
-
required(:subscription_groups_builder)
|
39
|
-
end
|
40
|
-
|
41
|
-
required(:processing).schema do
|
42
|
-
required(:jobs_builder)
|
43
|
-
required(:scheduler)
|
44
|
-
required(:coordinator_class)
|
45
|
-
end
|
46
|
-
|
47
|
-
required(:active_job).schema do
|
48
|
-
required(:dispatcher)
|
49
|
-
required(:job_options_contract)
|
50
|
-
required(:consumer_class)
|
51
|
-
end
|
55
|
+
nested(:active_job) do
|
56
|
+
required(:dispatcher) { |val| !val.nil? }
|
57
|
+
required(:job_options_contract) { |val| !val.nil? }
|
58
|
+
required(:consumer_class) { |val| !val.nil? }
|
52
59
|
end
|
53
60
|
end
|
54
61
|
|
55
|
-
|
56
|
-
|
57
|
-
|
62
|
+
virtual do |data, errors|
|
63
|
+
next unless errors.empty?
|
64
|
+
|
65
|
+
detected_errors = []
|
58
66
|
|
59
|
-
|
67
|
+
data.fetch(:kafka).each_key do |key|
|
60
68
|
next if key.is_a?(Symbol)
|
61
69
|
|
62
|
-
|
70
|
+
detected_errors << [[:kafka, key], :key_must_be_a_symbol]
|
63
71
|
end
|
72
|
+
|
73
|
+
detected_errors
|
64
74
|
end
|
65
75
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
76
|
+
virtual do |data, errors|
|
77
|
+
next unless errors.empty?
|
78
|
+
|
79
|
+
pause_timeout = data.fetch(:pause_timeout)
|
80
|
+
pause_max_timeout = data.fetch(:pause_max_timeout)
|
81
|
+
|
82
|
+
next if pause_timeout <= pause_max_timeout
|
83
|
+
|
84
|
+
[[%i[pause_timeout], :max_timeout_vs_pause_max_timeout]]
|
70
85
|
end
|
71
86
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
87
|
+
virtual do |data, errors|
|
88
|
+
next unless errors.empty?
|
89
|
+
|
90
|
+
shutdown_timeout = data.fetch(:shutdown_timeout)
|
91
|
+
max_wait_time = data.fetch(:max_wait_time)
|
92
|
+
|
93
|
+
next if max_wait_time < shutdown_timeout
|
94
|
+
|
95
|
+
[[%i[shutdown_timeout], :shutdown_timeout_vs_max_wait_time]]
|
76
96
|
end
|
77
97
|
end
|
78
98
|
end
|
@@ -4,32 +4,39 @@ module Karafka
|
|
4
4
|
module Contracts
|
5
5
|
# Contract for single full route (consumer group + topics) validation.
|
6
6
|
class ConsumerGroup < Base
|
7
|
-
|
8
|
-
|
7
|
+
configure do |config|
|
8
|
+
config.error_messages = YAML.safe_load(
|
9
|
+
File.read(
|
10
|
+
File.join(Karafka.gem_root, 'config', 'errors.yml')
|
11
|
+
)
|
12
|
+
).fetch('en').fetch('validations').fetch('consumer_group')
|
13
|
+
end
|
9
14
|
|
10
|
-
|
15
|
+
required(:id) { |id| id.is_a?(String) && Contracts::TOPIC_REGEXP.match?(id) }
|
16
|
+
required(:topics) { |topics| topics.is_a?(Array) && !topics.empty? }
|
11
17
|
|
12
|
-
|
13
|
-
|
14
|
-
required(:topics).value(:array, :filled?)
|
15
|
-
end
|
18
|
+
virtual do |data, errors|
|
19
|
+
next unless errors.empty?
|
16
20
|
|
17
|
-
|
18
|
-
if value.is_a?(Array)
|
19
|
-
names = value.map { |topic| topic[:name] }
|
21
|
+
names = data.fetch(:topics).map { |topic| topic[:name] }
|
20
22
|
|
21
|
-
|
22
|
-
|
23
|
+
next if names.size == names.uniq.size
|
24
|
+
|
25
|
+
[[%i[topics], :names_not_unique]]
|
23
26
|
end
|
24
27
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
28
|
+
virtual do |data, errors|
|
29
|
+
next unless errors.empty?
|
30
|
+
|
31
|
+
fetched_errors = []
|
32
|
+
|
33
|
+
data.fetch(:topics).each do |topic|
|
34
|
+
ConsumerGroupTopic.new.call(topic).errors.each do |key, value|
|
35
|
+
fetched_errors << [[topic, key].flatten, value]
|
31
36
|
end
|
32
37
|
end
|
38
|
+
|
39
|
+
fetched_errors
|
33
40
|
end
|
34
41
|
end
|
35
42
|
end
|
@@ -4,24 +4,38 @@ module Karafka
|
|
4
4
|
module Contracts
|
5
5
|
# Consumer group topic validation rules.
|
6
6
|
class ConsumerGroupTopic < Base
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
required(:initial_offset).filled(included_in?: %w[earliest latest])
|
14
|
-
required(:max_wait_time).filled { int? & gteq?(10) }
|
15
|
-
required(:manual_offset_management).filled(:bool?)
|
16
|
-
required(:name).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
|
7
|
+
configure do |config|
|
8
|
+
config.error_messages = YAML.safe_load(
|
9
|
+
File.read(
|
10
|
+
File.join(Karafka.gem_root, 'config', 'errors.yml')
|
11
|
+
)
|
12
|
+
).fetch('en').fetch('validations').fetch('consumer_group_topic')
|
17
13
|
end
|
18
14
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
15
|
+
required(:consumer) { |consumer_group| !consumer_group.nil? }
|
16
|
+
required(:deserializer) { |deserializer| !deserializer.nil? }
|
17
|
+
required(:id) { |id| id.is_a?(String) && Contracts::TOPIC_REGEXP.match?(id) }
|
18
|
+
required(:kafka) { |kafka| kafka.is_a?(Hash) && !kafka.empty? }
|
19
|
+
required(:max_messages) { |mm| mm.is_a?(Integer) && mm >= 1 }
|
20
|
+
required(:initial_offset) { |io| %w[earliest latest].include?(io) }
|
21
|
+
required(:max_wait_time) { |mwt| mwt.is_a?(Integer) && mwt >= 10 }
|
22
|
+
required(:manual_offset_management) { |mmm| [true, false].include?(mmm) }
|
23
|
+
required(:name) { |name| name.is_a?(String) && Contracts::TOPIC_REGEXP.match?(name) }
|
24
|
+
|
25
|
+
virtual do |data, errors|
|
26
|
+
next unless errors.empty?
|
27
|
+
|
28
|
+
value = data.fetch(:kafka)
|
29
|
+
|
30
|
+
begin
|
31
|
+
# This will trigger rdkafka validations that we catch and re-map the info and use dry
|
32
|
+
# compatible format
|
33
|
+
Rdkafka::Config.new(value).send(:native_config)
|
34
|
+
|
35
|
+
nil
|
36
|
+
rescue Rdkafka::Config::ConfigError => e
|
37
|
+
[[%w[kafka], e.message]]
|
38
|
+
end
|
25
39
|
end
|
26
40
|
end
|
27
41
|
end
|
@@ -4,17 +4,28 @@ module Karafka
|
|
4
4
|
module Contracts
|
5
5
|
# Contract for validating correctness of the server cli command options.
|
6
6
|
class ServerCliOptions < Base
|
7
|
-
|
8
|
-
|
7
|
+
configure do |config|
|
8
|
+
config.error_messages = YAML.safe_load(
|
9
|
+
File.read(
|
10
|
+
File.join(Karafka.gem_root, 'config', 'errors.yml')
|
11
|
+
)
|
12
|
+
).fetch('en').fetch('validations').fetch('server_cli_options')
|
9
13
|
end
|
10
14
|
|
11
|
-
|
15
|
+
optional(:consumer_groups) { |cg| cg.is_a?(Array) && !cg.empty? }
|
16
|
+
|
17
|
+
virtual do |data, errors|
|
18
|
+
next unless errors.empty?
|
19
|
+
next unless data.key?(:consumer_groups)
|
20
|
+
|
21
|
+
value = data.fetch(:consumer_groups)
|
22
|
+
|
12
23
|
# If there were no consumer_groups declared in the server cli, it means that we will
|
13
24
|
# run all of them and no need to validate them here at all
|
14
|
-
if
|
15
|
-
|
16
|
-
|
17
|
-
|
25
|
+
next if value.nil?
|
26
|
+
next if (value - Karafka::App.config.internal.routing.builder.map(&:name)).empty?
|
27
|
+
|
28
|
+
[[%i[consumer_groups], :consumer_groups_inclusion]]
|
18
29
|
end
|
19
30
|
end
|
20
31
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Helpers
|
5
|
+
# Simple wrapper for adding colors to strings
|
6
|
+
module Colorize
|
7
|
+
# @param string [String] string we want to have in green
|
8
|
+
# @return [String] green string
|
9
|
+
def green(string)
|
10
|
+
"\033[0;32m#{string}\033[0m"
|
11
|
+
end
|
12
|
+
|
13
|
+
# @param string [String] string we want to have in red
|
14
|
+
# @return [String] red string
|
15
|
+
def red(string)
|
16
|
+
"\033[0;31m#{string}\033[0m"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -18,7 +18,7 @@ module Karafka
|
|
18
18
|
# @param event [Dry::Events::Event] event details including payload
|
19
19
|
def on_connection_listener_fetch_loop(event)
|
20
20
|
listener = event[:caller]
|
21
|
-
|
21
|
+
debug "[#{listener.id}] Polling messages..."
|
22
22
|
end
|
23
23
|
|
24
24
|
# Logs about messages that we've received from Kafka
|
@@ -28,7 +28,13 @@ module Karafka
|
|
28
28
|
listener = event[:caller]
|
29
29
|
time = event[:time]
|
30
30
|
messages_count = event[:messages_buffer].size
|
31
|
-
|
31
|
+
|
32
|
+
message = "[#{listener.id}] Polled #{messages_count} messages in #{time}ms"
|
33
|
+
|
34
|
+
# We don't want the "polled 0" in dev as it would spam the log
|
35
|
+
# Instead we publish only info when there was anything we could poll and fail over to the
|
36
|
+
# zero notifications when in debug mode
|
37
|
+
messages_count.zero? ? debug(message) : info(message)
|
32
38
|
end
|
33
39
|
|
34
40
|
# Prints info about the fact that a given job has started
|
@@ -0,0 +1 @@
|
|
1
|
+
{"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.connection.connects{*} by {host}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{*} by {type}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*}.as_count()","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.revoked{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":5}},{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":5,"width":12,"height":3}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 1000"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumption_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Processing lag in ms (p95)","formula":"query1"},{"alias":"Processing lag in ms (max)","formula":"query2"},{"alias":"Processing lag in ms (avg)","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"},{"formula":"query2"},{"formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by {topic,partition}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms polling time","formula":"query1"},{"alias":"max ms polling time","formula":"query2"},{"alias":"average ms polling time","formula":"query3"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.95percentile{*}"},{"name":"query2","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.max{*}"},{"name":"query3","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.avg{*}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":7,"is_column_break":true}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
|