karafka 2.0.0.beta5 → 2.0.0.rc3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +28 -0
- data/CONTRIBUTING.md +0 -5
- data/Gemfile.lock +12 -42
- data/LICENSE-COMM +1 -1
- data/README.md +44 -16
- data/bin/stress_many +1 -1
- data/bin/stress_one +1 -1
- data/config/errors.yml +52 -5
- data/docker-compose.yml +7 -0
- data/karafka.gemspec +2 -4
- data/lib/karafka/active_job/consumer.rb +2 -0
- data/lib/karafka/active_job/job_options_contract.rb +8 -2
- data/lib/karafka/base_consumer.rb +4 -6
- data/lib/karafka/cli/install.rb +15 -2
- data/lib/karafka/cli/server.rb +4 -2
- data/lib/karafka/connection/client.rb +20 -17
- data/lib/karafka/connection/listener.rb +12 -24
- data/lib/karafka/connection/pauses_manager.rb +0 -8
- data/lib/karafka/contracts/base.rb +2 -8
- data/lib/karafka/contracts/config.rb +71 -51
- data/lib/karafka/contracts/consumer_group.rb +25 -18
- data/lib/karafka/contracts/consumer_group_topic.rb +30 -16
- data/lib/karafka/contracts/server_cli_options.rb +18 -7
- data/lib/karafka/helpers/colorize.rb +20 -0
- data/lib/karafka/instrumentation/logger_listener.rb +8 -2
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +5 -2
- data/lib/karafka/pro/active_job/job_options_contract.rb +11 -6
- data/lib/karafka/pro/base_consumer.rb +21 -12
- data/lib/karafka/pro/contracts/base.rb +21 -0
- data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
- data/lib/karafka/pro/loader.rb +23 -3
- data/lib/karafka/pro/processing/coordinator.rb +51 -0
- data/lib/karafka/pro/processing/partitioner.rb +60 -0
- data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
- data/lib/karafka/pro/routing/{extensions.rb → topic_extensions.rb} +7 -1
- data/lib/karafka/processing/coordinator.rb +6 -2
- data/lib/karafka/processing/coordinators_buffer.rb +3 -7
- data/lib/karafka/processing/executor.rb +1 -1
- data/lib/karafka/processing/jobs_queue.rb +11 -0
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/worker.rb +4 -2
- data/lib/karafka/setup/config.rb +9 -3
- data/lib/karafka/templates/example_consumer.rb.erb +2 -2
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +2 -2
- data.tar.gz.sig +0 -0
- metadata +15 -34
- metadata.gz.sig +0 -0
@@ -18,15 +18,18 @@ module Karafka
|
|
18
18
|
# @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
|
19
19
|
# @return [Karafka::Connection::Listener] listener instance
|
20
20
|
def initialize(subscription_group, jobs_queue)
|
21
|
+
proc_config = ::Karafka::App.config.internal.processing
|
22
|
+
|
21
23
|
@id = SecureRandom.uuid
|
22
24
|
@subscription_group = subscription_group
|
23
25
|
@jobs_queue = jobs_queue
|
24
|
-
@jobs_builder = ::Karafka::App.config.internal.processing.jobs_builder
|
25
26
|
@coordinators = Processing::CoordinatorsBuffer.new
|
26
27
|
@client = Client.new(@subscription_group)
|
27
28
|
@executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
|
29
|
+
@jobs_builder = proc_config.jobs_builder
|
30
|
+
@partitioner = proc_config.partitioner_class.new(subscription_group)
|
28
31
|
# We reference scheduler here as it is much faster than fetching this each time
|
29
|
-
@scheduler =
|
32
|
+
@scheduler = proc_config.scheduler
|
30
33
|
# We keep one buffer for messages to preserve memory and not allocate extra objects
|
31
34
|
# We can do this that way because we always first schedule jobs using messages before we
|
32
35
|
# fetch another batch.
|
@@ -79,10 +82,6 @@ module Karafka
|
|
79
82
|
poll_and_remap_messages
|
80
83
|
end
|
81
84
|
|
82
|
-
# This will ensure, that in the next poll, we continue processing (if we get them back)
|
83
|
-
# partitions that we have paused
|
84
|
-
resume_assigned_partitions
|
85
|
-
|
86
85
|
# If there were revoked partitions, we need to wait on their jobs to finish before
|
87
86
|
# distributing consuming jobs as upon revoking, we might get assigned to the same
|
88
87
|
# partitions, thus getting their jobs. The revoking jobs need to finish before
|
@@ -159,8 +158,6 @@ module Karafka
|
|
159
158
|
|
160
159
|
revoked_partitions.each do |topic, partitions|
|
161
160
|
partitions.each do |partition|
|
162
|
-
# We revoke the coordinator here, so we do not have to revoke it in the revoke job
|
163
|
-
# itself (this happens prior to scheduling those jobs)
|
164
161
|
@coordinators.revoke(topic, partition)
|
165
162
|
|
166
163
|
# There may be a case where we have lost partition of which data we have never
|
@@ -204,17 +201,6 @@ module Karafka
|
|
204
201
|
)
|
205
202
|
end
|
206
203
|
|
207
|
-
# Revoked partition needs to be resumed if we were processing them earlier. This will do
|
208
|
-
# nothing to things that we are planning to process. Without this, things we get
|
209
|
-
# re-assigned would not be polled.
|
210
|
-
def resume_assigned_partitions
|
211
|
-
@client.rebalance_manager.assigned_partitions.each do |topic, partitions|
|
212
|
-
partitions.each do |partition|
|
213
|
-
@client.resume(topic, partition)
|
214
|
-
end
|
215
|
-
end
|
216
|
-
end
|
217
|
-
|
218
204
|
# Takes the messages per topic partition and enqueues processing jobs in threads using
|
219
205
|
# given scheduler.
|
220
206
|
def build_and_schedule_consumption_jobs
|
@@ -226,14 +212,16 @@ module Karafka
|
|
226
212
|
coordinator = @coordinators.find_or_create(topic, partition)
|
227
213
|
|
228
214
|
# Start work coordination for this topic partition
|
229
|
-
coordinator.start
|
215
|
+
coordinator.start(messages)
|
230
216
|
|
231
|
-
|
232
|
-
|
217
|
+
@partitioner.call(topic, messages) do |group_id, partition_messages|
|
218
|
+
# Count the job we're going to create here
|
219
|
+
coordinator.increment
|
233
220
|
|
234
|
-
|
221
|
+
executor = @executors.find_or_create(topic, partition, group_id)
|
235
222
|
|
236
|
-
|
223
|
+
jobs << @jobs_builder.consume(executor, partition_messages, coordinator)
|
224
|
+
end
|
237
225
|
end
|
238
226
|
|
239
227
|
@scheduler.schedule_consumption(@jobs_queue, jobs)
|
@@ -25,14 +25,6 @@ module Karafka
|
|
25
25
|
)
|
26
26
|
end
|
27
27
|
|
28
|
-
# Revokes pause tracker for a given topic partition
|
29
|
-
#
|
30
|
-
# @param topic [String] topic name
|
31
|
-
# @param partition [Integer] partition number
|
32
|
-
def revoke(topic, partition)
|
33
|
-
@pauses[topic].delete(partition)
|
34
|
-
end
|
35
|
-
|
36
28
|
# Resumes processing of partitions for which pause time has ended.
|
37
29
|
#
|
38
30
|
# @yieldparam [String] topic name
|
@@ -3,20 +3,14 @@
|
|
3
3
|
module Karafka
|
4
4
|
module Contracts
|
5
5
|
# Base contract for all Karafka contracts
|
6
|
-
class Base <
|
7
|
-
config.messages.load_paths << File.join(Karafka.gem_root, 'config', 'errors.yml')
|
8
|
-
|
6
|
+
class Base < ::WaterDrop::Contractable::Contract
|
9
7
|
# @param data [Hash] data for validation
|
10
8
|
# @return [Boolean] true if all good
|
11
9
|
# @raise [Errors::InvalidConfigurationError] invalid configuration error
|
12
10
|
# @note We use contracts only in the config validation context, so no need to add support
|
13
11
|
# for multiple error classes. It will be added when it will be needed.
|
14
12
|
def validate!(data)
|
15
|
-
|
16
|
-
|
17
|
-
return true if result.success?
|
18
|
-
|
19
|
-
raise Errors::InvalidConfigurationError, result.errors.to_h
|
13
|
+
super(data, Errors::InvalidConfigurationError)
|
20
14
|
end
|
21
15
|
end
|
22
16
|
end
|
@@ -9,70 +9,90 @@ module Karafka
|
|
9
9
|
# validated per each route (topic + consumer_group) because they can be overwritten,
|
10
10
|
# so we validate all of that once all the routes are defined and ready.
|
11
11
|
class Config < Base
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
configure do |config|
|
13
|
+
config.error_messages = YAML.safe_load(
|
14
|
+
File.read(
|
15
|
+
File.join(Karafka.gem_root, 'config', 'errors.yml')
|
16
|
+
)
|
17
|
+
).fetch('en').fetch('validations').fetch('config')
|
18
|
+
end
|
19
|
+
|
20
|
+
# License validity happens in the licenser. Here we do only the simple consistency checks
|
21
|
+
nested(:license) do
|
22
|
+
required(:token) { |val| [true, false].include?(val) || val.is_a?(String) }
|
23
|
+
required(:entity) { |val| val.is_a?(String) }
|
24
|
+
required(:expires_on) { |val| val.is_a?(Date) }
|
25
|
+
end
|
26
|
+
|
27
|
+
required(:client_id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
|
28
|
+
required(:concurrency) { |val| val.is_a?(Integer) && val.positive? }
|
29
|
+
required(:consumer_mapper) { |val| !val.nil? }
|
30
|
+
required(:consumer_persistence) { |val| [true, false].include?(val) }
|
31
|
+
required(:pause_timeout) { |val| val.is_a?(Integer) && val.positive? }
|
32
|
+
required(:pause_max_timeout) { |val| val.is_a?(Integer) && val.positive? }
|
33
|
+
required(:pause_with_exponential_backoff) { |val| [true, false].include?(val) }
|
34
|
+
required(:shutdown_timeout) { |val| val.is_a?(Integer) && val.positive? }
|
35
|
+
required(:max_wait_time) { |val| val.is_a?(Integer) && val.positive? }
|
36
|
+
required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
|
37
|
+
|
38
|
+
# We validate internals just to be sure, that they are present and working
|
39
|
+
nested(:internal) do
|
40
|
+
required(:status) { |val| !val.nil? }
|
41
|
+
required(:process) { |val| !val.nil? }
|
42
|
+
|
43
|
+
nested(:routing) do
|
44
|
+
required(:builder) { |val| !val.nil? }
|
45
|
+
required(:subscription_groups_builder) { |val| !val.nil? }
|
46
|
+
end
|
47
|
+
|
48
|
+
nested(:processing) do
|
49
|
+
required(:jobs_builder) { |val| !val.nil? }
|
50
|
+
required(:scheduler) { |val| !val.nil? }
|
51
|
+
required(:coordinator_class) { |val| !val.nil? }
|
52
|
+
required(:partitioner_class) { |val| !val.nil? }
|
18
53
|
end
|
19
54
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
required(:pause_timeout) { int? & gt?(0) }
|
25
|
-
required(:pause_max_timeout) { int? & gt?(0) }
|
26
|
-
required(:pause_with_exponential_backoff).filled(:bool?)
|
27
|
-
required(:shutdown_timeout) { int? & gt?(0) }
|
28
|
-
required(:max_wait_time) { int? & gt?(0) }
|
29
|
-
required(:kafka).filled(:hash)
|
30
|
-
|
31
|
-
# We validate internals just to be sure, that they are present and working
|
32
|
-
required(:internal).schema do
|
33
|
-
required(:status)
|
34
|
-
required(:process)
|
35
|
-
|
36
|
-
required(:routing).schema do
|
37
|
-
required(:builder)
|
38
|
-
required(:subscription_groups_builder)
|
39
|
-
end
|
40
|
-
|
41
|
-
required(:processing).schema do
|
42
|
-
required(:jobs_builder)
|
43
|
-
required(:scheduler)
|
44
|
-
required(:coordinator_class)
|
45
|
-
end
|
46
|
-
|
47
|
-
required(:active_job).schema do
|
48
|
-
required(:dispatcher)
|
49
|
-
required(:job_options_contract)
|
50
|
-
required(:consumer_class)
|
51
|
-
end
|
55
|
+
nested(:active_job) do
|
56
|
+
required(:dispatcher) { |val| !val.nil? }
|
57
|
+
required(:job_options_contract) { |val| !val.nil? }
|
58
|
+
required(:consumer_class) { |val| !val.nil? }
|
52
59
|
end
|
53
60
|
end
|
54
61
|
|
55
|
-
|
56
|
-
|
57
|
-
|
62
|
+
virtual do |data, errors|
|
63
|
+
next unless errors.empty?
|
64
|
+
|
65
|
+
detected_errors = []
|
58
66
|
|
59
|
-
|
67
|
+
data.fetch(:kafka).each_key do |key|
|
60
68
|
next if key.is_a?(Symbol)
|
61
69
|
|
62
|
-
|
70
|
+
detected_errors << [[:kafka, key], :key_must_be_a_symbol]
|
63
71
|
end
|
72
|
+
|
73
|
+
detected_errors
|
64
74
|
end
|
65
75
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
76
|
+
virtual do |data, errors|
|
77
|
+
next unless errors.empty?
|
78
|
+
|
79
|
+
pause_timeout = data.fetch(:pause_timeout)
|
80
|
+
pause_max_timeout = data.fetch(:pause_max_timeout)
|
81
|
+
|
82
|
+
next if pause_timeout <= pause_max_timeout
|
83
|
+
|
84
|
+
[[%i[pause_timeout], :max_timeout_vs_pause_max_timeout]]
|
70
85
|
end
|
71
86
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
87
|
+
virtual do |data, errors|
|
88
|
+
next unless errors.empty?
|
89
|
+
|
90
|
+
shutdown_timeout = data.fetch(:shutdown_timeout)
|
91
|
+
max_wait_time = data.fetch(:max_wait_time)
|
92
|
+
|
93
|
+
next if max_wait_time < shutdown_timeout
|
94
|
+
|
95
|
+
[[%i[shutdown_timeout], :shutdown_timeout_vs_max_wait_time]]
|
76
96
|
end
|
77
97
|
end
|
78
98
|
end
|
@@ -4,32 +4,39 @@ module Karafka
|
|
4
4
|
module Contracts
|
5
5
|
# Contract for single full route (consumer group + topics) validation.
|
6
6
|
class ConsumerGroup < Base
|
7
|
-
|
8
|
-
|
7
|
+
configure do |config|
|
8
|
+
config.error_messages = YAML.safe_load(
|
9
|
+
File.read(
|
10
|
+
File.join(Karafka.gem_root, 'config', 'errors.yml')
|
11
|
+
)
|
12
|
+
).fetch('en').fetch('validations').fetch('consumer_group')
|
13
|
+
end
|
9
14
|
|
10
|
-
|
15
|
+
required(:id) { |id| id.is_a?(String) && Contracts::TOPIC_REGEXP.match?(id) }
|
16
|
+
required(:topics) { |topics| topics.is_a?(Array) && !topics.empty? }
|
11
17
|
|
12
|
-
|
13
|
-
|
14
|
-
required(:topics).value(:array, :filled?)
|
15
|
-
end
|
18
|
+
virtual do |data, errors|
|
19
|
+
next unless errors.empty?
|
16
20
|
|
17
|
-
|
18
|
-
if value.is_a?(Array)
|
19
|
-
names = value.map { |topic| topic[:name] }
|
21
|
+
names = data.fetch(:topics).map { |topic| topic[:name] }
|
20
22
|
|
21
|
-
|
22
|
-
|
23
|
+
next if names.size == names.uniq.size
|
24
|
+
|
25
|
+
[[%i[topics], :names_not_unique]]
|
23
26
|
end
|
24
27
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
28
|
+
virtual do |data, errors|
|
29
|
+
next unless errors.empty?
|
30
|
+
|
31
|
+
fetched_errors = []
|
32
|
+
|
33
|
+
data.fetch(:topics).each do |topic|
|
34
|
+
ConsumerGroupTopic.new.call(topic).errors.each do |key, value|
|
35
|
+
fetched_errors << [[topic, key].flatten, value]
|
31
36
|
end
|
32
37
|
end
|
38
|
+
|
39
|
+
fetched_errors
|
33
40
|
end
|
34
41
|
end
|
35
42
|
end
|
@@ -4,24 +4,38 @@ module Karafka
|
|
4
4
|
module Contracts
|
5
5
|
# Consumer group topic validation rules.
|
6
6
|
class ConsumerGroupTopic < Base
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
required(:initial_offset).filled(included_in?: %w[earliest latest])
|
14
|
-
required(:max_wait_time).filled { int? & gteq?(10) }
|
15
|
-
required(:manual_offset_management).filled(:bool?)
|
16
|
-
required(:name).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
|
7
|
+
configure do |config|
|
8
|
+
config.error_messages = YAML.safe_load(
|
9
|
+
File.read(
|
10
|
+
File.join(Karafka.gem_root, 'config', 'errors.yml')
|
11
|
+
)
|
12
|
+
).fetch('en').fetch('validations').fetch('consumer_group_topic')
|
17
13
|
end
|
18
14
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
15
|
+
required(:consumer) { |consumer_group| !consumer_group.nil? }
|
16
|
+
required(:deserializer) { |deserializer| !deserializer.nil? }
|
17
|
+
required(:id) { |id| id.is_a?(String) && Contracts::TOPIC_REGEXP.match?(id) }
|
18
|
+
required(:kafka) { |kafka| kafka.is_a?(Hash) && !kafka.empty? }
|
19
|
+
required(:max_messages) { |mm| mm.is_a?(Integer) && mm >= 1 }
|
20
|
+
required(:initial_offset) { |io| %w[earliest latest].include?(io) }
|
21
|
+
required(:max_wait_time) { |mwt| mwt.is_a?(Integer) && mwt >= 10 }
|
22
|
+
required(:manual_offset_management) { |mmm| [true, false].include?(mmm) }
|
23
|
+
required(:name) { |name| name.is_a?(String) && Contracts::TOPIC_REGEXP.match?(name) }
|
24
|
+
|
25
|
+
virtual do |data, errors|
|
26
|
+
next unless errors.empty?
|
27
|
+
|
28
|
+
value = data.fetch(:kafka)
|
29
|
+
|
30
|
+
begin
|
31
|
+
# This will trigger rdkafka validations that we catch and re-map the info and use dry
|
32
|
+
# compatible format
|
33
|
+
Rdkafka::Config.new(value).send(:native_config)
|
34
|
+
|
35
|
+
nil
|
36
|
+
rescue Rdkafka::Config::ConfigError => e
|
37
|
+
[[%w[kafka], e.message]]
|
38
|
+
end
|
25
39
|
end
|
26
40
|
end
|
27
41
|
end
|
@@ -4,17 +4,28 @@ module Karafka
|
|
4
4
|
module Contracts
|
5
5
|
# Contract for validating correctness of the server cli command options.
|
6
6
|
class ServerCliOptions < Base
|
7
|
-
|
8
|
-
|
7
|
+
configure do |config|
|
8
|
+
config.error_messages = YAML.safe_load(
|
9
|
+
File.read(
|
10
|
+
File.join(Karafka.gem_root, 'config', 'errors.yml')
|
11
|
+
)
|
12
|
+
).fetch('en').fetch('validations').fetch('server_cli_options')
|
9
13
|
end
|
10
14
|
|
11
|
-
|
15
|
+
optional(:consumer_groups) { |cg| cg.is_a?(Array) && !cg.empty? }
|
16
|
+
|
17
|
+
virtual do |data, errors|
|
18
|
+
next unless errors.empty?
|
19
|
+
next unless data.key?(:consumer_groups)
|
20
|
+
|
21
|
+
value = data.fetch(:consumer_groups)
|
22
|
+
|
12
23
|
# If there were no consumer_groups declared in the server cli, it means that we will
|
13
24
|
# run all of them and no need to validate them here at all
|
14
|
-
if
|
15
|
-
|
16
|
-
|
17
|
-
|
25
|
+
next if value.nil?
|
26
|
+
next if (value - Karafka::App.config.internal.routing.builder.map(&:name)).empty?
|
27
|
+
|
28
|
+
[[%i[consumer_groups], :consumer_groups_inclusion]]
|
18
29
|
end
|
19
30
|
end
|
20
31
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Helpers
|
5
|
+
# Simple wrapper for adding colors to strings
|
6
|
+
module Colorize
|
7
|
+
# @param string [String] string we want to have in green
|
8
|
+
# @return [String] green string
|
9
|
+
def green(string)
|
10
|
+
"\033[0;32m#{string}\033[0m"
|
11
|
+
end
|
12
|
+
|
13
|
+
# @param string [String] string we want to have in red
|
14
|
+
# @return [String] red string
|
15
|
+
def red(string)
|
16
|
+
"\033[0;31m#{string}\033[0m"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -18,7 +18,7 @@ module Karafka
|
|
18
18
|
# @param event [Dry::Events::Event] event details including payload
|
19
19
|
def on_connection_listener_fetch_loop(event)
|
20
20
|
listener = event[:caller]
|
21
|
-
|
21
|
+
debug "[#{listener.id}] Polling messages..."
|
22
22
|
end
|
23
23
|
|
24
24
|
# Logs about messages that we've received from Kafka
|
@@ -28,7 +28,13 @@ module Karafka
|
|
28
28
|
listener = event[:caller]
|
29
29
|
time = event[:time]
|
30
30
|
messages_count = event[:messages_buffer].size
|
31
|
-
|
31
|
+
|
32
|
+
message = "[#{listener.id}] Polled #{messages_count} messages in #{time}ms"
|
33
|
+
|
34
|
+
# We don't want the "polled 0" in dev as it would spam the log
|
35
|
+
# Instead we publish only info when there was anything we could poll and fail over to the
|
36
|
+
# zero notifications when in debug mode
|
37
|
+
messages_count.zero? ? debug(message) : info(message)
|
32
38
|
end
|
33
39
|
|
34
40
|
# Prints info about the fact that a given job has started
|
@@ -0,0 +1 @@
|
|
1
|
+
{"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.connection.connects{*} by {host}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{*} by {type}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*}.as_count()","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.revoked{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":5}},{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":5,"width":12,"height":3}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 1000"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumption_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Processing lag in ms (p95)","formula":"query1"},{"alias":"Processing lag in ms (max)","formula":"query2"},{"alias":"Processing lag in ms (avg)","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"},{"formula":"query2"},{"formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by {topic,partition}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms polling time","formula":"query1"},{"alias":"max ms polling time","formula":"query2"},{"alias":"average ms polling time","formula":"query3"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.95percentile{*}"},{"name":"query2","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.max{*}"},{"name":"query3","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.avg{*}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":7,"is_column_break":true}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
|