karafka 2.0.0.beta5 → 2.0.0.rc3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +28 -0
  4. data/CONTRIBUTING.md +0 -5
  5. data/Gemfile.lock +12 -42
  6. data/LICENSE-COMM +1 -1
  7. data/README.md +44 -16
  8. data/bin/stress_many +1 -1
  9. data/bin/stress_one +1 -1
  10. data/config/errors.yml +52 -5
  11. data/docker-compose.yml +7 -0
  12. data/karafka.gemspec +2 -4
  13. data/lib/karafka/active_job/consumer.rb +2 -0
  14. data/lib/karafka/active_job/job_options_contract.rb +8 -2
  15. data/lib/karafka/base_consumer.rb +4 -6
  16. data/lib/karafka/cli/install.rb +15 -2
  17. data/lib/karafka/cli/server.rb +4 -2
  18. data/lib/karafka/connection/client.rb +20 -17
  19. data/lib/karafka/connection/listener.rb +12 -24
  20. data/lib/karafka/connection/pauses_manager.rb +0 -8
  21. data/lib/karafka/contracts/base.rb +2 -8
  22. data/lib/karafka/contracts/config.rb +71 -51
  23. data/lib/karafka/contracts/consumer_group.rb +25 -18
  24. data/lib/karafka/contracts/consumer_group_topic.rb +30 -16
  25. data/lib/karafka/contracts/server_cli_options.rb +18 -7
  26. data/lib/karafka/helpers/colorize.rb +20 -0
  27. data/lib/karafka/instrumentation/logger_listener.rb +8 -2
  28. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  29. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  30. data/lib/karafka/pro/active_job/dispatcher.rb +5 -2
  31. data/lib/karafka/pro/active_job/job_options_contract.rb +11 -6
  32. data/lib/karafka/pro/base_consumer.rb +21 -12
  33. data/lib/karafka/pro/contracts/base.rb +21 -0
  34. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  35. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  36. data/lib/karafka/pro/loader.rb +23 -3
  37. data/lib/karafka/pro/processing/coordinator.rb +51 -0
  38. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  39. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  40. data/lib/karafka/pro/routing/{extensions.rb → topic_extensions.rb} +7 -1
  41. data/lib/karafka/processing/coordinator.rb +6 -2
  42. data/lib/karafka/processing/coordinators_buffer.rb +3 -7
  43. data/lib/karafka/processing/executor.rb +1 -1
  44. data/lib/karafka/processing/jobs_queue.rb +11 -0
  45. data/lib/karafka/processing/partitioner.rb +22 -0
  46. data/lib/karafka/processing/worker.rb +4 -2
  47. data/lib/karafka/setup/config.rb +9 -3
  48. data/lib/karafka/templates/example_consumer.rb.erb +2 -2
  49. data/lib/karafka/version.rb +1 -1
  50. data/lib/karafka.rb +2 -2
  51. data.tar.gz.sig +0 -0
  52. metadata +15 -34
  53. metadata.gz.sig +0 -0
@@ -18,15 +18,18 @@ module Karafka
18
18
  # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
19
19
  # @return [Karafka::Connection::Listener] listener instance
20
20
  def initialize(subscription_group, jobs_queue)
21
+ proc_config = ::Karafka::App.config.internal.processing
22
+
21
23
  @id = SecureRandom.uuid
22
24
  @subscription_group = subscription_group
23
25
  @jobs_queue = jobs_queue
24
- @jobs_builder = ::Karafka::App.config.internal.processing.jobs_builder
25
26
  @coordinators = Processing::CoordinatorsBuffer.new
26
27
  @client = Client.new(@subscription_group)
27
28
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
29
+ @jobs_builder = proc_config.jobs_builder
30
+ @partitioner = proc_config.partitioner_class.new(subscription_group)
28
31
  # We reference scheduler here as it is much faster than fetching this each time
29
- @scheduler = ::Karafka::App.config.internal.processing.scheduler
32
+ @scheduler = proc_config.scheduler
30
33
  # We keep one buffer for messages to preserve memory and not allocate extra objects
31
34
  # We can do this that way because we always first schedule jobs using messages before we
32
35
  # fetch another batch.
@@ -79,10 +82,6 @@ module Karafka
79
82
  poll_and_remap_messages
80
83
  end
81
84
 
82
- # This will ensure, that in the next poll, we continue processing (if we get them back)
83
- # partitions that we have paused
84
- resume_assigned_partitions
85
-
86
85
  # If there were revoked partitions, we need to wait on their jobs to finish before
87
86
  # distributing consuming jobs as upon revoking, we might get assigned to the same
88
87
  # partitions, thus getting their jobs. The revoking jobs need to finish before
@@ -159,8 +158,6 @@ module Karafka
159
158
 
160
159
  revoked_partitions.each do |topic, partitions|
161
160
  partitions.each do |partition|
162
- # We revoke the coordinator here, so we do not have to revoke it in the revoke job
163
- # itself (this happens prior to scheduling those jobs)
164
161
  @coordinators.revoke(topic, partition)
165
162
 
166
163
  # There may be a case where we have lost partition of which data we have never
@@ -204,17 +201,6 @@ module Karafka
204
201
  )
205
202
  end
206
203
 
207
- # Revoked partition needs to be resumed if we were processing them earlier. This will do
208
- # nothing to things that we are planning to process. Without this, things we get
209
- # re-assigned would not be polled.
210
- def resume_assigned_partitions
211
- @client.rebalance_manager.assigned_partitions.each do |topic, partitions|
212
- partitions.each do |partition|
213
- @client.resume(topic, partition)
214
- end
215
- end
216
- end
217
-
218
204
  # Takes the messages per topic partition and enqueues processing jobs in threads using
219
205
  # given scheduler.
220
206
  def build_and_schedule_consumption_jobs
@@ -226,14 +212,16 @@ module Karafka
226
212
  coordinator = @coordinators.find_or_create(topic, partition)
227
213
 
228
214
  # Start work coordination for this topic partition
229
- coordinator.start
215
+ coordinator.start(messages)
230
216
 
231
- # Count the job we're going to create here
232
- coordinator.increment
217
+ @partitioner.call(topic, messages) do |group_id, partition_messages|
218
+ # Count the job we're going to create here
219
+ coordinator.increment
233
220
 
234
- executor = @executors.find_or_create(topic, partition, 0)
221
+ executor = @executors.find_or_create(topic, partition, group_id)
235
222
 
236
- jobs << @jobs_builder.consume(executor, messages, coordinator)
223
+ jobs << @jobs_builder.consume(executor, partition_messages, coordinator)
224
+ end
237
225
  end
238
226
 
239
227
  @scheduler.schedule_consumption(@jobs_queue, jobs)
@@ -25,14 +25,6 @@ module Karafka
25
25
  )
26
26
  end
27
27
 
28
- # Revokes pause tracker for a given topic partition
29
- #
30
- # @param topic [String] topic name
31
- # @param partition [Integer] partition number
32
- def revoke(topic, partition)
33
- @pauses[topic].delete(partition)
34
- end
35
-
36
28
  # Resumes processing of partitions for which pause time has ended.
37
29
  #
38
30
  # @yieldparam [String] topic name
@@ -3,20 +3,14 @@
3
3
  module Karafka
4
4
  module Contracts
5
5
  # Base contract for all Karafka contracts
6
- class Base < Dry::Validation::Contract
7
- config.messages.load_paths << File.join(Karafka.gem_root, 'config', 'errors.yml')
8
-
6
+ class Base < ::WaterDrop::Contractable::Contract
9
7
  # @param data [Hash] data for validation
10
8
  # @return [Boolean] true if all good
11
9
  # @raise [Errors::InvalidConfigurationError] invalid configuration error
12
10
  # @note We use contracts only in the config validation context, so no need to add support
13
11
  # for multiple error classes. It will be added when it will be needed.
14
12
  def validate!(data)
15
- result = call(data)
16
-
17
- return true if result.success?
18
-
19
- raise Errors::InvalidConfigurationError, result.errors.to_h
13
+ super(data, Errors::InvalidConfigurationError)
20
14
  end
21
15
  end
22
16
  end
@@ -9,70 +9,90 @@ module Karafka
9
9
  # validated per each route (topic + consumer_group) because they can be overwritten,
10
10
  # so we validate all of that once all the routes are defined and ready.
11
11
  class Config < Base
12
- params do
13
- # License validity happens in the licenser. Here we do only the simple consistency checks
14
- required(:license).schema do
15
- required(:token) { bool? | str? }
16
- required(:entity) { str? }
17
- required(:expires_on) { date? }
12
+ configure do |config|
13
+ config.error_messages = YAML.safe_load(
14
+ File.read(
15
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
16
+ )
17
+ ).fetch('en').fetch('validations').fetch('config')
18
+ end
19
+
20
+ # License validity happens in the licenser. Here we do only the simple consistency checks
21
+ nested(:license) do
22
+ required(:token) { |val| [true, false].include?(val) || val.is_a?(String) }
23
+ required(:entity) { |val| val.is_a?(String) }
24
+ required(:expires_on) { |val| val.is_a?(Date) }
25
+ end
26
+
27
+ required(:client_id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
28
+ required(:concurrency) { |val| val.is_a?(Integer) && val.positive? }
29
+ required(:consumer_mapper) { |val| !val.nil? }
30
+ required(:consumer_persistence) { |val| [true, false].include?(val) }
31
+ required(:pause_timeout) { |val| val.is_a?(Integer) && val.positive? }
32
+ required(:pause_max_timeout) { |val| val.is_a?(Integer) && val.positive? }
33
+ required(:pause_with_exponential_backoff) { |val| [true, false].include?(val) }
34
+ required(:shutdown_timeout) { |val| val.is_a?(Integer) && val.positive? }
35
+ required(:max_wait_time) { |val| val.is_a?(Integer) && val.positive? }
36
+ required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
37
+
38
+ # We validate internals just to be sure, that they are present and working
39
+ nested(:internal) do
40
+ required(:status) { |val| !val.nil? }
41
+ required(:process) { |val| !val.nil? }
42
+
43
+ nested(:routing) do
44
+ required(:builder) { |val| !val.nil? }
45
+ required(:subscription_groups_builder) { |val| !val.nil? }
46
+ end
47
+
48
+ nested(:processing) do
49
+ required(:jobs_builder) { |val| !val.nil? }
50
+ required(:scheduler) { |val| !val.nil? }
51
+ required(:coordinator_class) { |val| !val.nil? }
52
+ required(:partitioner_class) { |val| !val.nil? }
18
53
  end
19
54
 
20
- required(:client_id).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
21
- required(:concurrency) { int? & gt?(0) }
22
- required(:consumer_mapper).filled
23
- required(:consumer_persistence).filled(:bool?)
24
- required(:pause_timeout) { int? & gt?(0) }
25
- required(:pause_max_timeout) { int? & gt?(0) }
26
- required(:pause_with_exponential_backoff).filled(:bool?)
27
- required(:shutdown_timeout) { int? & gt?(0) }
28
- required(:max_wait_time) { int? & gt?(0) }
29
- required(:kafka).filled(:hash)
30
-
31
- # We validate internals just to be sure, that they are present and working
32
- required(:internal).schema do
33
- required(:status)
34
- required(:process)
35
-
36
- required(:routing).schema do
37
- required(:builder)
38
- required(:subscription_groups_builder)
39
- end
40
-
41
- required(:processing).schema do
42
- required(:jobs_builder)
43
- required(:scheduler)
44
- required(:coordinator_class)
45
- end
46
-
47
- required(:active_job).schema do
48
- required(:dispatcher)
49
- required(:job_options_contract)
50
- required(:consumer_class)
51
- end
55
+ nested(:active_job) do
56
+ required(:dispatcher) { |val| !val.nil? }
57
+ required(:job_options_contract) { |val| !val.nil? }
58
+ required(:consumer_class) { |val| !val.nil? }
52
59
  end
53
60
  end
54
61
 
55
- # rdkafka requires all the keys to be strings, so we ensure that
56
- rule(:kafka) do
57
- next unless value.is_a?(Hash)
62
+ virtual do |data, errors|
63
+ next unless errors.empty?
64
+
65
+ detected_errors = []
58
66
 
59
- value.each_key do |key|
67
+ data.fetch(:kafka).each_key do |key|
60
68
  next if key.is_a?(Symbol)
61
69
 
62
- key(:"kafka.#{key}").failure(:kafka_key_must_be_a_symbol)
70
+ detected_errors << [[:kafka, key], :key_must_be_a_symbol]
63
71
  end
72
+
73
+ detected_errors
64
74
  end
65
75
 
66
- rule(:pause_timeout, :pause_max_timeout) do
67
- if values[:pause_timeout].to_i > values[:pause_max_timeout].to_i
68
- key(:pause_timeout).failure(:max_timeout_vs_pause_max_timeout)
69
- end
76
+ virtual do |data, errors|
77
+ next unless errors.empty?
78
+
79
+ pause_timeout = data.fetch(:pause_timeout)
80
+ pause_max_timeout = data.fetch(:pause_max_timeout)
81
+
82
+ next if pause_timeout <= pause_max_timeout
83
+
84
+ [[%i[pause_timeout], :max_timeout_vs_pause_max_timeout]]
70
85
  end
71
86
 
72
- rule(:shutdown_timeout, :max_wait_time) do
73
- if values[:max_wait_time].to_i >= values[:shutdown_timeout].to_i
74
- key(:shutdown_timeout).failure(:shutdown_timeout_vs_max_wait_time)
75
- end
87
+ virtual do |data, errors|
88
+ next unless errors.empty?
89
+
90
+ shutdown_timeout = data.fetch(:shutdown_timeout)
91
+ max_wait_time = data.fetch(:max_wait_time)
92
+
93
+ next if max_wait_time < shutdown_timeout
94
+
95
+ [[%i[shutdown_timeout], :shutdown_timeout_vs_max_wait_time]]
76
96
  end
77
97
  end
78
98
  end
@@ -4,32 +4,39 @@ module Karafka
4
4
  module Contracts
5
5
  # Contract for single full route (consumer group + topics) validation.
6
6
  class ConsumerGroup < Base
7
- # Internal contract for sub-validating topics schema
8
- TOPIC_CONTRACT = ConsumerGroupTopic.new.freeze
7
+ configure do |config|
8
+ config.error_messages = YAML.safe_load(
9
+ File.read(
10
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
11
+ )
12
+ ).fetch('en').fetch('validations').fetch('consumer_group')
13
+ end
9
14
 
10
- private_constant :TOPIC_CONTRACT
15
+ required(:id) { |id| id.is_a?(String) && Contracts::TOPIC_REGEXP.match?(id) }
16
+ required(:topics) { |topics| topics.is_a?(Array) && !topics.empty? }
11
17
 
12
- params do
13
- required(:id).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
14
- required(:topics).value(:array, :filled?)
15
- end
18
+ virtual do |data, errors|
19
+ next unless errors.empty?
16
20
 
17
- rule(:topics) do
18
- if value.is_a?(Array)
19
- names = value.map { |topic| topic[:name] }
21
+ names = data.fetch(:topics).map { |topic| topic[:name] }
20
22
 
21
- key.failure(:topics_names_not_unique) if names.size != names.uniq.size
22
- end
23
+ next if names.size == names.uniq.size
24
+
25
+ [[%i[topics], :names_not_unique]]
23
26
  end
24
27
 
25
- rule(:topics) do
26
- if value.is_a?(Array)
27
- value.each_with_index do |topic, index|
28
- TOPIC_CONTRACT.call(topic).errors.each do |error|
29
- key([:topics, index, error.path[0]]).failure(error.text)
30
- end
28
+ virtual do |data, errors|
29
+ next unless errors.empty?
30
+
31
+ fetched_errors = []
32
+
33
+ data.fetch(:topics).each do |topic|
34
+ ConsumerGroupTopic.new.call(topic).errors.each do |key, value|
35
+ fetched_errors << [[topic, key].flatten, value]
31
36
  end
32
37
  end
38
+
39
+ fetched_errors
33
40
  end
34
41
  end
35
42
  end
@@ -4,24 +4,38 @@ module Karafka
4
4
  module Contracts
5
5
  # Consumer group topic validation rules.
6
6
  class ConsumerGroupTopic < Base
7
- params do
8
- required(:consumer).filled
9
- required(:deserializer).filled
10
- required(:id).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
11
- required(:kafka).filled
12
- required(:max_messages) { int? & gteq?(1) }
13
- required(:initial_offset).filled(included_in?: %w[earliest latest])
14
- required(:max_wait_time).filled { int? & gteq?(10) }
15
- required(:manual_offset_management).filled(:bool?)
16
- required(:name).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
7
+ configure do |config|
8
+ config.error_messages = YAML.safe_load(
9
+ File.read(
10
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
11
+ )
12
+ ).fetch('en').fetch('validations').fetch('consumer_group_topic')
17
13
  end
18
14
 
19
- rule(:kafka) do
20
- # This will trigger rdkafka validations that we catch and re-map the info and use dry
21
- # compatible format
22
- Rdkafka::Config.new(value).send(:native_config)
23
- rescue Rdkafka::Config::ConfigError => e
24
- key(:kafka).failure(e.message)
15
+ required(:consumer) { |consumer_group| !consumer_group.nil? }
16
+ required(:deserializer) { |deserializer| !deserializer.nil? }
17
+ required(:id) { |id| id.is_a?(String) && Contracts::TOPIC_REGEXP.match?(id) }
18
+ required(:kafka) { |kafka| kafka.is_a?(Hash) && !kafka.empty? }
19
+ required(:max_messages) { |mm| mm.is_a?(Integer) && mm >= 1 }
20
+ required(:initial_offset) { |io| %w[earliest latest].include?(io) }
21
+ required(:max_wait_time) { |mwt| mwt.is_a?(Integer) && mwt >= 10 }
22
+ required(:manual_offset_management) { |mmm| [true, false].include?(mmm) }
23
+ required(:name) { |name| name.is_a?(String) && Contracts::TOPIC_REGEXP.match?(name) }
24
+
25
+ virtual do |data, errors|
26
+ next unless errors.empty?
27
+
28
+ value = data.fetch(:kafka)
29
+
30
+ begin
31
+ # This will trigger rdkafka validations that we catch and re-map the info and use dry
32
+ # compatible format
33
+ Rdkafka::Config.new(value).send(:native_config)
34
+
35
+ nil
36
+ rescue Rdkafka::Config::ConfigError => e
37
+ [[%w[kafka], e.message]]
38
+ end
25
39
  end
26
40
  end
27
41
  end
@@ -4,17 +4,28 @@ module Karafka
4
4
  module Contracts
5
5
  # Contract for validating correctness of the server cli command options.
6
6
  class ServerCliOptions < Base
7
- params do
8
- optional(:consumer_groups).value(:array, :filled?)
7
+ configure do |config|
8
+ config.error_messages = YAML.safe_load(
9
+ File.read(
10
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
11
+ )
12
+ ).fetch('en').fetch('validations').fetch('server_cli_options')
9
13
  end
10
14
 
11
- rule(:consumer_groups) do
15
+ optional(:consumer_groups) { |cg| cg.is_a?(Array) && !cg.empty? }
16
+
17
+ virtual do |data, errors|
18
+ next unless errors.empty?
19
+ next unless data.key?(:consumer_groups)
20
+
21
+ value = data.fetch(:consumer_groups)
22
+
12
23
  # If there were no consumer_groups declared in the server cli, it means that we will
13
24
  # run all of them and no need to validate them here at all
14
- if !value.nil? &&
15
- !(value - Karafka::App.config.internal.routing.builder.map(&:name)).empty?
16
- key(:consumer_groups).failure(:consumer_groups_inclusion)
17
- end
25
+ next if value.nil?
26
+ next if (value - Karafka::App.config.internal.routing.builder.map(&:name)).empty?
27
+
28
+ [[%i[consumer_groups], :consumer_groups_inclusion]]
18
29
  end
19
30
  end
20
31
  end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Helpers
5
+ # Simple wrapper for adding colors to strings
6
+ module Colorize
7
+ # @param string [String] string we want to have in green
8
+ # @return [String] green string
9
+ def green(string)
10
+ "\033[0;32m#{string}\033[0m"
11
+ end
12
+
13
+ # @param string [String] string we want to have in red
14
+ # @return [String] red string
15
+ def red(string)
16
+ "\033[0;31m#{string}\033[0m"
17
+ end
18
+ end
19
+ end
20
+ end
@@ -18,7 +18,7 @@ module Karafka
18
18
  # @param event [Dry::Events::Event] event details including payload
19
19
  def on_connection_listener_fetch_loop(event)
20
20
  listener = event[:caller]
21
- info "[#{listener.id}] Polling messages..."
21
+ debug "[#{listener.id}] Polling messages..."
22
22
  end
23
23
 
24
24
  # Logs about messages that we've received from Kafka
@@ -28,7 +28,13 @@ module Karafka
28
28
  listener = event[:caller]
29
29
  time = event[:time]
30
30
  messages_count = event[:messages_buffer].size
31
- info "[#{listener.id}] Polled #{messages_count} messages in #{time}ms"
31
+
32
+ message = "[#{listener.id}] Polled #{messages_count} messages in #{time}ms"
33
+
34
+ # We don't want the "polled 0" in dev as it would spam the log
35
+ # Instead we publish only info when there was anything we could poll and fail over to the
36
+ # zero notifications when in debug mode
37
+ messages_count.zero? ? debug(message) : info(message)
32
38
  end
33
39
 
34
40
  # Prints info about the fact that a given job has started
@@ -0,0 +1 @@
1
+ {"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.connection.connects{*} by {host}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{*} by {type}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*}.as_count()","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing 
errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by 
{partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.revoked{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":5}},{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers 
(p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":5,"width":12,"height":3}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per 
topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 
1000"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumption_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Processing lag in ms (p95)","formula":"query1"},{"alias":"Processing lag in ms (max)","formula":"query2"},{"alias":"Processing lag in ms (avg)","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"},{"formula":"query2"},{"formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by 
{topic,partition}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / 
query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms polling time","formula":"query1"},{"alias":"max ms polling time","formula":"query2"},{"alias":"average ms polling time","formula":"query3"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.95percentile{*}"},{"name":"query2","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.max{*}"},{"name":"query3","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.avg{*}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":7,"is_column_break":true}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}