karafka 2.0.0.beta5 → 2.0.0.rc3

Files changed (53)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +28 -0
  4. data/CONTRIBUTING.md +0 -5
  5. data/Gemfile.lock +12 -42
  6. data/LICENSE-COMM +1 -1
  7. data/README.md +44 -16
  8. data/bin/stress_many +1 -1
  9. data/bin/stress_one +1 -1
  10. data/config/errors.yml +52 -5
  11. data/docker-compose.yml +7 -0
  12. data/karafka.gemspec +2 -4
  13. data/lib/karafka/active_job/consumer.rb +2 -0
  14. data/lib/karafka/active_job/job_options_contract.rb +8 -2
  15. data/lib/karafka/base_consumer.rb +4 -6
  16. data/lib/karafka/cli/install.rb +15 -2
  17. data/lib/karafka/cli/server.rb +4 -2
  18. data/lib/karafka/connection/client.rb +20 -17
  19. data/lib/karafka/connection/listener.rb +12 -24
  20. data/lib/karafka/connection/pauses_manager.rb +0 -8
  21. data/lib/karafka/contracts/base.rb +2 -8
  22. data/lib/karafka/contracts/config.rb +71 -51
  23. data/lib/karafka/contracts/consumer_group.rb +25 -18
  24. data/lib/karafka/contracts/consumer_group_topic.rb +30 -16
  25. data/lib/karafka/contracts/server_cli_options.rb +18 -7
  26. data/lib/karafka/helpers/colorize.rb +20 -0
  27. data/lib/karafka/instrumentation/logger_listener.rb +8 -2
  28. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  29. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  30. data/lib/karafka/pro/active_job/dispatcher.rb +5 -2
  31. data/lib/karafka/pro/active_job/job_options_contract.rb +11 -6
  32. data/lib/karafka/pro/base_consumer.rb +21 -12
  33. data/lib/karafka/pro/contracts/base.rb +21 -0
  34. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  35. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  36. data/lib/karafka/pro/loader.rb +23 -3
  37. data/lib/karafka/pro/processing/coordinator.rb +51 -0
  38. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  39. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  40. data/lib/karafka/pro/routing/{extensions.rb → topic_extensions.rb} +7 -1
  41. data/lib/karafka/processing/coordinator.rb +6 -2
  42. data/lib/karafka/processing/coordinators_buffer.rb +3 -7
  43. data/lib/karafka/processing/executor.rb +1 -1
  44. data/lib/karafka/processing/jobs_queue.rb +11 -0
  45. data/lib/karafka/processing/partitioner.rb +22 -0
  46. data/lib/karafka/processing/worker.rb +4 -2
  47. data/lib/karafka/setup/config.rb +9 -3
  48. data/lib/karafka/templates/example_consumer.rb.erb +2 -2
  49. data/lib/karafka/version.rb +1 -1
  50. data/lib/karafka.rb +2 -2
  51. data.tar.gz.sig +0 -0
  52. metadata +15 -34
  53. metadata.gz.sig +0 -0
data/lib/karafka/connection/listener.rb
@@ -18,15 +18,18 @@ module Karafka
  # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
  # @return [Karafka::Connection::Listener] listener instance
  def initialize(subscription_group, jobs_queue)
+ proc_config = ::Karafka::App.config.internal.processing
+
  @id = SecureRandom.uuid
  @subscription_group = subscription_group
  @jobs_queue = jobs_queue
- @jobs_builder = ::Karafka::App.config.internal.processing.jobs_builder
  @coordinators = Processing::CoordinatorsBuffer.new
  @client = Client.new(@subscription_group)
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+ @jobs_builder = proc_config.jobs_builder
+ @partitioner = proc_config.partitioner_class.new(subscription_group)
  # We reference scheduler here as it is much faster than fetching this each time
- @scheduler = ::Karafka::App.config.internal.processing.scheduler
+ @scheduler = proc_config.scheduler
  # We keep one buffer for messages to preserve memory and not allocate extra objects
  # We can do this that way because we always first schedule jobs using messages before we
  # fetch another batch.
@@ -79,10 +82,6 @@ module Karafka
  poll_and_remap_messages
  end

- # This will ensure, that in the next poll, we continue processing (if we get them back)
- # partitions that we have paused
- resume_assigned_partitions
-
  # If there were revoked partitions, we need to wait on their jobs to finish before
  # distributing consuming jobs as upon revoking, we might get assigned to the same
  # partitions, thus getting their jobs. The revoking jobs need to finish before
@@ -159,8 +158,6 @@ module Karafka

  revoked_partitions.each do |topic, partitions|
  partitions.each do |partition|
- # We revoke the coordinator here, so we do not have to revoke it in the revoke job
- # itself (this happens prior to scheduling those jobs)
  @coordinators.revoke(topic, partition)

  # There may be a case where we have lost partition of which data we have never
@@ -204,17 +201,6 @@ module Karafka
  )
  end

- # Revoked partition needs to be resumed if we were processing them earlier. This will do
- # nothing to things that we are planning to process. Without this, things we get
- # re-assigned would not be polled.
- def resume_assigned_partitions
- @client.rebalance_manager.assigned_partitions.each do |topic, partitions|
- partitions.each do |partition|
- @client.resume(topic, partition)
- end
- end
- end
-
  # Takes the messages per topic partition and enqueues processing jobs in threads using
  # given scheduler.
  def build_and_schedule_consumption_jobs
@@ -226,14 +212,16 @@ module Karafka
  coordinator = @coordinators.find_or_create(topic, partition)

  # Start work coordination for this topic partition
- coordinator.start
+ coordinator.start(messages)

- # Count the job we're going to create here
- coordinator.increment
+ @partitioner.call(topic, messages) do |group_id, partition_messages|
+ # Count the job we're going to create here
+ coordinator.increment

- executor = @executors.find_or_create(topic, partition, 0)
+ executor = @executors.find_or_create(topic, partition, group_id)

- jobs << @jobs_builder.consume(executor, messages, coordinator)
+ jobs << @jobs_builder.consume(executor, partition_messages, coordinator)
+ end
  end

  @scheduler.schedule_consumption(@jobs_queue, jobs)
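
Note on the change above: the listener now delegates work splitting to a partitioner resolved from `internal.processing.partitioner_class` and builds one job per yielded message group. The default `Karafka::Processing::Partitioner` added in this release (+22 lines) is not shown in this diff; the sketch below is an assumption of what a compatible implementation looks like, not the shipped code.

# Sketch only: a partitioner compatible with the call sites above (assumption, not shipped code)
module Karafka
  module Processing
    class Partitioner
      # @param subscription_group [Karafka::Routing::SubscriptionGroup] owning subscription group
      def initialize(subscription_group)
        @subscription_group = subscription_group
      end

      # @param _topic [String] topic name
      # @param messages [Array<Karafka::Messages::Message>] messages from a single partition
      # @yieldparam [Integer] group id used to look up an executor
      # @yieldparam [Array<Karafka::Messages::Message>] messages for that group
      def call(_topic, messages)
        # One group with id 0 mirrors the previous find_or_create(topic, partition, 0) behaviour;
        # a Pro partitioner could yield several groups to parallelize a single partition.
        yield(0, messages)
      end
    end
  end
end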
data/lib/karafka/connection/pauses_manager.rb
@@ -25,14 +25,6 @@ module Karafka
  )
  end

- # Revokes pause tracker for a given topic partition
- #
- # @param topic [String] topic name
- # @param partition [Integer] partition number
- def revoke(topic, partition)
- @pauses[topic].delete(partition)
- end
-
  # Resumes processing of partitions for which pause time has ended.
  #
  # @yieldparam [String] topic name
data/lib/karafka/contracts/base.rb
@@ -3,20 +3,14 @@
  module Karafka
  module Contracts
  # Base contract for all Karafka contracts
- class Base < Dry::Validation::Contract
- config.messages.load_paths << File.join(Karafka.gem_root, 'config', 'errors.yml')
-
+ class Base < ::WaterDrop::Contractable::Contract
  # @param data [Hash] data for validation
  # @return [Boolean] true if all good
  # @raise [Errors::InvalidConfigurationError] invalid configuration error
  # @note We use contracts only in the config validation context, so no need to add support
  #   for multiple error classes. It will be added when it will be needed.
  def validate!(data)
- result = call(data)
-
- return true if result.success?
-
- raise Errors::InvalidConfigurationError, result.errors.to_h
+ super(data, Errors::InvalidConfigurationError)
  end
  end
  end
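
Note: the base class swap from Dry::Validation::Contract to WaterDrop's contract keeps the `validate!` semantics intact. A usage sketch (assuming the config object exposes `to_h`, as Karafka's setup code does elsewhere):

# Validation still either passes or raises, so callers do not need to change
contract = Karafka::Contracts::Config.new

contract.validate!(Karafka::App.config.to_h)
# => true when the configuration is valid
# raises Karafka::Errors::InvalidConfigurationError with an errors hash otherwise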
data/lib/karafka/contracts/config.rb
@@ -9,70 +9,90 @@ module Karafka
  # validated per each route (topic + consumer_group) because they can be overwritten,
  # so we validate all of that once all the routes are defined and ready.
  class Config < Base
- params do
- # License validity happens in the licenser. Here we do only the simple consistency checks
- required(:license).schema do
- required(:token) { bool? | str? }
- required(:entity) { str? }
- required(:expires_on) { date? }
+ configure do |config|
+ config.error_messages = YAML.safe_load(
+ File.read(
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
+ )
+ ).fetch('en').fetch('validations').fetch('config')
+ end
+
+ # License validity happens in the licenser. Here we do only the simple consistency checks
+ nested(:license) do
+ required(:token) { |val| [true, false].include?(val) || val.is_a?(String) }
+ required(:entity) { |val| val.is_a?(String) }
+ required(:expires_on) { |val| val.is_a?(Date) }
+ end
+
+ required(:client_id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
+ required(:concurrency) { |val| val.is_a?(Integer) && val.positive? }
+ required(:consumer_mapper) { |val| !val.nil? }
+ required(:consumer_persistence) { |val| [true, false].include?(val) }
+ required(:pause_timeout) { |val| val.is_a?(Integer) && val.positive? }
+ required(:pause_max_timeout) { |val| val.is_a?(Integer) && val.positive? }
+ required(:pause_with_exponential_backoff) { |val| [true, false].include?(val) }
+ required(:shutdown_timeout) { |val| val.is_a?(Integer) && val.positive? }
+ required(:max_wait_time) { |val| val.is_a?(Integer) && val.positive? }
+ required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
+
+ # We validate internals just to be sure, that they are present and working
+ nested(:internal) do
+ required(:status) { |val| !val.nil? }
+ required(:process) { |val| !val.nil? }
+
+ nested(:routing) do
+ required(:builder) { |val| !val.nil? }
+ required(:subscription_groups_builder) { |val| !val.nil? }
+ end
+
+ nested(:processing) do
+ required(:jobs_builder) { |val| !val.nil? }
+ required(:scheduler) { |val| !val.nil? }
+ required(:coordinator_class) { |val| !val.nil? }
+ required(:partitioner_class) { |val| !val.nil? }
  end

- required(:client_id).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
- required(:concurrency) { int? & gt?(0) }
- required(:consumer_mapper).filled
- required(:consumer_persistence).filled(:bool?)
- required(:pause_timeout) { int? & gt?(0) }
- required(:pause_max_timeout) { int? & gt?(0) }
- required(:pause_with_exponential_backoff).filled(:bool?)
- required(:shutdown_timeout) { int? & gt?(0) }
- required(:max_wait_time) { int? & gt?(0) }
- required(:kafka).filled(:hash)
-
- # We validate internals just to be sure, that they are present and working
- required(:internal).schema do
- required(:status)
- required(:process)
-
- required(:routing).schema do
- required(:builder)
- required(:subscription_groups_builder)
- end
-
- required(:processing).schema do
- required(:jobs_builder)
- required(:scheduler)
- required(:coordinator_class)
- end
-
- required(:active_job).schema do
- required(:dispatcher)
- required(:job_options_contract)
- required(:consumer_class)
- end
+ nested(:active_job) do
+ required(:dispatcher) { |val| !val.nil? }
+ required(:job_options_contract) { |val| !val.nil? }
+ required(:consumer_class) { |val| !val.nil? }
  end
  end

- # rdkafka requires all the keys to be strings, so we ensure that
- rule(:kafka) do
- next unless value.is_a?(Hash)
+ virtual do |data, errors|
+ next unless errors.empty?
+
+ detected_errors = []

- value.each_key do |key|
+ data.fetch(:kafka).each_key do |key|
  next if key.is_a?(Symbol)

- key(:"kafka.#{key}").failure(:kafka_key_must_be_a_symbol)
+ detected_errors << [[:kafka, key], :key_must_be_a_symbol]
  end
+
+ detected_errors
  end

- rule(:pause_timeout, :pause_max_timeout) do
- if values[:pause_timeout].to_i > values[:pause_max_timeout].to_i
- key(:pause_timeout).failure(:max_timeout_vs_pause_max_timeout)
- end
+ virtual do |data, errors|
+ next unless errors.empty?
+
+ pause_timeout = data.fetch(:pause_timeout)
+ pause_max_timeout = data.fetch(:pause_max_timeout)
+
+ next if pause_timeout <= pause_max_timeout
+
+ [[%i[pause_timeout], :max_timeout_vs_pause_max_timeout]]
  end

- rule(:shutdown_timeout, :max_wait_time) do
- if values[:max_wait_time].to_i >= values[:shutdown_timeout].to_i
- key(:shutdown_timeout).failure(:shutdown_timeout_vs_max_wait_time)
- end
+ virtual do |data, errors|
+ next unless errors.empty?
+
+ shutdown_timeout = data.fetch(:shutdown_timeout)
+ max_wait_time = data.fetch(:max_wait_time)
+
+ next if max_wait_time < shutdown_timeout
+
+ [[%i[shutdown_timeout], :shutdown_timeout_vs_max_wait_time]]
  end
  end
  end
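
Note: the dry-validation `params`/`rule` pairs above are replaced with a small declarative DSL coming from WaterDrop: `required`/`optional` take a predicate block, `nested` scopes a sub-hash, and `virtual` blocks run cross-field checks once the per-field checks pass, returning `[path, error_key]` tuples resolved against `config/errors.yml`. A minimal sketch of a hypothetical contract using this DSL (error messages are inlined here instead of being loaded from errors.yml, purely for illustration):

class ExamplePauseContract < Karafka::Contracts::Base
  configure do |config|
    # Maps error keys to human-readable messages (normally fetched from errors.yml)
    config.error_messages = {
      'missing' => 'needs to be present',
      'max_timeout_vs_pause_max_timeout' => 'pause_timeout must be less or equal to pause_max_timeout'
    }
  end

  nested(:pause) do
    required(:timeout) { |val| val.is_a?(Integer) && val.positive? }
    required(:max_timeout) { |val| val.is_a?(Integer) && val.positive? }
  end

  # Cross-field rule: runs only when the field-level checks passed
  virtual do |data, errors|
    next unless errors.empty?
    next if data.dig(:pause, :timeout) <= data.dig(:pause, :max_timeout)

    [[%i[pause timeout], :max_timeout_vs_pause_max_timeout]]
  end
end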
data/lib/karafka/contracts/consumer_group.rb
@@ -4,32 +4,39 @@ module Karafka
  module Contracts
  # Contract for single full route (consumer group + topics) validation.
  class ConsumerGroup < Base
- # Internal contract for sub-validating topics schema
- TOPIC_CONTRACT = ConsumerGroupTopic.new.freeze
+ configure do |config|
+ config.error_messages = YAML.safe_load(
+ File.read(
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
+ )
+ ).fetch('en').fetch('validations').fetch('consumer_group')
+ end

- private_constant :TOPIC_CONTRACT
+ required(:id) { |id| id.is_a?(String) && Contracts::TOPIC_REGEXP.match?(id) }
+ required(:topics) { |topics| topics.is_a?(Array) && !topics.empty? }

- params do
- required(:id).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
- required(:topics).value(:array, :filled?)
- end
+ virtual do |data, errors|
+ next unless errors.empty?

- rule(:topics) do
- if value.is_a?(Array)
- names = value.map { |topic| topic[:name] }
+ names = data.fetch(:topics).map { |topic| topic[:name] }

- key.failure(:topics_names_not_unique) if names.size != names.uniq.size
- end
+ next if names.size == names.uniq.size
+
+ [[%i[topics], :names_not_unique]]
  end

- rule(:topics) do
- if value.is_a?(Array)
- value.each_with_index do |topic, index|
- TOPIC_CONTRACT.call(topic).errors.each do |error|
- key([:topics, index, error.path[0]]).failure(error.text)
- end
+ virtual do |data, errors|
+ next unless errors.empty?
+
+ fetched_errors = []
+
+ data.fetch(:topics).each do |topic|
+ ConsumerGroupTopic.new.call(topic).errors.each do |key, value|
+ fetched_errors << [[topic, key].flatten, value]
  end
  end
+
+ fetched_errors
  end
  end
  end
data/lib/karafka/contracts/consumer_group_topic.rb
@@ -4,24 +4,38 @@ module Karafka
  module Contracts
  # Consumer group topic validation rules.
  class ConsumerGroupTopic < Base
- params do
- required(:consumer).filled
- required(:deserializer).filled
- required(:id).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
- required(:kafka).filled
- required(:max_messages) { int? & gteq?(1) }
- required(:initial_offset).filled(included_in?: %w[earliest latest])
- required(:max_wait_time).filled { int? & gteq?(10) }
- required(:manual_offset_management).filled(:bool?)
- required(:name).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
+ configure do |config|
+ config.error_messages = YAML.safe_load(
+ File.read(
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
+ )
+ ).fetch('en').fetch('validations').fetch('consumer_group_topic')
  end

- rule(:kafka) do
- # This will trigger rdkafka validations that we catch and re-map the info and use dry
- # compatible format
- Rdkafka::Config.new(value).send(:native_config)
- rescue Rdkafka::Config::ConfigError => e
- key(:kafka).failure(e.message)
+ required(:consumer) { |consumer_group| !consumer_group.nil? }
+ required(:deserializer) { |deserializer| !deserializer.nil? }
+ required(:id) { |id| id.is_a?(String) && Contracts::TOPIC_REGEXP.match?(id) }
+ required(:kafka) { |kafka| kafka.is_a?(Hash) && !kafka.empty? }
+ required(:max_messages) { |mm| mm.is_a?(Integer) && mm >= 1 }
+ required(:initial_offset) { |io| %w[earliest latest].include?(io) }
+ required(:max_wait_time) { |mwt| mwt.is_a?(Integer) && mwt >= 10 }
+ required(:manual_offset_management) { |mmm| [true, false].include?(mmm) }
+ required(:name) { |name| name.is_a?(String) && Contracts::TOPIC_REGEXP.match?(name) }
+
+ virtual do |data, errors|
+ next unless errors.empty?
+
+ value = data.fetch(:kafka)
+
+ begin
+ # This will trigger rdkafka validations that we catch and re-map the info and use dry
+ # compatible format
+ Rdkafka::Config.new(value).send(:native_config)
+
+ nil
+ rescue Rdkafka::Config::ConfigError => e
+ [[%w[kafka], e.message]]
+ end
  end
  end
  end
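
Note: the Kafka-settings check is unchanged in spirit; it still delegates to librdkafka via rdkafka-ruby and re-maps any failure into a `kafka` error. An illustration of what that underlying call does with an unknown property (the property name below is deliberately invalid):

begin
  # native_config is private, hence the send - the contract above does the same
  Rdkafka::Config.new('bootstrap.servers' => 'localhost:9092', 'not.a.real.option' => 'x').send(:native_config)
rescue Rdkafka::Config::ConfigError => e
  e.message # e.g. a "No such configuration property" style message from librdkafka
end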
data/lib/karafka/contracts/server_cli_options.rb
@@ -4,17 +4,28 @@ module Karafka
  module Contracts
  # Contract for validating correctness of the server cli command options.
  class ServerCliOptions < Base
- params do
- optional(:consumer_groups).value(:array, :filled?)
+ configure do |config|
+ config.error_messages = YAML.safe_load(
+ File.read(
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
+ )
+ ).fetch('en').fetch('validations').fetch('server_cli_options')
  end

- rule(:consumer_groups) do
+ optional(:consumer_groups) { |cg| cg.is_a?(Array) && !cg.empty? }
+
+ virtual do |data, errors|
+ next unless errors.empty?
+ next unless data.key?(:consumer_groups)
+
+ value = data.fetch(:consumer_groups)
+
  # If there were no consumer_groups declared in the server cli, it means that we will
  # run all of them and no need to validate them here at all
- if !value.nil? &&
- !(value - Karafka::App.config.internal.routing.builder.map(&:name)).empty?
- key(:consumer_groups).failure(:consumer_groups_inclusion)
- end
+ next if value.nil?
+ next if (value - Karafka::App.config.internal.routing.builder.map(&:name)).empty?
+
+ [[%i[consumer_groups], :consumer_groups_inclusion]]
  end
  end
  end
data/lib/karafka/helpers/colorize.rb (new file)
@@ -0,0 +1,20 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Helpers
+ # Simple wrapper for adding colors to strings
+ module Colorize
+ # @param string [String] string we want to have in green
+ # @return [String] green string
+ def green(string)
+ "\033[0;32m#{string}\033[0m"
+ end
+
+ # @param string [String] string we want to have in red
+ # @return [String] red string
+ def red(string)
+ "\033[0;31m#{string}\033[0m"
+ end
+ end
+ end
+ end
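
Note: a small usage sketch for the new helper, assuming an ANSI-capable terminal (the helper is presumably used by the CLI commands touched in this release, such as `karafka install`; the strings below are illustrative):

require 'karafka'

include Karafka::Helpers::Colorize

puts green('Installed karafka.rb') # wrapped in the ANSI green escape sequence
puts red('Kafka connection failed') # wrapped in the ANSI red escape sequence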
data/lib/karafka/instrumentation/logger_listener.rb
@@ -18,7 +18,7 @@ module Karafka
  # @param event [Dry::Events::Event] event details including payload
  def on_connection_listener_fetch_loop(event)
  listener = event[:caller]
- info "[#{listener.id}] Polling messages..."
+ debug "[#{listener.id}] Polling messages..."
  end

  # Logs about messages that we've received from Kafka
@@ -28,7 +28,13 @@ module Karafka
  listener = event[:caller]
  time = event[:time]
  messages_count = event[:messages_buffer].size
- info "[#{listener.id}] Polled #{messages_count} messages in #{time}ms"
+
+ message = "[#{listener.id}] Polled #{messages_count} messages in #{time}ms"
+
+ # We don't want the "polled 0" in dev as it would spam the log
+ # Instead we publish only info when there was anything we could poll and fail over to the
+ # zero notifications when in debug mode
+ messages_count.zero? ? debug(message) : info(message)
  end

  # Prints info about the fact that a given job has started
data/lib/karafka/instrumentation/vendors/datadog/dashboard.json (new file)
@@ -0,0 +1 @@
+ {"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.connection.connects{*} by {host}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{*} by {type}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*}.as_count()","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch 
processing with error","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.revoked{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":5}},{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":5,"width":12,"height":3}},{"id":8544040083223278,"definition":{"title":"Throughput 
","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 1000"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumption_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Processing lag in ms (p95)","formula":"query1"},{"alias":"Processing lag in ms (max)","formula":"query2"},{"alias":"Processing lag in ms 
(avg)","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"},{"formula":"query2"},{"formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by {topic,partition}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms polling time","formula":"query1"},{"alias":"max ms polling time","formula":"query2"},{"alias":"average ms polling 
time","formula":"query3"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.95percentile{*}"},{"name":"query2","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.max{*}"},{"name":"query3","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.avg{*}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":7,"is_column_break":true}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
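
Note: this release also adds a Datadog/StatsD listener (`datadog/listener.rb`, 232 lines, not included in this excerpt) that emits the `karafka.*` metrics charted by the dashboard above. Its setup API is not shown in this diff; the sketch below follows the vendor-listener pattern Karafka documents, and the option names (`client`, `default_tags`) should be treated as assumptions:

require 'socket'
require 'datadog/statsd'
require 'karafka/instrumentation/vendors/datadog/listener'

listener = ::Karafka::Instrumentation::Vendors::Datadog::Listener.new do |config|
  # dogstatsd-ruby client pointed at the local Datadog agent
  config.client = Datadog::Statsd.new('localhost', 8125)
  config.default_tags = ["host:#{Socket.gethostname}"]
end

# Subscribe so the listener receives karafka.* instrumentation events
Karafka.monitor.subscribe(listener)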