karafka-web 0.5.2 → 0.6.0

Files changed (57)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +60 -6
  4. data/Gemfile.lock +14 -14
  5. data/karafka-web.gemspec +3 -3
  6. data/lib/karafka/web/config.rb +11 -5
  7. data/lib/karafka/web/installer.rb +2 -3
  8. data/lib/karafka/web/tracking/consumers/contracts/consumer_group.rb +1 -1
  9. data/lib/karafka/web/tracking/consumers/contracts/job.rb +4 -1
  10. data/lib/karafka/web/tracking/consumers/contracts/partition.rb +1 -1
  11. data/lib/karafka/web/tracking/consumers/contracts/report.rb +1 -1
  12. data/lib/karafka/web/tracking/consumers/contracts/subscription_group.rb +1 -1
  13. data/lib/karafka/web/tracking/consumers/contracts/topic.rb +3 -1
  14. data/lib/karafka/web/tracking/consumers/listeners/base.rb +2 -2
  15. data/lib/karafka/web/tracking/consumers/listeners/errors.rb +8 -44
  16. data/lib/karafka/web/tracking/consumers/listeners/processing.rb +5 -0
  17. data/lib/karafka/web/tracking/consumers/reporter.rb +151 -0
  18. data/lib/karafka/web/tracking/contracts/base.rb +34 -0
  19. data/lib/karafka/web/tracking/contracts/error.rb +31 -0
  20. data/lib/karafka/web/tracking/helpers/error_info.rb +50 -0
  21. data/lib/karafka/web/tracking/memoized_shell.rb +1 -1
  22. data/lib/karafka/web/tracking/producers/listeners/base.rb +33 -0
  23. data/lib/karafka/web/tracking/producers/listeners/errors.rb +66 -0
  24. data/lib/karafka/web/tracking/producers/listeners/reporter.rb +21 -0
  25. data/lib/karafka/web/tracking/producers/reporter.rb +101 -0
  26. data/lib/karafka/web/tracking/producers/sampler.rb +42 -0
  27. data/lib/karafka/web/ui/controllers/consumers.rb +2 -4
  28. data/lib/karafka/web/ui/models/counters.rb +51 -0
  29. data/lib/karafka/web/ui/pro/controllers/consumers.rb +2 -3
  30. data/lib/karafka/web/ui/pro/views/consumers/consumer/_job.erb +6 -6
  31. data/lib/karafka/web/ui/pro/views/consumers/index.erb +25 -21
  32. data/lib/karafka/web/ui/pro/views/consumers/jobs.erb +1 -1
  33. data/lib/karafka/web/ui/pro/views/errors/_breadcrumbs.erb +1 -2
  34. data/lib/karafka/web/ui/pro/views/errors/_error.erb +8 -6
  35. data/lib/karafka/web/ui/pro/views/errors/show.erb +3 -2
  36. data/lib/karafka/web/ui/public/stylesheets/application.css +4 -0
  37. data/lib/karafka/web/ui/views/consumers/_no_consumers.erb +9 -0
  38. data/lib/karafka/web/ui/views/consumers/index.erb +24 -20
  39. data/lib/karafka/web/ui/views/errors/_breadcrumbs.erb +1 -2
  40. data/lib/karafka/web/ui/views/errors/_detail.erb +9 -1
  41. data/lib/karafka/web/ui/views/errors/_error.erb +8 -6
  42. data/lib/karafka/web/ui/views/errors/show.erb +50 -2
  43. data/lib/karafka/web/ui/views/shared/_feature_pro.erb +4 -0
  44. data/lib/karafka/web/ui/views/shared/_pagination.erb +8 -2
  45. data/lib/karafka/web/ui/views/shared/exceptions/pro_only.erb +0 -4
  46. data/lib/karafka/web/version.rb +1 -1
  47. data.tar.gz.sig +0 -0
  48. metadata +26 -16
  49. metadata.gz.sig +0 -0
  50. data/lib/karafka/web/tracking/base_contract.rb +0 -31
  51. data/lib/karafka/web/tracking/reporter.rb +0 -144
  52. data/lib/karafka/web/ui/pro/views/consumers/_summary.erb +0 -81
  53. data/lib/karafka/web/ui/pro/views/errors/_cleaned.erb +0 -3
  54. data/lib/karafka/web/ui/pro/views/errors/_detail.erb +0 -31
  55. data/lib/karafka/web/ui/pro/views/errors/_no_errors.erb +0 -3
  56. data/lib/karafka/web/ui/pro/views/jobs/_breadcrumbs.erb +0 -5
  57. data/lib/karafka/web/ui/views/consumers/_breadcrumbs.erb +0 -27
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 9696cc8cfbcb6124155dbac2a3c92cadca69bd1acec727418874ecdd24f49de1
- data.tar.gz: eb60dcbf83ba9cbd8f542b2e536e2e1c5328ee3fe358b234916575202ee75bba
+ metadata.gz: 1f2269efe1b2e14f38c5265d3a8ffd1e7c1bfb1775b11382b27d2cb7a119de03
+ data.tar.gz: e2656bacf8540ea3854eb0ace8a4e5bf83d3aa7c9e5a7a06d1c36bea50083790
  SHA512:
- metadata.gz: 133ecc3adc65e9559a317b4148813b95f21a41150a271f754e726d2617f19515e89498322b0b50e808825a9793a78472041618c584aaf3797e80259a2f296782
- data.tar.gz: bf07811d4f0825f0c9770ae04218c2d73c24eed397296ae54fbeb5f8898a6db750952588426ad45ded5c88d9ce498b3aadf54a202852f4ec8193fd9ff4a159d3
+ metadata.gz: 665fdedafab36bb818a64c6aca5ea17355baca597878c0d5819c56c266228d1c8c14135c2dd27a8fb21d81c08d6e7958e9b89c8b21e2b8eac03a93519d8c0b77
+ data.tar.gz: 6845812a25677375a2d6931785a0ceb8e1d5ac1a454245d924e271aa8993af733b67697aecc055b65feb799c12a337c1e4969024b903b2130d2f5903c6edef3b
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,12 +1,66 @@
  # Karafka Web changelog
 
+ ## 0.6.0 (Unreleased)
+ - **[Feature]** Introduce producers errors tracking.
+ - [Improvement] Display the error origin as a badge to align with the consumers view topic assignments.
+ - [Improvement] Collect more job metrics for future usage.
+ - [Improvement] Normalize the order of job columns across multiple views.
+ - [Improvement] Improve pagination by providing a "Go to first page" fast button.
+ - [Improvement] Provide more explicit info in the consumers view when no consumers are running.
+ - [Improvement] Validate error reporting with a unified error contract.
+ - [Improvement] Use an estimated errors count taken from the errors topic for the counters presentation, instead of materializing it via consumers states, to allow for producers errors tracking.
+ - [Improvement] Introduce `schema_version` to error reports.
+ - [Improvement] Do not display the dispatched error message offset in the breadcrumb and title, as it was confused with the error message content.
+ - [Improvement] Display the `error_class` value wrapped in a code tag.
+ - [Improvement] Display the error `type` value wrapped in a label tag.
+ - [Improvement] Include a blurred backtrace in non-Pro error inspection as an indication of this Pro feature.
+ - [Fix] Fix the invalid arrows style in the pagination.
+ - [Fix] Fix the missing empty `Process name` value in the errors index view.
+ - [Fix] Fix potential empty dispatch of consumer metrics.
+ - [Fix] Remove the confusing part about real-time resources from the "Pro feature" page.
+ - [Refactor] Clean up common components for errors extraction.
+ - [Refactor] Remove unused and redundant partials.
+ - [Maintenance] Require `karafka` `2.1.4` due to fixes in metrics usage for workless flows.
+
+ ### Upgrade notes
+
+ Because of the reporting schema update, it is recommended to:
+
+ - First, deploy **all** the Karafka consumer processes (`karafka server`).
+ - Then, deploy the Web update to your web server.
+
+ Please note that if you use the updated Web UI with consumers that have not been updated, you may hit a 500 error, or offset-related data may not be displayed correctly.
+
+ #### Disabling producers instrumentation
+
+ Producers error tracking **is** enabled by default. If you want to opt out of it, disable the producers' instrumentation by clearing the producers' listeners:
+
+ ```ruby
+ Karafka::Web.setup do |config|
+   # Do not instrument producers with web-ui listeners
+   config.tracking.producers.listeners = []
+ end
+ ```
+
+ #### Custom producers instrumentation
+
+ By default, Karafka Web-UI instruments only `Karafka.producer`. If you use producers you initialized yourself, you need to connect the listeners to them manually. To do so, run the following code:
+
+ ```ruby
+ ::Karafka::Web.config.tracking.producers.listeners.each do |listener|
+   MY_CUSTOM_PRODUCER.monitor.subscribe(listener)
+ end
+ ```
+
+ Please make sure **not** to do this for the default `Karafka.producer`, because it is instrumented out of the box.
+
  ## 0.5.2 (2023-05-22)
- - Label ActiveJob consumers jobs with `active_job` tag.
- - Label Virtual Partitions consumers with `virtual` tag.
- - Label Long Running Jobs with `long_running_job` tag.
- - Label collapsed Virtual Partition with `collapsed` tag.
- - Display consumer tags always below the consumer class name in Jobs/Consumer Jobs views.
- - Add label with the attempt count on work being retried.
+ - [Improvement] Label ActiveJob consumers jobs with `active_job` tag.
+ - [Improvement] Label Virtual Partitions consumers with `virtual` tag.
+ - [Improvement] Label Long Running Jobs with `long_running_job` tag.
+ - [Improvement] Label collapsed Virtual Partition with `collapsed` tag.
+ - [Improvement] Display consumer tags always below the consumer class name in Jobs/Consumer Jobs views.
+ - [Improvement] Add label with the attempt count on work being retried.
 
  ## 0.5.1 (2023-04-16)
  - [Fix] Use CSP header matching Sidekiq one to ensure styles and js loading (#55)
data/Gemfile.lock CHANGED
@@ -1,11 +1,11 @@
  PATH
    remote: .
    specs:
-     karafka-web (0.5.2)
+     karafka-web (0.6.0)
        erubi (~> 1.4)
-       karafka (>= 2.0.40, < 3.0.0)
-       karafka-core (>= 2.0.12, < 3.0.0)
-       roda (~> 3.63)
+       karafka (>= 2.1.4, < 3.0.0)
+       karafka-core (>= 2.0.13, < 3.0.0)
+       roda (~> 3.68, >= 3.68)
        tilt (~> 2.0)
 
  GEM
@@ -26,15 +26,15 @@ GEM
      ffi (1.15.5)
      i18n (1.13.0)
        concurrent-ruby (~> 1.0)
-     karafka (2.0.41)
-       karafka-core (>= 2.0.12, < 3.0.0)
+     karafka (2.1.4)
+       karafka-core (>= 2.0.13, < 3.0.0)
        thor (>= 0.20)
-       waterdrop (>= 2.4.10, < 3.0.0)
+       waterdrop (>= 2.5.3, < 3.0.0)
        zeitwerk (~> 2.3)
-     karafka-core (2.0.12)
+     karafka-core (2.0.13)
        concurrent-ruby (>= 1.1)
-       karafka-rdkafka (>= 0.12.1)
-     karafka-rdkafka (0.12.1)
+       karafka-rdkafka (>= 0.12.3)
+     karafka-rdkafka (0.12.3)
        ffi (~> 1.15)
        mini_portile2 (~> 2.6)
        rake (> 12)
@@ -45,7 +45,7 @@ GEM
        rack (>= 3.0.0.beta1)
        webrick
      rake (13.0.6)
-     roda (3.67.0)
+     roda (3.68.0)
        rack
      rspec (3.12.0)
        rspec-core (~> 3.12.0)
@@ -66,12 +66,12 @@ GEM
        simplecov_json_formatter (~> 0.1)
      simplecov-html (0.12.3)
      simplecov_json_formatter (0.1.4)
-     thor (1.2.1)
+     thor (1.2.2)
      tilt (2.1.0)
      tzinfo (2.0.6)
        concurrent-ruby (~> 1.0)
-     waterdrop (2.5.2)
-       karafka-core (>= 2.0.12, < 3.0.0)
+     waterdrop (2.5.3)
+       karafka-core (>= 2.0.13, < 3.0.0)
        zeitwerk (~> 2.3)
      webrick (1.8.1)
      zeitwerk (2.6.8)
data/karafka-web.gemspec CHANGED
@@ -17,9 +17,9 @@ Gem::Specification.new do |spec|
    spec.licenses = %w[LGPL-3.0 Commercial]
 
    spec.add_dependency 'erubi', '~> 1.4'
-   spec.add_dependency 'karafka', '>= 2.0.40', '< 3.0.0'
-   spec.add_dependency 'karafka-core', '>= 2.0.12', '< 3.0.0'
-   spec.add_dependency 'roda', '~> 3.63'
+   spec.add_dependency 'karafka', '>= 2.1.4', '< 3.0.0'
+   spec.add_dependency 'karafka-core', '>= 2.0.13', '< 3.0.0'
+   spec.add_dependency 'roda', '~> 3.68', '>= 3.68'
    spec.add_dependency 'tilt', '~> 2.0'
 
    spec.add_development_dependency 'rackup', '~> 0.2'
data/lib/karafka/web/config.rb CHANGED
@@ -28,16 +28,15 @@ module Karafka
 
    # Tracking and reporting related settings
    setting :tracking do
-     # Collects the metrics we will be dispatching
-     # Tracks and reports the collected metrics
-     setting :reporter, default: Tracking::Reporter.new
-
      # How often should we report data from a single process
      # You may set it to a lower value in development but in production and scale, every
      # 5 seconds should be enough
      setting :interval, default: 5_000
 
      setting :consumers do
+       # Reports the metrics collected in the sampler
+       setting :reporter, default: Tracking::Consumers::Reporter.new
+
        setting :sampler, default: Tracking::Consumers::Sampler.new
 
        setting :listeners, default: [
@@ -51,7 +50,14 @@ module Karafka
      end
 
      setting :producers do
-       setting :listeners, default: []
+       setting :reporter, default: Tracking::Producers::Reporter.new
+
+       setting :sampler, default: Tracking::Producers::Sampler.new
+
+       setting :listeners, default: [
+         Tracking::Producers::Listeners::Errors.new,
+         Tracking::Producers::Listeners::Reporter.new
+       ]
      end
    end
 
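Given these new defaults, a minimal sketch of tuning the tracking settings via the `Karafka::Web.setup` API shown in the changelog (values are illustrative):

```ruby
# A sketch, assuming the setup API from the upgrade notes above.
Karafka::Web.setup do |config|
  # Report less often than the default 5 seconds (value is in milliseconds)
  config.tracking.interval = 15_000

  # Opt out of the new producers tracking by clearing its listeners
  config.tracking.producers.listeners = []
end
```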
data/lib/karafka/web/installer.rb CHANGED
@@ -10,7 +10,7 @@ module Karafka
    # @param replication_factor [Integer] replication factor we want to use (1 by default)
    def bootstrap!(replication_factor: 1)
      bootstrap_topics!(replication_factor)
-     bootstrap_state!
+     bootstrap_consumers_state!
    end
 
    # Removes all the Karafka topics and creates them again with the same replication factor
@@ -142,7 +142,7 @@ module Karafka
    end
 
    # Creates the initial state record with all values being empty
-   def bootstrap_state!
+   def bootstrap_consumers_state!
      ::Karafka.producer.produce_sync(
        topic: Karafka::Web.config.topics.consumers.states,
        key: Karafka::Web.config.topics.consumers.states,
@@ -151,7 +151,6 @@ module Karafka
        stats: {
          batches: 0,
          messages: 0,
-         errors: 0,
          retries: 0,
          dead: 0,
          busy: 0,
data/lib/karafka/web/tracking/consumers/contracts/consumer_group.rb CHANGED
@@ -8,7 +8,7 @@ module Karafka
    module Contracts
      # Expected data for each consumer group
      # It's mostly about subscription groups details
-     class ConsumerGroup < BaseContract
+     class ConsumerGroup < Tracking::Contracts::Base
        configure
 
        required(:id) { |val| val.is_a?(String) && !val.empty? }
data/lib/karafka/web/tracking/consumers/contracts/job.rb CHANGED
@@ -6,7 +6,7 @@ module Karafka
    module Consumers
      module Contracts
        # Contract for the job reporting details
-       class Job < BaseContract
+       class Job < Tracking::Contracts::Base
          configure
 
          required(:consumer) { |val| val.is_a?(String) }
@@ -19,6 +19,9 @@ module Karafka
          required(:committed_offset) { |val| val.is_a?(Integer) }
          required(:type) { |val| %w[consume revoked shutdown].include?(val) }
          required(:tags) { |val| val.is_a?(Karafka::Core::Taggable::Tags) }
+         # -1 can be here for workless flows
+         required(:consumption_lag) { |val| val.is_a?(Integer) && (val >= 0 || val == -1) }
+         required(:processing_lag) { |val| val.is_a?(Integer) && (val >= 0 || val == -1) }
        end
      end
    end
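The lag rule added above accepts any non-negative integer plus `-1` as a sentinel for workless flows (jobs that ran without messages to measure against). A quick illustrative check of that predicate:

```ruby
# Illustrative only: the same predicate as in the contract above
lag_valid = ->(val) { val.is_a?(Integer) && (val >= 0 || val == -1) }

lag_valid.call(250) # => true  (250 ms of lag)
lag_valid.call(-1)  # => true  (workless flow sentinel)
lag_valid.call(-5)  # => false (rejected by the contract)
```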
data/lib/karafka/web/tracking/consumers/contracts/partition.rb CHANGED
@@ -6,7 +6,7 @@ module Karafka
    module Consumers
      module Contracts
        # Partition metrics required for web to operate
-       class Partition < BaseContract
+       class Partition < Tracking::Contracts::Base
          configure
 
          required(:id) { |val| val.is_a?(Integer) && val >= 0 }
data/lib/karafka/web/tracking/consumers/contracts/report.rb CHANGED
@@ -9,7 +9,7 @@ module Karafka
    #
    # Any outgoing reporting needs to match this format for it to work with the statuses
    # consumer.
-   class Report < BaseContract
+   class Report < Tracking::Contracts::Base
      configure
 
      required(:schema_version) { |val| val.is_a?(String) }
data/lib/karafka/web/tracking/consumers/contracts/subscription_group.rb CHANGED
@@ -7,7 +7,7 @@ module Karafka
    module Contracts
      # Expected data for each subscription group
      # It's mostly about topics details
-     class SubscriptionGroup < BaseContract
+     class SubscriptionGroup < Tracking::Contracts::Base
        configure
 
        required(:id) { |val| val.is_a?(String) && !val.empty? }
data/lib/karafka/web/tracking/consumers/contracts/topic.rb CHANGED
@@ -6,7 +6,9 @@ module Karafka
    module Consumers
      module Contracts
        # Expected topic information that needs to go out
-       class Topic < BaseContract
+       class Topic < Tracking::Contracts::Base
+         configure
+
          required(:name) { |val| val.is_a?(String) && !val.empty? }
          required(:partitions) { |val| val.is_a?(Hash) }
 
data/lib/karafka/web/tracking/consumers/listeners/base.rb CHANGED
@@ -21,9 +21,9 @@ module Karafka
        @sampler ||= ::Karafka::Web.config.tracking.consumers.sampler
      end
 
-     # @return [Object] reported in use
+     # @return [Object] reporter in use
      def reporter
-       @reporter ||= ::Karafka::Web.config.tracking.reporter
+       @reporter ||= ::Karafka::Web.config.tracking.consumers.reporter
      end
    end
  end
data/lib/karafka/web/tracking/consumers/listeners/errors.rb CHANGED
@@ -7,6 +7,13 @@ module Karafka
    module Listeners
      # Listener related to tracking errors, DLQs, and retries metrics for the Web UI
      class Errors < Base
+       include Tracking::Helpers::ErrorInfo
+
+       # Schema used by consumers error reporting
+       SCHEMA_VERSION = '1.0.0'
+
+       private_constant :SCHEMA_VERSION
+
        # Collects errors info and counts errors
        #
        # @param event [Karafka::Core::Monitoring::Event]
@@ -23,6 +30,7 @@ module Karafka
        error_class, error_message, backtrace = extract_error_info(event[:error])
 
        sampler.errors << {
+         schema_version: SCHEMA_VERSION,
          type: event[:type],
          error_class: error_class,
          error_message: error_message,
@@ -56,13 +64,6 @@ module Karafka
 
        private
 
-       # @return [Object] sampler for the metrics
-       # @note We use this sampler to get basic process details that we want to assign
-       #   to the error
-       def consumer_sampler
-         @consumer_sampler ||= ::Karafka::Web.config.tracking.consumers.sampler
-       end
-
        # @param consumer [::Karafka::BaseConsumer]
        # @return [Hash] hash with consumer specific info for details of error
        def extract_consumer_info(consumer)
@@ -77,43 +78,6 @@ module Karafka
            tags: consumer.tags
          }
        end
-
-       # Extracts the basic error info
-       #
-       # @param error [StandardError] error that occurred
-       # @return [Array<String, String, String>] array with error name, message and backtrace
-       def extract_error_info(error)
-         app_root = "#{::Karafka.root}/"
-
-         gem_home = if ENV.key?('GEM_HOME')
-                      ENV['GEM_HOME']
-                    else
-                      File.expand_path(File.join(Karafka.gem_root.to_s, '../'))
-                    end
-
-         gem_home = "#{gem_home}/"
-
-         backtrace = error.backtrace || []
-         backtrace.map! { |line| line.gsub(app_root, '') }
-         backtrace.map! { |line| line.gsub(gem_home, '') }
-
-         [
-           error.class.name,
-           extract_exception_message(error),
-           backtrace.join("\n")
-         ]
-       end
-
-       # @param error [StandardError] error that occurred
-       # @return [String] formatted exception message
-       def extract_exception_message(error)
-         error_message = error.message.to_s[0, 10_000]
-         error_message.force_encoding('utf-8')
-         error_message.scrub! if error_message.respond_to?(:scrub!)
-         error_message
-       rescue StandardError
-         '!!! Error message extraction failed !!!'
-       end
      end
    end
  end
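The removed extraction helpers now live in the new `Tracking::Helpers::ErrorInfo` module (file 20 in the list, +50 lines), which is included above and shared with the producers errors listener. Its actual content is not shown in this diff; a plausible reconstruction from the methods removed here might look like:

```ruby
# Hypothetical reconstruction based on the code removed above; the real
# data/lib/karafka/web/tracking/helpers/error_info.rb may differ.
module Karafka
  module Web
    module Tracking
      module Helpers
        # Extracts error details in a way shared by consumers and producers listeners
        module ErrorInfo
          # @param error [StandardError] error that occurred
          # @return [Array<String, String, String>] error class, message and backtrace
          def extract_error_info(error)
            app_root = "#{::Karafka.root}/"

            gem_home = if ENV.key?('GEM_HOME')
                         ENV['GEM_HOME']
                       else
                         File.expand_path(File.join(Karafka.gem_root.to_s, '../'))
                       end

            gem_home = "#{gem_home}/"

            # Strip app root and gem home prefixes to keep the backtrace compact
            backtrace = error.backtrace || []
            backtrace.map! { |line| line.gsub(app_root, '') }
            backtrace.map! { |line| line.gsub(gem_home, '') }

            [error.class.name, extract_exception_message(error), backtrace.join("\n")]
          end

          # @param error [StandardError] error that occurred
          # @return [String] formatted exception message
          def extract_exception_message(error)
            error_message = error.message.to_s[0, 10_000]
            error_message.force_encoding('utf-8')
            error_message.scrub! if error_message.respond_to?(:scrub!)
            error_message
          rescue StandardError
            '!!! Error message extraction failed !!!'
          end
        end
      end
    end
  end
end
```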
data/lib/karafka/web/tracking/consumers/listeners/processing.rb CHANGED
@@ -147,6 +147,9 @@ module Karafka
    #
    # @param consumer [::Karafka::BaseConsumer] consumer instance
    # @param type [String] job type
+   # @note Be aware that non-consumption jobs may not have any messages (empty) in them
+   #   when certain filters or features are applied. Please refer to the Karafka docs for
+   #   more details.
    def job_details(consumer, type)
      {
        started_at: float_now,
@@ -154,6 +157,8 @@ module Karafka
        partition: consumer.partition,
        first_offset: consumer.messages.metadata.first_offset,
        last_offset: consumer.messages.metadata.last_offset,
+       processing_lag: consumer.messages.metadata.processing_lag,
+       consumption_lag: consumer.messages.metadata.consumption_lag,
        committed_offset: consumer.coordinator.seek_offset - 1,
        consumer: consumer.class.to_s,
        consumer_group: consumer.topic.consumer_group.id,
data/lib/karafka/web/tracking/consumers/reporter.rb ADDED
@@ -0,0 +1,151 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Web
+     module Tracking
+       module Consumers
+         # Reports the collected data about the process and sends it, so we can use it in the UI
+         class Reporter
+           include ::Karafka::Core::Helpers::Time
+           include ::Karafka::Helpers::Async
+
+           # Minimum number of messages from which we produce them in sync mode
+           # This acts as a small back-off so we do not overload the system in case we would
+           # have an extremely big number of errors happening
+           PRODUCE_SYNC_THRESHOLD = 25
+
+           private_constant :PRODUCE_SYNC_THRESHOLD
+
+           # This mutex is shared between the tracker and samplers, so there is no case where
+           # metrics would be collected at the same time the tracker reports
+           MUTEX = Mutex.new
+
+           def initialize
+             # Move back so the first report is dispatched fast to indicate that the process
+             # is alive
+             @tracked_at = monotonic_now - 10_000
+             @report_contract = Consumers::Contracts::Report.new
+             @error_contract = Tracking::Contracts::Error.new
+           end
+
+           # Dispatches the current state from the sampler to the appropriate topics
+           #
+           # @param forced [Boolean] should we report bypassing the time frequency, or should
+           #   we report only in case we have not sent a report for long enough
+           def report(forced: false)
+             MUTEX.synchronize do
+               # Start the background thread only when needed
+               # This prevents us from starting it too early or for non-consumer processes
+               # where Karafka is being included
+               async_call unless @running
+
+               return unless report?(forced)
+
+               @tracked_at = monotonic_now
+
+               report = sampler.to_report
+
+               @report_contract.validate!(report)
+
+               process_name = report[:process][:name]
+
+               # Report consumers statuses
+               messages = [
+                 {
+                   topic: ::Karafka::Web.config.topics.consumers.reports,
+                   payload: report.to_json,
+                   key: process_name,
+                   partition: 0
+                 }
+               ]
+
+               # Report errors that occurred (if any)
+               messages += sampler.errors.map do |error|
+                 @error_contract.validate!(error)
+
+                 {
+                   topic: Karafka::Web.config.topics.errors,
+                   payload: error.to_json,
+                   # Always dispatch errors from the same process to the same partition
+                   key: process_name
+                 }
+               end
+
+               return if messages.empty?
+
+               produce(messages)
+
+               # Clear the sampler so it tracks new state changes without the previous ones
+               # impacting the data
+               sampler.clear
+             end
+           end
+
+           # Reports bypassing the frequency check. This can be used to report when the state
+           # of the process changes drastically. For example when the process is stopping, we
+           # want to indicate this in the UI as fast as possible, etc.
+           def report!
+             report(forced: true)
+           end
+
+           private
+
+           # Reports the process state once in a while
+           def call
+             @running = true
+
+             loop do
+               report
+
+               # We won't track more often anyhow but want to try frequently not to miss a
+               # window. We need to convert the sleep interval into seconds for sleep
+               sleep(::Karafka::Web.config.tracking.interval / 1_000 / 10)
+             end
+           end
+
+           # @param forced [Boolean] is this report forced. Forced means that as long as we
+           #   can flush, we will flush
+           # @return [Boolean] should we report, or is it not yet time to do so
+           def report?(forced)
+             # We never report in the initializing phase because things are not yet fully
+             # configured
+             return false if ::Karafka::App.initializing?
+             # We never report in the initialized phase because the server is not ready until
+             # Karafka is fully running and some of the things like listeners are not yet
+             # available
+             return false if ::Karafka::App.initialized?
+
+             return true if forced
+
+             (monotonic_now - @tracked_at) >= ::Karafka::Web.config.tracking.interval
+           end
+
+           # @return [Object] sampler for the metrics
+           def sampler
+             @sampler ||= ::Karafka::Web.config.tracking.consumers.sampler
+           end
+
+           # Produces messages to Kafka.
+           #
+           # @param messages [Array<Hash>]
+           #
+           # @note We pick either sync or async depending on the number of messages. The
+           #   trick here is that we do not want to end up overloading the internal queue
+           #   with messages in case someone has a lot of errors from processing or other
+           #   errors. Producing sync will wait for the delivery, hence it will slow things
+           #   down a little bit. On the other hand, during normal operations we should not
+           #   have that many messages to dispatch and it should not slow down any processing.
+           def produce(messages)
+             if messages.count >= PRODUCE_SYNC_THRESHOLD
+               ::Karafka.producer.produce_many_sync(messages)
+             else
+               ::Karafka.producer.produce_many_async(messages)
+             end
+           # Since we run this in a background thread, there may be a case upon shutdown
+           # where the producer is closed right before a potential dispatch. It is not worth
+           # dealing with this and we can just safely ignore it
+           rescue WaterDrop::Errors::ProducerClosedError
+             nil
+           end
+         end
+       end
+     end
+   end
+ end
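As a usage note, a sketch of forcing an immediate dispatch through the configured reporter (the accessor path follows the config defaults shown earlier):

```ruby
# A sketch: force a report bypassing the frequency check, e.g. when reacting to
# a drastic process state change. `report!` simply calls `report(forced: true)`.
reporter = ::Karafka::Web.config.tracking.consumers.reporter
reporter.report!
```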
data/lib/karafka/web/tracking/contracts/base.rb ADDED
@@ -0,0 +1,34 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Web
+     module Tracking
+       # Namespace for contracts used by consumers and producers tracking
+       module Contracts
+         # Base for all the metric related contracts
+         class Base < ::Karafka::Core::Contractable::Contract
+           class << self
+             # This layer is not for users' extensive feedback, thus we can easily use the
+             # minimum error messaging there is.
+             def configure
+               super do |config|
+                 config.error_messages = YAML.safe_load(
+                   File.read(
+                     File.join(Karafka::Web.gem_root, 'config', 'locales', 'errors.yml')
+                   )
+                 ).fetch('en').fetch('validations').fetch('web')
+               end
+             end
+           end
+
+           # @param data [Hash] data for validation
+           # @return [Boolean] true if all good
+           # @raise [Errors::ContractError] invalid report
+           def validate!(data)
+             super(data, Errors::Tracking::ContractError)
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/web/tracking/contracts/error.rb ADDED
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Web
+     module Tracking
+       module Contracts
+         # Contract for error reporting
+         # Since producers and consumers report their errors to the same topic, we need to
+         # have a unified contract for both
+         class Error < Base
+           configure
+
+           required(:schema_version) { |val| val.is_a?(String) }
+           required(:type) { |val| val.is_a?(String) && !val.empty? }
+           required(:error_class) { |val| val.is_a?(String) && !val.empty? }
+           required(:error_message) { |val| val.is_a?(String) }
+           required(:backtrace) { |val| val.is_a?(String) }
+           required(:details) { |val| val.is_a?(Hash) }
+           required(:occurred_at) { |val| val.is_a?(Float) }
+
+           nested(:process) do
+             required(:name) { |val| val.is_a?(String) && !val.empty? }
+             # Tags may not be present for producers because they may operate from outside
+             # of a karafka taggable process
+             optional(:tags) { |val| val.is_a?(Karafka::Core::Taggable::Tags) }
+           end
+         end
+       end
+     end
+   end
+ end
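A sketch of a payload satisfying this contract (field values are illustrative; `validate!` raises `Errors::Tracking::ContractError` on invalid data, per the base class above):

```ruby
error_contract = Karafka::Web::Tracking::Contracts::Error.new

# Illustrative payload; `tags` is omitted since it is optional for producers
error_contract.validate!(
  schema_version: '1.0.0',
  type: 'librdkafka.dispatch_error',
  error_class: 'StandardError',
  error_message: 'Example failure',
  backtrace: "app/consumers/example_consumer.rb:10:in `consume'",
  details: {},
  occurred_at: Time.now.to_f,
  process: { name: 'example-host:12345:abc1' }
)
```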