karafka-web 0.6.2 → 0.7.0

Files changed (197)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +13 -4
  4. data/CHANGELOG.md +114 -6
  5. data/Gemfile +1 -0
  6. data/Gemfile.lock +27 -24
  7. data/README.md +2 -0
  8. data/bin/rspecs +6 -0
  9. data/certs/cert_chain.pem +21 -21
  10. data/docker-compose.yml +22 -0
  11. data/karafka-web.gemspec +3 -3
  12. data/lib/karafka/web/app.rb +6 -2
  13. data/lib/karafka/web/cli.rb +51 -47
  14. data/lib/karafka/web/config.rb +31 -9
  15. data/lib/karafka/web/contracts/base.rb +32 -0
  16. data/lib/karafka/web/contracts/config.rb +63 -0
  17. data/lib/karafka/web/deserializer.rb +10 -1
  18. data/lib/karafka/web/errors.rb +29 -7
  19. data/lib/karafka/web/installer.rb +58 -148
  20. data/lib/karafka/web/management/base.rb +34 -0
  21. data/lib/karafka/web/management/clean_boot_file.rb +31 -0
  22. data/lib/karafka/web/management/create_initial_states.rb +101 -0
  23. data/lib/karafka/web/management/create_topics.rb +127 -0
  24. data/lib/karafka/web/management/delete_topics.rb +28 -0
  25. data/lib/karafka/web/management/enable.rb +82 -0
  26. data/lib/karafka/web/management/extend_boot_file.rb +37 -0
  27. data/lib/karafka/web/processing/consumer.rb +73 -17
  28. data/lib/karafka/web/processing/consumers/aggregators/base.rb +56 -0
  29. data/lib/karafka/web/processing/consumers/aggregators/metrics.rb +154 -0
  30. data/lib/karafka/web/processing/consumers/aggregators/state.rb +180 -0
  31. data/lib/karafka/web/processing/consumers/contracts/aggregated_stats.rb +32 -0
  32. data/lib/karafka/web/processing/consumers/contracts/metrics.rb +53 -0
  33. data/lib/karafka/web/processing/consumers/contracts/process.rb +19 -0
  34. data/lib/karafka/web/processing/consumers/contracts/state.rb +49 -0
  35. data/lib/karafka/web/processing/consumers/contracts/topic_stats.rb +21 -0
  36. data/lib/karafka/web/processing/consumers/metrics.rb +29 -0
  37. data/lib/karafka/web/processing/consumers/schema_manager.rb +56 -0
  38. data/lib/karafka/web/processing/consumers/state.rb +6 -9
  39. data/lib/karafka/web/processing/time_series_tracker.rb +130 -0
  40. data/lib/karafka/web/tracking/consumers/contracts/consumer_group.rb +2 -2
  41. data/lib/karafka/web/tracking/consumers/contracts/job.rb +2 -1
  42. data/lib/karafka/web/tracking/consumers/contracts/partition.rb +14 -1
  43. data/lib/karafka/web/tracking/consumers/contracts/report.rb +10 -8
  44. data/lib/karafka/web/tracking/consumers/contracts/subscription_group.rb +2 -2
  45. data/lib/karafka/web/tracking/consumers/contracts/topic.rb +2 -2
  46. data/lib/karafka/web/tracking/consumers/listeners/processing.rb +6 -2
  47. data/lib/karafka/web/tracking/consumers/listeners/statistics.rb +15 -1
  48. data/lib/karafka/web/tracking/consumers/reporter.rb +14 -6
  49. data/lib/karafka/web/tracking/consumers/sampler.rb +80 -39
  50. data/lib/karafka/web/tracking/contracts/error.rb +2 -1
  51. data/lib/karafka/web/ui/app.rb +20 -10
  52. data/lib/karafka/web/ui/base.rb +50 -4
  53. data/lib/karafka/web/ui/controllers/base.rb +11 -0
  54. data/lib/karafka/web/ui/controllers/become_pro.rb +1 -1
  55. data/lib/karafka/web/ui/controllers/cluster.rb +7 -4
  56. data/lib/karafka/web/ui/controllers/consumers.rb +1 -1
  57. data/lib/karafka/web/ui/controllers/dashboard.rb +32 -0
  58. data/lib/karafka/web/ui/controllers/jobs.rb +1 -1
  59. data/lib/karafka/web/ui/controllers/requests/params.rb +18 -0
  60. data/lib/karafka/web/ui/controllers/responses/redirect.rb +29 -0
  61. data/lib/karafka/web/ui/helpers/application_helper.rb +57 -14
  62. data/lib/karafka/web/ui/helpers/paths_helper.rb +48 -0
  63. data/lib/karafka/web/ui/lib/hash_proxy.rb +18 -6
  64. data/lib/karafka/web/ui/lib/paginations/watermark_offsets_based.rb +75 -0
  65. data/lib/karafka/web/ui/lib/ttl_cache.rb +8 -0
  66. data/lib/karafka/web/ui/models/consumers_metrics.rb +46 -0
  67. data/lib/karafka/web/ui/models/{state.rb → consumers_state.rb} +6 -2
  68. data/lib/karafka/web/ui/models/health.rb +37 -7
  69. data/lib/karafka/web/ui/models/message.rb +11 -3
  70. data/lib/karafka/web/ui/models/metrics/aggregated.rb +196 -0
  71. data/lib/karafka/web/ui/models/metrics/charts/aggregated.rb +50 -0
  72. data/lib/karafka/web/ui/models/metrics/charts/topics.rb +109 -0
  73. data/lib/karafka/web/ui/models/metrics/topics.rb +101 -0
  74. data/lib/karafka/web/ui/models/partition.rb +27 -0
  75. data/lib/karafka/web/ui/models/process.rb +12 -1
  76. data/lib/karafka/web/ui/models/status.rb +107 -21
  77. data/lib/karafka/web/ui/models/visibility_filter.rb +33 -0
  78. data/lib/karafka/web/ui/pro/app.rb +80 -20
  79. data/lib/karafka/web/ui/pro/controllers/cluster.rb +11 -0
  80. data/lib/karafka/web/ui/pro/controllers/consumers.rb +10 -6
  81. data/lib/karafka/web/ui/pro/controllers/dashboard.rb +54 -0
  82. data/lib/karafka/web/ui/pro/controllers/errors.rb +3 -0
  83. data/lib/karafka/web/ui/pro/controllers/explorer.rb +94 -9
  84. data/lib/karafka/web/ui/pro/controllers/health.rb +10 -2
  85. data/lib/karafka/web/ui/pro/controllers/messages.rb +62 -0
  86. data/lib/karafka/web/ui/pro/controllers/routing.rb +44 -0
  87. data/lib/karafka/web/ui/pro/views/consumers/_breadcrumbs.erb +7 -1
  88. data/lib/karafka/web/ui/pro/views/consumers/_consumer.erb +1 -1
  89. data/lib/karafka/web/ui/pro/views/consumers/_counters.erb +7 -5
  90. data/lib/karafka/web/ui/pro/views/consumers/consumer/_job.erb +3 -3
  91. data/lib/karafka/web/ui/pro/views/consumers/consumer/_metrics.erb +5 -4
  92. data/lib/karafka/web/ui/pro/views/consumers/consumer/_partition.erb +13 -4
  93. data/lib/karafka/web/ui/pro/views/consumers/consumer/_subscription_group.erb +3 -2
  94. data/lib/karafka/web/ui/pro/views/consumers/consumer/_tabs.erb +7 -0
  95. data/lib/karafka/web/ui/pro/views/consumers/details.erb +21 -0
  96. data/lib/karafka/web/ui/pro/views/consumers/index.erb +4 -2
  97. data/lib/karafka/web/ui/pro/views/dashboard/_ranges_selector.erb +39 -0
  98. data/lib/karafka/web/ui/pro/views/dashboard/index.erb +82 -0
  99. data/lib/karafka/web/ui/pro/views/dlq/_topic.erb +1 -1
  100. data/lib/karafka/web/ui/pro/views/errors/_error.erb +1 -1
  101. data/lib/karafka/web/ui/pro/views/errors/index.erb +1 -1
  102. data/lib/karafka/web/ui/pro/views/errors/partition.erb +1 -1
  103. data/lib/karafka/web/ui/pro/views/errors/show.erb +1 -1
  104. data/lib/karafka/web/ui/pro/views/explorer/_breadcrumbs.erb +6 -4
  105. data/lib/karafka/web/ui/pro/views/explorer/_filtered.erb +16 -0
  106. data/lib/karafka/web/ui/pro/views/explorer/_message.erb +6 -2
  107. data/lib/karafka/web/ui/pro/views/explorer/_no_topics.erb +7 -0
  108. data/lib/karafka/web/ui/pro/views/explorer/_partition_option.erb +2 -2
  109. data/lib/karafka/web/ui/pro/views/explorer/_topic.erb +1 -1
  110. data/lib/karafka/web/ui/pro/views/explorer/index.erb +12 -8
  111. data/lib/karafka/web/ui/pro/views/explorer/messages/_headers.erb +15 -0
  112. data/lib/karafka/web/ui/pro/views/explorer/messages/_key.erb +12 -0
  113. data/lib/karafka/web/ui/pro/views/explorer/partition/_details.erb +35 -0
  114. data/lib/karafka/web/ui/pro/views/explorer/partition.erb +5 -3
  115. data/lib/karafka/web/ui/pro/views/explorer/show.erb +48 -5
  116. data/lib/karafka/web/ui/pro/views/explorer/topic/_details.erb +23 -0
  117. data/lib/karafka/web/ui/pro/views/explorer/topic.erb +14 -12
  118. data/lib/karafka/web/ui/pro/views/health/_breadcrumbs.erb +16 -0
  119. data/lib/karafka/web/ui/pro/views/health/_no_data.erb +9 -0
  120. data/lib/karafka/web/ui/pro/views/health/_partition.erb +17 -15
  121. data/lib/karafka/web/ui/pro/views/health/_partition_offset.erb +40 -0
  122. data/lib/karafka/web/ui/pro/views/health/_tabs.erb +27 -0
  123. data/lib/karafka/web/ui/pro/views/health/offsets.erb +71 -0
  124. data/lib/karafka/web/ui/pro/views/health/overview.erb +68 -0
  125. data/lib/karafka/web/ui/pro/views/jobs/_job.erb +6 -3
  126. data/lib/karafka/web/ui/pro/views/jobs/index.erb +4 -1
  127. data/lib/karafka/web/ui/pro/views/routing/_consumer_group.erb +37 -0
  128. data/lib/karafka/web/ui/pro/views/routing/_detail.erb +25 -0
  129. data/lib/karafka/web/ui/pro/views/routing/_topic.erb +23 -0
  130. data/lib/karafka/web/ui/pro/views/routing/index.erb +10 -0
  131. data/lib/karafka/web/ui/pro/views/routing/show.erb +26 -0
  132. data/lib/karafka/web/ui/pro/views/shared/_navigation.erb +6 -9
  133. data/lib/karafka/web/ui/public/images/logo-gray.svg +28 -0
  134. data/lib/karafka/web/ui/public/javascripts/application.js +30 -0
  135. data/lib/karafka/web/ui/public/javascripts/chart.min.js +14 -0
  136. data/lib/karafka/web/ui/public/javascripts/charts.js +330 -0
  137. data/lib/karafka/web/ui/public/javascripts/datepicker.js +6 -0
  138. data/lib/karafka/web/ui/public/javascripts/live_poll.js +39 -12
  139. data/lib/karafka/web/ui/public/javascripts/offset_datetime.js +74 -0
  140. data/lib/karafka/web/ui/public/javascripts/tabs.js +59 -0
  141. data/lib/karafka/web/ui/public/stylesheets/application.css +11 -0
  142. data/lib/karafka/web/ui/public/stylesheets/datepicker.min.css +12 -0
  143. data/lib/karafka/web/ui/views/cluster/_no_partitions.erb +3 -0
  144. data/lib/karafka/web/ui/views/cluster/_partition.erb +20 -22
  145. data/lib/karafka/web/ui/views/cluster/index.erb +6 -1
  146. data/lib/karafka/web/ui/views/consumers/_consumer.erb +1 -1
  147. data/lib/karafka/web/ui/views/consumers/_counters.erb +6 -4
  148. data/lib/karafka/web/ui/views/consumers/_summary.erb +3 -3
  149. data/lib/karafka/web/ui/views/consumers/index.erb +3 -1
  150. data/lib/karafka/web/ui/views/dashboard/_feature_pro.erb +3 -0
  151. data/lib/karafka/web/ui/views/dashboard/_not_enough_data.erb +15 -0
  152. data/lib/karafka/web/ui/views/dashboard/_ranges_selector.erb +23 -0
  153. data/lib/karafka/web/ui/views/dashboard/index.erb +95 -0
  154. data/lib/karafka/web/ui/views/errors/_detail.erb +12 -0
  155. data/lib/karafka/web/ui/views/errors/_error.erb +1 -1
  156. data/lib/karafka/web/ui/views/errors/show.erb +1 -1
  157. data/lib/karafka/web/ui/views/jobs/index.erb +3 -1
  158. data/lib/karafka/web/ui/views/layout.erb +10 -3
  159. data/lib/karafka/web/ui/views/routing/_consumer_group.erb +8 -6
  160. data/lib/karafka/web/ui/views/routing/_detail.erb +2 -2
  161. data/lib/karafka/web/ui/views/routing/_topic.erb +1 -1
  162. data/lib/karafka/web/ui/views/routing/show.erb +1 -1
  163. data/lib/karafka/web/ui/views/shared/_brand.erb +2 -2
  164. data/lib/karafka/web/ui/views/shared/_chart.erb +14 -0
  165. data/lib/karafka/web/ui/views/shared/_content.erb +2 -2
  166. data/lib/karafka/web/ui/views/shared/_feature_pro.erb +1 -1
  167. data/lib/karafka/web/ui/views/shared/_flashes.erb +9 -0
  168. data/lib/karafka/web/ui/views/shared/_footer.erb +22 -0
  169. data/lib/karafka/web/ui/views/shared/_header.erb +15 -9
  170. data/lib/karafka/web/ui/views/shared/_live_poll.erb +7 -0
  171. data/lib/karafka/web/ui/views/shared/_navigation.erb +5 -8
  172. data/lib/karafka/web/ui/views/shared/_no_paginated_data.erb +9 -0
  173. data/lib/karafka/web/ui/views/shared/_pagination.erb +1 -1
  174. data/lib/karafka/web/ui/views/shared/_tab_nav.erb +7 -0
  175. data/lib/karafka/web/ui/views/shared/exceptions/not_found.erb +34 -32
  176. data/lib/karafka/web/ui/views/shared/exceptions/pro_only.erb +45 -43
  177. data/lib/karafka/web/ui/views/status/failures/_consumers_reports_schema_state.erb +15 -0
  178. data/lib/karafka/web/ui/views/status/failures/_enabled.erb +8 -0
  179. data/lib/karafka/web/ui/views/status/failures/_initial_consumers_metrics.erb +11 -0
  180. data/lib/karafka/web/ui/views/status/failures/{_initial_state.erb → _initial_consumers_state.erb} +3 -3
  181. data/lib/karafka/web/ui/views/status/failures/_partitions.erb +14 -6
  182. data/lib/karafka/web/ui/views/status/info/_components.erb +21 -1
  183. data/lib/karafka/web/ui/views/status/show.erb +62 -5
  184. data/lib/karafka/web/ui/views/status/successes/_enabled.erb +1 -0
  185. data/lib/karafka/web/ui/views/status/warnings/_replication.erb +19 -0
  186. data/lib/karafka/web/version.rb +1 -1
  187. data/lib/karafka/web.rb +11 -0
  188. data.tar.gz.sig +0 -0
  189. metadata +109 -39
  190. metadata.gz.sig +0 -0
  191. data/lib/karafka/web/processing/consumers/aggregator.rb +0 -130
  192. data/lib/karafka/web/tracking/contracts/base.rb +0 -34
  193. data/lib/karafka/web/ui/pro/views/explorer/_encryption_enabled.erb +0 -18
  194. data/lib/karafka/web/ui/pro/views/explorer/partition/_watermark_offsets.erb +0 -10
  195. data/lib/karafka/web/ui/pro/views/explorer/topic/_partitions.erb +0 -11
  196. data/lib/karafka/web/ui/pro/views/health/index.erb +0 -60
  197. /data/lib/karafka/web/ui/pro/views/explorer/{_detail.erb → messages/_detail.erb} +0 -0
data/lib/karafka/web/ui/models/metrics/aggregated.rb
@@ -0,0 +1,196 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Web
+     module Ui
+       module Models
+         # Namespace for metrics related models
+         module Metrics
+           # Materializes the aggregated data and computes the expected diffs out of the snapshots
+           # We do some pre-processing to make sure we do not have bigger gaps and to compensate
+           # for reporting drift
+           class Aggregated < Lib::HashProxy
+             include ::Karafka::Core::Helpers::Time
+
+             # If samples are closer to each other than this, the sample will be rejected
+             MIN_ACCEPTED_DRIFT = 4
+
+             # If samples are further apart than this, we will inject an artificial sample in between
+             MAX_ACCEPTED_DRIFT = 7
+
+             # Keys for which we compute the delta in reference to the previous period.
+             # Metrics we get from the processes are always absolute, hence we need a reference point
+             # to compute the deltas
+             #
+             # If at least two elements do not exist for a given delta range, we keep it empty
+             DELTA_KEYS = %i[
+               batches
+               messages
+               errors
+               retries
+               dead
+             ].freeze
+
+             private_constant :MIN_ACCEPTED_DRIFT, :MAX_ACCEPTED_DRIFT, :DELTA_KEYS
+
+             # Builds the Web-UI historicals representation that includes deltas
+             #
+             # @param aggregated [Hash] aggregated historical metrics
+             def initialize(aggregated)
+               aggregated
+                 .tap { |historicals| reject_drifters(historicals) }
+                 .tap { |historicals| fill_gaps(historicals) }
+                 .then { |historicals| enrich_with_deltas(historicals) }
+                 .tap { |historicals| enrich_with_batch_size(historicals) }
+                 .tap { |historicals| enrich_with_process_rss(historicals) }
+                 .then { |enriched| super(enriched) }
+             end
+
+             # @return [Boolean] do we have enough data to draw any basic charts
+             def sufficient?
+               seconds.size > 2
+             end
+
+             private
+
+             # Since our reporting is not ms-precise, there are cases where sampling can drift.
+             # If drifting gets us close to one side, for delta metrics it would create sudden
+             # artificial drops that would not match reality. We reject drifters like this,
+             # as we can compensate for them later.
+             #
+             # This problem only affects our near real-time metrics with seconds precision
+             #
+             # @param historicals [Hash] all historicals for all the ranges
+             def reject_drifters(historicals)
+               initial = nil
+
+               historicals.fetch(:seconds).delete_if do |sample|
+                 unless initial
+                   initial = sample.first
+
+                   next
+                 end
+
+                 # Reject values that are closer than the minimum
+                 too_close = sample.first - initial < MIN_ACCEPTED_DRIFT
+
+                 initial = sample.first
+
+                 too_close
+               end
+             end
+
+             # In case of a positive drift, we may have gaps bigger than a few seconds in reporting.
+             # This can create a false sense of spikes that does not reflect reality. We compensate
+             # for this by extrapolating the delta values and using the rest as they are.
+             #
+             # This problem only affects our near real-time metrics with seconds precision
+             #
+             # @param historicals [Hash] all historicals for all the ranges
+             def fill_gaps(historicals)
+               filled = []
+               previous = nil
+
+               historicals.fetch(:seconds).each do |sample|
+                 unless previous
+                   filled << sample
+                   previous = sample
+                   next
+                 end
+
+                 if sample.first - previous.first > MAX_ACCEPTED_DRIFT
+                   base = sample.last.dup
+
+                   DELTA_KEYS.each do |key|
+                     base[key] = previous.last[key] + (sample.last[key] - previous.last[key]) / 2
+                   end
+
+                   filled << [previous.first + (sample.first - previous.first) / 2, base]
+                 end
+
+                 filled << sample
+                 previous = sample
+               end
+
+               historicals[:seconds] = filled
+             end
+
+             # Takes the historicals hash, iterates over all the samples and enriches them with
+             # the computed delta values
+             #
+             # @param historicals [Hash] all historicals for all the ranges
+             # @return [Hash] historicals with delta-based data
+             def enrich_with_deltas(historicals)
+               results = {}
+
+               historicals.each do |range, time_samples|
+                 results[range] = []
+
+                 baseline = nil
+
+                 time_samples.each do |time_sample|
+                   metrics = time_sample[1]
+
+                   if baseline
+                     deltas = compute_deltas(baseline, metrics)
+                     results[range] << [time_sample[0], metrics.merge(deltas)]
+                   end
+
+                   baseline = metrics
+                 end
+               end
+
+               results
+             end
+
+             # Batch size is the ratio between the number of messages and the number of batches.
+             # It is derived from the data we have, so we compute it on the fly
+             # @param historicals [Hash] all historicals for all the ranges
+             def enrich_with_batch_size(historicals)
+               historicals.each_value do |time_samples|
+                 time_samples.each do |time_sample|
+                   metrics = time_sample[1]
+
+                   batches = metrics[:batches]
+
+                   # We check that it is not zero just in case something is off there
+                   # We do not want to divide by zero
+                   metrics[:batch_size] = batches.zero? ? 0 : metrics[:messages] / batches
+                 end
+               end
+             end
+
+             # Adds an average RSS on a per-process basis
+             # @param historicals [Hash] all historicals for all the ranges
+             def enrich_with_process_rss(historicals)
+               historicals.each_value do |time_samples|
+                 time_samples.each do |time_sample|
+                   metrics = time_sample[1]
+
+                   rss = metrics[:rss]
+                   processes = metrics[:processes]
+
+                   metrics[:process_rss] = processes.zero? ? 0 : rss / processes
+                 end
+               end
+             end
+
+             # Computes deltas for all the relevant keys for which we want to have deltas
+             #
+             # @param previous [Hash]
+             # @param current [Hash]
+             # @return [Hash] delta computed values
+             def compute_deltas(previous, current)
+               DELTA_KEYS.map do |delta_key|
+                 [
+                   delta_key,
+                   current.fetch(delta_key) - previous.fetch(delta_key)
+                 ]
+               end.to_h
+             end
+           end
+         end
+       end
+     end
+   end
+ end
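
The delta enrichment above replaces the absolute counters with per-period differences: each sample is merged with the deltas computed against the previous sample, and the first sample is dropped for lack of a baseline. A minimal standalone sketch of that step on synthetic data (sample values and keys below are illustrative, not taken from the gem):

# Synthetic [timestamp, metrics] samples; counters are absolute, as reported
DELTA_KEYS = %i[batches messages].freeze

samples = [
  [100, { batches: 10, messages: 100 }],
  [105, { batches: 14, messages: 170 }],
  [110, { batches: 20, messages: 260 }]
]

previous = nil

enriched = samples.filter_map do |time, metrics|
  unless previous
    previous = metrics
    next # the first sample has no baseline, so it is skipped
  end

  # The deltas overwrite the absolute values for the delta keys via merge
  deltas = DELTA_KEYS.to_h { |key| [key, metrics[key] - previous[key]] }
  previous = metrics
  [time, metrics.merge(deltas)]
end

enriched.each { |time, metrics| puts "#{time} => #{metrics}" }
# 105 => {:batches=>4, :messages=>70}
# 110 => {:batches=>6, :messages=>90}
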
data/lib/karafka/web/ui/models/metrics/charts/aggregated.rb
@@ -0,0 +1,50 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Web
+     module Ui
+       module Models
+         module Metrics
+           # Namespace for models related to the presentation of charts
+           module Charts
+             # Model for formatting aggregated metrics data for charts
+             class Aggregated < Lib::HashProxy
+               # @param aggregated [Hash] all the aggregated metrics for all the periods
+               # @param period [Symbol] period that we are interested in
+               def initialize(aggregated, period)
+                 @data = aggregated.to_h.fetch(period)
+               end
+
+               # @param args [Array<String>] names of the aggregated metrics we want to show
+               # @return [String] JSON with data about all the charts we were interested in
+               def with(*args)
+                 args
+                   .map { |name| [name.to_sym, public_send(name)] }
+                   .to_h
+                   .to_json
+               end
+
+               # @param method_name [String]
+               # @param include_private [Boolean]
+               def respond_to_missing?(method_name, include_private = false)
+                 @data.last.last.key?(method_name.to_sym) || super
+               end
+
+               # Handles delegation to fetch the appropriate historical metrics based on their name
+               #
+               # @param method_name [String]
+               # @param arguments [Array] missing method call arguments
+               def method_missing(method_name, *arguments)
+                 if @data.last.last.key?(method_name.to_sym)
+                   @data.map { |a| [a.first, a.last[method_name]] }
+                 else
+                   super
+                 end
+               end
+             end
+           end
+         end
+       end
+     end
+   end
+ end
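
The method_missing delegation above turns any metric name present in the newest sample into a [time, value] series, and #with bundles the requested series into one JSON payload for the charting JavaScript. A standalone sketch of that behavior (the data shape is assumed from the model above; values are invented):

require 'json'

# Shaped like the wrapped @data: [[time, metrics], ...]
data = [
  [100, { messages: 70, batches: 4 }],
  [105, { messages: 90, batches: 6 }]
]

# What the method_missing delegation produces for a known metric name
series = ->(name) { data.map { |time, metrics| [time, metrics[name]] } }

# What #with('messages', 'batches') would then return
payload = %i[messages batches].to_h { |name| [name, series.call(name)] }.to_json

puts payload
# {"messages":[[100,70],[105,90]],"batches":[[100,4],[105,6]]}
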
data/lib/karafka/web/ui/models/metrics/charts/topics.rb
@@ -0,0 +1,109 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Web
+     module Ui
+       module Models
+         module Metrics
+           module Charts
+             # Model for preparing data about topics states
+             class Topics
+               # @param topics_data [Hash] topics aggregated metrics data
+               # @param period [Symbol] period that we are interested in
+               def initialize(topics_data, period)
+                 @data = topics_data.to_h.fetch(period)
+               end
+
+               # @return [String] JSON with the lag of each topic plus the total lag of all the
+               #   topics from all the consumer groups
+               def lags_stored
+                 total = Hash.new { |h, v| h[v] = 0 }
+
+                 @data.to_h.each_value do |metrics|
+                   metrics.each do |metric|
+                     time = metric.first
+                     lag_stored = metric.last[:lag_stored]
+
+                     if lag_stored
+                       total[time] ||= 0
+                       total[time] += lag_stored
+                     else
+                       next if total.key?(time)
+
+                       total[time] = nil
+                     end
+                   end
+                 end
+
+                 # Extract only the stored lag from all the data
+                 per_topic = @data.to_h.map do |topic, metrics|
+                   extracted = metrics.map { |metric| [metric.first, metric.last[:lag_stored]] }
+
+                   [topic, extracted]
+                 end.to_h
+
+                 # We name it with a space because someone may have a topic called "total" and we
+                 # want to avoid collisions
+                 per_topic.merge('total sum' => total.to_a).to_json
+               end
+
+               # @return [String] JSON with the producers' pace, represented as the sum of the
+               #   high watermarks for each topic
+               def topics_pace
+                 topics = {}
+
+                 @data.to_h.each do |topic, metrics|
+                   topic_without_cg = topic.split('[').first
+
+                   # If we've already seen this topic's data, we can skip it
+                   next if topics.include?(topic_without_cg)
+
+                   topics[topic_without_cg] = metrics.map do |current|
+                     [current.first, current.last[:pace]]
+                   end
+                 end
+
+                 topics.each_value(&:compact!)
+                 topics.to_json
+               end
+
+               # @return [String] JSON with the highest LSO freeze duration per topic. Useful
+               #   for debugging issues arising from hanging transactions
+               def max_lso_time
+                 topics = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = [] } }
+
+                 @data.to_h.each do |topic, metrics|
+                   topic_without_cg = topic.split('[').first
+
+                   metrics.each do |current|
+                     ls_offset = current.last[:ls_offset] || 0
+                     ls_offset_fd = current.last[:ls_offset_fd] || 0
+                     hi_offset = current.last[:hi_offset] || 0
+
+                     # We convert this from milliseconds to seconds due to our Web UI precision
+                     # Reporting is in ms for consistency
+                     normalized_fd = (ls_offset_fd / 1_000).round
+                     # In case ls_offset and hi_offset are the same, it means we've reached EOF
+                     # and we just don't have more data. In cases like this, the LSO freeze
+                     # duration will grow because the LSO will remain unchanged, but it does not
+                     # mean it is frozen. It means there is just no more data in the topic
+                     # partition. We need to nullify this case, otherwise it would report that
+                     # the LSO is hanging.
+                     normalized_fd = 0 if ls_offset == hi_offset
+
+                     topics[topic_without_cg][current.first] << normalized_fd
+                   end
+                 end
+
+                 topics.each_value(&:compact!)
+                 topics.each_value { |metrics| metrics.transform_values!(&:max) }
+                 topics.transform_values! { |values| values.to_a.sort_by!(&:first) }
+                 topics.to_json
+               end
+             end
+           end
+         end
+       end
+     end
+   end
+ end
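
The lags_stored aggregation sums the per-topic stored lags by timestamp and appends the total under the 'total sum' key. A simplified standalone sketch of that aggregation (it skips the nil handling for reporting gaps; topic and group names are illustrative):

require 'json'

# Per-"topic[consumer_group]" series, shaped like the wrapped @data
data = {
  'orders[cg1]' => [[100, { lag_stored: 5 }], [105, { lag_stored: 3 }]],
  'users[cg1]'  => [[100, { lag_stored: 2 }], [105, { lag_stored: 1 }]]
}

# Sum the stored lag of all topics for each point in time
total = Hash.new(0)

data.each_value do |metrics|
  metrics.each { |time, sample| total[time] += sample[:lag_stored] }
end

# Keep only the stored lag per topic
per_topic = data.transform_values do |metrics|
  metrics.map { |time, sample| [time, sample[:lag_stored]] }
end

# 'total sum' contains a space, so it cannot collide with a real topic name
puts per_topic.merge('total sum' => total.to_a).to_json
# {"orders[cg1]":[[100,5],[105,3]],"users[cg1]":[[100,2],[105,1]],"total sum":[[100,7],[105,4]]}
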
data/lib/karafka/web/ui/models/metrics/topics.rb
@@ -0,0 +1,101 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Web
+     module Ui
+       module Models
+         module Metrics
+           # Representation of topics historical metrics based on the aggregated metrics data
+           # We do some pre-processing to align and normalize all the data
+           class Topics < Lib::HashProxy
+             # @param consumers_groups [Hash] historical metrics for consumers groups
+             def initialize(consumers_groups)
+               aggregate_topics_data(consumers_groups)
+                 .tap { |topics_metrics| nulify_gaps(topics_metrics) }
+                 .then { |topics_metrics| super(topics_metrics) }
+             end
+
+             private
+
+             # Extracts and aggregates data on a per-topic basis in a hash. Because in theory
+             # the same topic can be consumed by multiple consumer groups, we include the
+             # consumer group in the hash keys.
+             #
+             # @param consumers_groups [Hash] consumers groups initial hash with metrics
+             # @return [Hash] remapped hash with range including extracted topics details
+             def aggregate_topics_data(consumers_groups)
+               extracted = Hash.new { |h, k| h[k] = [] }
+
+               consumers_groups.each do |range, samples|
+                 range_extracted = {}
+
+                 samples.each do |sample|
+                   time = sample.first
+                   groups = sample.last
+
+                   groups.each do |cg_name, topics|
+                     topics.each do |topic_name, topic_data|
+                       range_extracted["#{topic_name}[#{cg_name}]"] ||= []
+                       range_extracted["#{topic_name}[#{cg_name}]"] << [time, topic_data]
+                     end
+                   end
+                 end
+
+                 # Always align the order of topics in the hash based on their name so it is
+                 # independent of the reported order
+                 extracted[range] = range_extracted.keys.sort.map do |key|
+                   [key, range_extracted[key]]
+                 end.to_h
+               end
+
+               extracted
+             end
+
+             # Fills gaps within the data with nil-valued metrics. This is needed for us to be
+             # able to provide consistent charts even with gaps in reporting.
+             #
+             # @param topics_metrics [Hash] flattened topics data
+             # @note This modifies the original data in place
+             # @note We nullify both gaps in metrics as well as gaps in times (no values for time)
+             def nulify_gaps(topics_metrics)
+               # Hash with all potential keys that a single sample metric can have
+               # This allows us to fill gaps not only in times but also in values
+               base_samples = topics_metrics
+                              .values
+                              .map(&:values)
+                              .flatten
+                              .select { |val| val.is_a?(Hash) }
+                              .flat_map(&:keys)
+                              .uniq
+                              .map { |key| [key, nil] }
+                              .to_h
+                              .freeze
+
+               # Normalize the data in between topic reports
+               # One topic may have a sample at a moment in time when a different one does not
+               topics_metrics.each_value do |samples|
+                 # All available times from all the topics
+                 times = samples.values.map { |set| set.map(&:first) }.flatten.uniq
+
+                 samples.each_value do |set|
+                   times.each do |time|
+                     existing_index = set.find_index { |existing_time, _| existing_time == time }
+
+                     if existing_index
+                       existing_value = set[existing_index][1]
+                       set[existing_index][1] = base_samples.merge(existing_value)
+                     else
+                       set << [time, base_samples]
+                     end
+                   end
+
+                   set.sort_by!(&:first)
+                 end
+               end
+             end
+           end
+         end
+       end
+     end
+   end
+ end
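
nulify_gaps aligns every topic series to the same set of timestamps and the same metric keys, inserting nils where a topic or a key was not reported. A condensed standalone sketch of that alignment (sample data invented for illustration):

# 'users[cg1]' misses the 105 sample entirely and the :pace key at 100
samples = {
  'orders[cg1]' => [[100, { lag: 5, pace: 10 }], [105, { lag: 3, pace: 12 }]],
  'users[cg1]'  => [[100, { lag: 2 }]]
}

# Every metric key seen anywhere, mapped to nil, as the base sample
base = samples
       .values
       .flatten(1)
       .flat_map { |_, metrics| metrics.keys }
       .uniq
       .to_h { |key| [key, nil] }

# Every timestamp seen anywhere
times = samples.values.flatten(1).map(&:first).uniq

samples.each_value do |set|
  times.each do |time|
    entry = set.find { |existing_time, _| existing_time == time }

    if entry
      entry[1] = base.merge(entry[1]) # fill missing keys with nils
    else
      set << [time, base] # fill the missing timestamp entirely with nils
    end
  end

  set.sort_by!(&:first)
end

p samples['users[cg1]']
# [[100, {:lag=>2, :pace=>nil}], [105, {:lag=>nil, :pace=>nil}]]
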
data/lib/karafka/web/ui/models/partition.rb
@@ -6,6 +6,33 @@ module Karafka
        module Models
          # Single topic partition data representation model
          class Partition < Lib::HashProxy
+           # @return [Symbol] one of three states the LSO can be in, relative to the given
+           #   partition in the context of a consumer group
+           #
+           # @note State descriptions:
+           #   - `:active` - all good. No hanging transactions, processing is OK
+           #   - `:at_risk` - there may be hanging transactions, but they do not yet affect
+           #     processing. The transaction may still finish without affecting the
+           #     processing, hence it has no impact.
+           #   - `:stopped` - we have reached a hanging LSO and we cannot move forward despite
+           #     more data being available. Unless the hanging transaction is killed or it
+           #     finishes, we will not move forward.
+           def lso_risk_state
+             # If the last stable offset is falling behind the high watermark...
+             if ls_offset < hi_offset
+               # ...but it is changing and moving fast enough, it does not mean it is stuck
+               return :active if ls_offset_fd < ::Karafka::Web.config.ui.lso_threshold
+
+               # If it is stuck but we still have work to do, this is not a tragic situation,
+               # because maybe it will become unstuck before we reach it
+               return :at_risk if (committed_offset || 0) < ls_offset
+
+               # If it is not changing and is falling behind the high watermark, it is stuck
+               :stopped
+             else
+               :active
+             end
+           end
          end
        end
      end
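
The decision tree in lso_risk_state can be exercised in isolation. A standalone sketch with the config lookup replaced by a plain parameter (the five-minute default below is an assumed illustration standing in for ui.lso_threshold, not necessarily the gem's default):

# Standalone version of the decision tree; lso_threshold stands in for
# ::Karafka::Web.config.ui.lso_threshold (value here is assumed, in ms)
def lso_risk_state(ls_offset:, hi_offset:, ls_offset_fd:, committed_offset:,
                   lso_threshold: 5 * 60 * 1_000)
  # An LSO keeping up with the high watermark is always healthy
  return :active unless ls_offset < hi_offset
  # Behind, but the LSO moved recently enough not to be considered stuck
  return :active if ls_offset_fd < lso_threshold
  # Stuck, but there is still data to process before we reach the LSO
  return :at_risk if (committed_offset || 0) < ls_offset

  # Stuck and we have consumed everything up to it: processing is blocked
  :stopped
end

p lso_risk_state(ls_offset: 100, hi_offset: 100, ls_offset_fd: 0, committed_offset: 90)
# => :active
p lso_risk_state(ls_offset: 100, hi_offset: 120, ls_offset_fd: 600_000, committed_offset: 90)
# => :at_risk
p lso_risk_state(ls_offset: 100, hi_offset: 120, ls_offset_fd: 600_000, committed_offset: 100)
# => :stopped
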
data/lib/karafka/web/ui/models/process.rb
@@ -40,7 +40,7 @@ module Karafka
              .sort_by(&:started_at)
          end

-         # @return [Integer] collective lag on this process
+         # @return [Integer] collective stored lag on this process
          def lag_stored
            consumer_groups
              .flat_map(&:subscription_groups)
@@ -51,6 +51,17 @@ module Karafka
              .sum
          end

+         # @return [Integer] collective lag on this process
+         def lag
+           consumer_groups
+             .flat_map(&:subscription_groups)
+             .flat_map(&:topics)
+             .flat_map(&:partitions)
+             .map(&:lag)
+             .delete_if(&:negative?)
+             .sum
+         end
+
          # @return [Integer] number of partitions to which we are currently subscribed
          def subscribed_partitions_count
            consumer_groups
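
Both #lag and #lag_stored drop negative per-partition values before summing; a negative lag conventionally marks a partition for which the lag could not (yet) be computed, so including it would distort the total. A toy equivalent of that summation:

# Per-partition lags as reported; -1 assumed to mean "lag not (yet) known"
partition_lags = [12, 0, -1, 7, -1]

# Negative markers are dropped so they do not distort the total
collective_lag = partition_lags.reject(&:negative?).sum

puts collective_lag # => 19
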