karafka-web 0.6.3 → 0.7.0

Files changed (214)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +13 -4
  4. data/CHANGELOG.md +119 -5
  5. data/Gemfile +1 -0
  6. data/Gemfile.lock +27 -24
  7. data/README.md +2 -0
  8. data/bin/rspecs +6 -0
  9. data/certs/cert_chain.pem +21 -21
  10. data/docker-compose.yml +22 -0
  11. data/karafka-web.gemspec +3 -3
  12. data/lib/karafka/web/app.rb +6 -2
  13. data/lib/karafka/web/cli.rb +51 -47
  14. data/lib/karafka/web/config.rb +33 -9
  15. data/lib/karafka/web/contracts/base.rb +32 -0
  16. data/lib/karafka/web/contracts/config.rb +63 -0
  17. data/lib/karafka/web/deserializer.rb +10 -1
  18. data/lib/karafka/web/errors.rb +29 -7
  19. data/lib/karafka/web/installer.rb +58 -148
  20. data/lib/karafka/web/management/base.rb +34 -0
  21. data/lib/karafka/web/management/clean_boot_file.rb +31 -0
  22. data/lib/karafka/web/management/create_initial_states.rb +101 -0
  23. data/lib/karafka/web/management/create_topics.rb +127 -0
  24. data/lib/karafka/web/management/delete_topics.rb +28 -0
  25. data/lib/karafka/web/management/enable.rb +82 -0
  26. data/lib/karafka/web/management/extend_boot_file.rb +37 -0
  27. data/lib/karafka/web/processing/consumer.rb +73 -17
  28. data/lib/karafka/web/processing/consumers/aggregators/base.rb +56 -0
  29. data/lib/karafka/web/processing/consumers/aggregators/metrics.rb +154 -0
  30. data/lib/karafka/web/processing/consumers/aggregators/state.rb +180 -0
  31. data/lib/karafka/web/processing/consumers/contracts/aggregated_stats.rb +32 -0
  32. data/lib/karafka/web/processing/consumers/contracts/metrics.rb +53 -0
  33. data/lib/karafka/web/processing/consumers/contracts/process.rb +19 -0
  34. data/lib/karafka/web/processing/consumers/contracts/state.rb +49 -0
  35. data/lib/karafka/web/processing/consumers/contracts/topic_stats.rb +21 -0
  36. data/lib/karafka/web/processing/consumers/metrics.rb +29 -0
  37. data/lib/karafka/web/processing/consumers/schema_manager.rb +56 -0
  38. data/lib/karafka/web/processing/consumers/state.rb +6 -9
  39. data/lib/karafka/web/processing/time_series_tracker.rb +130 -0
  40. data/lib/karafka/web/tracking/consumers/contracts/consumer_group.rb +2 -2
  41. data/lib/karafka/web/tracking/consumers/contracts/job.rb +2 -1
  42. data/lib/karafka/web/tracking/consumers/contracts/partition.rb +14 -1
  43. data/lib/karafka/web/tracking/consumers/contracts/report.rb +10 -8
  44. data/lib/karafka/web/tracking/consumers/contracts/subscription_group.rb +2 -2
  45. data/lib/karafka/web/tracking/consumers/contracts/topic.rb +2 -2
  46. data/lib/karafka/web/tracking/consumers/listeners/processing.rb +6 -2
  47. data/lib/karafka/web/tracking/consumers/listeners/statistics.rb +15 -1
  48. data/lib/karafka/web/tracking/consumers/reporter.rb +14 -6
  49. data/lib/karafka/web/tracking/consumers/sampler.rb +80 -39
  50. data/lib/karafka/web/tracking/contracts/error.rb +2 -1
  51. data/lib/karafka/web/ui/app.rb +20 -10
  52. data/lib/karafka/web/ui/base.rb +56 -6
  53. data/lib/karafka/web/ui/controllers/base.rb +28 -0
  54. data/lib/karafka/web/ui/controllers/become_pro.rb +1 -1
  55. data/lib/karafka/web/ui/controllers/cluster.rb +12 -6
  56. data/lib/karafka/web/ui/controllers/consumers.rb +4 -2
  57. data/lib/karafka/web/ui/controllers/dashboard.rb +32 -0
  58. data/lib/karafka/web/ui/controllers/errors.rb +19 -6
  59. data/lib/karafka/web/ui/controllers/jobs.rb +4 -2
  60. data/lib/karafka/web/ui/controllers/requests/params.rb +28 -0
  61. data/lib/karafka/web/ui/controllers/responses/redirect.rb +29 -0
  62. data/lib/karafka/web/ui/helpers/application_helper.rb +57 -14
  63. data/lib/karafka/web/ui/helpers/paths_helper.rb +48 -0
  64. data/lib/karafka/web/ui/lib/hash_proxy.rb +18 -6
  65. data/lib/karafka/web/ui/lib/paginations/base.rb +61 -0
  66. data/lib/karafka/web/ui/lib/paginations/offset_based.rb +96 -0
  67. data/lib/karafka/web/ui/lib/paginations/page_based.rb +70 -0
  68. data/lib/karafka/web/ui/lib/paginations/paginators/arrays.rb +33 -0
  69. data/lib/karafka/web/ui/lib/paginations/paginators/base.rb +23 -0
  70. data/lib/karafka/web/ui/lib/paginations/paginators/partitions.rb +52 -0
  71. data/lib/karafka/web/ui/lib/paginations/paginators/sets.rb +85 -0
  72. data/lib/karafka/web/ui/lib/paginations/watermark_offsets_based.rb +75 -0
  73. data/lib/karafka/web/ui/lib/ttl_cache.rb +82 -0
  74. data/lib/karafka/web/ui/models/cluster_info.rb +59 -0
  75. data/lib/karafka/web/ui/models/consumers_metrics.rb +46 -0
  76. data/lib/karafka/web/ui/models/{state.rb → consumers_state.rb} +6 -2
  77. data/lib/karafka/web/ui/models/health.rb +37 -7
  78. data/lib/karafka/web/ui/models/message.rb +123 -39
  79. data/lib/karafka/web/ui/models/metrics/aggregated.rb +196 -0
  80. data/lib/karafka/web/ui/models/metrics/charts/aggregated.rb +50 -0
  81. data/lib/karafka/web/ui/models/metrics/charts/topics.rb +109 -0
  82. data/lib/karafka/web/ui/models/metrics/topics.rb +101 -0
  83. data/lib/karafka/web/ui/models/partition.rb +27 -0
  84. data/lib/karafka/web/ui/models/process.rb +12 -1
  85. data/lib/karafka/web/ui/models/status.rb +110 -22
  86. data/lib/karafka/web/ui/models/visibility_filter.rb +33 -0
  87. data/lib/karafka/web/ui/pro/app.rb +87 -19
  88. data/lib/karafka/web/ui/pro/controllers/cluster.rb +11 -0
  89. data/lib/karafka/web/ui/pro/controllers/consumers.rb +13 -7
  90. data/lib/karafka/web/ui/pro/controllers/dashboard.rb +54 -0
  91. data/lib/karafka/web/ui/pro/controllers/dlq.rb +1 -2
  92. data/lib/karafka/web/ui/pro/controllers/errors.rb +46 -10
  93. data/lib/karafka/web/ui/pro/controllers/explorer.rb +145 -15
  94. data/lib/karafka/web/ui/pro/controllers/health.rb +10 -2
  95. data/lib/karafka/web/ui/pro/controllers/messages.rb +62 -0
  96. data/lib/karafka/web/ui/pro/controllers/routing.rb +44 -0
  97. data/lib/karafka/web/ui/pro/views/consumers/_breadcrumbs.erb +7 -1
  98. data/lib/karafka/web/ui/pro/views/consumers/_consumer.erb +1 -1
  99. data/lib/karafka/web/ui/pro/views/consumers/_counters.erb +7 -5
  100. data/lib/karafka/web/ui/pro/views/consumers/consumer/_job.erb +3 -3
  101. data/lib/karafka/web/ui/pro/views/consumers/consumer/_metrics.erb +5 -4
  102. data/lib/karafka/web/ui/pro/views/consumers/consumer/_partition.erb +13 -4
  103. data/lib/karafka/web/ui/pro/views/consumers/consumer/_subscription_group.erb +3 -2
  104. data/lib/karafka/web/ui/pro/views/consumers/consumer/_tabs.erb +7 -0
  105. data/lib/karafka/web/ui/pro/views/consumers/details.erb +21 -0
  106. data/lib/karafka/web/ui/pro/views/consumers/index.erb +4 -2
  107. data/lib/karafka/web/ui/pro/views/dashboard/_ranges_selector.erb +39 -0
  108. data/lib/karafka/web/ui/pro/views/dashboard/index.erb +82 -0
  109. data/lib/karafka/web/ui/pro/views/dlq/_topic.erb +1 -1
  110. data/lib/karafka/web/ui/pro/views/errors/_breadcrumbs.erb +8 -6
  111. data/lib/karafka/web/ui/pro/views/errors/_error.erb +2 -2
  112. data/lib/karafka/web/ui/pro/views/errors/_partition_option.erb +1 -1
  113. data/lib/karafka/web/ui/pro/views/errors/_table.erb +21 -0
  114. data/lib/karafka/web/ui/pro/views/errors/_title_with_select.erb +31 -0
  115. data/lib/karafka/web/ui/pro/views/errors/index.erb +9 -56
  116. data/lib/karafka/web/ui/pro/views/errors/partition.erb +17 -0
  117. data/lib/karafka/web/ui/pro/views/errors/show.erb +1 -1
  118. data/lib/karafka/web/ui/pro/views/explorer/_breadcrumbs.erb +6 -4
  119. data/lib/karafka/web/ui/pro/views/explorer/_filtered.erb +16 -0
  120. data/lib/karafka/web/ui/pro/views/explorer/_message.erb +14 -4
  121. data/lib/karafka/web/ui/pro/views/explorer/_no_topics.erb +7 -0
  122. data/lib/karafka/web/ui/pro/views/explorer/_partition_option.erb +3 -3
  123. data/lib/karafka/web/ui/pro/views/explorer/_topic.erb +1 -1
  124. data/lib/karafka/web/ui/pro/views/explorer/index.erb +12 -8
  125. data/lib/karafka/web/ui/pro/views/explorer/messages/_headers.erb +15 -0
  126. data/lib/karafka/web/ui/pro/views/explorer/messages/_key.erb +12 -0
  127. data/lib/karafka/web/ui/pro/views/explorer/partition/_details.erb +35 -0
  128. data/lib/karafka/web/ui/pro/views/explorer/partition/_messages.erb +1 -0
  129. data/lib/karafka/web/ui/pro/views/explorer/partition.erb +6 -4
  130. data/lib/karafka/web/ui/pro/views/explorer/show.erb +48 -5
  131. data/lib/karafka/web/ui/pro/views/explorer/topic/_details.erb +23 -0
  132. data/lib/karafka/web/ui/pro/views/explorer/topic/_empty.erb +3 -0
  133. data/lib/karafka/web/ui/pro/views/explorer/topic/_limited.erb +4 -0
  134. data/lib/karafka/web/ui/pro/views/explorer/topic.erb +51 -0
  135. data/lib/karafka/web/ui/pro/views/health/_breadcrumbs.erb +16 -0
  136. data/lib/karafka/web/ui/pro/views/health/_no_data.erb +9 -0
  137. data/lib/karafka/web/ui/pro/views/health/_partition.erb +17 -15
  138. data/lib/karafka/web/ui/pro/views/health/_partition_offset.erb +40 -0
  139. data/lib/karafka/web/ui/pro/views/health/_tabs.erb +27 -0
  140. data/lib/karafka/web/ui/pro/views/health/offsets.erb +71 -0
  141. data/lib/karafka/web/ui/pro/views/health/overview.erb +68 -0
  142. data/lib/karafka/web/ui/pro/views/jobs/_job.erb +6 -3
  143. data/lib/karafka/web/ui/pro/views/jobs/index.erb +4 -1
  144. data/lib/karafka/web/ui/pro/views/routing/_consumer_group.erb +37 -0
  145. data/lib/karafka/web/ui/pro/views/routing/_detail.erb +25 -0
  146. data/lib/karafka/web/ui/pro/views/routing/_topic.erb +23 -0
  147. data/lib/karafka/web/ui/pro/views/routing/index.erb +10 -0
  148. data/lib/karafka/web/ui/pro/views/routing/show.erb +26 -0
  149. data/lib/karafka/web/ui/pro/views/shared/_navigation.erb +7 -10
  150. data/lib/karafka/web/ui/public/images/logo-gray.svg +28 -0
  151. data/lib/karafka/web/ui/public/javascripts/application.js +30 -0
  152. data/lib/karafka/web/ui/public/javascripts/chart.min.js +14 -0
  153. data/lib/karafka/web/ui/public/javascripts/charts.js +330 -0
  154. data/lib/karafka/web/ui/public/javascripts/datepicker.js +6 -0
  155. data/lib/karafka/web/ui/public/javascripts/live_poll.js +39 -12
  156. data/lib/karafka/web/ui/public/javascripts/offset_datetime.js +74 -0
  157. data/lib/karafka/web/ui/public/javascripts/tabs.js +59 -0
  158. data/lib/karafka/web/ui/public/stylesheets/application.css +11 -0
  159. data/lib/karafka/web/ui/public/stylesheets/datepicker.min.css +12 -0
  160. data/lib/karafka/web/ui/views/cluster/_no_partitions.erb +3 -0
  161. data/lib/karafka/web/ui/views/cluster/_partition.erb +20 -22
  162. data/lib/karafka/web/ui/views/cluster/index.erb +6 -1
  163. data/lib/karafka/web/ui/views/consumers/_consumer.erb +1 -1
  164. data/lib/karafka/web/ui/views/consumers/_counters.erb +6 -4
  165. data/lib/karafka/web/ui/views/consumers/_summary.erb +3 -3
  166. data/lib/karafka/web/ui/views/consumers/index.erb +3 -1
  167. data/lib/karafka/web/ui/views/dashboard/_feature_pro.erb +3 -0
  168. data/lib/karafka/web/ui/views/dashboard/_not_enough_data.erb +15 -0
  169. data/lib/karafka/web/ui/views/dashboard/_ranges_selector.erb +23 -0
  170. data/lib/karafka/web/ui/views/dashboard/index.erb +95 -0
  171. data/lib/karafka/web/ui/views/errors/_detail.erb +12 -0
  172. data/lib/karafka/web/ui/views/errors/_error.erb +2 -2
  173. data/lib/karafka/web/ui/views/errors/show.erb +1 -1
  174. data/lib/karafka/web/ui/views/jobs/index.erb +3 -1
  175. data/lib/karafka/web/ui/views/layout.erb +10 -3
  176. data/lib/karafka/web/ui/views/routing/_consumer_group.erb +8 -6
  177. data/lib/karafka/web/ui/views/routing/_detail.erb +2 -2
  178. data/lib/karafka/web/ui/views/routing/_topic.erb +1 -1
  179. data/lib/karafka/web/ui/views/routing/show.erb +1 -1
  180. data/lib/karafka/web/ui/views/shared/_brand.erb +2 -2
  181. data/lib/karafka/web/ui/views/shared/_chart.erb +14 -0
  182. data/lib/karafka/web/ui/views/shared/_content.erb +2 -2
  183. data/lib/karafka/web/ui/views/shared/_feature_pro.erb +1 -1
  184. data/lib/karafka/web/ui/views/shared/_flashes.erb +9 -0
  185. data/lib/karafka/web/ui/views/shared/_footer.erb +22 -0
  186. data/lib/karafka/web/ui/views/shared/_header.erb +15 -9
  187. data/lib/karafka/web/ui/views/shared/_live_poll.erb +7 -0
  188. data/lib/karafka/web/ui/views/shared/_navigation.erb +5 -8
  189. data/lib/karafka/web/ui/views/shared/_no_paginated_data.erb +9 -0
  190. data/lib/karafka/web/ui/views/shared/_pagination.erb +17 -13
  191. data/lib/karafka/web/ui/views/shared/_tab_nav.erb +7 -0
  192. data/lib/karafka/web/ui/views/shared/exceptions/not_found.erb +34 -32
  193. data/lib/karafka/web/ui/views/shared/exceptions/pro_only.erb +45 -43
  194. data/lib/karafka/web/ui/views/status/failures/_consumers_reports_schema_state.erb +15 -0
  195. data/lib/karafka/web/ui/views/status/failures/_enabled.erb +8 -0
  196. data/lib/karafka/web/ui/views/status/failures/_initial_consumers_metrics.erb +11 -0
  197. data/lib/karafka/web/ui/views/status/failures/{_initial_state.erb → _initial_consumers_state.erb} +3 -3
  198. data/lib/karafka/web/ui/views/status/failures/_partitions.erb +14 -6
  199. data/lib/karafka/web/ui/views/status/info/_components.erb +21 -1
  200. data/lib/karafka/web/ui/views/status/show.erb +62 -5
  201. data/lib/karafka/web/ui/views/status/successes/_enabled.erb +1 -0
  202. data/lib/karafka/web/ui/views/status/warnings/_replication.erb +19 -0
  203. data/lib/karafka/web/version.rb +1 -1
  204. data/lib/karafka/web.rb +11 -0
  205. data.tar.gz.sig +0 -0
  206. metadata +124 -39
  207. metadata.gz.sig +0 -0
  208. data/lib/karafka/web/processing/consumers/aggregator.rb +0 -130
  209. data/lib/karafka/web/tracking/contracts/base.rb +0 -34
  210. data/lib/karafka/web/ui/lib/paginate_array.rb +0 -38
  211. data/lib/karafka/web/ui/pro/views/explorer/_encryption_enabled.erb +0 -18
  212. data/lib/karafka/web/ui/pro/views/explorer/partition/_watermark_offsets.erb +0 -10
  213. data/lib/karafka/web/ui/pro/views/health/index.erb +0 -60
  214. /data/lib/karafka/web/ui/pro/views/explorer/{_detail.erb → messages/_detail.erb} +0 -0

data/lib/karafka/web/processing/consumers/aggregators/state.rb
@@ -0,0 +1,180 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      # Namespace for consumer sub-components
+      module Consumers
+        module Aggregators
+          # Aggregator that tracks consumer processes' states, aggregates the metrics and converts
+          # data points into a materialized current state.
+          #
+          # There are two types of metrics:
+          #   - totals - metrics that represent absolute values like number of messages processed
+          #     in total. Things that need to be incremented/updated with each incoming consumer
+          #     process report. They cannot be "batch computed" because they do not represent
+          #     a state in time but progress.
+          #   - aggregated state - a state that represents a "snapshot" of things happening right
+          #     now. Right now is the moment of time on which we operate.
+          class State < Base
+            # Current schema version
+            # This can be used in the future for detecting incompatible changes and writing
+            # migrations
+            SCHEMA_VERSION = '1.1.0'
+
+            # @param schema_manager [Karafka::Web::Processing::Consumers::SchemaManager] schema
+            #   manager that tracks the compatibility of schemas.
+            def initialize(schema_manager)
+              super()
+              @schema_manager = schema_manager
+            end
+
+            # Uses provided process state report to update the current materialized state
+            # @param report [Hash] consumer process state report
+            # @param offset [Integer] offset of the message with the state report. This offset is
+            #   needed as we need to be able to get all the consumers reports from a given offset.
+            def add(report, offset)
+              super(report)
+              increment_total_counters(report)
+              update_process_state(report, offset)
+              # We always evict after counters updates because we want to use expired (stopped)
+              # data for counters as it was valid previously. This can happen only when the web
+              # consumer had a lag and is catching up.
+              evict_expired_processes
+              # current means current in the context of the processing window (usually now but
+              # in case of lag, this state may be from the past)
+              refresh_current_stats
+            end
+
+            # @return [Array<Hash, Float>] aggregated current stats value and time from which
+            #   this aggregation comes
+            #
+            # @note We return a copy, because we use the internal one to track state changes and
+            #   unless we would return a copy, other aggregators could have this mutated in an
+            #   unexpected way
+            def stats
+              state.fetch(:stats).dup
+            end
+
+            # Sets the dispatch time and returns the hash that can be shipped to the states topic
+            #
+            # @param _args [Object] extra parsing arguments (not used)
+            # @return [Hash] Hash that we can use to ship states data to Kafka
+            def to_h(*_args)
+              state[:schema_version] = SCHEMA_VERSION
+              state[:dispatched_at] = float_now
+              state[:schema_state] = @schema_manager.to_s
+
+              state
+            end
+
+            private
+
+            # @return [Hash] hash with current state from Kafka
+            def state
+              @state ||= Consumers::State.current!
+            end
+
+            # Increments the total counters based on the provided report
+            # @param report [Hash]
+            def increment_total_counters(report)
+              report[:stats][:total].each do |key, value|
+                state[:stats][key] ||= 0
+                state[:stats][key] += value
+              end
+            end
+
+            # Registers or updates the given process state based on the report
+            #
+            # @param report [Hash]
+            # @param offset [Integer]
+            def update_process_state(report, offset)
+              process_name = report[:process][:name]
+
+              state[:processes][process_name] = {
+                dispatched_at: report[:dispatched_at],
+                offset: offset
+              }
+            end
+
+            # Evicts expired processes from the current state
+            # We consider processes dead if they do not report often enough
+            # @note We do not evict based on states (stopped), because we want to report the
+            #   stopped processes for extra time within the ttl limitations. This makes tracking
+            #   of things from a UX perspective nicer.
+            def evict_expired_processes
+              max_ttl = @aggregated_from - ::Karafka::Web.config.ttl / 1_000
+
+              state[:processes].delete_if do |_name, details|
+                details[:dispatched_at] < max_ttl
+              end
+
+              @active_reports.delete_if do |_name, details|
+                details[:dispatched_at] < max_ttl
+              end
+            end
+
+            # Refreshes the counters that are computed based on incoming reports and not a total
+            # sum. For this we use active reports we have in memory. It may not be accurate for
+            # the first few seconds but it is much more optimal from a performance perspective
+            # than computing this by fetching all data from Kafka for each view.
+            def refresh_current_stats
+              stats = state[:stats]
+
+              stats[:busy] = 0
+              stats[:enqueued] = 0
+              stats[:workers] = 0
+              stats[:processes] = 0
+              stats[:rss] = 0
+              stats[:listeners] = 0
+              stats[:lag] = 0
+              stats[:lag_stored] = 0
+              utilization = 0
+
+              @active_reports
+                .values
+                .reject { |report| report[:process][:status] == 'stopped' }
+                .each do |report|
+                  report_stats = report[:stats]
+                  report_process = report[:process]
+
+                  lags = []
+                  lags_stored = []
+
+                  iterate_partitions(report) do |partition_stats|
+                    lags << partition_stats[:lag]
+                    lags_stored << partition_stats[:lag_stored]
+                  end
+
+                  stats[:busy] += report_stats[:busy]
+                  stats[:enqueued] += report_stats[:enqueued]
+                  stats[:workers] += report_process[:workers] || 0
+                  stats[:listeners] += report_process[:listeners] || 0
+                  stats[:processes] += 1
+                  stats[:rss] += report_process[:memory_usage]
+                  stats[:lag] += lags.reject(&:negative?).sum
+                  stats[:lag_stored] += lags_stored.reject(&:negative?).sum
+                  utilization += report_stats[:utilization]
+                end
+
+              stats[:utilization] = utilization / (stats[:processes] + 0.0001)
+            end
+
+            # @param report [Hash]
+            def iterate_partitions(report)
+              report[:consumer_groups].each_value do |consumer_group|
+                consumer_group[:subscription_groups].each_value do |subscription_group|
+                  subscription_group[:topics].each_value do |topic|
+                    topic[:partitions].each_value do |partition|
+                      yield(partition)
+                    end
+                  end
+                end
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
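
For orientation, here is a minimal sketch of how this aggregator could be driven by the materializing consumer. The `messages` batch and the `Base` superclass wiring are simplified assumptions for illustration, not the gem's actual consumer code:

schema_manager = Karafka::Web::Processing::Consumers::SchemaManager.new
state_aggregator = Karafka::Web::Processing::Consumers::Aggregators::State.new(schema_manager)

# Each consumed report increments the totals, registers the process and
# refreshes the materialized snapshot in one pass
messages.each do |message|
  state_aggregator.add(message.payload, message.offset)
end

# to_h stamps schema_version, dispatched_at and schema_state, producing the
# hash that gets shipped back to the consumers states topic
materialized = state_aggregator.to_h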

data/lib/karafka/web/processing/consumers/contracts/aggregated_stats.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        # Consumer tracking related contracts
+        module Contracts
+          # Contract used to validate the stats that are both present in state and metrics
+          class AggregatedStats < Web::Contracts::Base
+            configure
+
+            required(:batches) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:messages) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:retries) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:dead) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:errors) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:busy) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:enqueued) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:workers) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:processes) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:rss) { |val| val.is_a?(Numeric) && val >= 0 }
+            required(:listeners) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:utilization) { |val| val.is_a?(Numeric) && val >= 0 }
+            required(:lag_stored) { |val| val.is_a?(Integer) }
+            required(:lag) { |val| val.is_a?(Integer) }
+          end
+        end
+      end
+    end
+  end
+end
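
These contracts use the karafka-core contract API seen throughout this diff, where `validate!` raises on a failed rule. A hypothetical payload that would pass the checks above (values are made up):

stats = {
  batches: 10, messages: 1_000, retries: 0, dead: 0, errors: 0,
  busy: 2, enqueued: 0, workers: 5, processes: 1, rss: 256.4,
  listeners: 2, utilization: 3.2,
  # lag values may legitimately be negative, so only the Integer type is
  # enforced for them
  lag_stored: 10, lag: 12
}

Karafka::Web::Processing::Consumers::Contracts::AggregatedStats.new.validate!(stats)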

data/lib/karafka/web/processing/consumers/contracts/metrics.rb
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        module Contracts
+          # Contract that describes the schema for metric reporting
+          class Metrics < Web::Contracts::Base
+            configure
+
+            required(:dispatched_at) { |val| val.is_a?(Numeric) && val.positive? }
+            required(:schema_version) { |val| val.is_a?(String) && !val.empty? }
+
+            # Ensure that all aggregated metrics are as expected (values)
+            virtual do |data, errors|
+              next unless errors.empty?
+
+              stats_contract = Contracts::AggregatedStats.new
+
+              data.fetch(:aggregated).each_value do |range_sample|
+                # Older metrics should have been validated previously so we need to check only
+                # the most recently materialized one
+                stats_contract.validate!(range_sample.last.last)
+              end
+
+              nil
+            end
+
+            # Ensure that all the consumer groups' topic details are as expected
+            virtual do |data, errors|
+              next unless errors.empty?
+
+              topic_contract = Contracts::TopicStats.new
+
+              data.fetch(:consumer_groups).each_value do |range_sample|
+                consumer_group = range_sample.last.last
+
+                consumer_group.each_value do |topics|
+                  topics.each_value do |topic_stats|
+                    topic_contract.validate!(topic_stats)
+                  end
+                end
+              end
+
+              nil
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/karafka/web/processing/consumers/contracts/process.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        module Contracts
+          # State process details contract
+          class Process < Web::Contracts::Base
+            configure
+
+            required(:dispatched_at) { |val| val.is_a?(Numeric) && val.positive? }
+            required(:offset) { |val| val.is_a?(Integer) && val >= 0 }
+          end
+        end
+      end
+    end
+  end
+end

data/lib/karafka/web/processing/consumers/contracts/state.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        # Consumer tracking related contracts
+        module Contracts
+          # Contract used to ensure the consistency of the data generated for the consumers
+          # states topic
+          class State < Web::Contracts::Base
+            configure
+
+            # Valid schema manager states
+            VALID_SCHEMA_STATES = %w[compatible incompatible].freeze
+
+            private_constant :VALID_SCHEMA_STATES
+
+            required(:schema_version) { |val| val.is_a?(String) && !val.empty? }
+            required(:dispatched_at) { |val| val.is_a?(Numeric) && val.positive? }
+            required(:stats) { |val| val.is_a?(Hash) }
+            required(:processes) { |val| val.is_a?(Hash) }
+            required(:schema_state) { |val| VALID_SCHEMA_STATES.include?(val) }
+
+            virtual do |data, errors|
+              next unless errors.empty?
+
+              Contracts::AggregatedStats.new.validate!(data.fetch(:stats))
+
+              nil
+            end
+
+            virtual do |data, errors|
+              next unless errors.empty?
+
+              process_contract = Contracts::Process.new
+
+              data.fetch(:processes).each_value do |details|
+                process_contract.validate!(details)
+              end
+
+              nil
+            end
+          end
+        end
+      end
+    end
+  end
+end
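
A hypothetical, minimal state document that would satisfy this contract, with the virtual rules cascading into the AggregatedStats and Process contracts (all values are made up):

state = {
  schema_version: '1.1.0',
  dispatched_at: Time.now.utc.to_f,
  schema_state: 'compatible',
  stats: {
    batches: 0, messages: 0, retries: 0, dead: 0, errors: 0,
    busy: 0, enqueued: 0, workers: 0, processes: 0, rss: 0,
    listeners: 0, utilization: 0, lag_stored: 0, lag: 0
  },
  # Keys are process names; each value must pass the Process contract
  processes: {
    'shinra:1:1' => { dispatched_at: Time.now.utc.to_f, offset: 0 }
  }
}

Karafka::Web::Processing::Consumers::Contracts::State.new.validate!(state)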

data/lib/karafka/web/processing/consumers/contracts/topic_stats.rb
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        module Contracts
+          # Topic metrics checks
+          class TopicStats < Web::Contracts::Base
+            configure
+
+            required(:lag_stored) { |val| val.is_a?(Integer) }
+            required(:lag) { |val| val.is_a?(Integer) }
+            required(:pace) { |val| val.is_a?(Integer) }
+            required(:ls_offset_fd) { |val| val.is_a?(Integer) && val >= 0 }
+          end
+        end
+      end
+    end
+  end
+end

data/lib/karafka/web/processing/consumers/metrics.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        # Fetches the current consumers historical metrics data
+        class Metrics
+          class << self
+            # Fetch the current metrics data that is expected to exist
+            #
+            # @return [Hash] latest (current) aggregated metrics state
+            def current!
+              metrics_message = ::Karafka::Admin.read_topic(
+                Karafka::Web.config.topics.consumers.metrics,
+                0,
+                1
+              ).last
+
+              return metrics_message.payload if metrics_message
+
+              raise(::Karafka::Web::Errors::Processing::MissingConsumersMetricsError)
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/karafka/web/processing/consumers/schema_manager.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        # Schema manager is responsible for making sure that the consumer report messages we
+        # consume have a schema compatible with the current process that is supposed to
+        # materialize them.
+        #
+        # In general we always support at least one major version back and we recommend
+        # sequential upgrades (0.5 => 0.6 => 0.7)
+        #
+        # This is needed in scenarios where a rolling deploy would get new karafka processes
+        # reporting data while consumption would still run on an older one.
+        class SchemaManager
+          # Current reports version for comparing
+          CURRENT_VERSION = ::Gem::Version.new(
+            ::Karafka::Web::Tracking::Consumers::Sampler::SCHEMA_VERSION
+          )
+
+          private_constant :CURRENT_VERSION
+
+          def initialize
+            @cache = {}
+            @valid = true
+          end
+
+          # @param message [Karafka::Messages::Message] consumer report
+          # @return [Boolean] true if all good or false if incompatible
+          #
+          # @note The state switch is one-direction only. If we encounter an incompatible
+          #   message we need to stop processing, so further checks even with valid messages
+          #   should not switch it back to valid
+          def compatible?(message)
+            schema_version = message.payload[:schema_version]
+
+            # Save on memory allocation by reusing
+            # Most of the time we will deal with compatible schemas, so it is not worth creating
+            # an object with each message
+            message_version = @cache[schema_version] ||= ::Gem::Version.new(schema_version)
+
+            return true if message_version <= CURRENT_VERSION
+
+            @valid = false
+          end
+
+          # @return [String] state that we can use in the materialized state for the UI reporting
+          def to_s
+            @valid ? 'compatible' : 'incompatible'
+          end
+        end
+      end
+    end
+  end
+end
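
A short sketch of the one-directional switch described in the @note above (the report messages are placeholders):

manager = Karafka::Web::Processing::Consumers::SchemaManager.new

manager.compatible?(report_v_0_7)  # => true ('0.7.x' report <= CURRENT_VERSION)
manager.to_s                       # => 'compatible'

manager.compatible?(report_v_9_9)  # => false (newer schema than this process)
manager.to_s                       # => 'incompatible'

# Later compatible reports still return true from #compatible?, but the
# materialized state stays 'incompatible' because @valid never flips back
manager.compatible?(report_v_0_7)  # => true
manager.to_s                       # => 'incompatible'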

data/lib/karafka/web/processing/consumers/state.rb
@@ -6,23 +6,20 @@ module Karafka
       module Consumers
         # Fetches the current consumer processes aggregated state
         class State
-          extend ::Karafka::Core::Helpers::Time
-
           class << self
-            # Try bootstrapping from the current state from Kafka if exists and if not, just use
-            # a blank state. Blank state will not be flushed because materialization into Kafka
-            # happens only after first report is received.
+            # Fetch the current consumers state that is expected to exist
             #
-            # @return [Hash, false] last (current) aggregated processes state or false if no
-            #   state is available
-            def current
+            # @return [Hash] last (current) aggregated processes state
+            def current!
              state_message = ::Karafka::Admin.read_topic(
                Karafka::Web.config.topics.consumers.states,
                0,
                1
              ).last
 
-              state_message ? state_message.payload : { processes: {}, stats: {} }
+              return state_message.payload if state_message
+
+              raise(::Karafka::Web::Errors::Processing::MissingConsumersStateError)
            end
          end
        end
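
This is a behavioral change worth noting when upgrading: a missing state no longer yields a blank fallback but raises instead. A sketch of the difference at a call site (the rescue flow is illustrative; the initial state is expected to be seeded by the new management components listed above, e.g. create_initial_states.rb):

begin
  state = Karafka::Web::Processing::Consumers::State.current!
rescue Karafka::Web::Errors::Processing::MissingConsumersStateError
  # 0.6.x silently fell back to { processes: {}, stats: {} } here; 0.7.0
  # treats a missing state as a setup problem instead of masking it
end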

data/lib/karafka/web/processing/time_series_tracker.rb
@@ -0,0 +1,130 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      # Allows us to accumulate and track time series data with a given resolution
+      #
+      # We aggregate for the last:
+      #   - 7 days (every day)
+      #   - 24 hours (every hour)
+      #   - 1 hour (every minute) + the most recent as an update every time (leading)
+      #
+      # @note Please note we always publish **absolute** metrics and not deltas in reference to
+      #   a given time window. This needs to be computed in the frontend as we want to have
+      #   state facts in the storage.
+      #
+      # @note Please note we evict and clean up data only before we want to use it. This will
+      #   put more stress on memory but makes this tracker 70-90% faster. Since by default we
+      #   sample every few seconds anyhow, this trade-off makes sense.
+      class TimeSeriesTracker
+        include ::Karafka::Core::Helpers::Time
+
+        # How many samples and in what resolution should we track for a given time range
+        # @note We add one more than we want to display for delta computation when ranges
+        #   are full in the UI
+        TIME_RANGES = {
+          # 7 days sampling
+          days: {
+            # Sample every 8 hours so we end up with 56 samples over a week + 1 for baseline
+            resolution: 8 * 60 * 60,
+            limit: 57
+          }.freeze,
+          # 24 hours sampling
+          hours: {
+            # Every 30 minutes for 24 hours + baseline
+            resolution: 30 * 60,
+            limit: 49
+          }.freeze,
+          # 60 minutes sampling
+          minutes: {
+            # Every one minute for an hour => 60 samples
+            resolution: 60,
+            limit: 61
+          }.freeze,
+          # 5 minutes sampling
+          seconds: {
+            # Every 5 seconds with 60 samples + baseline. That is 300 seconds => 5 minutes
+            resolution: 5,
+            limit: 61
+          }.freeze
+        }.freeze
+
+        # @param existing [Hash] existing historical metrics (may be empty for the first state)
+        def initialize(existing)
+          # Builds an empty structure for potential time ranges we are interested in
+          @historicals = TIME_RANGES.keys.map { |name| [name, []] }.to_h
+
+          # Fetch the existing (if any) historical values that we already have
+          import_existing(existing)
+        end
+
+        # Adds current state into the states for tracking
+        # @param current [Hash] hash with current state
+        # @param state_time [Float] float UTC time from which the state comes
+        def add(current, state_time)
+          # Inject the time point into all the historicals
+          inject(current, state_time)
+        end
+
+        # Evicts expired and duplicated series and returns the cleaned hash
+        # @return [Hash] aggregated historicals hash
+        def to_h
+          evict
+
+          @historicals
+        end
+
+        private
+
+        # Import existing previous historical metrics as they are
+        #
+        # @param existing [Hash] existing historical metrics
+        def import_existing(existing)
+          existing.each do |range_name, values|
+            @historicals[range_name] = values
+          end
+        end
+
+        # Injects the current most recent stats sample into each of the time ranges on which we
+        # operate. This allows us on all the charts to present the most recent value before a
+        # given time window is completed
+        #
+        # @param current [Hash] current stats
+        # @param state_time [Float] time from which this state comes
+        def inject(current, state_time)
+          @historicals.each_value do |points|
+            points << [state_time.floor, current]
+          end
+        end
+
+        # Removes historical metrics that are beyond our expected range, so we maintain a stable
+        # count and do not overload the states topic with extensive data.
+        def evict
+          # Evict old metrics that are beyond our aggregated range
+          # Builds a sliding window that goes backwards
+          @historicals.each do |range_name, values|
+            rules = TIME_RANGES.fetch(range_name)
+            limit = rules.fetch(:limit)
+            resolution = rules.fetch(:resolution)
+
+            grouped = values.group_by { |sample| sample.first / resolution }
+            times = grouped.values.map(&:first)
+
+            # Inject the most recent to always have it in each reporting range
+            # Otherwise for longer time ranges we would not have the most recent state
+            # available
+            times << values.last unless values.empty?
+
+            times.uniq!(&:first)
+
+            # Squash in case there would be two events from the same time
+            times.sort_by!(&:first)
+
+            @historicals[range_name] = times.last(limit)
+          end
+        end
+      end
+    end
+  end
+end
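
A minimal usage sketch of the tracker's flow (the stats payloads are placeholders):

tracker = Karafka::Web::Processing::TimeSeriesTracker.new({}) # no prior metrics

now = Time.now.utc.to_f
# Absolute snapshots are appended to every range; eviction is deferred
tracker.add({ messages: 100 }, now)
tracker.add({ messages: 250 }, now + 5)

# to_h groups samples per range resolution, keeps the leading edge and trims
# each series to its configured limit
historicals = tracker.to_h
historicals.keys # => [:days, :hours, :minutes, :seconds]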

data/lib/karafka/web/tracking/consumers/contracts/consumer_group.rb
@@ -8,7 +8,7 @@ module Karafka
         module Contracts
           # Expected data for each consumer group
           # It's mostly about subscription groups details
-          class ConsumerGroup < Tracking::Contracts::Base
+          class ConsumerGroup < Web::Contracts::Base
             configure
 
             required(:id) { |val| val.is_a?(String) && !val.empty? }
@@ -19,7 +19,7 @@ module Karafka
 
             subscription_group_contract = SubscriptionGroup.new
 
-            data.fetch(:subscription_groups).each do |_subscription_group_name, details|
+            data.fetch(:subscription_groups).each_value do |details|
               subscription_group_contract.validate!(details)
             end
 

data/lib/karafka/web/tracking/consumers/contracts/job.rb
@@ -6,7 +6,7 @@ module Karafka
       module Consumers
         module Contracts
           # Contract for the job reporting details
-          class Job < Tracking::Contracts::Base
+          class Job < Web::Contracts::Base
             configure
 
             required(:consumer) { |val| val.is_a?(String) }
@@ -17,6 +17,7 @@ module Karafka
             required(:first_offset) { |val| val.is_a?(Integer) && (val >= 0 || val == -1001) }
             required(:last_offset) { |val| val.is_a?(Integer) && (val >= 0 || val == -1001) }
             required(:committed_offset) { |val| val.is_a?(Integer) }
+            required(:messages) { |val| val.is_a?(Integer) && val >= 0 }
             required(:type) { |val| %w[consume revoked shutdown].include?(val) }
             required(:tags) { |val| val.is_a?(Karafka::Core::Taggable::Tags) }
             # -1 can be here for workless flows

data/lib/karafka/web/tracking/consumers/contracts/partition.rb
@@ -6,14 +6,27 @@ module Karafka
       module Consumers
         module Contracts
           # Partition metrics required for web to operate
-          class Partition < Tracking::Contracts::Base
+          class Partition < Web::Contracts::Base
             configure
 
             required(:id) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:lag) { |val| val.is_a?(Integer) }
+            required(:lag_d) { |val| val.is_a?(Integer) }
             required(:lag_stored) { |val| val.is_a?(Integer) }
             required(:lag_stored_d) { |val| val.is_a?(Integer) }
             required(:committed_offset) { |val| val.is_a?(Integer) }
+            required(:committed_offset_fd) { |val| val.is_a?(Integer) && val >= 0 }
             required(:stored_offset) { |val| val.is_a?(Integer) }
+            required(:stored_offset_fd) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:fetch_state) { |val| val.is_a?(String) && !val.empty? }
+            required(:poll_state) { |val| val.is_a?(String) && !val.empty? }
+            required(:hi_offset) { |val| val.is_a?(Integer) }
+            required(:hi_offset_fd) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:lo_offset) { |val| val.is_a?(Integer) }
+            required(:eof_offset) { |val| val.is_a?(Integer) }
+            required(:ls_offset) { |val| val.is_a?(Integer) }
+            required(:ls_offset_d) { |val| val.is_a?(Integer) }
+            required(:ls_offset_fd) { |val| val.is_a?(Integer) && val >= 0 }
           end
         end
       end