karafka-web 0.6.3 → 0.7.0
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +13 -4
- data/CHANGELOG.md +119 -5
- data/Gemfile +1 -0
- data/Gemfile.lock +27 -24
- data/README.md +2 -0
- data/bin/rspecs +6 -0
- data/certs/cert_chain.pem +21 -21
- data/docker-compose.yml +22 -0
- data/karafka-web.gemspec +3 -3
- data/lib/karafka/web/app.rb +6 -2
- data/lib/karafka/web/cli.rb +51 -47
- data/lib/karafka/web/config.rb +33 -9
- data/lib/karafka/web/contracts/base.rb +32 -0
- data/lib/karafka/web/contracts/config.rb +63 -0
- data/lib/karafka/web/deserializer.rb +10 -1
- data/lib/karafka/web/errors.rb +29 -7
- data/lib/karafka/web/installer.rb +58 -148
- data/lib/karafka/web/management/base.rb +34 -0
- data/lib/karafka/web/management/clean_boot_file.rb +31 -0
- data/lib/karafka/web/management/create_initial_states.rb +101 -0
- data/lib/karafka/web/management/create_topics.rb +127 -0
- data/lib/karafka/web/management/delete_topics.rb +28 -0
- data/lib/karafka/web/management/enable.rb +82 -0
- data/lib/karafka/web/management/extend_boot_file.rb +37 -0
- data/lib/karafka/web/processing/consumer.rb +73 -17
- data/lib/karafka/web/processing/consumers/aggregators/base.rb +56 -0
- data/lib/karafka/web/processing/consumers/aggregators/metrics.rb +154 -0
- data/lib/karafka/web/processing/consumers/aggregators/state.rb +180 -0
- data/lib/karafka/web/processing/consumers/contracts/aggregated_stats.rb +32 -0
- data/lib/karafka/web/processing/consumers/contracts/metrics.rb +53 -0
- data/lib/karafka/web/processing/consumers/contracts/process.rb +19 -0
- data/lib/karafka/web/processing/consumers/contracts/state.rb +49 -0
- data/lib/karafka/web/processing/consumers/contracts/topic_stats.rb +21 -0
- data/lib/karafka/web/processing/consumers/metrics.rb +29 -0
- data/lib/karafka/web/processing/consumers/schema_manager.rb +56 -0
- data/lib/karafka/web/processing/consumers/state.rb +6 -9
- data/lib/karafka/web/processing/time_series_tracker.rb +130 -0
- data/lib/karafka/web/tracking/consumers/contracts/consumer_group.rb +2 -2
- data/lib/karafka/web/tracking/consumers/contracts/job.rb +2 -1
- data/lib/karafka/web/tracking/consumers/contracts/partition.rb +14 -1
- data/lib/karafka/web/tracking/consumers/contracts/report.rb +10 -8
- data/lib/karafka/web/tracking/consumers/contracts/subscription_group.rb +2 -2
- data/lib/karafka/web/tracking/consumers/contracts/topic.rb +2 -2
- data/lib/karafka/web/tracking/consumers/listeners/processing.rb +6 -2
- data/lib/karafka/web/tracking/consumers/listeners/statistics.rb +15 -1
- data/lib/karafka/web/tracking/consumers/reporter.rb +14 -6
- data/lib/karafka/web/tracking/consumers/sampler.rb +80 -39
- data/lib/karafka/web/tracking/contracts/error.rb +2 -1
- data/lib/karafka/web/ui/app.rb +20 -10
- data/lib/karafka/web/ui/base.rb +56 -6
- data/lib/karafka/web/ui/controllers/base.rb +28 -0
- data/lib/karafka/web/ui/controllers/become_pro.rb +1 -1
- data/lib/karafka/web/ui/controllers/cluster.rb +12 -6
- data/lib/karafka/web/ui/controllers/consumers.rb +4 -2
- data/lib/karafka/web/ui/controllers/dashboard.rb +32 -0
- data/lib/karafka/web/ui/controllers/errors.rb +19 -6
- data/lib/karafka/web/ui/controllers/jobs.rb +4 -2
- data/lib/karafka/web/ui/controllers/requests/params.rb +28 -0
- data/lib/karafka/web/ui/controllers/responses/redirect.rb +29 -0
- data/lib/karafka/web/ui/helpers/application_helper.rb +57 -14
- data/lib/karafka/web/ui/helpers/paths_helper.rb +48 -0
- data/lib/karafka/web/ui/lib/hash_proxy.rb +18 -6
- data/lib/karafka/web/ui/lib/paginations/base.rb +61 -0
- data/lib/karafka/web/ui/lib/paginations/offset_based.rb +96 -0
- data/lib/karafka/web/ui/lib/paginations/page_based.rb +70 -0
- data/lib/karafka/web/ui/lib/paginations/paginators/arrays.rb +33 -0
- data/lib/karafka/web/ui/lib/paginations/paginators/base.rb +23 -0
- data/lib/karafka/web/ui/lib/paginations/paginators/partitions.rb +52 -0
- data/lib/karafka/web/ui/lib/paginations/paginators/sets.rb +85 -0
- data/lib/karafka/web/ui/lib/paginations/watermark_offsets_based.rb +75 -0
- data/lib/karafka/web/ui/lib/ttl_cache.rb +82 -0
- data/lib/karafka/web/ui/models/cluster_info.rb +59 -0
- data/lib/karafka/web/ui/models/consumers_metrics.rb +46 -0
- data/lib/karafka/web/ui/models/{state.rb → consumers_state.rb} +6 -2
- data/lib/karafka/web/ui/models/health.rb +37 -7
- data/lib/karafka/web/ui/models/message.rb +123 -39
- data/lib/karafka/web/ui/models/metrics/aggregated.rb +196 -0
- data/lib/karafka/web/ui/models/metrics/charts/aggregated.rb +50 -0
- data/lib/karafka/web/ui/models/metrics/charts/topics.rb +109 -0
- data/lib/karafka/web/ui/models/metrics/topics.rb +101 -0
- data/lib/karafka/web/ui/models/partition.rb +27 -0
- data/lib/karafka/web/ui/models/process.rb +12 -1
- data/lib/karafka/web/ui/models/status.rb +110 -22
- data/lib/karafka/web/ui/models/visibility_filter.rb +33 -0
- data/lib/karafka/web/ui/pro/app.rb +87 -19
- data/lib/karafka/web/ui/pro/controllers/cluster.rb +11 -0
- data/lib/karafka/web/ui/pro/controllers/consumers.rb +13 -7
- data/lib/karafka/web/ui/pro/controllers/dashboard.rb +54 -0
- data/lib/karafka/web/ui/pro/controllers/dlq.rb +1 -2
- data/lib/karafka/web/ui/pro/controllers/errors.rb +46 -10
- data/lib/karafka/web/ui/pro/controllers/explorer.rb +145 -15
- data/lib/karafka/web/ui/pro/controllers/health.rb +10 -2
- data/lib/karafka/web/ui/pro/controllers/messages.rb +62 -0
- data/lib/karafka/web/ui/pro/controllers/routing.rb +44 -0
- data/lib/karafka/web/ui/pro/views/consumers/_breadcrumbs.erb +7 -1
- data/lib/karafka/web/ui/pro/views/consumers/_consumer.erb +1 -1
- data/lib/karafka/web/ui/pro/views/consumers/_counters.erb +7 -5
- data/lib/karafka/web/ui/pro/views/consumers/consumer/_job.erb +3 -3
- data/lib/karafka/web/ui/pro/views/consumers/consumer/_metrics.erb +5 -4
- data/lib/karafka/web/ui/pro/views/consumers/consumer/_partition.erb +13 -4
- data/lib/karafka/web/ui/pro/views/consumers/consumer/_subscription_group.erb +3 -2
- data/lib/karafka/web/ui/pro/views/consumers/consumer/_tabs.erb +7 -0
- data/lib/karafka/web/ui/pro/views/consumers/details.erb +21 -0
- data/lib/karafka/web/ui/pro/views/consumers/index.erb +4 -2
- data/lib/karafka/web/ui/pro/views/dashboard/_ranges_selector.erb +39 -0
- data/lib/karafka/web/ui/pro/views/dashboard/index.erb +82 -0
- data/lib/karafka/web/ui/pro/views/dlq/_topic.erb +1 -1
- data/lib/karafka/web/ui/pro/views/errors/_breadcrumbs.erb +8 -6
- data/lib/karafka/web/ui/pro/views/errors/_error.erb +2 -2
- data/lib/karafka/web/ui/pro/views/errors/_partition_option.erb +1 -1
- data/lib/karafka/web/ui/pro/views/errors/_table.erb +21 -0
- data/lib/karafka/web/ui/pro/views/errors/_title_with_select.erb +31 -0
- data/lib/karafka/web/ui/pro/views/errors/index.erb +9 -56
- data/lib/karafka/web/ui/pro/views/errors/partition.erb +17 -0
- data/lib/karafka/web/ui/pro/views/errors/show.erb +1 -1
- data/lib/karafka/web/ui/pro/views/explorer/_breadcrumbs.erb +6 -4
- data/lib/karafka/web/ui/pro/views/explorer/_filtered.erb +16 -0
- data/lib/karafka/web/ui/pro/views/explorer/_message.erb +14 -4
- data/lib/karafka/web/ui/pro/views/explorer/_no_topics.erb +7 -0
- data/lib/karafka/web/ui/pro/views/explorer/_partition_option.erb +3 -3
- data/lib/karafka/web/ui/pro/views/explorer/_topic.erb +1 -1
- data/lib/karafka/web/ui/pro/views/explorer/index.erb +12 -8
- data/lib/karafka/web/ui/pro/views/explorer/messages/_headers.erb +15 -0
- data/lib/karafka/web/ui/pro/views/explorer/messages/_key.erb +12 -0
- data/lib/karafka/web/ui/pro/views/explorer/partition/_details.erb +35 -0
- data/lib/karafka/web/ui/pro/views/explorer/partition/_messages.erb +1 -0
- data/lib/karafka/web/ui/pro/views/explorer/partition.erb +6 -4
- data/lib/karafka/web/ui/pro/views/explorer/show.erb +48 -5
- data/lib/karafka/web/ui/pro/views/explorer/topic/_details.erb +23 -0
- data/lib/karafka/web/ui/pro/views/explorer/topic/_empty.erb +3 -0
- data/lib/karafka/web/ui/pro/views/explorer/topic/_limited.erb +4 -0
- data/lib/karafka/web/ui/pro/views/explorer/topic.erb +51 -0
- data/lib/karafka/web/ui/pro/views/health/_breadcrumbs.erb +16 -0
- data/lib/karafka/web/ui/pro/views/health/_no_data.erb +9 -0
- data/lib/karafka/web/ui/pro/views/health/_partition.erb +17 -15
- data/lib/karafka/web/ui/pro/views/health/_partition_offset.erb +40 -0
- data/lib/karafka/web/ui/pro/views/health/_tabs.erb +27 -0
- data/lib/karafka/web/ui/pro/views/health/offsets.erb +71 -0
- data/lib/karafka/web/ui/pro/views/health/overview.erb +68 -0
- data/lib/karafka/web/ui/pro/views/jobs/_job.erb +6 -3
- data/lib/karafka/web/ui/pro/views/jobs/index.erb +4 -1
- data/lib/karafka/web/ui/pro/views/routing/_consumer_group.erb +37 -0
- data/lib/karafka/web/ui/pro/views/routing/_detail.erb +25 -0
- data/lib/karafka/web/ui/pro/views/routing/_topic.erb +23 -0
- data/lib/karafka/web/ui/pro/views/routing/index.erb +10 -0
- data/lib/karafka/web/ui/pro/views/routing/show.erb +26 -0
- data/lib/karafka/web/ui/pro/views/shared/_navigation.erb +7 -10
- data/lib/karafka/web/ui/public/images/logo-gray.svg +28 -0
- data/lib/karafka/web/ui/public/javascripts/application.js +30 -0
- data/lib/karafka/web/ui/public/javascripts/chart.min.js +14 -0
- data/lib/karafka/web/ui/public/javascripts/charts.js +330 -0
- data/lib/karafka/web/ui/public/javascripts/datepicker.js +6 -0
- data/lib/karafka/web/ui/public/javascripts/live_poll.js +39 -12
- data/lib/karafka/web/ui/public/javascripts/offset_datetime.js +74 -0
- data/lib/karafka/web/ui/public/javascripts/tabs.js +59 -0
- data/lib/karafka/web/ui/public/stylesheets/application.css +11 -0
- data/lib/karafka/web/ui/public/stylesheets/datepicker.min.css +12 -0
- data/lib/karafka/web/ui/views/cluster/_no_partitions.erb +3 -0
- data/lib/karafka/web/ui/views/cluster/_partition.erb +20 -22
- data/lib/karafka/web/ui/views/cluster/index.erb +6 -1
- data/lib/karafka/web/ui/views/consumers/_consumer.erb +1 -1
- data/lib/karafka/web/ui/views/consumers/_counters.erb +6 -4
- data/lib/karafka/web/ui/views/consumers/_summary.erb +3 -3
- data/lib/karafka/web/ui/views/consumers/index.erb +3 -1
- data/lib/karafka/web/ui/views/dashboard/_feature_pro.erb +3 -0
- data/lib/karafka/web/ui/views/dashboard/_not_enough_data.erb +15 -0
- data/lib/karafka/web/ui/views/dashboard/_ranges_selector.erb +23 -0
- data/lib/karafka/web/ui/views/dashboard/index.erb +95 -0
- data/lib/karafka/web/ui/views/errors/_detail.erb +12 -0
- data/lib/karafka/web/ui/views/errors/_error.erb +2 -2
- data/lib/karafka/web/ui/views/errors/show.erb +1 -1
- data/lib/karafka/web/ui/views/jobs/index.erb +3 -1
- data/lib/karafka/web/ui/views/layout.erb +10 -3
- data/lib/karafka/web/ui/views/routing/_consumer_group.erb +8 -6
- data/lib/karafka/web/ui/views/routing/_detail.erb +2 -2
- data/lib/karafka/web/ui/views/routing/_topic.erb +1 -1
- data/lib/karafka/web/ui/views/routing/show.erb +1 -1
- data/lib/karafka/web/ui/views/shared/_brand.erb +2 -2
- data/lib/karafka/web/ui/views/shared/_chart.erb +14 -0
- data/lib/karafka/web/ui/views/shared/_content.erb +2 -2
- data/lib/karafka/web/ui/views/shared/_feature_pro.erb +1 -1
- data/lib/karafka/web/ui/views/shared/_flashes.erb +9 -0
- data/lib/karafka/web/ui/views/shared/_footer.erb +22 -0
- data/lib/karafka/web/ui/views/shared/_header.erb +15 -9
- data/lib/karafka/web/ui/views/shared/_live_poll.erb +7 -0
- data/lib/karafka/web/ui/views/shared/_navigation.erb +5 -8
- data/lib/karafka/web/ui/views/shared/_no_paginated_data.erb +9 -0
- data/lib/karafka/web/ui/views/shared/_pagination.erb +17 -13
- data/lib/karafka/web/ui/views/shared/_tab_nav.erb +7 -0
- data/lib/karafka/web/ui/views/shared/exceptions/not_found.erb +34 -32
- data/lib/karafka/web/ui/views/shared/exceptions/pro_only.erb +45 -43
- data/lib/karafka/web/ui/views/status/failures/_consumers_reports_schema_state.erb +15 -0
- data/lib/karafka/web/ui/views/status/failures/_enabled.erb +8 -0
- data/lib/karafka/web/ui/views/status/failures/_initial_consumers_metrics.erb +11 -0
- data/lib/karafka/web/ui/views/status/failures/{_initial_state.erb → _initial_consumers_state.erb} +3 -3
- data/lib/karafka/web/ui/views/status/failures/_partitions.erb +14 -6
- data/lib/karafka/web/ui/views/status/info/_components.erb +21 -1
- data/lib/karafka/web/ui/views/status/show.erb +62 -5
- data/lib/karafka/web/ui/views/status/successes/_enabled.erb +1 -0
- data/lib/karafka/web/ui/views/status/warnings/_replication.erb +19 -0
- data/lib/karafka/web/version.rb +1 -1
- data/lib/karafka/web.rb +11 -0
- data.tar.gz.sig +0 -0
- metadata +124 -39
- metadata.gz.sig +0 -0
- data/lib/karafka/web/processing/consumers/aggregator.rb +0 -130
- data/lib/karafka/web/tracking/contracts/base.rb +0 -34
- data/lib/karafka/web/ui/lib/paginate_array.rb +0 -38
- data/lib/karafka/web/ui/pro/views/explorer/_encryption_enabled.erb +0 -18
- data/lib/karafka/web/ui/pro/views/explorer/partition/_watermark_offsets.erb +0 -10
- data/lib/karafka/web/ui/pro/views/health/index.erb +0 -60
- /data/lib/karafka/web/ui/pro/views/explorer/{_detail.erb → messages/_detail.erb} +0 -0
--- /dev/null
+++ b/data/lib/karafka/web/processing/consumers/aggregators/state.rb
@@ -0,0 +1,180 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      # Namespace for consumer sub-components
+      module Consumers
+        module Aggregators
+          # Aggregator that tracks consumers processes states, aggregates the metrics and converts
+          # data points into a materialized current state.
+          #
+          # There are two types of metrics:
+          #   - totals - metrics that represent absolute values like number of messages processed
+          #     in total. Things that need to be incremented/updated with each incoming consumer
+          #     process report. They cannot be "batch computed" because they do not represent
+          #     a state of time but progress.
+          #   - aggregated state - a state that represents a "snapshot" of things happening right
+          #     now. Right now is the moment of time on which we operate.
+          class State < Base
+            # Current schema version
+            # This can be used in the future for detecting incompatible changes and writing
+            # migrations
+            SCHEMA_VERSION = '1.1.0'
+
+            # @param schema_manager [Karafka::Web::Processing::Consumers::SchemaManager] schema
+            #   manager that tracks the compatibility of schemas.
+            def initialize(schema_manager)
+              super()
+              @schema_manager = schema_manager
+            end
+
+            # Uses provided process state report to update the current materialized state
+            # @param report [Hash] consumer process state report
+            # @param offset [Integer] offset of the message with the state report. This offset is
+            #   needed as we need to be able to get all the consumers reports from a given offset.
+            def add(report, offset)
+              super(report)
+              increment_total_counters(report)
+              update_process_state(report, offset)
+              # We always evict after counters updates because we want to use expired (stopped)
+              # data for counters as it was valid previously. This can happen only when the web
+              # consumer had a lag and is catching up.
+              evict_expired_processes
+              # current means current in the context of processing window (usually now but in case
+              # of lag, this state may be from the past)
+              refresh_current_stats
+            end
+
+            # @return [Array<Hash, Float>] aggregated current stats value and time from which this
+            #   aggregation comes from
+            #
+            # @note We return a copy, because we use the internal one to track state changes and
+            #   unless we would return a copy, other aggregators could have this mutated in an
+            #   unexpected way
+            def stats
+              state.fetch(:stats).dup
+            end
+
+            # Sets the dispatch time and returns the hash that can be shipped to the states topic
+            #
+            # @param _args [Object] extra parsing arguments (not used)
+            # @return [Hash] Hash that we can use to ship states data to Kafka
+            def to_h(*_args)
+              state[:schema_version] = SCHEMA_VERSION
+              state[:dispatched_at] = float_now
+              state[:schema_state] = @schema_manager.to_s
+
+              state
+            end
+
+            private
+
+            # @return [Hash] hash with current state from Kafka
+            def state
+              @state ||= Consumers::State.current!
+            end
+
+            # Increments the total counters based on the provided report
+            # @param report [Hash]
+            def increment_total_counters(report)
+              report[:stats][:total].each do |key, value|
+                state[:stats][key] ||= 0
+                state[:stats][key] += value
+              end
+            end
+
+            # Registers or updates the given process state based on the report
+            #
+            # @param report [Hash]
+            # @param offset [Integer]
+            def update_process_state(report, offset)
+              process_name = report[:process][:name]
+
+              state[:processes][process_name] = {
+                dispatched_at: report[:dispatched_at],
+                offset: offset
+              }
+            end
+
+            # Evicts expired processes from the current state
+            # We consider processes dead if they do not report often enough
+            # @note We do not evict based on states (stopped), because we want to report the
+            #   stopped processes for extra time within the ttl limitations. This makes tracking of
+            #   things from UX perspective nicer.
+            def evict_expired_processes
+              max_ttl = @aggregated_from - ::Karafka::Web.config.ttl / 1_000
+
+              state[:processes].delete_if do |_name, details|
+                details[:dispatched_at] < max_ttl
+              end
+
+              @active_reports.delete_if do |_name, details|
+                details[:dispatched_at] < max_ttl
+              end
+            end
+
+            # Refreshes the counters that are computed based on incoming reports and not a total sum.
+            # For this we use active reports we have in memory. It may not be accurate for the first
+            # few seconds but it is much more optimal from a performance perspective than computing
+            # this by fetching all data from Kafka for each view.
+            def refresh_current_stats
+              stats = state[:stats]
+
+              stats[:busy] = 0
+              stats[:enqueued] = 0
+              stats[:workers] = 0
+              stats[:processes] = 0
+              stats[:rss] = 0
+              stats[:listeners] = 0
+              stats[:lag] = 0
+              stats[:lag_stored] = 0
+              utilization = 0
+
+              @active_reports
+                .values
+                .reject { |report| report[:process][:status] == 'stopped' }
+                .each do |report|
+                  report_stats = report[:stats]
+                  report_process = report[:process]
+
+                  lags = []
+                  lags_stored = []
+
+                  iterate_partitions(report) do |partition_stats|
+                    lags << partition_stats[:lag]
+                    lags_stored << partition_stats[:lag_stored]
+                  end
+
+                  stats[:busy] += report_stats[:busy]
+                  stats[:enqueued] += report_stats[:enqueued]
+                  stats[:workers] += report_process[:workers] || 0
+                  stats[:listeners] += report_process[:listeners] || 0
+                  stats[:processes] += 1
+                  stats[:rss] += report_process[:memory_usage]
+                  stats[:lag] += lags.reject(&:negative?).sum
+                  stats[:lag_stored] += lags_stored.reject(&:negative?).sum
+                  utilization += report_stats[:utilization]
+                end
+
+              stats[:utilization] = utilization / (stats[:processes] + 0.0001)
+            end
+
+            # @param report [Hash]
+            def iterate_partitions(report)
+              report[:consumer_groups].each_value do |consumer_group|
+                consumer_group[:subscription_groups].each_value do |subscription_group|
+                  subscription_group[:topics].each_value do |topic|
+                    topic[:partitions].each_value do |partition|
+                      yield(partition)
+                    end
+                  end
+                end
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
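Editor's note: the aggregator above is what the web consumer drives for every batch of incoming reports. A minimal sketch of that loop; `reports` and the producer dispatch are illustrative stand-ins for the gem's actual consumer plumbing, not its real code:

    # Sketch only: `reports` stands in for the consumed report messages
    aggregator = Karafka::Web::Processing::Consumers::Aggregators::State.new(
      Karafka::Web::Processing::Consumers::SchemaManager.new
    )

    reports.each do |message|
      # Updates totals, the per-process registry and the current snapshot stats
      aggregator.add(message.payload, message.offset)
    end

    # to_h stamps schema version, dispatch time and schema state before shipping
    Karafka.producer.produce_async(
      topic: Karafka::Web.config.topics.consumers.states,
      payload: aggregator.to_h.to_json
    )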
--- /dev/null
+++ b/data/lib/karafka/web/processing/consumers/contracts/aggregated_stats.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        # Consumer tracking related contracts
+        module Contracts
+          # Contract used to validate the stats that are both present in state and metrics
+          class AggregatedStats < Web::Contracts::Base
+            configure
+
+            required(:batches) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:messages) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:retries) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:dead) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:errors) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:busy) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:enqueued) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:workers) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:processes) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:rss) { |val| val.is_a?(Numeric) && val >= 0 }
+            required(:listeners) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:utilization) { |val| val.is_a?(Numeric) && val >= 0 }
+            required(:lag_stored) { |val| val.is_a?(Integer) }
+            required(:lag) { |val| val.is_a?(Integer) }
+          end
+        end
+      end
+    end
+  end
+end
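Editor's note: these contracts validate plain hashes and raise on violation via `validate!`, which is how the virtual validations in the following files chain into them. A hypothetical stats hash that would pass the contract above:

    # Illustrative values only; validate! raises when a key is missing or has
    # the wrong type. Note lag and lag_stored are allowed to be negative.
    stats = {
      batches: 10, messages: 500, retries: 0, dead: 0, errors: 1,
      busy: 2, enqueued: 0, workers: 5, processes: 1, rss: 256.4,
      listeners: 2, utilization: 3.2, lag: 13, lag_stored: 13
    }

    Karafka::Web::Processing::Consumers::Contracts::AggregatedStats.new.validate!(stats)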
--- /dev/null
+++ b/data/lib/karafka/web/processing/consumers/contracts/metrics.rb
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        module Contracts
+          # Contract that describes the schema for metric reporting
+          class Metrics < Web::Contracts::Base
+            configure
+
+            required(:dispatched_at) { |val| val.is_a?(Numeric) && val.positive? }
+            required(:schema_version) { |val| val.is_a?(String) && !val.empty? }
+
+            # Ensure that all aggregated metrics are as expected (values)
+            virtual do |data, errors|
+              next unless errors.empty?
+
+              stats_contract = Contracts::AggregatedStats.new
+
+              data.fetch(:aggregated).each_value do |range_sample|
+                # Older metrics should have been validated previously so we need to check only
+                # the most recently materialized one
+                stats_contract.validate!(range_sample.last.last)
+              end
+
+              nil
+            end
+
+            # Ensure that all the consumer groups topics details are as expected
+            virtual do |data, errors|
+              next unless errors.empty?
+
+              topic_contract = Contracts::TopicStats.new
+
+              data.fetch(:consumer_groups).each_value do |range_sample|
+                consumer_group = range_sample.last.last
+
+                consumer_group.each_value do |topics|
+                  topics.each_value do |topic_stats|
+                    topic_contract.validate!(topic_stats)
+                  end
+                end
+              end
+
+              nil
+            end
+          end
+        end
+      end
+    end
+  end
+end
--- /dev/null
+++ b/data/lib/karafka/web/processing/consumers/contracts/process.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        module Contracts
+          # State process details contract
+          class Process < Web::Contracts::Base
+            configure
+
+            required(:dispatched_at) { |val| val.is_a?(Numeric) && val.positive? }
+            required(:offset) { |val| val.is_a?(Integer) && val >= 0 }
+          end
+        end
+      end
+    end
+  end
+end
--- /dev/null
+++ b/data/lib/karafka/web/processing/consumers/contracts/state.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        # Consumer tracking related contracts
+        module Contracts
+          # Contract used to ensure the consistency of the data generated to the consumers states
+          # topic
+          class State < Web::Contracts::Base
+            configure
+
+            # Valid schema manager states
+            VALID_SCHEMA_STATES = %w[compatible incompatible].freeze
+
+            private_constant :VALID_SCHEMA_STATES
+
+            required(:schema_version) { |val| val.is_a?(String) && !val.empty? }
+            required(:dispatched_at) { |val| val.is_a?(Numeric) && val.positive? }
+            required(:stats) { |val| val.is_a?(Hash) }
+            required(:processes) { |val| val.is_a?(Hash) }
+            required(:schema_state) { |val| VALID_SCHEMA_STATES.include?(val) }
+
+            virtual do |data, errors|
+              next unless errors.empty?
+
+              Contracts::AggregatedStats.new.validate!(data.fetch(:stats))
+
+              nil
+            end
+
+            virtual do |data, errors|
+              next unless errors.empty?
+
+              process_contract = Contracts::Process.new
+
+              data.fetch(:processes).each_value do |details|
+                process_contract.validate!(details)
+              end
+
+              nil
+            end
+          end
+        end
+      end
+    end
+  end
+end
--- /dev/null
+++ b/data/lib/karafka/web/processing/consumers/contracts/topic_stats.rb
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        module Contracts
+          # Topic metrics checks
+          class TopicStats < Web::Contracts::Base
+            configure
+
+            required(:lag_stored) { |val| val.is_a?(Integer) }
+            required(:lag) { |val| val.is_a?(Integer) }
+            required(:pace) { |val| val.is_a?(Integer) }
+            required(:ls_offset_fd) { |val| val.is_a?(Integer) && val >= 0 }
+          end
+        end
+      end
+    end
+  end
+end
--- /dev/null
+++ b/data/lib/karafka/web/processing/consumers/metrics.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        # Fetches the current consumers historical metrics data
+        class Metrics
+          class << self
+            # Fetch the current metrics data that is expected to exist
+            #
+            # @return [Hash] latest (current) aggregated metrics state
+            def current!
+              metrics_message = ::Karafka::Admin.read_topic(
+                Karafka::Web.config.topics.consumers.metrics,
+                0,
+                1
+              ).last
+
+              return metrics_message.payload if metrics_message
+
+              raise(::Karafka::Web::Errors::Processing::MissingConsumersMetricsError)
+            end
+          end
+        end
+      end
+    end
+  end
+end
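Editor's note: `current!` leans on `Karafka::Admin.read_topic(name, partition, count)` being expected to return the newest records, so `.last` of a one-message read yields the most recent materialized metrics. A hedged sketch of a caller:

    begin
      metrics = Karafka::Web::Processing::Consumers::Metrics.current!
      # The aggregated ranges are assumed here to follow the TimeSeriesTracker
      # keys introduced below, i.e. :days, :hours, :minutes, :seconds
      puts metrics[:aggregated].keys
    rescue Karafka::Web::Errors::Processing::MissingConsumersMetricsError
      # No materialized record exists yet, e.g. before the initial consumers
      # metrics state was seeded
    end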
--- /dev/null
+++ b/data/lib/karafka/web/processing/consumers/schema_manager.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      module Consumers
+        # Schema manager is responsible for making sure that the consumer report messages we
+        # consume have a schema compatible with the current process that is supposed to
+        # materialize them.
+        #
+        # In general we always support at least one major version back and we recommend upgrades
+        # to previous versions (0.5 => 0.6 => 0.7)
+        #
+        # This is needed in scenarios where a rolling deploy would get new karafka processes
+        # reporting data but consumption would still run on older ones.
+        class SchemaManager
+          # Current reports version for comparing
+          CURRENT_VERSION = ::Gem::Version.new(
+            ::Karafka::Web::Tracking::Consumers::Sampler::SCHEMA_VERSION
+          )
+
+          private_constant :CURRENT_VERSION
+
+          def initialize
+            @cache = {}
+            @valid = true
+          end
+
+          # @param message [Karafka::Messages::Message] consumer report
+          # @return [Boolean] true if all good or false if incompatible
+          #
+          # @note The state switch is one-direction only. If we encounter an incompatible message
+          #   we need to stop processing so further checks even with valid should not switch it
+          #   back to valid
+          def compatible?(message)
+            schema_version = message.payload[:schema_version]
+
+            # Save on memory allocation by reusing
+            # Most of the time we will deal with compatible schemas, so it is not worth creating
+            # an object with each message
+            message_version = @cache[schema_version] ||= ::Gem::Version.new(schema_version)
+
+            return true if message_version <= CURRENT_VERSION
+
+            @valid = false
+          end
+
+          # @return [String] state that we can use in the materialized state for the UI reporting
+          def to_s
+            @valid ? 'compatible' : 'incompatible'
+          end
+        end
+      end
+    end
+  end
+end
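Editor's note: `compatible?` judges each message on its own, while the materialized `to_s` state is one-directional, exactly as the note in the code says. An illustrative sequence; `old_report` and `newer_report` are hypothetical `Karafka::Messages::Message` objects with a `:schema_version` in their payloads:

    manager = Karafka::Web::Processing::Consumers::SchemaManager.new

    manager.compatible?(old_report)   # => true, schema_version <= CURRENT_VERSION
    manager.to_s                      # => 'compatible'

    manager.compatible?(newer_report) # => false, report from a newer karafka-web
    manager.compatible?(old_report)   # => true again, but...
    manager.to_s                      # => 'incompatible', the switch never reverts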
--- a/data/lib/karafka/web/processing/consumers/state.rb
+++ b/data/lib/karafka/web/processing/consumers/state.rb
@@ -6,23 +6,20 @@ module Karafka
       module Consumers
         # Fetches the current consumer processes aggregated state
         class State
-          extend ::Karafka::Core::Helpers::Time
-
           class << self
-            #
-            # a blank state. Blank state will not be flushed because materialization into Kafka
-            # happens only after first report is received.
+            # Fetch the current consumers state that is expected to exist
             #
-            # @return [Hash
-
-            def current
+            # @return [Hash] last (current) aggregated processes state
+            def current!
               state_message = ::Karafka::Admin.read_topic(
                 Karafka::Web.config.topics.consumers.states,
                 0,
                 1
               ).last

-
+              return state_message.payload if state_message
+
+              raise(::Karafka::Web::Errors::Processing::MissingConsumersStateError)
             end
           end
         end
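Editor's note: the bang rename makes the reader fail loudly. `current!` now assumes the states topic was seeded (see `management/create_initial_states.rb` in the file list above) and raises when nothing is there. A hypothetical caller:

    state = Karafka::Web::Processing::Consumers::State.current!
    state[:processes].each_key { |process_name| puts process_name }
    # Karafka::Web::Errors::Processing::MissingConsumersStateError is raised
    # when no materialized state record exists yet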
--- /dev/null
+++ b/data/lib/karafka/web/processing/time_series_tracker.rb
@@ -0,0 +1,130 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Web
+    module Processing
+      # Allows us to accumulate and track time series data with a given resolution
+      #
+      # We aggregate for the last:
+      #   - 7 days (every day)
+      #   - 24 hours (every hour)
+      #   - 1 hour (every minute) + the most recent as an update every time (leading)
+      #
+      # @note Please note we always publish **absolute** metrics and not deltas in reference to
+      #   a given time window. This needs to be computed in the frontend as we want to have
+      #   state facts in the storage.
+      #
+      # @note Please note we evict and clean up data only before we want to use it. This will put
+      #   more stress on memory but makes this tracker 70-90% faster. Since by default we anyhow
+      #   sample every few seconds, this trade-off makes sense.
+      class TimeSeriesTracker
+        include ::Karafka::Core::Helpers::Time
+
+        # How many samples and in what resolution should we track for given time range
+        # @note We add one more than we want to display for delta computation when ranges
+        #   are full in the UI
+        TIME_RANGES = {
+          # 7 days sampling
+          days: {
+            # Sample every 8 hours so we end up with 56 samples over a week + 1 for baseline
+            resolution: 8 * 60 * 60,
+            limit: 57
+          }.freeze,
+          # 24 hours sampling
+          hours: {
+            # Every 30 minutes for 24 hours + baseline
+            resolution: 30 * 60,
+            limit: 49
+          }.freeze,
+          # 60 minutes sampling
+          minutes: {
+            # Every one minute for an hour => 60 samples
+            resolution: 60,
+            limit: 61
+          }.freeze,
+          # 5 minutes sampling
+          seconds: {
+            # Every 5 seconds with 60 samples + baseline. That is 300 seconds => 5 minutes
+            resolution: 5,
+            limit: 61
+          }.freeze
+        }.freeze
+
+        # @param existing [Hash] existing historical metrics (may be empty for the first state)
+        def initialize(existing)
+          # Builds an empty structure for potential time ranges we are interested in
+          @historicals = TIME_RANGES.keys.map { |name| [name, []] }.to_h
+
+          # Fetch the existing (if any) historical values that we already have
+          import_existing(existing)
+        end
+
+        # Adds current state into the states for tracking
+        # @param current [Hash] hash with current state
+        # @param state_time [Float] float UTC time from which the state comes
+        def add(current, state_time)
+          # Inject the time point into all the historicals
+          inject(current, state_time)
+        end
+
+        # Evicts expired and duplicated series and returns the cleaned hash
+        # @return [Hash] aggregated historicals hash
+        def to_h
+          evict
+
+          @historicals
+        end
+
+        private
+
+        # Import existing previous historical metrics as they are
+        #
+        # @param existing [Hash] existing historical metrics
+        def import_existing(existing)
+          existing.each do |range_name, values|
+            @historicals[range_name] = values
+          end
+        end
+
+        # Injects the current most recent stats sample into each of the time ranges on which we
+        # operate. This allows us on all the charts to present the most recent value before a
+        # given time window is completed
+        #
+        # @param current [Hash] current stats
+        # @param state_time [Float] time from which this state comes
+        def inject(current, state_time)
+          @historicals.each_value do |points|
+            points << [state_time.floor, current]
+          end
+        end
+
+        # Removes historical metrics that are beyond our expected range, so we maintain a stable
+        # count and not overload the states topic with extensive data.
+        def evict
+          # Evict old metrics that are beyond our aggregated range
+          # Builds a sliding window that goes backwards
+          @historicals.each do |range_name, values|
+            rules = TIME_RANGES.fetch(range_name)
+            limit = rules.fetch(:limit)
+            resolution = rules.fetch(:resolution)
+
+            grouped = values.group_by { |sample| sample.first / resolution }
+            times = grouped.values.map(&:first)
+
+            # Inject the most recent to always have it in each reporting range
+            # Otherwise for longer time ranges we would not have the most recent state
+            # available
+            times << values.last unless values.empty?
+
+            times.uniq!(&:first)
+
+            # Squash in case there would be two events from the same time
+            times.sort_by!(&:first)
+
+            @historicals[range_name] = times.last(limit)
+          end
+        end
+      end
+    end
+  end
+end
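Editor's note: the eviction strategy is the subtle part. Samples are bucketed by `time / resolution`, one sample per bucket survives, the newest sample is re-injected so every chart range ends at "now", and the series is capped at `limit`. A sketch under the assumption that `existing` mirrors the stored shape, a hash of range name to `[epoch_seconds, stats]` pairs:

    # Hypothetical seed data; unlisted ranges simply start empty
    tracker = Karafka::Web::Processing::TimeSeriesTracker.new(
      minutes: [[1_690_000_000, { messages: 100 }]]
    )

    # Each materialization pushes the freshest sample into every range
    tracker.add({ messages: 120 }, Time.now.utc.to_f)

    # Eviction happens lazily here: bucket, dedupe, sort, cap at the range limit
    historicals = tracker.to_h
    historicals.keys # => [:days, :hours, :minutes, :seconds]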
--- a/data/lib/karafka/web/tracking/consumers/contracts/consumer_group.rb
+++ b/data/lib/karafka/web/tracking/consumers/contracts/consumer_group.rb
@@ -8,7 +8,7 @@ module Karafka
         module Contracts
           # Expected data for each consumer group
           # It's mostly about subscription groups details
-          class ConsumerGroup <
+          class ConsumerGroup < Web::Contracts::Base
             configure

             required(:id) { |val| val.is_a?(String) && !val.empty? }
@@ -19,7 +19,7 @@ module Karafka

             subscription_group_contract = SubscriptionGroup.new

-            data.fetch(:subscription_groups).
+            data.fetch(:subscription_groups).each_value do |details|
               subscription_group_contract.validate!(details)
             end

--- a/data/lib/karafka/web/tracking/consumers/contracts/job.rb
+++ b/data/lib/karafka/web/tracking/consumers/contracts/job.rb
@@ -6,7 +6,7 @@ module Karafka
       module Consumers
         module Contracts
           # Contract for the job reporting details
-          class Job <
+          class Job < Web::Contracts::Base
             configure

             required(:consumer) { |val| val.is_a?(String) }
@@ -17,6 +17,7 @@ module Karafka
             required(:first_offset) { |val| val.is_a?(Integer) && (val >= 0 || val == -1001) }
             required(:last_offset) { |val| val.is_a?(Integer) && (val >= 0 || val == -1001) }
             required(:committed_offset) { |val| val.is_a?(Integer) }
+            required(:messages) { |val| val.is_a?(Integer) && val >= 0 }
             required(:type) { |val| %w[consume revoked shutdown].include?(val) }
             required(:tags) { |val| val.is_a?(Karafka::Core::Taggable::Tags) }
             # -1 can be here for workless flows
--- a/data/lib/karafka/web/tracking/consumers/contracts/partition.rb
+++ b/data/lib/karafka/web/tracking/consumers/contracts/partition.rb
@@ -6,14 +6,27 @@ module Karafka
       module Consumers
         module Contracts
           # Partition metrics required for web to operate
-          class Partition <
+          class Partition < Web::Contracts::Base
             configure

             required(:id) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:lag) { |val| val.is_a?(Integer) }
+            required(:lag_d) { |val| val.is_a?(Integer) }
             required(:lag_stored) { |val| val.is_a?(Integer) }
             required(:lag_stored_d) { |val| val.is_a?(Integer) }
             required(:committed_offset) { |val| val.is_a?(Integer) }
+            required(:committed_offset_fd) { |val| val.is_a?(Integer) && val >= 0 }
             required(:stored_offset) { |val| val.is_a?(Integer) }
+            required(:stored_offset_fd) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:fetch_state) { |val| val.is_a?(String) && !val.empty? }
+            required(:poll_state) { |val| val.is_a?(String) && !val.empty? }
+            required(:hi_offset) { |val| val.is_a?(Integer) }
+            required(:hi_offset_fd) { |val| val.is_a?(Integer) && val >= 0 }
+            required(:lo_offset) { |val| val.is_a?(Integer) }
+            required(:eof_offset) { |val| val.is_a?(Integer) }
+            required(:ls_offset) { |val| val.is_a?(Integer) }
+            required(:ls_offset_d) { |val| val.is_a?(Integer) }
+            required(:ls_offset_fd) { |val| val.is_a?(Integer) && val >= 0 }
           end
         end
       end
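Editor's note on the new keys: the suffix conventions come from the sampler, where plain values mirror librdkafka's per-partition statistics, `_d` variants appear to be deltas between consecutive reports, and `_fd` variants are non-negative durations for which a value has not changed, which is what the Health views use to flag stuck partitions. A hypothetical partition entry satisfying the extended contract:

    partition = {
      id: 0,
      lag: 10, lag_d: -2, lag_stored: 10, lag_stored_d: -2,
      committed_offset: 100, committed_offset_fd: 0,
      stored_offset: 102, stored_offset_fd: 0,
      fetch_state: 'active', poll_state: 'active',
      hi_offset: 112, hi_offset_fd: 0,
      lo_offset: 0, eof_offset: 0,
      ls_offset: 112, ls_offset_d: 0, ls_offset_fd: 0
    }

    Karafka::Web::Tracking::Consumers::Contracts::Partition.new.validate!(partition)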