karafka-web 0.7.9 → 0.8.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +21 -6
- data/.ruby-version +1 -1
- data/CHANGELOG.md +66 -0
- data/Gemfile.lock +22 -22
- data/docker-compose.yml +3 -1
- data/karafka-web.gemspec +2 -2
- data/lib/karafka/web/config.rb +16 -3
- data/lib/karafka/web/contracts/config.rb +7 -2
- data/lib/karafka/web/errors.rb +12 -0
- data/lib/karafka/web/inflector.rb +33 -0
- data/lib/karafka/web/installer.rb +20 -11
- data/lib/karafka/web/management/actions/base.rb +36 -0
- data/lib/karafka/web/management/actions/clean_boot_file.rb +33 -0
- data/lib/karafka/web/management/actions/create_initial_states.rb +77 -0
- data/lib/karafka/web/management/actions/create_topics.rb +139 -0
- data/lib/karafka/web/management/actions/delete_topics.rb +30 -0
- data/lib/karafka/web/management/actions/enable.rb +117 -0
- data/lib/karafka/web/management/actions/extend_boot_file.rb +39 -0
- data/lib/karafka/web/management/actions/migrate_states_data.rb +18 -0
- data/lib/karafka/web/management/migrations/0_base.rb +58 -0
- data/lib/karafka/web/management/migrations/0_set_initial_consumers_metrics.rb +36 -0
- data/lib/karafka/web/management/migrations/0_set_initial_consumers_state.rb +43 -0
- data/lib/karafka/web/management/migrations/1699543515_fill_missing_received_and_sent_bytes_in_consumers_metrics.rb +26 -0
- data/lib/karafka/web/management/migrations/1699543515_fill_missing_received_and_sent_bytes_in_consumers_state.rb +23 -0
- data/lib/karafka/web/management/migrations/1700234522_introduce_waiting_in_consumers_metrics.rb +24 -0
- data/lib/karafka/web/management/migrations/1700234522_introduce_waiting_in_consumers_state.rb +20 -0
- data/lib/karafka/web/management/migrations/1700234522_remove_processing_from_consumers_metrics.rb +24 -0
- data/lib/karafka/web/management/migrations/1700234522_remove_processing_from_consumers_state.rb +20 -0
- data/lib/karafka/web/management/migrations/1704722380_split_listeners_into_active_and_paused_in_metrics.rb +36 -0
- data/lib/karafka/web/management/migrations/1704722380_split_listeners_into_active_and_paused_in_states.rb +32 -0
- data/lib/karafka/web/management/migrator.rb +117 -0
- data/lib/karafka/web/processing/consumer.rb +39 -38
- data/lib/karafka/web/processing/consumers/aggregators/metrics.rb +15 -7
- data/lib/karafka/web/processing/consumers/aggregators/state.rb +8 -3
- data/lib/karafka/web/processing/consumers/contracts/aggregated_stats.rb +5 -1
- data/lib/karafka/web/processing/publisher.rb +59 -0
- data/lib/karafka/web/tracking/consumers/contracts/job.rb +3 -2
- data/lib/karafka/web/tracking/consumers/contracts/partition.rb +1 -0
- data/lib/karafka/web/tracking/consumers/contracts/report.rb +6 -1
- data/lib/karafka/web/tracking/consumers/contracts/subscription_group.rb +10 -1
- data/lib/karafka/web/tracking/consumers/listeners/connections.rb +49 -0
- data/lib/karafka/web/tracking/consumers/listeners/pausing.rb +7 -4
- data/lib/karafka/web/tracking/consumers/listeners/processing.rb +78 -70
- data/lib/karafka/web/tracking/consumers/listeners/statistics.rb +40 -13
- data/lib/karafka/web/tracking/consumers/sampler.rb +82 -25
- data/lib/karafka/web/tracking/helpers/ttls/array.rb +72 -0
- data/lib/karafka/web/tracking/helpers/ttls/hash.rb +34 -0
- data/lib/karafka/web/tracking/helpers/ttls/stats.rb +49 -0
- data/lib/karafka/web/tracking/helpers/ttls/windows.rb +32 -0
- data/lib/karafka/web/tracking/reporter.rb +1 -0
- data/lib/karafka/web/ui/app.rb +22 -4
- data/lib/karafka/web/ui/base.rb +18 -2
- data/lib/karafka/web/ui/controllers/base.rb +34 -4
- data/lib/karafka/web/ui/controllers/become_pro.rb +1 -1
- data/lib/karafka/web/ui/controllers/cluster.rb +33 -9
- data/lib/karafka/web/ui/controllers/consumers.rb +8 -2
- data/lib/karafka/web/ui/controllers/dashboard.rb +2 -2
- data/lib/karafka/web/ui/controllers/errors.rb +2 -2
- data/lib/karafka/web/ui/controllers/jobs.rb +55 -5
- data/lib/karafka/web/ui/controllers/requests/params.rb +5 -0
- data/lib/karafka/web/ui/controllers/responses/deny.rb +15 -0
- data/lib/karafka/web/ui/controllers/responses/file.rb +23 -0
- data/lib/karafka/web/ui/controllers/responses/{data.rb → render.rb} +3 -3
- data/lib/karafka/web/ui/controllers/routing.rb +11 -2
- data/lib/karafka/web/ui/controllers/status.rb +1 -1
- data/lib/karafka/web/ui/helpers/application_helper.rb +70 -0
- data/lib/karafka/web/ui/lib/hash_proxy.rb +29 -14
- data/lib/karafka/web/ui/lib/sorter.rb +170 -0
- data/lib/karafka/web/ui/models/counters.rb +6 -0
- data/lib/karafka/web/ui/models/health.rb +23 -2
- data/lib/karafka/web/ui/models/jobs.rb +48 -0
- data/lib/karafka/web/ui/models/metrics/charts/aggregated.rb +33 -0
- data/lib/karafka/web/ui/models/metrics/charts/topics.rb +1 -10
- data/lib/karafka/web/ui/models/process.rb +2 -1
- data/lib/karafka/web/ui/models/status.rb +23 -7
- data/lib/karafka/web/ui/models/topic.rb +3 -1
- data/lib/karafka/web/ui/models/visibility_filter.rb +16 -0
- data/lib/karafka/web/ui/pro/app.rb +44 -6
- data/lib/karafka/web/ui/pro/controllers/cluster.rb +1 -0
- data/lib/karafka/web/ui/pro/controllers/consumers.rb +52 -6
- data/lib/karafka/web/ui/pro/controllers/dashboard.rb +1 -1
- data/lib/karafka/web/ui/pro/controllers/dlq.rb +1 -1
- data/lib/karafka/web/ui/pro/controllers/errors.rb +3 -3
- data/lib/karafka/web/ui/pro/controllers/explorer.rb +8 -8
- data/lib/karafka/web/ui/pro/controllers/health.rb +34 -2
- data/lib/karafka/web/ui/pro/controllers/jobs.rb +11 -0
- data/lib/karafka/web/ui/pro/controllers/messages.rb +42 -0
- data/lib/karafka/web/ui/pro/controllers/routing.rb +11 -2
- data/lib/karafka/web/ui/pro/views/consumers/_breadcrumbs.erb +8 -2
- data/lib/karafka/web/ui/pro/views/consumers/_consumer.erb +14 -8
- data/lib/karafka/web/ui/pro/views/consumers/_counters.erb +8 -6
- data/lib/karafka/web/ui/pro/views/consumers/consumer/_job.erb +4 -1
- data/lib/karafka/web/ui/pro/views/consumers/consumer/_no_jobs.erb +1 -1
- data/lib/karafka/web/ui/pro/views/consumers/consumer/_partition.erb +1 -3
- data/lib/karafka/web/ui/pro/views/consumers/consumer/_subscription_group.erb +28 -11
- data/lib/karafka/web/ui/pro/views/consumers/consumer/_tabs.erb +10 -3
- data/lib/karafka/web/ui/pro/views/consumers/index.erb +3 -3
- data/lib/karafka/web/ui/pro/views/consumers/pending_jobs.erb +43 -0
- data/lib/karafka/web/ui/pro/views/consumers/{jobs.erb → running_jobs.erb} +11 -10
- data/lib/karafka/web/ui/pro/views/dashboard/index.erb +7 -1
- data/lib/karafka/web/ui/pro/views/explorer/message/_message_actions.erb +18 -0
- data/lib/karafka/web/ui/pro/views/explorer/message/_metadata.erb +43 -0
- data/lib/karafka/web/ui/pro/views/explorer/message/_payload.erb +21 -0
- data/lib/karafka/web/ui/pro/views/explorer/message/_payload_actions.erb +19 -0
- data/lib/karafka/web/ui/pro/views/explorer/show.erb +9 -84
- data/lib/karafka/web/ui/pro/views/health/_breadcrumbs.erb +8 -0
- data/lib/karafka/web/ui/pro/views/health/_partition.erb +1 -3
- data/lib/karafka/web/ui/pro/views/health/_partition_offset.erb +4 -4
- data/lib/karafka/web/ui/pro/views/health/_partition_times.erb +32 -0
- data/lib/karafka/web/ui/pro/views/health/_tabs.erb +9 -0
- data/lib/karafka/web/ui/pro/views/health/changes.erb +66 -0
- data/lib/karafka/web/ui/pro/views/health/offsets.erb +14 -14
- data/lib/karafka/web/ui/pro/views/health/overview.erb +11 -11
- data/lib/karafka/web/ui/pro/views/jobs/_job.erb +1 -1
- data/lib/karafka/web/ui/pro/views/jobs/_no_jobs.erb +1 -1
- data/lib/karafka/web/ui/pro/views/jobs/pending.erb +39 -0
- data/lib/karafka/web/ui/pro/views/jobs/running.erb +39 -0
- data/lib/karafka/web/ui/pro/views/routing/_consumer_group.erb +2 -2
- data/lib/karafka/web/ui/pro/views/routing/_topic.erb +9 -0
- data/lib/karafka/web/ui/pro/views/routing/show.erb +12 -0
- data/lib/karafka/web/ui/pro/views/shared/_navigation.erb +1 -1
- data/lib/karafka/web/ui/public/javascripts/application.js +10 -0
- data/lib/karafka/web/ui/public/stylesheets/application.css +4 -0
- data/lib/karafka/web/ui/views/cluster/_breadcrumbs.erb +16 -0
- data/lib/karafka/web/ui/views/cluster/_tabs.erb +27 -0
- data/lib/karafka/web/ui/views/cluster/brokers.erb +27 -0
- data/lib/karafka/web/ui/views/cluster/topics.erb +35 -0
- data/lib/karafka/web/ui/views/consumers/_counters.erb +8 -6
- data/lib/karafka/web/ui/views/consumers/_summary.erb +2 -2
- data/lib/karafka/web/ui/views/consumers/index.erb +3 -3
- data/lib/karafka/web/ui/views/dashboard/_ranges_selector.erb +23 -7
- data/lib/karafka/web/ui/views/dashboard/index.erb +19 -8
- data/lib/karafka/web/ui/views/errors/show.erb +2 -23
- data/lib/karafka/web/ui/views/jobs/_breadcrumbs.erb +17 -1
- data/lib/karafka/web/ui/views/jobs/_job.erb +1 -1
- data/lib/karafka/web/ui/views/jobs/_no_jobs.erb +1 -1
- data/lib/karafka/web/ui/views/jobs/_tabs.erb +27 -0
- data/lib/karafka/web/ui/views/jobs/{index.erb → pending.erb} +9 -7
- data/lib/karafka/web/ui/{pro/views/jobs/index.erb → views/jobs/running.erb} +9 -11
- data/lib/karafka/web/ui/views/routing/_consumer_group.erb +14 -12
- data/lib/karafka/web/ui/views/shared/_navigation.erb +1 -1
- data/lib/karafka/web/ui/views/shared/_pagination.erb +1 -1
- data/lib/karafka/web/ui/views/shared/exceptions/not_allowed.erb +37 -0
- data/lib/karafka/web/ui/views/status/show.erb +17 -2
- data/lib/karafka/web/ui/views/status/warnings/_routing_topics_presence.erb +15 -0
- data/lib/karafka/web/version.rb +1 -1
- data/lib/karafka/web.rb +6 -2
- data.tar.gz.sig +0 -0
- metadata +61 -26
- metadata.gz.sig +0 -0
- data/lib/karafka/web/management/base.rb +0 -34
- data/lib/karafka/web/management/clean_boot_file.rb +0 -31
- data/lib/karafka/web/management/create_initial_states.rb +0 -101
- data/lib/karafka/web/management/create_topics.rb +0 -133
- data/lib/karafka/web/management/delete_topics.rb +0 -28
- data/lib/karafka/web/management/enable.rb +0 -102
- data/lib/karafka/web/management/extend_boot_file.rb +0 -37
- data/lib/karafka/web/tracking/ttl_array.rb +0 -59
- data/lib/karafka/web/tracking/ttl_hash.rb +0 -16
- data/lib/karafka/web/ui/pro/views/dashboard/_ranges_selector.erb +0 -39
- data/lib/karafka/web/ui/views/cluster/index.erb +0 -74
@@ -12,10 +12,33 @@ module Karafka
|
|
12
12
|
# @param event [Karafka::Core::Monitoring::Event]
|
13
13
|
def on_worker_processed(event)
|
14
14
|
track do |sampler|
|
15
|
-
sampler.
|
15
|
+
sampler.windows.m1[:processed_total_time] << event[:time]
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
+
# We do not track idle jobs here because they are internal and not user-facing
|
20
|
+
%i[
|
21
|
+
consume
|
22
|
+
revoked
|
23
|
+
shutdown
|
24
|
+
tick
|
25
|
+
].each do |action|
|
26
|
+
# Tracks the job that is going to be scheduled so we can also display pending jobs
|
27
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
28
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
29
|
+
def on_consumer_before_schedule_#{action}(event)
|
30
|
+
consumer = event.payload[:caller]
|
31
|
+
jid = job_id(consumer, '#{action}')
|
32
|
+
job_details = job_details(consumer, '#{action}')
|
33
|
+
job_details[:status] = 'pending'
|
34
|
+
|
35
|
+
track do |sampler|
|
36
|
+
sampler.jobs[jid] = job_details
|
37
|
+
end
|
38
|
+
end
|
39
|
+
RUBY
|
40
|
+
end
|
41
|
+
|
19
42
|
# Counts work execution and processing states in consumer instances
|
20
43
|
#
|
21
44
|
# @param event [Karafka::Core::Monitoring::Event]
|
@@ -34,6 +57,19 @@ module Karafka
|
|
34
57
|
end
|
35
58
|
end
|
36
59
|
|
60
|
+
# Collect info about consumption event that occurred and its metrics
|
61
|
+
# Removes the job from running jobs
|
62
|
+
#
|
63
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
64
|
+
def on_consumer_consumed(event)
|
65
|
+
consumer = event.payload[:caller]
|
66
|
+
jid = job_id(consumer, 'consume')
|
67
|
+
|
68
|
+
track do |sampler|
|
69
|
+
sampler.jobs.delete(jid)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
37
73
|
# Removes failed job from active jobs
|
38
74
|
#
|
39
75
|
# @param event [Karafka::Core::Monitoring::Event]
|
@@ -46,6 +82,8 @@ module Karafka
|
|
46
82
|
'revoked'
|
47
83
|
when 'consumer.shutdown.error'
|
48
84
|
'shutdown'
|
85
|
+
when 'consumer.tick.error'
|
86
|
+
'tick'
|
49
87
|
# This is not a user facing execution flow, but internal system one
|
50
88
|
# that is why it will not be reported as a separate job for the UI
|
51
89
|
when 'consumer.idle.error'
|
@@ -65,72 +103,39 @@ module Karafka
|
|
65
103
|
end
|
66
104
|
end
|
67
105
|
|
68
|
-
#
|
69
|
-
#
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
# @param event [Karafka::Core::Monitoring::Event]
|
102
|
-
def on_consumer_revoked(event)
|
103
|
-
consumer = event.payload[:caller]
|
104
|
-
jid = job_id(consumer, 'revoked')
|
105
|
-
|
106
|
-
track do |sampler|
|
107
|
-
sampler.jobs.delete(jid)
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
# Stores this job details
|
112
|
-
#
|
113
|
-
# @param event [Karafka::Core::Monitoring::Event]
|
114
|
-
def on_consumer_shutting_down(event)
|
115
|
-
consumer = event.payload[:caller]
|
116
|
-
jid = job_id(consumer, 'shutdown')
|
117
|
-
job_details = job_details(consumer, 'shutdown')
|
118
|
-
|
119
|
-
track do |sampler|
|
120
|
-
sampler.jobs[jid] = job_details
|
121
|
-
end
|
122
|
-
end
|
123
|
-
|
124
|
-
# Removes the job from running jobs
|
125
|
-
#
|
126
|
-
# @param event [Karafka::Core::Monitoring::Event]
|
127
|
-
def on_consumer_shutdown(event)
|
128
|
-
consumer = event.payload[:caller]
|
129
|
-
jid = job_id(consumer, 'shutdown')
|
130
|
-
|
131
|
-
track do |sampler|
|
132
|
-
sampler.jobs.delete(jid)
|
133
|
-
end
|
106
|
+
# Consume has a bit different reporting flow than other jobs because it bumps certain
|
107
|
+
# counters that other jobs do not. This is why it is defined above separately
|
108
|
+
[
|
109
|
+
[:revoke, :revoked, 'revoked'],
|
110
|
+
[:shutting_down, :shutdown, 'shutdown'],
|
111
|
+
[:tick, :ticked, 'tick']
|
112
|
+
].each do |pre, post, action|
|
113
|
+
class_eval <<~METHOD, __FILE__, __LINE__ + 1
|
114
|
+
# Stores this job details
|
115
|
+
#
|
116
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
117
|
+
def on_consumer_#{pre}(event)
|
118
|
+
consumer = event.payload[:caller]
|
119
|
+
jid = job_id(consumer, '#{action}')
|
120
|
+
job_details = job_details(consumer, '#{action}')
|
121
|
+
|
122
|
+
track do |sampler|
|
123
|
+
sampler.jobs[jid] = job_details
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
# Removes the job from running jobs
|
128
|
+
#
|
129
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
130
|
+
def on_consumer_#{post}(event)
|
131
|
+
consumer = event.payload[:caller]
|
132
|
+
jid = job_id(consumer, '#{action}')
|
133
|
+
|
134
|
+
track do |sampler|
|
135
|
+
sampler.jobs.delete(jid)
|
136
|
+
end
|
137
|
+
end
|
138
|
+
METHOD
|
134
139
|
end
|
135
140
|
|
136
141
|
private
|
@@ -152,14 +157,16 @@ module Karafka
|
|
152
157
|
# more details.
|
153
158
|
def job_details(consumer, type)
|
154
159
|
{
|
155
|
-
|
160
|
+
updated_at: float_now,
|
156
161
|
topic: consumer.topic.name,
|
157
162
|
partition: consumer.partition,
|
158
163
|
first_offset: consumer.messages.metadata.first_offset,
|
159
164
|
last_offset: consumer.messages.metadata.last_offset,
|
160
165
|
processing_lag: consumer.messages.metadata.processing_lag,
|
161
166
|
consumption_lag: consumer.messages.metadata.consumption_lag,
|
162
|
-
|
167
|
+
# Committed offset may be -1 when there is no committed offset. This can happen in
|
168
|
+
# case of ticking that started before any consumption job happened
|
169
|
+
committed_offset: consumer.coordinator.seek_offset.to_i - 1,
|
163
170
|
# In theory this is redundant because we have first and last offset, but it is
|
164
171
|
# needed because VPs do not have linear count. For VPs first and last offset
|
165
172
|
# will be further away than the total messages count for a particular VP
|
@@ -167,7 +174,8 @@ module Karafka
|
|
167
174
|
consumer: consumer.class.to_s,
|
168
175
|
consumer_group: consumer.topic.consumer_group.id,
|
169
176
|
type: type,
|
170
|
-
tags: consumer.tags
|
177
|
+
tags: consumer.tags,
|
178
|
+
status: 'running'
|
171
179
|
}
|
172
180
|
end
|
173
181
|
end
|
@@ -18,6 +18,8 @@ module Karafka
|
|
18
18
|
sg_id = event[:subscription_group_id]
|
19
19
|
sg_details = extract_sg_details(sg_id, cgrp)
|
20
20
|
|
21
|
+
track_transfers(statistics)
|
22
|
+
|
21
23
|
# More than one subscription group from the same consumer group may be reporting
|
22
24
|
# almost the same time. To prevent corruption of partial data, we put everything here
|
23
25
|
# in track as we merge data from multiple subscription groups
|
@@ -42,25 +44,41 @@ module Karafka
|
|
42
44
|
}
|
43
45
|
|
44
46
|
topic_details[:partitions][pt_id] = metrics.merge(
|
45
|
-
id: pt_id
|
47
|
+
id: pt_id
|
48
|
+
).merge(
|
46
49
|
# Pauses are stored on a consumer group since we do not process same topic
|
47
50
|
# twice in the multiple subscription groups
|
48
|
-
|
51
|
+
poll_details(sg_id, topic_name, pt_id)
|
49
52
|
)
|
50
53
|
end
|
51
54
|
end
|
52
55
|
|
53
|
-
sampler.consumer_groups[cg_id] ||= {
|
54
|
-
id: cg_id,
|
55
|
-
subscription_groups: {}
|
56
|
-
}
|
57
|
-
|
58
56
|
sampler.consumer_groups[cg_id][:subscription_groups][sg_id] = sg_details
|
59
57
|
end
|
60
58
|
end
|
61
59
|
|
62
60
|
private
|
63
61
|
|
62
|
+
# Tracks network transfers from and to the client using a 1 minute rolling window
|
63
|
+
#
|
64
|
+
# @param statistics [Hash] statistics hash
|
65
|
+
def track_transfers(statistics)
|
66
|
+
brokers = statistics.fetch('brokers', {})
|
67
|
+
|
68
|
+
return if brokers.empty?
|
69
|
+
|
70
|
+
track do |sampler|
|
71
|
+
client_name = statistics.fetch('name')
|
72
|
+
|
73
|
+
brokers.each do |broker_name, values|
|
74
|
+
scope_name = "#{client_name}-#{broker_name}"
|
75
|
+
|
76
|
+
sampler.windows.m1["#{scope_name}-rxbytes"] << values.fetch('rxbytes', 0)
|
77
|
+
sampler.windows.m1["#{scope_name}-txbytes"] << values.fetch('txbytes', 0)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
64
82
|
# Extracts basic consumer group related details
|
65
83
|
# @param sg_id [String]
|
66
84
|
# @param sg_stats [Hash]
|
@@ -75,7 +93,7 @@ module Karafka
|
|
75
93
|
'rebalance_age',
|
76
94
|
'rebalance_cnt',
|
77
95
|
'rebalance_reason'
|
78
|
-
),
|
96
|
+
).transform_keys(&:to_sym),
|
79
97
|
topics: {}
|
80
98
|
}
|
81
99
|
end
|
@@ -132,14 +150,23 @@ module Karafka
|
|
132
150
|
metrics
|
133
151
|
end
|
134
152
|
|
135
|
-
# @param
|
153
|
+
# @param sg_id [String] subscription group id
|
136
154
|
# @param topic_name [String]
|
137
|
-
# @param pt_id [Integer]
|
155
|
+
# @param pt_id [Integer] partition id
|
138
156
|
# @return [Hash] poll state details (`:poll_state` and `:poll_state_ch`)
|
139
|
-
def
|
140
|
-
pause_id = [
|
157
|
+
def poll_details(sg_id, topic_name, pt_id)
|
158
|
+
pause_id = [sg_id, topic_name, pt_id].join('-')
|
159
|
+
|
160
|
+
details = { poll_state: 'active', poll_state_ch: 0 }
|
141
161
|
|
142
|
-
sampler.pauses
|
162
|
+
pause_details = sampler.pauses[pause_id]
|
163
|
+
|
164
|
+
return details unless pause_details
|
165
|
+
|
166
|
+
{
|
167
|
+
poll_state: 'paused',
|
168
|
+
poll_state_ch: [(pause_details.fetch(:paused_till) - monotonic_now).round, 0].max
|
169
|
+
}
|
143
170
|
end
|
144
171
|
end
|
145
172
|
end
|
@@ -9,18 +9,13 @@ module Karafka
|
|
9
9
|
class Sampler < Tracking::Sampler
|
10
10
|
include ::Karafka::Core::Helpers::Time
|
11
11
|
|
12
|
-
attr_reader :counters, :consumer_groups, :
|
12
|
+
attr_reader :counters, :consumer_groups, :subscription_groups, :errors,
|
13
|
+
:pauses, :jobs, :windows
|
13
14
|
|
14
15
|
# Current schema version
|
15
|
-
# This
|
16
|
-
#
|
17
|
-
SCHEMA_VERSION = '1.2.
|
18
|
-
|
19
|
-
# 60 seconds window for time tracked window-based metrics
|
20
|
-
TIMES_TTL = 60
|
21
|
-
|
22
|
-
# Times ttl in ms
|
23
|
-
TIMES_TTL_MS = TIMES_TTL * 1_000
|
16
|
+
# This is used for detecting incompatible changes and not using outdated data during
|
17
|
+
# upgrades
|
18
|
+
SCHEMA_VERSION = '1.2.9'
|
24
19
|
|
25
20
|
# Counters that count events occurrences during the given window
|
26
21
|
COUNTERS_BASE = {
|
@@ -36,17 +31,23 @@ module Karafka
|
|
36
31
|
dead: 0
|
37
32
|
}.freeze
|
38
33
|
|
39
|
-
private_constant :
|
34
|
+
private_constant :COUNTERS_BASE
|
40
35
|
|
41
36
|
def initialize
|
42
37
|
super
|
43
38
|
|
39
|
+
@windows = Helpers::Ttls::Windows.new
|
44
40
|
@counters = COUNTERS_BASE.dup
|
45
|
-
@
|
46
|
-
|
41
|
+
@consumer_groups = Hash.new do |h, cg_id|
|
42
|
+
h[cg_id] = {
|
43
|
+
id: cg_id,
|
44
|
+
subscription_groups: {}
|
45
|
+
}
|
46
|
+
end
|
47
|
+
@subscription_groups = {}
|
47
48
|
@errors = []
|
48
49
|
@started_at = float_now
|
49
|
-
@pauses =
|
50
|
+
@pauses = {}
|
50
51
|
@jobs = {}
|
51
52
|
@shell = MemoizedShell.new
|
52
53
|
@memory_total_usage = 0
|
@@ -81,7 +82,9 @@ module Karafka
|
|
81
82
|
cpus: cpus,
|
82
83
|
threads: threads,
|
83
84
|
cpu_usage: @cpu_usage,
|
84
|
-
tags: Karafka::Process.tags
|
85
|
+
tags: Karafka::Process.tags,
|
86
|
+
bytes_received: bytes_received,
|
87
|
+
bytes_sent: bytes_sent
|
85
88
|
},
|
86
89
|
|
87
90
|
versions: {
|
@@ -98,7 +101,7 @@ module Karafka
|
|
98
101
|
utilization: utilization
|
99
102
|
).merge(total: @counters),
|
100
103
|
|
101
|
-
consumer_groups:
|
104
|
+
consumer_groups: enriched_consumer_groups,
|
102
105
|
jobs: jobs.values
|
103
106
|
}
|
104
107
|
end
|
@@ -130,21 +133,28 @@ module Karafka
|
|
130
133
|
# utilized all the time within the given time window. 0% means, nothing is happening
|
131
134
|
# most if not all the time.
|
132
135
|
def utilization
|
133
|
-
|
136
|
+
totals = windows.m1[:processed_total_time]
|
137
|
+
|
138
|
+
return 0 if totals.empty?
|
134
139
|
|
135
|
-
# Max times ttl
|
136
140
|
timefactor = float_now - @started_at
|
137
|
-
timefactor = timefactor >
|
141
|
+
timefactor = timefactor > 60 ? 60 : timefactor
|
138
142
|
|
139
143
|
# We divide by 1_000 to convert from milliseconds
|
140
144
|
# We multiply by 100 to have it in % scale
|
141
|
-
|
145
|
+
totals.sum / 1_000 / workers / timefactor * 100
|
142
146
|
end
|
143
147
|
|
144
|
-
# @return [
|
148
|
+
# @return [Hash] number of active and standby listeners
|
145
149
|
def listeners
|
146
|
-
|
147
|
-
|
150
|
+
if Karafka::Server.listeners
|
151
|
+
active = Karafka::Server.listeners.count(&:active?)
|
152
|
+
total = Karafka::Server.listeners.count.to_i
|
153
|
+
|
154
|
+
{ active: active, standby: total - active }
|
155
|
+
else
|
156
|
+
{ active: 0, standby: 0 }
|
157
|
+
end
|
148
158
|
end
|
149
159
|
|
150
160
|
# @return [Integer] memory used by this process in kilobytes
|
@@ -175,9 +185,14 @@ module Karafka
|
|
175
185
|
# @return [Hash] job queue statistics
|
176
186
|
def jobs_queue_statistics
|
177
187
|
# We return empty stats in case jobs queue is not yet initialized
|
188
|
+
base = Karafka::Server.jobs_queue&.statistics || { busy: 0, enqueued: 0 }
|
189
|
+
stats = base.slice(:busy, :enqueued, :waiting)
|
190
|
+
stats[:waiting] ||= 0
|
178
191
|
# busy - represents number of jobs that are being executed currently
|
179
|
-
# enqueued -
|
180
|
-
|
192
|
+
# enqueued - jobs that are in the queue but not being picked up yet
|
193
|
+
# waiting - jobs that are not scheduled on the queue but will be
|
194
|
+
# enqueued in case of advanced schedulers
|
195
|
+
stats
|
181
196
|
end
|
182
197
|
|
183
198
|
# Total memory used in the OS
|
@@ -265,6 +280,48 @@ module Karafka
|
|
265
280
|
@memory_threads_ps = false
|
266
281
|
end
|
267
282
|
end
|
283
|
+
|
284
|
+
# Consumer group details need to be enriched with details about polling that comes from
|
285
|
+
# Karafka level. It is also time based, hence we need to materialize it only at the
|
286
|
+
# moment of message dispatch to have it accurate.
|
287
|
+
def enriched_consumer_groups
|
288
|
+
@consumer_groups.each_value do |cg_details|
|
289
|
+
cg_details.each do
|
290
|
+
cg_details.fetch(:subscription_groups, {}).each do |sg_id, sg_details|
|
291
|
+
# This should always be available. Since the subscription group polled at time
|
292
|
+
# is first initialized before we start polling, there should be no case where
|
293
|
+
# we have statistics about a given subscription group but we do not have the
|
294
|
+
# last polling time
|
295
|
+
polled_at = subscription_groups.fetch(sg_id).fetch(:polled_at)
|
296
|
+
sg_details[:state][:poll_age] = monotonic_now - polled_at
|
297
|
+
end
|
298
|
+
end
|
299
|
+
end
|
300
|
+
|
301
|
+
@consumer_groups
|
302
|
+
end
|
303
|
+
|
304
|
+
# @return [Integer] number of bytes received per second out of a one minute time window
|
305
|
+
# by all the consumers
|
306
|
+
# @note We use one minute window to compensate for cases where metrics would be reported
|
307
|
+
# or recorded faster or slower. This normalizes data
|
308
|
+
def bytes_received
|
309
|
+
@windows
|
310
|
+
.m1
|
311
|
+
.stats_from { |k, _v| k.end_with?('rxbytes') }
|
312
|
+
.rps
|
313
|
+
.round
|
314
|
+
end
|
315
|
+
|
316
|
+
# @return [Integer] number of bytes sent per second out of a one minute time window by
|
317
|
+
# all the consumers
|
318
|
+
def bytes_sent
|
319
|
+
@windows
|
320
|
+
.m1
|
321
|
+
.stats_from { |k, _v| k.end_with?('txbytes') }
|
322
|
+
.rps
|
323
|
+
.round
|
324
|
+
end
|
268
325
|
end
|
269
326
|
end
|
270
327
|
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Web
|
5
|
+
module Tracking
|
6
|
+
module Helpers
|
7
|
+
# Namespace for time sensitive related buffers and operators
|
8
|
+
module Ttls
|
9
|
+
# Array that allows us to store data points that expire over time automatically.
|
10
|
+
class Array
|
11
|
+
include ::Karafka::Core::Helpers::Time
|
12
|
+
include Enumerable
|
13
|
+
|
14
|
+
# @param ttl [Integer] milliseconds ttl
|
15
|
+
def initialize(ttl)
|
16
|
+
@ttl = ttl
|
17
|
+
@accu = []
|
18
|
+
end
|
19
|
+
|
20
|
+
# Iterates over only active elements
|
21
|
+
def each
|
22
|
+
clear
|
23
|
+
|
24
|
+
@accu.each do |sample|
|
25
|
+
yield sample[:value]
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# @param value [Object] adds value to the array
|
30
|
+
# @return [Object] added element
|
31
|
+
def <<(value)
|
32
|
+
@accu << { value: value, added_at: monotonic_now }
|
33
|
+
|
34
|
+
clear
|
35
|
+
|
36
|
+
value
|
37
|
+
end
|
38
|
+
|
39
|
+
# @return [Boolean] is the array empty
|
40
|
+
def empty?
|
41
|
+
clear
|
42
|
+
@accu.empty?
|
43
|
+
end
|
44
|
+
|
45
|
+
# Samples that are within our TTL time window with the times
|
46
|
+
#
|
47
|
+
# @return [::Array<Hash>] active samples, each with `:value` and `:added_at` keys
|
48
|
+
def samples
|
49
|
+
clear
|
50
|
+
@accu
|
51
|
+
end
|
52
|
+
|
53
|
+
# @return [::Array] pure array version with only active elements
|
54
|
+
def to_a
|
55
|
+
clear
|
56
|
+
super
|
57
|
+
end
|
58
|
+
|
59
|
+
private
|
60
|
+
|
61
|
+
# Evicts outdated samples
|
62
|
+
def clear
|
63
|
+
@accu.delete_if do |sample|
|
64
|
+
monotonic_now - sample[:added_at] > @ttl
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Web
|
5
|
+
module Tracking
|
6
|
+
module Helpers
|
7
|
+
module Ttls
|
8
|
+
# Hash that accumulates data that has an expiration date (ttl)
|
9
|
+
# Used to keep track of metrics in a window
|
10
|
+
class Hash < Hash
|
11
|
+
# @param ttl [Integer] milliseconds ttl
|
12
|
+
def initialize(ttl)
|
13
|
+
super() { |k, v| k[v] = Ttls::Array.new(ttl) }
|
14
|
+
end
|
15
|
+
|
16
|
+
# Takes a block where we provide a hash select filtering to select keys we are
|
17
|
+
# interested in using for aggregated stats. Once filtered, builds a Stats object out
|
18
|
+
# of the candidates
|
19
|
+
#
|
20
|
+
# @param block [Proc] block for selection of elements for stats
|
21
|
+
# @yieldparam [String] key
|
22
|
+
# @yieldparam [Ttls::Array] samples
|
23
|
+
# @return [Stats]
|
24
|
+
def stats_from(&block)
|
25
|
+
Stats.new(
|
26
|
+
select(&block)
|
27
|
+
)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Web
|
5
|
+
module Tracking
|
6
|
+
module Helpers
|
7
|
+
module Ttls
|
8
|
+
# Object that simplifies computing aggregated statistics out of ttl data
|
9
|
+
# For TTL based operations we may collect samples from multiple consumers/producers etc
|
10
|
+
# but in the end we are interested in the collective result of the whole process.
|
11
|
+
#
|
12
|
+
# For example when we talk about data received from Kafka, we want to materialize total
|
13
|
+
# number of bytes and not bytes per given client connection. This layer simplifies this
|
14
|
+
# by doing necessary aggregations and providing the final results
|
15
|
+
class Stats
|
16
|
+
# @param ttls_hash [Ttls::Hash, Hash] hash with window based samples
|
17
|
+
def initialize(ttls_hash)
|
18
|
+
@data = ttls_hash
|
19
|
+
.values
|
20
|
+
.map(&:samples)
|
21
|
+
.map(&:to_a)
|
22
|
+
.delete_if { |samples| samples.size < 2 }
|
23
|
+
.map { |samples| samples.map(&:values) }
|
24
|
+
end
|
25
|
+
|
26
|
+
# Computes the rate out of the samples provided on a per second basis. The samples need
|
27
|
+
# to come from the window aggregations
|
28
|
+
#
|
29
|
+
# @return [Float] per second rate value
|
30
|
+
def rps
|
31
|
+
sub_results = @data.map do |samples|
|
32
|
+
oldest = samples.first
|
33
|
+
newest = samples.last
|
34
|
+
|
35
|
+
value = oldest[0] - newest[0]
|
36
|
+
# Convert to seconds as we want to have it in a 1 sec pace
|
37
|
+
time = (oldest[1] - newest[1]) / 1_000
|
38
|
+
|
39
|
+
value / time.to_f
|
40
|
+
end
|
41
|
+
|
42
|
+
sub_results.flatten.sum
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Web
|
5
|
+
module Tracking
|
6
|
+
module Helpers
|
7
|
+
module Ttls
|
8
|
+
# Object used to track process metrics in time windows. Those are shared, meaning they do
|
9
|
+
# not refer to particular metric type but allow us to store whatever we want.
|
10
|
+
#
|
11
|
+
# We have following time windows:
|
12
|
+
# - m1 - one minute big
|
13
|
+
# - m5 - five minute big
|
14
|
+
Windows = Struct.new(:m1, :m5) do
|
15
|
+
# @return [Ttls::Windows]
|
16
|
+
def initialize
|
17
|
+
super(
|
18
|
+
Ttls::Hash.new(60 * 1_000),
|
19
|
+
Ttls::Hash.new(5 * 60 * 1_000)
|
20
|
+
)
|
21
|
+
end
|
22
|
+
|
23
|
+
# Clears the TTLs windows
|
24
|
+
def clear
|
25
|
+
values.each(&:clear)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|