karafka-web 0.11.3 → 0.11.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/Gemfile +0 -2
- data/Gemfile.lock +45 -32
- data/bin/integrations +44 -0
- data/bin/rspecs +6 -2
- data/docker-compose.yml +1 -1
- data/karafka-web.gemspec +1 -1
- data/lib/karafka/web/app.rb +2 -3
- data/lib/karafka/web/cli/help.rb +1 -1
- data/lib/karafka/web/config.rb +8 -0
- data/lib/karafka/web/contracts/base.rb +2 -4
- data/lib/karafka/web/contracts/config.rb +5 -5
- data/lib/karafka/web/management/actions/enable.rb +14 -1
- data/lib/karafka/web/management/migrations/consumers_reports/1761645571_rename_process_name_to_id.rb +38 -0
- data/lib/karafka/web/management/migrator.rb +3 -2
- data/lib/karafka/web/pro/commanding/commands/base.rb +1 -1
- data/lib/karafka/web/pro/commanding/contracts/config.rb +2 -4
- data/lib/karafka/web/pro/commanding/handlers/partitions/tracker.rb +2 -3
- data/lib/karafka/web/pro/ui/controllers/scheduled_messages/schedules_controller.rb +1 -2
- data/lib/karafka/web/pro/ui/controllers/topics/distributions_controller.rb +1 -3
- data/lib/karafka/web/pro/ui/lib/branding/contracts/config.rb +2 -4
- data/lib/karafka/web/pro/ui/lib/policies/contracts/config.rb +2 -4
- data/lib/karafka/web/pro/ui/lib/search/contracts/config.rb +3 -5
- data/lib/karafka/web/pro/ui/lib/search/contracts/form.rb +3 -5
- data/lib/karafka/web/pro/ui/lib/search/runner.rb +14 -1
- data/lib/karafka/web/pro/ui/routes/errors.rb +3 -3
- data/lib/karafka/web/pro/ui/routes/explorer.rb +3 -3
- data/lib/karafka/web/pro/ui/views/health/_no_partition_data.erb +9 -0
- data/lib/karafka/web/pro/ui/views/health/_partitions_with_fallback.erb +41 -0
- data/lib/karafka/web/pro/ui/views/health/changes.erb +12 -13
- data/lib/karafka/web/pro/ui/views/health/lags.erb +12 -13
- data/lib/karafka/web/pro/ui/views/health/offsets.erb +12 -13
- data/lib/karafka/web/pro/ui/views/health/overview.erb +15 -16
- data/lib/karafka/web/processing/consumer.rb +8 -3
- data/lib/karafka/web/processing/consumers/aggregators/metrics.rb +1 -1
- data/lib/karafka/web/processing/consumers/aggregators/state.rb +5 -5
- data/lib/karafka/web/processing/consumers/contracts/state.rb +1 -1
- data/lib/karafka/web/processing/consumers/reports_migrator.rb +49 -0
- data/lib/karafka/web/processing/time_series_tracker.rb +1 -1
- data/lib/karafka/web/tracking/consumers/contracts/report.rb +1 -1
- data/lib/karafka/web/tracking/consumers/contracts/topic.rb +1 -0
- data/lib/karafka/web/tracking/consumers/listeners/errors.rb +2 -1
- data/lib/karafka/web/tracking/consumers/listeners/processing.rb +46 -0
- data/lib/karafka/web/tracking/consumers/listeners/statistics.rb +1 -0
- data/lib/karafka/web/tracking/consumers/sampler/enrichers/base.rb +20 -0
- data/lib/karafka/web/tracking/consumers/sampler/enrichers/consumer_groups.rb +116 -0
- data/lib/karafka/web/tracking/consumers/sampler/metrics/base.rb +20 -0
- data/lib/karafka/web/tracking/consumers/sampler/metrics/container.rb +113 -0
- data/lib/karafka/web/tracking/consumers/sampler/metrics/jobs.rb +60 -0
- data/lib/karafka/web/tracking/consumers/sampler/metrics/network.rb +48 -0
- data/lib/karafka/web/tracking/consumers/sampler/metrics/os.rb +206 -0
- data/lib/karafka/web/tracking/consumers/sampler/metrics/server.rb +33 -0
- data/lib/karafka/web/tracking/consumers/sampler.rb +34 -215
- data/lib/karafka/web/tracking/contracts/error.rb +1 -0
- data/lib/karafka/web/tracking/helpers/ttls/hash.rb +2 -3
- data/lib/karafka/web/tracking/helpers/ttls/stats.rb +1 -2
- data/lib/karafka/web/tracking/producers/listeners/errors.rb +2 -1
- data/lib/karafka/web/tracking/ui/errors.rb +76 -0
- data/lib/karafka/web/ui/base.rb +19 -9
- data/lib/karafka/web/ui/controllers/requests/execution_wrapper.rb +2 -4
- data/lib/karafka/web/ui/controllers/requests/params.rb +1 -1
- data/lib/karafka/web/ui/helpers/application_helper.rb +1 -1
- data/lib/karafka/web/ui/helpers/paths_helper.rb +6 -9
- data/lib/karafka/web/ui/lib/sorter.rb +1 -1
- data/lib/karafka/web/ui/models/health.rb +14 -9
- data/lib/karafka/web/ui/models/jobs.rb +4 -6
- data/lib/karafka/web/ui/models/message.rb +7 -8
- data/lib/karafka/web/ui/models/metrics/aggregated.rb +4 -4
- data/lib/karafka/web/ui/models/metrics/charts/aggregated.rb +1 -2
- data/lib/karafka/web/ui/models/metrics/charts/topics.rb +2 -2
- data/lib/karafka/web/ui/models/metrics/topics.rb +3 -4
- data/lib/karafka/web/ui/models/recurring_tasks/schedule.rb +1 -1
- data/lib/karafka/web/ui/public/javascripts/application.min.js.gz +0 -0
- data/lib/karafka/web/ui/public/stylesheets/application.min.css +3 -0
- data/lib/karafka/web/ui/public/stylesheets/application.min.css.br +0 -0
- data/lib/karafka/web/ui/public/stylesheets/application.min.css.gz +0 -0
- data/lib/karafka/web/ui/routes/errors.rb +3 -3
- data/lib/karafka/web/ui/views/shared/exceptions/unhandled_error.erb +42 -0
- data/lib/karafka/web/version.rb +1 -1
- data/lib/karafka/web.rb +2 -3
- data/package-lock.json +180 -236
- data/package.json +3 -3
- data/renovate.json +13 -0
- metadata +18 -3
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Karafka
  module Web
    module Tracking
      module Consumers
        class Sampler < Tracking::Sampler
          module Enrichers
            # Enriches consumer groups data with polling details and transactional consumer handling
            # This is responsible for materializing time-based data and filling statistical gaps
            # for transactional consumers
            class ConsumerGroups < Base
              include ::Karafka::Core::Helpers::Time

              # @param consumer_groups [Hash] consumer groups hash to be enriched
              # @param subscription_groups [Hash] subscription groups tracking data
              def initialize(consumer_groups, subscription_groups)
                super()
                @consumer_groups = consumer_groups
                @subscription_groups = subscription_groups
              end

              # Enriches consumer groups with polling details and transactional consumer offsets
              # @return [Hash] enriched consumer groups
              def call
                consumer_groups.each_value do |cg_details|
                  # Fix: this iteration was previously wrapped in a redundant parameterless
                  # `cg_details.each` loop, which re-ran the whole enrichment once per key of
                  # the consumer group hash. A single pass over the subscription groups is
                  # sufficient and avoids the duplicated work.
                  cg_details.fetch(:subscription_groups, {}).each do |sg_id, sg_details|
                    enrich_subscription_group(sg_id, sg_details)
                  end
                end

                consumer_groups
              end

              private

              attr_reader :consumer_groups, :subscription_groups

              # Enriches a single subscription group with polling age and partition details
              # @param sg_id [String] subscription group id
              # @param sg_details [Hash] subscription group details from statistics
              def enrich_subscription_group(sg_id, sg_details)
                # This should be always available, since the subscription group polled at time
                # is first initialized before we start polling, there should be no case where
                # we have statistics about a given subscription group but we do not have the
                # sg reference
                sg_tracking = subscription_groups.fetch(sg_id)

                polled_at = sg_tracking.fetch(:polled_at)
                sg_details[:state][:poll_age] = (monotonic_now - polled_at).round(2)

                sg_details[:topics].each do |topic_name, topic_details|
                  topic_details[:partitions].each do |partition_id, partition_details|
                    enrich_partition(sg_tracking, topic_name, partition_id, partition_details)
                  end
                end
              end

              # Enriches partition details for transactional consumers
              # @param sg_tracking [Hash] subscription group tracking data
              # @param topic_name [String] topic name
              # @param partition_id [Integer] partition id
              # @param partition_details [Hash] partition details from statistics
              def enrich_partition(sg_tracking, topic_name, partition_id, partition_details)
                # Always assume non-transactional as default. Will be overwritten by the
                # consumer level details if collected
                partition_details[:transactional] ||= false

                # If we have stored offset or stored lag, it means it's not a transactional
                # consumer at all so we can skip enrichment
                return if partition_details[:lag_stored].positive?
                return if partition_details[:stored_offset].positive?
                return unless sg_tracking[:topics].key?(topic_name)
                return unless sg_tracking[:topics][topic_name].key?(partition_id)

                k_partition_details = sg_tracking[:topics][topic_name][partition_id]

                # If seek offset was not yet set, nothing to enrich
                return unless k_partition_details[:seek_offset].positive?

                enrich_transactional_partition(k_partition_details, partition_details)
              end

              # Enriches partition with transactional consumer offset details
              # @param k_partition_details [Hash] Karafka-level partition details
              # @param partition_details [Hash] partition details from statistics
              def enrich_transactional_partition(k_partition_details, partition_details)
                partition_details[:transactional] = k_partition_details[:transactional]

                # Seek offset is always +1 from the last stored in Karafka
                seek_offset = k_partition_details[:seek_offset]
                stored_offset = seek_offset - 1

                # In case of transactions we have to compute the lag ourselves
                # -1 because ls offset (or high watermark) is last + 1
                lag = partition_details[:ls_offset] - seek_offset
                # This can happen if ls_offset is refreshed slower than our stored offset
                # fetching from Karafka transactional layer
                lag = 0 if lag.negative?

                partition_details[:lag] = lag
                partition_details[:lag_d] = 0
                partition_details[:lag_stored] = lag
                partition_details[:lag_stored_d] = 0
                partition_details[:stored_offset] = stored_offset
                partition_details[:committed_offset] = stored_offset
              end
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Karafka
  module Web
    module Tracking
      module Consumers
        class Sampler < Tracking::Sampler
          # Namespace for metrics collectors that gather various system and process statistics
          module Metrics
            # Base class for metrics collectors
            # This is an abstract base class that can be extended to create custom metrics collectors
            # @note Intentionally empty for now — it exists so all collectors share a common
            #   ancestor and future shared behavior has a single home
            class Base
              # Placeholder for future common functionality
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Karafka
  module Web
    module Tracking
      module Consumers
        class Sampler < Tracking::Sampler
          module Metrics
            # Collects metrics from containerized environments (Docker, Kubernetes)
            # Extends OS metrics with container-aware memory limit detection from cgroups
            # Supports both cgroups v1 and v2
            class Container < Os
              # Maximum value that represents "no limit" in cgroup v2
              CGROUP_V2_MAX = 'max'

              # Paths for cgroup detection and reading
              CGROUP_V2_CONTROLLERS = '/sys/fs/cgroup/cgroup.controllers'

              # Memory paths
              # Path to cgroup v1 memory limit file
              CGROUP_V1_MEMORY_LIMIT = '/sys/fs/cgroup/memory/memory.limit_in_bytes'
              # Path to cgroup v2 memory limit file
              CGROUP_V2_MEMORY_LIMIT = '/sys/fs/cgroup/memory.max'

              private_constant :CGROUP_V2_MAX, :CGROUP_V2_CONTROLLERS,
                               :CGROUP_V1_MEMORY_LIMIT, :CGROUP_V2_MEMORY_LIMIT

              class << self
                # Checks if running in a containerized environment with cgroups
                # @return [Boolean] true if cgroups are available, false otherwise
                def active?
                  !cgroup_version.nil?
                end

                # Gets the memory limit for the container
                # @return [Integer, nil] memory limit in kilobytes, or nil if not available
                # @note Memoized (including a nil result) so the cgroup files are read once
                def memory_limit
                  unless instance_variable_defined?(:@memory_limit)
                    @memory_limit =
                      case cgroup_version
                      when :v2 then read_cgroup_v2_memory_limit
                      when :v1 then read_cgroup_v1_memory_limit
                      end
                  end

                  @memory_limit
                end

                private

                # Detects which cgroup version is in use
                # @return [Symbol, nil] :v2, :v1, or nil if not in a cgroup environment
                # @note Memoized (including a nil result) so detection happens only once
                def cgroup_version
                  unless instance_variable_defined?(:@cgroup_version)
                    @cgroup_version =
                      if File.exist?(CGROUP_V2_CONTROLLERS)
                        :v2
                      elsif File.exist?(CGROUP_V1_MEMORY_LIMIT)
                        :v1
                      end
                  end

                  @cgroup_version
                end

                # Reads memory limit from cgroup v2
                # @return [Integer, nil] memory limit in kilobytes, or nil
                def read_cgroup_v2_memory_limit
                  return nil unless File.exist?(CGROUP_V2_MEMORY_LIMIT)

                  raw = File.read(CGROUP_V2_MEMORY_LIMIT).strip

                  # "max" means no limit; otherwise convert from bytes to kilobytes
                  raw == CGROUP_V2_MAX ? nil : raw.to_i / 1024
                rescue StandardError
                  nil
                end

                # Reads memory limit from cgroup v1
                # @return [Integer, nil] memory limit in kilobytes, or nil
                def read_cgroup_v1_memory_limit
                  return nil unless File.exist?(CGROUP_V1_MEMORY_LIMIT)

                  bytes = File.read(CGROUP_V1_MEMORY_LIMIT).strip.to_i

                  # Very large values (close to max int64) mean no limit
                  # Using a threshold of 2^60 as a reasonable "unlimited" indicator;
                  # otherwise convert from bytes to kilobytes
                  bytes > (2**60) ? nil : bytes / 1024
                rescue StandardError
                  nil
                end
              end

              # @return [Integer] total amount of memory in kilobytes
              # In containerized environments, returns the container's memory limit from cgroups.
              # Falls back to host memory if no limit is set.
              # @note Memoized at instance level to avoid repeated class method calls
              def memory_size
                @memory_size ||= self.class.memory_limit || super
              end
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Karafka
  module Web
    module Tracking
      module Consumers
        class Sampler < Tracking::Sampler
          module Metrics
            # Collects job queue statistics and worker utilization metrics
            class Jobs < Base
              include ::Karafka::Core::Helpers::Time

              # @param windows [Helpers::Ttls::Windows] time windows for aggregating metrics
              # @param started_at [Float] process start time
              # @param workers [Integer] number of worker threads
              def initialize(windows, started_at, workers)
                super()
                @windows = windows
                @started_at = started_at
                @workers = workers
              end

              # @return [Numeric] % utilization of all the threads. 100% means all the threads are
              #   utilized all the time within the given time window. 0% means, nothing is happening
              #   most if not all the time.
              def utilization
                busy_times = windows.m1[:processed_total_time]

                return 0 if busy_times.empty?

                # Cap the time factor at one minute to match the m1 window size
                elapsed = [float_now - started_at, 60].min

                # We divide by 1_000 to convert from milliseconds
                # We multiply by 100 to have it in % scale
                (busy_times.sum / 1_000 / workers / elapsed * 100).round(2)
              end

              # @return [Hash] job queue statistics
              #   - busy: number of jobs that are being executed currently
              #   - enqueued: jobs that are in the queue but not being picked up yet
              #   - waiting: jobs that are not scheduled on the queue but will be
              #     enqueued in case of advanced schedulers
              def jobs_queue_statistics
                # We return empty stats in case jobs queue is not yet initialized
                raw = Karafka::Server.jobs_queue&.statistics || { busy: 0, enqueued: 0 }

                raw
                  .slice(:busy, :enqueued, :waiting)
                  .tap { |stats| stats[:waiting] ||= 0 }
              end

              private

              attr_reader :windows, :started_at, :workers
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Karafka
  module Web
    module Tracking
      module Consumers
        class Sampler < Tracking::Sampler
          module Metrics
            # Collects network throughput metrics (bytes received/sent per second)
            class Network < Base
              # @param windows [Helpers::Ttls::Windows] time windows for aggregating metrics
              def initialize(windows)
                super()
                @windows = windows
              end

              # @return [Integer] number of bytes received per second out of a one minute time
              #   window by all the consumers
              # @note We use one minute window to compensate for cases where metrics would be
              #   reported or recorded faster or slower. This normalizes data
              def bytes_received
                rx_stats = windows.m1.stats_from { |key, _| key.end_with?('rxbytes') }

                rx_stats.rps.round
              end

              # @return [Integer] number of bytes sent per second out of a one minute time window
              #   by all the consumers
              def bytes_sent
                tx_stats = windows.m1.stats_from { |key, _| key.end_with?('txbytes') }

                tx_stats.rps.round
              end

              private

              attr_reader :windows
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Karafka
  module Web
    module Tracking
      module Consumers
        class Sampler < Tracking::Sampler
          module Metrics
            # Collects OS-level metrics from /proc filesystem and system commands
            # Used when running directly on a host OS (not in containers)
            class Os < Base
              # @param shell [MemoizedShell] shell executor for running system commands
              def initialize(shell)
                super()
                @shell = shell
              end

              # @return [Integer] memory used by this process in kilobytes (RSS - Resident Set Size)
              # This is the amount of physical memory currently used by the Karafka process.
              # On Linux: reads VmRSS from /proc/{pid}/status
              # On macOS: uses ps command to get RSS for current process
              # @note This represents ONLY the current Karafka process memory usage
              # @note NOTE(review): on Linux, if no VmRSS line is present, `each` returns the
              #   lines array rather than an Integer — presumably VmRSS is always present; confirm
              def memory_usage
                pid = ::Process.pid

                case RUBY_PLATFORM
                # Reading this that way is cheaper than running a shell command
                when /linux/
                  File.readlines("/proc/#{pid}/status").each do |line|
                    next unless line.start_with?('VmRSS:')

                    # break with a value makes this the result of the `each` call
                    break line.split[1].to_i
                  end
                when /darwin|bsd/
                  shell
                    .call("ps -o pid,rss -p #{pid}")
                    .lines
                    .last
                    .split
                    .last
                    .to_i
                else
                  # Unsupported platforms report zero rather than raising
                  0
                end
              end

              # @param memory_threads_ps [Array, false] parsed ps/proc output for all processes
              # @return [Integer] total memory used by all processes in the system (or container)
              # This represents system-wide (or container-wide) memory usage by summing RSS
              # across all processes.
              # On bare metal: sums memory for all processes on the host
              # In containers: sums memory for all processes within the container (due to PID namespace)
              # @note This is DIFFERENT from memory_usage which only shows current process memory
              # @note Used in Web UI to show "OS memory used" metric
              def memory_total_usage(memory_threads_ps)
                return 0 unless memory_threads_ps

                # Each row is [rss_kb, thread_count, pid]; first element is the RSS in kilobytes
                memory_threads_ps.sum(&:first)
              end

              # @return [Integer] total amount of available memory in kilobytes
              # This is the total physical memory available to the system/container.
              # On Linux: reads MemTotal from /proc/meminfo
              # On macOS: uses sysctl hw.memsize
              # In containers: Container class overrides this to return cgroup memory limit
              # @note This is a STATIC value (system RAM capacity), memoized for performance
              # @note Used in Web UI to show "OS memory available" metric
              # @note NOTE(review): the darwin branch reports hw.memsize in bytes per sysctl
              #   conventions while MemTotal is kilobytes — units differ between platforms; confirm
              def memory_size
                return @memory_size if instance_variable_defined?(:@memory_size)

                @memory_size = case RUBY_PLATFORM
                               when /linux/
                                 mem_info = File.read('/proc/meminfo')
                                 mem_total_line = mem_info.match(/MemTotal:\s*(?<total>\d+)/)
                                 mem_total_line['total'].to_i
                               when /darwin|bsd/
                                 shell
                                   .call('sysctl -a')
                                   .split("\n")
                                   .find { |line| line.start_with?('hw.memsize:') }
                                   .to_s
                                   .split
                                   .last
                                   .to_i
                               else
                                 0
                               end
              end

              # @return [Array<Float>] load averages for last 1, 5 and 15 minutes
              # Returns [-1, -1, -1] on unsupported platforms
              def cpu_usage
                case RUBY_PLATFORM
                when /linux/
                  File
                    .read('/proc/loadavg')
                    .split
                    .first(3)
                    .map(&:to_f)
                when /darwin|bsd/
                  shell
                    .call('w | head -1')
                    .strip
                    .split
                    .map(&:to_f)
                    .last(3)
                else
                  [-1, -1, -1]
                end
              end

              # @return [Integer] CPU count
              def cpus
                @cpus ||= Etc.nprocessors
              end

              # @param memory_threads_ps [Array, false] parsed ps output
              # @return [Integer] number of process threads
              # @note This returns total number of threads from the OS perspective including
              #   native extensions threads, etc.
              # @note NOTE(review): assumes the current pid is always present in the rows;
              #   `find` returning nil would raise NoMethodError — confirm against sampler flow
              def threads(memory_threads_ps)
                return 0 unless memory_threads_ps

                memory_threads_ps.find { |row| row.last == ::Process.pid }[1]
              end

              # Loads process information for all running processes
              # @return [Array<Array<Integer, Integer, Integer>>, false] array of [rss_kb, threads, pid]
              #   for each process, or false if unavailable
              #
              # This method reads information about ALL processes on the system (or in the container).
              # The data is used by multiple metrics:
              # - memory_total_usage: sums RSS across all processes
              # - threads: extracts thread count for current process
              #
              # Format of each array element: [memory_in_kb, thread_count, process_id]
              # - memory_in_kb: RSS (Resident Set Size) in kilobytes
              # - thread_count: Number of threads (only populated for current process, 0 for others)
              # - process_id: Process ID
              #
              # Platform behavior:
              # - Linux: Reads /proc/[0-9]*/statm for ALL processes on host/container
              # - macOS: Uses `ps -A` to get all processes
              # - Containers: Due to PID namespaces, only sees processes within the container
              #
              # @note Sampler calls this once per sample cycle (every ~5 seconds) and caches the result
              #   in @memory_threads_ps to ensure consistent data within a single sample snapshot
              # @note The cache is refreshed on EVERY sample cycle, so data stays current
              # @note On Linux, thread count is only extracted for the current process to optimize performance
              def memory_threads_ps
                case RUBY_PLATFORM
                when /linux/
                  page_size = Helpers::Sysconf.page_size
                  current_pid = ::Process.pid

                  # Read all processes from /proc
                  Dir.glob('/proc/[0-9]*/statm').filter_map do |statm_file|
                    pid = statm_file.match(%r{/proc/(\d+)/statm})[1].to_i
                    status_file = "/proc/#{pid}/status"

                    # Extract RSS from /proc/<pid>/statm (second field)
                    rss_pages = begin
                      File.read(statm_file).split[1].to_i
                    rescue StandardError
                      next # Process may have exited
                    end

                    # Extract thread count from /proc/<pid>/status (only for current process)
                    thcount = if pid == current_pid
                                begin
                                  File.read(status_file)[/^Threads:\s+(\d+)/, 1].to_i
                                rescue StandardError
                                  0
                                end
                              else
                                0
                              end

                    # Convert RSS from pages to kilobytes
                    rss_kb = (rss_pages * page_size) / 1024

                    [rss_kb, thcount, pid]
                  end
                # thcount is not available on macos ps
                # because of that we inject 0 as threads count similar to how
                # we do on windows
                when /darwin|bsd/
                  shell
                    .call('ps -A -o rss=,pid=')
                    .split("\n")
                    .map { |row| row.strip.split.map(&:to_i) }
                    .map { |row| [row.first, 0, row.last] }
                else
                  false
                end
              end

              private

              attr_reader :shell
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Karafka
  module Web
    module Tracking
      module Consumers
        class Sampler < Tracking::Sampler
          module Metrics
            # Collects Karafka server state metrics (listeners, workers, status)
            class Server < Base
              # @return [Hash] number of active and standby listeners
              def listeners
                current = Karafka::Server.listeners

                # Before the server boots, there are no listeners to inspect
                return { active: 0, standby: 0 } unless current

                active_count = current.count(&:active?)
                total_count = current.count.to_i

                { active: active_count, standby: total_count - active_count }
              end

              # @return [Integer] number of threads that process work
              def workers
                Karafka::App.config.concurrency
              end
            end
          end
        end
      end
    end
  end
end
|