karafka-web 0.7.3 → 0.7.5
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +10 -4
- data/CHANGELOG.md +14 -0
- data/Gemfile.lock +3 -3
- data/bin/wait_for_kafka +24 -0
- data/docker-compose.yml +17 -16
- data/karafka-web.gemspec +1 -1
- data/lib/karafka/web/errors.rb +10 -1
- data/lib/karafka/web/installer.rb +27 -2
- data/lib/karafka/web/management/create_topics.rb +52 -46
- data/lib/karafka/web/processing/consumer.rb +23 -6
- data/lib/karafka/web/processing/consumers/aggregators/metrics.rb +56 -46
- data/lib/karafka/web/processing/consumers/metrics.rb +4 -0
- data/lib/karafka/web/processing/consumers/schema_manager.rb +14 -7
- data/lib/karafka/web/processing/consumers/state.rb +4 -0
- data/lib/karafka/web/processing/time_series_tracker.rb +4 -1
- data/lib/karafka/web/tracking/consumers/sampler.rb +11 -2
- data/lib/karafka/web/ui/app.rb +1 -1
- data/lib/karafka/web/ui/base.rb +1 -1
- data/lib/karafka/web/ui/helpers/application_helper.rb +3 -2
- data/lib/karafka/web/ui/models/health.rb +5 -1
- data/lib/karafka/web/ui/pro/app.rb +1 -1
- data/lib/karafka/web/ui/pro/views/consumers/_counters.erb +24 -8
- data/lib/karafka/web/ui/pro/views/consumers/consumer/_partition.erb +0 -3
- data/lib/karafka/web/ui/pro/views/consumers/consumer/_subscription_group.erb +40 -34
- data/lib/karafka/web/ui/public/javascripts/bootstrap.min.js +0 -1
- data/lib/karafka/web/ui/public/javascripts/chart.min.js +0 -1
- data/lib/karafka/web/ui/public/javascripts/timeago.min.js +5 -0
- data/lib/karafka/web/ui/public/stylesheets/bootstrap.min.css +0 -1
- data/lib/karafka/web/ui/views/consumers/_counters.erb +21 -7
- data/lib/karafka/web/ui/views/shared/_header.erb +1 -1
- data/lib/karafka/web/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +5 -5
- metadata.gz.sig +0 -0
- data/lib/karafka/web/ui/public/stylesheets/bootstrap.min.css.map +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: caf2303a2e877dd3f06b738974c4093f7a9cf1bdb24b1152183ba9173432b640
+  data.tar.gz: 80b56d4efec23d589adb731c18e5b7137718862edc246fb2c32ff0baaadb84a5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0b28806d2442202259c3c7c4cf3e9d2193e47c2a6b2113f31a5906019f94f4fb2f342ef170721e63ff8ff0e037f1c574509b9be0a98f0c3b8b5b9ff289bd4fbc
+  data.tar.gz: 9c6b42c67d4f305517e42c7446278ba7f5a65f7ad9c4ac52657ebc3231fe92619e6d2b6a35b857bb03b0e29568cfeb596ff4153bcd023ee30e0e235e0917af99
checksums.yaml.gz.sig
CHANGED
Binary file
data/.github/workflows/ci.yml
CHANGED
@@ -20,6 +20,7 @@ jobs:
       fail-fast: false
       matrix:
         ruby:
+          - '3.3.0-preview2'
           - '3.2'
           - '3.1'
           - '3.0'
@@ -28,18 +29,19 @@ jobs:
           - ruby: '3.2'
             coverage: 'true'
     steps:
-      - uses: actions/checkout@
+      - uses: actions/checkout@v4
       - name: Install package dependencies
         run: "[ -e $APT_DEPS ] || sudo apt-get install -y --no-install-recommends $APT_DEPS"

      - name: Start Kafka with docker-compose
        run: |
-          docker-compose up -d
+          docker-compose up -d || (sleep 5 && docker-compose up -d)

      - name: Set up Ruby
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: ${{matrix.ruby}}
+          bundler-cache: true

      - name: Install latest bundler
        run: |
@@ -51,6 +53,10 @@ jobs:
          bundle config set without development
          bundle install --jobs 4 --retry 3

+      - name: Wait for Kafka
+        run: |
+          bundle exec bin/wait_for_kafka
+
      - name: Run all tests
        env:
          GITHUB_COVERAGE: ${{matrix.coverage}}
@@ -62,7 +68,7 @@ jobs:
     strategy:
       fail-fast: false
     steps:
-      - uses: actions/checkout@
+      - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4
        with:
          fetch-depth: 0

@@ -83,7 +89,7 @@ jobs:
     strategy:
       fail-fast: false
     steps:
-      - uses: actions/checkout@
+      - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4
        with:
          fetch-depth: 0
      - name: Run Coditsu
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,19 @@
 # Karafka Web changelog

+## 0.7.5 (2023-09-29)
+- [Enhancement] Update the order of topics creation for the Web setup to support zero-downtime setup of Web in running Karafka projects.
+- [Enhancement] Add a space delimiter to counter numbers to make them easier to read.
+- [Improvement] Normalize per-process job tables and health tables structure (topic name on top).
+- [Fix] Fix a case where charts aggregated data would not include all topics.
+- [Fix] Make sure that the most recent per-partition data for Health is never overwritten by an old state from a previous partition owner.
+- [Fix] Cache assets for 1 year instead of 7 days.
+- [Fix] Remove source maps pointing to non-existing locations.
+- [Maintenance] Include the license and copyright notice for `timeago.js` that was missing in the minified JS file.
+
+## 0.7.4 (2023-09-19)
+- [Improvement] Skip aggregations on older schemas during upgrades. This only skips process reports (that are going to be rolled) in the 5s window in case of an upgrade that should not be a rolling one anyhow. This simplifies the operations and minimizes the risk of breaking upgrades.
+- [Fix] Fix `ps` not working on macOS.
+
 ## 0.7.3 (2023-09-18)
 - [Improvement] Mitigate a case where a race-condition during upgrade would crash data.
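One of the 0.7.5 entries above, the space delimiter for counter values, is purely presentational: large numbers in the UI counters are rendered with a space as the thousands separator. A rough sketch of the idea in plain Ruby (a hypothetical helper, not the gem's actual view code):

    # Inserts a space every three digits, so 1234567 renders as "1 234 567"
    def number_with_space_delimiter(number)
      number.to_s.reverse.scan(/\d{1,3}/).join(' ').reverse
    end

    number_with_space_delimiter(1_234_567) # => "1 234 567"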
data/Gemfile.lock
CHANGED
@@ -1,9 +1,9 @@
 PATH
   remote: .
   specs:
-    karafka-web (0.7.
+    karafka-web (0.7.5)
       erubi (~> 1.4)
-      karafka (>= 2.2.
+      karafka (>= 2.2.6, < 3.0.0)
       karafka-core (>= 2.2.2, < 3.0.0)
       roda (~> 3.68, >= 3.69)
       tilt (~> 2.0)
@@ -26,7 +26,7 @@ GEM
     ffi (1.15.5)
     i18n (1.14.1)
       concurrent-ruby (~> 1.0)
-    karafka (2.2.
+    karafka (2.2.6)
       karafka-core (>= 2.2.2, < 2.3.0)
       thor (>= 0.20)
       waterdrop (>= 2.6.6, < 3.0.0)
data/bin/wait_for_kafka
ADDED
@@ -0,0 +1,24 @@
+#!/usr/bin/env ruby
+
+# Waits for Kafka to be ready
+# Useful in CI where Kafka needs to be fully started before we run any tests
+
+require 'karafka'
+
+Karafka::App.setup do |config|
+  config.kafka[:'bootstrap.servers'] = '127.0.0.1:9092'
+end
+
+60.times do
+  begin
+    # Stop if we can connect to the cluster and get info
+    exit if Karafka::Admin.cluster_info
+  rescue Rdkafka::RdkafkaError
+    puts "Kafka not available, retrying..."
+    sleep(1)
+  end
+end
+
+puts 'Kafka not available!'
+
+exit 1
data/docker-compose.yml
CHANGED
@@ -1,22 +1,23 @@
 version: '2'
-services:
-  zookeeper:
-    container_name: karafka_web_21_zookeeper
-    image: wurstmeister/zookeeper
-    restart: on-failure
-    ports:
-      - '2181:2181'

+services:
   kafka:
-    container_name:
-    image:
+    container_name: kafka
+    image: confluentinc/cp-kafka:7.5.0
+
     ports:
-      -
+      - 9092:9092
+
     environment:
-
-
-
+      CLUSTER_ID: kafka-docker-cluster-1
+      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
+      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+      KAFKA_PROCESS_ROLES: broker,controller
+      KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
+      KAFKA_LISTENERS: PLAINTEXT://:9092,CONTROLLER://:9093
+      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT
+      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://127.0.0.1:9092
+      KAFKA_BROKER_ID: 1
+      KAFKA_CONTROLLER_QUORUM_VOTERS: 1@127.0.0.1:9093
+      ALLOW_PLAINTEXT_LISTENER: 'yes'
       KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
-    volumes:
-      - /var/run/docker.sock:/var/run/docker.sock
-    restart: on-failure
data/karafka-web.gemspec
CHANGED
@@ -17,7 +17,7 @@ Gem::Specification.new do |spec|
   spec.licenses = %w[LGPL-3.0 Commercial]

   spec.add_dependency 'erubi', '~> 1.4'
-  spec.add_dependency 'karafka', '>= 2.2.
+  spec.add_dependency 'karafka', '>= 2.2.6', '< 3.0.0'
   spec.add_dependency 'karafka-core', '>= 2.2.2', '< 3.0.0'
   spec.add_dependency 'roda', '~> 3.68', '>= 3.69'
   spec.add_dependency 'tilt', '~> 2.0'
data/lib/karafka/web/errors.rb
CHANGED
@@ -17,9 +17,18 @@ module Karafka
       # If you see this error, it probably means, that you did not bootstrap Web-UI correctly
       MissingConsumersStateError = Class.new(BaseError)

-      #
+      # Raised when we try to materialize the state but the consumers states topic does not
+      # exist and we do not have a way to get the initial state.
+      # It differs from the above because above indicates that the topic exists but that there
+      # is no initial state, while this indicates, that there is no consumers states topic.
+      MissingConsumersStatesTopicError = Class.new(BaseError)
+
+      # Similar to the above. It should be created during install / migration
       MissingConsumersMetricsError = Class.new(BaseError)

+      # Similar to the one related to consumers states
+      MissingConsumersMetricsTopicError = Class.new(BaseError)
+
       # This error occurs when consumer running older version of the web-ui tries to materialize
       # states from newer versions. Karafka Web-UI provides only backwards compatibility, so
       # you need to have an up-to-date consumer materializing reported states.
data/lib/karafka/web/installer.rb
CHANGED
@@ -18,7 +18,7 @@ module Karafka
        puts 'Creating necessary topics and populating state data...'
        puts
        Management::CreateTopics.new.call(replication_factor)
-
+        wait_for_topics
        Management::CreateInitialStates.new.call
        puts
        Management::ExtendBootFile.new.call
@@ -36,6 +36,7 @@ module Karafka
        puts 'Creating necessary topics and populating state data...'
        puts
        Management::CreateTopics.new.call(replication_factor)
+        wait_for_topics
        Management::CreateInitialStates.new.call
        puts
        puts("Migration #{green('completed')}. Have fun!")
@@ -51,7 +52,7 @@ module Karafka
        Management::DeleteTopics.new.call
        puts
        Management::CreateTopics.new.call(replication_factor)
-
+        wait_for_topics
        Management::CreateInitialStates.new.call
        puts
        puts("Resetting #{green('completed')}. Have fun!")
@@ -74,6 +75,30 @@ module Karafka
      def enable!
        Management::Enable.new.call
      end
+
+      private
+
+      # Waits with a message, that we are waiting on topics
+      # This is not doing much, just waiting as there are some cases that it takes a bit of time
+      # for Kafka to actually propagate new topics knowledge across the cluster. We give it that
+      # bit of time just in case.
+      def wait_for_topics
+        puts
+        print 'Waiting for the topics to synchronize in the cluster'
+        wait(5)
+        puts
+      end
+
+      # Waits for given number of seconds and prints `.` every second.
+      # @param time_in_seconds [Integer] time of wait
+      def wait(time_in_seconds)
+        time_in_seconds.times do
+          sleep(1)
+          print '.'
+        end
+
+        print "\n"
+      end
    end
  end
end
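The new `wait_for_topics` step is deliberately simple: it pauses for five seconds because freshly created topics can take a moment to propagate through the cluster metadata before the initial states are produced. Purely as an illustration of that underlying idea (this is not what the installer does), the same question could be answered by polling the Admin metadata API with a hypothetical helper along these lines:

    require 'karafka'

    # Hypothetical: poll cluster metadata until all expected topic names are visible
    def topics_visible?(expected_names, attempts: 10)
      attempts.times do
        visible = Karafka::Admin.cluster_info.topics.map { |topic| topic[:topic_name] }
        return true if (expected_names - visible).empty?

        sleep(1)
      end

      false
    end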
data/lib/karafka/web/management/create_topics.rb
CHANGED
@@ -9,54 +9,36 @@ module Karafka
        # Runs the creation process
        #
        # @param replication_factor [Integer] replication factor for Web-UI topics
+        #
+        # @note The order of creation of those topics is important. In order to support the
+        #   zero-downtime bootstrap, we use the presence of the states topic and its initial state
+        #   existence as an indicator that the setup went as expected. If the consumers states
+        #   topic exists and contains needed data, it means all went as expected and that
+        #   topics created before it also exist (as no error).
        def call(replication_factor)
          consumers_states_topic = ::Karafka::Web.config.topics.consumers.states
          consumers_metrics_topic = ::Karafka::Web.config.topics.consumers.metrics
          consumers_reports_topic = ::Karafka::Web.config.topics.consumers.reports
          errors_topic = ::Karafka::Web.config.topics.errors

-
-
-            exists(consumers_states_topic)
-          else
-            creating(consumers_states_topic)
-            # This topic needs to have one partition
-            ::Karafka::Admin.create_topic(
-              consumers_states_topic,
-              1,
-              replication_factor,
-              # We care only about the most recent state, previous are irrelevant. So we can easily
-              # compact after one minute. We do not use this beyond the most recent collective
-              # state, hence it all can easily go away. We also limit the segment size to at most
-              # 100MB not to use more space ever.
-              {
-                'cleanup.policy': 'compact',
-                'retention.ms': 60 * 60 * 1_000,
-                'segment.ms': 24 * 60 * 60 * 1_000, # 1 day
-                'segment.bytes': 104_857_600 # 100MB
-              }
-            )
-            created(consumers_states_topic)
-          end
-
-          if existing_topics_names.include?(consumers_metrics_topic)
-            exists(consumers_metrics_topic)
+          if existing_topics_names.include?(errors_topic)
+            exists(errors_topic)
          else
-            creating(
-            #
-            #
+            creating(errors_topic)
+            # All the errors will be dispatched here
+            # This topic can have multiple partitions but we go with one by default. A single Ruby
+            # process should not crash that often and if there is an expectation of a higher volume
+            # of errors, this can be changed by the end user
            ::Karafka::Admin.create_topic(
-
+              errors_topic,
              1,
              replication_factor,
+              # Remove really old errors (older than 3 months just to preserve space)
              {
-                '
-                'retention.ms': 60 * 60 * 1_000, # 1h
-                'segment.ms': 24 * 60 * 60 * 1_000, # 1 day
-                'segment.bytes': 104_857_600 # 100MB
+                'retention.ms': 3 * 31 * 24 * 60 * 60 * 1_000 # 3 months
              }
            )
-            created(
+            created(errors_topic)
          end

          if existing_topics_names.include?(consumers_reports_topic)
@@ -81,24 +63,48 @@ module Karafka
            created(consumers_reports_topic)
          end

-          if existing_topics_names.include?(
-            exists(
+          if existing_topics_names.include?(consumers_metrics_topic)
+            exists(consumers_metrics_topic)
          else
-            creating(
-            #
-            #
-            # process should not crash that often and if there is an expectation of a higher volume
-            # of errors, this can be changed by the end user
+            creating(consumers_metrics_topic)
+            # This topic needs to have one partition
+            # Same as states - only most recent is relevant as it is a materialized state
            ::Karafka::Admin.create_topic(
-
+              consumers_metrics_topic,
              1,
              replication_factor,
-              # Remove really old errors (older than 3 months just to preserve space)
              {
-                '
+                'cleanup.policy': 'compact',
+                'retention.ms': 60 * 60 * 1_000, # 1h
+                'segment.ms': 24 * 60 * 60 * 1_000, # 1 day
+                'segment.bytes': 104_857_600 # 100MB
              }
            )
-            created(
+            created(consumers_metrics_topic)
+          end
+
+          # Create only if needed
+          if existing_topics_names.include?(consumers_states_topic)
+            exists(consumers_states_topic)
+          else
+            creating(consumers_states_topic)
+            # This topic needs to have one partition
+            ::Karafka::Admin.create_topic(
+              consumers_states_topic,
+              1,
+              replication_factor,
+              # We care only about the most recent state, previous are irrelevant. So we can easily
+              # compact after one minute. We do not use this beyond the most recent collective
+              # state, hence it all can easily go away. We also limit the segment size to at most
+              # 100MB not to use more space ever.
+              {
+                'cleanup.policy': 'compact',
+                'retention.ms': 60 * 60 * 1_000,
+                'segment.ms': 24 * 60 * 60 * 1_000, # 1 day
+                'segment.bytes': 104_857_600 # 100MB
+              }
+            )
+            created(consumers_states_topic)
          end
        end

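For reference, the retention and segment settings used in these topic configurations are plain millisecond and byte arithmetic; evaluating the same expressions in Ruby spells out the intent:

    60 * 60 * 1_000               # => 3_600_000       (1 hour  - states/metrics retention)
    24 * 60 * 60 * 1_000          # => 86_400_000      (1 day   - segment.ms)
    3 * 31 * 24 * 60 * 60 * 1_000 # => 8_035_200_000   (~3 months - errors retention)
    104_857_600                   # => 100 MiB           segment.bytes cap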
data/lib/karafka/web/processing/consumer.rb
CHANGED
@@ -26,6 +26,7 @@ module Karafka

        # We set this that way so we report with first batch and so we report as fast as possible
        @flushed_at = monotonic_now - @flush_interval
+        @established = false
      end

      # Aggregates consumers state into a single current state representation
@@ -34,10 +35,24 @@ module Karafka

        # If there is even one incompatible message, we need to stop
        consumers_messages.each do |message|
-
+          case @schema_manager.call(message)
+          when :current
+            true
+          when :newer
+            @schema_manager.invalidate!
+
            dispatch

            raise ::Karafka::Web::Errors::Processing::IncompatibleSchemaError
+          # Older reports mean someone is in the middle of upgrade. Schema change related
+          # upgrades always should happen without a rolling-upgrade, hence we can reject those
+          # requests without significant or any impact on data quality but without having to
+          # worry about backwards compatibility. Errors are tracked independently, so it should
+          # not be a problem.
+          when :older
+            next
+          else
+            raise ::Karafka::Errors::UnsupportedCaseError
          end

          # We need to run the aggregations on each message in order to compensate for
@@ -45,6 +60,10 @@ module Karafka
          @state_aggregator.add(message.payload, message.offset)
          @metrics_aggregator.add_report(message.payload)
          @metrics_aggregator.add_stats(@state_aggregator.stats)
+          # Indicates that we had at least one report we used to enrich data
+          # If there were no state changes, there is no reason to flush data. This can occur
+          # when we had some messages but we skipped them for any reason on a first run
+          @established = true

          # Optimize memory usage in pro
          message.clean! if Karafka.pro?
@@ -59,17 +78,15 @@ module Karafka

      # Flush final state on shutdown
      def shutdown
-
-
-        materialize
-        validate!
-        flush
+        dispatch
      end

      private

      # Flushes the state of the Web-UI to the DB
      def dispatch
+        return unless @established
+
        materialize
        validate!
        flush
data/lib/karafka/web/processing/consumers/aggregators/metrics.rb
CHANGED
@@ -86,65 +86,75 @@ module Karafka

          # Materializes the current state of consumers group data
          #
-          # At the moment we report only topics lags but the format we are using supports
-          # extending this information in the future if it would be needed.
-          #
          # @return [Hash] hash with nested consumers and their topics details structure
          # @note We do **not** report on a per partition basis because it would significantly
          #   increase needed storage.
          def materialize_consumers_groups_current_state
            cgs = {}

-
-
-
-
-
+            iterate_partitions_data do |group_name, topic_name, partitions_data|
+              lags = partitions_data
+                     .map { |p_details| p_details.fetch(:lag, -1) }
+                     .reject(&:negative?)
+
+              lags_stored = partitions_data
+                            .map { |p_details| p_details.fetch(:lag_stored, -1) }
+                            .reject(&:negative?)

-
-              .map { |p_details| p_details
+              offsets_hi = partitions_data
+                           .map { |p_details| p_details.fetch(:hi_offset, -1) }
                           .reject(&:negative?)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+              # Last stable offsets freeze durations - we pick the max freeze to indicate
+              # the longest open transaction that potentially may be hanging
+              ls_offsets_fd = partitions_data
+                              .map { |p_details| p_details.fetch(:ls_offset_fd, 0) }
+                              .reject(&:negative?)
+
+              cgs[group_name] ||= {}
+              cgs[group_name][topic_name] = {
+                lag_stored: lags_stored.sum,
+                lag: lags.sum,
+                pace: offsets_hi.sum,
+                # Take max last stable offset duration without any change. This can
+                # indicate a hanging transaction, because the offset will not move forward
+                # and will stay with a growing freeze duration when stuck
+                ls_offset_fd: ls_offsets_fd.max || 0
+              }
+            end
+
+            cgs
+          end
+
+          # Converts our reports data into an iterator per partition
+          # Compensates for a case where same partition data would be available for a short
+          # period of time in multiple processes reports due to rebalances.
+          def iterate_partitions_data
+            cgs_topics = Hash.new { |h, v| h[v] = Hash.new { |h2, v2| h2[v2] = {} } }
+
+            # We need to sort them in case we have same reports containing data about same
+            # topics partitions. Mostly during shutdowns and rebalances
+            @active_reports
+              .values
+              .sort_by { |report| report.fetch(:dispatched_at) }
+              .map { |details| details.fetch(:consumer_groups) }
+              .each do |consumer_groups|
+                consumer_groups.each do |group_name, group_details|
+                  group_details.fetch(:subscription_groups).each_value do |sg_details|
+                    sg_details.fetch(:topics).each do |topic_name, topic_details|
+                      topic_details.fetch(:partitions).each do |partition_id, partition_data|
+                        cgs_topics[group_name][topic_name][partition_id] = partition_data
+                      end
+                    end
                  end
                end
              end
-            end

-
+            cgs_topics.each do |group_name, topics_data|
+              topics_data.each do |topic_name, partitions_data|
+                yield(group_name, topic_name, partitions_data.values)
+              end
+            end
          end
        end
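To make the aggregation rules above concrete, here is a small worked illustration with made-up numbers: negative values mean "unknown" and are rejected, lags and high watermark offsets are summed per topic, and the last stable offset freeze duration takes the maximum.

    # Hypothetical data for two partitions of one topic in a single consumer group
    partitions_data = [
      { lag: 10, lag_stored: 12, hi_offset: 100, ls_offset_fd: 0 },
      { lag: 5,  lag_stored: -1, hi_offset: 200, ls_offset_fd: 3_000 }
    ]

    lags        = partitions_data.map { |p| p.fetch(:lag, -1) }.reject(&:negative?)        # [10, 5]
    lags_stored = partitions_data.map { |p| p.fetch(:lag_stored, -1) }.reject(&:negative?) # [12]
    offsets_hi  = partitions_data.map { |p| p.fetch(:hi_offset, -1) }.reject(&:negative?)  # [100, 200]
    ls_fds      = partitions_data.map { |p| p.fetch(:ls_offset_fd, 0) }.reject(&:negative?)

    {
      lag_stored: lags_stored.sum,   # 12 (the unknown -1 is ignored)
      lag: lags.sum,                 # 15
      pace: offsets_hi.sum,          # 300
      ls_offset_fd: ls_fds.max || 0  # 3000 - the longest potentially hanging transaction
    }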
data/lib/karafka/web/processing/consumers/metrics.rb
CHANGED
@@ -20,6 +20,10 @@ module Karafka
          return metrics_message.payload if metrics_message

          raise(::Karafka::Web::Errors::Processing::MissingConsumersMetricsError)
+        rescue Rdkafka::RdkafkaError => e
+          raise(e) unless e.code == :unknown_partition
+
+          raise(::Karafka::Web::Errors::Processing::MissingConsumersMetricsTopicError)
        end
      end
    end
data/lib/karafka/web/processing/consumers/schema_manager.rb
CHANGED
@@ -27,12 +27,8 @@ module Karafka
        end

        # @param message [Karafka::Messages::Message] consumer report
-        # @return [
-
-        # @note The state switch is one-direction only. If we encounter an incompatible message
-        #   we need to stop processing so further checks even with valid should not switch it
-        #   back to valid
-        def compatible?(message)
+        # @return [Symbol] is the given message using older, newer or current schema
+        def call(message)
          schema_version = message.payload[:schema_version]

          # Save on memory allocation by reusing
@@ -40,8 +36,19 @@ module Karafka
          # an object with each message
          message_version = @cache[schema_version] ||= ::Gem::Version.new(schema_version)

-          return
+          return :older if message_version < CURRENT_VERSION
+          return :newer if message_version > CURRENT_VERSION
+
+          :current
+        end

+        # Moves the schema manager state to incompatible to indicate in the Web-UI that we
+        # cannot move forward because schema is incompatible.
+        #
+        # @note The state switch is one-direction only. If we encounter an incompatible message
+        #   we need to stop processing so further checks even with valid should not switch it
+        #   back to valid
+        def invalidate!
          @valid = false
        end

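The schema check above relies on `Gem::Version` ordering, which compares version strings segment by segment rather than lexicographically. A quick illustration with made-up schema versions:

    current = Gem::Version.new('1.2.0')

    Gem::Version.new('1.1.9')  < current   # => true  -> :older,   report is skipped
    Gem::Version.new('1.3.0')  > current   # => true  -> :newer,   processing must stop
    Gem::Version.new('1.2.0') == current   # => true  -> :current, report is aggregated
    Gem::Version.new('0.10.0') > Gem::Version.new('0.9.0') # => true (numeric, not string, comparison)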
data/lib/karafka/web/processing/consumers/state.rb
CHANGED
@@ -20,6 +20,10 @@ module Karafka
          return state_message.payload if state_message

          raise(::Karafka::Web::Errors::Processing::MissingConsumersStateError)
+        rescue Rdkafka::RdkafkaError => e
+          raise(e) unless e.code == :unknown_partition
+
+          raise(::Karafka::Web::Errors::Processing::MissingConsumersStatesTopicError)
        end
      end
    end