karafka-web 0.7.3 → 0.7.5

Files changed (37)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +10 -4
  4. data/CHANGELOG.md +14 -0
  5. data/Gemfile.lock +3 -3
  6. data/bin/wait_for_kafka +24 -0
  7. data/docker-compose.yml +17 -16
  8. data/karafka-web.gemspec +1 -1
  9. data/lib/karafka/web/errors.rb +10 -1
  10. data/lib/karafka/web/installer.rb +27 -2
  11. data/lib/karafka/web/management/create_topics.rb +52 -46
  12. data/lib/karafka/web/processing/consumer.rb +23 -6
  13. data/lib/karafka/web/processing/consumers/aggregators/metrics.rb +56 -46
  14. data/lib/karafka/web/processing/consumers/metrics.rb +4 -0
  15. data/lib/karafka/web/processing/consumers/schema_manager.rb +14 -7
  16. data/lib/karafka/web/processing/consumers/state.rb +4 -0
  17. data/lib/karafka/web/processing/time_series_tracker.rb +4 -1
  18. data/lib/karafka/web/tracking/consumers/sampler.rb +11 -2
  19. data/lib/karafka/web/ui/app.rb +1 -1
  20. data/lib/karafka/web/ui/base.rb +1 -1
  21. data/lib/karafka/web/ui/helpers/application_helper.rb +3 -2
  22. data/lib/karafka/web/ui/models/health.rb +5 -1
  23. data/lib/karafka/web/ui/pro/app.rb +1 -1
  24. data/lib/karafka/web/ui/pro/views/consumers/_counters.erb +24 -8
  25. data/lib/karafka/web/ui/pro/views/consumers/consumer/_partition.erb +0 -3
  26. data/lib/karafka/web/ui/pro/views/consumers/consumer/_subscription_group.erb +40 -34
  27. data/lib/karafka/web/ui/public/javascripts/bootstrap.min.js +0 -1
  28. data/lib/karafka/web/ui/public/javascripts/chart.min.js +0 -1
  29. data/lib/karafka/web/ui/public/javascripts/timeago.min.js +5 -0
  30. data/lib/karafka/web/ui/public/stylesheets/bootstrap.min.css +0 -1
  31. data/lib/karafka/web/ui/views/consumers/_counters.erb +21 -7
  32. data/lib/karafka/web/ui/views/shared/_header.erb +1 -1
  33. data/lib/karafka/web/version.rb +1 -1
  34. data.tar.gz.sig +0 -0
  35. metadata +5 -5
  36. metadata.gz.sig +0 -0
  37. data/lib/karafka/web/ui/public/stylesheets/bootstrap.min.css.map +0 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: dc9572e3dfbb4565361fdbc9521fb7a9a7833664f42bd8e9f0e63e940a397034
-   data.tar.gz: 07fb369bcd0d1aa6c80ef20ff96289989c38ff934a1eb795ab85f39b7adf1228
+   metadata.gz: caf2303a2e877dd3f06b738974c4093f7a9cf1bdb24b1152183ba9173432b640
+   data.tar.gz: 80b56d4efec23d589adb731c18e5b7137718862edc246fb2c32ff0baaadb84a5
  SHA512:
-   metadata.gz: 51bc23c17be963c3b19143aa8034046c0bd85df4251dc9e2e911a8ab28ae194d9ae62e2ab1d3aa0b3e2f8dba33649ad51881d9e96fe669fffbdc971f406ce3eb
-   data.tar.gz: 332247644ed57d25687af816ed850f7847b6eb8e63646bf492460424f0d5855c8dace2f1127d0d84631bffa8f6a9fb71e42a64ece7141a6acb96d5b5f987896c
+   metadata.gz: 0b28806d2442202259c3c7c4cf3e9d2193e47c2a6b2113f31a5906019f94f4fb2f342ef170721e63ff8ff0e037f1c574509b9be0a98f0c3b8b5b9ff289bd4fbc
+   data.tar.gz: 9c6b42c67d4f305517e42c7446278ba7f5a65f7ad9c4ac52657ebc3231fe92619e6d2b6a35b857bb03b0e29568cfeb596ff4153bcd023ee30e0e235e0917af99
checksums.yaml.gz.sig CHANGED
Binary file
data/.github/workflows/ci.yml CHANGED
@@ -20,6 +20,7 @@ jobs:
        fail-fast: false
        matrix:
          ruby:
+           - '3.3.0-preview2'
            - '3.2'
            - '3.1'
            - '3.0'
@@ -28,18 +29,19 @@ jobs:
            - ruby: '3.2'
              coverage: 'true'
      steps:
-       - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4
+       - uses: actions/checkout@v4
        - name: Install package dependencies
          run: "[ -e $APT_DEPS ] || sudo apt-get install -y --no-install-recommends $APT_DEPS"

        - name: Start Kafka with docker-compose
          run: |
-           docker-compose up -d
+           docker-compose up -d || (sleep 5 && docker-compose up -d)

        - name: Set up Ruby
          uses: ruby/setup-ruby@v1
          with:
            ruby-version: ${{matrix.ruby}}
+           bundler-cache: true

        - name: Install latest bundler
          run: |
@@ -51,6 +53,10 @@ jobs:
            bundle config set without development
            bundle install --jobs 4 --retry 3

+       - name: Wait for Kafka
+         run: |
+           bundle exec bin/wait_for_kafka
+
        - name: Run all tests
          env:
            GITHUB_COVERAGE: ${{matrix.coverage}}
@@ -62,7 +68,7 @@ jobs:
      strategy:
        fail-fast: false
      steps:
-       - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4
+       - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4
          with:
            fetch-depth: 0

@@ -83,7 +89,7 @@ jobs:
      strategy:
        fail-fast: false
      steps:
-       - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4
+       - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4
          with:
            fetch-depth: 0
      - name: Run Coditsu
data/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
  # Karafka Web changelog

+ ## 0.7.5 (2023-09-29)
+ - [Enhancement] Update the order of topic creation during Web setup to support zero-downtime setup of Web in running Karafka projects.
+ - [Enhancement] Add a space delimiter to counter numbers to make them easier to read.
+ - [Improvement] Normalize the per-process job tables and health tables structure (topic name on top).
+ - [Fix] Fix a case where chart-aggregated data would not include all topics.
+ - [Fix] Make sure that the most recent per-partition data for Health is never overwritten by an old state from a previous partition owner.
+ - [Fix] Cache assets for 1 year instead of 7 days.
+ - [Fix] Remove source maps pointing to non-existing locations.
+ - [Maintenance] Include the license and copyright notice for `timeago.js` that was missing from the minified JS file.
+
+ ## 0.7.4 (2023-09-19)
+ - [Improvement] Skip aggregations of older schemas during upgrades. This only skips process reports (that would roll out of the 5s window anyhow) during an upgrade, which should not be a rolling one in the first place. This simplifies operations and minimizes the risk of breaking upgrades.
+ - [Fix] Fix `ps` not working on macOS.
+
  ## 0.7.3 (2023-09-18)
  - [Improvement] Mitigate a case where a race condition during upgrade would crash data.
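A note on the counters [Enhancement] above: the change amounts to grouping digits in threes with a space. A minimal Ruby sketch of the idea, assuming non-negative integers (the helper name is hypothetical; the actual Web UI helper may differ):

  # Formats 1234567 as "1 234 567" by grouping digits in threes
  def number_with_space_delimiter(number)
    number.to_s.reverse.scan(/\d{1,3}/).join(' ').reverse
  end

  number_with_space_delimiter(1_234_567) # => "1 234 567"
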
data/Gemfile.lock CHANGED
@@ -1,9 +1,9 @@
  PATH
    remote: .
    specs:
-     karafka-web (0.7.3)
+     karafka-web (0.7.5)
        erubi (~> 1.4)
-       karafka (>= 2.2.3, < 3.0.0)
+       karafka (>= 2.2.6, < 3.0.0)
        karafka-core (>= 2.2.2, < 3.0.0)
        roda (~> 3.68, >= 3.69)
        tilt (~> 2.0)
@@ -26,7 +26,7 @@ GEM
    ffi (1.15.5)
    i18n (1.14.1)
      concurrent-ruby (~> 1.0)
-   karafka (2.2.3)
+   karafka (2.2.6)
      karafka-core (>= 2.2.2, < 2.3.0)
      thor (>= 0.20)
      waterdrop (>= 2.6.6, < 3.0.0)
data/bin/wait_for_kafka ADDED
@@ -0,0 +1,24 @@
+ #!/usr/bin/env ruby
+
+ # Waits for Kafka to be ready
+ # Useful in CI where Kafka needs to be fully started before we run any tests
+
+ require 'karafka'
+
+ Karafka::App.setup do |config|
+   config.kafka[:'bootstrap.servers'] = '127.0.0.1:9092'
+ end
+
+ 60.times do
+   begin
+     # Stop if we can connect to the cluster and get info
+     exit if Karafka::Admin.cluster_info
+   rescue Rdkafka::RdkafkaError
+     puts "Kafka not available, retrying..."
+     sleep(1)
+   end
+ end
+
+ puts 'Kafka not available!'
+
+ exit 1
data/docker-compose.yml CHANGED
@@ -1,22 +1,23 @@
  version: '2'
- services:
-   zookeeper:
-     container_name: karafka_web_21_zookeeper
-     image: wurstmeister/zookeeper
-     restart: on-failure
-     ports:
-       - '2181:2181'

+ services:
    kafka:
-     container_name: karafka_web_21_kafka
-     image: wurstmeister/kafka
+     container_name: kafka
+     image: confluentinc/cp-kafka:7.5.0
+
      ports:
-       - '9092:9092'
+       - 9092:9092
+
      environment:
-       KAFKA_ADVERTISED_HOST_NAME: localhost
-       KAFKA_ADVERTISED_PORT: 9092
-       KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+       CLUSTER_ID: kafka-docker-cluster-1
+       KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
+       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+       KAFKA_PROCESS_ROLES: broker,controller
+       KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
+       KAFKA_LISTENERS: PLAINTEXT://:9092,CONTROLLER://:9093
+       KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT
+       KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://127.0.0.1:9092
+       KAFKA_BROKER_ID: 1
+       KAFKA_CONTROLLER_QUORUM_VOTERS: 1@127.0.0.1:9093
+       ALLOW_PLAINTEXT_LISTENER: 'yes'
        KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
-   volumes:
-     - /var/run/docker.sock:/var/run/docker.sock
-   restart: on-failure
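The compose file above replaces the wurstmeister Zookeeper/Kafka pair with a single-node KRaft broker, so no Zookeeper container is needed. A quick sanity check from Ruby that mirrors what bin/wait_for_kafka does (a sketch, assuming the broker listens on 127.0.0.1:9092 and that the rdkafka-ruby metadata object exposes #topics):

  require 'karafka'

  Karafka::App.setup do |config|
    config.kafka[:'bootstrap.servers'] = '127.0.0.1:9092'
  end

  # cluster_info raises Rdkafka::RdkafkaError while the broker is still starting
  info = Karafka::Admin.cluster_info
  puts "Connected, #{info.topics.size} topic(s) visible"
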
data/karafka-web.gemspec CHANGED
@@ -17,7 +17,7 @@ Gem::Specification.new do |spec|
    spec.licenses = %w[LGPL-3.0 Commercial]

    spec.add_dependency 'erubi', '~> 1.4'
-   spec.add_dependency 'karafka', '>= 2.2.3', '< 3.0.0'
+   spec.add_dependency 'karafka', '>= 2.2.6', '< 3.0.0'
    spec.add_dependency 'karafka-core', '>= 2.2.2', '< 3.0.0'
    spec.add_dependency 'roda', '~> 3.68', '>= 3.69'
    spec.add_dependency 'tilt', '~> 2.0'
data/lib/karafka/web/errors.rb CHANGED
@@ -17,9 +17,18 @@ module Karafka
        # If you see this error, it probably means that you did not bootstrap the Web-UI correctly
        MissingConsumersStateError = Class.new(BaseError)

-       # Similar to the above. It should be created during install
+       # Raised when we try to materialize the state but the consumers states topic does not
+       # exist and we do not have a way to get the initial state.
+       # It differs from the above because the above indicates that the topic exists but holds
+       # no initial state, while this one indicates that there is no consumers states topic
+       # at all.
+       MissingConsumersStatesTopicError = Class.new(BaseError)
+
+       # Similar to the above. It should be created during install / migration
        MissingConsumersMetricsError = Class.new(BaseError)

+       # Similar to the one related to consumers states
+       MissingConsumersMetricsTopicError = Class.new(BaseError)
+
        # This error occurs when a consumer running an older version of the Web-UI tries to
        # materialize states from newer versions. Karafka Web-UI provides only backwards
        # compatibility, so you need an up-to-date consumer materializing reported states.
data/lib/karafka/web/installer.rb CHANGED
@@ -18,7 +18,7 @@ module Karafka
        puts 'Creating necessary topics and populating state data...'
        puts
        Management::CreateTopics.new.call(replication_factor)
-       puts
+       wait_for_topics
        Management::CreateInitialStates.new.call
        puts
        Management::ExtendBootFile.new.call
@@ -36,6 +36,7 @@ module Karafka
        puts 'Creating necessary topics and populating state data...'
        puts
        Management::CreateTopics.new.call(replication_factor)
+       wait_for_topics
        Management::CreateInitialStates.new.call
        puts
        puts("Migration #{green('completed')}. Have fun!")
@@ -51,7 +52,7 @@ module Karafka
        Management::DeleteTopics.new.call
        puts
        Management::CreateTopics.new.call(replication_factor)
-       puts
+       wait_for_topics
        Management::CreateInitialStates.new.call
        puts
        puts("Resetting #{green('completed')}. Have fun!")
@@ -74,6 +75,30 @@ module Karafka
      def enable!
        Management::Enable.new.call
      end
+
+     private
+
+     # Waits with a message that we are waiting on topics
+     # This is not doing much, just waiting, as there are cases where it takes a bit of time
+     # for Kafka to actually propagate knowledge of the new topics across the cluster. We
+     # give it that bit of time just in case.
+     def wait_for_topics
+       puts
+       print 'Waiting for the topics to synchronize in the cluster'
+       wait(5)
+       puts
+     end
+
+     # Waits for a given number of seconds and prints `.` every second.
+     # @param time_in_seconds [Integer] time of wait
+     def wait(time_in_seconds)
+       time_in_seconds.times do
+         sleep(1)
+         print '.'
+       end
+
+       print "\n"
+     end
    end
  end
end
data/lib/karafka/web/management/create_topics.rb CHANGED
@@ -9,54 +9,36 @@ module Karafka
        # Runs the creation process
        #
        # @param replication_factor [Integer] replication factor for Web-UI topics
+       #
+       # @note The order of creation of those topics is important. In order to support the
+       #   zero-downtime bootstrap, we use the presence of the states topic and its initial
+       #   state existence as an indicator that the setup went as expected. If the consumers
+       #   states topic exists and contains the needed data, it means all went as expected
+       #   and that the topics created before it also exist (as there was no error).
        def call(replication_factor)
          consumers_states_topic = ::Karafka::Web.config.topics.consumers.states
          consumers_metrics_topic = ::Karafka::Web.config.topics.consumers.metrics
          consumers_reports_topic = ::Karafka::Web.config.topics.consumers.reports
          errors_topic = ::Karafka::Web.config.topics.errors

-         # Create only if needed
-         if existing_topics_names.include?(consumers_states_topic)
-           exists(consumers_states_topic)
-         else
-           creating(consumers_states_topic)
-           # This topic needs to have one partition
-           ::Karafka::Admin.create_topic(
-             consumers_states_topic,
-             1,
-             replication_factor,
-             # We care only about the most recent state, previous are irrelevant. So we can
-             # easily compact after one minute. We do not use this beyond the most recent
-             # collective state, hence it all can easily go away. We also limit the segment
-             # size to at most 100MB not to use more space ever.
-             {
-               'cleanup.policy': 'compact',
-               'retention.ms': 60 * 60 * 1_000,
-               'segment.ms': 24 * 60 * 60 * 1_000, # 1 day
-               'segment.bytes': 104_857_600 # 100MB
-             }
-           )
-           created(consumers_states_topic)
-         end
-
-         if existing_topics_names.include?(consumers_metrics_topic)
-           exists(consumers_metrics_topic)
+         if existing_topics_names.include?(errors_topic)
+           exists(errors_topic)
          else
-           creating(consumers_metrics_topic)
-           # This topic needs to have one partition
-           # Same as states - only most recent is relevant as it is a materialized state
+           creating(errors_topic)
+           # All the errors will be dispatched here
+           # This topic can have multiple partitions but we go with one by default. A single
+           # Ruby process should not crash that often and if there is an expectation of a
+           # higher volume of errors, this can be changed by the end user
            ::Karafka::Admin.create_topic(
-             consumers_metrics_topic,
+             errors_topic,
              1,
              replication_factor,
+             # Remove really old errors (older than 3 months) just to preserve space
              {
-               'cleanup.policy': 'compact',
-               'retention.ms': 60 * 60 * 1_000, # 1h
-               'segment.ms': 24 * 60 * 60 * 1_000, # 1 day
-               'segment.bytes': 104_857_600 # 100MB
+               'retention.ms': 3 * 31 * 24 * 60 * 60 * 1_000 # 3 months
             }
           )
-           created(consumers_metrics_topic)
+           created(errors_topic)
          end

          if existing_topics_names.include?(consumers_reports_topic)
@@ -81,24 +63,48 @@ module Karafka
            created(consumers_reports_topic)
          end

-         if existing_topics_names.include?(errors_topic)
-           exists(errors_topic)
+         if existing_topics_names.include?(consumers_metrics_topic)
+           exists(consumers_metrics_topic)
          else
-           creating(errors_topic)
-           # All the errors will be dispatched here
-           # This topic can have multiple partitions but we go with one by default. A single
-           # Ruby process should not crash that often and if there is an expectation of a
-           # higher volume of errors, this can be changed by the end user
+           creating(consumers_metrics_topic)
+           # This topic needs to have one partition
+           # Same as states - only the most recent state is relevant as it is materialized
            ::Karafka::Admin.create_topic(
-             errors_topic,
+             consumers_metrics_topic,
              1,
              replication_factor,
-             # Remove really old errors (older than 3 months) just to preserve space
              {
-               'retention.ms': 3 * 31 * 24 * 60 * 60 * 1_000 # 3 months
+               'cleanup.policy': 'compact',
+               'retention.ms': 60 * 60 * 1_000, # 1h
+               'segment.ms': 24 * 60 * 60 * 1_000, # 1 day
+               'segment.bytes': 104_857_600 # 100MB
              }
            )
-           created(errors_topic)
+           created(consumers_metrics_topic)
+         end
+
+         # Create only if needed
+         if existing_topics_names.include?(consumers_states_topic)
+           exists(consumers_states_topic)
+         else
+           creating(consumers_states_topic)
+           # This topic needs to have one partition
+           ::Karafka::Admin.create_topic(
+             consumers_states_topic,
+             1,
+             replication_factor,
+             # We care only about the most recent state, previous are irrelevant. So we can
+             # easily compact after one minute. We do not use this beyond the most recent
+             # collective state, hence it all can easily go away. We also limit the segment
+             # size to at most 100MB not to use more space ever.
+             {
+               'cleanup.policy': 'compact',
+               'retention.ms': 60 * 60 * 1_000,
+               'segment.ms': 24 * 60 * 60 * 1_000, # 1 day
+               'segment.bytes': 104_857_600 # 100MB
+             }
+           )
+           created(consumers_states_topic)
          end
        end

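Since the reorder above creates the consumers states topic last, its presence doubles as a bootstrap-completeness marker: if it exists with its initial state, every topic created before it must exist too. A hedged sketch of such a check, to be run inside an app where Karafka Web is already loaded (the t[:topic_name] metadata shape follows rdkafka-ruby conventions; verify against your versions):

  states_topic = ::Karafka::Web.config.topics.consumers.states
  existing = ::Karafka::Admin.cluster_info.topics.map { |t| t[:topic_name] }

  if existing.include?(states_topic)
    puts 'Web UI bootstrap appears complete (states topic present)'
  else
    puts 'States topic missing: setup did not finish, re-run the install'
  end
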
data/lib/karafka/web/processing/consumer.rb CHANGED
@@ -26,6 +26,7 @@ module Karafka

        # We set this that way so we report with the first batch and as fast as possible
        @flushed_at = monotonic_now - @flush_interval
+       @established = false
      end

      # Aggregates consumers state into a single current state representation
@@ -34,10 +35,24 @@ module Karafka

        # If there is even one incompatible message, we need to stop
        consumers_messages.each do |message|
-         unless @schema_manager.compatible?(message)
+         case @schema_manager.call(message)
+         when :current
+           true
+         when :newer
+           @schema_manager.invalidate!
+
            dispatch

            raise ::Karafka::Web::Errors::Processing::IncompatibleSchemaError
+         # Older reports mean someone is in the middle of an upgrade. Schema-change related
+         # upgrades should always happen without a rolling upgrade, hence we can reject those
+         # reports without significant (or any) impact on data quality and without having to
+         # worry about backwards compatibility. Errors are tracked independently, so it
+         # should not be a problem.
+         when :older
+           next
+         else
+           raise ::Karafka::Errors::UnsupportedCaseError
          end

          # We need to run the aggregations on each message in order to compensate for
@@ -45,6 +60,10 @@ module Karafka
          @state_aggregator.add(message.payload, message.offset)
          @metrics_aggregator.add_report(message.payload)
          @metrics_aggregator.add_stats(@state_aggregator.stats)
+         # Indicates that we had at least one report we used to enrich data
+         # If there were no state changes, there is no reason to flush data. This can occur
+         # when we had some messages but we skipped them for any reason on a first run
+         @established = true

          # Optimize memory usage in pro
          message.clean! if Karafka.pro?
@@ -59,17 +78,15 @@ module Karafka

        # Flush final state on shutdown
        def shutdown
-         return unless @state_aggregator
-
-         materialize
-         validate!
-         flush
+         dispatch
        end

        private

        # Flushes the state of the Web-UI to the DB
        def dispatch
+         return unless @established
+
          materialize
          validate!
          flush
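The @established flag above is a plain "dirty" guard: dispatch stays a no-op until at least one report has actually enriched the state, so a shutdown on a consumer that only saw skippable (for example, older-schema) messages does not flush an empty or stale state. The pattern in isolation (class and method names here are illustrative, not the gem's):

  class Flusher
    def initialize
      @established = false
      @reports = []
    end

    def add(report)
      @reports << report
      # At least one report enriched the state, so flushing now makes sense
      @established = true
    end

    def dispatch
      return unless @established

      puts "flushing #{@reports.size} report(s)"
    end
  end

  f = Flusher.new
  f.dispatch     # no-op, nothing aggregated yet
  f.add(lag: 5)
  f.dispatch     # => flushing 1 report(s)
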
data/lib/karafka/web/processing/consumers/aggregators/metrics.rb CHANGED
@@ -86,65 +86,75 @@

        # Materializes the current state of consumers group data
        #
-       # At the moment we report only topics lags but the format we are using supports
-       # extending this information in the future if it would be needed.
-       #
        # @return [Hash] hash with nested consumers and their topics details structure
        # @note We do **not** report on a per partition basis because it would significantly
        #   increase needed storage.
        def materialize_consumers_groups_current_state
          cgs = {}

-         @active_reports.each do |_, details|
-           details.fetch(:consumer_groups).each do |group_name, group_details|
-             group_details.fetch(:subscription_groups).each do |_sg_name, sg_details|
-               sg_details.fetch(:topics).each do |topic_name, topic_details|
-                 partitions_data = topic_details.fetch(:partitions).values
+         iterate_partitions_data do |group_name, topic_name, partitions_data|
+           lags = partitions_data
+                  .map { |p_details| p_details.fetch(:lag, -1) }
+                  .reject(&:negative?)
+
+           lags_stored = partitions_data
+                         .map { |p_details| p_details.fetch(:lag_stored, -1) }
+                         .reject(&:negative?)

-                 lags = partitions_data
-                        .map { |p_details| p_details[:lag] || 0 }
+           offsets_hi = partitions_data
+                        .map { |p_details| p_details.fetch(:hi_offset, -1) }
                        .reject(&:negative?)

-                 lags_stored = partitions_data
-                               .map { |p_details| p_details.fetch(:lag_stored, -1) }
-                               .reject(&:negative?)
-
-                 offsets_hi = partitions_data
-                              .map { |p_details| p_details.fetch(:hi_offset, -1) }
-                              .reject(&:negative?)
-
-                 # Last stable offsets freeze durations - we pick the max freeze to indicate
-                 # the longest open transaction that potentially may be hanging
-                 ls_offsets_fd = partitions_data
-                                 .map { |p_details| p_details.fetch(:ls_offset_fd, 0) }
-                                 .reject(&:negative?)
-
-                 # If there is no lag that would not be negative, it means we did not mark
-                 # any messages as consumed on this topic in any partitions, hence we cannot
-                 # compute lag easily
-                 # We do not want to initialize any data for this topic, when there is nothing
-                 # useful we could present
-                 #
-                 # In theory lag stored must mean that lag must exist but just to be sure we
-                 # check both here
-                 next if lags.empty? || lags_stored.empty?
-
-                 cgs[group_name] ||= {}
-                 cgs[group_name][topic_name] = {
-                   lag_stored: lags_stored.sum,
-                   lag: lags.sum,
-                   pace: offsets_hi.sum,
-                   # Take max last stable offset duration without any change. This can
-                   # indicate a hanging transaction, because the offset will not move forward
-                   # and will stay with a growing freeze duration when stuck
-                   ls_offset_fd: ls_offsets_fd.max
-                 }
+           # Last stable offsets freeze durations - we pick the max freeze to indicate
+           # the longest open transaction that potentially may be hanging
+           ls_offsets_fd = partitions_data
+                           .map { |p_details| p_details.fetch(:ls_offset_fd, 0) }
+                           .reject(&:negative?)
+
+           cgs[group_name] ||= {}
+           cgs[group_name][topic_name] = {
+             lag_stored: lags_stored.sum,
+             lag: lags.sum,
+             pace: offsets_hi.sum,
+             # Take max last stable offset duration without any change. This can
+             # indicate a hanging transaction, because the offset will not move forward
+             # and will stay with a growing freeze duration when stuck
+             ls_offset_fd: ls_offsets_fd.max || 0
+           }
+         end
+
+         cgs
+       end
+
+       # Converts our reports data into a per-partition iterator
+       # Compensates for a case where the same partition data would be available for a short
+       # period of time in multiple processes' reports due to rebalances.
+       def iterate_partitions_data
+         cgs_topics = Hash.new { |h, v| h[v] = Hash.new { |h2, v2| h2[v2] = {} } }
+
+         # We need to sort the reports in case some of them contain data about the same
+         # topic partitions. Mostly during shutdowns and rebalances
+         @active_reports
+           .values
+           .sort_by { |report| report.fetch(:dispatched_at) }
+           .map { |details| details.fetch(:consumer_groups) }
+           .each do |consumer_groups|
+             consumer_groups.each do |group_name, group_details|
+               group_details.fetch(:subscription_groups).each_value do |sg_details|
+                 sg_details.fetch(:topics).each do |topic_name, topic_details|
+                   topic_details.fetch(:partitions).each do |partition_id, partition_data|
+                     cgs_topics[group_name][topic_name][partition_id] = partition_data
+                   end
+                 end
               end
             end
           end
-         end

-         cgs
+         cgs_topics.each do |group_name, topics_data|
+           topics_data.each do |topic_name, partitions_data|
+             yield(group_name, topic_name, partitions_data.values)
+           end
+         end
        end
      end
data/lib/karafka/web/processing/consumers/metrics.rb CHANGED
@@ -20,6 +20,10 @@ module Karafka
          return metrics_message.payload if metrics_message

          raise(::Karafka::Web::Errors::Processing::MissingConsumersMetricsError)
+       rescue Rdkafka::RdkafkaError => e
+         raise(e) unless e.code == :unknown_partition
+
+         raise(::Karafka::Web::Errors::Processing::MissingConsumersMetricsTopicError)
        end
      end
    end
data/lib/karafka/web/processing/consumers/schema_manager.rb CHANGED
@@ -27,12 +27,8 @@ module Karafka
      end

      # @param message [Karafka::Messages::Message] consumer report
-     # @return [Boolean] true if all good or false if incompatible
-     #
-     # @note The state switch is one-direction only. If we encounter an incompatible message
-     #   we need to stop processing so further checks even with valid should not switch it
-     #   back to valid
-     def compatible?(message)
+     # @return [Symbol] whether the given message uses an older, newer or the current schema
+     def call(message)
        schema_version = message.payload[:schema_version]

        # Save on memory allocation by reusing
@@ -40,8 +36,19 @@ module Karafka
        # an object with each message
        message_version = @cache[schema_version] ||= ::Gem::Version.new(schema_version)

-       return true if message_version <= CURRENT_VERSION
+       return :older if message_version < CURRENT_VERSION
+       return :newer if message_version > CURRENT_VERSION
+
+       :current
+     end

+     # Moves the schema manager state to incompatible to indicate in the Web-UI that we
+     # cannot move forward because the schema is incompatible.
+     #
+     # @note The state switch is one-direction only. If we encounter an incompatible message
+     #   we need to stop processing, so further checks, even with valid messages, should not
+     #   switch it back to valid
+     def invalidate!
        @valid = false
      end

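Because versions are compared as Gem::Version objects rather than strings, multi-segment schema versions order correctly ('1.2.10' sorts above '1.2.9'). A self-contained sketch of the same three-way comparison (the CURRENT_VERSION value here is illustrative):

  require 'rubygems'

  CURRENT_VERSION = Gem::Version.new('1.2.0')

  def schema_state(schema_version)
    version = Gem::Version.new(schema_version)

    return :older if version < CURRENT_VERSION
    return :newer if version > CURRENT_VERSION

    :current
  end

  schema_state('1.1.9')  # => :older
  schema_state('1.2.0')  # => :current
  schema_state('1.2.10') # => :newer
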
data/lib/karafka/web/processing/consumers/state.rb CHANGED
@@ -20,6 +20,10 @@ module Karafka
          return state_message.payload if state_message

          raise(::Karafka::Web::Errors::Processing::MissingConsumersStateError)
+       rescue Rdkafka::RdkafkaError => e
+         raise(e) unless e.code == :unknown_partition
+
+         raise(::Karafka::Web::Errors::Processing::MissingConsumersStatesTopicError)
        end
      end
    end
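
Both fetchers (metrics and state) now share the same rescue-and-translate idiom: an unknown_partition error from librdkafka here means the topic itself does not exist, so it is converted into a descriptive domain error, while any other Rdkafka error propagates untouched. The idiom in isolation (the error class is a stand-in for the Web UI ones):

  require 'rdkafka'

  class MissingTopicError < StandardError; end

  def fetch_with_translation
    yield
  rescue Rdkafka::RdkafkaError => e
    # Translate only the missing-topic case; re-raise everything else
    raise(e) unless e.code == :unknown_partition

    raise MissingTopicError
  end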