karafka 2.4.18 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. checksums.yaml +4 -4
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/workflows/ci.yml +59 -15
  4. data/.github/workflows/push.yml +35 -0
  5. data/.github/workflows/verify-action-pins.yml +16 -0
  6. data/.ruby-version +1 -1
  7. data/CHANGELOG.md +75 -0
  8. data/Gemfile +2 -2
  9. data/Gemfile.lock +72 -53
  10. data/LICENSE-COMM +2 -2
  11. data/README.md +1 -1
  12. data/Rakefile +4 -0
  13. data/bin/clean_kafka +43 -0
  14. data/bin/integrations +20 -6
  15. data/bin/rspecs +15 -3
  16. data/bin/verify_kafka_warnings +35 -0
  17. data/bin/verify_topics_naming +27 -0
  18. data/config/locales/errors.yml +5 -1
  19. data/config/locales/pro_errors.yml +13 -2
  20. data/docker-compose.yml +1 -1
  21. data/examples/payloads/avro/.gitkeep +0 -0
  22. data/examples/payloads/json/sample_set_01/enrollment_event.json +579 -0
  23. data/examples/payloads/json/sample_set_01/ingestion_event.json +30 -0
  24. data/examples/payloads/json/sample_set_01/transaction_event.json +17 -0
  25. data/examples/payloads/json/sample_set_01/user_event.json +11 -0
  26. data/karafka.gemspec +3 -8
  27. data/lib/karafka/active_job/current_attributes.rb +1 -1
  28. data/lib/karafka/active_job/job_extensions.rb +4 -1
  29. data/lib/karafka/admin/acl.rb +5 -1
  30. data/lib/karafka/admin/configs.rb +5 -1
  31. data/lib/karafka/admin.rb +89 -42
  32. data/lib/karafka/base_consumer.rb +17 -8
  33. data/lib/karafka/cli/base.rb +8 -2
  34. data/lib/karafka/cli/topics/align.rb +7 -4
  35. data/lib/karafka/cli/topics/base.rb +17 -0
  36. data/lib/karafka/cli/topics/create.rb +9 -7
  37. data/lib/karafka/cli/topics/delete.rb +4 -2
  38. data/lib/karafka/cli/topics/help.rb +39 -0
  39. data/lib/karafka/cli/topics/repartition.rb +4 -2
  40. data/lib/karafka/cli/topics.rb +10 -3
  41. data/lib/karafka/cli.rb +2 -0
  42. data/lib/karafka/connection/client.rb +39 -9
  43. data/lib/karafka/connection/listener.rb +24 -12
  44. data/lib/karafka/connection/messages_buffer.rb +1 -1
  45. data/lib/karafka/connection/proxy.rb +4 -1
  46. data/lib/karafka/constraints.rb +3 -3
  47. data/lib/karafka/contracts/base.rb +3 -2
  48. data/lib/karafka/contracts/config.rb +5 -1
  49. data/lib/karafka/contracts/topic.rb +1 -1
  50. data/lib/karafka/errors.rb +46 -2
  51. data/lib/karafka/helpers/async.rb +3 -1
  52. data/lib/karafka/helpers/interval_runner.rb +8 -0
  53. data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
  54. data/lib/karafka/instrumentation/logger_listener.rb +95 -32
  55. data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
  56. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
  57. data/lib/karafka/instrumentation/vendors/kubernetes/base_listener.rb +17 -2
  58. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +29 -6
  59. data/lib/karafka/instrumentation/vendors/kubernetes/swarm_liveness_listener.rb +9 -0
  60. data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
  61. data/lib/karafka/pro/cleaner.rb +8 -0
  62. data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
  63. data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
  64. data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
  65. data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
  66. data/lib/karafka/pro/connection/manager.rb +5 -8
  67. data/lib/karafka/pro/encryption.rb +12 -1
  68. data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
  69. data/lib/karafka/pro/iterator/expander.rb +5 -3
  70. data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
  71. data/lib/karafka/pro/loader.rb +10 -0
  72. data/lib/karafka/pro/processing/coordinator.rb +4 -1
  73. data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +32 -3
  74. data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
  75. data/lib/karafka/pro/processing/filters/base.rb +10 -2
  76. data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
  77. data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
  78. data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
  79. data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
  80. data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
  81. data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
  82. data/lib/karafka/pro/processing/partitioner.rb +1 -13
  83. data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
  84. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
  85. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
  86. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
  87. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
  88. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
  89. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  90. data/lib/karafka/pro/processing/strategies/default.rb +36 -8
  91. data/lib/karafka/pro/processing/strategies/dlq/default.rb +15 -10
  92. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
  93. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
  94. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
  95. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
  96. data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
  97. data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
  98. data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
  99. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
  100. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  101. data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
  102. data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
  103. data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
  104. data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
  105. data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
  106. data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
  107. data/lib/karafka/pro/recurring_tasks.rb +21 -2
  108. data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
  109. data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
  110. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
  111. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
  112. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
  113. data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
  114. data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
  115. data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
  116. data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
  117. data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
  118. data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
  119. data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
  120. data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
  121. data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
  122. data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +3 -2
  123. data/lib/karafka/pro/routing/features/swarm.rb +4 -1
  124. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
  125. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  126. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
  127. data/lib/karafka/pro/scheduled_messages/consumer.rb +61 -26
  128. data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
  129. data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
  130. data/lib/karafka/pro/scheduled_messages/dispatcher.rb +2 -1
  131. data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
  132. data/lib/karafka/pro/scheduled_messages/proxy.rb +15 -3
  133. data/lib/karafka/pro/scheduled_messages/serializer.rb +2 -4
  134. data/lib/karafka/pro/scheduled_messages/state.rb +20 -23
  135. data/lib/karafka/pro/scheduled_messages/tracker.rb +34 -8
  136. data/lib/karafka/pro/scheduled_messages.rb +17 -1
  137. data/lib/karafka/processing/coordinators_buffer.rb +1 -0
  138. data/lib/karafka/processing/strategies/default.rb +4 -4
  139. data/lib/karafka/routing/builder.rb +12 -3
  140. data/lib/karafka/routing/features/base/expander.rb +8 -2
  141. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  142. data/lib/karafka/routing/subscription_group.rb +1 -1
  143. data/lib/karafka/runner.rb +7 -1
  144. data/lib/karafka/server.rb +21 -18
  145. data/lib/karafka/setup/attributes_map.rb +2 -0
  146. data/lib/karafka/setup/config.rb +40 -7
  147. data/lib/karafka/setup/defaults_injector.rb +26 -1
  148. data/lib/karafka/status.rb +6 -1
  149. data/lib/karafka/swarm/node.rb +31 -0
  150. data/lib/karafka/swarm/supervisor.rb +9 -2
  151. data/lib/karafka/templates/karafka.rb.erb +14 -1
  152. data/lib/karafka/version.rb +1 -1
  153. data/lib/karafka.rb +17 -9
  154. data/renovate.json +14 -2
  155. metadata +41 -40
  156. checksums.yaml.gz.sig +0 -0
  157. data/certs/cert.pem +0 -26
  158. data.tar.gz.sig +0 -0
  159. metadata.gz.sig +0 -0
data/bin/integrations CHANGED
@@ -45,23 +45,29 @@ class Scenario
     'shutdown/on_hanging_on_shutdown_job_and_a_shutdown_spec.rb' => [2].freeze,
     'shutdown/on_hanging_listener_and_shutdown_spec.rb' => [2].freeze,
     'swarm/forceful_shutdown_of_hanging_spec.rb' => [2].freeze,
+    'swarm/with_blocking_at_exit_spec.rb' => [2].freeze,
     'instrumentation/post_errors_instrumentation_error_spec.rb' => [1].freeze,
     'cli/declaratives/delete/existing_with_exit_code_spec.rb' => [2].freeze,
     'cli/declaratives/create/new_with_exit_code_spec.rb' => [2].freeze,
-    'cli/declaratives/plan/when_changes_with_detailed_exit_code_spec.rb' => [2].freeze
+    'cli/declaratives/plan/when_changes_with_detailed_exit_code_spec.rb' => [2].freeze,
+    'cli/declaratives/align/incorrectly_spec.rb' => [1].freeze
   }.freeze
 
   private_constant :MAX_RUN_TIME, :EXIT_CODES
 
+  attr_reader :index
+
   # Creates scenario instance and runs in the background process
   #
   # @param path [String] path to the scenarios file
-  def initialize(path)
+  def initialize(path, index)
     @path = path
     # First 1024 characters from stdout
     @stdout_head = ''
     # Last 1024 characters from stdout
     @stdout_tail = ''
+    # Assigns the index for parallel execution in the CI if requested
+    @index = index
   end
 
   # Starts running given scenario in a separate process
@@ -252,16 +258,24 @@ specs.delete_if do |spec|
   false
 end
 
-raise ArgumentError, "No integration specs with filters: #{ARGV.join(', ')}" if specs.empty?
-
 # Randomize order
-seed = (ENV['SEED'] || rand(0..10_000)).to_i
+seed = (ENV['SPECS_SEED'] || rand(0..10_000)).to_i
+group = (ENV['SPECS_GROUP'] || -1).to_i
+groups = (ENV['SPECS_GROUPS'] || 2).to_i
 
 puts "Random seed: #{seed}"
+puts "Group: #{group}"
+puts "Groups: #{groups}"
 
 scenarios = specs
   .shuffle(random: Random.new(seed))
-  .map { |integration_test| Scenario.new(integration_test) }
+  .map
+  .with_index { |integration, index| Scenario.new(integration, index % groups) }
+  .delete_if { |scenario| scenario.index != group && group != -1 }
+
+raise ArgumentError, "No integration specs with filters: #{ARGV.join(', ')}" if scenarios.empty?
+
+puts "Running #{scenarios.size} scenarios"
 
 regulars = scenarios.reject(&:linear?)
 linears = scenarios - regulars
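The runner can now shard scenarios across CI jobs: every job shuffles with the same seed, tags each scenario with index % groups, and keeps only its own bucket, so the union of all groups covers each scenario exactly once. A minimal standalone sketch of that partitioning (hypothetical spec names; the real script reads SPECS_SEED, SPECS_GROUP and SPECS_GROUPS from the environment):

# Each job runs with the same seed but a different `group`,
# so the buckets are deterministic and disjoint across jobs.
specs  = %w[spec_a spec_b spec_c spec_d spec_e]
seed   = 42 # shared across jobs, like SPECS_SEED
groups = 2  # total number of parallel jobs, like SPECS_GROUPS
group  = 0  # this job's bucket, like SPECS_GROUP (-1 means run everything)

picked = specs
  .shuffle(random: Random.new(seed))
  .each_with_index
  .select { |_spec, index| group == -1 || index % groups == group }
  .map(&:first)

puts "Running #{picked.size} of #{specs.size} scenarios: #{picked.join(', ')}"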
data/bin/rspecs CHANGED
@@ -3,8 +3,20 @@
 set -e
 
 # Run only regular non-forking specs first
-SPECS_TYPE=regular bundle exec rspec --tag ~type:pro --tag ~mode:fork
+SPECS_TYPE=regular bundle exec rspec \
+  --tag ~type:pro \
+  --tag ~mode:fork \
+  --exclude-pattern "**/pro/**/*_spec.rb" \
+  spec/lib/
+
 # Run forking specs, they need to run in isolation not to crash because of librdkafka
-SPECS_TYPE=regular bundle exec rspec --tag mode:fork
+SPECS_TYPE=regular bundle exec rspec \
+  --tag mode:fork \
+  --exclude-pattern "**/pro/**/*_spec.rb" \
+  spec/lib/
+
 # Run pro specs at the end
-SPECS_TYPE=pro bundle exec rspec --tag type:pro --tag ~mode:fork
+SPECS_TYPE=pro bundle exec rspec \
+  --tag type:pro \
+  --tag ~mode:fork \
+  spec/lib/
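The suite now runs in three passes scoped to spec/lib/, with Pro specs kept out of the first two passes both by tag and by path. This assumes specs opt into a pass via RSpec metadata; a hypothetical example of a spec that only the forking pass would pick up:

# Matched by `--tag mode:fork` and skipped by `--tag ~mode:fork`
RSpec.describe 'swarm forking behavior', mode: :fork do
  it 'can fork without crashing librdkafka' do
    pid = fork { exit!(0) }
    _, status = Process.wait2(pid)
    expect(status.exitstatus).to eq(0)
  end
end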
data/bin/verify_kafka_warnings ADDED
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Checks Kafka logs for unsupported warning patterns
+# Only specified warnings are allowed, all others should trigger failure
+
+allowed_patterns=(
+  "Performing controller activation"
+  "registered with feature metadata.version"
+  "Replayed TopicRecord for"
+  "Replayed PartitionRecord for"
+  "Previous leader None and previous leader epoch"
+  "Creating new"
+)
+
+# Get all warnings
+warnings=$(docker logs --since=0 kafka | grep WARN)
+exit_code=0
+
+while IFS= read -r line; do
+  allowed=0
+  for pattern in "${allowed_patterns[@]}"; do
+    if echo "$line" | grep -q "$pattern"; then
+      allowed=1
+      break
+    fi
+  done
+
+  if [ $allowed -eq 0 ]; then
+    echo "Unexpected warning found:"
+    echo "$line"
+    exit_code=1
+  fi
+done <<< "$warnings"
+
+exit $exit_code
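Usage note: the loop flips exit_code to 1 on the first WARN line outside the allow-list but keeps scanning, so the script reports every unexpected warning before failing. Run as a post-suite CI step, a non-zero exit turns new broker warnings into build failures.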
data/bin/verify_topics_naming ADDED
@@ -0,0 +1,27 @@
+#!/usr/bin/env ruby
+
+# This script verifies that we do not create (except few needed exceptions) test topics that do
+# not start with the "it-" prefix which is our standard.
+#
+# This ensures that we can clearly identify all test topics for removal in case of doing dev work
+# on a long-lived Kafka cluster without option to fully reset it.
+#
+# It also ensures we have one convention that we can follow.
+
+require_relative '../spec/integrations_helper.rb'
+
+setup_karafka
+
+# Please note that "__" starting topics are not here by default. It is expected.
+invalid = Karafka::Admin
+  .cluster_info
+  .topics
+  .map { |topic| topic[:topic_name] }
+  .select { |topic| !topic.start_with?('it-') }
+  .select { |topic| topic.length <= 6 }
+
+invalid.each do |invalid_name|
+  puts "#{invalid_name} does not start with the \"it-\" prefix"
+end
+
+exit invalid.empty? ? 0 : 1
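As the script shows, Karafka::Admin.cluster_info#topics returns an array of hashes keyed by :topic_name among others. A quick console sketch built on the same call, assuming a configured Karafka app and a reachable cluster:

# List topics that ignore the "it-" convention, per the script above
Karafka::Admin
  .cluster_info
  .topics
  .map { |topic| topic[:topic_name] }
  .reject { |name| name.start_with?('it-') }
  .each { |name| puts name }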
data/config/locales/errors.yml CHANGED
@@ -24,6 +24,7 @@ en:
     key_must_be_a_symbol: All keys under the kafka settings scope need to be symbols
     max_timeout_vs_pause_max_timeout: pause_timeout must be less or equal to pause_max_timeout
     shutdown_timeout_vs_max_wait_time: shutdown_timeout must be more than max_wait_time
+    worker_thread_priority_format: must be between -3 and 3
 
     oauth.token_provider_listener_format: 'must be false or respond to #on_oauthbearer_token_refresh'
 
@@ -36,6 +37,7 @@ en:
     internal.processing.expansions_selector_format: cannot be nil
     internal.processing.executor_class_format: cannot be nil
     internal.processing.worker_job_call_wrapper_format: 'needs to be false or respond to #wrap'
+    internal.processing.errors_tracker_class_format: 'needs to be nil or a class'
 
     internal.active_job.dispatcher_format: cannot be nil
     internal.active_job.job_options_contract_format: cannot be nil
@@ -67,6 +69,7 @@ en:
     internal.connection.proxy.metadata.timeout_format: needs to be an integer bigger than 0
     internal.connection.proxy.metadata.max_attempts_format: needs to be an integer bigger than 0
     internal.connection.proxy.metadata.wait_time_format: needs to be an integer bigger than 0
+    internal.connection.listener_thread_priority_format: must be between -3 and 3
 
     internal.swarm.manager_format: cannot be nil
     internal.swarm.orphaned_exit_code_format: needs to be an integer bigger or equal to 0
@@ -81,7 +84,8 @@ en:
     admin.kafka_format: needs to be a hash
     admin.group_id_format: 'needs to be a string with a Kafka accepted format'
     admin.max_wait_time_format: 'needs to be an integer bigger than 0'
-    admin.max_attempts_format: 'needs to be an integer bigger than 0'
+    admin.retry_backoff_format: 'needs to be an integer bigger than 100'
+    admin.max_retries_duration_format: 'needs to be an integer bigger than 1000'
 
     swarm.nodes_format: 'needs to be an integer bigger than 0'
     swarm.node_format: needs to be false or node instance
  swarm.node_format: needs to be false or node instance
@@ -5,6 +5,7 @@ en:
5
5
  virtual_partitions.max_partitions_format: needs to be equal or more than 1
6
6
  virtual_partitions.offset_metadata_strategy_format: needs to be either :exact or :current
7
7
  virtual_partitions.reducer_format: "needs to respond to `#call`"
8
+ virtual_partitions.distribution_format: "needs to be either :consistent or :balanced"
8
9
 
9
10
  long_running_job.active_format: needs to be either true or false
10
11
 
@@ -58,6 +59,8 @@ en:
58
59
  subscription_group_details_multiplexing_boot_mismatch: 'boot needs to be between min and max'
59
60
  subscription_group_details.multiplexing_boot_format: 'needs to be an integer equal or more than 1'
60
61
  subscription_group_details.multiplexing_boot_not_dynamic: 'needs to be equal to max when not in dynamic mode'
62
+ subscription_group_details_multiplexing_one_not_enough: 'min and max cannot equal 1'
63
+ subscription_group_details.multiplexing_scale_delay_format: 'needs to be an integer equal or more than 1000'
61
64
 
62
65
  swarm.active_format: needs to be true
63
66
  swarm.nodes_format: needs to be a range, array of nodes ids or a hash with direct assignments
@@ -88,8 +91,16 @@ en:
88
91
  patterns_format: must be an array with hashes
89
92
  patterns_missing: needs to be present
90
93
  patterns_regexps_not_unique: 'must be unique within consumer group'
94
+
91
95
  direct_assignments_homogenous: 'single consumer group cannot mix regular and direct assignments'
92
96
 
97
+ parallel_segments.partitioner_format: needs to be defined and needs to respond to `#call`
98
+ parallel_segments.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
99
+ parallel_segments.count_format: needs to be equal or more than 1
100
+ parallel_segments.active_format: needs to be boolean
101
+ parallel_segments.reducer_format: "needs to respond to `#call`"
102
+ parallel_segments.merge_key_format: "needs to be a non-empty string"
103
+
93
104
  pattern:
94
105
  regexp_format: must be a regular expression
95
106
  name_format: 'needs to be a string with a Kafka accepted format'
@@ -113,8 +124,8 @@ en:
113
124
 
114
125
  recurring_tasks.consumer_class_format: 'needs to inherit from Karafka::BaseConsumer'
115
126
  recurring_tasks.group_id_format: 'needs to be a string with a Kafka accepted format'
116
- recurring_tasks.topics.schedules_format: 'needs to be a string with a Kafka accepted format'
117
- recurring_tasks.topics.logs_format: 'needs to be a string with a Kafka accepted format'
127
+ recurring_tasks.topics.schedules.name_format: 'needs to be a string with a Kafka accepted format'
128
+ recurring_tasks.topics.logs.name_format: 'needs to be a string with a Kafka accepted format'
118
129
  recurring_tasks.interval_format: 'needs to be equal or more than 1000 and an integer'
119
130
  recurring_tasks.deserializer_format: 'needs to be configured'
120
131
  recurring_tasks.logging_format: needs to be a boolean
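These entries validate two Pro features: the :balanced virtual partitions distribution and the new parallel segments routing. A hedged routing sketch consistent with the keys above (the consumer class is hypothetical and the exact DSL should be checked against the Karafka Pro docs; setup omitted):

class KarafkaApp < Karafka::App
  routes.draw do
    consumer_group :events do
      # Validated by the parallel_segments.* keys above
      parallel_segments(
        count: 2,                                 # count_format: equal or more than 1
        partitioner: ->(message) { message.key }, # must respond to #call
        merge_key: '-parallel-'                   # must be a non-empty string
      )

      topic :user_events do
        consumer UserEventsConsumer # hypothetical consumer class
        virtual_partitions(
          partitioner: ->(message) { message.key },
          distribution: :balanced # :consistent or :balanced per the new key
        )
      end
    end
  end
end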
data/docker-compose.yml CHANGED
@@ -1,7 +1,7 @@
 services:
   kafka:
     container_name: kafka
-    image: confluentinc/cp-kafka:7.8.0
+    image: confluentinc/cp-kafka:8.0.0
 
     ports:
       - 9092:9092