karafka 1.4.13 → 2.0.0

Files changed (170)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
data/bin/benchmarks ADDED
@@ -0,0 +1,85 @@
+ #!/usr/bin/env ruby
+
+ # Runner for running given benchmark cases
+ # Some of the cases require pre-populated data and we populate it in the places that need it
+ # In other cases we generate this data in a background process, so the partition data stream
+ # is consistent and we don't end up consuming huge batches of a single partition.
+
+ require 'open3'
+ require 'pathname'
+
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..'))
+
+ ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../')))
+
+ # Load all the benchmarks
+ benchmarks = Dir[ROOT_PATH.join('spec/benchmarks/**/*.rb')]
+
+ # If a filter is provided, apply it
+ benchmarks.delete_if { |name| !name.include?(ARGV[0]) } if ARGV[0]
+
+ raise ArgumentError, "No benchmarks with filter: #{ARGV[0]}" if benchmarks.empty?
+
+ # We may skip seeding when running the benchmarks multiple times; since we do not
+ # commit offsets, we can skip generating more data
+ if ENV['SEED']
+   require 'spec/benchmarks_helper'
+
+   # We need to set up Karafka here to have a producer for data seeding
+   setup_karafka
+
+   # This takes some time but needs to run only once per benchmark session
+   puts 'Seeding benchmarks data...'
+
+   producer = Karafka::App.producer
+
+   # We make our data JSON compatible so we can also benchmark serialization
+   elements = Array.new(100_000) { { a: :b }.to_json }
+
+   # We do not populate data of benchmarks_00_10 as we use it with live-stream data only
+   %w[
+     benchmarks_00_01
+     benchmarks_00_05
+   ].each do |topic_name|
+     partitions_count = topic_name.split('_').last.to_i
+
+     partitions_count.times do |partition|
+       puts "Seeding #{topic_name}:#{partition}"
+
+       elements.each_slice(10_000) do |data_slice|
+         data = data_slice.map do |payload|
+           { topic: topic_name, payload: payload, partition: partition }
+         end
+
+         producer.buffer_many(data)
+         producer.flush_sync
+       end
+     end
+   end
+ end
+
+ # Selects requested benchmarks and runs them one after another
+ benchmarks.each do |benchmark_path|
+   puts "Running #{benchmark_path.gsub("#{ROOT_PATH}/spec/benchmarks/", '')}"
+
+   benchmark = "bundle exec ruby -r ./spec/benchmarks_helper.rb #{benchmark_path}"
+
+   Open3.popen3(benchmark) do |_stdin, stdout, stderr, thread|
+     t1 = Thread.new do
+       while (line = stdout.gets)
+         puts(line)
+       end
+     rescue IOError
+     end
+
+     t2 = Thread.new do
+       while (line = stderr.gets)
+         puts(line)
+       end
+     rescue IOError
+     end
+
+     thread.join
+   end
+ end
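For reference, the runner above keys data seeding off the SEED environment variable and treats its first argument as a substring filter on benchmark paths. A hypothetical invocation (the 'serialization' filter is illustrative, not a guaranteed benchmark name):

  # seed the benchmark topics once, then run every benchmark
  SEED=true bin/benchmarks

  # run only the benchmarks whose path contains the given filter
  bin/benchmarks serialization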
data/bin/create_token ADDED
@@ -0,0 +1,22 @@
+ #!/usr/bin/env ruby
+
+ require 'openssl'
+ require 'base64'
+ require 'json'
+ require 'date'
+
+ PRIVATE_KEY_LOCATION = File.join(Dir.home, '.ssh', 'karafka-pro', 'id_rsa')
+
+ # Name of the entity that acquires the license
+ ENTITY = ARGV[0]
+
+ raise ArgumentError, 'Entity missing' if ENTITY.nil? || ENTITY.empty?
+
+ pro_token_data = { entity: ENTITY }
+
+ # This code uses my private key to generate a new token for Karafka Pro capabilities
+ private_key = OpenSSL::PKey::RSA.new(File.read(PRIVATE_KEY_LOCATION))
+
+ bin_key = private_key.private_encrypt(pro_token_data.to_json)
+
+ puts Base64.encode64(bin_key)
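Since the token is just a JSON payload encrypted with the private key, the matching public key (shipped in this changeset as data/certs/karafka-pro.pem) can read it back. A minimal sketch of the inverse operation, for illustration only (this is not Karafka's actual licenser code):

  #!/usr/bin/env ruby

  require 'openssl'
  require 'base64'
  require 'json'

  # Base64 token produced by bin/create_token, passed on the command line
  token = ARGV[0]

  public_key = OpenSSL::PKey::RSA.new(File.read('certs/karafka-pro.pem'))

  # The reverse of private_encrypt: anyone holding the public key can decode and inspect it
  data = JSON.parse(public_key.public_decrypt(Base64.decode64(token)))

  puts data['entity']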
data/bin/integrations ADDED
@@ -0,0 +1,237 @@
+ #!/usr/bin/env ruby
+
+ # Runner to run integration specs in parallel
+
+ # Some of the integration specs run pristine, without bundler.
+ # If we ran this runner via bundle exec, bundler would inject its own context
+ # into them, messing things up heavily
+ raise 'This code needs to be executed WITHOUT bundle exec' if Kernel.const_defined?(:Bundler)
+
+ require 'open3'
+ require 'fileutils'
+ require 'pathname'
+ require 'tmpdir'
+ require 'etc'
+
+ ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../')))
+
+ # How many child processes with integration specs do we want to run in parallel
+ # When the value is high, there's a problem with thread allocation on GitHub CI; that is why
+ # we limit it. Locally we can run a lot of those, as many of them have sleeps and do not use
+ # a lot of CPU
+ CONCURRENCY = ENV.key?('CI') ? 5 : Etc.nprocessors * 2
+
+ # How many bytes of stdout we want to keep in the buffer for when we need to print it
+ MAX_BUFFER_OUTPUT = 51_200
+
+ # Abstraction around a single test scenario execution process
+ class Scenario
+   # How long a scenario can run before we kill it
+   # This is a fail-safe just in case something would hang
+   MAX_RUN_TIME = 3 * 60 # 3 minutes tops
+
+   # There are rare cases where Karafka may force shutdown for some of the integration cases
+   # This lists exactly those, together with their expected exit codes
+   EXIT_CODES = {
+     default: [0],
+     'consumption/worker_critical_error_behaviour.rb' => [0, 2].freeze,
+     'shutdown/on_hanging_jobs_and_a_shutdown.rb' => [2].freeze,
+     'shutdown/on_hanging_on_shutdown_job_and_a_shutdown.rb' => [2].freeze,
+     'shutdown/on_hanging_listener_and_shutdown.rb' => [2].freeze
+   }.freeze
+
+   private_constant :MAX_RUN_TIME, :EXIT_CODES
+
+   # Creates a scenario instance
+   #
+   # @param path [String] path to the scenario file
+   def initialize(path)
+     @path = path
+     # Tail of the scenario stdout (up to MAX_BUFFER_OUTPUT bytes)
+     @stdout_tail = ''
+   end
+
+   # Starts running given scenario in a separate process
+   def start
+     @stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
+     @started_at = current_time
+   end
+
+   # @return [String] integration spec name
+   def name
+     @path.gsub("#{ROOT_PATH}/spec/integrations/", '')
+   end
+
+   # @return [Boolean] true if spec is pristine
+   def pristine?
+     scenario_dir = File.dirname(@path)
+
+     # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
+     # to run bundle install, etc in order to run it
+     File.exist?(File.join(scenario_dir, 'Gemfile'))
+   end
+
+   # @return [Boolean] did this scenario finish or is it still running
+   def finished?
+     # If the process is running too long, kill it
+     if current_time - @started_at > MAX_RUN_TIME
+       @wait_thr.kill
+
+       begin
+         Process.kill('TERM', pid)
+       # It may finish right after we want to kill it, that's why we ignore this
+       rescue Errno::ESRCH
+       end
+     end
+
+     # We read the child stdout so the pipe buffer won't fill up, as we use our default logger
+     # that prints to both test.log and stdout. Otherwise, after reaching the buffer size, the
+     # child would hang
+     buffer = ''
+     @stdout.read_nonblock(MAX_BUFFER_OUTPUT, buffer, exception: false)
+     @stdout_tail << buffer
+     @stdout_tail = @stdout_tail[-MAX_BUFFER_OUTPUT..-1] || @stdout_tail
+
+     !@wait_thr.alive?
+   end
+
+   # @return [Boolean] did this scenario finish successfully or not
+   def success?
+     expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
+
+     expected_exit_codes.include?(exit_code)
+   end
+
+   # @return [Integer] pid of the process of this scenario
+   def pid
+     @wait_thr.pid
+   end
+
+   # @return [Integer] exit code of the process running given scenario
+   def exit_code
+     # There may be no exit status if we killed the thread
+     @wait_thr.value&.exitstatus || 123
+   end
+
+   # Prints a status report when the scenario is finished, and its stdout if it failed
+   def report
+     if success?
+       print "\e[#{32}m#{'.'}\e[0m"
+     else
+       buffer = ''
+
+       @stderr.read_nonblock(MAX_BUFFER_OUTPUT, buffer, exception: false)
+
+       puts
+       puts "\e[#{31}m#{'[FAILED]'}\e[0m #{name}"
+       puts "Exit code: #{exit_code}"
+       puts @stdout_tail
+       puts buffer
+       puts
+     end
+   end
+
+   private
+
+   # Sets up a proper environment for a given spec to run and returns the run command
+   # @return [String] run command
+   def init_and_build_cmd
+     # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
+     # to run bundle install, etc in order to run it
+     if pristine?
+       scenario_dir = File.dirname(@path)
+       # We copy the spec into a temp dir so we don't pollute the spec location with logs, etc
+       temp_dir = Dir.mktmpdir
+       file_name = File.basename(@path)
+
+       FileUtils.cp_r("#{scenario_dir}/.", temp_dir)
+
+       <<~CMD
+         cd #{temp_dir} &&
+         KARAFKA_GEM_DIR=#{ROOT_PATH} \
+         BUNDLE_AUTO_INSTALL=true \
+         PRISTINE_MODE=true \
+         bundle exec ruby -r #{ROOT_PATH}/spec/integrations_helper.rb #{file_name}
+       CMD
+     else
+       <<~CMD
+         KARAFKA_GEM_DIR=#{ROOT_PATH} \
+         bundle exec ruby -r ./spec/integrations_helper.rb #{@path}
+       CMD
+     end
+   end
+
+   # @return [Float] current monotonic time
+   def current_time
+     Process.clock_gettime(Process::CLOCK_MONOTONIC)
+   end
+ end
+
+ # Load all the specs
+ specs = Dir[ROOT_PATH.join('spec/integrations/**/*.rb')]
+
+ # If filters are provided, apply them
+ # Several filters can be given one after another; all of them are applied
+ ARGV.each do |filter|
+   specs.delete_if { |name| !name.include?(filter) }
+ end
+
+ raise ArgumentError, "No integration specs with filters: #{ARGV.join(', ')}" if specs.empty?
+
+ # Randomize order
+ seed = (ENV['SEED'] || rand(0..10_000)).to_i
+
+ puts "Random seed: #{seed}"
+
+ scenarios = specs
+             .shuffle(random: Random.new(seed))
+             .map { |integration_test| Scenario.new(integration_test) }
+
+ regulars = scenarios.reject(&:pristine?)
+ pristine = scenarios.select(&:pristine?)
+
+ active_scenarios = []
+ finished_scenarios = []
+
+ while finished_scenarios.size < scenarios.size
+   # If we have space to run another scenario, we add it
+   if active_scenarios.size < CONCURRENCY
+     scenario = nil
+     # We can run only one pristine scenario at a time due to concurrency issues within bundler
+     # Since they usually take longer than others, we try to run them as early as possible when
+     # there is a slot
+     scenario = pristine.pop unless active_scenarios.any?(&:pristine?)
+     scenario ||= regulars.pop
+
+     if scenario
+       scenario.start
+       active_scenarios << scenario
+     end
+   end
+
+   active_scenarios.select(&:finished?).each do |exited|
+     scenario = active_scenarios.delete(exited)
+     scenario.report
+     finished_scenarios << scenario
+   end
+
+   sleep(0.1)
+ end
+
+ failed_scenarios = finished_scenarios.reject(&:success?)
+
+ # Report once more on the failed scenarios
+ # This only lists the scenarios that failed, without printing their stdout again
+ if failed_scenarios.empty?
+   puts
+ else
+   puts "\nFailed scenarios:\n\n"
+
+   failed_scenarios.each do |scenario|
+     puts "\e[#{31}m#{'[FAILED]'}\e[0m #{scenario.name}"
+   end
+
+   puts
+
+   # Exit with 1 if not all scenarios were successful
+   exit 1
+ end
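For reference, two hypothetical invocations of the runner above ('shutdown' is just an example path substring; note that SEED here fixes the shuffle order, unlike in bin/benchmarks where it triggers data seeding):

  # run the whole integration suite in parallel
  bin/integrations

  # run only specs whose path contains 'shutdown', with a reproducible order
  SEED=1000 bin/integrations shutdown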
data/bin/karafka CHANGED
@@ -2,6 +2,10 @@
 
  require 'karafka'
 
+ # We set this to indicate that the process we are in (whatever it does) was started via
+ # our bin/karafka CLI
+ ENV['KARAFKA_CLI'] = 'true'
+
  # If there is a boot file, we need to require it as we expect it to contain
  # Karafka app setup, routes, etc
  if File.exist?(Karafka.boot_file)
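Side note: a minimal, hypothetical way application code could consume this flag (this exact check is not part of the changeset shown here):

  # Detect whether the current process was started via bin/karafka
  started_via_cli = ENV['KARAFKA_CLI'] == 'true'
  puts "Started via the Karafka CLI: #{started_via_cli}"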
data/bin/scenario ADDED
@@ -0,0 +1,29 @@
+ #!/usr/bin/env ruby
+
+ # Runner for non-parallel execution of a single scenario.
+ # It prints all the info to stdout, etc, and basically replaces itself with the scenario
+ # execution. It is useful when we work with a single spec and we need all the debug info
+
+ raise 'This code needs to be executed WITHOUT bundle exec' if Kernel.const_defined?(:Bundler)
+
+ require 'open3'
+ require 'fileutils'
+ require 'pathname'
+ require 'tmpdir'
+ require 'etc'
+
+ ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../')))
+
+ # Load all the specs
+ specs = Dir[ROOT_PATH.join('spec/integrations/**/*.rb')]
+
+ # If filters are provided, apply them
+ # Several filters can be given one after another; all of them are applied
+ ARGV.each do |filter|
+   specs.delete_if { |name| !name.include?(filter) }
+ end
+
+ raise ArgumentError, "No integration specs with filters: #{ARGV.join(', ')}" if specs.empty?
+ raise ArgumentError, "Many specs found with filters: #{ARGV.join(', ')}" if specs.size != 1
+
+ exec("bundle exec ruby -r #{ROOT_PATH}/spec/integrations_helper.rb #{specs[0]}")
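A hypothetical single-spec run with full output, using one of the spec paths listed in the runner's EXIT_CODES above as the filter:

  bin/scenario shutdown/on_hanging_jobs_and_a_shutdown.rb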
data/bin/stress_many ADDED
@@ -0,0 +1,13 @@
+ #!/bin/bash
+
+ # Runs integration specs in an endless loop
+ # This allows us to ensure (after a long enough time) that the integration test suite is stable
+ # and that there are no anomalies when running it for a long period of time
+
+ set -e
+
+ while :
+ do
+   clear
+   bin/integrations $1
+ done
data/bin/stress_one ADDED
@@ -0,0 +1,13 @@
+ #!/bin/bash
+
+ # Runs a single integration spec in an endless loop
+ # This allows us to ensure (after a long enough time) that the integration spec is stable and
+ # that there are no anomalies when running it for a long period of time
+
+ set -e
+
+ while :
+ do
+   clear
+   bin/scenario $1
+ done
data/bin/wait_for_kafka ADDED
@@ -0,0 +1,20 @@
+ #!/bin/bash
+
+ # This script allows us to wait for the Kafka docker container to be fully ready
+ # We consider it fully ready when all the topics that need to be created are created as expected
+
+ KAFKA_NAME='karafka_20_kafka'
+ ZOOKEEPER='zookeeper:2181'
+ LIST_CMD="kafka-topics.sh --list --zookeeper $ZOOKEEPER"
+
+ # Take the number of topics that we need to create prior to running anything
+ TOPICS_COUNT=`cat docker-compose.yml | grep -E -i 'integrations_|benchmarks_' | wc -l`
+
+ # And wait until all of them are created
+ until (((`docker exec $KAFKA_NAME $LIST_CMD | wc -l`) >= $TOPICS_COUNT));
+ do
+   echo "Waiting for Kafka to create all the needed topics..."
+   sleep 1
+ done
+
+ echo "All the needed topics created."
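A typical (assumed) local bootstrap sequence, pairing this script with the docker-compose.yml changes below:

  # start Zookeeper and Kafka, then block until every declared topic exists
  docker-compose up -d
  bin/wait_for_kafka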
data/certs/karafka-pro.pem ADDED
@@ -0,0 +1,11 @@
+ -----BEGIN RSA PUBLIC KEY-----
+ MIIBigKCAYEApcd6ybskiNs9WUvBGVUE8GdWDehjZ9TyjSj/fDl/UcMYqY0R5YX9
+ tnYxEwZZRMdVltKWxr88Qmshh1IQz6CpJVbcfYjt/158pSGPm+AUua6tkLqIvZDM
+ ocFOMafmroI+BMuL+Zu5QH7HC2tkT16jclGYfMQkJjXVUQTk2UZr+94+8RlUz/CH
+ Y6hPA7xPgIyPfyPCxz1VWzAwXwT++NCJQPBr5MqT84LNSEzUSlR9pFNShf3UCUT+
+ 8LWOvjFSNGmMMSsbo2T7/+dz9/FM02YG00EO0x04qteggwcaEYLFrigDN6/fM0ih
+ BXZILnMUqC/qrfW2YFg4ZqKZJuxaALqqkPxrkBDYqoqcAloqn36jBSke6tc/2I/J
+ 2Afq3r53UoAbUH7h5I/L8YeaiA4MYjAuq724lHlrOmIr4D6yjYC0a1LGlPjLk869
+ 2nsVXNgomhVb071E6amR+rJJnfvkdZgCmEBFnqnBV5A1u4qgNsa2rVcD+gJRvb2T
+ aQtjlQWKPx5xAgMBAAE=
+ -----END RSA PUBLIC KEY-----
data/config/errors.yml CHANGED
@@ -1,41 +1,56 @@
  en:
-   dry_validation:
-     errors:
-       invalid_broker_schema: >
-         has an invalid format
-         Expected schema, host and port number
-         Example: kafka://127.0.0.1:9092 or kafka+ssl://127.0.0.1:9092
-       invalid_certificate: >
-         is not a valid certificate
-       invalid_certificate_from_path: >
-         is not a valid certificate
-       invalid_private_key: >
-         is not a valid private key
-       max_timeout_size_for_exponential: >
-         pause_timeout cannot be more than pause_max_timeout
-       max_wait_time_limit:
-         max_wait_time cannot be more than socket_timeout
-       topics_names_not_unique: >
-         all topic names within a single consumer group must be unique
-       ssl_client_cert_with_ssl_client_cert_key: >
-         Both ssl_client_cert and ssl_client_cert_key need to be provided
-       ssl_client_cert_key_with_ssl_client_cert: >
-         Both ssl_client_cert_key and ssl_client_cert need to be provided
-       ssl_client_cert_chain_with_ssl_client_cert: >
-         Both ssl_client_cert_chain and ssl_client_cert need to be provided
-       ssl_client_cert_chain_with_ssl_client_cert_key: >
-         Both ssl_client_cert_chain and ssl_client_cert_key need to be provided
-       ssl_client_cert_key_password_with_ssl_client_cert_key: >
-         Both ssl_client_cert_key_password and ssl_client_cert_key need to be provided
-       does_not_respond_to_token: >
-         needs to respond to a #token method
-       required_usage_count: >
-         Given topic must be used at least once
-       pid_already_exists: >
-         Pidfile already exists
-       consumer_groups_inclusion: >
-         Unknown consumer group
-       does_not_exist:
-         Given file does not exist or cannot be read
-       does_not_respond_to_call: >
-         needs to respond to a #call method
+   validations:
+     config:
+       missing: needs to be present
+       client_id_format: 'needs to be a string with a Kafka accepted format'
+       license.entity_format: needs to be a string
+       license.token_format: needs to be either false or a string
+       license.expires_on_format: needs to be a valid date
+       concurrency_format: needs to be an integer bigger than 0
+       consumer_mapper_format: needs to be present
+       consumer_persistence_format: needs to be either true or false
+       pause_timeout_format: needs to be an integer bigger than 0
+       pause_max_timeout_format: needs to be an integer bigger than 0
+       pause_with_exponential_backoff_format: needs to be either true or false
+       shutdown_timeout_format: needs to be an integer bigger than 0
+       max_wait_time_format: needs to be an integer bigger than 0
+       kafka_format: needs to be a filled hash
+       internal.status_format: needs to be present
+       internal.process_format: needs to be present
+       internal.routing.builder_format: needs to be present
+       internal.routing.subscription_groups_builder_format: needs to be present
+       key_must_be_a_symbol: All keys under the kafka settings scope need to be symbols
+       max_timeout_vs_pause_max_timeout: pause_timeout must be less or equal to pause_max_timeout
+       shutdown_timeout_vs_max_wait_time: shutdown_timeout must be more than max_wait_time
+
+     server_cli_options:
+       missing: needs to be present
+       consumer_groups_inclusion: Unknown consumer group
+
+     consumer_group_topic:
+       missing: needs to be present
+       name_format: 'needs to be a string with a Kafka accepted format'
+       deserializer_format: needs to be present
+       manual_offset_management_format: needs to be either true or false
+       consumer_format: needs to be present
+       id_format: 'needs to be a string with a Kafka accepted format'
+       initial_offset_format: needs to be either earliest or latest
+
+     consumer_group:
+       missing: needs to be present
+       topics_names_not_unique: all topic names within a single consumer group must be unique
+       id_format: 'needs to be a string with a Kafka accepted format'
+       topics_format: needs to be a non-empty array
+
+     job_options:
+       missing: needs to be present
+       dispatch_method_format: needs to be either :produce_async or :produce_sync
+       partitioner_format: 'needs to respond to #call'
+       partition_key_type_format: 'needs to be either :key or :partition_key'
+
+     test:
+       missing: needs to be present
+       id_format: needs to be a String
+
+     pro_consumer_group_topic:
+       consumer_format: needs to inherit from Karafka::Pro::BaseConsumer and not Karafka::Consumer
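The new layout scopes messages per contract under en.validations. A minimal lookup sketch (not Karafka's actual message resolver) showing how an error key such as config's concurrency_format maps to its text:

  require 'yaml'

  errors = YAML.safe_load(File.read('config/errors.yml'))

  # Messages live under en -> validations -> <contract scope> -> <error key>
  puts errors.dig('en', 'validations', 'config', 'concurrency_format')
  # => needs to be an integer bigger than 0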
data/docker-compose.yml CHANGED
@@ -1,17 +1,53 @@
  version: '2'
  services:
    zookeeper:
+     container_name: karafka_20_zookeeper
      image: wurstmeister/zookeeper
      ports:
-       - "2181:2181"
+       - '2181:2181'
    kafka:
-     image: wurstmeister/kafka:1.0.1
+     container_name: karafka_20_kafka
+     image: wurstmeister/kafka
      ports:
-       - "9092:9092"
+       - '9092:9092'
      environment:
        KAFKA_ADVERTISED_HOST_NAME: localhost
        KAFKA_ADVERTISED_PORT: 9092
        KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
        KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
+       KAFKA_CREATE_TOPICS:
+         "integrations_00_02:2:1,\
+         integrations_01_02:2:1,\
+         integrations_02_02:2:1,\
+         integrations_03_02:2:1,\
+         integrations_04_02:2:1,\
+         integrations_05_02:2:1,\
+         integrations_06_02:2:1,\
+         integrations_07_02:2:1,\
+         integrations_08_02:2:1,\
+         integrations_09_02:2:1,\
+         integrations_10_02:2:1,\
+         integrations_11_02:2:1,\
+         integrations_12_02:2:1,\
+         integrations_13_02:2:1,\
+         integrations_14_02:2:1,\
+         integrations_15_02:2:1,\
+         integrations_16_02:2:1,\
+         integrations_17_02:2:1,\
+         integrations_18_02:2:1,\
+         integrations_19_02:2:1,\
+         integrations_20_02:2:1,\
+         integrations_21_02:2:1,\
+         integrations_00_03:3:1,\
+         integrations_01_03:3:1,\
+         integrations_02_03:3:1,\
+         integrations_03_03:3:1,\
+         integrations_04_03:3:1,\
+         integrations_00_10:10:1,\
+         integrations_01_10:10:1,\
+         benchmarks_00_01:1:1,\
+         benchmarks_00_05:5:1,\
+         benchmarks_01_05:5:1,\
+         benchmarks_00_10:10:1"
      volumes:
        - /var/run/docker.sock:/var/run/docker.sock
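For reference, the wurstmeister/kafka image parses each KAFKA_CREATE_TOPICS entry as name:partitions:replication_factor, which is why the topic names above encode their partition counts as suffixes:

  benchmarks_00_10:10:1   # topic benchmarks_00_10, 10 partitions, replication factor 1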
data/karafka.gemspec CHANGED
@@ -5,29 +5,24 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 
  require 'karafka/version'
 
- # rubocop:disable Metrics/BlockLength
  Gem::Specification.new do |spec|
    spec.name = 'karafka'
    spec.version = ::Karafka::VERSION
    spec.platform = Gem::Platform::RUBY
-   spec.authors = ['Maciej Mensfeld', 'Pavlo Vavruk', 'Adam Gwozdowski']
-   spec.email = %w[maciej@mensfeld.pl pavlo.vavruk@gmail.com adam99g@gmail.com]
+   spec.authors = ['Maciej Mensfeld']
+   spec.email = %w[maciej@mensfeld.pl]
    spec.homepage = 'https://karafka.io'
-   spec.summary = 'Ruby based framework for working with Apache Kafka'
+   spec.summary = 'Efficient Kafka processing framework for Ruby and Rails'
    spec.description = 'Framework used to simplify Apache Kafka based Ruby applications development'
-   spec.license = 'MIT'
+   spec.licenses = ['LGPL-3.0', 'Commercial']
 
-   spec.add_dependency 'dry-configurable', '~> 0.13'
-   spec.add_dependency 'dry-inflector', '~> 0.2'
-   spec.add_dependency 'dry-monitor', '~> 0.5'
-   spec.add_dependency 'dry-validation', '~> 1.7'
-   spec.add_dependency 'envlogic', '~> 1.1'
-   spec.add_dependency 'ruby-kafka', '>= 1.3.0'
-   spec.add_dependency 'thor', '>= 1.1'
-   spec.add_dependency 'waterdrop', '~> 1.4'
-   spec.add_dependency 'zeitwerk', '~> 2.4'
+   spec.add_dependency 'karafka-core', '>= 2.0.2', '< 3.0.0'
+   spec.add_dependency 'rdkafka', '>= 0.12'
+   spec.add_dependency 'thor', '>= 0.20'
+   spec.add_dependency 'waterdrop', '>= 2.4.1', '< 3.0.0'
+   spec.add_dependency 'zeitwerk', '~> 2.3'
 
-   spec.required_ruby_version = '>= 2.7'
+   spec.required_ruby_version = '>= 2.7.0'
 
    if $PROGRAM_NAME.end_with?('gem')
      spec.signing_key = File.expand_path('~/.ssh/gem-private_key.pem')
@@ -35,7 +30,7 @@ Gem::Specification.new do |spec|
 
    spec.cert_chain = %w[certs/mensfeld.pem]
    spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec)/}) }
-   spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.executables = %w[karafka]
    spec.require_paths = %w[lib]
 
    spec.metadata = {
@@ -43,4 +38,3 @@ Gem::Specification.new do |spec|
      'rubygems_mfa_required' => 'true'
    }
  end
- # rubocop:enable Metrics/BlockLength
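For applications upgrading, the dependency swap above (ruby-kafka and the dry-rb stack out; karafka-core, rdkafka, and waterdrop 2.x in) is resolved transparently by the gem itself; a minimal, hypothetical Gemfile pin for the 2.x line:

  # Gemfile
  gem 'karafka', '>= 2.0', '< 3.0'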