minitest-distributed 0.1.0

Files changed (41)
  1. checksums.yaml +7 -0
  2. data/.github/workflows/ruby.yml +48 -0
  3. data/.gitignore +8 -0
  4. data/.rubocop.yml +63 -0
  5. data/.travis.yml +6 -0
  6. data/CODE_OF_CONDUCT.md +74 -0
  7. data/Gemfile +12 -0
  8. data/Gemfile.lock +53 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +115 -0
  11. data/Rakefile +12 -0
  12. data/bin/console +15 -0
  13. data/bin/rake +29 -0
  14. data/bin/rubocop +29 -0
  15. data/bin/setup +8 -0
  16. data/bin/srb +29 -0
  17. data/lib/minitest/distributed.rb +36 -0
  18. data/lib/minitest/distributed/configuration.rb +53 -0
  19. data/lib/minitest/distributed/coordinators/coordinator_interface.rb +29 -0
  20. data/lib/minitest/distributed/coordinators/memory_coordinator.rb +67 -0
  21. data/lib/minitest/distributed/coordinators/redis_coordinator.rb +387 -0
  22. data/lib/minitest/distributed/enqueued_runnable.rb +88 -0
  23. data/lib/minitest/distributed/filters/exclude_filter.rb +35 -0
  24. data/lib/minitest/distributed/filters/filter_interface.rb +25 -0
  25. data/lib/minitest/distributed/filters/include_filter.rb +35 -0
  26. data/lib/minitest/distributed/reporters/distributed_progress_reporter.rb +76 -0
  27. data/lib/minitest/distributed/reporters/distributed_summary_reporter.rb +48 -0
  28. data/lib/minitest/distributed/reporters/redis_coordinator_warnings_reporter.rb +61 -0
  29. data/lib/minitest/distributed/result_aggregate.rb +67 -0
  30. data/lib/minitest/distributed/result_type.rb +28 -0
  31. data/lib/minitest/distributed/test_runner.rb +37 -0
  32. data/lib/minitest/distributed/test_selector.rb +54 -0
  33. data/lib/minitest/distributed/version.rb +8 -0
  34. data/lib/minitest/distributed_plugin.rb +51 -0
  35. data/minitest-distributed.gemspec +50 -0
  36. data/sorbet/config +2 -0
  37. data/sorbet/rbi/minitest.rbi +238 -0
  38. data/sorbet/rbi/rbconfig.rbi +6 -0
  39. data/sorbet/rbi/redis.rbi +70 -0
  40. data/sorbet/rbi/winsize.rbi +7 -0
  41. metadata +142 -0
data/bin/rake ADDED
@@ -0,0 +1,29 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ #
+ # This file was generated by Bundler.
+ #
+ # The application 'rake' is installed as part of a gem, and
+ # this file is here to facilitate running it.
+ #
+
+ require "pathname"
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
+   Pathname.new(__FILE__).realpath)
+
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
+
+ if File.file?(bundle_binstub)
+   if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
+     load(bundle_binstub)
+   else
+     abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
+   end
+ end
+
+ require "rubygems"
+ require "bundler/setup"
+
+ load(Gem.bin_path("rake", "rake"))
data/bin/rubocop ADDED
@@ -0,0 +1,29 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ #
+ # This file was generated by Bundler.
+ #
+ # The application 'rubocop' is installed as part of a gem, and
+ # this file is here to facilitate running it.
+ #
+
+ require "pathname"
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
+   Pathname.new(__FILE__).realpath)
+
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
+
+ if File.file?(bundle_binstub)
+   if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
+     load(bundle_binstub)
+   else
+     abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
+   end
+ end
+
+ require "rubygems"
+ require "bundler/setup"
+
+ load(Gem.bin_path("rubocop", "rubocop"))
data/bin/setup ADDED
@@ -0,0 +1,8 @@
+ #!/usr/bin/env bash
+ set -euo pipefail
+ IFS=$'\n\t'
+ set -vx
+
+ bundle install
+
+ # Do any other automated setup that you need to do here
data/bin/srb ADDED
@@ -0,0 +1,29 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ #
+ # This file was generated by Bundler.
+ #
+ # The application 'srb' is installed as part of a gem, and
+ # this file is here to facilitate running it.
+ #
+
+ require "pathname"
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
+   Pathname.new(__FILE__).realpath)
+
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
+
+ if File.file?(bundle_binstub)
+   if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
+     load(bundle_binstub)
+   else
+     abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
+   end
+ end
+
+ require "rubygems"
+ require "bundler/setup"
+
+ load(Gem.bin_path("sorbet", "srb"))
data/lib/minitest/distributed.rb ADDED
@@ -0,0 +1,36 @@
+ # typed: strict
+ # frozen_string_literal: true
+
+ require 'minitest'
+ require 'sorbet-runtime'
+
+ require "minitest/distributed/configuration"
+ require "minitest/distributed/test_runner"
+ require "minitest/distributed/test_selector"
+ require "minitest/distributed/enqueued_runnable"
+ require "minitest/distributed/result_type"
+ require "minitest/distributed/result_aggregate"
+ require "minitest/distributed/filters/filter_interface"
+ require "minitest/distributed/filters/include_filter"
+ require "minitest/distributed/filters/exclude_filter"
+ require "minitest/distributed/coordinators/coordinator_interface"
+ require "minitest/distributed/coordinators/memory_coordinator"
+ require "minitest/distributed/coordinators/redis_coordinator"
+ require "minitest/distributed/reporters/redis_coordinator_warnings_reporter"
+ require "minitest/distributed/reporters/distributed_progress_reporter"
+ require "minitest/distributed/reporters/distributed_summary_reporter"
+
+ module Minitest
+   module Distributed
+     class Error < StandardError; end
+
+     module TestRunnerPatch
+       extend T::Sig
+
+       sig { params(reporter: Minitest::AbstractReporter, options: T::Hash[Symbol, T.untyped]).void }
+       def __run(reporter, options)
+         TestRunner.new(options).run(reporter)
+       end
+     end
+   end
+ end
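`TestRunnerPatch` above overrides `Minitest.__run` so that the distributed `TestRunner` takes over test execution. A minimal sketch of how a Minitest plugin could wire this in, assuming Minitest's standard plugin entry points; the gem's actual `data/lib/minitest/distributed_plugin.rb` is listed in this diff but not shown here, so the hook below is illustrative:

    require "minitest"
    require "minitest/distributed"

    module Minitest
      # Hypothetical wiring, modeled on Minitest's plugin convention:
      # Minitest.init_plugins calls plugin_<name>_init for each discovered plugin.
      def self.plugin_distributed_init(options)
        # Route Minitest.__run (a singleton method) through the distributed TestRunner.
        Minitest.singleton_class.prepend(Minitest::Distributed::TestRunnerPatch)
      end
    end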
data/lib/minitest/distributed/configuration.rb ADDED
@@ -0,0 +1,53 @@
+ # typed: strict
+ # frozen_string_literal: true
+
+ require 'uri'
+ require 'securerandom'
+
+ module Minitest
+   module Distributed
+     class Configuration < T::Struct
+       DEFAULT_BATCH_SIZE = 10
+       DEFAULT_MAX_ATTEMPTS = 3
+       DEFAULT_TEST_TIMEOUT = 30_000 # milliseconds
+
+       class << self
+         extend T::Sig
+
+         sig { params(env: T::Hash[String, T.nilable(String)]).returns(T.attached_class) }
+         def from_env(env = ENV.to_h)
+           new(
+             coordinator_uri: URI(env['MINITEST_COORDINATOR'] || 'memory:'),
+             run_id: env['MINITEST_RUN_ID'] || SecureRandom.uuid,
+             worker_id: env['MINITEST_WORKER_ID'] || SecureRandom.uuid,
+             test_timeout: Integer(env['MINITEST_TEST_TIMEOUT'] || DEFAULT_TEST_TIMEOUT),
+             test_batch_size: Integer(env['MINITEST_TEST_BATCH_SIZE'] || DEFAULT_BATCH_SIZE),
+             max_attempts: Integer(env['MINITEST_MAX_ATTEMPTS'] || DEFAULT_MAX_ATTEMPTS),
+           )
+         end
+       end
+
+       extend T::Sig
+
+       prop :coordinator_uri, URI::Generic, default: URI('memory:')
+       prop :run_id, String, factory: -> { SecureRandom.uuid }
+       prop :worker_id, String, factory: -> { SecureRandom.uuid }
+       prop :test_timeout, Integer, default: DEFAULT_TEST_TIMEOUT
+       prop :test_batch_size, Integer, default: DEFAULT_BATCH_SIZE
+       prop :max_attempts, Integer, default: DEFAULT_MAX_ATTEMPTS
+
+       sig { returns(Coordinators::CoordinatorInterface) }
+       def coordinator
+         @coordinator = T.let(@coordinator, T.nilable(Coordinators::CoordinatorInterface))
+         @coordinator ||= case coordinator_uri.scheme
+         when 'redis'
+           Coordinators::RedisCoordinator.new(configuration: self)
+         when 'memory'
+           Coordinators::MemoryCoordinator.new(configuration: self)
+         else
+           raise NotImplementedError, "Unknown coordinator implementation: #{coordinator_uri.scheme}"
+         end
+       end
+     end
+   end
+ end
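`Configuration.from_env` shows the full set of environment variables the gem reads, and `#coordinator` picks the backend from the URI scheme. For illustration, a worker process could be configured like this (the values are made up; `memory:` remains the default when `MINITEST_COORDINATOR` is unset):

    # Illustrative values only. Every worker in a run shares MINITEST_RUN_ID,
    # while MINITEST_WORKER_ID must be unique per worker.
    ENV['MINITEST_COORDINATOR'] = 'redis://localhost:6379/1'
    ENV['MINITEST_RUN_ID'] = 'ci-build-1234'
    ENV['MINITEST_WORKER_ID'] = 'worker-1'
    ENV['MINITEST_TEST_TIMEOUT'] = '60000' # milliseconds

    config = Minitest::Distributed::Configuration.from_env
    config.coordinator # => a RedisCoordinator, selected by the URI scheme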
data/lib/minitest/distributed/coordinators/coordinator_interface.rb ADDED
@@ -0,0 +1,29 @@
+ # typed: strict
+ # frozen_string_literal: true
+
+ module Minitest
+   module Distributed
+     module Coordinators
+       module CoordinatorInterface
+         extend T::Sig
+         extend T::Helpers
+         interface!
+
+         sig { abstract.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
+         def register_reporters(reporter:, options:); end
+
+         sig { abstract.returns(ResultAggregate) }
+         def local_results; end
+
+         sig { abstract.returns(ResultAggregate) }
+         def combined_results; end
+
+         sig { abstract.params(test_selector: TestSelector).void }
+         def produce(test_selector:); end
+
+         sig { abstract.params(reporter: Minitest::AbstractReporter).void }
+         def consume(reporter:); end
+       end
+     end
+   end
+ end
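Since `CoordinatorInterface` is a Sorbet `interface!`, a coordinator must implement all five abstract methods with `override` sigs before it can be instantiated. A hypothetical skeleton showing the required shape (`NullCoordinator` is illustrative, not part of the gem; it just runs everything in-process):

    # typed: false
    module Minitest
      module Distributed
        module Coordinators
          class NullCoordinator
            extend T::Sig
            include CoordinatorInterface

            sig { override.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
            def register_reporters(reporter:, options:); end

            sig { override.returns(ResultAggregate) }
            def local_results
              @local_results ||= ResultAggregate.new
            end

            # With no other workers, the local results are the combined results.
            sig { override.returns(ResultAggregate) }
            def combined_results
              local_results
            end

            sig { override.params(test_selector: TestSelector).void }
            def produce(test_selector:)
              @tests = test_selector.tests
            end

            sig { override.params(reporter: Minitest::AbstractReporter).void }
            def consume(reporter:)
              @tests.each do |enqueued_runnable|
                reporter.prerecord(enqueued_runnable.runnable_class, enqueued_runnable.method_name)
                reporter.record(enqueued_runnable.run)
              end
            end
          end
        end
      end
    end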
data/lib/minitest/distributed/coordinators/memory_coordinator.rb ADDED
@@ -0,0 +1,67 @@
+ # typed: strict
+ # frozen_string_literal: true
+
+ module Minitest
+   module Distributed
+     module Coordinators
+       class MemoryCoordinator
+         extend T::Sig
+         include CoordinatorInterface
+
+         sig { returns(Configuration) }
+         attr_reader :configuration
+
+         sig { returns(Queue) }
+         attr_reader :queue
+
+         sig { override.returns(ResultAggregate) }
+         attr_reader :local_results
+
+         alias_method :combined_results, :local_results
+
+         sig { params(configuration: Configuration).void }
+         def initialize(configuration:)
+           @configuration = configuration
+
+           @leader = T.let(Mutex.new, Mutex)
+           @queue = T.let(Queue.new, Queue)
+           @local_results = T.let(ResultAggregate.new, ResultAggregate)
+         end
+
+         sig { override.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
+         def register_reporters(reporter:, options:)
+           # No need for any additional reporters
+         end
+
+         sig { override.params(test_selector: TestSelector).void }
+         def produce(test_selector:)
+           if @leader.try_lock
+             tests = test_selector.tests
+             @local_results.size = tests.size
+             if tests.empty?
+               queue.close
+             else
+               tests.each { |test| queue << test }
+             end
+           end
+         end
+
+         sig { override.params(reporter: AbstractReporter).void }
+         def consume(reporter:)
+           until queue.empty? && queue.closed?
+             enqueued_runnable = queue.pop
+             reporter.prerecord(enqueued_runnable.runnable_class, enqueued_runnable.method_name)
+             result = enqueued_runnable.run
+
+             local_results.update_with_result(result)
+             local_results.acks += 1
+
+             reporter.record(result)
+
+             queue.close if local_results.completed?
+           end
+         end
+       end
+     end
+   end
+ end
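The MemoryCoordinator condenses the whole coordination model into one process: `Mutex#try_lock` elects whichever thread calls `produce` first as the leader that fills the queue, and every thread then drains the shared `Queue`. The same pattern in isolation (a standalone sketch; the names and test identifiers are illustrative):

    # Standalone sketch of the try_lock leader election used above.
    leader = Mutex.new
    queue = Queue.new

    workers = 4.times.map do
      Thread.new do
        if leader.try_lock # non-blocking: exactly one thread wins
          %w[test_a test_b test_c test_d].each { |t| queue << t }
          queue.close # a closed, drained queue makes pop return nil
        end

        processed = []
        while (test = queue.pop)
          processed << test
        end
        processed
      end
    end

    workers.each { |t| puts "worker ran: #{t.value.inspect}" }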
data/lib/minitest/distributed/coordinators/redis_coordinator.rb ADDED
@@ -0,0 +1,387 @@
+ # typed: strict
+ # frozen_string_literal: true
+
+ require 'redis'
+
+ module Minitest
+   module Distributed
+     module Coordinators
+       # The RedisCoordinator is an implementation of the test coordinator interface
+       # using a Redis stream + consumer group for coordination.
+       #
+       # We assume a bunch of workers will be started at the same time. Every worker
+       # will try to become the leader by trying to create the consumer group. Only one
+       # will succeed, which will then continue to populate the list of tests to run
+       # to the stream.
+       #
+       # After that, all workers will start consuming from the stream. They will first
+       # try to claim stale entries from other workers (determined by the `test_timeout`
+       # option), and process them up to a maximum of `max_attempts` attempts. Then,
+       # they will consume tests from the stream, run them, and ack them. This is done
+       # in batches to reduce load on Redis.
+       #
+       # Finally, when we have acked the same number of tests as we populated into the
+       # queue, the run is considered finished. The first worker to detect this will
+       # remove the consumer group and the associated stream from Redis.
+       #
+       # If a worker starts for the same run_id while it is already considered completed,
+       # it will start a "retry run". It will find all the tests that failed/errored on
+       # the previous attempt, and schedule only those tests to be run, rather than the
+       # full test suite returned by the test selector. This can be useful to retry flaky
+       # tests. Subsequent workers coming online will join this worker to form a consumer
+       # group exactly as described above.
+       class RedisCoordinator
+         extend T::Sig
+         include CoordinatorInterface
+
+         sig { returns(Configuration) }
+         attr_reader :configuration
+
+         sig { returns(String) }
+         attr_reader :stream_key
+
+         sig { returns(String) }
+         attr_reader :group_name
+
+         sig { override.returns(ResultAggregate) }
+         attr_reader :local_results
+
+         sig { returns(T::Set[EnqueuedRunnable]) }
+         attr_reader :reclaimed_tests
+
+         sig { params(configuration: Configuration).void }
+         def initialize(configuration:)
+           @configuration = configuration
+
+           @redis = T.let(nil, T.nilable(Redis))
+           @stream_key = T.let(key('queue'), String)
+           @group_name = T.let('minitest-distributed', String)
+           @local_results = T.let(ResultAggregate.new, ResultAggregate)
+           @combined_results = T.let(nil, T.nilable(ResultAggregate))
+           @reclaimed_tests = T.let(Set.new, T::Set[EnqueuedRunnable])
+         end
+
+         sig { override.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
+         def register_reporters(reporter:, options:)
+           reporter << Reporters::RedisCoordinatorWarningsReporter.new(options[:io], options)
+         end
+
+         sig { override.returns(ResultAggregate) }
+         def combined_results
+           @combined_results ||= begin
+             stats_as_string = redis.mget(key('runs'), key('assertions'), key('passes'),
+               key('failures'), key('errors'), key('skips'), key('reruns'), key('acks'), key('size'))
+
+             ResultAggregate.new(
+               runs: Integer(stats_as_string.fetch(0) || 0),
+               assertions: Integer(stats_as_string.fetch(1) || 0),
+               passes: Integer(stats_as_string.fetch(2) || 0),
+               failures: Integer(stats_as_string.fetch(3) || 0),
+               errors: Integer(stats_as_string.fetch(4) || 0),
+               skips: Integer(stats_as_string.fetch(5) || 0),
+               reruns: Integer(stats_as_string.fetch(6) || 0),
+               acks: Integer(stats_as_string.fetch(7) || 0),
+
+               # In the case where no size number has been published yet, we initialize
+               # the size of the test suite to be arbitrarily large, to make sure it is
+               # higher than the number of acks, so the run is not considered completed yet.
+               size: Integer(stats_as_string.fetch(8) || 2_147_483_647),
+             )
+           end
+         end
+
+         sig { override.params(test_selector: TestSelector).void }
+         def produce(test_selector:)
+           # Whoever ends up creating the consumer group will act as leader,
+           # and publish the list of tests to the stream.
+
+           begin
+             # When using `redis.multi`, the second DEL command gets executed even if the initial XGROUP
+             # fails. This is bad, because only the leader should be issuing the DEL command.
+             # When using EVAL and a Lua script, the script aborts after the first XGROUP command
+             # fails, and the DEL never gets executed for followers.
+             redis.evalsha(
+               register_consumergroup_script,
+               keys: [stream_key, key('size'), key('acks')],
+               argv: [group_name],
+             )
+
+           rescue Redis::CommandError => ce
+             if ce.message.include?('BUSYGROUP')
+               # If Redis returns a BUSYGROUP error, it means that the consumer group already
+               # exists. In our case, it means that another worker managed to successfully
+               # run the XGROUP command, and will act as leader and publish the tests.
+               # This worker can simply move on to consumer mode.
+               return
+             else
+               raise
+             end
+           end
+
+           run_attempt, previous_failures, previous_errors, _deleted = redis.multi do
+             redis.incr(key('attempt'))
+             redis.lrange(key('failure_list'), 0, -1)
+             redis.lrange(key('error_list'), 0, -1)
+             redis.del(key('failure_list'), key('error_list'))
+           end
+
+           tests = if run_attempt == 1
+             # If this is the first attempt for this run ID, we will schedule the full
+             # test suite as returned by the test selector to run.
+             test_selector.tests
+           else
+             # For subsequent attempts, we check the lists of previous failures and
+             # errors, and only schedule to re-run those tests. This allows for faster
+             # retries of potentially flaky tests.
+             (previous_failures + previous_errors).map do |test_to_retry|
+               EnqueuedRunnable.from_hash!(Marshal.load(test_to_retry))
+             end
+           end
+
+           # We set the `size` key to the number of tests we are planning to schedule.
+           # This will allow workers to tell when the run is done. We also adjust the
+           # number of failures and errors in case of a retry run.
+           adjust_combined_results(ResultAggregate.new(
+             size: tests.size,
+             failures: -previous_failures.length,
+             errors: -previous_errors.length,
+             reruns: previous_failures.length + previous_errors.length,
+           ))
+
+           # TODO: break this up in batches.
+           tests.each { |test| redis.xadd(stream_key, test.serialize) }
+         end
+
+         sig { override.params(reporter: AbstractReporter).void }
+         def consume(reporter:)
+           exponential_backoff = INITIAL_BACKOFF
+           loop do
+             # First, see if there are any pending tests from other workers to claim.
+             stale_runnables = claim_stale_runnables
+             stale_processed = process_batch(stale_runnables, reporter)
+
+             # Then, try to process a regular batch of messages.
+             fresh_runnables = claim_fresh_runnables(block: exponential_backoff)
+             fresh_processed = process_batch(fresh_runnables, reporter)
+
+             # If we have acked the same amount of tests as we were supposed to, the run
+             # is complete and we can exit our loop. Generally, only one worker will detect
+             # this condition. The other workers will quit their consumer loop because the
+             # consumer group will be deleted by the first worker, and their Redis commands
+             # will start to fail - see the rescue block below.
+             break if combined_results.completed?
+
+             # To make sure we don't end up in a busy loop overwhelming Redis with commands
+             # when there is no work to do, we increase the blocking time exponentially,
+             # and reset it to the initial value if we processed any messages.
+             if stale_processed > 0 || fresh_processed > 0
+               exponential_backoff = INITIAL_BACKOFF
+             else
+               exponential_backoff <<= 1
+             end
+           end
+
+           cleanup
+         rescue Redis::CommandError => ce
+           if ce.message.start_with?('NOGROUP')
+             # When a Redis consumer group command fails with a NOGROUP error, we assume the
+             # consumer group was deleted by the first worker that detected the run is complete.
+             # So this worker can exit its loop as well.
+
+             # We have to invalidate the local combined_results cache so we get fresh
+             # final values from Redis when we try to report results in our summarizer.
+             @combined_results = nil
+           else
+             raise
+           end
+         end
+
+         private
+
+         sig { returns(Redis) }
+         def redis
+           @redis ||= Redis.new(url: configuration.coordinator_uri)
+         end
+
+         sig { returns(String) }
+         def ack_batch_script
+           @ack_batch_script = T.let(@ack_batch_script, T.nilable(String))
+           @ack_batch_script ||= redis.script(:load, <<~LUA)
+             local acked_ids, acked, i = {}, 0, 2
+             while ARGV[i] do
+               if redis.call('XACK', KEYS[1], ARGV[1], ARGV[i]) > 0 then
+                 acked = acked + 1
+                 acked_ids[acked] = ARGV[i]
+               end
+               i = i + 1
+             end
+             return acked_ids
+           LUA
+         end
+
+         sig { returns(String) }
+         def register_consumergroup_script
+           @register_consumergroup_script = T.let(@register_consumergroup_script, T.nilable(String))
+           @register_consumergroup_script ||= redis.script(:load, <<~LUA)
+             redis.call('XGROUP', 'CREATE', KEYS[1], ARGV[1], '0', 'MKSTREAM')
+             redis.call('DEL', KEYS[2], KEYS[3])
+           LUA
+         end
+
+         sig { params(block: Integer).returns(T::Array[EnqueuedRunnable]) }
+         def claim_fresh_runnables(block:)
+           result = redis.xreadgroup(group_name, configuration.worker_id, stream_key, '>',
+             block: block, count: configuration.test_batch_size)
+           EnqueuedRunnable.from_redis_stream_claim(result.fetch(stream_key, []))
+         end
+
+         sig { returns(T::Array[EnqueuedRunnable]) }
+         def claim_stale_runnables
+           # When we have to reclaim stale tests, those tests are potentially too slow
+           # to run inside the test timeout. We only claim one test at a time in order
+           # to prevent the exact same batch from being too slow on repeated attempts,
+           # which would cause us to mark all the tests in that batch as failed.
+           #
+           # This has the side effect that for a retried test, the test timeout
+           # will be TEST_TIMEOUT * BATCH_SIZE in practice. This gives us a higher
+           # likelihood that the test will pass if the batch size > 1.
+           pending = redis.xpending(stream_key, group_name, '-', '+', 1)
+
+           # Every test is allowed to take test_timeout milliseconds. Because we process tests in
+           # batches, they should never be pending for more than TEST_TIMEOUT * BATCH_SIZE milliseconds.
+           # So, only try to claim messages older than that, with a bit of jitter.
+           max_idle_time = configuration.test_timeout * configuration.test_batch_size
+           max_idle_time_with_jitter = max_idle_time * rand(1.0...1.2)
+           to_claim = pending.each_with_object({}) do |message, hash|
+             if message['elapsed'] > max_idle_time_with_jitter
+               hash[message.fetch('entry_id')] = message
+             end
+           end
+
+           if to_claim.empty?
+             []
+           else
+             claimed = redis.xclaim(stream_key, group_name, configuration.worker_id, max_idle_time, to_claim.keys)
+             enqueued_runnables = EnqueuedRunnable.from_redis_stream_claim(claimed)
+             enqueued_runnables.each do |er|
+               # `count` will be set to the current attempt of a different worker that has timed out.
+               # The attempt we are going to try will be the next one, so add one.
+               attempt = to_claim.fetch(er.execution_id).fetch('count') + 1
+               if attempt > configuration.max_attempts
+                 # If we exhaust our attempts, we mark the test to fail immediately the next time it is run.
+                 mark_runnable_to_fail_immediately(er)
+               else
+                 reclaimed_tests << er
+               end
+             end
+
+             enqueued_runnables
+           end
+         end
+
282
+ sig { void }
283
+ def cleanup
284
+ redis.xgroup(:destroy, stream_key, group_name)
285
+ redis.del(stream_key)
286
+ rescue Redis::CommandError
287
+ # Apparently another consumer already removed the consumer group,
288
+ # so we can assume that all the Redis cleanup was completed.
289
+ end
290
+
291
+ sig { params(er: EnqueuedRunnable).void }
292
+ def mark_runnable_to_fail_immediately(er)
293
+ assertion = Minitest::Assertion.new(<<~EOM.chomp)
294
+ This test takes too long to run (> #{configuration.test_timeout}ms).
295
+
296
+ We have tried running this test #{configuration.max_attempts} on different workers, but every time the worker has not reported back a result within #{configuration.test_timeout}ms.
297
+ Try to make the test faster, or increase the test timeout.
298
+ EOM
299
+ assertion.set_backtrace(caller)
300
+ er.canned_failure = assertion
301
+ end
302
+
+         sig { params(results: ResultAggregate).void }
+         def adjust_combined_results(results)
+           updated = redis.multi do
+             redis.incrby(key('runs'), results.runs)
+             redis.incrby(key('assertions'), results.assertions)
+             redis.incrby(key('passes'), results.passes)
+             redis.incrby(key('failures'), results.failures)
+             redis.incrby(key('errors'), results.errors)
+             redis.incrby(key('skips'), results.skips)
+             redis.incrby(key('reruns'), results.reruns)
+             redis.incrby(key('acks'), results.acks)
+             redis.incrby(key('size'), results.size)
+           end
+
+           @combined_results = ResultAggregate.new(runs: updated[0], assertions: updated[1], passes: updated[2],
+             failures: updated[3], errors: updated[4], skips: updated[5], reruns: updated[6],
+             acks: updated[7], size: updated[8])
+         end
+
+         sig { params(name: String).returns(String) }
+         def key(name)
+           "minitest/#{configuration.run_id}/#{name}"
+         end
+
+         sig { params(batch: T::Array[EnqueuedRunnable], reporter: AbstractReporter).returns(Integer) }
+         def process_batch(batch, reporter)
+           to_be_acked = {}
+
+           batch.each do |enqueued_runnable|
+             local_results.size += 1
+             reporter.prerecord(enqueued_runnable.runnable_class, enqueued_runnable.method_name)
+             result = enqueued_runnable.run
+
+             case (result_type = ResultType.of(result))
+             when ResultType::Passed
+               # noop
+             when ResultType::Skipped
+               redis.lpush(key('skip_list'), Marshal.dump(enqueued_runnable.serialize))
+             when ResultType::Failed
+               redis.lpush(key('failure_list'), Marshal.dump(enqueued_runnable.serialize))
+             when ResultType::Error
+               redis.lpush(key('error_list'), Marshal.dump(enqueued_runnable.serialize))
+             else
+               T.absurd(result_type)
+             end
+
+             local_results.update_with_result(result)
+             to_be_acked[enqueued_runnable.execution_id] = result
+           end
+
+           return 0 if to_be_acked.empty?
+
+           acked = redis.evalsha(
+             ack_batch_script,
+             keys: [stream_key],
+             argv: [group_name] + to_be_acked.keys
+           )
+
+           batch_results = ResultAggregate.new(acks: acked.length)
+           acked.each do |execution_id|
+             acked_result = to_be_acked.delete(execution_id)
+             reporter.record(acked_result)
+             batch_results.update_with_result(acked_result)
+           end
+
+           to_be_acked.each do |_execution_id, unacked_result|
+             # TODO: use custom assertion class.
+             discard_assertion = Minitest::Skip.new("The test result was discarded, " \
+               "because the test has been claimed by another worker.")
+             discard_assertion.set_backtrace(caller)
+             unacked_result.failures = [discard_assertion]
+             reporter.record(unacked_result)
+           end
+
+           adjust_combined_results(batch_results)
+           local_results.acks += acked.length
+           acked.length
+         end
+
+         INITIAL_BACKOFF = 10 # milliseconds
+         private_constant :INITIAL_BACKOFF
+       end
+     end
+   end
+ end
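The core trick in `produce` is worth pulling out: every worker races to run XGROUP CREATE with MKSTREAM, exactly one succeeds and becomes the leader that publishes the tests, and the rest hit BUSYGROUP and drop straight into consuming. A standalone sketch of that handshake using redis-rb directly (the key, group, and entry names are illustrative, not the gem's own):

    require 'redis'

    redis = Redis.new(url: 'redis://localhost:6379/0')
    stream = 'minitest/ci-build-1234/queue' # illustrative key
    group = 'minitest-distributed'

    begin
      # MKSTREAM creates the stream if it does not exist yet; only the first
      # worker to get here succeeds in creating the consumer group.
      redis.xgroup(:create, stream, group, '0', mkstream: true)
      leader = true
    rescue Redis::CommandError => e
      raise unless e.message.include?('BUSYGROUP')
      leader = false # another worker won the race; consume only
    end

    if leader
      redis.xadd(stream, { class_name: 'MyTest', method_name: 'test_example' })
    end

    # Every worker, leader or not, then reads batches from the group:
    batch = redis.xreadgroup(group, 'worker-1', stream, '>', block: 10, count: 10)

Because XGROUP CREATE is atomic on the Redis side, no separate lock is needed: the command itself is the leader election, which is exactly what the Lua `register_consumergroup_script` above relies on.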