minitest-distributed 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +7 -0
  2. data/.github/workflows/ruby.yml +48 -0
  3. data/.gitignore +8 -0
  4. data/.rubocop.yml +63 -0
  5. data/.travis.yml +6 -0
  6. data/CODE_OF_CONDUCT.md +74 -0
  7. data/Gemfile +12 -0
  8. data/Gemfile.lock +53 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +115 -0
  11. data/Rakefile +12 -0
  12. data/bin/console +15 -0
  13. data/bin/rake +29 -0
  14. data/bin/rubocop +29 -0
  15. data/bin/setup +8 -0
  16. data/bin/srb +29 -0
  17. data/lib/minitest/distributed.rb +36 -0
  18. data/lib/minitest/distributed/configuration.rb +53 -0
  19. data/lib/minitest/distributed/coordinators/coordinator_interface.rb +29 -0
  20. data/lib/minitest/distributed/coordinators/memory_coordinator.rb +67 -0
  21. data/lib/minitest/distributed/coordinators/redis_coordinator.rb +387 -0
  22. data/lib/minitest/distributed/enqueued_runnable.rb +88 -0
  23. data/lib/minitest/distributed/filters/exclude_filter.rb +35 -0
  24. data/lib/minitest/distributed/filters/filter_interface.rb +25 -0
  25. data/lib/minitest/distributed/filters/include_filter.rb +35 -0
  26. data/lib/minitest/distributed/reporters/distributed_progress_reporter.rb +76 -0
  27. data/lib/minitest/distributed/reporters/distributed_summary_reporter.rb +48 -0
  28. data/lib/minitest/distributed/reporters/redis_coordinator_warnings_reporter.rb +61 -0
  29. data/lib/minitest/distributed/result_aggregate.rb +67 -0
  30. data/lib/minitest/distributed/result_type.rb +28 -0
  31. data/lib/minitest/distributed/test_runner.rb +37 -0
  32. data/lib/minitest/distributed/test_selector.rb +54 -0
  33. data/lib/minitest/distributed/version.rb +8 -0
  34. data/lib/minitest/distributed_plugin.rb +51 -0
  35. data/minitest-distributed.gemspec +50 -0
  36. data/sorbet/config +2 -0
  37. data/sorbet/rbi/minitest.rbi +238 -0
  38. data/sorbet/rbi/rbconfig.rbi +6 -0
  39. data/sorbet/rbi/redis.rbi +70 -0
  40. data/sorbet/rbi/winsize.rbi +7 -0
  41. metadata +142 -0
data/bin/rake ADDED
@@ -0,0 +1,29 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ #
+ # This file was generated by Bundler.
+ #
+ # The application 'rake' is installed as part of a gem, and
+ # this file is here to facilitate running it.
+ #
+
+ require "pathname"
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
+   Pathname.new(__FILE__).realpath)
+
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
+
+ if File.file?(bundle_binstub)
+   if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
+     load(bundle_binstub)
+   else
+     abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
+   end
+ end
+
+ require "rubygems"
+ require "bundler/setup"
+
+ load(Gem.bin_path("rake", "rake"))
data/bin/rubocop ADDED
@@ -0,0 +1,29 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ #
+ # This file was generated by Bundler.
+ #
+ # The application 'rubocop' is installed as part of a gem, and
+ # this file is here to facilitate running it.
+ #
+
+ require "pathname"
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
+   Pathname.new(__FILE__).realpath)
+
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
+
+ if File.file?(bundle_binstub)
+   if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
+     load(bundle_binstub)
+   else
+     abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
+   end
+ end
+
+ require "rubygems"
+ require "bundler/setup"
+
+ load(Gem.bin_path("rubocop", "rubocop"))
data/bin/setup ADDED
@@ -0,0 +1,8 @@
+ #!/usr/bin/env bash
+ set -euo pipefail
+ IFS=$'\n\t'
+ set -vx
+
+ bundle install
+
+ # Do any other automated setup that you need to do here
data/bin/srb ADDED
@@ -0,0 +1,29 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ #
+ # This file was generated by Bundler.
+ #
+ # The application 'srb' is installed as part of a gem, and
+ # this file is here to facilitate running it.
+ #
+
+ require "pathname"
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
+   Pathname.new(__FILE__).realpath)
+
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
+
+ if File.file?(bundle_binstub)
+   if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
+     load(bundle_binstub)
+   else
+     abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
+   end
+ end
+
+ require "rubygems"
+ require "bundler/setup"
+
+ load(Gem.bin_path("sorbet", "srb"))
data/lib/minitest/distributed.rb ADDED
@@ -0,0 +1,36 @@
+ # typed: strict
+ # frozen_string_literal: true
+
+ require 'minitest'
+ require 'sorbet-runtime'
+
+ require "minitest/distributed/configuration"
+ require "minitest/distributed/test_runner"
+ require "minitest/distributed/test_selector"
+ require "minitest/distributed/enqueued_runnable"
+ require "minitest/distributed/result_type"
+ require "minitest/distributed/result_aggregate"
+ require "minitest/distributed/filters/filter_interface"
+ require "minitest/distributed/filters/include_filter"
+ require "minitest/distributed/filters/exclude_filter"
+ require "minitest/distributed/coordinators/coordinator_interface"
+ require "minitest/distributed/coordinators/memory_coordinator"
+ require "minitest/distributed/coordinators/redis_coordinator"
+ require "minitest/distributed/reporters/redis_coordinator_warnings_reporter"
+ require "minitest/distributed/reporters/distributed_progress_reporter"
+ require "minitest/distributed/reporters/distributed_summary_reporter"
+
+ module Minitest
+   module Distributed
+     class Error < StandardError; end
+
+     module TestRunnerPatch
+       extend T::Sig
+
+       sig { params(reporter: Minitest::AbstractReporter, options: T::Hash[Symbol, T.untyped]).void }
+       def __run(reporter, options)
+         TestRunner.new(options).run(reporter)
+       end
+     end
+   end
+ end
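Note: TestRunnerPatch#__run above mirrors the signature of Minitest's own Minitest.__run entry point, so the patch only takes effect once it is prepended onto Minitest's singleton class. The actual hook lives in data/lib/minitest/distributed_plugin.rb, which is not shown in this section; the one-liner below is therefore only a hedged sketch of that mechanism, not the gem's verbatim plugin code.

# Hypothetical activation sketch; the real hook is in the gem's plugin file.
Minitest.singleton_class.prepend(Minitest::Distributed::TestRunnerPatch)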
data/lib/minitest/distributed/configuration.rb ADDED
@@ -0,0 +1,53 @@
+ # typed: strict
+ # frozen_string_literal: true
+
+ require 'uri'
+ require 'securerandom'
+
+ module Minitest
+   module Distributed
+     class Configuration < T::Struct
+       DEFAULT_BATCH_SIZE = 10
+       DEFAULT_MAX_ATTEMPTS = 3
+       DEFAULT_TEST_TIMEOUT = 30_000 # milliseconds
+
+       class << self
+         extend T::Sig
+
+         sig { params(env: T::Hash[String, T.nilable(String)]).returns(T.attached_class) }
+         def from_env(env = ENV.to_h)
+           new(
+             coordinator_uri: URI(env['MINITEST_COORDINATOR'] || 'memory:'),
+             run_id: env['MINITEST_RUN_ID'] || SecureRandom.uuid,
+             worker_id: env['MINITEST_WORKER_ID'] || SecureRandom.uuid,
+             test_timeout: Integer(env['MINITEST_TEST_TIMEOUT'] || DEFAULT_TEST_TIMEOUT),
+             test_batch_size: Integer(env['MINITEST_TEST_BATCH_SIZE'] || DEFAULT_BATCH_SIZE),
+             max_attempts: Integer(env['MINITEST_MAX_ATTEMPTS'] || DEFAULT_MAX_ATTEMPTS),
+           )
+         end
+       end
+
+       extend T::Sig
+
+       prop :coordinator_uri, URI::Generic, default: URI('memory:')
+       prop :run_id, String, factory: -> { SecureRandom.uuid }
+       prop :worker_id, String, factory: -> { SecureRandom.uuid }
+       prop :test_timeout, Integer, default: DEFAULT_TEST_TIMEOUT
+       prop :test_batch_size, Integer, default: DEFAULT_BATCH_SIZE
+       prop :max_attempts, Integer, default: DEFAULT_MAX_ATTEMPTS
+
+       sig { returns(Coordinators::CoordinatorInterface) }
+       def coordinator
+         @coordinator = T.let(@coordinator, T.nilable(Coordinators::CoordinatorInterface))
+         @coordinator ||= case coordinator_uri.scheme
+         when 'redis'
+           Coordinators::RedisCoordinator.new(configuration: self)
+         when 'memory'
+           Coordinators::MemoryCoordinator.new(configuration: self)
+         else
+           raise NotImplementedError, "Unknown coordinator implementation: #{coordinator_uri.scheme}"
+         end
+       end
+     end
+   end
+ end
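As an illustration of the environment-variable mapping in Configuration.from_env above, the snippet below (not part of the released files; the URL and IDs are placeholder values) builds a configuration by hand and asks it for the matching coordinator.

require "minitest/distributed"

# All values below are illustrative; from_env falls back to the defaults and
# to SecureRandom.uuid when a variable is absent.
config = Minitest::Distributed::Configuration.from_env(
  "MINITEST_COORDINATOR"     => "redis://localhost:6379/1",
  "MINITEST_RUN_ID"          => "build-1234",   # shared by every worker in one run
  "MINITEST_WORKER_ID"       => "worker-1",     # unique per worker process
  "MINITEST_TEST_TIMEOUT"    => "60000",        # milliseconds
  "MINITEST_TEST_BATCH_SIZE" => "20",
  "MINITEST_MAX_ATTEMPTS"    => "3",
)

config.coordinator_uri.scheme # => "redis"
config.coordinator            # => a Coordinators::RedisCoordinator for this run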
data/lib/minitest/distributed/coordinators/coordinator_interface.rb ADDED
@@ -0,0 +1,29 @@
+ # typed: strict
+ # frozen_string_literal: true
+
+ module Minitest
+   module Distributed
+     module Coordinators
+       module CoordinatorInterface
+         extend T::Sig
+         extend T::Helpers
+         interface!
+
+         sig { abstract.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
+         def register_reporters(reporter:, options:); end
+
+         sig { abstract.returns(ResultAggregate) }
+         def local_results; end
+
+         sig { abstract.returns(ResultAggregate) }
+         def combined_results; end
+
+         sig { abstract.params(test_selector: TestSelector).void }
+         def produce(test_selector:); end
+
+         sig { abstract.params(reporter: Minitest::AbstractReporter).void }
+         def consume(reporter:); end
+       end
+     end
+   end
+ end
data/lib/minitest/distributed/coordinators/memory_coordinator.rb ADDED
@@ -0,0 +1,67 @@
+ # typed: strict
+ # frozen_string_literal: true
+
+ module Minitest
+   module Distributed
+     module Coordinators
+       class MemoryCoordinator
+         extend T::Sig
+         include CoordinatorInterface
+
+         sig { returns(Configuration) }
+         attr_reader :configuration
+
+         sig { returns(Queue) }
+         attr_reader :queue
+
+         sig { override.returns(ResultAggregate) }
+         attr_reader :local_results
+
+         alias_method :combined_results, :local_results
+
+         sig { params(configuration: Configuration).void }
+         def initialize(configuration:)
+           @configuration = configuration
+
+           @leader = T.let(Mutex.new, Mutex)
+           @queue = T.let(Queue.new, Queue)
+           @local_results = T.let(ResultAggregate.new, ResultAggregate)
+         end
+
+         sig { override.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
+         def register_reporters(reporter:, options:)
+           # No need for any additional reporters
+         end
+
+         sig { override.params(test_selector: TestSelector).void }
+         def produce(test_selector:)
+           if @leader.try_lock
+             tests = test_selector.tests
+             @local_results.size = tests.size
+             if tests.empty?
+               queue.close
+             else
+               tests.each { |test| queue << test }
+             end
+           end
+         end
+
+         sig { override.params(reporter: AbstractReporter).void }
+         def consume(reporter:)
+           until queue.empty? && queue.closed?
+             enqueued_runnable = queue.pop
+             reporter.prerecord(enqueued_runnable.runnable_class, enqueued_runnable.method_name)
+             result = enqueued_runnable.run
+
+             local_results.update_with_result(result)
+             local_results.acks += 1
+
+             reporter.record(result)
+
+             queue.close if local_results.completed?
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/minitest/distributed/coordinators/redis_coordinator.rb ADDED
@@ -0,0 +1,387 @@
+ # typed: strict
+ # frozen_string_literal: true
+
+ require 'redis'
+
+ module Minitest
+   module Distributed
+     module Coordinators
+       # The RedisCoordinator is an implementation of the test coordinator interface
+       # using a Redis stream + consumergroup for coordination.
+       #
+       # We assume a bunch of workers will be started at the same time. Every worker
+       # will try to become the leader by trying to create the consumergroup. Only one
+       # will succeed, which will then continue to populate the list of tests to run
+       # to the stream.
+       #
+       # After that, all workers will start consuming from the stream. They will first
+       # try to claim stale entries from other workers (determined by the `test_timeout`
+       # option), and process them up to a maximum of `max_attempts` attempts. Then,
+       # they will consume tests from the stream, run them, and ack them. This is done
+       # in batches to reduce load on Redis.
+       #
+       # Finally, when we have acked the same number of tests as we populated into the
+       # queue, the run is considered finished. The first worker to detect this will
+       # remove the consumergroup and the associated stream from Redis.
+       #
+       # If a worker starts for the same run_id while it is already considered completed,
+       # it will start a "retry run". It will find all the tests that failed/errored on
+       # the previous attempt, and schedule only those tests to be run, rather than the
+       # full test suite returned by the test selector. This can be useful to retry flaky
+       # tests. Subsequent workers coming online will join this worker to form a consumer
+       # group exactly as described above.
+       class RedisCoordinator
+         extend T::Sig
+         include CoordinatorInterface
+
+         sig { returns(Configuration) }
+         attr_reader :configuration
+
+         sig { returns(String) }
+         attr_reader :stream_key
+
+         sig { returns(String) }
+         attr_reader :group_name
+
+         sig { override.returns(ResultAggregate) }
+         attr_reader :local_results
+
+         sig { returns(T::Set[EnqueuedRunnable]) }
+         attr_reader :reclaimed_tests
+
+         sig { params(configuration: Configuration).void }
+         def initialize(configuration:)
+           @configuration = configuration
+
+           @redis = T.let(nil, T.nilable(Redis))
+           @stream_key = T.let(key('queue'), String)
+           @group_name = T.let('minitest-distributed', String)
+           @local_results = T.let(ResultAggregate.new, ResultAggregate)
+           @combined_results = T.let(nil, T.nilable(ResultAggregate))
+           @reclaimed_tests = T.let(Set.new, T::Set[EnqueuedRunnable])
+         end
+
+         sig { override.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
+         def register_reporters(reporter:, options:)
+           reporter << Reporters::RedisCoordinatorWarningsReporter.new(options[:io], options)
+         end
+
+         sig { override.returns(ResultAggregate) }
+         def combined_results
+           @combined_results ||= begin
+             stats_as_string = redis.mget(key('runs'), key('assertions'), key('passes'),
+               key('failures'), key('errors'), key('skips'), key('reruns'), key('acks'), key('size'))
+
+             ResultAggregate.new(
+               runs: Integer(stats_as_string.fetch(0) || 0),
+               assertions: Integer(stats_as_string.fetch(1) || 0),
+               passes: Integer(stats_as_string.fetch(2) || 0),
+               failures: Integer(stats_as_string.fetch(3) || 0),
+               errors: Integer(stats_as_string.fetch(4) || 0),
+               skips: Integer(stats_as_string.fetch(5) || 0),
+               reruns: Integer(stats_as_string.fetch(6) || 0),
+               acks: Integer(stats_as_string.fetch(7) || 0),
+
+               # In the case where we have no build size number published yet, we initialize
+               # the size of the test suite to be arbitrarily large, to make sure it is
+               # higher than the number of acks, so the run is not considered completed yet.
+               size: Integer(stats_as_string.fetch(8) || 2_147_483_647),
+             )
+           end
+         end
+
+         sig { override.params(test_selector: TestSelector).void }
+         def produce(test_selector:)
+           # Whoever ends up creating the consumer group will act as leader,
+           # and publish the list of tests to the stream.
+
+           begin
+             # When using `redis.multi`, the second DEL command gets executed even if the initial GROUP
+             # fails. This is bad, because only the leader should be issuing the DEL command.
+             # When using EVAL and a Lua script, the script aborts after the first XGROUP command
+             # fails, and the DEL never gets executed for followers.
+             redis.evalsha(
+               register_consumergroup_script,
+               keys: [stream_key, key('size'), key('acks')],
+               argv: [group_name],
+             )
+
+           rescue Redis::CommandError => ce
+             if ce.message.include?('BUSYGROUP')
+               # If Redis returns a BUSYGROUP error, it means that the consumer group already
+               # exists. In our case, it means that another worker managed to successfully
+               # run the XGROUP command, and will act as leader and publish the tests.
+               # This worker can simply move on to consumer mode.
+               return
+             else
+               raise
+             end
+           end
+
+           run_attempt, previous_failures, previous_errors, _deleted = redis.multi do
+             redis.incr(key('attempt'))
+             redis.lrange(key('failure_list'), 0, -1)
+             redis.lrange(key('error_list'), 0, -1)
+             redis.del(key('failure_list'), key('error_list'))
+           end
+
+           tests = if run_attempt == 1
+             # If this is the first attempt for this run ID, we will schedule the full
+             # test suite as returned by the test selector to run.
+             test_selector.tests
+           else
+             # For subsequent attempts, we check the list of previous failures and
+             # errors, and only schedule to re-run those tests. This allows for faster
+             # retries of potentially flaky tests.
+             (previous_failures + previous_errors).map do |test_to_retry|
+               EnqueuedRunnable.from_hash!(Marshal.load(test_to_retry))
+             end
+           end
+
+           # We set the `size` key to the number of tests we are planning to schedule.
+           # This will allow workers to tell when the run is done. We also adjust the
+           # number of failures and errors in case of a retry run.
+           adjust_combined_results(ResultAggregate.new(
+             size: tests.size,
+             failures: -previous_failures.length,
+             errors: -previous_errors.length,
+             reruns: previous_failures.length + previous_errors.length,
+           ))
+
+           # TODO: break this up in batches.
+           tests.each { |test| redis.xadd(stream_key, test.serialize) }
+         end
+
+         sig { override.params(reporter: AbstractReporter).void }
+         def consume(reporter:)
+           exponential_backoff = INITIAL_BACKOFF
+           loop do
+             # First, see if there are any pending tests from other workers to claim.
+             stale_runnables = claim_stale_runnables
+             stale_processed = process_batch(stale_runnables, reporter)
+
+             # Finally, try to process a regular batch of messages
+             fresh_runnables = claim_fresh_runnables(block: exponential_backoff)
+             fresh_processed = process_batch(fresh_runnables, reporter)
+
+             # If we have acked the same number of tests as we were supposed to, the run
+             # is complete and we can exit our loop. Generally, only one worker will detect
+             # this condition. The other workers will quit their consumer loop because the
+             # consumergroup will be deleted by the first worker, and their Redis commands
+             # will start to fail - see the rescue block below.
+             break if combined_results.completed?
+
+             # To make sure we don't end up in a busy loop overwhelming Redis with commands
+             # when there is no work to do, we increase the blocking time exponentially,
+             # and reset it to the initial value if we processed any messages.
+             if stale_processed > 0 || fresh_processed > 0
+               exponential_backoff = INITIAL_BACKOFF
+             else
+               exponential_backoff <<= 1
+             end
+           end
+
+           cleanup
+         rescue Redis::CommandError => ce
+           if ce.message.start_with?('NOGROUP')
+             # When a Redis consumer group command fails with a NOGROUP error, we assume the
+             # consumer group was deleted by the first worker that detected the run is complete.
+             # So this worker can exit its loop as well.
+
+             # We have to invalidate the local combined_results cache so we get fresh
+             # final values from Redis when we try to report results in our summarizer.
+             @combined_results = nil
+           else
+             raise
+           end
+         end
+
+         private
+
+         sig { returns(Redis) }
+         def redis
+           @redis ||= Redis.new(url: configuration.coordinator_uri)
+         end
+
+         sig { returns(String) }
+         def ack_batch_script
+           @ack_batch_script = T.let(@ack_batch_script, T.nilable(String))
+           @ack_batch_script ||= redis.script(:load, <<~LUA)
+             local acked_ids, acked, i = {}, 0, 2
+             while ARGV[i] do
+               if redis.call('XACK', KEYS[1], ARGV[1], ARGV[i]) > 0 then
+                 acked = acked + 1
+                 acked_ids[acked] = ARGV[i]
+               end
+               i = i + 1
+             end
+             return acked_ids
+           LUA
+         end
+
+         sig { returns(String) }
+         def register_consumergroup_script
+           @register_consumergroup_script = T.let(@register_consumergroup_script, T.nilable(String))
+           @register_consumergroup_script ||= redis.script(:load, <<~LUA)
+             redis.call('XGROUP', 'CREATE', KEYS[1], ARGV[1], '0', 'MKSTREAM')
+             redis.call('DEL', KEYS[2], KEYS[3])
+           LUA
+         end
+
+         sig { params(block: Integer).returns(T::Array[EnqueuedRunnable]) }
+         def claim_fresh_runnables(block:)
+           result = redis.xreadgroup(group_name, configuration.worker_id, stream_key, '>',
+             block: block, count: configuration.test_batch_size)
+           EnqueuedRunnable.from_redis_stream_claim(result.fetch(stream_key, []))
+         end
+
+         sig { returns(T::Array[EnqueuedRunnable]) }
+         def claim_stale_runnables
+           # When we have to reclaim stale tests, those tests are potentially too slow
+           # to run inside the test timeout. We only claim one test at a time in order
+           # to prevent the exact same batch from being too slow on repeated attempts,
+           # which would cause us to mark all the tests in that batch as failed.
+           #
+           # This has the side effect that for a retried test, the test timeout
+           # will be TEST_TIMEOUT * BATCH_SIZE in practice. This gives us a higher
+           # likelihood that the test will pass if the batch size > 1.
+           pending = redis.xpending(stream_key, group_name, '-', '+', 1)
+
+           # Every test is allowed to take test_timeout milliseconds. Because we process tests in
+           # batches, they should never be pending for more than TEST_TIMEOUT * BATCH_SIZE milliseconds.
+           # So, only try to claim messages older than that, with a bit of jitter.
+           max_idle_time = configuration.test_timeout * configuration.test_batch_size
+           max_idle_time_with_jitter = max_idle_time * rand(1.0...1.2)
+           to_claim = pending.each_with_object({}) do |message, hash|
+             if message['elapsed'] > max_idle_time_with_jitter
+               hash[message.fetch('entry_id')] = message
+             end
+           end
+
+           if to_claim.empty?
+             []
+           else
+             claimed = redis.xclaim(stream_key, group_name, configuration.worker_id, max_idle_time, to_claim.keys)
+             enqueued_runnables = EnqueuedRunnable.from_redis_stream_claim(claimed)
+             enqueued_runnables.each do |er|
+               # `count` will be set to the current attempt of a different worker that has timed out.
+               # The attempt we are going to try will be the next one, so add one.
+               attempt = to_claim.fetch(er.execution_id).fetch('count') + 1
+               if attempt > configuration.max_attempts
+                 # If we exhaust our attempts, we will mark the test to immediately fail when it is run next.
+                 mark_runnable_to_fail_immediately(er)
+               else
+                 reclaimed_tests << er
+               end
+             end
+
+             enqueued_runnables
+           end
+         end
+
+         sig { void }
+         def cleanup
+           redis.xgroup(:destroy, stream_key, group_name)
+           redis.del(stream_key)
+         rescue Redis::CommandError
+           # Apparently another consumer already removed the consumer group,
+           # so we can assume that all the Redis cleanup was completed.
+         end
+
+         sig { params(er: EnqueuedRunnable).void }
+         def mark_runnable_to_fail_immediately(er)
+           assertion = Minitest::Assertion.new(<<~EOM.chomp)
+             This test takes too long to run (> #{configuration.test_timeout}ms).
+
+             We have tried running this test #{configuration.max_attempts} times on different workers, but every time the worker failed to report back a result within #{configuration.test_timeout}ms.
+             Try to make the test faster, or increase the test timeout.
+           EOM
+           assertion.set_backtrace(caller)
+           er.canned_failure = assertion
+         end
+
+         sig { params(results: ResultAggregate).void }
+         def adjust_combined_results(results)
+           updated = redis.multi do
+             redis.incrby(key('runs'), results.runs)
+             redis.incrby(key('assertions'), results.assertions)
+             redis.incrby(key('passes'), results.passes)
+             redis.incrby(key('failures'), results.failures)
+             redis.incrby(key('errors'), results.errors)
+             redis.incrby(key('skips'), results.skips)
+             redis.incrby(key('reruns'), results.reruns)
+             redis.incrby(key('acks'), results.acks)
+             redis.incrby(key('size'), results.size)
+           end
+
+           @combined_results = ResultAggregate.new(runs: updated[0], assertions: updated[1], passes: updated[2],
+             failures: updated[3], errors: updated[4], skips: updated[5], reruns: updated[6],
+             acks: updated[7], size: updated[8])
+         end
+
+         sig { params(name: String).returns(String) }
+         def key(name)
+           "minitest/#{configuration.run_id}/#{name}"
+         end
+
+         sig { params(batch: T::Array[EnqueuedRunnable], reporter: AbstractReporter).returns(Integer) }
+         def process_batch(batch, reporter)
+           to_be_acked = {}
+
+           batch.each do |enqueued_runnable|
+             local_results.size += 1
+             reporter.prerecord(enqueued_runnable.runnable_class, enqueued_runnable.method_name)
+             result = enqueued_runnable.run
+
+             case (result_type = ResultType.of(result))
+             when ResultType::Passed
+               # noop
+             when ResultType::Skipped
+               redis.lpush(key('skip_list'), Marshal.dump(enqueued_runnable.serialize))
+             when ResultType::Failed
+               redis.lpush(key('failure_list'), Marshal.dump(enqueued_runnable.serialize))
+             when ResultType::Error
+               redis.lpush(key('error_list'), Marshal.dump(enqueued_runnable.serialize))
+             else
+               T.absurd(result_type)
+             end
+
+             local_results.update_with_result(result)
+             to_be_acked[enqueued_runnable.execution_id] = result
+           end
+
+           return 0 if to_be_acked.empty?
+
+           acked = redis.evalsha(
+             ack_batch_script,
+             keys: [stream_key],
+             argv: [group_name] + to_be_acked.keys
+           )
+
+           batch_results = ResultAggregate.new(acks: acked.length)
+           acked.each do |execution_id|
+             acked_result = to_be_acked.delete(execution_id)
+             reporter.record(acked_result)
+             batch_results.update_with_result(acked_result)
+           end
+
+           to_be_acked.each do |_execution_id, unacked_result|
+             # TODO: use custom assertion class.
+             discard_assertion = Minitest::Skip.new("The test result was discarded, " \
+               "because the test has been claimed by another worker.")
+             discard_assertion.set_backtrace(caller)
+             unacked_result.failures = [discard_assertion]
+             reporter.record(unacked_result)
+           end
+
+           adjust_combined_results(batch_results)
+           local_results.acks += acked.length
+           acked.length
+         end
+
+         INITIAL_BACKOFF = 10 # milliseconds
+         private_constant :INITIAL_BACKOFF
+       end
+     end
+   end
+ end
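The class documentation above assumes several workers starting at roughly the same time with an identical MINITEST_RUN_ID, so that exactly one of them wins the XGROUP leader election. The sketch below (not part of the released files; the Redis URL and test command are placeholders) shows one way such a fleet of workers could be launched from a single machine.

run_id = "ci-build-1234" # in CI this would typically come from the build identifier

# Spawn four workers that share the run ID but have unique worker IDs.
pids = 4.times.map do |i|
  spawn(
    {
      "MINITEST_COORDINATOR" => "redis://localhost:6379/1",
      "MINITEST_RUN_ID"      => run_id,
      "MINITEST_WORKER_ID"   => "worker-#{i}",
    },
    "bundle", "exec", "rake", "test"
  )
end

pids.each { |pid| Process.wait(pid) }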