minitest-distributed 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/ruby.yml +48 -0
- data/.gitignore +8 -0
- data/.rubocop.yml +63 -0
- data/.travis.yml +6 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +12 -0
- data/Gemfile.lock +53 -0
- data/LICENSE.txt +21 -0
- data/README.md +115 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/rake +29 -0
- data/bin/rubocop +29 -0
- data/bin/setup +8 -0
- data/bin/srb +29 -0
- data/lib/minitest/distributed.rb +36 -0
- data/lib/minitest/distributed/configuration.rb +53 -0
- data/lib/minitest/distributed/coordinators/coordinator_interface.rb +29 -0
- data/lib/minitest/distributed/coordinators/memory_coordinator.rb +67 -0
- data/lib/minitest/distributed/coordinators/redis_coordinator.rb +387 -0
- data/lib/minitest/distributed/enqueued_runnable.rb +88 -0
- data/lib/minitest/distributed/filters/exclude_filter.rb +35 -0
- data/lib/minitest/distributed/filters/filter_interface.rb +25 -0
- data/lib/minitest/distributed/filters/include_filter.rb +35 -0
- data/lib/minitest/distributed/reporters/distributed_progress_reporter.rb +76 -0
- data/lib/minitest/distributed/reporters/distributed_summary_reporter.rb +48 -0
- data/lib/minitest/distributed/reporters/redis_coordinator_warnings_reporter.rb +61 -0
- data/lib/minitest/distributed/result_aggregate.rb +67 -0
- data/lib/minitest/distributed/result_type.rb +28 -0
- data/lib/minitest/distributed/test_runner.rb +37 -0
- data/lib/minitest/distributed/test_selector.rb +54 -0
- data/lib/minitest/distributed/version.rb +8 -0
- data/lib/minitest/distributed_plugin.rb +51 -0
- data/minitest-distributed.gemspec +50 -0
- data/sorbet/config +2 -0
- data/sorbet/rbi/minitest.rbi +238 -0
- data/sorbet/rbi/rbconfig.rbi +6 -0
- data/sorbet/rbi/redis.rbi +70 -0
- data/sorbet/rbi/winsize.rbi +7 -0
- metadata +142 -0
data/bin/rake
ADDED
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+#
+# This file was generated by Bundler.
+#
+# The application 'rake' is installed as part of a gem, and
+# this file is here to facilitate running it.
+#
+
+require "pathname"
+ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
+  Pathname.new(__FILE__).realpath)
+
+bundle_binstub = File.expand_path("../bundle", __FILE__)
+
+if File.file?(bundle_binstub)
+  if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
+    load(bundle_binstub)
+  else
+    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
+Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
+  end
+end
+
+require "rubygems"
+require "bundler/setup"
+
+load(Gem.bin_path("rake", "rake"))
data/bin/rubocop
ADDED
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+#
+# This file was generated by Bundler.
+#
+# The application 'rubocop' is installed as part of a gem, and
+# this file is here to facilitate running it.
+#
+
+require "pathname"
+ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
+  Pathname.new(__FILE__).realpath)
+
+bundle_binstub = File.expand_path("../bundle", __FILE__)
+
+if File.file?(bundle_binstub)
+  if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
+    load(bundle_binstub)
+  else
+    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
+Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
+  end
+end
+
+require "rubygems"
+require "bundler/setup"
+
+load(Gem.bin_path("rubocop", "rubocop"))
data/bin/setup
ADDED
data/bin/srb
ADDED
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+#
+# This file was generated by Bundler.
+#
+# The application 'srb' is installed as part of a gem, and
+# this file is here to facilitate running it.
+#
+
+require "pathname"
+ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
+  Pathname.new(__FILE__).realpath)
+
+bundle_binstub = File.expand_path("../bundle", __FILE__)
+
+if File.file?(bundle_binstub)
+  if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
+    load(bundle_binstub)
+  else
+    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
+Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
+  end
+end
+
+require "rubygems"
+require "bundler/setup"
+
+load(Gem.bin_path("sorbet", "srb"))
data/lib/minitest/distributed.rb
ADDED
@@ -0,0 +1,36 @@
+# typed: strict
+# frozen_string_literal: true
+
+require 'minitest'
+require 'sorbet-runtime'
+
+require "minitest/distributed/configuration"
+require "minitest/distributed/test_runner"
+require "minitest/distributed/test_selector"
+require "minitest/distributed/enqueued_runnable"
+require "minitest/distributed/result_type"
+require "minitest/distributed/result_aggregate"
+require "minitest/distributed/filters/filter_interface"
+require "minitest/distributed/filters/include_filter"
+require "minitest/distributed/filters/exclude_filter"
+require "minitest/distributed/coordinators/coordinator_interface"
+require "minitest/distributed/coordinators/memory_coordinator"
+require "minitest/distributed/coordinators/redis_coordinator"
+require "minitest/distributed/reporters/redis_coordinator_warnings_reporter"
+require "minitest/distributed/reporters/distributed_progress_reporter"
+require "minitest/distributed/reporters/distributed_summary_reporter"
+
+module Minitest
+  module Distributed
+    class Error < StandardError; end
+
+    module TestRunnerPatch
+      extend T::Sig
+
+      sig { params(reporter: Minitest::AbstractReporter, options: T::Hash[Symbol, T.untyped]).void }
+      def __run(reporter, options)
+        TestRunner.new(options).run(reporter)
+      end
+    end
+  end
+end
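The `TestRunnerPatch` module above replaces Minitest's `__run` entry point, but it only takes effect once it is prepended onto Minitest itself. That wiring lives in data/lib/minitest/distributed_plugin.rb, whose body is not shown in this diff. A minimal sketch of how a Minitest plugin typically performs this hook-up; the wiring shown here is an assumption about this gem's internals, not a copy of its plugin file:

    # Minitest discovers plugins by requiring minitest/<name>_plugin
    # and calling plugin_<name>_init(options) before running tests.
    require "minitest/distributed"

    module Minitest
      class << self
        # Prepending means our __run override is found before Minitest's own,
        # so TestRunner.new(options).run(reporter) takes over the run loop.
        prepend Minitest::Distributed::TestRunnerPatch
      end
    end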
data/lib/minitest/distributed/configuration.rb
ADDED
@@ -0,0 +1,53 @@
+# typed: strict
+# frozen_string_literal: true
+
+require 'uri'
+require 'securerandom'
+
+module Minitest
+  module Distributed
+    class Configuration < T::Struct
+      DEFAULT_BATCH_SIZE = 10
+      DEFAULT_MAX_ATTEMPTS = 3
+      DEFAULT_TEST_TIMEOUT = 30_000 # milliseconds
+
+      class << self
+        extend T::Sig
+
+        sig { params(env: T::Hash[String, T.nilable(String)]).returns(T.attached_class) }
+        def from_env(env = ENV.to_h)
+          new(
+            coordinator_uri: URI(env['MINITEST_COORDINATOR'] || 'memory:'),
+            run_id: env['MINITEST_RUN_ID'] || SecureRandom.uuid,
+            worker_id: env['MINITEST_WORKER_ID'] || SecureRandom.uuid,
+            test_timeout: Integer(env['MINITEST_TEST_TIMEOUT'] || DEFAULT_TEST_TIMEOUT),
+            test_batch_size: Integer(env['MINITEST_TEST_BATCH_SIZE'] || DEFAULT_BATCH_SIZE),
+            max_attempts: Integer(env['MINITEST_MAX_ATTEMPTS'] || DEFAULT_MAX_ATTEMPTS),
+          )
+        end
+      end
+
+      extend T::Sig
+
+      prop :coordinator_uri, URI::Generic, default: URI('memory:')
+      prop :run_id, String, factory: -> { SecureRandom.uuid }
+      prop :worker_id, String, factory: -> { SecureRandom.uuid }
+      prop :test_timeout, Integer, default: DEFAULT_TEST_TIMEOUT
+      prop :test_batch_size, Integer, default: DEFAULT_BATCH_SIZE
+      prop :max_attempts, Integer, default: DEFAULT_MAX_ATTEMPTS
+
+      sig { returns(Coordinators::CoordinatorInterface) }
+      def coordinator
+        @coordinator = T.let(@coordinator, T.nilable(Coordinators::CoordinatorInterface))
+        @coordinator ||= case coordinator_uri.scheme
+        when 'redis'
+          Coordinators::RedisCoordinator.new(configuration: self)
+        when 'memory'
+          Coordinators::MemoryCoordinator.new(configuration: self)
+        else
+          raise NotImplementedError, "Unknown coordinator implementation: #{coordinator_uri.scheme}"
+        end
+      end
+    end
+  end
+end
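Because `from_env` falls back to the defaults above for every unset variable, a distributed run can be configured entirely from the shell, and a bare `Configuration.new` is also valid. A short usage sketch; the Redis URL is a placeholder:

    require "minitest/distributed"

    # From the environment, as a CI runner would set it up:
    ENV["MINITEST_COORDINATOR"] = "redis://localhost:6379/1"
    ENV["MINITEST_TEST_TIMEOUT"] = "60000" # milliseconds
    config = Minitest::Distributed::Configuration.from_env
    config.coordinator # => a Coordinators::RedisCoordinator

    # Or programmatically, via the T::Struct props:
    config = Minitest::Distributed::Configuration.new(coordinator_uri: URI("memory:"))
    config.coordinator # => a Coordinators::MemoryCoordinator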
data/lib/minitest/distributed/coordinators/coordinator_interface.rb
ADDED
@@ -0,0 +1,29 @@
+# typed: strict
+# frozen_string_literal: true
+
+module Minitest
+  module Distributed
+    module Coordinators
+      module CoordinatorInterface
+        extend T::Sig
+        extend T::Helpers
+        interface!
+
+        sig { abstract.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
+        def register_reporters(reporter:, options:); end
+
+        sig { abstract.returns(ResultAggregate) }
+        def local_results; end
+
+        sig { abstract.returns(ResultAggregate) }
+        def combined_results; end
+
+        sig { abstract.params(test_selector: TestSelector).void }
+        def produce(test_selector:); end
+
+        sig { abstract.params(reporter: Minitest::AbstractReporter).void }
+        def consume(reporter:); end
+      end
+    end
+  end
+end
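`Configuration#coordinator` above dispatches on the URI scheme, so adding a backend means including this module and implementing all five methods. A hypothetical skeleton of a conforming backend, not part of this gem:

    module Minitest
      module Distributed
        module Coordinators
          # Hypothetical no-op backend, shown only to illustrate the contract.
          class NullCoordinator
            extend T::Sig
            include CoordinatorInterface

            sig { override.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
            def register_reporters(reporter:, options:); end

            sig { override.returns(ResultAggregate) }
            def local_results
              @local_results ||= ResultAggregate.new
            end
            alias_method :combined_results, :local_results

            sig { override.params(test_selector: TestSelector).void }
            def produce(test_selector:); end

            sig { override.params(reporter: Minitest::AbstractReporter).void }
            def consume(reporter:); end
          end
        end
      end
    end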
data/lib/minitest/distributed/coordinators/memory_coordinator.rb
ADDED
@@ -0,0 +1,67 @@
+# typed: strict
+# frozen_string_literal: true
+
+module Minitest
+  module Distributed
+    module Coordinators
+      class MemoryCoordinator
+        extend T::Sig
+        include CoordinatorInterface
+
+        sig { returns(Configuration) }
+        attr_reader :configuration
+
+        sig { returns(Queue) }
+        attr_reader :queue
+
+        sig { override.returns(ResultAggregate) }
+        attr_reader :local_results
+
+        alias_method :combined_results, :local_results
+
+        sig { params(configuration: Configuration).void }
+        def initialize(configuration:)
+          @configuration = configuration
+
+          @leader = T.let(Mutex.new, Mutex)
+          @queue = T.let(Queue.new, Queue)
+          @local_results = T.let(ResultAggregate.new, ResultAggregate)
+        end
+
+        sig { override.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
+        def register_reporters(reporter:, options:)
+          # No need for any additional reporters
+        end
+
+        sig { override.params(test_selector: TestSelector).void }
+        def produce(test_selector:)
+          if @leader.try_lock
+            tests = test_selector.tests
+            @local_results.size = tests.size
+            if tests.empty?
+              queue.close
+            else
+              tests.each { |test| queue << test }
+            end
+          end
+        end
+
+        sig { override.params(reporter: AbstractReporter).void }
+        def consume(reporter:)
+          until queue.empty? && queue.closed?
+            enqueued_runnable = queue.pop
+            reporter.prerecord(enqueued_runnable.runnable_class, enqueued_runnable.method_name)
+            result = enqueued_runnable.run
+
+            local_results.update_with_result(result)
+            local_results.acks += 1
+
+            reporter.record(result)
+
+            queue.close if local_results.completed?
+          end
+        end
+      end
+    end
+  end
+end
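Leader election in this in-process coordinator is just `Mutex#try_lock`: the first caller of `produce` acquires the mutex, fills the queue, and never unlocks, so every later call is a no-op. A sketch of the resulting flow, where `selector` and `reporter` stand in for a real TestSelector and a Minitest reporter:

    config = Minitest::Distributed::Configuration.new # coordinator_uri defaults to memory:
    coordinator = config.coordinator                  # => a MemoryCoordinator

    coordinator.produce(test_selector: selector) # first caller wins try_lock and enqueues everything
    coordinator.consume(reporter: reporter)      # pops, runs, and records until everything is acked
    coordinator.combined_results.completed?      # => true once acks == size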
data/lib/minitest/distributed/coordinators/redis_coordinator.rb
ADDED
@@ -0,0 +1,387 @@
+# typed: strict
+# frozen_string_literal: true
+
+require 'redis'
+
+module Minitest
+  module Distributed
+    module Coordinators
+      # The RedisCoordinator is an implementation of the test coordinator interface
+      # using a Redis stream + consumergroup for coordination.
+      #
+      # We assume a bunch of workers will be started at the same time. Every worker
+      # will try to become the leader by trying to create the consumergroup. Only one
+      # will succeed, which will then continue to populate the list of tests to run
+      # to the stream.
+      #
+      # After that, all workers will start consuming from the stream. They will first
+      # try to claim stale entries from other workers (determined by the `test_timeout`
+      # option), and process them up to a maximum of `max_attempts` attempts. Then,
+      # they will consume tests from the stream, run them, and ack them. This is done
+      # in batches to reduce load on Redis.
+      #
+      # Finally, when we have acked the same number of tests as we populated into the
+      # queue, the run is considered finished. The first worker to detect this will
+      # remove the consumergroup and the associated stream from Redis.
+      #
+      # If a worker starts for the same run_id while it is already considered completed,
+      # it will start a "retry run". It will find all the tests that failed/errored on
+      # the previous attempt, and schedule only those tests to be run, rather than the
+      # full test suite returned by the test selector. This can be useful to retry flaky
+      # tests. Subsequent workers coming online will join this worker to form a consumer
+      # group exactly as described above.
+      class RedisCoordinator
+        extend T::Sig
+        include CoordinatorInterface
+
+        sig { returns(Configuration) }
+        attr_reader :configuration
+
+        sig { returns(String) }
+        attr_reader :stream_key
+
+        sig { returns(String) }
+        attr_reader :group_name
+
+        sig { override.returns(ResultAggregate) }
+        attr_reader :local_results
+
+        sig { returns(T::Set[EnqueuedRunnable]) }
+        attr_reader :reclaimed_tests
+
+        sig { params(configuration: Configuration).void }
+        def initialize(configuration:)
+          @configuration = configuration
+
+          @redis = T.let(nil, T.nilable(Redis))
+          @stream_key = T.let(key('queue'), String)
+          @group_name = T.let('minitest-distributed', String)
+          @local_results = T.let(ResultAggregate.new, ResultAggregate)
+          @combined_results = T.let(nil, T.nilable(ResultAggregate))
+          @reclaimed_tests = T.let(Set.new, T::Set[EnqueuedRunnable])
+        end
+
+        sig { override.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
+        def register_reporters(reporter:, options:)
+          reporter << Reporters::RedisCoordinatorWarningsReporter.new(options[:io], options)
+        end
+
+        sig { override.returns(ResultAggregate) }
+        def combined_results
+          @combined_results ||= begin
+            stats_as_string = redis.mget(key('runs'), key('assertions'), key('passes'),
+              key('failures'), key('errors'), key('skips'), key('reruns'), key('acks'), key('size'))
+
+            ResultAggregate.new(
+              runs: Integer(stats_as_string.fetch(0) || 0),
+              assertions: Integer(stats_as_string.fetch(1) || 0),
+              passes: Integer(stats_as_string.fetch(2) || 0),
+              failures: Integer(stats_as_string.fetch(3) || 0),
+              errors: Integer(stats_as_string.fetch(4) || 0),
+              skips: Integer(stats_as_string.fetch(5) || 0),
+              reruns: Integer(stats_as_string.fetch(6) || 0),
+              acks: Integer(stats_as_string.fetch(7) || 0),
+
+              # In the case where we have no build size number published yet, we initialize
+              # the size of the test suite to be arbitrarily large, to make sure it is
+              # higher than the number of acks, so the run is not considered completed yet.
+              size: Integer(stats_as_string.fetch(8) || 2_147_483_647),
+            )
+          end
+        end
+
+        sig { override.params(test_selector: TestSelector).void }
+        def produce(test_selector:)
+          # Whoever ends up creating the consumer group will act as leader,
+          # and publish the list of tests to the stream.
+
+          begin
+            # When using `redis.multi`, the second DEL command gets executed even if the initial XGROUP
+            # fails. This is bad, because only the leader should be issuing the DEL command.
+            # When using EVAL and a Lua script, the script aborts after the first XGROUP command
+            # fails, and the DEL never gets executed for followers.
+            redis.evalsha(
+              register_consumergroup_script,
+              keys: [stream_key, key('size'), key('acks')],
+              argv: [group_name],
+            )
+
+          rescue Redis::CommandError => ce
+            if ce.message.include?('BUSYGROUP')
+              # If Redis returns a BUSYGROUP error, it means that the consumer group already
+              # exists. In our case, it means that another worker managed to successfully
+              # run the XGROUP command, and will act as leader and publish the tests.
+              # This worker can simply move on to consumer mode.
+              return
+            else
+              raise
+            end
+          end
+
+          run_attempt, previous_failures, previous_errors, _deleted = redis.multi do
+            redis.incr(key('attempt'))
+            redis.lrange(key('failure_list'), 0, -1)
+            redis.lrange(key('error_list'), 0, -1)
+            redis.del(key('failure_list'), key('error_list'))
+          end
+
+          tests = if run_attempt == 1
+            # If this is the first attempt for this run ID, we will schedule the full
+            # test suite as returned by the test selector to run.
+            test_selector.tests
+          else
+            # For subsequent attempts, we check the list of previous failures and
+            # errors, and only schedule to re-run those tests. This allows for faster
+            # retries of potentially flaky tests.
+            (previous_failures + previous_errors).map do |test_to_retry|
+              EnqueuedRunnable.from_hash!(Marshal.load(test_to_retry))
+            end
+          end
+
+          # We set the `size` key to the number of tests we are planning to schedule.
+          # This will allow workers to tell when the run is done. We also adjust the
+          # number of failures and errors in case of a retry run.
+          adjust_combined_results(ResultAggregate.new(
+            size: tests.size,
+            failures: -previous_failures.length,
+            errors: -previous_errors.length,
+            reruns: previous_failures.length + previous_errors.length,
+          ))
+
+          # TODO: break this up in batches.
+          tests.each { |test| redis.xadd(stream_key, test.serialize) }
+        end
+
+        sig { override.params(reporter: AbstractReporter).void }
+        def consume(reporter:)
+          exponential_backoff = INITIAL_BACKOFF
+          loop do
+            # First, see if there are any pending tests from other workers to claim.
+            stale_runnables = claim_stale_runnables
+            stale_processed = process_batch(stale_runnables, reporter)
+
+            # Finally, try to process a regular batch of messages
+            fresh_runnables = claim_fresh_runnables(block: exponential_backoff)
+            fresh_processed = process_batch(fresh_runnables, reporter)
+
+            # If we have acked the same amount of tests as we were supposed to, the run
+            # is complete and we can exit our loop. Generally, only one worker will detect
+            # this condition. The other workers will quit their consumer loop because the
+            # consumergroup will be deleted by the first worker, and their Redis commands
+            # will start to fail - see the rescue block below.
+            break if combined_results.completed?
+
+            # To make sure we don't end up in a busy loop overwhelming Redis with commands
+            # when there is no work to do, we increase the blocking time exponentially,
+            # and reset it to the initial value if we processed any messages.
+            if stale_processed > 0 || fresh_processed > 0
+              exponential_backoff = INITIAL_BACKOFF
+            else
+              exponential_backoff <<= 1
+            end
+          end
+
+          cleanup
+        rescue Redis::CommandError => ce
+          if ce.message.start_with?('NOGROUP')
+            # When a Redis consumer group command fails with a NOGROUP error, we assume the
+            # consumer group was deleted by the first worker that detected the run is complete.
+            # So this worker can exit its loop as well.
+
+            # We have to invalidate the local combined_results cache so we get fresh
+            # final values from Redis when we try to report results in our summarizer.
+            @combined_results = nil
+          else
+            raise
+          end
+        end
+
+        private
+
+        sig { returns(Redis) }
+        def redis
+          @redis ||= Redis.new(url: configuration.coordinator_uri)
+        end
+
+        sig { returns(String) }
+        def ack_batch_script
+          @ack_batch_script = T.let(@ack_batch_script, T.nilable(String))
+          @ack_batch_script ||= redis.script(:load, <<~LUA)
+            local acked_ids, acked, i = {}, 0, 2
+            while ARGV[i] do
+              if redis.call('XACK', KEYS[1], ARGV[1], ARGV[i]) > 0 then
+                acked = acked + 1
+                acked_ids[acked] = ARGV[i]
+              end
+              i = i + 1
+            end
+            return acked_ids
+          LUA
+        end
+
+        sig { returns(String) }
+        def register_consumergroup_script
+          @register_consumergroup_script = T.let(@register_consumergroup_script, T.nilable(String))
+          @register_consumergroup_script ||= redis.script(:load, <<~LUA)
+            redis.call('XGROUP', 'CREATE', KEYS[1], ARGV[1], '0', 'MKSTREAM')
+            redis.call('DEL', KEYS[2], KEYS[3])
+          LUA
+        end
+
+        sig { params(block: Integer).returns(T::Array[EnqueuedRunnable]) }
+        def claim_fresh_runnables(block:)
+          result = redis.xreadgroup(group_name, configuration.worker_id, stream_key, '>',
+            block: block, count: configuration.test_batch_size)
+          EnqueuedRunnable.from_redis_stream_claim(result.fetch(stream_key, []))
+        end
+
+        sig { returns(T::Array[EnqueuedRunnable]) }
+        def claim_stale_runnables
+          # When we have to reclaim stale tests, those tests are potentially too slow
+          # to run inside the test timeout. We only claim one test at a time in order
+          # to prevent the exact same batch from being too slow on repeated attempts,
+          # which would cause us to mark all the tests in that batch as failed.
+          #
+          # This has the side effect that for a retried test, the test timeout
+          # will be TEST_TIMEOUT * BATCH_SIZE in practice. This gives us a higher
+          # likelihood that the test will pass if the batch size > 1.
+          pending = redis.xpending(stream_key, group_name, '-', '+', 1)
+
+          # Every test is allowed to take test_timeout milliseconds. Because we process tests in
+          # batches, they should never be pending for TEST_TIMEOUT * BATCH_SIZE milliseconds.
+          # So, only try to claim messages older than that, with a bit of jitter.
+          max_idle_time = configuration.test_timeout * configuration.test_batch_size
+          max_idle_time_with_jitter = max_idle_time * rand(1.0...1.2)
+          to_claim = pending.each_with_object({}) do |message, hash|
+            if message['elapsed'] > max_idle_time_with_jitter
+              hash[message.fetch('entry_id')] = message
+            end
+          end
+
+          if to_claim.empty?
+            []
+          else
+            claimed = redis.xclaim(stream_key, group_name, configuration.worker_id, max_idle_time, to_claim.keys)
+            enqueued_runnables = EnqueuedRunnable.from_redis_stream_claim(claimed)
+            enqueued_runnables.each do |er|
+              # `count` will be set to the current attempt of a different worker that has timed out.
+              # The attempt we are going to try will be the next one, so add one.
+              attempt = to_claim.fetch(er.execution_id).fetch('count') + 1
+              if attempt > configuration.max_attempts
+                # If we exhaust our attempts, we will mark the test to immediately fail when it is run next.
+                mark_runnable_to_fail_immediately(er)
+              else
+                reclaimed_tests << er
+              end
+            end
+
+            enqueued_runnables
+          end
+        end
+
+        sig { void }
+        def cleanup
+          redis.xgroup(:destroy, stream_key, group_name)
+          redis.del(stream_key)
+        rescue Redis::CommandError
+          # Apparently another consumer already removed the consumer group,
+          # so we can assume that all the Redis cleanup was completed.
+        end
+
+        sig { params(er: EnqueuedRunnable).void }
+        def mark_runnable_to_fail_immediately(er)
+          assertion = Minitest::Assertion.new(<<~EOM.chomp)
+            This test takes too long to run (> #{configuration.test_timeout}ms).
+
+            We have tried running this test #{configuration.max_attempts} times on different workers, but every time the worker has not reported back a result within #{configuration.test_timeout}ms.
+            Try to make the test faster, or increase the test timeout.
+          EOM
+          assertion.set_backtrace(caller)
+          er.canned_failure = assertion
+        end
+
+        sig { params(results: ResultAggregate).void }
+        def adjust_combined_results(results)
+          updated = redis.multi do
+            redis.incrby(key('runs'), results.runs)
+            redis.incrby(key('assertions'), results.assertions)
+            redis.incrby(key('passes'), results.passes)
+            redis.incrby(key('failures'), results.failures)
+            redis.incrby(key('errors'), results.errors)
+            redis.incrby(key('skips'), results.skips)
+            redis.incrby(key('reruns'), results.reruns)
+            redis.incrby(key('acks'), results.acks)
+            redis.incrby(key('size'), results.size)
+          end
+
+          @combined_results = ResultAggregate.new(runs: updated[0], assertions: updated[1], passes: updated[2],
+            failures: updated[3], errors: updated[4], skips: updated[5], reruns: updated[6],
+            acks: updated[7], size: updated[8])
+        end
+
+        sig { params(name: String).returns(String) }
+        def key(name)
+          "minitest/#{configuration.run_id}/#{name}"
+        end
+
+        sig { params(batch: T::Array[EnqueuedRunnable], reporter: AbstractReporter).returns(Integer) }
+        def process_batch(batch, reporter)
+          to_be_acked = {}
+
+          batch.each do |enqueued_runnable|
+            local_results.size += 1
+            reporter.prerecord(enqueued_runnable.runnable_class, enqueued_runnable.method_name)
+            result = enqueued_runnable.run
+
+            case (result_type = ResultType.of(result))
+            when ResultType::Passed
+              # noop
+            when ResultType::Skipped
+              redis.lpush(key('skip_list'), Marshal.dump(enqueued_runnable.serialize))
+            when ResultType::Failed
+              redis.lpush(key('failure_list'), Marshal.dump(enqueued_runnable.serialize))
+            when ResultType::Error
+              redis.lpush(key('error_list'), Marshal.dump(enqueued_runnable.serialize))
+            else
+              T.absurd(result_type)
+            end
+
+            local_results.update_with_result(result)
+            to_be_acked[enqueued_runnable.execution_id] = result
+          end
+
+          return 0 if to_be_acked.empty?
+
+          acked = redis.evalsha(
+            ack_batch_script,
+            keys: [stream_key],
+            argv: [group_name] + to_be_acked.keys
+          )
+
+          batch_results = ResultAggregate.new(acks: acked.length)
+          acked.each do |execution_id|
+            acked_result = to_be_acked.delete(execution_id)
+            reporter.record(acked_result)
+            batch_results.update_with_result(acked_result)
+          end
+
+          to_be_acked.each do |_execution_id, unacked_result|
+            # TODO: use custom assertion class.
+            discard_assertion = Minitest::Skip.new("The test result was discarded, " \
+              "because the test has been claimed by another worker.")
+            discard_assertion.set_backtrace(caller)
+            unacked_result.failures = [discard_assertion]
+            reporter.record(unacked_result)
+          end
+
+          adjust_combined_results(batch_results)
+          local_results.acks += acked.length
+          acked.length
+        end
+
+        INITIAL_BACKOFF = 10 # milliseconds
+        private_constant :INITIAL_BACKOFF
+      end
+    end
+  end
+end
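The leader election described in the class comment rests on a single Redis property: XGROUP CREATE is atomic and fails with BUSYGROUP when the group already exists, so exactly one of N concurrent workers can create it. A standalone sketch of that primitive using redis-rb, independent of this gem; the stream and group names are placeholders:

    require "redis"

    redis = Redis.new(url: "redis://localhost:6379/1")

    begin
      # MKSTREAM creates the empty stream together with the consumer group.
      redis.xgroup(:create, "minitest/my-run/queue", "minitest-distributed", "0", mkstream: true)
      leader = true # we created the group, so we get to publish the work
    rescue Redis::CommandError => e
      raise unless e.message.include?("BUSYGROUP")
      leader = false # another worker won the race; go straight to consuming
    end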