minitest-distributed 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/ruby.yml +48 -0
- data/.gitignore +8 -0
- data/.rubocop.yml +63 -0
- data/.travis.yml +6 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +12 -0
- data/Gemfile.lock +53 -0
- data/LICENSE.txt +21 -0
- data/README.md +115 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/rake +29 -0
- data/bin/rubocop +29 -0
- data/bin/setup +8 -0
- data/bin/srb +29 -0
- data/lib/minitest/distributed.rb +36 -0
- data/lib/minitest/distributed/configuration.rb +53 -0
- data/lib/minitest/distributed/coordinators/coordinator_interface.rb +29 -0
- data/lib/minitest/distributed/coordinators/memory_coordinator.rb +67 -0
- data/lib/minitest/distributed/coordinators/redis_coordinator.rb +387 -0
- data/lib/minitest/distributed/enqueued_runnable.rb +88 -0
- data/lib/minitest/distributed/filters/exclude_filter.rb +35 -0
- data/lib/minitest/distributed/filters/filter_interface.rb +25 -0
- data/lib/minitest/distributed/filters/include_filter.rb +35 -0
- data/lib/minitest/distributed/reporters/distributed_progress_reporter.rb +76 -0
- data/lib/minitest/distributed/reporters/distributed_summary_reporter.rb +48 -0
- data/lib/minitest/distributed/reporters/redis_coordinator_warnings_reporter.rb +61 -0
- data/lib/minitest/distributed/result_aggregate.rb +67 -0
- data/lib/minitest/distributed/result_type.rb +28 -0
- data/lib/minitest/distributed/test_runner.rb +37 -0
- data/lib/minitest/distributed/test_selector.rb +54 -0
- data/lib/minitest/distributed/version.rb +8 -0
- data/lib/minitest/distributed_plugin.rb +51 -0
- data/minitest-distributed.gemspec +50 -0
- data/sorbet/config +2 -0
- data/sorbet/rbi/minitest.rbi +238 -0
- data/sorbet/rbi/rbconfig.rbi +6 -0
- data/sorbet/rbi/redis.rbi +70 -0
- data/sorbet/rbi/winsize.rbi +7 -0
- metadata +142 -0
data/bin/rake
ADDED
@@ -0,0 +1,29 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# This file was generated by Bundler.
#
# The application 'rake' is installed as part of a gem, and
# this file is here to facilitate running it.
#

require "pathname"
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
  Pathname.new(__FILE__).realpath)

bundle_binstub = File.expand_path("../bundle", __FILE__)

if File.file?(bundle_binstub)
  if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
    load(bundle_binstub)
  else
    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
  end
end

require "rubygems"
require "bundler/setup"

load(Gem.bin_path("rake", "rake"))
data/bin/rubocop
ADDED
@@ -0,0 +1,29 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# This file was generated by Bundler.
#
# The application 'rubocop' is installed as part of a gem, and
# this file is here to facilitate running it.
#

require "pathname"
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
  Pathname.new(__FILE__).realpath)

bundle_binstub = File.expand_path("../bundle", __FILE__)

if File.file?(bundle_binstub)
  if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
    load(bundle_binstub)
  else
    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
  end
end

require "rubygems"
require "bundler/setup"

load(Gem.bin_path("rubocop", "rubocop"))
data/bin/setup
ADDED
data/bin/srb
ADDED
@@ -0,0 +1,29 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# This file was generated by Bundler.
#
# The application 'srb' is installed as part of a gem, and
# this file is here to facilitate running it.
#

require "pathname"
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
  Pathname.new(__FILE__).realpath)

bundle_binstub = File.expand_path("../bundle", __FILE__)

if File.file?(bundle_binstub)
  if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
    load(bundle_binstub)
  else
    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
  end
end

require "rubygems"
require "bundler/setup"

load(Gem.bin_path("sorbet", "srb"))
@@ -0,0 +1,36 @@
# typed: strict
# frozen_string_literal: true

require "minitest"
require "sorbet-runtime"

require "minitest/distributed/configuration"
require "minitest/distributed/test_runner"
require "minitest/distributed/test_selector"
require "minitest/distributed/enqueued_runnable"
require "minitest/distributed/result_type"
require "minitest/distributed/result_aggregate"
require "minitest/distributed/filters/filter_interface"
require "minitest/distributed/filters/include_filter"
require "minitest/distributed/filters/exclude_filter"
require "minitest/distributed/coordinators/coordinator_interface"
require "minitest/distributed/coordinators/memory_coordinator"
require "minitest/distributed/coordinators/redis_coordinator"
require "minitest/distributed/reporters/redis_coordinator_warnings_reporter"
require "minitest/distributed/reporters/distributed_progress_reporter"
require "minitest/distributed/reporters/distributed_summary_reporter"

module Minitest
  module Distributed
    # Base error class for errors raised by this gem.
    class Error < StandardError; end

    # Patch for Minitest's `__run` entry point: it replaces Minitest's
    # default runner with the distributed TestRunner, which delegates test
    # scheduling to the configured coordinator.
    module TestRunnerPatch
      extend T::Sig

      sig { params(reporter: Minitest::AbstractReporter, options: T::Hash[Symbol, T.untyped]).void }
      def __run(reporter, options)
        TestRunner.new(options).run(reporter)
      end
    end
  end
end
@@ -0,0 +1,53 @@
# typed: strict
# frozen_string_literal: true

require 'uri'
require 'securerandom'

module Minitest
  module Distributed
    # Holds the settings for a distributed test run: which coordinator to
    # use, the identifiers of the run and of this worker, and the tuning
    # knobs for timeouts, batch sizes, and retry attempts.
    class Configuration < T::Struct
      DEFAULT_BATCH_SIZE = 10
      DEFAULT_MAX_ATTEMPTS = 3
      DEFAULT_TEST_TIMEOUT = 30_000 # milliseconds

      class << self
        extend T::Sig

        # Builds a Configuration from environment variables, falling back
        # to the defaults above when a variable is not set.
        sig { params(env: T::Hash[String, T.nilable(String)]).returns(T.attached_class) }
        def from_env(env = ENV.to_h)
          new(
            coordinator_uri: URI(env['MINITEST_COORDINATOR'] || 'memory:'),
            run_id: env['MINITEST_RUN_ID'] || SecureRandom.uuid,
            worker_id: env['MINITEST_WORKER_ID'] || SecureRandom.uuid,
            test_timeout: Integer(env['MINITEST_TEST_TIMEOUT'] || DEFAULT_TEST_TIMEOUT),
            test_batch_size: Integer(env['MINITEST_TEST_BATCH_SIZE'] || DEFAULT_BATCH_SIZE),
            max_attempts: Integer(env['MINITEST_MAX_ATTEMPTS'] || DEFAULT_MAX_ATTEMPTS),
          )
        end
      end

      extend T::Sig

      prop :coordinator_uri, URI::Generic, default: URI('memory:')
      prop :run_id, String, factory: -> { SecureRandom.uuid }
      prop :worker_id, String, factory: -> { SecureRandom.uuid }
      prop :test_timeout, Integer, default: DEFAULT_TEST_TIMEOUT
      prop :test_batch_size, Integer, default: DEFAULT_BATCH_SIZE
      prop :max_attempts, Integer, default: DEFAULT_MAX_ATTEMPTS

      # Instantiates (and memoizes) the coordinator implementation selected
      # by the scheme of `coordinator_uri` (`redis:` or `memory:`).
      sig { returns(Coordinators::CoordinatorInterface) }
      def coordinator
        @coordinator = T.let(@coordinator, T.nilable(Coordinators::CoordinatorInterface))
        @coordinator ||= case coordinator_uri.scheme
        when 'redis'
          Coordinators::RedisCoordinator.new(configuration: self)
        when 'memory'
          Coordinators::MemoryCoordinator.new(configuration: self)
        else
          raise NotImplementedError, "Unknown coordinator implementation: #{coordinator_uri.scheme}"
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
# typed: strict
# frozen_string_literal: true

module Minitest
  module Distributed
    module Coordinators
      # The abstract interface every coordinator implementation must
      # satisfy: registering extra reporters, exposing result aggregates,
      # publishing tests (produce) and running them (consume).
      module CoordinatorInterface
        extend T::Sig
        extend T::Helpers
        interface!

        # Hook for a coordinator to register implementation-specific
        # reporters on Minitest's composite reporter.
        sig { abstract.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
        def register_reporters(reporter:, options:); end

        # Results of the tests executed by this worker only.
        sig { abstract.returns(ResultAggregate) }
        def local_results; end

        # Results across all workers participating in the run.
        sig { abstract.returns(ResultAggregate) }
        def combined_results; end

        # Publishes the tests chosen by the test selector to the queue.
        sig { abstract.params(test_selector: TestSelector).void }
        def produce(test_selector:); end

        # Consumes tests from the queue, runs them, and reports results.
        sig { abstract.params(reporter: Minitest::AbstractReporter).void }
        def consume(reporter:); end
      end
    end
  end
end
@@ -0,0 +1,67 @@
# typed: strict
# frozen_string_literal: true

module Minitest
  module Distributed
    module Coordinators
      # Coordinator that keeps the whole run inside the current process,
      # backed by an in-memory Queue. This is the default (`memory:`)
      # coordinator when no distribution is configured.
      class MemoryCoordinator
        extend T::Sig
        include CoordinatorInterface

        sig { returns(Configuration) }
        attr_reader :configuration

        sig { returns(Queue) }
        attr_reader :queue

        sig { override.returns(ResultAggregate) }
        attr_reader :local_results

        # With a single process there is no difference between the local
        # results and the combined results of the run.
        alias_method :combined_results, :local_results

        sig { params(configuration: Configuration).void }
        def initialize(configuration:)
          @configuration = configuration

          @leader = T.let(Mutex.new, Mutex)
          @queue = T.let(Queue.new, Queue)
          @local_results = T.let(ResultAggregate.new, ResultAggregate)
        end

        sig { override.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
        def register_reporters(reporter:, options:)
          # No need for any additional reporters
        end

        sig { override.params(test_selector: TestSelector).void }
        def produce(test_selector:)
          # Only the first caller to grab the mutex acts as leader and
          # populates the queue; everybody else goes straight to consuming.
          if @leader.try_lock
            tests = test_selector.tests
            @local_results.size = tests.size
            if tests.empty?
              queue.close
            else
              tests.each { |test| queue << test }
            end
          end
        end

        sig { override.params(reporter: AbstractReporter).void }
        def consume(reporter:)
          until queue.empty? && queue.closed?
            enqueued_runnable = queue.pop
            reporter.prerecord(enqueued_runnable.runnable_class, enqueued_runnable.method_name)
            result = enqueued_runnable.run

            local_results.update_with_result(result)
            local_results.acks += 1

            reporter.record(result)

            # Once every enqueued test has been acked, close the queue so
            # any other consumers stop waiting for more work.
            queue.close if local_results.completed?
          end
        end
      end
    end
  end
end
@@ -0,0 +1,387 @@
# typed: strict
# frozen_string_literal: true

require 'redis'

module Minitest
  module Distributed
    module Coordinators
      # The RedisCoordinator is an implementation of the test coordinator interface
      # using a Redis stream + consumergroup for coordination.
      #
      # We assume a bunch of workers will be started at the same time. Every worker
      # will try to become the leader by trying to create the consumergroup. Only one
      # will succeed, which will then continue to populate the list of tests to run
      # to the stream.
      #
      # After that, all workers will start consuming from the stream. They will first
      # try to claim stale entries from other workers (determined by the `test_timeout`
      # option), and process them up to a maximum of `max_attempts` attempts. Then,
      # they will consume tests from the stream, run them, and ack them. This is done
      # in batches to reduce load on Redis.
      #
      # Finally, when we have acked the same number of tests as we populated into the
      # queue, the run is considered finished. The first worker to detect this will
      # remove the consumergroup and the associated stream from Redis.
      #
      # If a worker starts for the same run_id while it is already considered completed,
      # it will start a "retry run". It will find all the tests that failed/errored on
      # the previous attempt, and schedule only those tests to be run, rather than the
      # full test suite returned by the test selector. This can be useful to retry flaky
      # tests. Subsequent workers coming online will join this worker to form a consumer
      # group exactly as described above.
      class RedisCoordinator
        extend T::Sig
        include CoordinatorInterface

        sig { returns(Configuration) }
        attr_reader :configuration

        sig { returns(String) }
        attr_reader :stream_key

        sig { returns(String) }
        attr_reader :group_name

        sig { override.returns(ResultAggregate) }
        attr_reader :local_results

        sig { returns(T::Set[EnqueuedRunnable]) }
        attr_reader :reclaimed_tests

        sig { params(configuration: Configuration).void }
        def initialize(configuration:)
          @configuration = configuration

          @redis = T.let(nil, T.nilable(Redis))
          @stream_key = T.let(key('queue'), String)
          @group_name = T.let('minitest-distributed', String)
          @local_results = T.let(ResultAggregate.new, ResultAggregate)
          @combined_results = T.let(nil, T.nilable(ResultAggregate))
          @reclaimed_tests = T.let(Set.new, T::Set[EnqueuedRunnable])
        end

        sig { override.params(reporter: Minitest::CompositeReporter, options: T::Hash[Symbol, T.untyped]).void }
        def register_reporters(reporter:, options:)
          reporter << Reporters::RedisCoordinatorWarningsReporter.new(options[:io], options)
        end

        # Reads the run-wide counters from Redis and caches them as a
        # ResultAggregate. The cache is invalidated when the consumer loop
        # detects the run has finished.
        sig { override.returns(ResultAggregate) }
        def combined_results
          @combined_results ||= begin
            stats_as_string = redis.mget(key('runs'), key('assertions'), key('passes'),
              key('failures'), key('errors'), key('skips'), key('reruns'), key('acks'), key('size'))

            ResultAggregate.new(
              runs: Integer(stats_as_string.fetch(0) || 0),
              assertions: Integer(stats_as_string.fetch(1) || 0),
              passes: Integer(stats_as_string.fetch(2) || 0),
              failures: Integer(stats_as_string.fetch(3) || 0),
              errors: Integer(stats_as_string.fetch(4) || 0),
              skips: Integer(stats_as_string.fetch(5) || 0),
              reruns: Integer(stats_as_string.fetch(6) || 0),
              acks: Integer(stats_as_string.fetch(7) || 0),

              # In the case where we have no build size number published yet, we initialize
              # the size of the test suite to be arbitrarily large, to make sure it is
              # higher than the number of acks, so the run is not considered completed yet.
              size: Integer(stats_as_string.fetch(8) || 2_147_483_647),
            )
          end
        end

        sig { override.params(test_selector: TestSelector).void }
        def produce(test_selector:)
          # Whoever ends up creating the consumer group will act as leader,
          # and publish the list of tests to the stream.

          begin
            # When using `redis.multi`, the second DEL command gets executed even if the initial XGROUP
            # fails. This is bad, because only the leader should be issuing the DEL command.
            # When using EVAL and a Lua script, the script aborts after the first XGROUP command
            # fails, and the DEL never gets executed for followers.
            redis.evalsha(
              register_consumergroup_script,
              keys: [stream_key, key('size'), key('acks')],
              argv: [group_name],
            )

          rescue Redis::CommandError => ce
            if ce.message.include?('BUSYGROUP')
              # If Redis returns a BUSYGROUP error, it means that the consumer group already
              # exists. In our case, it means that another worker managed to successfully
              # run the XGROUP command, and will act as leader and publish the tests.
              # This worker can simply move on to consumer mode.
              return
            else
              raise
            end
          end

          run_attempt, previous_failures, previous_errors, _deleted = redis.multi do
            redis.incr(key('attempt'))
            redis.lrange(key('failure_list'), 0, -1)
            redis.lrange(key('error_list'), 0, -1)
            redis.del(key('failure_list'), key('error_list'))
          end

          tests = if run_attempt == 1
            # If this is the first attempt for this run ID, we will schedule the full
            # test suite as returned by the test selector to run.
            test_selector.tests
          else
            # For subsequent attempts, we check the list of previous failures and
            # errors, and only schedule to re-run those tests. This allows for faster
            # retries of potentially flaky tests.
            (previous_failures + previous_errors).map do |test_to_retry|
              EnqueuedRunnable.from_hash!(Marshal.load(test_to_retry))
            end
          end

          # We set the `size` key to the number of tests we are planning to schedule.
          # This will allow workers to tell when the run is done. We also adjust the
          # number of failures and errors in case of a retry run.
          adjust_combined_results(ResultAggregate.new(
            size: tests.size,
            failures: -previous_failures.length,
            errors: -previous_errors.length,
            reruns: previous_failures.length + previous_errors.length,
          ))

          # TODO: break this up in batches.
          tests.each { |test| redis.xadd(stream_key, test.serialize) }
        end

        sig { override.params(reporter: AbstractReporter).void }
        def consume(reporter:)
          exponential_backoff = INITIAL_BACKOFF
          loop do
            # First, see if there are any pending tests from other workers to claim.
            stale_runnables = claim_stale_runnables
            stale_processed = process_batch(stale_runnables, reporter)

            # Finally, try to process a regular batch of messages
            fresh_runnables = claim_fresh_runnables(block: exponential_backoff)
            fresh_processed = process_batch(fresh_runnables, reporter)

            # If we have acked the same amount of tests as we were supposed to, the run
            # is complete and we can exit our loop. Generally, only one worker will detect
            # this condition. The other workers will quit their consumer loop because the
            # consumergroup will be deleted by the first worker, and their Redis commands
            # will start to fail - see the rescue block below.
            break if combined_results.completed?

            # To make sure we don't end up in a busy loop overwhelming Redis with commands
            # when there is no work to do, we increase the blocking time exponentially,
            # and reset it to the initial value if we processed any messages
            if stale_processed > 0 || fresh_processed > 0
              exponential_backoff = INITIAL_BACKOFF
            else
              exponential_backoff <<= 1
            end
          end

          cleanup
        rescue Redis::CommandError => ce
          if ce.message.start_with?('NOGROUP')
            # When a redis consumer group command fails with a NOGROUP error, we assume the
            # consumer group was deleted by the first worker that detected the run is complete.
            # So this worker can exit its loop as well.

            # We have to invalidate the local combined_results cache so we get fresh
            # final values from Redis when we try to report results in our summarizer.
            @combined_results = nil
          else
            raise
          end
        end

        private

        # Lazily-initialized Redis connection for this worker.
        sig { returns(Redis) }
        def redis
          @redis ||= Redis.new(url: configuration.coordinator_uri)
        end

        # Loads (and memoizes the SHA of) a Lua script that acks a batch of
        # stream entries and returns the IDs that were actually acked, so we
        # can tell which results were claimed by another worker in the meantime.
        sig { returns(String) }
        def ack_batch_script
          @ack_batch_script = T.let(@ack_batch_script, T.nilable(String))
          @ack_batch_script ||= redis.script(:load, <<~LUA)
            local acked_ids, acked, i = {}, 0, 2
            while ARGV[i] do
              if redis.call('XACK', KEYS[1], ARGV[1], ARGV[i]) > 0 then
                acked = acked + 1
                acked_ids[acked] = ARGV[i]
              end
              i = i + 1
            end
            return acked_ids
          LUA
        end

        # Loads (and memoizes the SHA of) a Lua script that creates the
        # consumer group and, only when that succeeds (i.e. for the leader),
        # resets the size/acks counters of the run.
        sig { returns(String) }
        def register_consumergroup_script
          @register_consumergroup_script = T.let(@register_consumergroup_script, T.nilable(String))
          @register_consumergroup_script ||= redis.script(:load, <<~LUA)
            redis.call('XGROUP', 'CREATE', KEYS[1], ARGV[1], '0', 'MKSTREAM')
            redis.call('DEL', KEYS[2], KEYS[3])
          LUA
        end

        sig { params(block: Integer).returns(T::Array[EnqueuedRunnable]) }
        def claim_fresh_runnables(block:)
          result = redis.xreadgroup(group_name, configuration.worker_id, stream_key, '>',
            block: block, count: configuration.test_batch_size)
          EnqueuedRunnable.from_redis_stream_claim(result.fetch(stream_key, []))
        end

        sig { returns(T::Array[EnqueuedRunnable]) }
        def claim_stale_runnables
          # When we have to reclaim stale tests, those test are potentially too slow
          # to run inside the test timeout. We only claim one test at a time in order
          # to prevent the exact same batch from being too slow on repeated attempts,
          # which would cause us to mark all the tests in that batch as failed.
          #
          # This has the side effect that for a retried test, the test timeout
          # will be TEST_TIMEOUT * BATCH_SIZE in practice. This gives us a higher
          # likelihood that the test will pass if the batch size > 1.
          pending = redis.xpending(stream_key, group_name, '-', '+', 1)

          # Every test is allowed to take test_timeout milliseconds. Because we process tests in
          # batches, they should never be pending for TEST_TIMEOUT * BATCH_SIZE milliseconds.
          # So, only try to claim messages older than that, with a bit of jitter.
          max_idle_time = configuration.test_timeout * configuration.test_batch_size
          max_idle_time_with_jitter = max_idle_time * rand(1.0...1.2)
          to_claim = pending.each_with_object({}) do |message, hash|
            if message['elapsed'] > max_idle_time_with_jitter
              hash[message.fetch('entry_id')] = message
            end
          end

          if to_claim.empty?
            []
          else
            claimed = redis.xclaim(stream_key, group_name, configuration.worker_id, max_idle_time, to_claim.keys)
            enqueued_runnables = EnqueuedRunnable.from_redis_stream_claim(claimed)
            enqueued_runnables.each do |er|
              # `count` will be set to the current attempt of a different worker that has timed out.
              # The attempt we are going to try will be the next one, so add one.
              attempt = to_claim.fetch(er.execution_id).fetch('count') + 1
              if attempt > configuration.max_attempts
                # If we exhaust our attempts, we will mark the test to immediately fail when it will be run next.
                mark_runnable_to_fail_immediately(er)
              else
                reclaimed_tests << er
              end
            end

            enqueued_runnables
          end
        end

        # Removes the consumer group and the stream from Redis once the run
        # is complete. Racing with other workers is fine: whoever loses the
        # race gets a CommandError, which we swallow.
        sig { void }
        def cleanup
          redis.xgroup(:destroy, stream_key, group_name)
          redis.del(stream_key)
        rescue Redis::CommandError
          # Apparently another consumer already removed the consumer group,
          # so we can assume that all the Redis cleanup was completed.
        end

        # Attaches a canned failure to a runnable that exhausted its attempts,
        # so it fails immediately with a descriptive message the next time it runs.
        sig { params(er: EnqueuedRunnable).void }
        def mark_runnable_to_fail_immediately(er)
          assertion = Minitest::Assertion.new(<<~EOM.chomp)
            This test takes too long to run (> #{configuration.test_timeout}ms).

            We have tried running this test #{configuration.max_attempts} times on different workers, but every time the worker has not reported back a result within #{configuration.test_timeout}ms.
            Try to make the test faster, or increase the test timeout.
          EOM
          assertion.set_backtrace(caller)
          er.canned_failure = assertion
        end

        # Atomically adds the given (possibly negative) deltas to the
        # run-wide counters in Redis and refreshes the local cache with the
        # updated totals.
        sig { params(results: ResultAggregate).void }
        def adjust_combined_results(results)
          updated = redis.multi do
            redis.incrby(key('runs'), results.runs)
            redis.incrby(key('assertions'), results.assertions)
            redis.incrby(key('passes'), results.passes)
            redis.incrby(key('failures'), results.failures)
            redis.incrby(key('errors'), results.errors)
            redis.incrby(key('skips'), results.skips)
            redis.incrby(key('reruns'), results.reruns)
            redis.incrby(key('acks'), results.acks)
            redis.incrby(key('size'), results.size)
          end

          @combined_results = ResultAggregate.new(runs: updated[0], assertions: updated[1], passes: updated[2],
            failures: updated[3], errors: updated[4], skips: updated[5], reruns: updated[6],
            acks: updated[7], size: updated[8])
        end

        # Namespaces a key under the current run ID.
        sig { params(name: String).returns(String) }
        def key(name)
          "minitest/#{configuration.run_id}/#{name}"
        end

        # Runs a batch of claimed runnables, records their results, and acks
        # them in Redis. Returns the number of entries successfully acked.
        sig { params(batch: T::Array[EnqueuedRunnable], reporter: AbstractReporter).returns(Integer) }
        def process_batch(batch, reporter)
          to_be_acked = {}

          batch.each do |enqueued_runnable|
            local_results.size += 1
            reporter.prerecord(enqueued_runnable.runnable_class, enqueued_runnable.method_name)
            result = enqueued_runnable.run

            case (result_type = ResultType.of(result))
            when ResultType::Passed
              # noop
            when ResultType::Skipped
              redis.lpush(key('skip_list'), Marshal.dump(enqueued_runnable.serialize))
            when ResultType::Failed
              redis.lpush(key('failure_list'), Marshal.dump(enqueued_runnable.serialize))
            when ResultType::Error
              redis.lpush(key('error_list'), Marshal.dump(enqueued_runnable.serialize))
            else
              T.absurd(result_type)
            end

            local_results.update_with_result(result)
            to_be_acked[enqueued_runnable.execution_id] = result
          end

          return 0 if to_be_acked.empty?

          acked = redis.evalsha(
            ack_batch_script,
            keys: [stream_key],
            argv: [group_name] + to_be_acked.keys
          )

          batch_results = ResultAggregate.new(acks: acked.length)
          acked.each do |execution_id|
            acked_result = to_be_acked.delete(execution_id)
            reporter.record(acked_result)
            batch_results.update_with_result(acked_result)
          end

          to_be_acked.each do |_execution_id, unacked_result|
            # TODO: use custom assertion class.
            discard_assertion = Minitest::Skip.new("The test result was discarded, " \
              "because the test has been claimed by another worker.")
            discard_assertion.set_backtrace(caller)
            unacked_result.failures = [discard_assertion]
            reporter.record(unacked_result)
          end

          adjust_combined_results(batch_results)
          local_results.acks += acked.length
          acked.length
        end

        INITIAL_BACKOFF = 10 # milliseconds
        private_constant :INITIAL_BACKOFF
      end
    end
  end
end