RubyGems - minitest-distributed - Versions diffs - 0.2.12 → 0.2.13 - Mend

minitest-distributed 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

checksums.yaml +4 -4
data/lib/minitest/distributed/coordinators/redis_coordinator.rb +80 -59
data/lib/minitest/distributed/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 722ec2cc00da4f9c5f5efc0591e39c2becc2338ce7fe171c60339d284116242b
-  data.tar.gz: aa1d3c52afa3b16c649a0b12d27e83ccb19ae11dd80fb68d3e57a669e0a081e5
+  metadata.gz: 1605e583ce4427da1909789380c1ac20ffe629a5f1ddf96452f93e45d50188f2
+  data.tar.gz: 052ba1210bee26b878052bb28207b800a67682f023e6fdcc1e03bebaea758376
 SHA512:
-  metadata.gz: c2cfc291aeb202bb680b04a6e579272fd907efe47ef1d5e5ea2b66ca5ffe23967ded9ea38458e485e1e9b42609d84a2058fd2df3250217cf76a60272f3b3255c
-  data.tar.gz: cb0934c54f7c3bdcc76abe8c0593b08e2e7ddfacd9cf2c4e7ce3a71e586b2e75170be91ebe3ac8b520fb7c8c7e4e961ae4a9a218c582bf1301dc916f41427847
+  metadata.gz: fb550faaa06a888561a3557b10f35fc867ca341e6e4008a49eb5c5137aa79b3490e6a5b24444d67c85ffc79c2e683aff7a21f948e7867b1b78bbafeccdebe9b8
+  data.tar.gz: ad60584710485b804af453696912a2676d56905e69642ead265111689ea10212768925f5c48017fd3c6da7547a94e8450b181dd2f89b5db82dc480d79d843d10

data/lib/minitest/distributed/coordinators/redis_coordinator.rb CHANGED Viewed

@@ -153,63 +153,65 @@ module Minitest
           return if consumer_group_exists
-          tests = T.let([], T::Array[Minitest::Runnable])
-          tests = if initial_attempt
-            # If this is the first attempt for this run ID, we will schedule the full
-            # test suite as returned by the test selector to run.
-            tests_from_selector = test_selector.tests
-            adjust_combined_results(ResultAggregate.new(size: tests_from_selector.size))
-            tests_from_selector
-          elsif configuration.retry_failures
-            # Before starting a retry attempt, we first check if the previous attempt
-            # was aborted before it was completed. If this is the case, we cannot use
-            # retry mode, and should immediately fail the attempt.
-            if combined_results.abort?
-              # We mark this run as aborted, which causes this worker to not be successful.
-              @aborted = true
-              # We still publish an empty size run to Redis, so if there are any followers,
-              # they will wind down normally. Only the leader will exit
-              # with a non-zero exit status and fail the build; any follower will
-              # exit with status 0.
-              adjust_combined_results(ResultAggregate.new(size: 0))
-              T.let([], T::Array[Minitest::Runnable])
-            else
-              previous_failures, previous_errors, _deleted = redis.multi do |pipeline|
-                pipeline.lrange(list_key(ResultType::Failed.serialize), 0, -1)
-                pipeline.lrange(list_key(ResultType::Error.serialize), 0, -1)
-                pipeline.del(list_key(ResultType::Failed.serialize), list_key(ResultType::Error.serialize))
-              end
+          tests = T.let(
+            if initial_attempt
+              # If this is the first attempt for this run ID, we will schedule the full
+              # test suite as returned by the test selector to run.
+              tests_from_selector = test_selector.tests
+              adjust_combined_results(ResultAggregate.new(size: tests_from_selector.size))
+              tests_from_selector
+            elsif configuration.retry_failures
+              # Before starting a retry attempt, we first check if the previous attempt
+              # was aborted before it was completed. If this is the case, we cannot use
+              # retry mode, and should immediately fail the attempt.
+              if combined_results.abort?
+                # We mark this run as aborted, which causes this worker to not be successful.
+                @aborted = true
+                # We still publish an empty size run to Redis, so if there are any followers,
+                # they will wind down normally. Only the leader will exit
+                # with a non-zero exit status and fail the build; any follower will
+                # exit with status 0.
+                adjust_combined_results(ResultAggregate.new(size: 0))
+                []
+              else
+                previous_failures, previous_errors, _deleted = redis.multi do |pipeline|
+                  pipeline.lrange(list_key(ResultType::Failed.serialize), 0, -1)
+                  pipeline.lrange(list_key(ResultType::Error.serialize), 0, -1)
+                  pipeline.del(list_key(ResultType::Failed.serialize), list_key(ResultType::Error.serialize))
+                end
-              # We set the `size` key to the number of tests we are planning to schedule.
-              # We also adjust the number of failures and errors back to 0.
-              # We set the number of requeues to the number of tests that failed, so the
-              # run statistics will reflect that we retried some failed test.
-              #
-              # However, normally requeues are not acked, as we expect the test to be acked
-              # by another worker later. This makes the test loop think it is already done.
-              # To prevent this, we initialize the number of acks negatively, so it evens out
-              # in the statistics.
-              total_failures = previous_failures.length + previous_errors.length
-              adjust_combined_results(ResultAggregate.new(
-                size: total_failures,
-                failures: -previous_failures.length,
-                errors: -previous_errors.length,
-                requeues: total_failures,
-              ))
-              # For subsequent attempts, we check the list of previous failures and
-              # errors, and only schedule to re-run those tests. This allows for faster
-              # retries of potentially flaky tests.
-              test_identifiers_to_retry = T.let(previous_failures + previous_errors, T::Array[String])
-              test_identifiers_to_retry.map { |identifier| DefinedRunnable.from_identifier(identifier) }
-            end
-          else
-            adjust_combined_results(ResultAggregate.new(size: 0))
-            T.let([], T::Array[Minitest::Runnable])
-          end
+                # We set the `size` key to the number of tests we are planning to schedule.
+                # We also adjust the number of failures and errors back to 0.
+                # We set the number of requeues to the number of tests that failed, so the
+                # run statistics will reflect that we retried some failed test.
+                #
+                # However, normally requeues are not acked, as we expect the test to be acked
+                # by another worker later. This makes the test loop think it is already done.
+                # To prevent this, we initialize the number of acks negatively, so it evens out
+                # in the statistics.
+                total_failures = previous_failures.length + previous_errors.length
+                adjust_combined_results(ResultAggregate.new(
+                  size: total_failures,
+                  failures: -previous_failures.length,
+                  errors: -previous_errors.length,
+                  requeues: total_failures,
+                ))
+                # For subsequent attempts, we check the list of previous failures and
+                # errors, and only schedule to re-run those tests. This allows for faster
+                # retries of potentially flaky tests.
+                test_identifiers_to_retry = T.let(previous_failures + previous_errors, T::Array[String])
+                test_identifiers_to_retry.map { |identifier| DefinedRunnable.from_identifier(identifier) }
+              end
+            else
+              adjust_combined_results(ResultAggregate.new(size: 0))
+              []
+            end,
+            T::Array[Minitest::Runnable],
+          )
           redis.pipelined do |pipeline|
             tests.each do |test|
@@ -243,10 +245,17 @@ module Minitest
             # To make sure we don't end up in a busy loop overwhelming Redis with commands
             # when there is no work to do, we increase the blocking time exponentially,
             # and reset it to the initial value if we processed any tests.
-            if stale_runnables.empty? && fresh_runnables.empty?
-              exponential_backoff <<= 1
+            #
+            # The backoff is capped at MAX_BACKOFF to bound how long a worker can sit
+            # inside a single XREADGROUP BLOCK call. Without a cap, after ~15 empty
+            # iterations the worker is blocked in Redis for 5+ minutes and cannot
+            # re-check `complete?` / `abort?` until the BLOCK returns, which manifests
+            # as a long post-100% teardown hang when pipelined XACKs race the progress
+            # reporter.
+            exponential_backoff = if stale_runnables.empty? && fresh_runnables.empty?
+              next_backoff(exponential_backoff)
             else
-              exponential_backoff = INITIAL_BACKOFF
+              INITIAL_BACKOFF
             end
           end
@@ -543,8 +552,20 @@ module Minitest
           @logger ||= T.let(Logger.new(log_path), T.nilable(Logger))
         end
+        sig { params(backoff: Integer).returns(Integer) }
+        def next_backoff(backoff)
+          [backoff << 1, MAX_BACKOFF].min
+        end
         INITIAL_BACKOFF = 10 # milliseconds
         private_constant :INITIAL_BACKOFF
+        # Cap on the XREADGROUP BLOCK timeout used by `consume`. Reached after roughly
+        # 9 consecutive empty iterations (10 ms * 2^9 = 5120 ms). Bounds the worst-case
+        # time a worker can be unresponsive to `complete?` / `abort?` after the queue
+        # is drained.
+        MAX_BACKOFF = 5_000 # milliseconds
+        private_constant :MAX_BACKOFF
       end
     end
   end

data/lib/minitest/distributed/version.rb CHANGED Viewed

@@ -3,6 +3,6 @@
 module Minitest
   module Distributed
-    VERSION = "0.2.12"
+    VERSION = "0.2.13"
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: minitest-distributed
 version: !ruby/object:Gem::Version
-  version: 0.2.12
+  version: 0.2.13
 platform: ruby
 authors:
 - Willem van Bergen
@@ -183,7 +183,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.7.2
+rubygems_version: 4.0.12
 specification_version: 4
 summary: Distributed test executor plugin for Minitest
 test_files: []