ci-queue 0.89.0 → 0.91.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 87b11dfe365c8f57a79ef1d5b65e4f422a1fce2bd94d51b0d653d10bfb95120a
4
- data.tar.gz: 1a530c84f9883b85f0784b33e67044f5a325a5815dc06d4b89872b76d1127a74
3
+ metadata.gz: e5cec1e550b8f99b2e3373bd4d6b2b6de207e2c35dc454b5de50beaf3f2984f9
4
+ data.tar.gz: b01254ad4c25f5924d2996e57ac38c28e20a07abd4b032d4a19dcfdbb5fb67fe
5
5
  SHA512:
6
- metadata.gz: 25e7f86d44fc3428e6f1d08430a3940348ae9d2d2561c190894d7a6e1b197d00b22a97fefcb5ade8462d2ccc0e14255ff9d4f10571e5f1f99dd8758b45d8a066
7
- data.tar.gz: 0c676475b941684ad92d43088612e31adb597f2f2c22f383e55c8d2dbc2cfa68646e013a58000191f5fecb16aee8151b5ce8f14cd97fe2d9ca2f11c3fd57cf3f
6
+ metadata.gz: 62db88f6caceddd5d560d1a56a96588e2eff82b20be167741a8eaee0707dfa271a92404911dc4833896346700c14df317ccbf1220dcd61c8bf56ba77edadeb5c
7
+ data.tar.gz: e35765237703ae8f169f79ce70f334bde33550fd8bba18fac59a72f8c0cb9ac859e02c3618b4ff2847210d5498c37651de847c1af2368186044982a3d8d51b2a
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ci-queue (0.89.0)
4
+ ci-queue (0.91.0)
5
5
  logger
6
6
 
7
7
  GEM
@@ -6,6 +6,7 @@ module CI
6
6
  attr_accessor :requeue_tolerance, :namespace, :failing_test, :statsd_endpoint
7
7
  attr_accessor :max_test_duration, :max_test_duration_percentile, :track_test_duration
8
8
  attr_accessor :max_test_failed, :redis_ttl, :warnings_file, :debug_log, :max_missed_heartbeat_seconds
9
+ attr_writer :heartbeat_max_test_duration
9
10
  attr_accessor :lazy_load, :lazy_load_stream_batch_size
10
11
  attr_writer :lazy_load_streaming_timeout
11
12
  attr_accessor :lazy_load_test_helpers
@@ -57,7 +58,7 @@ module CI
57
58
  grind_count: nil, max_duration: nil, failure_file: nil, max_test_duration: nil,
58
59
  max_test_duration_percentile: 0.5, track_test_duration: false, max_test_failed: nil,
59
60
  queue_init_timeout: nil, redis_ttl: 8 * 60 * 60, report_timeout: nil, inactive_workers_timeout: nil,
60
- export_flaky_tests_file: nil, warnings_file: nil, debug_log: nil, max_missed_heartbeat_seconds: nil,
61
+ export_flaky_tests_file: nil, warnings_file: nil, debug_log: nil, max_missed_heartbeat_seconds: nil, heartbeat_max_test_duration: nil,
61
62
  lazy_load: false, lazy_load_stream_batch_size: nil, lazy_load_streaming_timeout: nil, lazy_load_test_helpers: nil,
62
63
  skip_stale_tests: false)
63
64
  @build_id = build_id
@@ -86,6 +87,7 @@ module CI
86
87
  @warnings_file = warnings_file
87
88
  @debug_log = debug_log
88
89
  @max_missed_heartbeat_seconds = max_missed_heartbeat_seconds
90
+ @heartbeat_max_test_duration = heartbeat_max_test_duration
89
91
  @lazy_load = lazy_load
90
92
  @lazy_load_stream_batch_size = lazy_load_stream_batch_size || 5_000
91
93
  @lazy_load_streaming_timeout = lazy_load_streaming_timeout
@@ -99,6 +101,11 @@ module CI
99
101
  @lazy_load_test_helpers.split(',').map(&:strip)
100
102
  end
101
103
 
104
+ def retry?
105
+ ENV.fetch("BUILDKITE_RETRY_COUNT", "0").to_i > 0 ||
106
+ ENV["SEMAPHORE_PIPELINE_RERUN"] == "true"
107
+ end
108
+
102
109
  def queue_init_timeout
103
110
  @queue_init_timeout || timeout
104
111
  end
@@ -148,6 +155,10 @@ module CI
148
155
  @inactive_workers_timeout || timeout
149
156
  end
150
157
 
158
+ def heartbeat_max_test_duration
159
+ @heartbeat_max_test_duration || (timeout * 10 if max_missed_heartbeat_seconds)
160
+ end
161
+
151
162
  def max_consecutive_failures=(max)
152
163
  if max
153
164
  @circuit_breakers << CircuitBreaker::MaxConsecutiveFailures.new(max_consecutive_failures: max)
@@ -63,7 +63,7 @@ module CI
63
63
  def with_heartbeat(id, lease: nil)
64
64
  if heartbeat_enabled?
65
65
  ensure_heartbeat_thread_alive!
66
- heartbeat_state.set(:tick, id, lease)
66
+ heartbeat_state.set(:tick, id, lease, Process.clock_gettime(Process::CLOCK_MONOTONIC))
67
67
  end
68
68
 
69
69
  yield
@@ -386,16 +386,32 @@ module CI
386
386
  Thread.current.name = "CI::Queue#heartbeat"
387
387
  Thread.current.abort_on_exception = true
388
388
 
389
+ capped = false
390
+
389
391
  loop do
390
392
  command = heartbeat_state.wait(1) # waits for max 1 second but wakes up immediately if we receive a command
391
393
 
392
394
  case command&.first
393
395
  when :tick
394
- # command = [:tick, entry_id, lease_id]
396
+ next if capped
397
+
398
+ max_duration = config.heartbeat_max_test_duration
399
+ if max_duration
400
+ # command = [:tick, entry_id, lease_id, started_at]
401
+ # Use the absolute start time from when with_heartbeat was called so that
402
+ # the elapsed calculation is not skewed by heartbeat thread startup delay.
403
+ elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - command[3]
404
+ if elapsed >= max_duration
405
+ capped = true
406
+ next
407
+ end
408
+ end
409
+
410
+ # command = [:tick, entry_id, lease_id, started_at]
395
411
  heartbeat_process.tick!(command[1], command[2])
396
412
  when :reset
397
413
  # Test finished, stop ticking until next test starts
398
- nil
414
+ capped = false
399
415
  when :stop
400
416
  break
401
417
  end
@@ -39,6 +39,23 @@ module CI
39
39
  yield if block_given?
40
40
  end
41
41
 
42
+ # On retry runs (BUILDKITE_RETRY_COUNT > 0), the main queue is already
43
+ # exhausted from the original run. A retry worker may have found unresolved
44
+ # failures via the error-reports fallback and be running them via the Retry
45
+ # queue — but those tests are NOT in the Redis running set so active_workers?
46
+ # returns false and the loop above exits immediately.
47
+ #
48
+ # Wait up to inactive_workers_timeout for retry workers to clear error-reports.
49
+ # This prevents the summary from canceling retry workers before they finish.
50
+ if exhausted? && config.retry? && !rescue_connection_errors { build.failed_tests }.empty?
51
+ @time_left_with_no_workers = config.inactive_workers_timeout
52
+ until rescue_connection_errors { build.failed_tests }.empty? ||
53
+ @time_left_with_no_workers <= 0
54
+ sleep 1
55
+ @time_left_with_no_workers -= 1
56
+ end
57
+ end
58
+
42
59
  exhausted?
43
60
  rescue CI::Queue::Redis::LostMaster
44
61
  false
@@ -2,7 +2,7 @@
2
2
 
3
3
  module CI
4
4
  module Queue
5
- VERSION = '0.89.0'
5
+ VERSION = '0.91.0'
6
6
  DEV_SCRIPTS_ROOT = ::File.expand_path('../../../../../redis', __FILE__)
7
7
  RELEASE_SCRIPTS_ROOT = ::File.expand_path('../redis', __FILE__)
8
8
  end
@@ -267,8 +267,16 @@ module Minitest
267
267
  end
268
268
 
269
269
  def aggregates
270
- success = failures.zero? && errors.zero?
271
- failures_count = "#{failures} failures, #{errors} errors,"
270
+ # error-reports is authoritative when workers die before flushing per-test stats.
271
+ # Floor the displayed count so the summary line is never misleadingly green.
272
+ known_error_count = error_reports.size
273
+ effective_total = [failures + errors, known_error_count].max
274
+ success = effective_total.zero?
275
+ failures_count = if failures + errors >= known_error_count
276
+ "#{failures} failures, #{errors} errors,"
277
+ else
278
+ "#{effective_total} failures,"
279
+ end
272
280
 
273
281
  step([
274
282
  'Ran %d tests, %d assertions,' % [progress, assertions],
@@ -736,6 +736,16 @@ module Minitest
736
736
  queue_config.max_missed_heartbeat_seconds = time || 30
737
737
  end
738
738
 
739
+ help = <<~EOS
740
+ Maximum duration in seconds that the heartbeat will tick for a single test.
741
+ If a test runs longer than this, the heartbeat stops and the test entry becomes
742
+ eligible for reclamation by another worker.
743
+ Defaults to timeout * 10 when heartbeat is enabled.
744
+ EOS
745
+ opts.on("--heartbeat-max-test-duration SECONDS", Float, help) do |seconds|
746
+ queue_config.heartbeat_max_test_duration = seconds
747
+ end
748
+
739
749
 
740
750
  opts.on("-v", "--verbose", "Verbose. Show progress processing files.") do
741
751
  self.verbose = true
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ci-queue
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.89.0
4
+ version: 0.91.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jean Boussier
@@ -305,7 +305,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
305
305
  - !ruby/object:Gem::Version
306
306
  version: '0'
307
307
  requirements: []
308
- rubygems_version: 4.0.9
308
+ rubygems_version: 4.0.10
309
309
  specification_version: 4
310
310
  summary: Distribute tests over many workers using a queue
311
311
  test_files: []