ci-queue 0.84.0 → 0.86.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c47a6b5450a21d7f4fb79a2b9a862ee53d6602d49e61a1e60d0fdaba92e9d0fd
-  data.tar.gz: 217ae043f06406663beff99e415dcf778a1a6cacd47ba6148d9e50504927dcf3
+  metadata.gz: 7708a4b0506c58da9ae1c6681dd335967c584869bf7b4a60546fc43f06a6cfe8
+  data.tar.gz: 22e9a6260641835f028c9952e10c3c0dfa4882156bf393b0b1a13befbee89432
 SHA512:
-  metadata.gz: ca7a1134775424386068df3e1b3c80738f90bbf5a353ed254715cd12194d6dfa39e1313c0fe4ea3e851f84a459ed0c61bbc52045164764fd4941b833ee6d71eb
-  data.tar.gz: 0ddca915e68afcfe1f6a99b41d34d9b46aa731499922ae1556de7db026b5e30dd48de08644df603c99426c430bebb782530825e626cc631f45dc94f0df2db200
+  metadata.gz: 697d15d2ada5cae5ace00714a47b91fbb32ebf61a3abf7b181fb4293b560ba837e02c1f88aef40832a9534fcdc45a049c1ef8d19bf5b4b537f2b53d4ca1d6cf5
+  data.tar.gz: 225d1ec46ad29137111f16f18431212db237eeaa91871fb9ece7e0e35ba34270b4272b2ce26cff3b178214feb6643a75b5266ae86d41aef5ba47952064996a81
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    ci-queue (0.84.0)
+    ci-queue (0.86.0)
       logger
 
 GEM
data/ci-queue.gemspec CHANGED
@@ -41,9 +41,9 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency 'simplecov', '~> 0.12'
   spec.add_development_dependency 'minitest-reporters', '~> 1.1'
 
+  spec.add_development_dependency 'rexml'
   spec.add_development_dependency 'snappy'
   spec.add_development_dependency 'msgpack'
   spec.add_development_dependency 'benchmark'
-  spec.add_development_dependency 'rexml'
   spec.add_development_dependency 'rubocop'
 end
@@ -10,6 +10,12 @@ module CI
         end
       end
 
+      # Grind always eagerly loads test files and populates @index via
+      # Static#populate, so poll uses the @index path. Bypass entry_resolver
+      # to avoid JSON-formatting overhead on potentially millions of entries.
+      def entry_resolver; nil; end
+      def entry_resolver=(_); end
+
       def initialize(path, config)
         io = path == '-' ? STDIN : ::File.open(path)
 
@@ -4,12 +4,23 @@ local processed_key = KEYS[2]
 local owners_key = KEYS[3]
 local error_reports_key = KEYS[4]
 local requeued_by_key = KEYS[5]
+local leases_key = KEYS[6]
 
 local entry = ARGV[1]
 local error = ARGV[2]
 local ttl = ARGV[3]
-redis.call('zrem', zset_key, entry)
-redis.call('hdel', owners_key, entry) -- Doesn't matter if it was reclaimed by another workers
+local lease_id = ARGV[4]
+
+-- Only the current lease holder can remove the entry from the running set.
+-- If the lease was transferred (e.g. via reserve_lost), the stale worker
+-- must not remove the running entry — that would let the supervisor think
+-- the queue is exhausted while the new lease holder is still processing.
+if tostring(redis.call('hget', leases_key, entry)) == lease_id then
+  redis.call('zrem', zset_key, entry)
+  redis.call('hdel', owners_key, entry)
+  redis.call('hdel', leases_key, entry)
+end
+
 redis.call('hdel', requeued_by_key, entry)
 local acknowledged = redis.call('sadd', processed_key, entry) == 1
 
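The guard is a compare-and-delete: only the worker whose lease id still matches may clear the running-set entry, while the processed-set insertion stays unconditional. A minimal in-memory Ruby sketch of the same semantics (names hypothetical, Hashes and a Set standing in for the Redis structures):

    require "set"

    def acknowledge(running:, leases:, owners:, processed:, entry:, lease_id:)
      if leases[entry].to_s == lease_id
        running.delete(entry)  # ZREM running
        owners.delete(entry)   # HDEL owners
        leases.delete(entry)   # HDEL leases
      end
      # SADD returns 1 only for the first acknowledger.
      !processed.add?(entry).nil?
    end

    # A stale worker (lease "8") acknowledges after the lease moved to "7":
    # processed gains the entry, but the running entry survives for the
    # current holder — exactly the leftover that reserve_lost later cleans up.
    acknowledge(running: { "SomeTest" => 1.0 }, leases: { "SomeTest" => "7" },
                owners: {}, processed: Set.new, entry: "SomeTest", lease_id: "8")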
@@ -60,10 +60,10 @@ module CI
           [0, 0, 0.1, 0.5, 1, 3, 5]
         end
 
-        def with_heartbeat(id)
+        def with_heartbeat(id, lease: nil)
           if heartbeat_enabled?
             ensure_heartbeat_thread_alive!
-            heartbeat_state.set(:tick, id)
+            heartbeat_state.set(:tick, id, lease)
           end
 
           yield
@@ -264,12 +264,11 @@ module CI
         end
 
       class HeartbeatProcess
-        def initialize(redis_url, zset_key, processed_key, owners_key, worker_queue_key)
+        def initialize(redis_url, zset_key, owners_key, leases_key)
           @redis_url = redis_url
           @zset_key = zset_key
-          @processed_key = processed_key
           @owners_key = owners_key
-          @worker_queue_key = worker_queue_key
+          @leases_key = leases_key
         end
 
         def boot!
@@ -281,9 +280,8 @@ module CI
             ::File.join(__dir__, "monitor.rb"),
             @redis_url,
             @zset_key,
-            @processed_key,
             @owners_key,
-            @worker_queue_key,
+            @leases_key,
             in: child_read,
             out: child_write,
           )
@@ -313,8 +311,8 @@ module CI
           end
         end
 
-        def tick!(id)
-          send_message(:tick!, id: id)
+        def tick!(id, lease)
+          send_message(:tick!, id: id, lease: lease.to_s)
         end
 
         private
@@ -355,9 +353,8 @@ module CI
           @heartbeat_process ||= HeartbeatProcess.new(
             @redis_url,
             key('running'),
-            key('processed'),
             key('owners'),
-            key('worker', worker_id, 'queue'),
+            key('leases'),
           )
         end
 
@@ -369,19 +366,16 @@ module CI
           Thread.current.name = "CI::Queue#heartbeat"
           Thread.current.abort_on_exception = true
 
-          timeout = config.timeout.to_i
           loop do
-            command = nil
             command = heartbeat_state.wait(1) # waits for max 1 second but wakes up immediately if we receive a command
 
             case command&.first
             when :tick
-              if timeout > 0
-                heartbeat_process.tick!(command.last)
-                timeout -= 1
-              end
+              # command = [:tick, entry_id, lease_id]
+              heartbeat_process.tick!(command[1], command[2])
             when :reset
-              timeout = config.timeout.to_i
+              # Test finished, stop ticking until next test starts
+              nil
             when :stop
               break
             end
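Note that the per-test tick budget derived from config.timeout is gone: ticking is now bounded by lease validity on the Redis side rather than by a client-side counter. A self-contained sketch of the tuple the thread consumes (values illustrative):

    # The state cell now stores [:tick, entry_id, lease_id].
    command = [:tick, "SomeTest#test_works", 42]

    case command&.first
    when :tick
      id, lease = command[1], command[2]
      # The real loop calls heartbeat_process.tick!(id, lease), which
      # serializes the lease with #to_s ("" for nil, "42" for 42).
      puts "tick id=#{id} lease=#{lease.to_s.inspect}"
    when :reset
      # no-op: ticking stops once the test finishes
    when :stop
      # the thread breaks out of its loop
    end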
@@ -1,18 +1,17 @@
 -- AUTOGENERATED FILE DO NOT EDIT DIRECTLY
 local zset_key = KEYS[1]
-local processed_key = KEYS[2]
-local owners_key = KEYS[3]
-local worker_queue_key = KEYS[4]
+local leases_key = KEYS[2]
 
 local current_time = ARGV[1]
 local entry = ARGV[2]
+local lease_id = ARGV[3]
 
--- already processed, we do not need to bump the timestamp
-if redis.call('sismember', processed_key, entry) == 1 then
-  return false
-end
-
--- we're still the owner of the test, we can bump the timestamp
-if redis.call('hget', owners_key, entry) == worker_queue_key then
+-- Only the current lease holder can bump the timestamp.
+-- We intentionally do NOT check the processed set. A non-owner worker's
+-- acknowledge can add the entry to processed, which would poison the
+-- current lease holder's heartbeat if we checked it here.
+-- The lease check alone is sufficient — once the lease holder acknowledges,
+-- they zrem + hdel the lease, so the heartbeat will naturally stop.
+if tostring(redis.call('hget', leases_key, entry)) == lease_id then
   return redis.call('zadd', zset_key, current_time, entry)
 end
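One subtlety: in Redis Lua scripts, HGET on a missing field returns false, so the script normalizes the reply with tostring before comparing it to lease_id, which always arrives as a string via ARGV. A Ruby analog of the guard (an in-memory Hash standing in for the leases hash, with missing fields mapping to nil rather than Lua's false):

    def bump_heartbeat(running, leases, entry, lease_id, now)
      return false unless leases[entry].to_s == lease_id
      running[entry] = now # equivalent of the ZADD score refresh
      true
    end

    bump_heartbeat({}, {}, "GoneTest", "3", Time.now.to_f) # => false, a silent no-op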
@@ -13,11 +13,10 @@ module CI
       DEV_SCRIPTS_ROOT = ::File.expand_path('../../../../../../redis', __FILE__)
       RELEASE_SCRIPTS_ROOT = ::File.expand_path('../../redis', __FILE__)
 
-      def initialize(pipe, logger, redis_url, zset_key, processed_key, owners_key, worker_queue_key)
+      def initialize(pipe, logger, redis_url, zset_key, owners_key, leases_key)
         @zset_key = zset_key
-        @processed_key = processed_key
         @owners_key = owners_key
-        @worker_queue_key = worker_queue_key
+        @leases_key = leases_key
         @logger = logger
         @redis = ::Redis.new(url: redis_url, reconnect_attempts: [0, 0, 0.1, 0.5, 1, 3, 5])
         @shutdown = false
@@ -36,11 +35,11 @@ module CI
         @self_pipe_writer << '.'
       end
 
-      def process_tick!(id:)
+      def process_tick!(id:, lease:)
         eval_script(
           :heartbeat,
-          keys: [@zset_key, @processed_key, @owners_key, @worker_queue_key],
-          argv: [Time.now.to_f, id]
+          keys: [@zset_key, @leases_key],
+          argv: [Time.now.to_f, id, lease]
         )
       rescue => error
         @logger.info(error)
@@ -151,12 +150,11 @@ end
 
 redis_url = ARGV[0]
 zset_key = ARGV[1]
-processed_key = ARGV[2]
-owners_key = ARGV[3]
-worker_queue_key = ARGV[4]
+owners_key = ARGV[2]
+leases_key = ARGV[3]
 
-logger.debug("Starting monitor: #{redis_url} #{zset_key} #{processed_key}")
-manager = CI::Queue::Redis::Monitor.new($stdin, logger, redis_url, zset_key, processed_key, owners_key, worker_queue_key)
+logger.debug("Starting monitor: #{redis_url} #{zset_key} #{leases_key}")
+manager = CI::Queue::Redis::Monitor.new($stdin, logger, redis_url, zset_key, owners_key, leases_key)
 
 # Notify the parent we're ready
 $stdout.puts(".")
@@ -2,6 +2,7 @@
 local zset_key = KEYS[1]
 local worker_queue_key = KEYS[2]
 local owners_key = KEYS[3]
+local leases_key = KEYS[4]
 
 -- owned_tests = {"SomeTest", "worker:1", "SomeOtherTest", "worker:2", ...}
 local owned_tests = redis.call('hgetall', owners_key)
@@ -9,6 +10,7 @@ for index, owner_or_test in ipairs(owned_tests) do
   if owner_or_test == worker_queue_key then -- If we owned a test
     local test = owned_tests[index - 1]
     redis.call('zadd', zset_key, "0", test) -- We expire the lease immediately
+    redis.call('hdel', leases_key, test)
     return nil
   end
 end
@@ -7,15 +7,20 @@ local worker_queue_key = KEYS[5]
 local owners_key = KEYS[6]
 local error_reports_key = KEYS[7]
 local requeued_by_key = KEYS[8]
+local leases_key = KEYS[9]
 
 local max_requeues = tonumber(ARGV[1])
 local global_max_requeues = tonumber(ARGV[2])
 local entry = ARGV[3]
 local offset = ARGV[4]
 local ttl = tonumber(ARGV[5])
+local lease_id = ARGV[6]
 
-if redis.call('hget', owners_key, entry) == worker_queue_key then
-  redis.call('hdel', owners_key, entry)
+-- Only the current lease holder can requeue a test.
+-- If the lease was transferred (e.g. via reserve_lost), reject the stale
+-- worker's requeue so the running entry stays intact for the new holder.
+if tostring(redis.call('hget', leases_key, entry)) ~= lease_id then
+  return false
 end
 
 if redis.call('sismember', processed_key, entry) == 1 then
@@ -49,6 +54,8 @@ if ttl and ttl > 0 then
   redis.call('expire', requeued_by_key, ttl)
 end
 
+redis.call('hdel', owners_key, entry)
+redis.call('hdel', leases_key, entry)
 redis.call('zrem', zset_key, entry)
 
 return true
@@ -6,6 +6,8 @@ local worker_queue_key = KEYS[4]
 local owners_key = KEYS[5]
 local requeued_by_key = KEYS[6]
 local workers_key = KEYS[7]
+local leases_key = KEYS[8]
+local lease_counter_key = KEYS[9]
 
 local current_time = ARGV[1]
 local defer_offset = tonumber(ARGV[2]) or 0
@@ -20,6 +22,15 @@ local function insert_with_offset(test)
   end
 end
 
+local function claim_test(test)
+  local lease = redis.call('incr', lease_counter_key)
+  redis.call('zadd', zset_key, current_time, test)
+  redis.call('lpush', worker_queue_key, test)
+  redis.call('hset', owners_key, test, worker_queue_key)
+  redis.call('hset', leases_key, test, lease)
+  return {test, tostring(lease)}
+end
+
 for attempt = 1, max_skip_attempts do
   local test = redis.call('rpop', queue_key)
   if not test then
@@ -31,10 +42,7 @@ for attempt = 1, max_skip_attempts do
   -- If this build only has one worker, allow immediate self-pickup.
   if redis.call('scard', workers_key) <= 1 then
     redis.call('hdel', requeued_by_key, test)
-    redis.call('zadd', zset_key, current_time, test)
-    redis.call('lpush', worker_queue_key, test)
-    redis.call('hset', owners_key, test, worker_queue_key)
-    return test
+    return claim_test(test)
   end
 
   insert_with_offset(test)
@@ -47,10 +55,7 @@ for attempt = 1, max_skip_attempts do
     end
   else
     redis.call('hdel', requeued_by_key, test)
-    redis.call('zadd', zset_key, current_time, test)
-    redis.call('lpush', worker_queue_key, test)
-    redis.call('hset', owners_key, test, worker_queue_key)
-    return test
+    return claim_test(test)
   end
 end
 
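claim_test centralizes bookkeeping that was previously duplicated across both return paths, and is where lease ids are minted: INCR on a shared counter guarantees every claim gets a fresh, unique id. A rough client-side Ruby equivalent, assuming a redis-rb client and a hypothetical key-layout hash (the real work happens atomically inside the Lua script; doing it from the client like this would not be atomic):

    require "redis"

    def claim_test(redis, keys, test, worker_queue_key, now)
      lease = redis.incr(keys[:lease_counter]) # fresh, monotonically increasing id
      redis.zadd(keys[:running], now, test)    # mark as running at `now`
      redis.lpush(worker_queue_key, test)      # append to this worker's log
      redis.hset(keys[:owners], test, worker_queue_key)
      redis.hset(keys[:leases], test, lease)
      [test, lease.to_s]
    end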
@@ -3,6 +3,8 @@ local zset_key = KEYS[1]
 local processed_key = KEYS[2]
 local worker_queue_key = KEYS[3]
 local owners_key = KEYS[4]
+local leases_key = KEYS[5]
+local lease_counter_key = KEYS[6]
 
 local current_time = ARGV[1]
 local timeout = ARGV[2]
@@ -10,10 +12,19 @@ local timeout = ARGV[2]
 local lost_tests = redis.call('zrangebyscore', zset_key, 0, current_time - timeout)
 for _, test in ipairs(lost_tests) do
   if redis.call('sismember', processed_key, test) == 0 then
+    local lease = redis.call('incr', lease_counter_key)
     redis.call('zadd', zset_key, current_time, test)
     redis.call('lpush', worker_queue_key, test)
-    redis.call('hset', owners_key, test, worker_queue_key) -- Take ownership
-    return test
+    redis.call('hset', owners_key, test, worker_queue_key)
+    redis.call('hset', leases_key, test, lease)
+    return {test, tostring(lease)}
+  else
+    -- Test is already processed but still in running (stale). This can happen when
+    -- a non-owner worker acknowledged the test (marking it processed) but could not
+    -- remove it from running due to the lease guard. Clean it up.
+    redis.call('zrem', zset_key, test)
+    redis.call('hdel', owners_key, test)
+    redis.call('hdel', leases_key, test)
   end
 end
 
@@ -12,6 +12,22 @@ module CI
           @build ||= CI::Queue::Redis::BuildRecord.new(self, redis, config)
         end
 
+        # Retry queue is pre-populated with failed test entries from the previous run.
+        # Don't replace them with the full preresolved/lazy test list.
+        # QueuePopulationStrategy#configure_lazy_queue will still set entry_resolver,
+        # so poll uses LazyEntryResolver to lazily load test files on demand.
+        # The random/batch_size params are intentionally ignored since we keep
+        # the existing queue contents as-is.
+        #
+        # Note: populate (non-stream) is intentionally NOT overridden here.
+        # RSpec and non-lazy Minitest retries call populate to build the
+        # @index mapping test IDs to runnable objects, which poll needs to
+        # yield proper test/example instances. In those paths, @queue contains
+        # bare test IDs that match @index keys, so populate works correctly.
+        def stream_populate(tests, random: nil, batch_size: nil)
+          self
+        end
+
         private
 
         attr_reader :redis
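A toy model of why the override is a deliberate no-op: the retry queue is seeded with last run's failures, and streaming the full suite at it must not clobber them (classes hypothetical, for illustration only):

    class RetryQueue
      def initialize(failed_entries)
        @queue = failed_entries
      end

      # Mirrors the no-op override: streaming the full list leaves the
      # pre-seeded failures untouched.
      def stream_populate(_tests, random: nil, batch_size: nil)
        self
      end

      def to_a
        @queue.dup
      end
    end

    q = RetryQueue.new(["FailedTest#test_x"])
    q.stream_populate(["AllTest#test_1", "FailedTest#test_x"])
    q.to_a # => ["FailedTest#test_x"]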
@@ -13,12 +13,18 @@ module CI
       self.requeue_offset = 42
       self.max_sleep_time = 2
 
+      # Minimal wrapper returned by resolve_entry when neither @index nor entry_resolver
+      # is available. Provides the interface callers expect (.id, .queue_entry) so that
+      # downstream code doesn't crash with NoMethodError on a raw String.
+      UnresolvedEntry = Struct.new(:id, :queue_entry)
+
       class Worker < Base
         attr_accessor :entry_resolver
         attr_reader :first_reserve_at
 
         def initialize(redis, config)
           @reserved_tests = Concurrent::Set.new
+          @reserved_leases = Concurrent::Map.new
           @shutdown_required = false
           @first_reserve_at = nil
           super(redis, config)
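Being a plain Struct, the wrapper satisfies the two calls downstream code makes on a resolved test. A quick usage sketch (values illustrative):

    UnresolvedEntry = Struct.new(:id, :queue_entry)

    entry = UnresolvedEntry.new("SomeTest#test_a", '{"test_id":"SomeTest#test_a"}')
    entry.id          # => "SomeTest#test_a"
    entry.queue_entry # => the raw queue payload, still usable for acknowledge/requeue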
@@ -147,9 +153,10 @@ module CI
         def retry_queue
           failures = build.failed_tests.to_set
           log = redis.lrange(key('worker', worker_id, 'queue'), 0, -1)
-          log = log.map { |entry| CI::Queue::QueueEntry.test_id(entry) }
-          log.select! { |test_id| failures.include?(test_id) }
-          log.uniq!
+          # Keep full entries (test_id + file_path) so lazy loading can resolve them.
+          # Filter by test_id against failures without stripping file paths.
+          log.select! { |entry| failures.include?(CI::Queue::QueueEntry.test_id(entry)) }
+          log.uniq! { |entry| CI::Queue::QueueEntry.test_id(entry) }
           log.reverse!
           Retry.new(log, config, redis: redis)
         end
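The effect of keeping full entries can be seen with a toy log. The JSON shape below is an assumption based on comments elsewhere in this diff (entries carrying test_id plus file_path), with a lambda standing in for CI::Queue::QueueEntry.test_id:

    require "json"
    require "set"

    failures = Set.new(["SomeTest#test_a"])
    log = [
      { test_id: "SomeTest#test_a", file_path: "test/some_test.rb" }.to_json,
      { test_id: "OtherTest#test_b", file_path: "test/other_test.rb" }.to_json,
    ]
    test_id = ->(entry) { JSON.parse(entry).fetch("test_id") }

    log.select! { |entry| failures.include?(test_id.call(entry)) }
    log.uniq!   { |entry| test_id.call(entry) }
    log # => only the failed entry remains, its file_path intact for lazy loading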
@@ -172,6 +179,11 @@ module CI
           nil
         end
 
+        def lease_for(entry)
+          test_id = CI::Queue::QueueEntry.test_id(entry)
+          @reserved_leases[test_id]
+        end
+
         def report_worker_error(error)
           build.report_worker_error(error)
         end
@@ -180,11 +192,12 @@ module CI
           test_id = CI::Queue::QueueEntry.test_id(entry)
           assert_reserved!(test_id)
           entry = reserved_entries.fetch(test_id, entry)
+          lease = @reserved_leases.delete(test_id)
           unreserve_entry(test_id)
           eval_script(
             :acknowledge,
-            keys: [key('running'), key('processed'), key('owners'), key('error-reports'), key('requeued-by')],
-            argv: [entry, error.to_s, config.redis_ttl],
+            keys: [key('running'), key('processed'), key('owners'), key('error-reports'), key('requeued-by'), key('leases')],
+            argv: [entry, error.to_s, config.redis_ttl, lease.to_s],
             pipeline: pipeline,
           ) == 1
         end
@@ -193,6 +206,7 @@ module CI
           test_id = CI::Queue::QueueEntry.test_id(entry)
           assert_reserved!(test_id)
           entry = reserved_entries.fetch(test_id, entry)
+          lease = @reserved_leases.delete(test_id)
           unreserve_entry(test_id)
           global_max_requeues = config.global_max_requeues(total)
 
@@ -207,14 +221,16 @@ module CI
               key('owners'),
               key('error-reports'),
               key('requeued-by'),
+              key('leases'),
             ],
-            argv: [config.max_requeues, global_max_requeues, entry, offset, config.redis_ttl],
+            argv: [config.max_requeues, global_max_requeues, entry, offset, config.redis_ttl, lease.to_s],
           ) == 1
 
           unless requeued
             reserved_tests << test_id
             reserved_entries[test_id] = entry
             reserved_entry_ids[entry] = test_id
+            @reserved_leases[test_id] = lease if lease
           end
           requeued
         end
@@ -222,7 +238,7 @@ module CI
         def release!
           eval_script(
             :release,
-            keys: [key('running'), key('worker', worker_id, 'queue'), key('owners')],
+            keys: [key('running'), key('worker', worker_id, 'queue'), key('owners'), key('leases')],
             argv: [],
           )
           nil
@@ -254,11 +270,12 @@ module CI
           end
         end
 
-        def reserve_entry(entry)
+        def reserve_entry(entry, lease = nil)
           test_id = CI::Queue::QueueEntry.test_id(entry)
           reserved_tests << test_id
           reserved_entries[test_id] = entry
           reserved_entry_ids[entry] = test_id
+          @reserved_leases[test_id] = lease if lease
         end
 
         def unreserve_entry(test_id)
@@ -282,7 +299,7 @@ module CI
 
           return entry_resolver.call(entry) if entry_resolver
 
-          entry
+          UnresolvedEntry.new(test_id, entry)
         end
 
         def still_streaming?
@@ -343,12 +360,12 @@ module CI
         end
 
         def reserve
-          (try_to_reserve_lost_test || try_to_reserve_test).tap do |entry|
-            if entry
-              @first_reserve_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC)
-              reserve_entry(entry)
-            end
+          entry, lease = try_to_reserve_lost_test || try_to_reserve_test || [nil, nil]
+          if entry
+            @first_reserve_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC)
+            reserve_entry(entry, lease)
           end
+          entry
         end
 
         def try_to_reserve_test
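The destructuring leans on Ruby's multiple assignment: arrays split across the targets, while a bare string (the shape the reserve scripts returned before this change) lands wholly in entry and leaves lease nil, since String does not implement to_ary. For illustration:

    entry, lease = ["SomeTest", "42"]  # entry == "SomeTest", lease == "42"
    entry, lease = "SomeTest"          # entry == "SomeTest", lease == nil
    entry, lease = nil || [nil, nil]   # empty queue: both nil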
@@ -362,6 +379,8 @@ module CI
               key('owners'),
               key('requeued-by'),
               key('workers'),
+              key('leases'),
+              key('lease-counter'),
             ],
             argv: [CI::Queue.time_now.to_f, Redis.requeue_offset],
           )
@@ -370,25 +389,28 @@ module CI
         def try_to_reserve_lost_test
           timeout = config.max_missed_heartbeat_seconds ? config.max_missed_heartbeat_seconds : config.timeout
 
-          lost_test = eval_script(
+          result = eval_script(
             :reserve_lost,
             keys: [
               key('running'),
               key('processed'),
               key('worker', worker_id, 'queue'),
               key('owners'),
+              key('leases'),
+              key('lease-counter'),
             ],
             argv: [CI::Queue.time_now.to_f, timeout],
           )
 
-          if lost_test
-            build.record_warning(Warnings::RESERVED_LOST_TEST, test: CI::Queue::QueueEntry.test_id(lost_test), timeout: config.timeout)
+          if result
+            entry = result.is_a?(Array) ? result[0] : result
+            build.record_warning(Warnings::RESERVED_LOST_TEST, test: CI::Queue::QueueEntry.test_id(entry), timeout: config.timeout)
             if CI::Queue.debug?
-              $stderr.puts "[ci-queue][reserve_lost] worker=#{worker_id} test_id=#{CI::Queue::QueueEntry.test_id(lost_test)}"
+              $stderr.puts "[ci-queue][reserve_lost] worker=#{worker_id} test_id=#{CI::Queue::QueueEntry.test_id(entry)}"
             end
           end
 
-          lost_test
+          result
         end
 
         def push(entries)
@@ -16,6 +16,7 @@ module CI
       TEN_MINUTES = 60 * 10
 
       attr_reader :progress, :total
+      attr_accessor :entry_resolver
 
       def initialize(tests, config)
         @queue = tests
@@ -50,10 +51,24 @@ module CI
         self
       end
 
-      def with_heartbeat(id)
+      # Support lazy loading mode: accept an enumerator of entries and
+      # store them in queue order (no shuffling). This preserves the
+      # exact order from the input file for local reproduction.
+      def stream_populate(tests, random: nil, batch_size: nil)
+        @queue = []
+        tests.each { |entry| @queue << entry }
+        @total = @queue.size
+        self
+      end
+
+      def with_heartbeat(id, lease: nil)
         yield
       end
 
+      def lease_for(entry)
+        nil
+      end
+
       def ensure_heartbeat_thread_alive!; end
 
       def boot_heartbeat_process!; end
@@ -75,11 +90,15 @@ module CI
       end
 
       def populated?
-        !!defined?(@index)
+        !!defined?(@index) || @queue.any?
       end
 
       def to_a
-        @queue.map { |i| index.fetch(i) }
+        if defined?(@index) && @index
+          @queue.map { |i| index.fetch(i) }
+        else
+          @queue.dup
+        end
       end
 
       def size
@@ -97,9 +116,28 @@ module CI
       def poll
         while !@shutdown && config.circuit_breakers.none?(&:open?) && !max_test_failed? && reserved_test = @queue.shift
           reserved_tests << reserved_test
-          yield index.fetch(reserved_test)
+          if entry_resolver
+            resolved = entry_resolver.call(reserved_test)
+            # Track the original queue entry so requeue can push it back
+            # with its full payload (file path, load-error data, etc.).
+            reserved_entries[resolved.id] = reserved_test if resolved.respond_to?(:id)
+            yield resolved
+          elsif defined?(@index) && @index
+            # Queue entries may be JSON-formatted (with test_id + file_path) while
+            # the index is keyed by bare test_id from populate. Try to extract the
+            # test_id first, then fall back to the raw entry.
+            test_id = begin
+              CI::Queue::QueueEntry.test_id(reserved_test)
+            rescue JSON::ParserError
+              reserved_test
+            end
+            yield index.fetch(test_id)
+          else
+            yield reserved_test
+          end
         end
         reserved_tests.clear
+        reserved_entries.clear
       end
 
       def exhausted?
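poll now has three resolution paths, tried in order. A condensed, runnable sketch of the precedence (collaborators passed in explicitly; the real method reads them from instance state):

    def resolve(reserved_test, entry_resolver:, index:)
      if entry_resolver
        entry_resolver.call(reserved_test)  # lazy loading: resolve the full entry
      elsif index
        index.fetch(reserved_test)          # eager populate: bare test_id lookup
      else
        reserved_test                       # e.g. Grind, which disables both
      end
    end

    resolve("SomeTest#test_a", entry_resolver: nil, index: nil) # => "SomeTest#test_a"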
@@ -130,7 +168,10 @@ module CI
         return false unless should_requeue?(test_id)
 
         requeues[test_id] += 1
-        @queue.unshift(test_id)
+        # Push back the original queue entry (with file path / load-error payload)
+        # so entry_resolver can fully resolve it on the next poll iteration.
+        original_entry = reserved_entries.delete(test_id) || test_id
+        @queue.unshift(original_entry)
         true
       end
 
@@ -146,6 +187,10 @@ module CI
         @requeues ||= Hash.new(0)
       end
 
+      def reserved_entries
+        @reserved_entries ||= {}
+      end
+
       def reserved_tests
         @reserved_tests ||= Concurrent::Set.new
       end
@@ -2,7 +2,7 @@
 
 module CI
   module Queue
-    VERSION = '0.84.0'
+    VERSION = '0.86.0'
     DEV_SCRIPTS_ROOT = ::File.expand_path('../../../../../redis', __FILE__)
     RELEASE_SCRIPTS_ROOT = ::File.expand_path('../redis', __FILE__)
   end
@@ -35,6 +35,7 @@ module Minitest
           configure_lazy_queue
           queue.stream_populate(lazy_test_enumerator, random: ordering_seed, batch_size: queue_config.lazy_load_stream_batch_size)
         else
+          configure_lazy_queue
           queue.populate(Minitest.loaded_tests, random: ordering_seed)
         end
       end
@@ -118,6 +118,15 @@ module Minitest
         # minitest/autorun's at_exit hook, which may not be registered since
         # test files haven't been loaded yet. exit! prevents double-execution
         # if minitest/autorun was loaded by the leader during streaming.
+        #
+        # Re-check exhausted? after booting: slow workers may arrive after the queue
+        # has been fully drained by faster workers. In that case exit cleanly (0)
+        # rather than letting Minitest return false for a 0-test run.
+        if queue.rescue_connection_errors { queue.exhausted? }
+          puts green('All tests were ran already')
+          verify_reporters!(reporters)
+          exit!(0)
+        end
         passed = Minitest.run []
         verify_reporters!(reporters)
         exit!(passed ? 0 : 1)
@@ -163,7 +163,7 @@ module Minitest
         rescue_run_errors do
           begin
             queue.poll do |example|
-              result = queue.with_heartbeat(example.queue_entry) do
+              result = queue.with_heartbeat(example.queue_entry, lease: queue.lease_for(example.queue_entry)) do
                 example.run
               end
 
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ci-queue
 version: !ruby/object:Gem::Version
-  version: 0.84.0
+  version: 0.86.0
 platform: ruby
 authors:
 - Jean Boussier
@@ -122,7 +122,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '1.1'
 - !ruby/object:Gem::Dependency
-  name: snappy
+  name: rexml
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -136,7 +136,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: msgpack
+  name: snappy
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -150,7 +150,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: benchmark
+  name: msgpack
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -164,7 +164,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: rexml
+  name: benchmark
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -291,7 +291,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 4.0.8
+rubygems_version: 4.0.9
 specification_version: 4
 summary: Distribute tests over many workers using a queue
 test_files: []