ci-queue 0.84.0 → 0.85.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/ci/queue/redis/acknowledge.lua +13 -2
- data/lib/ci/queue/redis/base.rb +12 -18
- data/lib/ci/queue/redis/heartbeat.lua +9 -10
- data/lib/ci/queue/redis/monitor.rb +9 -11
- data/lib/ci/queue/redis/release.lua +2 -0
- data/lib/ci/queue/redis/requeue.lua +9 -2
- data/lib/ci/queue/redis/reserve.lua +13 -8
- data/lib/ci/queue/redis/reserve_lost.lua +13 -2
- data/lib/ci/queue/redis/retry.rb +16 -0
- data/lib/ci/queue/redis/worker.rb +35 -18
- data/lib/ci/queue/static.rb +50 -5
- data/lib/ci/queue/version.rb +1 -1
- data/lib/minitest/queue.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d7f6896ca23e9f9cd64dae9300539d0d5d88ced4c9b59a193d67824d353726cd
|
|
4
|
+
data.tar.gz: 969ba0b363dbe7e491561095587b802f24aa400685a96bcb5fda4b23e69dc02b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: dd6d91458d235f13c60df0f27374d09d3050d54354a7d87f0d427c21c558bac9338054a901a350eda42c37118af7893cfb763db96ab7be43b1f0d6a275ac2e6d
|
|
7
|
+
data.tar.gz: 833e87a9a0fd535589267462f98f81d134342f66432a43b246a0bc68632a64b85e9d80253a27d9ccf627898a5d846ea37d2c5bef7d2a602905d7c21195ddbff6
|
data/Gemfile.lock
CHANGED
data/lib/ci/queue/redis/acknowledge.lua
CHANGED
|
@@ -4,12 +4,23 @@ local processed_key = KEYS[2]
|
|
|
4
4
|
local owners_key = KEYS[3]
|
|
5
5
|
local error_reports_key = KEYS[4]
|
|
6
6
|
local requeued_by_key = KEYS[5]
|
|
7
|
+
local leases_key = KEYS[6]
|
|
7
8
|
|
|
8
9
|
local entry = ARGV[1]
|
|
9
10
|
local error = ARGV[2]
|
|
10
11
|
local ttl = ARGV[3]
|
|
11
|
-
|
|
12
|
-
|
|
12
|
+
local lease_id = ARGV[4]
|
|
13
|
+
|
|
14
|
+
-- Only the current lease holder can remove the entry from the running set.
|
|
15
|
+
-- If the lease was transferred (e.g. via reserve_lost), the stale worker
|
|
16
|
+
-- must not remove the running entry — that would let the supervisor think
|
|
17
|
+
-- the queue is exhausted while the new lease holder is still processing.
|
|
18
|
+
if tostring(redis.call('hget', leases_key, entry)) == lease_id then
|
|
19
|
+
redis.call('zrem', zset_key, entry)
|
|
20
|
+
redis.call('hdel', owners_key, entry)
|
|
21
|
+
redis.call('hdel', leases_key, entry)
|
|
22
|
+
end
|
|
23
|
+
|
|
13
24
|
redis.call('hdel', requeued_by_key, entry)
|
|
14
25
|
local acknowledged = redis.call('sadd', processed_key, entry) == 1
|
|
15
26
|
|
data/lib/ci/queue/redis/base.rb
CHANGED
|
@@ -60,10 +60,10 @@ module CI
|
|
|
60
60
|
[0, 0, 0.1, 0.5, 1, 3, 5]
|
|
61
61
|
end
|
|
62
62
|
|
|
63
|
-
def with_heartbeat(id)
|
|
63
|
+
def with_heartbeat(id, lease: nil)
|
|
64
64
|
if heartbeat_enabled?
|
|
65
65
|
ensure_heartbeat_thread_alive!
|
|
66
|
-
heartbeat_state.set(:tick, id)
|
|
66
|
+
heartbeat_state.set(:tick, id, lease)
|
|
67
67
|
end
|
|
68
68
|
|
|
69
69
|
yield
|
|
@@ -264,12 +264,11 @@ module CI
|
|
|
264
264
|
end
|
|
265
265
|
|
|
266
266
|
class HeartbeatProcess
|
|
267
|
-
def initialize(redis_url, zset_key,
|
|
267
|
+
def initialize(redis_url, zset_key, owners_key, leases_key)
|
|
268
268
|
@redis_url = redis_url
|
|
269
269
|
@zset_key = zset_key
|
|
270
|
-
@processed_key = processed_key
|
|
271
270
|
@owners_key = owners_key
|
|
272
|
-
@
|
|
271
|
+
@leases_key = leases_key
|
|
273
272
|
end
|
|
274
273
|
|
|
275
274
|
def boot!
|
|
@@ -281,9 +280,8 @@ module CI
|
|
|
281
280
|
::File.join(__dir__, "monitor.rb"),
|
|
282
281
|
@redis_url,
|
|
283
282
|
@zset_key,
|
|
284
|
-
@processed_key,
|
|
285
283
|
@owners_key,
|
|
286
|
-
@
|
|
284
|
+
@leases_key,
|
|
287
285
|
in: child_read,
|
|
288
286
|
out: child_write,
|
|
289
287
|
)
|
|
@@ -313,8 +311,8 @@ module CI
|
|
|
313
311
|
end
|
|
314
312
|
end
|
|
315
313
|
|
|
316
|
-
def tick!(id)
|
|
317
|
-
send_message(:tick!, id: id)
|
|
314
|
+
def tick!(id, lease)
|
|
315
|
+
send_message(:tick!, id: id, lease: lease.to_s)
|
|
318
316
|
end
|
|
319
317
|
|
|
320
318
|
private
|
|
@@ -355,9 +353,8 @@ module CI
|
|
|
355
353
|
@heartbeat_process ||= HeartbeatProcess.new(
|
|
356
354
|
@redis_url,
|
|
357
355
|
key('running'),
|
|
358
|
-
key('processed'),
|
|
359
356
|
key('owners'),
|
|
360
|
-
key('
|
|
357
|
+
key('leases'),
|
|
361
358
|
)
|
|
362
359
|
end
|
|
363
360
|
|
|
@@ -369,19 +366,16 @@ module CI
|
|
|
369
366
|
Thread.current.name = "CI::Queue#heartbeat"
|
|
370
367
|
Thread.current.abort_on_exception = true
|
|
371
368
|
|
|
372
|
-
timeout = config.timeout.to_i
|
|
373
369
|
loop do
|
|
374
|
-
command = nil
|
|
375
370
|
command = heartbeat_state.wait(1) # waits for max 1 second but wakes up immediately if we receive a command
|
|
376
371
|
|
|
377
372
|
case command&.first
|
|
378
373
|
when :tick
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
timeout -= 1
|
|
382
|
-
end
|
|
374
|
+
# command = [:tick, entry_id, lease_id]
|
|
375
|
+
heartbeat_process.tick!(command[1], command[2])
|
|
383
376
|
when :reset
|
|
384
|
-
|
|
377
|
+
# Test finished, stop ticking until next test starts
|
|
378
|
+
nil
|
|
385
379
|
when :stop
|
|
386
380
|
break
|
|
387
381
|
end
|
data/lib/ci/queue/redis/heartbeat.lua
CHANGED
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
-- AUTOGENERATED FILE DO NOT EDIT DIRECTLY
|
|
2
2
|
local zset_key = KEYS[1]
|
|
3
|
-
local
|
|
4
|
-
local owners_key = KEYS[3]
|
|
5
|
-
local worker_queue_key = KEYS[4]
|
|
3
|
+
local leases_key = KEYS[2]
|
|
6
4
|
|
|
7
5
|
local current_time = ARGV[1]
|
|
8
6
|
local entry = ARGV[2]
|
|
7
|
+
local lease_id = ARGV[3]
|
|
9
8
|
|
|
10
|
-
--
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
--
|
|
16
|
-
if redis.call('hget',
|
|
9
|
+
-- Only the current lease holder can bump the timestamp.
|
|
10
|
+
-- We intentionally do NOT check the processed set. A non-owner worker's
|
|
11
|
+
-- acknowledge can add the entry to processed, which would poison the
|
|
12
|
+
-- current lease holder's heartbeat if we checked it here.
|
|
13
|
+
-- The lease check alone is sufficient — once the lease holder acknowledges,
|
|
14
|
+
-- they zrem + hdel the lease, so the heartbeat will naturally stop.
|
|
15
|
+
if tostring(redis.call('hget', leases_key, entry)) == lease_id then
|
|
17
16
|
return redis.call('zadd', zset_key, current_time, entry)
|
|
18
17
|
end
|
data/lib/ci/queue/redis/monitor.rb
CHANGED
|
@@ -13,11 +13,10 @@ module CI
|
|
|
13
13
|
DEV_SCRIPTS_ROOT = ::File.expand_path('../../../../../../redis', __FILE__)
|
|
14
14
|
RELEASE_SCRIPTS_ROOT = ::File.expand_path('../../redis', __FILE__)
|
|
15
15
|
|
|
16
|
-
def initialize(pipe, logger, redis_url, zset_key,
|
|
16
|
+
def initialize(pipe, logger, redis_url, zset_key, owners_key, leases_key)
|
|
17
17
|
@zset_key = zset_key
|
|
18
|
-
@processed_key = processed_key
|
|
19
18
|
@owners_key = owners_key
|
|
20
|
-
@
|
|
19
|
+
@leases_key = leases_key
|
|
21
20
|
@logger = logger
|
|
22
21
|
@redis = ::Redis.new(url: redis_url, reconnect_attempts: [0, 0, 0.1, 0.5, 1, 3, 5])
|
|
23
22
|
@shutdown = false
|
|
@@ -36,11 +35,11 @@ module CI
|
|
|
36
35
|
@self_pipe_writer << '.'
|
|
37
36
|
end
|
|
38
37
|
|
|
39
|
-
def process_tick!(id:)
|
|
38
|
+
def process_tick!(id:, lease:)
|
|
40
39
|
eval_script(
|
|
41
40
|
:heartbeat,
|
|
42
|
-
keys: [@zset_key, @
|
|
43
|
-
argv: [Time.now.to_f, id]
|
|
41
|
+
keys: [@zset_key, @leases_key],
|
|
42
|
+
argv: [Time.now.to_f, id, lease]
|
|
44
43
|
)
|
|
45
44
|
rescue => error
|
|
46
45
|
@logger.info(error)
|
|
@@ -151,12 +150,11 @@ end
|
|
|
151
150
|
|
|
152
151
|
redis_url = ARGV[0]
|
|
153
152
|
zset_key = ARGV[1]
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
worker_queue_key = ARGV[4]
|
|
153
|
+
owners_key = ARGV[2]
|
|
154
|
+
leases_key = ARGV[3]
|
|
157
155
|
|
|
158
|
-
logger.debug("Starting monitor: #{redis_url} #{zset_key} #{
|
|
159
|
-
manager = CI::Queue::Redis::Monitor.new($stdin, logger, redis_url, zset_key,
|
|
156
|
+
logger.debug("Starting monitor: #{redis_url} #{zset_key} #{leases_key}")
|
|
157
|
+
manager = CI::Queue::Redis::Monitor.new($stdin, logger, redis_url, zset_key, owners_key, leases_key)
|
|
160
158
|
|
|
161
159
|
# Notify the parent we're ready
|
|
162
160
|
$stdout.puts(".")
|
data/lib/ci/queue/redis/release.lua
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
local zset_key = KEYS[1]
|
|
3
3
|
local worker_queue_key = KEYS[2]
|
|
4
4
|
local owners_key = KEYS[3]
|
|
5
|
+
local leases_key = KEYS[4]
|
|
5
6
|
|
|
6
7
|
-- owned_tests = {"SomeTest", "worker:1", "SomeOtherTest", "worker:2", ...}
|
|
7
8
|
local owned_tests = redis.call('hgetall', owners_key)
|
|
@@ -9,6 +10,7 @@ for index, owner_or_test in ipairs(owned_tests) do
|
|
|
9
10
|
if owner_or_test == worker_queue_key then -- If we owned a test
|
|
10
11
|
local test = owned_tests[index - 1]
|
|
11
12
|
redis.call('zadd', zset_key, "0", test) -- We expire the lease immediately
|
|
13
|
+
redis.call('hdel', leases_key, test)
|
|
12
14
|
return nil
|
|
13
15
|
end
|
|
14
16
|
end
|
data/lib/ci/queue/redis/requeue.lua
CHANGED
|
@@ -7,15 +7,20 @@ local worker_queue_key = KEYS[5]
|
|
|
7
7
|
local owners_key = KEYS[6]
|
|
8
8
|
local error_reports_key = KEYS[7]
|
|
9
9
|
local requeued_by_key = KEYS[8]
|
|
10
|
+
local leases_key = KEYS[9]
|
|
10
11
|
|
|
11
12
|
local max_requeues = tonumber(ARGV[1])
|
|
12
13
|
local global_max_requeues = tonumber(ARGV[2])
|
|
13
14
|
local entry = ARGV[3]
|
|
14
15
|
local offset = ARGV[4]
|
|
15
16
|
local ttl = tonumber(ARGV[5])
|
|
17
|
+
local lease_id = ARGV[6]
|
|
16
18
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
+
-- Only the current lease holder can requeue a test.
|
|
20
|
+
-- If the lease was transferred (e.g. via reserve_lost), reject the stale
|
|
21
|
+
-- worker's requeue so the running entry stays intact for the new holder.
|
|
22
|
+
if tostring(redis.call('hget', leases_key, entry)) ~= lease_id then
|
|
23
|
+
return false
|
|
19
24
|
end
|
|
20
25
|
|
|
21
26
|
if redis.call('sismember', processed_key, entry) == 1 then
|
|
@@ -49,6 +54,8 @@ if ttl and ttl > 0 then
|
|
|
49
54
|
redis.call('expire', requeued_by_key, ttl)
|
|
50
55
|
end
|
|
51
56
|
|
|
57
|
+
redis.call('hdel', owners_key, entry)
|
|
58
|
+
redis.call('hdel', leases_key, entry)
|
|
52
59
|
redis.call('zrem', zset_key, entry)
|
|
53
60
|
|
|
54
61
|
return true
|
data/lib/ci/queue/redis/reserve.lua
CHANGED
|
@@ -6,6 +6,8 @@ local worker_queue_key = KEYS[4]
|
|
|
6
6
|
local owners_key = KEYS[5]
|
|
7
7
|
local requeued_by_key = KEYS[6]
|
|
8
8
|
local workers_key = KEYS[7]
|
|
9
|
+
local leases_key = KEYS[8]
|
|
10
|
+
local lease_counter_key = KEYS[9]
|
|
9
11
|
|
|
10
12
|
local current_time = ARGV[1]
|
|
11
13
|
local defer_offset = tonumber(ARGV[2]) or 0
|
|
@@ -20,6 +22,15 @@ local function insert_with_offset(test)
|
|
|
20
22
|
end
|
|
21
23
|
end
|
|
22
24
|
|
|
25
|
+
local function claim_test(test)
|
|
26
|
+
local lease = redis.call('incr', lease_counter_key)
|
|
27
|
+
redis.call('zadd', zset_key, current_time, test)
|
|
28
|
+
redis.call('lpush', worker_queue_key, test)
|
|
29
|
+
redis.call('hset', owners_key, test, worker_queue_key)
|
|
30
|
+
redis.call('hset', leases_key, test, lease)
|
|
31
|
+
return {test, tostring(lease)}
|
|
32
|
+
end
|
|
33
|
+
|
|
23
34
|
for attempt = 1, max_skip_attempts do
|
|
24
35
|
local test = redis.call('rpop', queue_key)
|
|
25
36
|
if not test then
|
|
@@ -31,10 +42,7 @@ for attempt = 1, max_skip_attempts do
|
|
|
31
42
|
-- If this build only has one worker, allow immediate self-pickup.
|
|
32
43
|
if redis.call('scard', workers_key) <= 1 then
|
|
33
44
|
redis.call('hdel', requeued_by_key, test)
|
|
34
|
-
|
|
35
|
-
redis.call('lpush', worker_queue_key, test)
|
|
36
|
-
redis.call('hset', owners_key, test, worker_queue_key)
|
|
37
|
-
return test
|
|
45
|
+
return claim_test(test)
|
|
38
46
|
end
|
|
39
47
|
|
|
40
48
|
insert_with_offset(test)
|
|
@@ -47,10 +55,7 @@ for attempt = 1, max_skip_attempts do
|
|
|
47
55
|
end
|
|
48
56
|
else
|
|
49
57
|
redis.call('hdel', requeued_by_key, test)
|
|
50
|
-
|
|
51
|
-
redis.call('lpush', worker_queue_key, test)
|
|
52
|
-
redis.call('hset', owners_key, test, worker_queue_key)
|
|
53
|
-
return test
|
|
58
|
+
return claim_test(test)
|
|
54
59
|
end
|
|
55
60
|
end
|
|
56
61
|
|
data/lib/ci/queue/redis/reserve_lost.lua
CHANGED
|
@@ -3,6 +3,8 @@ local zset_key = KEYS[1]
|
|
|
3
3
|
local processed_key = KEYS[2]
|
|
4
4
|
local worker_queue_key = KEYS[3]
|
|
5
5
|
local owners_key = KEYS[4]
|
|
6
|
+
local leases_key = KEYS[5]
|
|
7
|
+
local lease_counter_key = KEYS[6]
|
|
6
8
|
|
|
7
9
|
local current_time = ARGV[1]
|
|
8
10
|
local timeout = ARGV[2]
|
|
@@ -10,10 +12,19 @@ local timeout = ARGV[2]
|
|
|
10
12
|
local lost_tests = redis.call('zrangebyscore', zset_key, 0, current_time - timeout)
|
|
11
13
|
for _, test in ipairs(lost_tests) do
|
|
12
14
|
if redis.call('sismember', processed_key, test) == 0 then
|
|
15
|
+
local lease = redis.call('incr', lease_counter_key)
|
|
13
16
|
redis.call('zadd', zset_key, current_time, test)
|
|
14
17
|
redis.call('lpush', worker_queue_key, test)
|
|
15
|
-
redis.call('hset', owners_key, test, worker_queue_key)
|
|
16
|
-
|
|
18
|
+
redis.call('hset', owners_key, test, worker_queue_key)
|
|
19
|
+
redis.call('hset', leases_key, test, lease)
|
|
20
|
+
return {test, tostring(lease)}
|
|
21
|
+
else
|
|
22
|
+
-- Test is already processed but still in running (stale). This can happen when
|
|
23
|
+
-- a non-owner worker acknowledged the test (marking it processed) but could not
|
|
24
|
+
-- remove it from running due to the lease guard. Clean it up.
|
|
25
|
+
redis.call('zrem', zset_key, test)
|
|
26
|
+
redis.call('hdel', owners_key, test)
|
|
27
|
+
redis.call('hdel', leases_key, test)
|
|
17
28
|
end
|
|
18
29
|
end
|
|
19
30
|
|
data/lib/ci/queue/redis/retry.rb
CHANGED
|
@@ -12,6 +12,22 @@ module CI
|
|
|
12
12
|
@build ||= CI::Queue::Redis::BuildRecord.new(self, redis, config)
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
+
# Retry queue is pre-populated with failed test entries from the previous run.
|
|
16
|
+
# Don't replace them with the full preresolved/lazy test list.
|
|
17
|
+
# QueuePopulationStrategy#configure_lazy_queue will still set entry_resolver,
|
|
18
|
+
# so poll uses LazyEntryResolver to lazily load test files on demand.
|
|
19
|
+
# The random/batch_size params are intentionally ignored since we keep
|
|
20
|
+
# the existing queue contents as-is.
|
|
21
|
+
#
|
|
22
|
+
# Note: populate (non-stream) is intentionally NOT overridden here.
|
|
23
|
+
# RSpec and non-lazy Minitest retries call populate to build the
|
|
24
|
+
# @index mapping test IDs to runnable objects, which poll needs to
|
|
25
|
+
# yield proper test/example instances. In those paths, @queue contains
|
|
26
|
+
# bare test IDs that match @index keys, so populate works correctly.
|
|
27
|
+
def stream_populate(tests, random: nil, batch_size: nil)
|
|
28
|
+
self
|
|
29
|
+
end
|
|
30
|
+
|
|
15
31
|
private
|
|
16
32
|
|
|
17
33
|
attr_reader :redis
|
data/lib/ci/queue/redis/worker.rb
CHANGED
|
@@ -19,6 +19,7 @@ module CI
|
|
|
19
19
|
|
|
20
20
|
def initialize(redis, config)
|
|
21
21
|
@reserved_tests = Concurrent::Set.new
|
|
22
|
+
@reserved_leases = Concurrent::Map.new
|
|
22
23
|
@shutdown_required = false
|
|
23
24
|
@first_reserve_at = nil
|
|
24
25
|
super(redis, config)
|
|
@@ -147,9 +148,10 @@ module CI
|
|
|
147
148
|
def retry_queue
|
|
148
149
|
failures = build.failed_tests.to_set
|
|
149
150
|
log = redis.lrange(key('worker', worker_id, 'queue'), 0, -1)
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
log.
|
|
151
|
+
# Keep full entries (test_id + file_path) so lazy loading can resolve them.
|
|
152
|
+
# Filter by test_id against failures without stripping file paths.
|
|
153
|
+
log.select! { |entry| failures.include?(CI::Queue::QueueEntry.test_id(entry)) }
|
|
154
|
+
log.uniq! { |entry| CI::Queue::QueueEntry.test_id(entry) }
|
|
153
155
|
log.reverse!
|
|
154
156
|
Retry.new(log, config, redis: redis)
|
|
155
157
|
end
|
|
@@ -172,6 +174,11 @@ module CI
|
|
|
172
174
|
nil
|
|
173
175
|
end
|
|
174
176
|
|
|
177
|
+
def lease_for(entry)
|
|
178
|
+
test_id = CI::Queue::QueueEntry.test_id(entry)
|
|
179
|
+
@reserved_leases[test_id]
|
|
180
|
+
end
|
|
181
|
+
|
|
175
182
|
def report_worker_error(error)
|
|
176
183
|
build.report_worker_error(error)
|
|
177
184
|
end
|
|
@@ -180,11 +187,12 @@ module CI
|
|
|
180
187
|
test_id = CI::Queue::QueueEntry.test_id(entry)
|
|
181
188
|
assert_reserved!(test_id)
|
|
182
189
|
entry = reserved_entries.fetch(test_id, entry)
|
|
190
|
+
lease = @reserved_leases.delete(test_id)
|
|
183
191
|
unreserve_entry(test_id)
|
|
184
192
|
eval_script(
|
|
185
193
|
:acknowledge,
|
|
186
|
-
keys: [key('running'), key('processed'), key('owners'), key('error-reports'), key('requeued-by')],
|
|
187
|
-
argv: [entry, error.to_s, config.redis_ttl],
|
|
194
|
+
keys: [key('running'), key('processed'), key('owners'), key('error-reports'), key('requeued-by'), key('leases')],
|
|
195
|
+
argv: [entry, error.to_s, config.redis_ttl, lease.to_s],
|
|
188
196
|
pipeline: pipeline,
|
|
189
197
|
) == 1
|
|
190
198
|
end
|
|
@@ -193,6 +201,7 @@ module CI
|
|
|
193
201
|
test_id = CI::Queue::QueueEntry.test_id(entry)
|
|
194
202
|
assert_reserved!(test_id)
|
|
195
203
|
entry = reserved_entries.fetch(test_id, entry)
|
|
204
|
+
lease = @reserved_leases.delete(test_id)
|
|
196
205
|
unreserve_entry(test_id)
|
|
197
206
|
global_max_requeues = config.global_max_requeues(total)
|
|
198
207
|
|
|
@@ -207,14 +216,16 @@ module CI
|
|
|
207
216
|
key('owners'),
|
|
208
217
|
key('error-reports'),
|
|
209
218
|
key('requeued-by'),
|
|
219
|
+
key('leases'),
|
|
210
220
|
],
|
|
211
|
-
argv: [config.max_requeues, global_max_requeues, entry, offset, config.redis_ttl],
|
|
221
|
+
argv: [config.max_requeues, global_max_requeues, entry, offset, config.redis_ttl, lease.to_s],
|
|
212
222
|
) == 1
|
|
213
223
|
|
|
214
224
|
unless requeued
|
|
215
225
|
reserved_tests << test_id
|
|
216
226
|
reserved_entries[test_id] = entry
|
|
217
227
|
reserved_entry_ids[entry] = test_id
|
|
228
|
+
@reserved_leases[test_id] = lease if lease
|
|
218
229
|
end
|
|
219
230
|
requeued
|
|
220
231
|
end
|
|
@@ -222,7 +233,7 @@ module CI
|
|
|
222
233
|
def release!
|
|
223
234
|
eval_script(
|
|
224
235
|
:release,
|
|
225
|
-
keys: [key('running'), key('worker', worker_id, 'queue'), key('owners')],
|
|
236
|
+
keys: [key('running'), key('worker', worker_id, 'queue'), key('owners'), key('leases')],
|
|
226
237
|
argv: [],
|
|
227
238
|
)
|
|
228
239
|
nil
|
|
@@ -254,11 +265,12 @@ module CI
|
|
|
254
265
|
end
|
|
255
266
|
end
|
|
256
267
|
|
|
257
|
-
def reserve_entry(entry)
|
|
268
|
+
def reserve_entry(entry, lease = nil)
|
|
258
269
|
test_id = CI::Queue::QueueEntry.test_id(entry)
|
|
259
270
|
reserved_tests << test_id
|
|
260
271
|
reserved_entries[test_id] = entry
|
|
261
272
|
reserved_entry_ids[entry] = test_id
|
|
273
|
+
@reserved_leases[test_id] = lease if lease
|
|
262
274
|
end
|
|
263
275
|
|
|
264
276
|
def unreserve_entry(test_id)
|
|
@@ -343,12 +355,12 @@ module CI
|
|
|
343
355
|
end
|
|
344
356
|
|
|
345
357
|
def reserve
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
end
|
|
358
|
+
entry, lease = try_to_reserve_lost_test || try_to_reserve_test || [nil, nil]
|
|
359
|
+
if entry
|
|
360
|
+
@first_reserve_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
361
|
+
reserve_entry(entry, lease)
|
|
351
362
|
end
|
|
363
|
+
entry
|
|
352
364
|
end
|
|
353
365
|
|
|
354
366
|
def try_to_reserve_test
|
|
@@ -362,6 +374,8 @@ module CI
|
|
|
362
374
|
key('owners'),
|
|
363
375
|
key('requeued-by'),
|
|
364
376
|
key('workers'),
|
|
377
|
+
key('leases'),
|
|
378
|
+
key('lease-counter'),
|
|
365
379
|
],
|
|
366
380
|
argv: [CI::Queue.time_now.to_f, Redis.requeue_offset],
|
|
367
381
|
)
|
|
@@ -370,25 +384,28 @@ module CI
|
|
|
370
384
|
def try_to_reserve_lost_test
|
|
371
385
|
timeout = config.max_missed_heartbeat_seconds ? config.max_missed_heartbeat_seconds : config.timeout
|
|
372
386
|
|
|
373
|
-
|
|
387
|
+
result = eval_script(
|
|
374
388
|
:reserve_lost,
|
|
375
389
|
keys: [
|
|
376
390
|
key('running'),
|
|
377
391
|
key('processed'),
|
|
378
392
|
key('worker', worker_id, 'queue'),
|
|
379
393
|
key('owners'),
|
|
394
|
+
key('leases'),
|
|
395
|
+
key('lease-counter'),
|
|
380
396
|
],
|
|
381
397
|
argv: [CI::Queue.time_now.to_f, timeout],
|
|
382
398
|
)
|
|
383
399
|
|
|
384
|
-
if
|
|
385
|
-
|
|
400
|
+
if result
|
|
401
|
+
entry = result.is_a?(Array) ? result[0] : result
|
|
402
|
+
build.record_warning(Warnings::RESERVED_LOST_TEST, test: CI::Queue::QueueEntry.test_id(entry), timeout: config.timeout)
|
|
386
403
|
if CI::Queue.debug?
|
|
387
|
-
$stderr.puts "[ci-queue][reserve_lost] worker=#{worker_id} test_id=#{CI::Queue::QueueEntry.test_id(
|
|
404
|
+
$stderr.puts "[ci-queue][reserve_lost] worker=#{worker_id} test_id=#{CI::Queue::QueueEntry.test_id(entry)}"
|
|
388
405
|
end
|
|
389
406
|
end
|
|
390
407
|
|
|
391
|
-
|
|
408
|
+
result
|
|
392
409
|
end
|
|
393
410
|
|
|
394
411
|
def push(entries)
|
data/lib/ci/queue/static.rb
CHANGED
|
@@ -16,6 +16,7 @@ module CI
|
|
|
16
16
|
TEN_MINUTES = 60 * 10
|
|
17
17
|
|
|
18
18
|
attr_reader :progress, :total
|
|
19
|
+
attr_accessor :entry_resolver
|
|
19
20
|
|
|
20
21
|
def initialize(tests, config)
|
|
21
22
|
@queue = tests
|
|
@@ -50,10 +51,24 @@ module CI
|
|
|
50
51
|
self
|
|
51
52
|
end
|
|
52
53
|
|
|
53
|
-
|
|
54
|
+
# Support lazy loading mode: accept an enumerator of entries and
|
|
55
|
+
# store them in queue order (no shuffling). This preserves the
|
|
56
|
+
# exact order from the input file for local reproduction.
|
|
57
|
+
def stream_populate(tests, random: nil, batch_size: nil)
|
|
58
|
+
@queue = []
|
|
59
|
+
tests.each { |entry| @queue << entry }
|
|
60
|
+
@total = @queue.size
|
|
61
|
+
self
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def with_heartbeat(id, lease: nil)
|
|
54
65
|
yield
|
|
55
66
|
end
|
|
56
67
|
|
|
68
|
+
def lease_for(entry)
|
|
69
|
+
nil
|
|
70
|
+
end
|
|
71
|
+
|
|
57
72
|
def ensure_heartbeat_thread_alive!; end
|
|
58
73
|
|
|
59
74
|
def boot_heartbeat_process!; end
|
|
@@ -75,11 +90,15 @@ module CI
|
|
|
75
90
|
end
|
|
76
91
|
|
|
77
92
|
def populated?
|
|
78
|
-
!!defined?(@index)
|
|
93
|
+
!!defined?(@index) || @queue.any?
|
|
79
94
|
end
|
|
80
95
|
|
|
81
96
|
def to_a
|
|
82
|
-
@
|
|
97
|
+
if defined?(@index) && @index
|
|
98
|
+
@queue.map { |i| index.fetch(i) }
|
|
99
|
+
else
|
|
100
|
+
@queue.dup
|
|
101
|
+
end
|
|
83
102
|
end
|
|
84
103
|
|
|
85
104
|
def size
|
|
@@ -97,9 +116,28 @@ module CI
|
|
|
97
116
|
def poll
|
|
98
117
|
while !@shutdown && config.circuit_breakers.none?(&:open?) && !max_test_failed? && reserved_test = @queue.shift
|
|
99
118
|
reserved_tests << reserved_test
|
|
100
|
-
|
|
119
|
+
if entry_resolver
|
|
120
|
+
resolved = entry_resolver.call(reserved_test)
|
|
121
|
+
# Track the original queue entry so requeue can push it back
|
|
122
|
+
# with its full payload (file path, load-error data, etc.).
|
|
123
|
+
reserved_entries[resolved.id] = reserved_test if resolved.respond_to?(:id)
|
|
124
|
+
yield resolved
|
|
125
|
+
elsif defined?(@index) && @index
|
|
126
|
+
# Queue entries may be JSON-formatted (with test_id + file_path) while
|
|
127
|
+
# the index is keyed by bare test_id from populate. Try the raw entry
|
|
128
|
+
# first, then fall back to extracting the test_id.
|
|
129
|
+
test_id = begin
|
|
130
|
+
CI::Queue::QueueEntry.test_id(reserved_test)
|
|
131
|
+
rescue JSON::ParserError
|
|
132
|
+
reserved_test
|
|
133
|
+
end
|
|
134
|
+
yield index.fetch(test_id)
|
|
135
|
+
else
|
|
136
|
+
yield reserved_test
|
|
137
|
+
end
|
|
101
138
|
end
|
|
102
139
|
reserved_tests.clear
|
|
140
|
+
reserved_entries.clear
|
|
103
141
|
end
|
|
104
142
|
|
|
105
143
|
def exhausted?
|
|
@@ -130,7 +168,10 @@ module CI
|
|
|
130
168
|
return false unless should_requeue?(test_id)
|
|
131
169
|
|
|
132
170
|
requeues[test_id] += 1
|
|
133
|
-
|
|
171
|
+
# Push back the original queue entry (with file path / load-error payload)
|
|
172
|
+
# so entry_resolver can fully resolve it on the next poll iteration.
|
|
173
|
+
original_entry = reserved_entries.delete(test_id) || test_id
|
|
174
|
+
@queue.unshift(original_entry)
|
|
134
175
|
true
|
|
135
176
|
end
|
|
136
177
|
|
|
@@ -146,6 +187,10 @@ module CI
|
|
|
146
187
|
@requeues ||= Hash.new(0)
|
|
147
188
|
end
|
|
148
189
|
|
|
190
|
+
def reserved_entries
|
|
191
|
+
@reserved_entries ||= {}
|
|
192
|
+
end
|
|
193
|
+
|
|
149
194
|
def reserved_tests
|
|
150
195
|
@reserved_tests ||= Concurrent::Set.new
|
|
151
196
|
end
|
data/lib/ci/queue/version.rb
CHANGED
data/lib/minitest/queue.rb
CHANGED
|
@@ -163,7 +163,7 @@ module Minitest
|
|
|
163
163
|
rescue_run_errors do
|
|
164
164
|
begin
|
|
165
165
|
queue.poll do |example|
|
|
166
|
-
result = queue.with_heartbeat(example.queue_entry) do
|
|
166
|
+
result = queue.with_heartbeat(example.queue_entry, lease: queue.lease_for(example.queue_entry)) do
|
|
167
167
|
example.run
|
|
168
168
|
end
|
|
169
169
|
|