ci-queue 0.83.0 → 0.85.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile.lock +59 -47
- data/ci-queue.gemspec +3 -1
- data/lib/ci/queue/build_record.rb +5 -5
- data/lib/ci/queue/queue_entry.rb +3 -11
- data/lib/ci/queue/redis/acknowledge.lua +17 -7
- data/lib/ci/queue/redis/base.rb +12 -21
- data/lib/ci/queue/redis/build_record.rb +17 -17
- data/lib/ci/queue/redis/heartbeat.lua +9 -15
- data/lib/ci/queue/redis/monitor.rb +9 -13
- data/lib/ci/queue/redis/release.lua +2 -0
- data/lib/ci/queue/redis/requeue.lua +16 -10
- data/lib/ci/queue/redis/reserve.lua +13 -8
- data/lib/ci/queue/redis/reserve_lost.lua +14 -7
- data/lib/ci/queue/redis/retry.rb +16 -0
- data/lib/ci/queue/redis/worker.rb +47 -40
- data/lib/ci/queue/static.rb +54 -9
- data/lib/ci/queue/version.rb +1 -1
- data/lib/minitest/queue/build_status_recorder.rb +4 -4
- data/lib/minitest/queue/test_data.rb +1 -1
- data/lib/minitest/queue.rb +9 -6
- data/lib/rspec/queue/build_status_recorder.rb +4 -2
- data/lib/rspec/queue.rb +6 -2
- metadata +31 -4
- data/lib/ci/queue/redis/_entry_helpers.lua +0 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d7f6896ca23e9f9cd64dae9300539d0d5d88ced4c9b59a193d67824d353726cd
|
|
4
|
+
data.tar.gz: 969ba0b363dbe7e491561095587b802f24aa400685a96bcb5fda4b23e69dc02b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: dd6d91458d235f13c60df0f27374d09d3050d54354a7d87f0d427c21c558bac9338054a901a350eda42c37118af7893cfb763db96ab7be43b1f0d6a275ac2e6d
|
|
7
|
+
data.tar.gz: 833e87a9a0fd535589267462f98f81d134342f66432a43b246a0bc68632a64b85e9d80253a27d9ccf627898a5d846ea37d2c5bef7d2a602905d7c21195ddbff6
|
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
4.0
|
data/Gemfile.lock
CHANGED
|
@@ -1,95 +1,105 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
ci-queue (0.83.0)
|
|
4
|
+
ci-queue (0.85.0)
|
|
5
5
|
logger
|
|
6
6
|
|
|
7
7
|
GEM
|
|
8
8
|
remote: https://rubygems.org/
|
|
9
9
|
specs:
|
|
10
|
-
activesupport (
|
|
10
|
+
activesupport (8.1.3)
|
|
11
11
|
base64
|
|
12
12
|
bigdecimal
|
|
13
|
-
concurrent-ruby (~> 1.0, >= 1.
|
|
13
|
+
concurrent-ruby (~> 1.0, >= 1.3.1)
|
|
14
14
|
connection_pool (>= 2.2.5)
|
|
15
15
|
drb
|
|
16
16
|
i18n (>= 1.6, < 2)
|
|
17
|
+
json
|
|
18
|
+
logger (>= 1.4.2)
|
|
17
19
|
minitest (>= 5.1)
|
|
18
|
-
|
|
19
|
-
tzinfo (~> 2.0)
|
|
20
|
+
securerandom (>= 0.3)
|
|
21
|
+
tzinfo (~> 2.0, >= 2.0.5)
|
|
22
|
+
uri (>= 0.13.1)
|
|
20
23
|
ansi (1.5.0)
|
|
21
|
-
ast (2.4.
|
|
22
|
-
base64 (0.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
24
|
+
ast (2.4.3)
|
|
25
|
+
base64 (0.3.0)
|
|
26
|
+
benchmark (0.5.0)
|
|
27
|
+
bigdecimal (4.0.1)
|
|
28
|
+
builder (3.3.0)
|
|
29
|
+
concurrent-ruby (1.3.6)
|
|
30
|
+
connection_pool (3.0.2)
|
|
31
|
+
diff-lcs (1.6.2)
|
|
32
|
+
docile (1.4.1)
|
|
33
|
+
drb (2.2.3)
|
|
34
|
+
i18n (1.14.8)
|
|
31
35
|
concurrent-ruby (~> 1.0)
|
|
32
|
-
json (2.
|
|
33
|
-
language_server-protocol (3.17.0.
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
minitest
|
|
36
|
+
json (2.19.3)
|
|
37
|
+
language_server-protocol (3.17.0.5)
|
|
38
|
+
lint_roller (1.1.0)
|
|
39
|
+
logger (1.7.0)
|
|
40
|
+
minitest (5.27.0)
|
|
41
|
+
minitest-reporters (1.7.1)
|
|
37
42
|
ansi
|
|
38
43
|
builder
|
|
39
44
|
minitest (>= 5.0)
|
|
40
45
|
ruby-progressbar
|
|
41
|
-
msgpack (1.
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
parser (3.3.0.5)
|
|
46
|
+
msgpack (1.8.0)
|
|
47
|
+
parallel (1.27.0)
|
|
48
|
+
parser (3.3.10.2)
|
|
45
49
|
ast (~> 2.4.1)
|
|
46
50
|
racc
|
|
47
|
-
|
|
51
|
+
prism (1.9.0)
|
|
52
|
+
racc (1.8.1)
|
|
48
53
|
rainbow (3.1.1)
|
|
49
|
-
rake (13.1
|
|
50
|
-
redis (5.1
|
|
51
|
-
redis-client (>= 0.
|
|
52
|
-
redis-client (0.
|
|
54
|
+
rake (13.3.1)
|
|
55
|
+
redis (5.4.1)
|
|
56
|
+
redis-client (>= 0.22.0)
|
|
57
|
+
redis-client (0.28.0)
|
|
53
58
|
connection_pool
|
|
54
|
-
regexp_parser (2.
|
|
55
|
-
rexml (3.
|
|
56
|
-
rspec (3.13.
|
|
59
|
+
regexp_parser (2.11.3)
|
|
60
|
+
rexml (3.4.4)
|
|
61
|
+
rspec (3.13.2)
|
|
57
62
|
rspec-core (~> 3.13.0)
|
|
58
63
|
rspec-expectations (~> 3.13.0)
|
|
59
64
|
rspec-mocks (~> 3.13.0)
|
|
60
|
-
rspec-core (3.13.
|
|
65
|
+
rspec-core (3.13.6)
|
|
61
66
|
rspec-support (~> 3.13.0)
|
|
62
|
-
rspec-expectations (3.13.
|
|
67
|
+
rspec-expectations (3.13.5)
|
|
63
68
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
64
69
|
rspec-support (~> 3.13.0)
|
|
65
|
-
rspec-mocks (3.13.
|
|
70
|
+
rspec-mocks (3.13.8)
|
|
66
71
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
67
72
|
rspec-support (~> 3.13.0)
|
|
68
|
-
rspec-support (3.13.
|
|
69
|
-
rubocop (1.
|
|
73
|
+
rspec-support (3.13.7)
|
|
74
|
+
rubocop (1.86.0)
|
|
70
75
|
json (~> 2.3)
|
|
71
|
-
language_server-protocol (
|
|
76
|
+
language_server-protocol (~> 3.17.0.2)
|
|
77
|
+
lint_roller (~> 1.1.0)
|
|
72
78
|
parallel (~> 1.10)
|
|
73
79
|
parser (>= 3.3.0.2)
|
|
74
80
|
rainbow (>= 2.2.2, < 4.0)
|
|
75
|
-
regexp_parser (>=
|
|
76
|
-
|
|
77
|
-
rubocop-ast (>= 1.31.1, < 2.0)
|
|
81
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
|
82
|
+
rubocop-ast (>= 1.49.0, < 2.0)
|
|
78
83
|
ruby-progressbar (~> 1.7)
|
|
79
|
-
unicode-display_width (>= 2.4.0, <
|
|
80
|
-
rubocop-ast (1.
|
|
81
|
-
parser (>= 3.3.
|
|
84
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
|
85
|
+
rubocop-ast (1.49.1)
|
|
86
|
+
parser (>= 3.3.7.2)
|
|
87
|
+
prism (~> 1.7)
|
|
82
88
|
ruby-progressbar (1.13.0)
|
|
89
|
+
securerandom (0.4.1)
|
|
83
90
|
simplecov (0.22.0)
|
|
84
91
|
docile (~> 1.1)
|
|
85
92
|
simplecov-html (~> 0.11)
|
|
86
93
|
simplecov_json_formatter (~> 0.1)
|
|
87
|
-
simplecov-html (0.
|
|
94
|
+
simplecov-html (0.13.2)
|
|
88
95
|
simplecov_json_formatter (0.1.4)
|
|
89
|
-
snappy (0.
|
|
96
|
+
snappy (0.5.1)
|
|
90
97
|
tzinfo (2.0.6)
|
|
91
98
|
concurrent-ruby (~> 1.0)
|
|
92
|
-
unicode-display_width (2.
|
|
99
|
+
unicode-display_width (3.2.0)
|
|
100
|
+
unicode-emoji (~> 4.1)
|
|
101
|
+
unicode-emoji (4.2.0)
|
|
102
|
+
uri (1.1.1)
|
|
93
103
|
|
|
94
104
|
PLATFORMS
|
|
95
105
|
arm64-darwin-23
|
|
@@ -97,6 +107,7 @@ PLATFORMS
|
|
|
97
107
|
|
|
98
108
|
DEPENDENCIES
|
|
99
109
|
activesupport
|
|
110
|
+
benchmark
|
|
100
111
|
bundler
|
|
101
112
|
ci-queue!
|
|
102
113
|
minitest (~> 5.11)
|
|
@@ -104,6 +115,7 @@ DEPENDENCIES
|
|
|
104
115
|
msgpack
|
|
105
116
|
rake
|
|
106
117
|
redis
|
|
118
|
+
rexml
|
|
107
119
|
rspec (~> 3.10)
|
|
108
120
|
rubocop
|
|
109
121
|
simplecov (~> 0.12)
|
data/ci-queue.gemspec
CHANGED
|
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
|
19
19
|
spec.homepage = 'https://github.com/Shopify/ci-queue'
|
|
20
20
|
spec.license = 'MIT'
|
|
21
21
|
|
|
22
|
-
spec.required_ruby_version = '>=
|
|
22
|
+
spec.required_ruby_version = '>= 3.1'
|
|
23
23
|
|
|
24
24
|
spec.files = lua_scripts + `git ls-files -z`.split("\x0").reject do |f|
|
|
25
25
|
f.match(%r{^(test|spec|features)/})
|
|
@@ -43,5 +43,7 @@ Gem::Specification.new do |spec|
|
|
|
43
43
|
|
|
44
44
|
spec.add_development_dependency 'snappy'
|
|
45
45
|
spec.add_development_dependency 'msgpack'
|
|
46
|
+
spec.add_development_dependency 'benchmark'
|
|
47
|
+
spec.add_development_dependency 'rexml'
|
|
46
48
|
spec.add_development_dependency 'rubocop'
|
|
47
49
|
end
|
data/lib/ci/queue/build_record.rb
CHANGED
|
@@ -18,17 +18,17 @@ module CI
|
|
|
18
18
|
@queue.exhausted?
|
|
19
19
|
end
|
|
20
20
|
|
|
21
|
-
def record_error(
|
|
22
|
-
error_reports[
|
|
21
|
+
def record_error(entry, payload, stat_delta: nil)
|
|
22
|
+
error_reports[entry] = payload
|
|
23
23
|
true
|
|
24
24
|
end
|
|
25
25
|
|
|
26
|
-
def record_success(
|
|
27
|
-
error_reports.delete(
|
|
26
|
+
def record_success(entry, skip_flaky_record: false, acknowledge: true)
|
|
27
|
+
error_reports.delete(entry)
|
|
28
28
|
true
|
|
29
29
|
end
|
|
30
30
|
|
|
31
|
-
def record_requeue(
|
|
31
|
+
def record_requeue(entry)
|
|
32
32
|
true
|
|
33
33
|
end
|
|
34
34
|
|
data/lib/ci/queue/queue_entry.rb
CHANGED
|
@@ -6,26 +6,18 @@ require 'json'
|
|
|
6
6
|
module CI
|
|
7
7
|
module Queue
|
|
8
8
|
module QueueEntry
|
|
9
|
-
DELIMITER = "\t"
|
|
10
9
|
LOAD_ERROR_PREFIX = '__ciq_load_error__:'.freeze
|
|
11
10
|
|
|
12
11
|
def self.test_id(entry)
|
|
13
|
-
|
|
14
|
-
pos ? entry[0, pos] : entry
|
|
12
|
+
JSON.parse(entry, symbolize_names: true)[:test_id]
|
|
15
13
|
end
|
|
16
14
|
|
|
17
15
|
def self.parse(entry)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
test_id, file_path = entry.split(DELIMITER, 2)
|
|
21
|
-
file_path = nil if file_path == ""
|
|
22
|
-
{ test_id: test_id, file_path: file_path }
|
|
16
|
+
JSON.parse(entry, symbolize_names: true)
|
|
23
17
|
end
|
|
24
18
|
|
|
25
19
|
def self.format(test_id, file_path)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
"#{test_id}#{DELIMITER}#{file_path}"
|
|
20
|
+
JSON.dump({ test_id: test_id, file_path: file_path })
|
|
29
21
|
end
|
|
30
22
|
|
|
31
23
|
def self.load_error_payload?(file_path)
|
data/lib/ci/queue/redis/acknowledge.lua
CHANGED
|
@@ -4,18 +4,28 @@ local processed_key = KEYS[2]
|
|
|
4
4
|
local owners_key = KEYS[3]
|
|
5
5
|
local error_reports_key = KEYS[4]
|
|
6
6
|
local requeued_by_key = KEYS[5]
|
|
7
|
+
local leases_key = KEYS[6]
|
|
7
8
|
|
|
8
9
|
local entry = ARGV[1]
|
|
9
|
-
local
|
|
10
|
-
local
|
|
11
|
-
local
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
local error = ARGV[2]
|
|
11
|
+
local ttl = ARGV[3]
|
|
12
|
+
local lease_id = ARGV[4]
|
|
13
|
+
|
|
14
|
+
-- Only the current lease holder can remove the entry from the running set.
|
|
15
|
+
-- If the lease was transferred (e.g. via reserve_lost), the stale worker
|
|
16
|
+
-- must not remove the running entry — that would let the supervisor think
|
|
17
|
+
-- the queue is exhausted while the new lease holder is still processing.
|
|
18
|
+
if tostring(redis.call('hget', leases_key, entry)) == lease_id then
|
|
19
|
+
redis.call('zrem', zset_key, entry)
|
|
20
|
+
redis.call('hdel', owners_key, entry)
|
|
21
|
+
redis.call('hdel', leases_key, entry)
|
|
22
|
+
end
|
|
23
|
+
|
|
14
24
|
redis.call('hdel', requeued_by_key, entry)
|
|
15
|
-
local acknowledged = redis.call('sadd', processed_key,
|
|
25
|
+
local acknowledged = redis.call('sadd', processed_key, entry) == 1
|
|
16
26
|
|
|
17
27
|
if acknowledged and error ~= "" then
|
|
18
|
-
redis.call('hset', error_reports_key,
|
|
28
|
+
redis.call('hset', error_reports_key, entry, error)
|
|
19
29
|
redis.call('expire', error_reports_key, ttl)
|
|
20
30
|
end
|
|
21
31
|
|
data/lib/ci/queue/redis/base.rb
CHANGED
|
@@ -60,10 +60,10 @@ module CI
|
|
|
60
60
|
[0, 0, 0.1, 0.5, 1, 3, 5]
|
|
61
61
|
end
|
|
62
62
|
|
|
63
|
-
def with_heartbeat(id)
|
|
63
|
+
def with_heartbeat(id, lease: nil)
|
|
64
64
|
if heartbeat_enabled?
|
|
65
65
|
ensure_heartbeat_thread_alive!
|
|
66
|
-
heartbeat_state.set(:tick, id)
|
|
66
|
+
heartbeat_state.set(:tick, id, lease)
|
|
67
67
|
end
|
|
68
68
|
|
|
69
69
|
yield
|
|
@@ -264,13 +264,11 @@ module CI
|
|
|
264
264
|
end
|
|
265
265
|
|
|
266
266
|
class HeartbeatProcess
|
|
267
|
-
def initialize(redis_url, zset_key,
|
|
267
|
+
def initialize(redis_url, zset_key, owners_key, leases_key)
|
|
268
268
|
@redis_url = redis_url
|
|
269
269
|
@zset_key = zset_key
|
|
270
|
-
@processed_key = processed_key
|
|
271
270
|
@owners_key = owners_key
|
|
272
|
-
@
|
|
273
|
-
@entry_delimiter = entry_delimiter
|
|
271
|
+
@leases_key = leases_key
|
|
274
272
|
end
|
|
275
273
|
|
|
276
274
|
def boot!
|
|
@@ -282,10 +280,8 @@ module CI
|
|
|
282
280
|
::File.join(__dir__, "monitor.rb"),
|
|
283
281
|
@redis_url,
|
|
284
282
|
@zset_key,
|
|
285
|
-
@processed_key,
|
|
286
283
|
@owners_key,
|
|
287
|
-
@
|
|
288
|
-
@entry_delimiter,
|
|
284
|
+
@leases_key,
|
|
289
285
|
in: child_read,
|
|
290
286
|
out: child_write,
|
|
291
287
|
)
|
|
@@ -315,8 +311,8 @@ module CI
|
|
|
315
311
|
end
|
|
316
312
|
end
|
|
317
313
|
|
|
318
|
-
def tick!(id)
|
|
319
|
-
send_message(:tick!, id: id)
|
|
314
|
+
def tick!(id, lease)
|
|
315
|
+
send_message(:tick!, id: id, lease: lease.to_s)
|
|
320
316
|
end
|
|
321
317
|
|
|
322
318
|
private
|
|
@@ -357,10 +353,8 @@ module CI
|
|
|
357
353
|
@heartbeat_process ||= HeartbeatProcess.new(
|
|
358
354
|
@redis_url,
|
|
359
355
|
key('running'),
|
|
360
|
-
key('processed'),
|
|
361
356
|
key('owners'),
|
|
362
|
-
key('
|
|
363
|
-
entry_delimiter: CI::Queue::QueueEntry::DELIMITER,
|
|
357
|
+
key('leases'),
|
|
364
358
|
)
|
|
365
359
|
end
|
|
366
360
|
|
|
@@ -372,19 +366,16 @@ module CI
|
|
|
372
366
|
Thread.current.name = "CI::Queue#heartbeat"
|
|
373
367
|
Thread.current.abort_on_exception = true
|
|
374
368
|
|
|
375
|
-
timeout = config.timeout.to_i
|
|
376
369
|
loop do
|
|
377
|
-
command = nil
|
|
378
370
|
command = heartbeat_state.wait(1) # waits for max 1 second but wakes up immediately if we receive a command
|
|
379
371
|
|
|
380
372
|
case command&.first
|
|
381
373
|
when :tick
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
timeout -= 1
|
|
385
|
-
end
|
|
374
|
+
# command = [:tick, entry_id, lease_id]
|
|
375
|
+
heartbeat_process.tick!(command[1], command[2])
|
|
386
376
|
when :reset
|
|
387
|
-
|
|
377
|
+
# Test finished, stop ticking until next test starts
|
|
378
|
+
nil
|
|
388
379
|
when :stop
|
|
389
380
|
break
|
|
390
381
|
end
|
data/lib/ci/queue/redis/build_record.rb
CHANGED
|
@@ -33,14 +33,14 @@ module CI
|
|
|
33
33
|
end
|
|
34
34
|
|
|
35
35
|
def failed_tests
|
|
36
|
-
redis.hkeys(key('error-reports'))
|
|
36
|
+
redis.hkeys(key('error-reports')).map { |entry| CI::Queue::QueueEntry.test_id(entry) }
|
|
37
37
|
end
|
|
38
38
|
|
|
39
39
|
TOTAL_KEY = "___total___"
|
|
40
40
|
def requeued_tests
|
|
41
41
|
requeues = redis.hgetall(key('requeues-count'))
|
|
42
42
|
requeues.delete(TOTAL_KEY)
|
|
43
|
-
requeues
|
|
43
|
+
requeues.transform_keys { |entry| CI::Queue::QueueEntry.test_id(entry) }
|
|
44
44
|
end
|
|
45
45
|
|
|
46
46
|
def pop_warnings
|
|
@@ -56,39 +56,39 @@ module CI
|
|
|
56
56
|
redis.rpush(key('warnings'), Marshal.dump([type, attributes]))
|
|
57
57
|
end
|
|
58
58
|
|
|
59
|
-
def record_error(
|
|
59
|
+
def record_error(entry, payload, stat_delta: nil)
|
|
60
60
|
# Run acknowledge first so we know whether we're the first to ack
|
|
61
|
-
acknowledged = @queue.acknowledge(
|
|
61
|
+
acknowledged = @queue.acknowledge(entry, error: payload)
|
|
62
62
|
|
|
63
63
|
if acknowledged
|
|
64
64
|
# We were the first to ack; another worker already ack'd would get falsy from SADD
|
|
65
65
|
@queue.increment_test_failed
|
|
66
66
|
# Only the acknowledging worker's stats include this failure (others skip increment when ack=false).
|
|
67
67
|
# Store so we can subtract it if another worker records success later.
|
|
68
|
-
store_error_report_delta(
|
|
68
|
+
store_error_report_delta(entry, stat_delta) if stat_delta && stat_delta.any?
|
|
69
69
|
end
|
|
70
70
|
# Return so caller can roll back local counter when not acknowledged
|
|
71
71
|
!!acknowledged
|
|
72
72
|
end
|
|
73
73
|
|
|
74
|
-
def record_success(
|
|
74
|
+
def record_success(entry, skip_flaky_record: false)
|
|
75
75
|
acknowledged, error_reports_deleted_count, requeued_count, delta_json = redis.multi do |transaction|
|
|
76
|
-
@queue.acknowledge(
|
|
77
|
-
transaction.hdel(key('error-reports'),
|
|
78
|
-
transaction.hget(key('requeues-count'),
|
|
79
|
-
transaction.hget(key('error-report-deltas'),
|
|
76
|
+
@queue.acknowledge(entry, pipeline: transaction)
|
|
77
|
+
transaction.hdel(key('error-reports'), entry)
|
|
78
|
+
transaction.hget(key('requeues-count'), entry)
|
|
79
|
+
transaction.hget(key('error-report-deltas'), entry)
|
|
80
80
|
end
|
|
81
81
|
# When we're replacing a failure, subtract the (single) acknowledging worker's stat contribution
|
|
82
82
|
if error_reports_deleted_count.to_i > 0 && delta_json
|
|
83
83
|
apply_error_report_delta_correction(delta_json)
|
|
84
|
-
redis.hdel(key('error-report-deltas'),
|
|
84
|
+
redis.hdel(key('error-report-deltas'), entry)
|
|
85
85
|
end
|
|
86
|
-
record_flaky(
|
|
86
|
+
record_flaky(entry) if !skip_flaky_record && (error_reports_deleted_count.to_i > 0 || requeued_count.to_i > 0)
|
|
87
87
|
# Count this run when we ack'd or when we replaced a failure (so stats delta is applied)
|
|
88
88
|
!!(acknowledged || error_reports_deleted_count.to_i > 0)
|
|
89
89
|
end
|
|
90
90
|
|
|
91
|
-
def record_requeue(
|
|
91
|
+
def record_requeue(entry)
|
|
92
92
|
true
|
|
93
93
|
end
|
|
94
94
|
|
|
@@ -142,11 +142,11 @@ module CI
|
|
|
142
142
|
end
|
|
143
143
|
|
|
144
144
|
def error_reports
|
|
145
|
-
redis.hgetall(key('error-reports'))
|
|
145
|
+
redis.hgetall(key('error-reports')).transform_keys { |entry| CI::Queue::QueueEntry.test_id(entry) }
|
|
146
146
|
end
|
|
147
147
|
|
|
148
148
|
def flaky_reports
|
|
149
|
-
redis.smembers(key('flaky-reports'))
|
|
149
|
+
redis.smembers(key('flaky-reports')).map { |entry| CI::Queue::QueueEntry.test_id(entry) }
|
|
150
150
|
end
|
|
151
151
|
|
|
152
152
|
def record_worker_profile(profile)
|
|
@@ -187,10 +187,10 @@ module CI
|
|
|
187
187
|
['build', config.build_id, *args].join(':')
|
|
188
188
|
end
|
|
189
189
|
|
|
190
|
-
def store_error_report_delta(
|
|
190
|
+
def store_error_report_delta(entry, stat_delta)
|
|
191
191
|
# Only the acknowledging worker's stats include this test; store their delta for correction on success
|
|
192
192
|
payload = { 'worker_id' => config.worker_id.to_s }.merge(stat_delta)
|
|
193
|
-
redis.hset(key('error-report-deltas'),
|
|
193
|
+
redis.hset(key('error-report-deltas'), entry, JSON.generate(payload))
|
|
194
194
|
redis.expire(key('error-report-deltas'), config.redis_ttl)
|
|
195
195
|
end
|
|
196
196
|
|
data/lib/ci/queue/redis/heartbeat.lua
CHANGED
|
@@ -1,23 +1,17 @@
|
|
|
1
1
|
-- AUTOGENERATED FILE DO NOT EDIT DIRECTLY
|
|
2
|
-
-- @include _entry_helpers
|
|
3
|
-
|
|
4
2
|
local zset_key = KEYS[1]
|
|
5
|
-
local
|
|
6
|
-
local owners_key = KEYS[3]
|
|
7
|
-
local worker_queue_key = KEYS[4]
|
|
3
|
+
local leases_key = KEYS[2]
|
|
8
4
|
|
|
9
5
|
local current_time = ARGV[1]
|
|
10
6
|
local entry = ARGV[2]
|
|
11
|
-
local
|
|
12
|
-
|
|
13
|
-
local test_id = test_id_from_entry(entry, entry_delimiter)
|
|
14
|
-
|
|
15
|
-
-- already processed, we do not need to bump the timestamp
|
|
16
|
-
if redis.call('sismember', processed_key, test_id) == 1 then
|
|
17
|
-
return false
|
|
18
|
-
end
|
|
7
|
+
local lease_id = ARGV[3]
|
|
19
8
|
|
|
20
|
-
--
|
|
21
|
-
|
|
9
|
+
-- Only the current lease holder can bump the timestamp.
|
|
10
|
+
-- We intentionally do NOT check the processed set. A non-owner worker's
|
|
11
|
+
-- acknowledge can add the entry to processed, which would poison the
|
|
12
|
+
-- current lease holder's heartbeat if we checked it here.
|
|
13
|
+
-- The lease check alone is sufficient — once the lease holder acknowledges,
|
|
14
|
+
-- they zrem + hdel the lease, so the heartbeat will naturally stop.
|
|
15
|
+
if tostring(redis.call('hget', leases_key, entry)) == lease_id then
|
|
22
16
|
return redis.call('zadd', zset_key, current_time, entry)
|
|
23
17
|
end
|
data/lib/ci/queue/redis/monitor.rb
CHANGED
|
@@ -13,12 +13,10 @@ module CI
|
|
|
13
13
|
DEV_SCRIPTS_ROOT = ::File.expand_path('../../../../../../redis', __FILE__)
|
|
14
14
|
RELEASE_SCRIPTS_ROOT = ::File.expand_path('../../redis', __FILE__)
|
|
15
15
|
|
|
16
|
-
def initialize(pipe, logger, redis_url, zset_key,
|
|
16
|
+
def initialize(pipe, logger, redis_url, zset_key, owners_key, leases_key)
|
|
17
17
|
@zset_key = zset_key
|
|
18
|
-
@processed_key = processed_key
|
|
19
18
|
@owners_key = owners_key
|
|
20
|
-
@
|
|
21
|
-
@entry_delimiter = entry_delimiter
|
|
19
|
+
@leases_key = leases_key
|
|
22
20
|
@logger = logger
|
|
23
21
|
@redis = ::Redis.new(url: redis_url, reconnect_attempts: [0, 0, 0.1, 0.5, 1, 3, 5])
|
|
24
22
|
@shutdown = false
|
|
@@ -37,11 +35,11 @@ module CI
|
|
|
37
35
|
@self_pipe_writer << '.'
|
|
38
36
|
end
|
|
39
37
|
|
|
40
|
-
def process_tick!(id:)
|
|
38
|
+
def process_tick!(id:, lease:)
|
|
41
39
|
eval_script(
|
|
42
40
|
:heartbeat,
|
|
43
|
-
keys: [@zset_key, @
|
|
44
|
-
argv: [Time.now.to_f, id,
|
|
41
|
+
keys: [@zset_key, @leases_key],
|
|
42
|
+
argv: [Time.now.to_f, id, lease]
|
|
45
43
|
)
|
|
46
44
|
rescue => error
|
|
47
45
|
@logger.info(error)
|
|
@@ -152,13 +150,11 @@ end
|
|
|
152
150
|
|
|
153
151
|
redis_url = ARGV[0]
|
|
154
152
|
zset_key = ARGV[1]
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
worker_queue_key = ARGV[4]
|
|
158
|
-
entry_delimiter = ARGV[5]
|
|
153
|
+
owners_key = ARGV[2]
|
|
154
|
+
leases_key = ARGV[3]
|
|
159
155
|
|
|
160
|
-
logger.debug("Starting monitor: #{redis_url} #{zset_key} #{
|
|
161
|
-
manager = CI::Queue::Redis::Monitor.new($stdin, logger, redis_url, zset_key,
|
|
156
|
+
logger.debug("Starting monitor: #{redis_url} #{zset_key} #{leases_key}")
|
|
157
|
+
manager = CI::Queue::Redis::Monitor.new($stdin, logger, redis_url, zset_key, owners_key, leases_key)
|
|
162
158
|
|
|
163
159
|
# Notify the parent we're ready
|
|
164
160
|
$stdout.puts(".")
|
data/lib/ci/queue/redis/release.lua
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
local zset_key = KEYS[1]
|
|
3
3
|
local worker_queue_key = KEYS[2]
|
|
4
4
|
local owners_key = KEYS[3]
|
|
5
|
+
local leases_key = KEYS[4]
|
|
5
6
|
|
|
6
7
|
-- owned_tests = {"SomeTest", "worker:1", "SomeOtherTest", "worker:2", ...}
|
|
7
8
|
local owned_tests = redis.call('hgetall', owners_key)
|
|
@@ -9,6 +10,7 @@ for index, owner_or_test in ipairs(owned_tests) do
|
|
|
9
10
|
if owner_or_test == worker_queue_key then -- If we owned a test
|
|
10
11
|
local test = owned_tests[index - 1]
|
|
11
12
|
redis.call('zadd', zset_key, "0", test) -- We expire the lease immediately
|
|
13
|
+
redis.call('hdel', leases_key, test)
|
|
12
14
|
return nil
|
|
13
15
|
end
|
|
14
16
|
end
|
data/lib/ci/queue/redis/requeue.lua
CHANGED
|
@@ -7,19 +7,23 @@ local worker_queue_key = KEYS[5]
|
|
|
7
7
|
local owners_key = KEYS[6]
|
|
8
8
|
local error_reports_key = KEYS[7]
|
|
9
9
|
local requeued_by_key = KEYS[8]
|
|
10
|
+
local leases_key = KEYS[9]
|
|
10
11
|
|
|
11
12
|
local max_requeues = tonumber(ARGV[1])
|
|
12
13
|
local global_max_requeues = tonumber(ARGV[2])
|
|
13
14
|
local entry = ARGV[3]
|
|
14
|
-
local
|
|
15
|
-
local
|
|
16
|
-
local
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
15
|
+
local offset = ARGV[4]
|
|
16
|
+
local ttl = tonumber(ARGV[5])
|
|
17
|
+
local lease_id = ARGV[6]
|
|
18
|
+
|
|
19
|
+
-- Only the current lease holder can requeue a test.
|
|
20
|
+
-- If the lease was transferred (e.g. via reserve_lost), reject the stale
|
|
21
|
+
-- worker's requeue so the running entry stays intact for the new holder.
|
|
22
|
+
if tostring(redis.call('hget', leases_key, entry)) ~= lease_id then
|
|
23
|
+
return false
|
|
20
24
|
end
|
|
21
25
|
|
|
22
|
-
if redis.call('sismember', processed_key,
|
|
26
|
+
if redis.call('sismember', processed_key, entry) == 1 then
|
|
23
27
|
return false
|
|
24
28
|
end
|
|
25
29
|
|
|
@@ -28,15 +32,15 @@ if global_requeues and global_requeues >= tonumber(global_max_requeues) then
|
|
|
28
32
|
return false
|
|
29
33
|
end
|
|
30
34
|
|
|
31
|
-
local requeues = tonumber(redis.call('hget', requeues_count_key,
|
|
35
|
+
local requeues = tonumber(redis.call('hget', requeues_count_key, entry))
|
|
32
36
|
if requeues and requeues >= max_requeues then
|
|
33
37
|
return false
|
|
34
38
|
end
|
|
35
39
|
|
|
36
40
|
redis.call('hincrby', requeues_count_key, '___total___', 1)
|
|
37
|
-
redis.call('hincrby', requeues_count_key,
|
|
41
|
+
redis.call('hincrby', requeues_count_key, entry, 1)
|
|
38
42
|
|
|
39
|
-
redis.call('hdel', error_reports_key,
|
|
43
|
+
redis.call('hdel', error_reports_key, entry)
|
|
40
44
|
|
|
41
45
|
local pivot = redis.call('lrange', queue_key, -1 - offset, 0 - offset)[1]
|
|
42
46
|
if pivot then
|
|
@@ -50,6 +54,8 @@ if ttl and ttl > 0 then
|
|
|
50
54
|
redis.call('expire', requeued_by_key, ttl)
|
|
51
55
|
end
|
|
52
56
|
|
|
57
|
+
redis.call('hdel', owners_key, entry)
|
|
58
|
+
redis.call('hdel', leases_key, entry)
|
|
53
59
|
redis.call('zrem', zset_key, entry)
|
|
54
60
|
|
|
55
61
|
return true
|
data/lib/ci/queue/redis/reserve.lua
CHANGED
|
@@ -6,6 +6,8 @@ local worker_queue_key = KEYS[4]
|
|
|
6
6
|
local owners_key = KEYS[5]
|
|
7
7
|
local requeued_by_key = KEYS[6]
|
|
8
8
|
local workers_key = KEYS[7]
|
|
9
|
+
local leases_key = KEYS[8]
|
|
10
|
+
local lease_counter_key = KEYS[9]
|
|
9
11
|
|
|
10
12
|
local current_time = ARGV[1]
|
|
11
13
|
local defer_offset = tonumber(ARGV[2]) or 0
|
|
@@ -20,6 +22,15 @@ local function insert_with_offset(test)
|
|
|
20
22
|
end
|
|
21
23
|
end
|
|
22
24
|
|
|
25
|
+
local function claim_test(test)
|
|
26
|
+
local lease = redis.call('incr', lease_counter_key)
|
|
27
|
+
redis.call('zadd', zset_key, current_time, test)
|
|
28
|
+
redis.call('lpush', worker_queue_key, test)
|
|
29
|
+
redis.call('hset', owners_key, test, worker_queue_key)
|
|
30
|
+
redis.call('hset', leases_key, test, lease)
|
|
31
|
+
return {test, tostring(lease)}
|
|
32
|
+
end
|
|
33
|
+
|
|
23
34
|
for attempt = 1, max_skip_attempts do
|
|
24
35
|
local test = redis.call('rpop', queue_key)
|
|
25
36
|
if not test then
|
|
@@ -31,10 +42,7 @@ for attempt = 1, max_skip_attempts do
|
|
|
31
42
|
-- If this build only has one worker, allow immediate self-pickup.
|
|
32
43
|
if redis.call('scard', workers_key) <= 1 then
|
|
33
44
|
redis.call('hdel', requeued_by_key, test)
|
|
34
|
-
|
|
35
|
-
redis.call('lpush', worker_queue_key, test)
|
|
36
|
-
redis.call('hset', owners_key, test, worker_queue_key)
|
|
37
|
-
return test
|
|
45
|
+
return claim_test(test)
|
|
38
46
|
end
|
|
39
47
|
|
|
40
48
|
insert_with_offset(test)
|
|
@@ -47,10 +55,7 @@ for attempt = 1, max_skip_attempts do
|
|
|
47
55
|
end
|
|
48
56
|
else
|
|
49
57
|
redis.call('hdel', requeued_by_key, test)
|
|
50
|
-
|
|
51
|
-
redis.call('lpush', worker_queue_key, test)
|
|
52
|
-
redis.call('hset', owners_key, test, worker_queue_key)
|
|
53
|
-
return test
|
|
58
|
+
return claim_test(test)
|
|
54
59
|
end
|
|
55
60
|
end
|
|
56
61
|
|
data/lib/ci/queue/redis/reserve_lost.lua
CHANGED
|
@@ -1,23 +1,30 @@
|
|
|
1
1
|
-- AUTOGENERATED FILE DO NOT EDIT DIRECTLY
|
|
2
|
-
-- @include _entry_helpers
|
|
3
|
-
|
|
4
2
|
local zset_key = KEYS[1]
|
|
5
3
|
local processed_key = KEYS[2]
|
|
6
4
|
local worker_queue_key = KEYS[3]
|
|
7
5
|
local owners_key = KEYS[4]
|
|
6
|
+
local leases_key = KEYS[5]
|
|
7
|
+
local lease_counter_key = KEYS[6]
|
|
8
8
|
|
|
9
9
|
local current_time = ARGV[1]
|
|
10
10
|
local timeout = ARGV[2]
|
|
11
|
-
local entry_delimiter = ARGV[3]
|
|
12
11
|
|
|
13
12
|
local lost_tests = redis.call('zrangebyscore', zset_key, 0, current_time - timeout)
|
|
14
13
|
for _, test in ipairs(lost_tests) do
|
|
15
|
-
|
|
16
|
-
|
|
14
|
+
if redis.call('sismember', processed_key, test) == 0 then
|
|
15
|
+
local lease = redis.call('incr', lease_counter_key)
|
|
17
16
|
redis.call('zadd', zset_key, current_time, test)
|
|
18
17
|
redis.call('lpush', worker_queue_key, test)
|
|
19
|
-
redis.call('hset', owners_key, test, worker_queue_key)
|
|
20
|
-
|
|
18
|
+
redis.call('hset', owners_key, test, worker_queue_key)
|
|
19
|
+
redis.call('hset', leases_key, test, lease)
|
|
20
|
+
return {test, tostring(lease)}
|
|
21
|
+
else
|
|
22
|
+
-- Test is already processed but still in running (stale). This can happen when
|
|
23
|
+
-- a non-owner worker acknowledged the test (marking it processed) but could not
|
|
24
|
+
-- remove it from running due to the lease guard. Clean it up.
|
|
25
|
+
redis.call('zrem', zset_key, test)
|
|
26
|
+
redis.call('hdel', owners_key, test)
|
|
27
|
+
redis.call('hdel', leases_key, test)
|
|
21
28
|
end
|
|
22
29
|
end
|
|
23
30
|
|
data/lib/ci/queue/redis/retry.rb
CHANGED
|
@@ -12,6 +12,22 @@ module CI
|
|
|
12
12
|
@build ||= CI::Queue::Redis::BuildRecord.new(self, redis, config)
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
+
# Retry queue is pre-populated with failed test entries from the previous run.
|
|
16
|
+
# Don't replace them with the full preresolved/lazy test list.
|
|
17
|
+
# QueuePopulationStrategy#configure_lazy_queue will still set entry_resolver,
|
|
18
|
+
# so poll uses LazyEntryResolver to lazily load test files on demand.
|
|
19
|
+
# The random/batch_size params are intentionally ignored since we keep
|
|
20
|
+
# the existing queue contents as-is.
|
|
21
|
+
#
|
|
22
|
+
# Note: populate (non-stream) is intentionally NOT overridden here.
|
|
23
|
+
# RSpec and non-lazy Minitest retries call populate to build the
|
|
24
|
+
# @index mapping test IDs to runnable objects, which poll needs to
|
|
25
|
+
# yield proper test/example instances. In those paths, @queue contains
|
|
26
|
+
# bare test IDs that match @index keys, so populate works correctly.
|
|
27
|
+
def stream_populate(tests, random: nil, batch_size: nil)
|
|
28
|
+
self
|
|
29
|
+
end
|
|
30
|
+
|
|
15
31
|
private
|
|
16
32
|
|
|
17
33
|
attr_reader :redis
|
data/lib/ci/queue/redis/worker.rb
CHANGED
|
@@ -19,6 +19,7 @@ module CI
|
|
|
19
19
|
|
|
20
20
|
def initialize(redis, config)
|
|
21
21
|
@reserved_tests = Concurrent::Set.new
|
|
22
|
+
@reserved_leases = Concurrent::Map.new
|
|
22
23
|
@shutdown_required = false
|
|
23
24
|
@first_reserve_at = nil
|
|
24
25
|
super(redis, config)
|
|
@@ -147,9 +148,10 @@ module CI
|
|
|
147
148
|
def retry_queue
|
|
148
149
|
failures = build.failed_tests.to_set
|
|
149
150
|
log = redis.lrange(key('worker', worker_id, 'queue'), 0, -1)
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
log.
|
|
151
|
+
# Keep full entries (test_id + file_path) so lazy loading can resolve them.
|
|
152
|
+
# Filter by test_id against failures without stripping file paths.
|
|
153
|
+
log.select! { |entry| failures.include?(CI::Queue::QueueEntry.test_id(entry)) }
|
|
154
|
+
log.uniq! { |entry| CI::Queue::QueueEntry.test_id(entry) }
|
|
153
155
|
log.reverse!
|
|
154
156
|
Retry.new(log, config, redis: redis)
|
|
155
157
|
end
|
|
@@ -172,27 +174,34 @@ module CI
|
|
|
172
174
|
nil
|
|
173
175
|
end
|
|
174
176
|
|
|
177
|
+
def lease_for(entry)
|
|
178
|
+
test_id = CI::Queue::QueueEntry.test_id(entry)
|
|
179
|
+
@reserved_leases[test_id]
|
|
180
|
+
end
|
|
181
|
+
|
|
175
182
|
def report_worker_error(error)
|
|
176
183
|
build.report_worker_error(error)
|
|
177
184
|
end
|
|
178
185
|
|
|
179
|
-
def acknowledge(
|
|
180
|
-
test_id =
|
|
186
|
+
def acknowledge(entry, error: nil, pipeline: redis)
|
|
187
|
+
test_id = CI::Queue::QueueEntry.test_id(entry)
|
|
181
188
|
assert_reserved!(test_id)
|
|
182
|
-
entry = reserved_entries.fetch(test_id,
|
|
189
|
+
entry = reserved_entries.fetch(test_id, entry)
|
|
190
|
+
lease = @reserved_leases.delete(test_id)
|
|
183
191
|
unreserve_entry(test_id)
|
|
184
192
|
eval_script(
|
|
185
193
|
:acknowledge,
|
|
186
|
-
keys: [key('running'), key('processed'), key('owners'), key('error-reports'), key('requeued-by')],
|
|
187
|
-
argv: [entry,
|
|
194
|
+
keys: [key('running'), key('processed'), key('owners'), key('error-reports'), key('requeued-by'), key('leases')],
|
|
195
|
+
argv: [entry, error.to_s, config.redis_ttl, lease.to_s],
|
|
188
196
|
pipeline: pipeline,
|
|
189
197
|
) == 1
|
|
190
198
|
end
|
|
191
199
|
|
|
192
|
-
def requeue(
|
|
193
|
-
test_id =
|
|
200
|
+
def requeue(entry, offset: Redis.requeue_offset)
|
|
201
|
+
test_id = CI::Queue::QueueEntry.test_id(entry)
|
|
194
202
|
assert_reserved!(test_id)
|
|
195
|
-
entry = reserved_entries.fetch(test_id,
|
|
203
|
+
entry = reserved_entries.fetch(test_id, entry)
|
|
204
|
+
lease = @reserved_leases.delete(test_id)
|
|
196
205
|
unreserve_entry(test_id)
|
|
197
206
|
global_max_requeues = config.global_max_requeues(total)
|
|
198
207
|
|
|
@@ -207,14 +216,16 @@ module CI
|
|
|
207
216
|
key('owners'),
|
|
208
217
|
key('error-reports'),
|
|
209
218
|
key('requeued-by'),
|
|
219
|
+
key('leases'),
|
|
210
220
|
],
|
|
211
|
-
argv: [config.max_requeues, global_max_requeues, entry,
|
|
221
|
+
argv: [config.max_requeues, global_max_requeues, entry, offset, config.redis_ttl, lease.to_s],
|
|
212
222
|
) == 1
|
|
213
223
|
|
|
214
224
|
unless requeued
|
|
215
225
|
reserved_tests << test_id
|
|
216
226
|
reserved_entries[test_id] = entry
|
|
217
227
|
reserved_entry_ids[entry] = test_id
|
|
228
|
+
@reserved_leases[test_id] = lease if lease
|
|
218
229
|
end
|
|
219
230
|
requeued
|
|
220
231
|
end
|
|
@@ -222,7 +233,7 @@ module CI
|
|
|
222
233
|
def release!
|
|
223
234
|
eval_script(
|
|
224
235
|
:release,
|
|
225
|
-
keys: [key('running'), key('worker', worker_id, 'queue'), key('owners')],
|
|
236
|
+
keys: [key('running'), key('worker', worker_id, 'queue'), key('owners'), key('leases')],
|
|
226
237
|
argv: [],
|
|
227
238
|
)
|
|
228
239
|
nil
|
|
@@ -254,11 +265,12 @@ module CI
|
|
|
254
265
|
end
|
|
255
266
|
end
|
|
256
267
|
|
|
257
|
-
def reserve_entry(entry)
|
|
258
|
-
test_id =
|
|
268
|
+
def reserve_entry(entry, lease = nil)
|
|
269
|
+
test_id = CI::Queue::QueueEntry.test_id(entry)
|
|
259
270
|
reserved_tests << test_id
|
|
260
271
|
reserved_entries[test_id] = entry
|
|
261
272
|
reserved_entry_ids[entry] = test_id
|
|
273
|
+
@reserved_leases[test_id] = lease if lease
|
|
262
274
|
end
|
|
263
275
|
|
|
264
276
|
def unreserve_entry(test_id)
|
|
@@ -267,19 +279,6 @@ module CI
|
|
|
267
279
|
reserved_entry_ids.delete(entry) if entry
|
|
268
280
|
end
|
|
269
281
|
|
|
270
|
-
def normalize_test_id(test_key)
|
|
271
|
-
key = test_key.respond_to?(:id) ? test_key.id : test_key
|
|
272
|
-
if key.is_a?(String)
|
|
273
|
-
cached = reserved_entry_ids[key]
|
|
274
|
-
return cached if cached
|
|
275
|
-
end
|
|
276
|
-
queue_entry_test_id(key)
|
|
277
|
-
end
|
|
278
|
-
|
|
279
|
-
def queue_entry_test_id(entry)
|
|
280
|
-
CI::Queue::QueueEntry.test_id(entry)
|
|
281
|
-
end
|
|
282
|
-
|
|
283
282
|
def queue_entry_for(test)
|
|
284
283
|
return test.queue_entry if test.respond_to?(:queue_entry)
|
|
285
284
|
return test.id if test.respond_to?(:id)
|
|
@@ -288,7 +287,7 @@ module CI
|
|
|
288
287
|
end
|
|
289
288
|
|
|
290
289
|
def resolve_entry(entry)
|
|
291
|
-
test_id = reserved_entry_ids[entry] ||
|
|
290
|
+
test_id = reserved_entry_ids[entry] || CI::Queue::QueueEntry.test_id(entry)
|
|
292
291
|
if populated?
|
|
293
292
|
return index[test_id] if index.key?(test_id)
|
|
294
293
|
end
|
|
@@ -356,12 +355,12 @@ module CI
|
|
|
356
355
|
end
|
|
357
356
|
|
|
358
357
|
def reserve
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
end
|
|
358
|
+
entry, lease = try_to_reserve_lost_test || try_to_reserve_test || [nil, nil]
|
|
359
|
+
if entry
|
|
360
|
+
@first_reserve_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
361
|
+
reserve_entry(entry, lease)
|
|
364
362
|
end
|
|
363
|
+
entry
|
|
365
364
|
end
|
|
366
365
|
|
|
367
366
|
def try_to_reserve_test
|
|
@@ -375,6 +374,8 @@ module CI
|
|
|
375
374
|
key('owners'),
|
|
376
375
|
key('requeued-by'),
|
|
377
376
|
key('workers'),
|
|
377
|
+
key('leases'),
|
|
378
|
+
key('lease-counter'),
|
|
378
379
|
],
|
|
379
380
|
argv: [CI::Queue.time_now.to_f, Redis.requeue_offset],
|
|
380
381
|
)
|
|
@@ -383,22 +384,28 @@ module CI
|
|
|
383
384
|
def try_to_reserve_lost_test
|
|
384
385
|
timeout = config.max_missed_heartbeat_seconds ? config.max_missed_heartbeat_seconds : config.timeout
|
|
385
386
|
|
|
386
|
-
|
|
387
|
+
result = eval_script(
|
|
387
388
|
:reserve_lost,
|
|
388
389
|
keys: [
|
|
389
390
|
key('running'),
|
|
390
|
-
key('
|
|
391
|
+
key('processed'),
|
|
391
392
|
key('worker', worker_id, 'queue'),
|
|
392
393
|
key('owners'),
|
|
394
|
+
key('leases'),
|
|
395
|
+
key('lease-counter'),
|
|
393
396
|
],
|
|
394
|
-
argv: [CI::Queue.time_now.to_f, timeout
|
|
397
|
+
argv: [CI::Queue.time_now.to_f, timeout],
|
|
395
398
|
)
|
|
396
399
|
|
|
397
|
-
if
|
|
398
|
-
|
|
400
|
+
if result
|
|
401
|
+
entry = result.is_a?(Array) ? result[0] : result
|
|
402
|
+
build.record_warning(Warnings::RESERVED_LOST_TEST, test: CI::Queue::QueueEntry.test_id(entry), timeout: config.timeout)
|
|
403
|
+
if CI::Queue.debug?
|
|
404
|
+
$stderr.puts "[ci-queue][reserve_lost] worker=#{worker_id} test_id=#{CI::Queue::QueueEntry.test_id(entry)}"
|
|
405
|
+
end
|
|
399
406
|
end
|
|
400
407
|
|
|
401
|
-
|
|
408
|
+
result
|
|
402
409
|
end
|
|
403
410
|
|
|
404
411
|
def push(entries)
|
data/lib/ci/queue/static.rb
CHANGED
|
@@ -16,6 +16,7 @@ module CI
|
|
|
16
16
|
TEN_MINUTES = 60 * 10
|
|
17
17
|
|
|
18
18
|
attr_reader :progress, :total
|
|
19
|
+
attr_accessor :entry_resolver
|
|
19
20
|
|
|
20
21
|
def initialize(tests, config)
|
|
21
22
|
@queue = tests
|
|
@@ -50,10 +51,24 @@ module CI
|
|
|
50
51
|
self
|
|
51
52
|
end
|
|
52
53
|
|
|
53
|
-
|
|
54
|
+
# Support lazy loading mode: accept an enumerator of entries and
|
|
55
|
+
# store them in queue order (no shuffling). This preserves the
|
|
56
|
+
# exact order from the input file for local reproduction.
|
|
57
|
+
def stream_populate(tests, random: nil, batch_size: nil)
|
|
58
|
+
@queue = []
|
|
59
|
+
tests.each { |entry| @queue << entry }
|
|
60
|
+
@total = @queue.size
|
|
61
|
+
self
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def with_heartbeat(id, lease: nil)
|
|
54
65
|
yield
|
|
55
66
|
end
|
|
56
67
|
|
|
68
|
+
def lease_for(entry)
|
|
69
|
+
nil
|
|
70
|
+
end
|
|
71
|
+
|
|
57
72
|
def ensure_heartbeat_thread_alive!; end
|
|
58
73
|
|
|
59
74
|
def boot_heartbeat_process!; end
|
|
@@ -75,11 +90,15 @@ module CI
|
|
|
75
90
|
end
|
|
76
91
|
|
|
77
92
|
def populated?
|
|
78
|
-
!!defined?(@index)
|
|
93
|
+
!!defined?(@index) || @queue.any?
|
|
79
94
|
end
|
|
80
95
|
|
|
81
96
|
def to_a
|
|
82
|
-
@
|
|
97
|
+
if defined?(@index) && @index
|
|
98
|
+
@queue.map { |i| index.fetch(i) }
|
|
99
|
+
else
|
|
100
|
+
@queue.dup
|
|
101
|
+
end
|
|
83
102
|
end
|
|
84
103
|
|
|
85
104
|
def size
|
|
@@ -97,9 +116,28 @@ module CI
|
|
|
97
116
|
def poll
|
|
98
117
|
while !@shutdown && config.circuit_breakers.none?(&:open?) && !max_test_failed? && reserved_test = @queue.shift
|
|
99
118
|
reserved_tests << reserved_test
|
|
100
|
-
|
|
119
|
+
if entry_resolver
|
|
120
|
+
resolved = entry_resolver.call(reserved_test)
|
|
121
|
+
# Track the original queue entry so requeue can push it back
|
|
122
|
+
# with its full payload (file path, load-error data, etc.).
|
|
123
|
+
reserved_entries[resolved.id] = reserved_test if resolved.respond_to?(:id)
|
|
124
|
+
yield resolved
|
|
125
|
+
elsif defined?(@index) && @index
|
|
126
|
+
# Queue entries may be JSON-formatted (with test_id + file_path) while
|
|
127
|
+
# the index is keyed by bare test_id from populate. Try the raw entry
|
|
128
|
+
# first, then fall back to extracting the test_id.
|
|
129
|
+
test_id = begin
|
|
130
|
+
CI::Queue::QueueEntry.test_id(reserved_test)
|
|
131
|
+
rescue JSON::ParserError
|
|
132
|
+
reserved_test
|
|
133
|
+
end
|
|
134
|
+
yield index.fetch(test_id)
|
|
135
|
+
else
|
|
136
|
+
yield reserved_test
|
|
137
|
+
end
|
|
101
138
|
end
|
|
102
139
|
reserved_tests.clear
|
|
140
|
+
reserved_entries.clear
|
|
103
141
|
end
|
|
104
142
|
|
|
105
143
|
def exhausted?
|
|
@@ -125,12 +163,15 @@ module CI
|
|
|
125
163
|
test_failed >= config.max_test_failed
|
|
126
164
|
end
|
|
127
165
|
|
|
128
|
-
def requeue(
|
|
129
|
-
|
|
130
|
-
return false unless should_requeue?(
|
|
166
|
+
def requeue(entry)
|
|
167
|
+
test_id = CI::Queue::QueueEntry.test_id(entry)
|
|
168
|
+
return false unless should_requeue?(test_id)
|
|
131
169
|
|
|
132
|
-
requeues[
|
|
133
|
-
|
|
170
|
+
requeues[test_id] += 1
|
|
171
|
+
# Push back the original queue entry (with file path / load-error payload)
|
|
172
|
+
# so entry_resolver can fully resolve it on the next poll iteration.
|
|
173
|
+
original_entry = reserved_entries.delete(test_id) || test_id
|
|
174
|
+
@queue.unshift(original_entry)
|
|
134
175
|
true
|
|
135
176
|
end
|
|
136
177
|
|
|
@@ -146,6 +187,10 @@ module CI
|
|
|
146
187
|
@requeues ||= Hash.new(0)
|
|
147
188
|
end
|
|
148
189
|
|
|
190
|
+
def reserved_entries
|
|
191
|
+
@reserved_entries ||= {}
|
|
192
|
+
end
|
|
193
|
+
|
|
149
194
|
def reserved_tests
|
|
150
195
|
@reserved_tests ||= Concurrent::Set.new
|
|
151
196
|
end
|
data/lib/ci/queue/version.rb
CHANGED
|
@@ -40,15 +40,15 @@ module Minitest
|
|
|
40
40
|
self.total_time = Minitest.clock_time - start_time
|
|
41
41
|
|
|
42
42
|
# Determine what type of result this is and record it
|
|
43
|
-
|
|
43
|
+
entry = test.queue_entry
|
|
44
44
|
delta = delta_for(test)
|
|
45
45
|
|
|
46
46
|
acknowledged = if (test.failure || test.error?) && !test.skipped?
|
|
47
|
-
build.record_error(
|
|
47
|
+
build.record_error(entry, dump(test), stat_delta: delta)
|
|
48
48
|
elsif test.requeued?
|
|
49
|
-
build.record_requeue(
|
|
49
|
+
build.record_requeue(entry)
|
|
50
50
|
else
|
|
51
|
-
build.record_success(
|
|
51
|
+
build.record_success(entry, skip_flaky_record: test.skipped?)
|
|
52
52
|
end
|
|
53
53
|
|
|
54
54
|
if acknowledged
|
|
@@ -138,7 +138,7 @@ module Minitest
|
|
|
138
138
|
@error_location ||= begin
|
|
139
139
|
last_before_assertion = ''
|
|
140
140
|
backtrace_for(exception).reverse_each do |s|
|
|
141
|
-
break if s =~ /in
|
|
141
|
+
break if s =~ /in [`'](?:[\w:]*[#.])?(assert|refute|flunk|pass|fail|raise|must|wont)/
|
|
142
142
|
|
|
143
143
|
last_before_assertion = s
|
|
144
144
|
end
|
data/lib/minitest/queue.rb
CHANGED
|
@@ -163,7 +163,7 @@ module Minitest
|
|
|
163
163
|
rescue_run_errors do
|
|
164
164
|
begin
|
|
165
165
|
queue.poll do |example|
|
|
166
|
-
result = queue.with_heartbeat(example.queue_entry) do
|
|
166
|
+
result = queue.with_heartbeat(example.queue_entry, lease: queue.lease_for(example.queue_entry)) do
|
|
167
167
|
example.run
|
|
168
168
|
end
|
|
169
169
|
|
|
@@ -195,15 +195,18 @@ module Minitest
|
|
|
195
195
|
# When we do a bisect, we don't care about the result other than the test we're running the bisect on
|
|
196
196
|
result.mark_as_flaked!
|
|
197
197
|
failed = false
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
if failed && CI::Queue.requeueable?(result) && queue.requeue(example.queue_entry)
|
|
201
|
+
result.requeue!
|
|
202
|
+
if CI::Queue.debug?
|
|
203
|
+
$stderr.puts "[ci-queue][requeue] test_id=#{example.id} error_class=#{result.failures.first&.class} error=#{result.failures.first&.message&.lines&.first&.chomp}"
|
|
204
|
+
end
|
|
198
205
|
elsif failed
|
|
199
206
|
queue.report_failure!
|
|
200
207
|
else
|
|
201
208
|
queue.report_success!
|
|
202
209
|
end
|
|
203
|
-
|
|
204
|
-
if failed && CI::Queue.requeueable?(result) && queue.requeue(example)
|
|
205
|
-
result.requeue!
|
|
206
|
-
end
|
|
207
210
|
reporter.record(result)
|
|
208
211
|
end
|
|
209
212
|
|
|
@@ -327,7 +330,7 @@ module Minitest
|
|
|
327
330
|
end
|
|
328
331
|
|
|
329
332
|
def queue_entry
|
|
330
|
-
id
|
|
333
|
+
@queue_entry ||= CI::Queue::QueueEntry.format(id, nil)
|
|
331
334
|
end
|
|
332
335
|
|
|
333
336
|
def <=>(other)
|
|
@@ -18,12 +18,14 @@ module RSpec
|
|
|
18
18
|
|
|
19
19
|
def example_passed(notification)
|
|
20
20
|
example = notification.example
|
|
21
|
-
|
|
21
|
+
entry = CI::Queue::QueueEntry.format(example.id, nil)
|
|
22
|
+
build.record_success(entry)
|
|
22
23
|
end
|
|
23
24
|
|
|
24
25
|
def example_failed(notification)
|
|
25
26
|
example = notification.example
|
|
26
|
-
|
|
27
|
+
entry = CI::Queue::QueueEntry.format(example.id, nil)
|
|
28
|
+
build.record_error(entry, dump(notification))
|
|
27
29
|
end
|
|
28
30
|
|
|
29
31
|
private
|
data/lib/rspec/queue.rb
CHANGED
|
@@ -253,6 +253,10 @@ module RSpec
|
|
|
253
253
|
example.id
|
|
254
254
|
end
|
|
255
255
|
|
|
256
|
+
def queue_entry
|
|
257
|
+
@queue_entry ||= CI::Queue::QueueEntry.format(id, nil)
|
|
258
|
+
end
|
|
259
|
+
|
|
256
260
|
def <=>(other)
|
|
257
261
|
id <=> other.id
|
|
258
262
|
end
|
|
@@ -411,7 +415,7 @@ module RSpec
|
|
|
411
415
|
end
|
|
412
416
|
|
|
413
417
|
def requeue
|
|
414
|
-
@queue.requeue(@example)
|
|
418
|
+
@queue.requeue(@example.queue_entry)
|
|
415
419
|
end
|
|
416
420
|
|
|
417
421
|
def cancel_run!
|
|
@@ -422,7 +426,7 @@ module RSpec
|
|
|
422
426
|
end
|
|
423
427
|
|
|
424
428
|
def acknowledge
|
|
425
|
-
@queue.acknowledge(@example)
|
|
429
|
+
@queue.acknowledge(@example.queue_entry)
|
|
426
430
|
end
|
|
427
431
|
end
|
|
428
432
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ci-queue
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.85.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jean Boussier
|
|
@@ -149,6 +149,34 @@ dependencies:
|
|
|
149
149
|
- - ">="
|
|
150
150
|
- !ruby/object:Gem::Version
|
|
151
151
|
version: '0'
|
|
152
|
+
- !ruby/object:Gem::Dependency
|
|
153
|
+
name: benchmark
|
|
154
|
+
requirement: !ruby/object:Gem::Requirement
|
|
155
|
+
requirements:
|
|
156
|
+
- - ">="
|
|
157
|
+
- !ruby/object:Gem::Version
|
|
158
|
+
version: '0'
|
|
159
|
+
type: :development
|
|
160
|
+
prerelease: false
|
|
161
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
162
|
+
requirements:
|
|
163
|
+
- - ">="
|
|
164
|
+
- !ruby/object:Gem::Version
|
|
165
|
+
version: '0'
|
|
166
|
+
- !ruby/object:Gem::Dependency
|
|
167
|
+
name: rexml
|
|
168
|
+
requirement: !ruby/object:Gem::Requirement
|
|
169
|
+
requirements:
|
|
170
|
+
- - ">="
|
|
171
|
+
- !ruby/object:Gem::Version
|
|
172
|
+
version: '0'
|
|
173
|
+
type: :development
|
|
174
|
+
prerelease: false
|
|
175
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
176
|
+
requirements:
|
|
177
|
+
- - ">="
|
|
178
|
+
- !ruby/object:Gem::Version
|
|
179
|
+
version: '0'
|
|
152
180
|
- !ruby/object:Gem::Dependency
|
|
153
181
|
name: rubocop
|
|
154
182
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -199,7 +227,6 @@ files:
|
|
|
199
227
|
- lib/ci/queue/output_helpers.rb
|
|
200
228
|
- lib/ci/queue/queue_entry.rb
|
|
201
229
|
- lib/ci/queue/redis.rb
|
|
202
|
-
- lib/ci/queue/redis/_entry_helpers.lua
|
|
203
230
|
- lib/ci/queue/redis/acknowledge.lua
|
|
204
231
|
- lib/ci/queue/redis/base.rb
|
|
205
232
|
- lib/ci/queue/redis/build_record.rb
|
|
@@ -257,14 +284,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
257
284
|
requirements:
|
|
258
285
|
- - ">="
|
|
259
286
|
- !ruby/object:Gem::Version
|
|
260
|
-
version: '
|
|
287
|
+
version: '3.1'
|
|
261
288
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
262
289
|
requirements:
|
|
263
290
|
- - ">="
|
|
264
291
|
- !ruby/object:Gem::Version
|
|
265
292
|
version: '0'
|
|
266
293
|
requirements: []
|
|
267
|
-
rubygems_version: 4.0.
|
|
294
|
+
rubygems_version: 4.0.8
|
|
268
295
|
specification_version: 4
|
|
269
296
|
summary: Distribute tests over many workers using a queue
|
|
270
297
|
test_files: []
|