ci-queue 0.82.0 → 0.84.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile.lock +59 -47
- data/README.md +87 -0
- data/ci-queue.gemspec +3 -1
- data/lib/ci/queue/build_record.rb +5 -5
- data/lib/ci/queue/class_resolver.rb +38 -0
- data/lib/ci/queue/configuration.rb +62 -1
- data/lib/ci/queue/file_loader.rb +101 -0
- data/lib/ci/queue/queue_entry.rb +48 -0
- data/lib/ci/queue/redis/acknowledge.lua +7 -5
- data/lib/ci/queue/redis/base.rb +29 -6
- data/lib/ci/queue/redis/build_record.rb +29 -17
- data/lib/ci/queue/redis/heartbeat.lua +4 -4
- data/lib/ci/queue/redis/monitor.rb +14 -2
- data/lib/ci/queue/redis/requeue.lua +17 -10
- data/lib/ci/queue/redis/reserve.lua +47 -8
- data/lib/ci/queue/redis/supervisor.rb +3 -3
- data/lib/ci/queue/redis/worker.rb +210 -27
- data/lib/ci/queue/static.rb +5 -5
- data/lib/ci/queue/version.rb +1 -1
- data/lib/ci/queue.rb +27 -0
- data/lib/minitest/queue/build_status_recorder.rb +4 -4
- data/lib/minitest/queue/junit_reporter.rb +2 -2
- data/lib/minitest/queue/lazy_entry_resolver.rb +55 -0
- data/lib/minitest/queue/lazy_test_discovery.rb +169 -0
- data/lib/minitest/queue/local_requeue_reporter.rb +11 -0
- data/lib/minitest/queue/order_reporter.rb +9 -2
- data/lib/minitest/queue/queue_population_strategy.rb +176 -0
- data/lib/minitest/queue/runner.rb +97 -22
- data/lib/minitest/queue/test_data.rb +15 -2
- data/lib/minitest/queue/worker_profile_reporter.rb +77 -0
- data/lib/minitest/queue.rb +278 -10
- data/lib/rspec/queue/build_status_recorder.rb +4 -2
- data/lib/rspec/queue.rb +6 -2
- metadata +38 -3
data/lib/ci/queue/redis/base.rb
CHANGED
|
@@ -144,19 +144,26 @@ module CI
|
|
|
144
144
|
end
|
|
145
145
|
|
|
146
146
|
def to_a
|
|
147
|
-
test_ids.reverse.map
|
|
147
|
+
test_ids.reverse.map do |entry|
|
|
148
|
+
index.fetch(entry) do
|
|
149
|
+
test_id = CI::Queue::QueueEntry.test_id(entry)
|
|
150
|
+
index.fetch(test_id)
|
|
151
|
+
end
|
|
152
|
+
end
|
|
148
153
|
end
|
|
149
154
|
|
|
150
155
|
def progress
|
|
151
|
-
total - size
|
|
156
|
+
progress = total - size
|
|
157
|
+
progress < 0 ? 0 : progress
|
|
152
158
|
end
|
|
153
159
|
|
|
154
|
-
def wait_for_master(timeout: 30)
|
|
160
|
+
def wait_for_master(timeout: 30, allow_streaming: false)
|
|
155
161
|
return true if master?
|
|
156
162
|
return true if queue_initialized?
|
|
163
|
+
return true if allow_streaming && streaming?
|
|
157
164
|
|
|
158
165
|
(timeout * 10 + 1).to_i.times do
|
|
159
|
-
if queue_initialized?
|
|
166
|
+
if queue_initialized? || (allow_streaming && streaming?)
|
|
160
167
|
return true
|
|
161
168
|
else
|
|
162
169
|
sleep 0.1
|
|
@@ -177,6 +184,10 @@ module CI
|
|
|
177
184
|
end
|
|
178
185
|
end
|
|
179
186
|
|
|
187
|
+
def streaming?
|
|
188
|
+
master_status == 'streaming'
|
|
189
|
+
end
|
|
190
|
+
|
|
180
191
|
def queue_initializing?
|
|
181
192
|
master_status == 'setup'
|
|
182
193
|
end
|
|
@@ -235,9 +246,21 @@ module CI
|
|
|
235
246
|
end
|
|
236
247
|
|
|
237
248
|
def read_script(name)
|
|
238
|
-
|
|
249
|
+
resolve_lua_includes(
|
|
250
|
+
::File.read(::File.join(CI::Queue::DEV_SCRIPTS_ROOT, "#{name}.lua")),
|
|
251
|
+
CI::Queue::DEV_SCRIPTS_ROOT,
|
|
252
|
+
)
|
|
239
253
|
rescue SystemCallError
|
|
240
|
-
|
|
254
|
+
resolve_lua_includes(
|
|
255
|
+
::File.read(::File.join(CI::Queue::RELEASE_SCRIPTS_ROOT, "#{name}.lua")),
|
|
256
|
+
CI::Queue::RELEASE_SCRIPTS_ROOT,
|
|
257
|
+
)
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
def resolve_lua_includes(script, root)
|
|
261
|
+
script.gsub(/^-- @include (\S+)$/) do
|
|
262
|
+
::File.read(::File.join(root, "#{$1}.lua"))
|
|
263
|
+
end
|
|
241
264
|
end
|
|
242
265
|
|
|
243
266
|
class HeartbeatProcess
|
|
@@ -33,14 +33,14 @@ module CI
|
|
|
33
33
|
end
|
|
34
34
|
|
|
35
35
|
def failed_tests
|
|
36
|
-
redis.hkeys(key('error-reports'))
|
|
36
|
+
redis.hkeys(key('error-reports')).map { |entry| CI::Queue::QueueEntry.test_id(entry) }
|
|
37
37
|
end
|
|
38
38
|
|
|
39
39
|
TOTAL_KEY = "___total___"
|
|
40
40
|
def requeued_tests
|
|
41
41
|
requeues = redis.hgetall(key('requeues-count'))
|
|
42
42
|
requeues.delete(TOTAL_KEY)
|
|
43
|
-
requeues
|
|
43
|
+
requeues.transform_keys { |entry| CI::Queue::QueueEntry.test_id(entry) }
|
|
44
44
|
end
|
|
45
45
|
|
|
46
46
|
def pop_warnings
|
|
@@ -56,39 +56,39 @@ module CI
|
|
|
56
56
|
redis.rpush(key('warnings'), Marshal.dump([type, attributes]))
|
|
57
57
|
end
|
|
58
58
|
|
|
59
|
-
def record_error(
|
|
59
|
+
def record_error(entry, payload, stat_delta: nil)
|
|
60
60
|
# Run acknowledge first so we know whether we're the first to ack
|
|
61
|
-
acknowledged = @queue.acknowledge(
|
|
61
|
+
acknowledged = @queue.acknowledge(entry, error: payload)
|
|
62
62
|
|
|
63
63
|
if acknowledged
|
|
64
64
|
# We were the first to ack; another worker already ack'd would get falsy from SADD
|
|
65
65
|
@queue.increment_test_failed
|
|
66
66
|
# Only the acknowledging worker's stats include this failure (others skip increment when ack=false).
|
|
67
67
|
# Store so we can subtract it if another worker records success later.
|
|
68
|
-
store_error_report_delta(
|
|
68
|
+
store_error_report_delta(entry, stat_delta) if stat_delta && stat_delta.any?
|
|
69
69
|
end
|
|
70
70
|
# Return so caller can roll back local counter when not acknowledged
|
|
71
71
|
!!acknowledged
|
|
72
72
|
end
|
|
73
73
|
|
|
74
|
-
def record_success(
|
|
74
|
+
def record_success(entry, skip_flaky_record: false)
|
|
75
75
|
acknowledged, error_reports_deleted_count, requeued_count, delta_json = redis.multi do |transaction|
|
|
76
|
-
@queue.acknowledge(
|
|
77
|
-
transaction.hdel(key('error-reports'),
|
|
78
|
-
transaction.hget(key('requeues-count'),
|
|
79
|
-
transaction.hget(key('error-report-deltas'),
|
|
76
|
+
@queue.acknowledge(entry, pipeline: transaction)
|
|
77
|
+
transaction.hdel(key('error-reports'), entry)
|
|
78
|
+
transaction.hget(key('requeues-count'), entry)
|
|
79
|
+
transaction.hget(key('error-report-deltas'), entry)
|
|
80
80
|
end
|
|
81
81
|
# When we're replacing a failure, subtract the (single) acknowledging worker's stat contribution
|
|
82
82
|
if error_reports_deleted_count.to_i > 0 && delta_json
|
|
83
83
|
apply_error_report_delta_correction(delta_json)
|
|
84
|
-
redis.hdel(key('error-report-deltas'),
|
|
84
|
+
redis.hdel(key('error-report-deltas'), entry)
|
|
85
85
|
end
|
|
86
|
-
record_flaky(
|
|
86
|
+
record_flaky(entry) if !skip_flaky_record && (error_reports_deleted_count.to_i > 0 || requeued_count.to_i > 0)
|
|
87
87
|
# Count this run when we ack'd or when we replaced a failure (so stats delta is applied)
|
|
88
88
|
!!(acknowledged || error_reports_deleted_count.to_i > 0)
|
|
89
89
|
end
|
|
90
90
|
|
|
91
|
-
def record_requeue(
|
|
91
|
+
def record_requeue(entry)
|
|
92
92
|
true
|
|
93
93
|
end
|
|
94
94
|
|
|
@@ -142,11 +142,23 @@ module CI
|
|
|
142
142
|
end
|
|
143
143
|
|
|
144
144
|
def error_reports
|
|
145
|
-
redis.hgetall(key('error-reports'))
|
|
145
|
+
redis.hgetall(key('error-reports')).transform_keys { |entry| CI::Queue::QueueEntry.test_id(entry) }
|
|
146
146
|
end
|
|
147
147
|
|
|
148
148
|
def flaky_reports
|
|
149
|
-
redis.smembers(key('flaky-reports'))
|
|
149
|
+
redis.smembers(key('flaky-reports')).map { |entry| CI::Queue::QueueEntry.test_id(entry) }
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def record_worker_profile(profile)
|
|
153
|
+
redis.pipelined do |pipeline|
|
|
154
|
+
pipeline.hset(key('worker-profiles'), config.worker_id, JSON.dump(profile))
|
|
155
|
+
pipeline.expire(key('worker-profiles'), config.redis_ttl)
|
|
156
|
+
end
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
def worker_profiles
|
|
160
|
+
raw = redis.hgetall(key('worker-profiles'))
|
|
161
|
+
raw.transform_values { |v| JSON.parse(v) }
|
|
150
162
|
end
|
|
151
163
|
|
|
152
164
|
def fetch_stats(stat_names)
|
|
@@ -175,10 +187,10 @@ module CI
|
|
|
175
187
|
['build', config.build_id, *args].join(':')
|
|
176
188
|
end
|
|
177
189
|
|
|
178
|
-
def store_error_report_delta(
|
|
190
|
+
def store_error_report_delta(entry, stat_delta)
|
|
179
191
|
# Only the acknowledging worker's stats include this test; store their delta for correction on success
|
|
180
192
|
payload = { 'worker_id' => config.worker_id.to_s }.merge(stat_delta)
|
|
181
|
-
redis.hset(key('error-report-deltas'),
|
|
193
|
+
redis.hset(key('error-report-deltas'), entry, JSON.generate(payload))
|
|
182
194
|
redis.expire(key('error-report-deltas'), config.redis_ttl)
|
|
183
195
|
end
|
|
184
196
|
|
|
@@ -5,14 +5,14 @@ local owners_key = KEYS[3]
|
|
|
5
5
|
local worker_queue_key = KEYS[4]
|
|
6
6
|
|
|
7
7
|
local current_time = ARGV[1]
|
|
8
|
-
local
|
|
8
|
+
local entry = ARGV[2]
|
|
9
9
|
|
|
10
10
|
-- already processed, we do not need to bump the timestamp
|
|
11
|
-
if redis.call('sismember', processed_key,
|
|
11
|
+
if redis.call('sismember', processed_key, entry) == 1 then
|
|
12
12
|
return false
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
-- we're still the owner of the test, we can bump the timestamp
|
|
16
|
-
if redis.call('hget', owners_key,
|
|
17
|
-
return redis.call('zadd', zset_key, current_time,
|
|
16
|
+
if redis.call('hget', owners_key, entry) == worker_queue_key then
|
|
17
|
+
return redis.call('zadd', zset_key, current_time, entry)
|
|
18
18
|
end
|
|
@@ -56,9 +56,21 @@ module CI
|
|
|
56
56
|
end
|
|
57
57
|
|
|
58
58
|
def read_script(name)
|
|
59
|
-
|
|
59
|
+
resolve_lua_includes(
|
|
60
|
+
::File.read(::File.join(DEV_SCRIPTS_ROOT, "#{name}.lua")),
|
|
61
|
+
DEV_SCRIPTS_ROOT,
|
|
62
|
+
)
|
|
60
63
|
rescue SystemCallError
|
|
61
|
-
|
|
64
|
+
resolve_lua_includes(
|
|
65
|
+
::File.read(::File.join(RELEASE_SCRIPTS_ROOT, "#{name}.lua")),
|
|
66
|
+
RELEASE_SCRIPTS_ROOT,
|
|
67
|
+
)
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def resolve_lua_includes(script, root)
|
|
71
|
+
script.gsub(/^-- @include (\S+)$/) do
|
|
72
|
+
::File.read(::File.join(root, "#{$1}.lua"))
|
|
73
|
+
end
|
|
62
74
|
end
|
|
63
75
|
|
|
64
76
|
HEADER = 'L'
|
|
@@ -6,17 +6,19 @@ local zset_key = KEYS[4]
|
|
|
6
6
|
local worker_queue_key = KEYS[5]
|
|
7
7
|
local owners_key = KEYS[6]
|
|
8
8
|
local error_reports_key = KEYS[7]
|
|
9
|
+
local requeued_by_key = KEYS[8]
|
|
9
10
|
|
|
10
11
|
local max_requeues = tonumber(ARGV[1])
|
|
11
12
|
local global_max_requeues = tonumber(ARGV[2])
|
|
12
|
-
local
|
|
13
|
+
local entry = ARGV[3]
|
|
13
14
|
local offset = ARGV[4]
|
|
15
|
+
local ttl = tonumber(ARGV[5])
|
|
14
16
|
|
|
15
|
-
if redis.call('hget', owners_key,
|
|
16
|
-
redis.call('hdel', owners_key,
|
|
17
|
+
if redis.call('hget', owners_key, entry) == worker_queue_key then
|
|
18
|
+
redis.call('hdel', owners_key, entry)
|
|
17
19
|
end
|
|
18
20
|
|
|
19
|
-
if redis.call('sismember', processed_key,
|
|
21
|
+
if redis.call('sismember', processed_key, entry) == 1 then
|
|
20
22
|
return false
|
|
21
23
|
end
|
|
22
24
|
|
|
@@ -25,23 +27,28 @@ if global_requeues and global_requeues >= tonumber(global_max_requeues) then
|
|
|
25
27
|
return false
|
|
26
28
|
end
|
|
27
29
|
|
|
28
|
-
local requeues = tonumber(redis.call('hget', requeues_count_key,
|
|
30
|
+
local requeues = tonumber(redis.call('hget', requeues_count_key, entry))
|
|
29
31
|
if requeues and requeues >= max_requeues then
|
|
30
32
|
return false
|
|
31
33
|
end
|
|
32
34
|
|
|
33
35
|
redis.call('hincrby', requeues_count_key, '___total___', 1)
|
|
34
|
-
redis.call('hincrby', requeues_count_key,
|
|
36
|
+
redis.call('hincrby', requeues_count_key, entry, 1)
|
|
35
37
|
|
|
36
|
-
redis.call('hdel', error_reports_key,
|
|
38
|
+
redis.call('hdel', error_reports_key, entry)
|
|
37
39
|
|
|
38
40
|
local pivot = redis.call('lrange', queue_key, -1 - offset, 0 - offset)[1]
|
|
39
41
|
if pivot then
|
|
40
|
-
redis.call('linsert', queue_key, 'BEFORE', pivot,
|
|
42
|
+
redis.call('linsert', queue_key, 'BEFORE', pivot, entry)
|
|
41
43
|
else
|
|
42
|
-
redis.call('lpush', queue_key,
|
|
44
|
+
redis.call('lpush', queue_key, entry)
|
|
43
45
|
end
|
|
44
46
|
|
|
45
|
-
redis.call('
|
|
47
|
+
redis.call('hset', requeued_by_key, entry, worker_queue_key)
|
|
48
|
+
if ttl and ttl > 0 then
|
|
49
|
+
redis.call('expire', requeued_by_key, ttl)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
redis.call('zrem', zset_key, entry)
|
|
46
53
|
|
|
47
54
|
return true
|
|
@@ -4,15 +4,54 @@ local zset_key = KEYS[2]
|
|
|
4
4
|
local processed_key = KEYS[3]
|
|
5
5
|
local worker_queue_key = KEYS[4]
|
|
6
6
|
local owners_key = KEYS[5]
|
|
7
|
+
local requeued_by_key = KEYS[6]
|
|
8
|
+
local workers_key = KEYS[7]
|
|
7
9
|
|
|
8
10
|
local current_time = ARGV[1]
|
|
11
|
+
local defer_offset = tonumber(ARGV[2]) or 0
|
|
12
|
+
local max_skip_attempts = 4
|
|
9
13
|
|
|
10
|
-
local
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
return nil
|
|
14
|
+
local function insert_with_offset(test)
|
|
15
|
+
local pivot = redis.call('lrange', queue_key, -1 - defer_offset, 0 - defer_offset)[1]
|
|
16
|
+
if pivot then
|
|
17
|
+
redis.call('linsert', queue_key, 'BEFORE', pivot, test)
|
|
18
|
+
else
|
|
19
|
+
redis.call('lpush', queue_key, test)
|
|
20
|
+
end
|
|
18
21
|
end
|
|
22
|
+
|
|
23
|
+
for attempt = 1, max_skip_attempts do
|
|
24
|
+
local test = redis.call('rpop', queue_key)
|
|
25
|
+
if not test then
|
|
26
|
+
return nil
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
local requeued_by = redis.call('hget', requeued_by_key, test)
|
|
30
|
+
if requeued_by == worker_queue_key then
|
|
31
|
+
-- If this build only has one worker, allow immediate self-pickup.
|
|
32
|
+
if redis.call('scard', workers_key) <= 1 then
|
|
33
|
+
redis.call('hdel', requeued_by_key, test)
|
|
34
|
+
redis.call('zadd', zset_key, current_time, test)
|
|
35
|
+
redis.call('lpush', worker_queue_key, test)
|
|
36
|
+
redis.call('hset', owners_key, test, worker_queue_key)
|
|
37
|
+
return test
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
insert_with_offset(test)
|
|
41
|
+
|
|
42
|
+
-- If this worker only finds its own requeued tests, defer once by returning nil,
|
|
43
|
+
-- then allow pickup on a subsequent reserve attempt.
|
|
44
|
+
if attempt == max_skip_attempts then
|
|
45
|
+
redis.call('hdel', requeued_by_key, test)
|
|
46
|
+
return nil
|
|
47
|
+
end
|
|
48
|
+
else
|
|
49
|
+
redis.call('hdel', requeued_by_key, test)
|
|
50
|
+
redis.call('zadd', zset_key, current_time, test)
|
|
51
|
+
redis.call('lpush', worker_queue_key, test)
|
|
52
|
+
redis.call('hset', owners_key, test, worker_queue_key)
|
|
53
|
+
return test
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
return nil
|
|
@@ -9,7 +9,7 @@ module CI
|
|
|
9
9
|
end
|
|
10
10
|
|
|
11
11
|
def total
|
|
12
|
-
wait_for_master(timeout: config.queue_init_timeout)
|
|
12
|
+
wait_for_master(timeout: config.queue_init_timeout, allow_streaming: true)
|
|
13
13
|
redis.get(key('total')).to_i
|
|
14
14
|
end
|
|
15
15
|
|
|
@@ -19,7 +19,7 @@ module CI
|
|
|
19
19
|
|
|
20
20
|
def wait_for_workers
|
|
21
21
|
duration = measure do
|
|
22
|
-
wait_for_master(timeout: config.queue_init_timeout)
|
|
22
|
+
wait_for_master(timeout: config.queue_init_timeout, allow_streaming: true)
|
|
23
23
|
end
|
|
24
24
|
|
|
25
25
|
yield if block_given?
|
|
@@ -30,7 +30,7 @@ module CI
|
|
|
30
30
|
@time_left -= 1
|
|
31
31
|
sleep 1
|
|
32
32
|
|
|
33
|
-
if active_workers?
|
|
33
|
+
if active_workers? || streaming?
|
|
34
34
|
@time_left_with_no_workers = config.inactive_workers_timeout
|
|
35
35
|
else
|
|
36
36
|
@time_left_with_no_workers -= 1
|