ci-queue 0.41.0 → 0.43.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ci/queue/circuit_breaker.rb +1 -1
- data/lib/ci/queue/configuration.rb +5 -3
- data/lib/ci/queue/redis/base.rb +196 -3
- data/lib/ci/queue/redis/heartbeat.lua +18 -0
- data/lib/ci/queue/redis/monitor.rb +153 -0
- data/lib/ci/queue/redis/supervisor.rb +1 -1
- data/lib/ci/queue/redis/worker.rb +5 -7
- data/lib/ci/queue/static.rb +11 -1
- data/lib/ci/queue/version.rb +1 -1
- data/lib/ci/queue.rb +10 -0
- data/lib/minitest/queue/runner.rb +16 -1
- data/lib/minitest/queue.rb +6 -2
- metadata +5 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9fafefb68f5e00faba6e1c19944c0289bd0e7c5ca4d67090605fea402f2b04f3
|
|
4
|
+
data.tar.gz: fe85dc8a004f45a54203eac3e4294e5348bca9d8920b70ad2427fed9f5f26776
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a190ef07194b9cbb9c880de74abd9e54ede4d3e52480e26ba20c16a80720fafb66ce5933bc21df4213e7cefc9abb5f3893b75e46fb7e8327ee6e11b8a5e26de4
|
|
7
|
+
data.tar.gz: 5b3a127d41fde0f8094878e8cfcd088eb2d1a32fc86798b98adafbf06caa19e201631899fa4aec9c0afa8d8a9174215d529dd1dfdf637e4e9122dfb93f99d741
|
|
@@ -5,7 +5,7 @@ module CI
|
|
|
5
5
|
attr_accessor :timeout, :worker_id, :max_requeues, :grind_count, :failure_file, :export_flaky_tests_file
|
|
6
6
|
attr_accessor :requeue_tolerance, :namespace, :failing_test, :statsd_endpoint
|
|
7
7
|
attr_accessor :max_test_duration, :max_test_duration_percentile, :track_test_duration
|
|
8
|
-
attr_accessor :max_test_failed, :redis_ttl, :warnings_file
|
|
8
|
+
attr_accessor :max_test_failed, :redis_ttl, :warnings_file, :debug_log, :max_missed_heartbeat_seconds
|
|
9
9
|
attr_reader :circuit_breakers
|
|
10
10
|
attr_writer :seed, :build_id
|
|
11
11
|
attr_writer :queue_init_timeout, :report_timeout, :inactive_workers_timeout
|
|
@@ -19,6 +19,7 @@ module CI
|
|
|
19
19
|
flaky_tests: load_flaky_tests(env['CI_QUEUE_FLAKY_TESTS']),
|
|
20
20
|
statsd_endpoint: env['CI_QUEUE_STATSD_ADDR'],
|
|
21
21
|
redis_ttl: env['CI_QUEUE_REDIS_TTL']&.to_i || 8 * 60 * 60,
|
|
22
|
+
debug_log: env['CI_QUEUE_DEBUG_LOG'],
|
|
22
23
|
)
|
|
23
24
|
end
|
|
24
25
|
|
|
@@ -36,8 +37,7 @@ module CI
|
|
|
36
37
|
grind_count: nil, max_duration: nil, failure_file: nil, max_test_duration: nil,
|
|
37
38
|
max_test_duration_percentile: 0.5, track_test_duration: false, max_test_failed: nil,
|
|
38
39
|
queue_init_timeout: nil, redis_ttl: 8 * 60 * 60, report_timeout: nil, inactive_workers_timeout: nil,
|
|
39
|
-
export_flaky_tests_file: nil, warnings_file: nil
|
|
40
|
-
)
|
|
40
|
+
export_flaky_tests_file: nil, warnings_file: nil, debug_log: nil, max_missed_heartbeat_seconds: nil)
|
|
41
41
|
@build_id = build_id
|
|
42
42
|
@circuit_breakers = [CircuitBreaker::Disabled]
|
|
43
43
|
@failure_file = failure_file
|
|
@@ -62,6 +62,8 @@ module CI
|
|
|
62
62
|
@inactive_workers_timeout = inactive_workers_timeout
|
|
63
63
|
@export_flaky_tests_file = export_flaky_tests_file
|
|
64
64
|
@warnings_file = warnings_file
|
|
65
|
+
@debug_log = debug_log
|
|
66
|
+
@max_missed_heartbeat_seconds = max_missed_heartbeat_seconds
|
|
65
67
|
end
|
|
66
68
|
|
|
67
69
|
def queue_init_timeout
|
data/lib/ci/queue/redis/base.rb
CHANGED
|
@@ -11,19 +11,87 @@ module CI
|
|
|
11
11
|
::SocketError, # https://github.com/redis/redis-rb/pull/631
|
|
12
12
|
].freeze
|
|
13
13
|
|
|
14
|
+
# Redis client middleware that writes every command (or pipeline) together
# with its reply to the logger stored under `redis_config.custom[:debug_log]`.
# Logging happens after the call returns, so a call that raises is not logged.
module RedisInstrumentation
  # Logs a single command and its reply, then returns the reply unchanged.
  def call(command, redis_config)
    super.tap do |reply|
      redis_config.custom[:debug_log].info("#{command}: #{reply}")
    end
  end

  # Logs a pipeline of commands and its replies, then returns the replies unchanged.
  def call_pipelined(commands, redis_config)
    super.tap do |replies|
      redis_config.custom[:debug_log].info("#{commands}: #{replies}")
    end
  end
end
|
|
29
|
+
|
|
14
30
|
# Stores the URL and configuration and opens the Redis connection.
#
# redis_url - URL handed to ::Redis.new.
# config    - queue configuration; read by the helpers called below.
def initialize(redis_url, config)
  @redis_url = redis_url
  @config = config
  # Compare as versions, not as strings: lexicographically "10.0.0" sorts
  # before "5.0.0", which would silently disable the options below.
  if Gem::Version.new(::Redis::VERSION) > Gem::Version.new("5.0.0")
    @redis = ::Redis.new(
      url: redis_url,
      # Booting a CI worker is costly, so in case of a Redis blip,
      # it makes sense to retry for a while before giving up.
      reconnect_attempts: reconnect_attempts,
      middlewares: custom_middlewares,
      custom: custom_config,
    )
  else
    @redis = ::Redis.new(url: redis_url)
  end
end
|
|
46
|
+
|
|
47
|
+
# Delays (in seconds) between Redis reconnection attempts.
# Returns an empty list when CI_QUEUE_DISABLE_RECONNECT_ATTEMPTS is set
# in the environment.
def reconnect_attempts
  ENV["CI_QUEUE_DISABLE_RECONNECT_ATTEMPTS"] ? [] : [0, 0, 0.1, 0.5, 1, 3, 5]
end
|
|
52
|
+
|
|
53
|
+
# Yields to the block and returns its value. When the heartbeat is enabled,
# the heartbeat thread is (re)started and the state is set to [:tick, id]
# before yielding; the state is set back to [:reset] afterwards, even if the
# block raises.
def with_heartbeat(id)
  begin
    if heartbeat_enabled?
      ensure_heartbeat_thread_alive!
      heartbeat_state.set(:tick, id)
    end

    yield
  ensure
    heartbeat_state.set(:reset) if heartbeat_enabled?
  end
end
|
|
63
|
+
|
|
64
|
+
# Starts the heartbeat thread when the heartbeat is enabled and no live
# thread is recorded in @heartbeat_thread. Returns the new Thread, or nil
# when nothing had to be started.
def ensure_heartbeat_thread_alive!
  if heartbeat_enabled? && !@heartbeat_thread&.alive?
    @heartbeat_thread = Thread.start { heartbeat }
  end
end
|
|
70
|
+
|
|
71
|
+
# Boots the heartbeat child process when the heartbeat is enabled;
# otherwise does nothing and returns nil.
def boot_heartbeat_process!
  heartbeat_process.boot! if heartbeat_enabled?
end
|
|
76
|
+
|
|
77
|
+
# When the heartbeat is enabled: tells the heartbeat thread to stop, then
# shuts the heartbeat child process down and returns the result of that
# shutdown. Returns nil when the heartbeat is disabled.
def stop_heartbeat!
  if heartbeat_enabled?
    heartbeat_state.set(:stop)
    heartbeat_process.shutdown!
  end
end
|
|
83
|
+
|
|
84
|
+
# Builds the `custom:` option for the Redis client: a hash holding a Logger
# that writes to config.debug_log, or nil when no debug log is configured.
def custom_config
  if config.debug_log
    # Loaded lazily: only needed when debug logging is requested.
    require 'logger'
    { debug_log: Logger.new(config.debug_log) }
  end
end
|
|
90
|
+
|
|
91
|
+
# Middlewares to install on the Redis client: the instrumentation module
# when a debug log is configured, nil otherwise.
def custom_middlewares
  [RedisInstrumentation] if config.debug_log
end
|
|
28
96
|
|
|
29
97
|
def exhausted?
|
|
@@ -32,7 +100,7 @@ module CI
|
|
|
32
100
|
|
|
33
101
|
def expired?
|
|
34
102
|
if (created_at = redis.get(key('created-at')))
|
|
35
|
-
(created_at.to_f + config.redis_ttl + TEN_MINUTES) <
|
|
103
|
+
(created_at.to_f + config.redis_ttl + TEN_MINUTES) < CI::Queue.time_now.to_f
|
|
36
104
|
else
|
|
37
105
|
# if there is no created at set anymore we assume queue is expired
|
|
38
106
|
true
|
|
@@ -132,6 +200,131 @@ module CI
|
|
|
132
200
|
rescue SystemCallError
|
|
133
201
|
::File.read(::File.join(CI::Queue::RELEASE_SCRIPTS_ROOT, "#{name}.lua"))
|
|
134
202
|
end
|
|
203
|
+
|
|
204
|
+
# Parent-side handle on the heartbeat monitor child process (monitor.rb).
# Messages are sent to the child over a pipe connected to its stdin; each
# message is a native-endian 32-bit length followed by a JSON payload.
class HeartbeatProcess
  # redis_url and the four Redis key names are passed verbatim to the child
  # as command-line arguments.
  def initialize(redis_url, zset_key, processed_key, owners_key, worker_queue_key)
    @redis_url = redis_url
    @zset_key = zset_key
    @processed_key = processed_key
    @owners_key = owners_key
    @worker_queue_key = worker_queue_key
  end

  # Spawns the monitor child and waits up to 10 seconds for it to write to
  # its stdout, which signals readiness.
  #
  # Returns the write end of the message pipe.
  # Raises RuntimeError when the child did not report readiness in time; in
  # that case the child is killed and reaped first.
  def boot!
    child_read, @pipe = IO.pipe
    ready_pipe, child_write = IO.pipe
    @pipe.binmode
    @pid = Process.spawn(
      RbConfig.ruby,
      ::File.join(__dir__, "monitor.rb"),
      @redis_url,
      @zset_key,
      @processed_key,
      @owners_key,
      @worker_queue_key,
      in: child_read,
      out: child_write,
    )
    # The child owns its copies now; keeping ours open would prevent EOF detection.
    child_read.close
    child_write.close

    begin
      if ready_pipe.wait_readable(10)
        ready_pipe.gets
      else
        # Signal 0 only probes for existence; an unresponsive child has to be
        # killed for real, otherwise waiting on it could block forever.
        kill_child
        raise "Monitor child wasn't ready after 10 seconds"
      end
    ensure
      ready_pipe.close
    end

    # Check the process is alive.
    Process.kill(0, @pid)
    @pipe
  end

  # Closes the message pipe and waits for the child to exit.
  #
  # Returns the child's Process::Status, or nil when there is no child to
  # wait for (never booted, or already reaped).
  def shutdown!
    @pipe&.close
    return unless @pid

    begin
      _, status = Process.waitpid2(@pid)
      status
    rescue Errno::ECHILD
      nil
    end
  end

  # Sends a tick message carrying the given test id to the child.
  def tick!(id)
    send_message(:tick!, id: id)
  end

  private

  # Forcefully terminates and reaps the child; tolerates it being gone already.
  def kill_child
    Process.kill(:KILL, @pid)
    Process.wait(@pid)
  rescue Errno::ESRCH, Errno::ECHILD
    nil
  end

  # Serializes the arguments as a JSON array and writes it, length-prefixed,
  # to the pipe.
  def send_message(*message)
    payload = message.to_json
    @pipe.write([payload.bytesize].pack("L").b, payload)
  end
end
|
|
265
|
+
|
|
266
|
+
# Small mailbox holding the latest command for the heartbeat thread.
# The stored value persists between reads: `wait` returns whatever was set
# last (an Array of the arguments given to `set`), or nil if nothing was set.
class State
  def initialize
    @state = nil
    @mutex = Mutex.new
    @cond = ConditionVariable.new
  end

  # Replaces the stored command and wakes up every thread blocked in `wait`.
  # The assignment happens under the same mutex as the broadcast so that a
  # woken waiter always observes the value that triggered the wake-up.
  def set(*state)
    @mutex.synchronize do
      @state = state
      @cond.broadcast
    end
  end

  # Blocks for at most `timeout` seconds, or until `set` is called, and
  # returns the currently stored command.
  def wait(timeout)
    @mutex.synchronize do
      @cond.wait(@mutex, timeout)
      @state
    end
  end
end
|
|
287
|
+
|
|
288
|
+
# Memoized State instance shared between callers and the heartbeat thread.
def heartbeat_state
  return @heartbeat_state if @heartbeat_state

  @heartbeat_state = State.new
end
|
|
291
|
+
|
|
292
|
+
# Memoized HeartbeatProcess built from the Redis URL and the 'running',
# 'processed', 'owners' and per-worker queue keys.
def heartbeat_process
  @heartbeat_process ||= begin
    redis_keys = [
      key('running'),
      key('processed'),
      key('owners'),
      key('worker', worker_id, 'queue'),
    ]
    HeartbeatProcess.new(@redis_url, *redis_keys)
  end
end
|
|
301
|
+
|
|
302
|
+
# The heartbeat is enabled whenever max_missed_heartbeat_seconds is set.
# Returns that setting itself (truthy) or nil/false when unset.
def heartbeat_enabled?
  missed_limit = config.max_missed_heartbeat_seconds
  missed_limit
end
|
|
305
|
+
|
|
306
|
+
# Body of the heartbeat thread. Roughly once per second (sooner when a new
# command is set) it looks at the current heartbeat command:
#   :tick  - forwards the test id to the heartbeat process, at most
#            config.timeout times in a row;
#   :reset - restores the tick budget to config.timeout;
#   :stop  - leaves the loop.
# Any other value (including no command at all) is ignored.
def heartbeat
  current = Thread.current
  current.name = "CI::Queue#heartbeat"
  current.abort_on_exception = true

  remaining = config.timeout.to_i
  loop do
    # waits for max 1 second but wakes up immediately if we receive a command
    command = heartbeat_state.wait(1)
    verb = command&.first

    break if verb == :stop

    if verb == :reset
      remaining = config.timeout.to_i
    elsif verb == :tick && remaining.positive?
      heartbeat_process.tick!(command.last)
      remaining -= 1
    end
  end
end
|
|
135
328
|
end
|
|
136
329
|
end
|
|
137
330
|
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
-- AUTOGENERATED FILE DO NOT EDIT DIRECTLY
-- Refreshes the score of a test in the zset, but only while the calling
-- worker still owns it and it has not been processed yet.
-- KEYS: zset, processed set, owners hash, worker queue key
-- ARGV: current time, test id
local zset_key, processed_key, owners_key, worker_queue_key = KEYS[1], KEYS[2], KEYS[3], KEYS[4]
local current_time, test = ARGV[1], ARGV[2]

-- already processed, we do not need to bump the timestamp
local already_processed = redis.call('sismember', processed_key, test) == 1
if already_processed then
  return false
end

-- we're still the owner of the test, we can bump the timestamp
local owner = redis.call('hget', owners_key, test)
if owner == worker_queue_key then
  return redis.call('zadd', zset_key, current_time, test)
end
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
#!/usr/bin/env -S ruby --disable-gems
|
|
2
|
+
# typed: false
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
|
|
5
|
+
require 'logger'
|
|
6
|
+
require 'redis'
|
|
7
|
+
require 'json'
|
|
8
|
+
|
|
9
|
+
module CI
  module Queue
    module Redis
      # Heartbeat monitor, meant to run in its own process. It reads
      # length-prefixed JSON messages from `pipe` and dispatches each one to
      # the matching `process_<type>` method; a tick message runs the
      # `heartbeat` Lua script against Redis.
      class Monitor
        DEV_SCRIPTS_ROOT = ::File.expand_path('../../../../../../redis', __FILE__)
        RELEASE_SCRIPTS_ROOT = ::File.expand_path('../../redis', __FILE__)

        # Every message starts with its payload size packed with directive "L"
        # (native-endian unsigned 32-bit integer).
        HEADER = 'L'
        HEADER_SIZE = [0].pack(HEADER).bytesize

        # pipe   - IO the messages are read from.
        # logger - object responding to #debug and #info.
        # The remaining arguments are the Redis URL and the key names handed
        # to the heartbeat script. Also installs TERM/INT/USR1 handlers that
        # only enqueue the signal for the main loop.
        def initialize(pipe, logger, redis_url, zset_key, processed_key, owners_key, worker_queue_key)
          @zset_key = zset_key
          @processed_key = processed_key
          @owners_key = owners_key
          @worker_queue_key = worker_queue_key
          @logger = logger
          @redis = ::Redis.new(url: redis_url, reconnect_attempts: [0, 0, 0.1, 0.5, 1, 3, 5])
          @shutdown = false
          @pipe = pipe
          @self_pipe_reader, @self_pipe_writer = IO.pipe
          @self_pipe_writer.sync = true
          @queue = []
          @deadlines = {}
          %i[TERM INT USR1].each do |sig|
            Signal.trap(sig) { soft_signal(sig) }
          end
        end

        # Signal handler body: records the signal and wakes the main loop up
        # through the self-pipe.
        def soft_signal(sig)
          @queue << sig
          @self_pipe_writer << '.'
        end

        # Runs the heartbeat script for the given test id with the current
        # time. Errors are logged and swallowed so one failed tick does not
        # stop the monitor.
        def process_tick!(id:)
          eval_script(
            :heartbeat,
            keys: [@zset_key, @processed_key, @owners_key, @worker_queue_key],
            argv: [Time.now.to_f, id]
          )
        rescue => error
          @logger.info(error)
        end

        def eval_script(script, *args)
          @redis.evalsha(load_script(script), *args)
        end

        # Loads the script into Redis once and caches the returned SHA.
        def load_script(script)
          @scripts_cache ||= {}
          @scripts_cache[script] ||= @redis.script(:load, read_script(script))
        end

        # Reads the Lua source from the development tree, falling back to the
        # copy next to this file.
        def read_script(name)
          ::File.read(::File.join(DEV_SCRIPTS_ROOT, "#{name}.lua"))
        rescue SystemCallError
          ::File.read(::File.join(RELEASE_SCRIPTS_ROOT, "#{name}.lua"))
        end

        # Reads one message from io.
        #
        # Returns the parsed JSON message, nil when no data is available right
        # now, or false when the pipe was closed (in which case shutdown is
        # requested). A message cut short by the pipe closing is treated like
        # a closed pipe instead of being parsed.
        def read_message(io)
          case header = io.read_nonblock(HEADER_SIZE, exception: false)
          when :wait_readable
            nil
          when nil
            broken_pipe!
          else
            # read_nonblock may legally return fewer bytes than requested;
            # complete the header before decoding the size.
            if header.bytesize < HEADER_SIZE
              header << io.read(HEADER_SIZE - header.bytesize).to_s
              return broken_pipe! if header.bytesize < HEADER_SIZE
            end

            size = header.unpack1(HEADER)
            payload = io.read(size)
            return broken_pipe! if payload.nil? || payload.bytesize < size

            JSON.parse(payload)
          end
        end

        # Drains every message currently readable from io and dispatches it to
        # the public `process_<type>` method with the message's keyword
        # arguments.
        def process_messages(io)
          while (message = read_message(io))
            type, kwargs = message
            kwargs.transform_keys!(&:to_sym)
            public_send("process_#{type}", **kwargs)
          end
        end

        # Waits up to 10 seconds for one of the ios to become readable and
        # handles whichever did.
        def wait_for_events(ios)
          return if @shutdown

          return unless (ready = IO.select(ios, nil, nil, 10))

          ready[0].each do |io|
            case io
            when @self_pipe_reader
              io.read_nonblock(512, exception: false) # Just flush the pipe, the information is in the @queue
            when @pipe
              process_messages(@pipe)
            else
              @logger.debug("Unknown reader: #{io.inspect}")
              raise "Unknown reader: #{io.inspect}"
            end
          end
        end

        # Main loop: handles queued signals, then waits for pipe activity,
        # until shutdown is requested. Returns the value stored in @shutdown
        # (0 on the shutdown paths in this class).
        def monitor
          @logger.debug("Starting monitor")
          ios = [@self_pipe_reader, @pipe]

          until @shutdown
            while (sig = @queue.shift)
              case sig
              when :INT, :TERM
                @logger.debug("Received #{sig}, exiting")
                @shutdown = 0
                break
              else
                # NOTE(review): USR1 is trapped in #initialize but ends up here.
                raise "Unknown signal: #{sig.inspect}"
              end
            end

            wait_for_events(ios)
          end

          @logger.debug('Done')
          @shutdown
        end

        private

        # Records that the parent side of the pipe is gone and requests a
        # shutdown with exit status 0. Returns false so callers stop reading.
        def broken_pipe!
          @logger.debug('Broken pipe, exiting')
          @shutdown = 0
          false
        end
      end
    end
  end
end
|
|
131
|
+
|
|
132
|
+
# Script entry point: logs to stderr, reads the Redis URL and key names from
# the command line, tells the parent we are ready, then runs the monitor loop
# and exits with its return value.
logger = Logger.new($stderr)
verbose = ARGV.include?('-v')
logger.level = verbose ? Logger::DEBUG : Logger::INFO
unless verbose
  logger.formatter = ->(_severity, _timestamp, _progname, msg) { "[CI Queue Monitor] #{msg}\n" }
end

redis_url, zset_key, processed_key, owners_key, worker_queue_key = ARGV.first(5)

logger.debug("Starting monitor: #{redis_url} #{zset_key} #{processed_key}")
manager = CI::Queue::Redis::Monitor.new(
  $stdin, logger, redis_url, zset_key, processed_key, owners_key, worker_queue_key
)

# Notify the parent we're ready
$stdout.puts(".")
$stdout.close

exit(manager.monitor)
|
|
@@ -46,7 +46,7 @@ module CI
|
|
|
46
46
|
|
|
47
47
|
# True when at least one entry of the 'running' sorted set has a score within
# the last config.timeout seconds.
def active_workers?
  # if there are running jobs we assume there are still agents active
  cutoff = CI::Queue.time_now.to_f - config.timeout
  recent = redis.zrangebyscore(key('running'), cutoff, "+inf", limit: [0, 1])
  !recent.empty?
end
|
|
51
51
|
end
|
|
52
52
|
end
|
|
@@ -144,10 +144,6 @@ module CI
|
|
|
144
144
|
config.worker_id
|
|
145
145
|
end
|
|
146
146
|
|
|
147
|
-
def timeout
|
|
148
|
-
config.timeout
|
|
149
|
-
end
|
|
150
|
-
|
|
151
147
|
def raise_on_mismatching_test(test)
|
|
152
148
|
if @reserved_test == test
|
|
153
149
|
@reserved_test = nil
|
|
@@ -175,11 +171,13 @@ module CI
|
|
|
175
171
|
key('worker', worker_id, 'queue'),
|
|
176
172
|
key('owners'),
|
|
177
173
|
],
|
|
178
|
-
argv: [
|
|
174
|
+
argv: [CI::Queue.time_now.to_f],
|
|
179
175
|
)
|
|
180
176
|
end
|
|
181
177
|
|
|
182
178
|
def try_to_reserve_lost_test
|
|
179
|
+
timeout = config.max_missed_heartbeat_seconds ? config.max_missed_heartbeat_seconds : config.timeout
|
|
180
|
+
|
|
183
181
|
lost_test = eval_script(
|
|
184
182
|
:reserve_lost,
|
|
185
183
|
keys: [
|
|
@@ -188,11 +186,11 @@ module CI
|
|
|
188
186
|
key('worker', worker_id, 'queue'),
|
|
189
187
|
key('owners'),
|
|
190
188
|
],
|
|
191
|
-
argv: [
|
|
189
|
+
argv: [CI::Queue.time_now.to_f, timeout],
|
|
192
190
|
)
|
|
193
191
|
|
|
194
192
|
if lost_test
|
|
195
|
-
build.record_warning(Warnings::RESERVED_LOST_TEST, test: lost_test, timeout: timeout)
|
|
193
|
+
build.record_warning(Warnings::RESERVED_LOST_TEST, test: lost_test, timeout: config.timeout)
|
|
196
194
|
end
|
|
197
195
|
|
|
198
196
|
lost_test
|
data/lib/ci/queue/static.rb
CHANGED
|
@@ -48,12 +48,22 @@ module CI
|
|
|
48
48
|
self
|
|
49
49
|
end
|
|
50
50
|
|
|
51
|
+
# The static queue has no heartbeat: simply runs the block and returns its
# value. `id` is accepted for interface parity and ignored.
def with_heartbeat(id)
  yield
end
|
|
54
|
+
|
|
55
|
+
# No-op: the static queue has no heartbeat thread.
def ensure_heartbeat_thread_alive!
end
|
|
56
|
+
|
|
57
|
+
# No-op: the static queue has no heartbeat process to boot.
def boot_heartbeat_process!
end
|
|
58
|
+
|
|
59
|
+
# No-op: the static queue has no heartbeat to stop.
def stop_heartbeat!
end
|
|
60
|
+
|
|
51
61
|
# Records the creation timestamp the first time it is assigned; later
# assignments leave the stored value untouched.
def created_at=(timestamp)
  @created_at = timestamp unless @created_at
  @created_at
end
|
|
54
64
|
|
|
55
65
|
# True once more than TEN_MINUTES have elapsed since @created_at.
# An unset @created_at counts as 0.0 (nil.to_f).
def expired?
  (@created_at.to_f + TEN_MINUTES) < CI::Queue.time_now.to_f
end
|
|
58
68
|
|
|
59
69
|
def populated?
|
data/lib/ci/queue/version.rb
CHANGED
data/lib/ci/queue.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require 'uri'
|
|
4
4
|
require 'cgi'
|
|
5
|
+
require 'json'
|
|
5
6
|
|
|
6
7
|
require 'ci/queue/version'
|
|
7
8
|
require 'ci/queue/output_helpers'
|
|
@@ -24,6 +25,15 @@ module CI
|
|
|
24
25
|
RESERVED_LOST_TEST = :RESERVED_LOST_TEST
|
|
25
26
|
end
|
|
26
27
|
|
|
28
|
+
GET_NOW = ::Time.method(:now)
|
|
29
|
+
private_constant :GET_NOW
|
|
30
|
+
# Current time obtained through the Time.now method object captured in
# GET_NOW.
def time_now
  # Mocks like freeze_time should be cleaned when ci-queue runs, however,
  # we experienced cases when tests were enqueued with wrong timestamps, so we
  # safeguard Time.now here.
  GET_NOW.()
end
|
|
36
|
+
|
|
27
37
|
# Whether the given test result may be requeued: always true when no
# `requeueable` callable is configured, otherwise whatever that callable
# returns for the result.
def requeueable?(test_result)
  return true if requeueable.nil?

  requeueable.call(test_result)
end
|
|
@@ -63,7 +63,8 @@ module Minitest
|
|
|
63
63
|
end
|
|
64
64
|
end
|
|
65
65
|
|
|
66
|
-
queue.rescue_connection_errors { queue.created_at =
|
|
66
|
+
queue.rescue_connection_errors { queue.created_at = CI::Queue.time_now.to_f }
|
|
67
|
+
queue.boot_heartbeat_process!
|
|
67
68
|
|
|
68
69
|
set_load_path
|
|
69
70
|
Minitest.queue = queue
|
|
@@ -582,10 +583,24 @@ module Minitest
|
|
|
582
583
|
queue.config.redis_ttl = time
|
|
583
584
|
end
|
|
584
585
|
|
|
586
|
+
help = <<~EOS
|
|
587
|
+
If heartbeat is enabled, a background process will periodically signal it's still processing
|
|
588
|
+
the current test. If the heartbeat stops for the specified amount of seconds,
|
|
589
|
+
the test will be requeued to another worker.
|
|
590
|
+
EOS
|
|
591
|
+
opts.on("--heartbeat [SECONDS]", Integer, help) do |time|
|
|
592
|
+
queue_config.max_missed_heartbeat_seconds = time || 30
|
|
593
|
+
end
|
|
594
|
+
|
|
595
|
+
|
|
585
596
|
opts.on("-v", "--verbose", "Verbose. Show progress processing files.") do
|
|
586
597
|
self.verbose = true
|
|
587
598
|
end
|
|
588
599
|
|
|
600
|
+
opts.on("--debug-log FILE", "Path to debug log file for e.g. Redis instrumentation") do |path|
|
|
601
|
+
queue_config.debug_log = path
|
|
602
|
+
end
|
|
603
|
+
|
|
589
604
|
opts.separator ""
|
|
590
605
|
opts.separator " retry: Replays a previous run in the same order."
|
|
591
606
|
|
data/lib/minitest/queue.rb
CHANGED
|
@@ -187,7 +187,7 @@ module Minitest
|
|
|
187
187
|
private
|
|
188
188
|
|
|
189
189
|
# Current time from CI::Queue.time_now as whole seconds.
def current_timestamp
  now = CI::Queue.time_now
  now.to_i
end
|
|
192
192
|
end
|
|
193
193
|
|
|
@@ -226,7 +226,10 @@ module Minitest
|
|
|
226
226
|
|
|
227
227
|
def run_from_queue(reporter, *)
|
|
228
228
|
queue.poll do |example|
|
|
229
|
-
result = example.
|
|
229
|
+
result = queue.with_heartbeat(example.id) do
|
|
230
|
+
example.run
|
|
231
|
+
end
|
|
232
|
+
|
|
230
233
|
failed = !(result.passed? || result.skipped?)
|
|
231
234
|
|
|
232
235
|
if example.flaky?
|
|
@@ -256,6 +259,7 @@ module Minitest
|
|
|
256
259
|
reporter.record(result)
|
|
257
260
|
end
|
|
258
261
|
end
|
|
262
|
+
queue.stop_heartbeat!
|
|
259
263
|
end
|
|
260
264
|
end
|
|
261
265
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ci-queue
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.43.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jean Boussier
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2024-01-
|
|
11
|
+
date: 2024-01-30 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -188,6 +188,8 @@ files:
|
|
|
188
188
|
- lib/ci/queue/redis/grind.rb
|
|
189
189
|
- lib/ci/queue/redis/grind_record.rb
|
|
190
190
|
- lib/ci/queue/redis/grind_supervisor.rb
|
|
191
|
+
- lib/ci/queue/redis/heartbeat.lua
|
|
192
|
+
- lib/ci/queue/redis/monitor.rb
|
|
191
193
|
- lib/ci/queue/redis/release.lua
|
|
192
194
|
- lib/ci/queue/redis/requeue.lua
|
|
193
195
|
- lib/ci/queue/redis/reserve.lua
|
|
@@ -239,7 +241,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
239
241
|
- !ruby/object:Gem::Version
|
|
240
242
|
version: '0'
|
|
241
243
|
requirements: []
|
|
242
|
-
rubygems_version: 3.5.
|
|
244
|
+
rubygems_version: 3.5.5
|
|
243
245
|
signing_key:
|
|
244
246
|
specification_version: 4
|
|
245
247
|
summary: Distribute tests over many workers using a queue
|