ci-queue 0.42.0 → 0.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fb0b6d2c1537200e0a3540652972d25456cda0fe2115a65a884c8f5feeb17ff7
4
- data.tar.gz: 7ac240806fdf6f16edfa314c59dccb22b379dbc2795d23d0c4ad19f4a46956d1
3
+ metadata.gz: 9fafefb68f5e00faba6e1c19944c0289bd0e7c5ca4d67090605fea402f2b04f3
4
+ data.tar.gz: fe85dc8a004f45a54203eac3e4294e5348bca9d8920b70ad2427fed9f5f26776
5
5
  SHA512:
6
- metadata.gz: feaea129a7c672e57372e6475dc0fdc049c1e8b7815994b9ece3b4db0cbf6970ba19c222ed6bf9a652c7e68c7781e46edbd45e0eaa0da2b4e4d31b3856980aed
7
- data.tar.gz: e2b9112e41b223c85b0acc20a7129d2ec517a816e73a42bea381ce696af8821dbaee1c5151d5002784630b1c0104b6c342d91b6273dd12de01c063d5bef5b1fe
6
+ metadata.gz: a190ef07194b9cbb9c880de74abd9e54ede4d3e52480e26ba20c16a80720fafb66ce5933bc21df4213e7cefc9abb5f3893b75e46fb7e8327ee6e11b8a5e26de4
7
+ data.tar.gz: 5b3a127d41fde0f8094878e8cfcd088eb2d1a32fc86798b98adafbf06caa19e201631899fa4aec9c0afa8d8a9174215d529dd1dfdf637e4e9122dfb93f99d741
@@ -5,7 +5,7 @@ module CI
5
5
  attr_accessor :timeout, :worker_id, :max_requeues, :grind_count, :failure_file, :export_flaky_tests_file
6
6
  attr_accessor :requeue_tolerance, :namespace, :failing_test, :statsd_endpoint
7
7
  attr_accessor :max_test_duration, :max_test_duration_percentile, :track_test_duration
8
- attr_accessor :max_test_failed, :redis_ttl, :warnings_file, :debug_log
8
+ attr_accessor :max_test_failed, :redis_ttl, :warnings_file, :debug_log, :max_missed_heartbeat_seconds
9
9
  attr_reader :circuit_breakers
10
10
  attr_writer :seed, :build_id
11
11
  attr_writer :queue_init_timeout, :report_timeout, :inactive_workers_timeout
@@ -37,7 +37,7 @@ module CI
37
37
  grind_count: nil, max_duration: nil, failure_file: nil, max_test_duration: nil,
38
38
  max_test_duration_percentile: 0.5, track_test_duration: false, max_test_failed: nil,
39
39
  queue_init_timeout: nil, redis_ttl: 8 * 60 * 60, report_timeout: nil, inactive_workers_timeout: nil,
40
- export_flaky_tests_file: nil, warnings_file: nil, debug_log: nil)
40
+ export_flaky_tests_file: nil, warnings_file: nil, debug_log: nil, max_missed_heartbeat_seconds: nil)
41
41
  @build_id = build_id
42
42
  @circuit_breakers = [CircuitBreaker::Disabled]
43
43
  @failure_file = failure_file
@@ -63,6 +63,7 @@ module CI
63
63
  @export_flaky_tests_file = export_flaky_tests_file
64
64
  @warnings_file = warnings_file
65
65
  @debug_log = debug_log
66
+ @max_missed_heartbeat_seconds = max_missed_heartbeat_seconds
66
67
  end
67
68
 
68
69
  def queue_init_timeout
@@ -35,7 +35,7 @@ module CI
35
35
  url: redis_url,
36
36
  # Booting a CI worker is costly, so in case of a Redis blip,
37
37
  # it makes sense to retry for a while before giving up.
38
- reconnect_attempts: [0, 0, 0.1, 0.5, 1, 3, 5],
38
+ reconnect_attempts: reconnect_attempts,
39
39
  middlewares: custom_middlewares,
40
40
  custom: custom_config,
41
41
  )
@@ -44,6 +44,43 @@ module CI
44
44
  end
45
45
  end
46
46
 
47
# Value handed to the Redis client as its `reconnect_attempts:` option.
#
# Returns an empty list (no retries) whenever the
# CI_QUEUE_DISABLE_RECONNECT_ATTEMPTS environment variable is set to any
# value, otherwise the default retry schedule.
# NOTE(review): the entries are presumably per-attempt delays in seconds —
# confirm against the Redis client documentation.
def reconnect_attempts
  if ENV["CI_QUEUE_DISABLE_RECONNECT_ATTEMPTS"]
    []
  else
    [0, 0, 0.1, 0.5, 1, 3, 5]
  end
end
52
+
53
# Runs the given block with the heartbeat switched to `:tick` for `id`.
#
# When the heartbeat is enabled, the heartbeat thread is (re)started if
# needed and the shared state is set to `[:tick, id]` before yielding. The
# state is always put back to `[:reset]` afterwards, even if the block raises.
#
# @param id the identifier forwarded with every tick
# @return whatever the block returns
def with_heartbeat(id)
  begin
    if heartbeat_enabled?
      ensure_heartbeat_thread_alive!
      heartbeat_state.set(:tick, id)
    end

    yield
  ensure
    if heartbeat_enabled?
      heartbeat_state.set(:reset)
    end
  end
end
63
+
64
# Starts the thread running #heartbeat unless the heartbeat is disabled or a
# previously started thread is still alive.
def ensure_heartbeat_thread_alive!
  if heartbeat_enabled? && !@heartbeat_thread&.alive?
    @heartbeat_thread = Thread.start { heartbeat }
  end
end
70
+
71
# Boots the heartbeat child process; does nothing when the heartbeat is
# disabled.
def boot_heartbeat_process!
  heartbeat_process.boot! if heartbeat_enabled?
end
76
+
77
# Tells the heartbeat loop to stop (state `:stop`) and then shuts down the
# heartbeat child process. Does nothing when the heartbeat is disabled.
#
# @return the result of HeartbeatProcess#shutdown!, or nil when disabled
def stop_heartbeat!
  if heartbeat_enabled?
    heartbeat_state.set(:stop)
    heartbeat_process.shutdown!
  end
end
83
+
47
84
  def custom_config
48
85
  return unless config.debug_log
49
86
 
@@ -163,6 +200,131 @@ module CI
163
200
  rescue SystemCallError
164
201
  ::File.read(::File.join(CI::Queue::RELEASE_SCRIPTS_ROOT, "#{name}.lua"))
165
202
  end
203
+
204
# Parent-side handle on the child process that runs `monitor.rb`.
#
# The child receives the Redis URL and key names as command-line arguments,
# reads framed messages from its stdin (the write end is kept in @pipe) and
# signals readiness by writing a line to its stdout.
class HeartbeatProcess
  # @param redis_url [String] forwarded verbatim to the child process
  # @param zset_key, processed_key, owners_key, worker_queue_key
  #   Redis key names forwarded verbatim to the child process
  def initialize(redis_url, zset_key, processed_key, owners_key, worker_queue_key)
    @redis_url = redis_url
    @zset_key = zset_key
    @processed_key = processed_key
    @owners_key = owners_key
    @worker_queue_key = worker_queue_key
  end

  # Spawns the monitor child and waits up to 10 seconds for it to report
  # that it is ready.
  #
  # @return [IO] the write end of the pipe connected to the child's stdin
  # @raise [RuntimeError] if the child is not ready in time, or exits
  #   before reporting readiness
  def boot!
    child_read, @pipe = IO.pipe
    ready_pipe, child_write = IO.pipe
    @pipe.binmode
    @pid = Process.spawn(
      RbConfig.ruby,
      ::File.join(__dir__, "monitor.rb"),
      @redis_url,
      @zset_key,
      @processed_key,
      @owners_key,
      @worker_queue_key,
      in: child_read,
      out: child_write,
    )
    # The parent must drop its copies of the child's ends, otherwise EOF on
    # ready_pipe could never be observed when the child dies.
    child_read.close
    child_write.close

    unless ready_pipe.wait_readable(10)
      # The child is still alive but stuck: signal 0 would only probe it and
      # waiting on it would block forever, so kill it before reaping.
      terminate_child!
      raise "Monitor child wasn't ready after 10 seconds"
    end

    # Readable with no data means EOF: the child closed its stdout without
    # ever announcing readiness, i.e. it died while booting.
    if ready_pipe.gets.nil?
      terminate_child!
      raise "Monitor child exited before signalling it was ready"
    end

    # Check the process is alive.
    Process.kill(0, @pid)
    @pipe
  ensure
    # Closing an already closed IO is a no-op, so this only matters on the
    # error paths.
    [child_read, child_write, ready_pipe].each { |io| io&.close }
  end

  # Closes the pipe to the child (which makes it exit on EOF) and reaps it.
  # Safe to call when #boot! was never called.
  #
  # @return [Process::Status, nil] the child's exit status, or nil when
  #   there is no child to wait for
  def shutdown!
    @pipe&.close
    return unless @pid

    begin
      _, status = Process.waitpid2(@pid)
      status
    rescue Errno::ECHILD
      nil
    end
  end

  # Asks the child to bump the heartbeat for `id`.
  def tick!(id)
    send_message(:tick!, id: id)
  end

  private

  # Writes one frame: a 4-byte native-endian unsigned length ("L") followed
  # by the JSON-encoded message.
  def send_message(*message)
    payload = message.to_json
    @pipe.write([payload.bytesize].pack("L").b, payload)
  end

  # Forcefully stops and reaps a child that failed to boot.
  def terminate_child!
    @pipe.close
    Process.kill(:KILL, @pid)
    Process.wait(@pid)
  rescue Errno::ESRCH, Errno::ECHILD
    nil
  end
end
265
+
266
# Last-write-wins mailbox shared between the thread issuing heartbeat
# commands and the heartbeat thread.
#
# The stored value is the argument list of the most recent #set call (an
# Array), or nil before the first call. Both the write and the read happen
# while holding the mutex associated with the condition variable.
class State
  def initialize
    @state = nil
    @mutex = Mutex.new
    @cond = ConditionVariable.new
  end

  # Replaces the current state with `state` and wakes up every thread
  # blocked in #wait.
  def set(*state)
    @mutex.synchronize do
      @state = state
      @cond.broadcast
    end
  end

  # Blocks until #set is called or `timeout` seconds elapse, whichever comes
  # first.
  #
  # @param timeout [Numeric] maximum time to wait, in seconds
  # @return [Array, nil] the current state (unchanged if the wait timed out)
  def wait(timeout)
    @mutex.synchronize do
      @cond.wait(@mutex, timeout)
      @state
    end
  end
end
287
+
288
# Lazily created State instance used to pass commands to the heartbeat
# thread.
def heartbeat_state
  @heartbeat_state || (@heartbeat_state = State.new)
end
291
+
292
# Lazily created HeartbeatProcess, configured with the Redis URL and the
# 'running', 'processed', 'owners' and per-worker queue keys.
def heartbeat_process
  return @heartbeat_process if @heartbeat_process

  monitored_keys = [
    key('running'),
    key('processed'),
    key('owners'),
    key('worker', worker_id, 'queue'),
  ]
  @heartbeat_process = HeartbeatProcess.new(@redis_url, *monitored_keys)
end
301
+
302
# Whether the heartbeat is turned on.
#
# Returns the configured `max_missed_heartbeat_seconds` itself (truthy when
# set, nil/false otherwise) rather than a strict boolean.
def heartbeat_enabled?
  config.max_missed_heartbeat_seconds
end
305
+
306
# Body of the heartbeat thread.
#
# Polls the heartbeat state roughly once per second and reacts to the
# current command:
#   :tick  - forwards a tick for the given id to the heartbeat process, at
#            most `config.timeout.to_i` times until the next :reset
#   :reset - restores the tick budget to `config.timeout.to_i`
#   :stop  - leaves the loop
# Any exception raised in this thread aborts the whole process.
def heartbeat
  current = Thread.current
  current.name = "CI::Queue#heartbeat"
  current.abort_on_exception = true

  remaining_ticks = config.timeout.to_i
  loop do
    # waits for max 1 second but wakes up immediately if we receive a command
    command = heartbeat_state.wait(1)
    kind = command&.first

    break if kind == :stop

    if kind == :reset
      remaining_ticks = config.timeout.to_i
    elsif kind == :tick && remaining_ticks > 0
      heartbeat_process.tick!(command.last)
      remaining_ticks -= 1
    end
  end
end
166
328
  end
167
329
  end
168
330
  end
@@ -0,0 +1,18 @@
1
+ -- AUTOGENERATED FILE DO NOT EDIT DIRECTLY
2
+ local zset_key = KEYS[1]
3
+ local processed_key = KEYS[2]
4
+ local owners_key = KEYS[3]
5
+ local worker_queue_key = KEYS[4]
6
+
7
+ local current_time = ARGV[1]
8
+ local test = ARGV[2]
9
+
10
+ -- already processed, we do not need to bump the timestamp
11
+ if redis.call('sismember', processed_key, test) == 1 then
12
+ return false
13
+ end
14
+
15
+ -- we're still the owner of the test, we can bump the timestamp
16
+ if redis.call('hget', owners_key, test) == worker_queue_key then
17
+ return redis.call('zadd', zset_key, current_time, test)
18
+ end
@@ -0,0 +1,153 @@
1
+ #!/usr/bin/env -S ruby --disable-gems
2
+ # typed: false
3
+ # frozen_string_literal: true
4
+
5
+ require 'logger'
6
+ require 'redis'
7
+ require 'json'
8
+
9
module CI
  module Queue
    module Redis
      # Event loop of the heartbeat child process.
      #
      # Reads length-prefixed JSON messages from `pipe` and dispatches each
      # one to the matching `process_<type>` method. It stops when the pipe
      # reaches EOF or when INT/TERM is received.
      class Monitor
        DEV_SCRIPTS_ROOT = ::File.expand_path('../../../../../../redis', __FILE__)
        RELEASE_SCRIPTS_ROOT = ::File.expand_path('../../redis', __FILE__)

        # Frame header: pack/unpack directive for the payload length, and
        # its size in bytes.
        HEADER = 'L'
        HEADER_SIZE = [0].pack(HEADER).bytesize

        # @param pipe [IO] stream the framed messages are read from
        # @param logger [Logger]
        # @param redis_url [String]
        # @param zset_key, processed_key, owners_key, worker_queue_key
        #   key names passed to the heartbeat Lua script
        def initialize(pipe, logger, redis_url, zset_key, processed_key, owners_key, worker_queue_key)
          @zset_key = zset_key
          @processed_key = processed_key
          @owners_key = owners_key
          @worker_queue_key = worker_queue_key
          @logger = logger
          @redis = ::Redis.new(url: redis_url, reconnect_attempts: [0, 0, 0.1, 0.5, 1, 3, 5])
          @shutdown = false
          @pipe = pipe
          # Self-pipe: signal handlers only enqueue the signal and write a
          # byte here so the IO.select in #wait_for_events wakes up.
          @self_pipe_reader, @self_pipe_writer = IO.pipe
          @self_pipe_writer.sync = true
          @queue = []
          @deadlines = {}
          %i[TERM INT USR1].each do |sig|
            Signal.trap(sig) { soft_signal(sig) }
          end
        end

        # Records a received signal for later handling by #monitor.
        def soft_signal(sig)
          @queue << sig
          @self_pipe_writer << '.'
        end

        # Handles a "tick!" message by running the heartbeat script for `id`
        # with the current time. Errors are logged and swallowed so a Redis
        # failure does not stop the monitor.
        def process_tick!(id:)
          eval_script(
            :heartbeat,
            keys: [@zset_key, @processed_key, @owners_key, @worker_queue_key],
            argv: [Time.now.to_f, id]
          )
        rescue => error
          @logger.info(error)
        end

        def eval_script(script, *args)
          @redis.evalsha(load_script(script), *args)
        end

        # Loads the script into Redis once and memoizes what `SCRIPT LOAD`
        # returned.
        def load_script(script)
          @scripts_cache ||= {}
          @scripts_cache[script] ||= @redis.script(:load, read_script(script))
        end

        # Reads `<name>.lua` from the development directory, falling back to
        # the release directory.
        def read_script(name)
          ::File.read(::File.join(DEV_SCRIPTS_ROOT, "#{name}.lua"))
        rescue SystemCallError
          ::File.read(::File.join(RELEASE_SCRIPTS_ROOT, "#{name}.lua"))
        end

        # Reads one framed message from `io`.
        #
        # @return the parsed JSON message; nil when nothing is available
        #   yet; false when the pipe is closed or ends in the middle of a
        #   frame (in which case shutdown is requested)
        def read_message(io)
          case header = io.read_nonblock(HEADER_SIZE, exception: false)
          when :wait_readable
            nil
          when nil
            broken_pipe!
          else
            # A frame has started: finish reading a short header before
            # decoding the length.
            if header.bytesize < HEADER_SIZE
              header += io.read(HEADER_SIZE - header.bytesize).to_s
              return broken_pipe! if header.bytesize < HEADER_SIZE
            end

            size = header.unpack1(HEADER)
            payload = io.read(size)
            # nil or a short read means the writer went away mid-frame.
            return broken_pipe! if payload.nil? || payload.bytesize < size

            JSON.parse(payload)
          end
        end

        # Drains every message currently available on `io`, dispatching each
        # `[type, kwargs]` pair to `process_<type>(**kwargs)`.
        def process_messages(io)
          while (message = read_message(io))
            type, kwargs = message
            kwargs.transform_keys!(&:to_sym)
            public_send("process_#{type}", **kwargs)
          end
        end

        # Waits up to 10 seconds for one of `ios` to become readable and
        # handles whichever did.
        def wait_for_events(ios)
          return if @shutdown

          return unless (ready = IO.select(ios, nil, nil, 10))

          ready[0].each do |io|
            case io
            when @self_pipe_reader
              io.read_nonblock(512, exception: false) # Just flush the pipe, the information is in the @queue
            when @pipe
              process_messages(@pipe)
            else
              @logger.debug("Unknown reader: #{io.inspect}")
              raise "Unknown reader: #{io.inspect}"
            end
          end
        end

        # Runs the event loop until shutdown is requested.
        #
        # @return the value stored in @shutdown (0 for a normal shutdown)
        # @raise [RuntimeError] for a trapped signal other than INT/TERM
        def monitor
          @logger.debug("Starting monitor")
          ios = [@self_pipe_reader, @pipe]

          until @shutdown
            while (sig = @queue.shift)
              case sig
              when :INT, :TERM
                @logger.debug("Received #{sig}, exiting")
                @shutdown = 0
                break
              else
                raise "Unknown signal: #{sig.inspect}"
              end
            end

            wait_for_events(ios)
          end

          @logger.debug('Done')
          @shutdown
        end

        private

        # Requests a clean shutdown after the message pipe was closed;
        # returns false so #process_messages stops draining.
        def broken_pipe!
          @logger.debug('Broken pipe, exiting')
          @shutdown = 0
          false
        end
      end
    end
  end
end
131
+
132
# Entry point of the monitor child process.
#
# Positional arguments: redis URL, zset key, processed key, owners key and
# worker queue key. Passing `-v` anywhere switches the logger to DEBUG with
# the default formatter; otherwise it logs at INFO with a short prefix.
verbose = ARGV.include?('-v')
logger = Logger.new($stderr)
logger.level = verbose ? Logger::DEBUG : Logger::INFO
unless verbose
  logger.formatter = ->(_severity, _timestamp, _progname, msg) { "[CI Queue Monitor] #{msg}\n" }
end

redis_url, zset_key, processed_key, owners_key, worker_queue_key = ARGV

logger.debug("Starting monitor: #{redis_url} #{zset_key} #{processed_key}")
manager = CI::Queue::Redis::Monitor.new($stdin, logger, redis_url, zset_key, processed_key, owners_key, worker_queue_key)

# Notify the parent we're ready
$stdout.puts(".")
$stdout.close

# The value returned by the event loop becomes the process exit status.
exit(manager.monitor)
@@ -144,10 +144,6 @@ module CI
144
144
  config.worker_id
145
145
  end
146
146
 
147
- def timeout
148
- config.timeout
149
- end
150
-
151
147
  def raise_on_mismatching_test(test)
152
148
  if @reserved_test == test
153
149
  @reserved_test = nil
@@ -180,6 +176,8 @@ module CI
180
176
  end
181
177
 
182
178
  def try_to_reserve_lost_test
179
+ timeout = config.max_missed_heartbeat_seconds ? config.max_missed_heartbeat_seconds : config.timeout
180
+
183
181
  lost_test = eval_script(
184
182
  :reserve_lost,
185
183
  keys: [
@@ -192,7 +190,7 @@ module CI
192
190
  )
193
191
 
194
192
  if lost_test
195
- build.record_warning(Warnings::RESERVED_LOST_TEST, test: lost_test, timeout: timeout)
193
+ build.record_warning(Warnings::RESERVED_LOST_TEST, test: lost_test, timeout: config.timeout)
196
194
  end
197
195
 
198
196
  lost_test
@@ -48,6 +48,16 @@ module CI
48
48
  self
49
49
  end
50
50
 
51
# Heartbeat-free variant: simply runs the block and returns its value.
# `id` is accepted for interface compatibility and is not used.
def with_heartbeat(id)
  yield
end
54
+
55
# No-op: this queue implementation runs no heartbeat thread.
def ensure_heartbeat_thread_alive!
end
56
+
57
# No-op: this queue implementation spawns no heartbeat process.
def boot_heartbeat_process!
end
58
+
59
# No-op: this queue implementation has no heartbeat to stop.
def stop_heartbeat!
end
60
+
51
61
  def created_at=(timestamp)
52
62
  @created_at ||= timestamp
53
63
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module CI
4
4
  module Queue
5
- VERSION = '0.42.0'
5
+ VERSION = '0.43.0'
6
6
  DEV_SCRIPTS_ROOT = ::File.expand_path('../../../../../redis', __FILE__)
7
7
  RELEASE_SCRIPTS_ROOT = ::File.expand_path('../redis', __FILE__)
8
8
  end
data/lib/ci/queue.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'uri'
4
4
  require 'cgi'
5
+ require 'json'
5
6
 
6
7
  require 'ci/queue/version'
7
8
  require 'ci/queue/output_helpers'
@@ -64,6 +64,7 @@ module Minitest
64
64
  end
65
65
 
66
66
  queue.rescue_connection_errors { queue.created_at = CI::Queue.time_now.to_f }
67
+ queue.boot_heartbeat_process!
67
68
 
68
69
  set_load_path
69
70
  Minitest.queue = queue
@@ -582,6 +583,16 @@ module Minitest
582
583
  queue.config.redis_ttl = time
583
584
  end
584
585
 
586
+ help = <<~EOS
587
+ If heartbeat is enabled, a background process will periodically signal it's still processing
588
+ the current test. If the heartbeat stops for the specified amount of seconds,
589
+ the test will be requeued to another worker.
590
+ EOS
591
+ opts.on("--heartbeat [SECONDS]", Integer, help) do |time|
592
+ queue_config.max_missed_heartbeat_seconds = time || 30
593
+ end
594
+
595
+
585
596
  opts.on("-v", "--verbose", "Verbose. Show progress processing files.") do
586
597
  self.verbose = true
587
598
  end
@@ -226,7 +226,10 @@ module Minitest
226
226
 
227
227
  def run_from_queue(reporter, *)
228
228
  queue.poll do |example|
229
- result = example.run
229
+ result = queue.with_heartbeat(example.id) do
230
+ example.run
231
+ end
232
+
230
233
  failed = !(result.passed? || result.skipped?)
231
234
 
232
235
  if example.flaky?
@@ -256,6 +259,7 @@ module Minitest
256
259
  reporter.record(result)
257
260
  end
258
261
  end
262
+ queue.stop_heartbeat!
259
263
  end
260
264
  end
261
265
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ci-queue
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.42.0
4
+ version: 0.43.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jean Boussier
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-15 00:00:00.000000000 Z
11
+ date: 2024-01-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -188,6 +188,8 @@ files:
188
188
  - lib/ci/queue/redis/grind.rb
189
189
  - lib/ci/queue/redis/grind_record.rb
190
190
  - lib/ci/queue/redis/grind_supervisor.rb
191
+ - lib/ci/queue/redis/heartbeat.lua
192
+ - lib/ci/queue/redis/monitor.rb
191
193
  - lib/ci/queue/redis/release.lua
192
194
  - lib/ci/queue/redis/requeue.lua
193
195
  - lib/ci/queue/redis/reserve.lua
@@ -239,7 +241,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
239
241
  - !ruby/object:Gem::Version
240
242
  version: '0'
241
243
  requirements: []
242
- rubygems_version: 3.5.4
244
+ rubygems_version: 3.5.5
243
245
  signing_key:
244
246
  specification_version: 4
245
247
  summary: Distribute tests over many workers using a queue