ci-queue 0.66.0 → 0.68.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b78efa3839e0fdc3a3998ef7d664bab01f28d0cdde96c28b790f9c912964d33a
4
- data.tar.gz: 4bc014eb8401ce329be509dc76f449a4d80696667563e3236c5f9634514b7df0
3
+ metadata.gz: dfe678bd81982a8945fff3610d86021f2b32ab96f19cd00bbda28c7837374f0c
4
+ data.tar.gz: f71d4a43aa80012a863f3ec0c6fccebd3542dd1c92e4cc386ffe1bda80836254
5
5
  SHA512:
6
- metadata.gz: be5f84376a0a6bc93776eaafa4a725e48061e18ea13d79c407cb599b665201031fc189ae91cd89607f3c2fa062eb388caaa554411405cc7adf6b073610ee078f
7
- data.tar.gz: 47057ca6d1b2d35eb0068be06330802a0ce614744f8498d069ef9bc1ffdd499a8a38861a3bfa7aa62e2b10becd0be5c99306026938c7ca704ede42396109367c
6
+ metadata.gz: 7af733cedec42252b31b2851cdea9067d5dd3974e9549595c492edbacb366125a4655c0c3d7dccb817eba6ab4b8472e0f35c98bbc4bfeb86f6033f5ffd42d93b
7
+ data.tar.gz: 2ca6d5bf9b6b2d59c3907656afb536c84fb360eee42aee63c3e48136a7b880ef3c16584b50a7bcb0877917d0a62f1dc6f3a13d31277f1c6f7ec20718efd8c5a6
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ci-queue (0.66.0)
4
+ ci-queue (0.68.0)
5
5
  logger
6
6
 
7
7
  GEM
@@ -2,9 +2,18 @@
2
2
  local zset_key = KEYS[1]
3
3
  local processed_key = KEYS[2]
4
4
  local owners_key = KEYS[3]
5
+ local error_reports_key = KEYS[4]
5
6
 
6
7
  local test = ARGV[1]
7
-
8
+ local error = ARGV[2]
9
+ local ttl = ARGV[3]
8
10
  redis.call('zrem', zset_key, test)
9
11
  redis.call('hdel', owners_key, test) -- Doesn't matter if it was reclaimed by another workers
10
- return redis.call('sadd', processed_key, test)
12
+ local acknowledged = redis.call('sadd', processed_key, test)
13
+
14
+ if acknowledged and error ~= "" then
15
+ redis.call('hset', error_reports_key, test, error)
16
+ redis.call('expire', error_reports_key, ttl)
17
+ end
18
+
19
+ return acknowledged
@@ -181,8 +181,8 @@ module CI
181
181
  master_status == 'setup'
182
182
  end
183
183
 
184
- def increment_test_failed
185
- redis.incr(key('test_failed_count'))
184
+ def increment_test_failed(pipeline: redis)
185
+ pipeline.incr(key('test_failed_count'))
186
186
  end
187
187
 
188
188
  def test_failed
@@ -225,8 +225,8 @@ module CI
225
225
  redis.get(key('master-status'))
226
226
  end
227
227
 
228
- def eval_script(script, *args)
229
- redis.evalsha(load_script(script), *args)
228
+ def eval_script(script, keys:, argv:, pipeline: redis)
229
+ pipeline.evalsha(load_script(script), keys: keys, argv: argv)
230
230
  end
231
231
 
232
232
  def load_script(script)
@@ -56,23 +56,22 @@ module CI
56
56
  redis.rpush(key('warnings'), Marshal.dump([type, attributes]))
57
57
  end
58
58
 
59
+ Test = Struct.new(:id) # Hack
60
+
59
61
  def record_error(id, payload, stats: nil)
60
62
  redis.pipelined do |pipeline|
61
- pipeline.hset(
62
- key('error-reports'),
63
- id.dup.force_encoding(Encoding::BINARY),
64
- payload.dup.force_encoding(Encoding::BINARY),
65
- )
66
- pipeline.expire(key('error-reports'), config.redis_ttl)
63
+ @queue.acknowledge(id, error: payload, pipeline: pipeline)
67
64
  record_stats(stats, pipeline: pipeline)
65
+ @queue.increment_test_failed(pipeline: pipeline)
68
66
  end
69
67
  nil
70
68
  end
71
69
 
72
- def record_success(id, stats: nil, skip_flaky_record: false)
70
+ def record_success(id, stats: nil, skip_flaky_record: false, acknowledge: true)
71
+ @queue.acknowledge(id) if acknowledge
73
72
  error_reports_deleted_count, requeued_count, _ = redis.pipelined do |pipeline|
74
- pipeline.hdel(key('error-reports'), id.dup.force_encoding(Encoding::BINARY))
75
- pipeline.hget(key('requeues-count'), id.b)
73
+ pipeline.hdel(key('error-reports'), id)
74
+ pipeline.hget(key('requeues-count'), id)
76
75
  record_stats(stats, pipeline: pipeline)
77
76
  end
78
77
  record_flaky(id) if !skip_flaky_record && (error_reports_deleted_count.to_i > 0 || requeued_count.to_i > 0)
@@ -14,7 +14,7 @@ module CI
14
14
  redis.pipelined do |pipeline|
15
15
  pipeline.lpush(
16
16
  key('error-reports'),
17
- payload.force_encoding(Encoding::BINARY),
17
+ payload,
18
18
  )
19
19
  pipeline.expire(key('error-reports'), config.redis_ttl)
20
20
  record_stats(stats, pipeline: pipeline)
@@ -22,7 +22,7 @@ module CI
22
22
  redis.pipelined do |pipeline|
23
23
  pipeline.lpush(
24
24
  test_time_key(test_name),
25
- duration.to_s.force_encoding(Encoding::BINARY),
25
+ duration.to_s,
26
26
  )
27
27
  pipeline.expire(test_time_key(test_name), config.redis_ttl)
28
28
  end
@@ -33,7 +33,7 @@ module CI
33
33
  redis.pipelined do |pipeline|
34
34
  pipeline.lpush(
35
35
  all_test_names_key,
36
- test_name.dup.force_encoding(Encoding::BINARY),
36
+ test_name,
37
37
  )
38
38
  pipeline.expire(all_test_names_key, config.redis_ttl)
39
39
  end
@@ -53,11 +53,11 @@ module CI
53
53
  end
54
54
 
55
55
  def all_test_names_key
56
- "build:#{config.build_id}:list_of_test_names".dup.force_encoding(Encoding::BINARY)
56
+ "build:#{config.build_id}:list_of_test_names"
57
57
  end
58
58
 
59
59
  def test_time_key(test_name)
60
- "build:#{config.build_id}:#{test_name}".dup.force_encoding(Encoding::BINARY)
60
+ "build:#{config.build_id}:#{test_name}"
61
61
  end
62
62
  end
63
63
  end
@@ -7,14 +7,16 @@ module CI
7
7
  module Redis
8
8
  class << self
9
9
  attr_accessor :requeue_offset
10
+ attr_accessor :max_sleep_time
10
11
  end
11
12
  self.requeue_offset = 42
13
+ self.max_sleep_time = 2
12
14
 
13
15
  class Worker < Base
14
16
  attr_reader :total
15
17
 
16
18
  def initialize(redis, config)
17
- @reserved_test = nil
19
+ @reserved_tests = Set.new
18
20
  @shutdown_required = false
19
21
  super(redis, config)
20
22
  end
@@ -46,13 +48,21 @@ module CI
46
48
  @master
47
49
  end
48
50
 
51
+ DEFAULT_SLEEP_SECONDS = 0.5
52
+
49
53
  def poll
50
54
  wait_for_master
55
+ attempt = 0
51
56
  until shutdown_required? || config.circuit_breakers.any?(&:open?) || exhausted? || max_test_failed?
52
57
  if test = reserve
58
+ attempt = 0
53
59
  yield index.fetch(test)
54
60
  else
55
- sleep 0.05
61
+ # Adding exponential backoff to avoid hammering Redis
62
+ # we just stay online here in case a test gets retried or times out so we can afford to wait
63
+ sleep_time = [DEFAULT_SLEEP_SECONDS * (2 ** attempt), Redis.max_sleep_time].min
64
+ attempt += 1
65
+ sleep sleep_time
56
66
  end
57
67
  end
58
68
  redis.pipelined do |pipeline|
@@ -97,13 +107,13 @@ module CI
97
107
  build.report_worker_error(error)
98
108
  end
99
109
 
100
- def acknowledge(test)
101
- test_key = test.id
110
+ def acknowledge(test_key, error: nil, pipeline: redis)
102
111
  raise_on_mismatching_test(test_key)
103
112
  eval_script(
104
113
  :acknowledge,
105
- keys: [key('running'), key('processed'), key('owners')],
106
- argv: [test_key],
114
+ keys: [key('running'), key('processed'), key('owners'), key('error-reports')],
115
+ argv: [test_key, error.to_s, config.redis_ttl],
116
+ pipeline: pipeline,
107
117
  ) == 1
108
118
  end
109
119
 
@@ -125,7 +135,7 @@ module CI
125
135
  argv: [config.max_requeues, global_max_requeues, test_key, offset],
126
136
  ) == 1
127
137
 
128
- @reserved_test = test_key unless requeued
138
+ reserved_tests << test_key unless requeued
129
139
  requeued
130
140
  end
131
141
 
@@ -142,25 +152,24 @@ module CI
142
152
 
143
153
  attr_reader :index
144
154
 
155
+ def reserved_tests
156
+ @reserved_tests ||= Set.new
157
+ end
158
+
145
159
  def worker_id
146
160
  config.worker_id
147
161
  end
148
162
 
149
163
  def raise_on_mismatching_test(test)
150
- if @reserved_test == test
151
- @reserved_test = nil
152
- else
153
- raise ReservationError, "Acknowledged #{test.inspect} but #{@reserved_test.inspect} was reserved"
164
+ unless reserved_tests.delete?(test)
165
+ raise ReservationError, "Acknowledged #{test.inspect} but only #{reserved_tests.map(&:inspect).join(", ")} reserved"
154
166
  end
155
167
  end
156
168
 
157
169
  def reserve
158
- if @reserved_test
159
- raise ReservationError, "#{@reserved_test.inspect} is already reserved. " \
160
- "You have to acknowledge it before you can reserve another one"
161
- end
162
-
163
- @reserved_test = (try_to_reserve_lost_test || try_to_reserve_test)
170
+ test = (try_to_reserve_lost_test || try_to_reserve_test)
171
+ reserved_tests << test
172
+ test
164
173
  end
165
174
 
166
175
  def try_to_reserve_test
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ gem "redis", "~> 5.0"
3
4
  require 'redis'
4
5
  require 'ci/queue/redis/build_record'
5
6
  require 'ci/queue/redis/base'
@@ -89,26 +89,27 @@ module CI
89
89
  end
90
90
 
91
91
  def running
92
- @reserved_test ? 1 : 0
92
+ reserved_tests.empty? ? 0 : 1
93
93
  end
94
94
 
95
95
  def poll
96
- while !@shutdown && config.circuit_breakers.none?(&:open?) && !max_test_failed? && @reserved_test = @queue.shift
97
- yield index.fetch(@reserved_test)
96
+ while !@shutdown && config.circuit_breakers.none?(&:open?) && !max_test_failed? && reserved_test = @queue.shift
97
+ reserved_tests << reserved_test
98
+ yield index.fetch(reserved_test)
98
99
  end
99
- @reserved_test = nil
100
+ reserved_tests.clear
100
101
  end
101
102
 
102
103
  def exhausted?
103
104
  @queue.empty?
104
105
  end
105
106
 
106
- def acknowledge(test)
107
+ def acknowledge(...)
107
108
  @progress += 1
108
109
  true
109
110
  end
110
111
 
111
- def increment_test_failed
112
+ def increment_test_failed(...)
112
113
  @test_failed = test_failed + 1
113
114
  end
114
115
 
@@ -142,6 +143,10 @@ module CI
142
143
  def requeues
143
144
  @requeues ||= Hash.new(0)
144
145
  end
146
+
147
+ def reserved_tests
148
+ @reserved_tests ||= Set.new
149
+ end
145
150
  end
146
151
  end
147
152
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module CI
4
4
  module Queue
5
- VERSION = '0.66.0'
5
+ VERSION = '0.68.0'
6
6
  DEV_SCRIPTS_ROOT = ::File.expand_path('../../../../../redis', __FILE__)
7
7
  RELEASE_SCRIPTS_ROOT = ::File.expand_path('../redis', __FILE__)
8
8
  end
@@ -52,7 +52,7 @@ module Minitest
52
52
  if (test.failure || test.error?) && !test.skipped?
53
53
  build.record_error("#{test.klass}##{test.name}", dump(test), stats: stats)
54
54
  else
55
- build.record_success("#{test.klass}##{test.name}", stats: stats, skip_flaky_record: test.skipped?)
55
+ build.record_success("#{test.klass}##{test.name}", stats: stats, skip_flaky_record: test.skipped?, acknowledge: !test.requeued?)
56
56
  end
57
57
  end
58
58
 
@@ -108,7 +108,16 @@ module Minitest
108
108
  build.requeued_tests
109
109
  end
110
110
 
111
+ APPLICATION_ERROR_EXIT_CODE = 42
112
+ TIMED_OUT_EXIT_CODE = 43
113
+ TOO_MANY_FAILED_TESTS_EXIT_CODE = 44
114
+ WORKERS_DIED_EXIT_CODE = 45
115
+ SUCCESS_EXIT_CODE = 0
116
+ TEST_FAILURE_EXIT_CODE = 1
117
+
111
118
  def report
119
+ exit_code = TEST_FAILURE_EXIT_CODE
120
+
112
121
  if requeued_tests.to_a.any?
113
122
  step("Requeued #{requeued_tests.size} tests")
114
123
  requeued_tests.to_a.sort.each do |test_id, count|
@@ -131,10 +140,14 @@ module Minitest
131
140
  if remaining_tests.size > 10
132
141
  puts " ..."
133
142
  end
143
+
144
+ exit_code = TIMED_OUT_EXIT_CODE
134
145
  elsif supervisor.time_left_with_no_workers.to_i <= 0
135
146
  puts red("All workers died.")
147
+ exit_code = WORKERS_DIED_EXIT_CODE
136
148
  elsif supervisor.max_test_failed?
137
149
  puts red("Encountered too many failed tests. Test run was ended early.")
150
+ exit_code = TOO_MANY_FAILED_TESTS_EXIT_CODE
138
151
  end
139
152
 
140
153
  puts
@@ -146,9 +159,10 @@ module Minitest
146
159
  puts red("Worker #{worker_id } crashed")
147
160
  puts error
148
161
  puts ""
162
+ exit_code = APPLICATION_ERROR_EXIT_CODE
149
163
  end
150
164
 
151
- success?
165
+ success? ? SUCCESS_EXIT_CODE : exit_code
152
166
  end
153
167
 
154
168
  def success?
@@ -253,25 +253,24 @@ module Minitest
253
253
 
254
254
  unless supervisor.wait_for_workers { display_warnings(supervisor.build) }
255
255
  unless supervisor.queue_initialized?
256
- abort! "No master was elected. Did all workers crash?", 40
256
+ abort! "No leader was elected. This typically means no worker was able to start. Were there any errors during application boot?", 40
257
257
  end
258
258
 
259
259
  unless supervisor.exhausted?
260
260
  reporter = BuildStatusReporter.new(supervisor: supervisor)
261
- reporter.report
261
+ exit_code = reporter.report
262
262
  reporter.write_failure_file(queue_config.failure_file) if queue_config.failure_file
263
263
  reporter.write_flaky_tests_file(queue_config.export_flaky_tests_file) if queue_config.export_flaky_tests_file
264
264
 
265
- abort!("#{supervisor.size} tests weren't run.")
265
+ abort!("#{supervisor.size} tests weren't run.", exit_code)
266
266
  end
267
267
  end
268
268
 
269
269
  reporter = BuildStatusReporter.new(supervisor: supervisor)
270
270
  reporter.write_failure_file(queue_config.failure_file) if queue_config.failure_file
271
271
  reporter.write_flaky_tests_file(queue_config.export_flaky_tests_file) if queue_config.export_flaky_tests_file
272
- reporter.report
273
-
274
- exit! reporter.success? ? 0 : 1
272
+ exit_code = reporter.report
273
+ exit! exit_code
275
274
  end
276
275
 
277
276
  def report_grind_command
@@ -107,7 +107,7 @@ module Minitest
107
107
  end
108
108
 
109
109
  module Queue
110
- include ::CI::Queue::OutputHelpers
110
+ extend ::CI::Queue::OutputHelpers
111
111
  attr_writer :run_command_formatter, :project_root
112
112
 
113
113
  def run_command_formatter
@@ -149,7 +149,79 @@ module Minitest
149
149
  path
150
150
  end
151
151
 
152
+ class << self
153
+ def queue
154
+ Minitest.queue
155
+ end
156
+
157
+ def run(reporter, *)
158
+ rescue_run_errors do
159
+ queue.poll do |example|
160
+ result = queue.with_heartbeat(example.id) do
161
+ example.run
162
+ end
163
+
164
+ handle_test_result(reporter, example, result)
165
+ end
166
+
167
+ queue.stop_heartbeat!
168
+ end
169
+ end
170
+
171
+ def handle_test_result(reporter, example, result)
172
+ failed = !(result.passed? || result.skipped?)
173
+
174
+ if example.flaky?
175
+ result.mark_as_flaked!
176
+ failed = false
177
+ end
178
+
179
+ if failed && queue.config.failing_test && queue.config.failing_test != example.id
180
+ # When we do a bisect, we don't care about the result other than the test we're running the bisect on
181
+ result.mark_as_flaked!
182
+ failed = false
183
+ elsif failed
184
+ queue.report_failure!
185
+ else
186
+ queue.report_success!
187
+ end
188
+
189
+ if failed && CI::Queue.requeueable?(result) && queue.requeue(example)
190
+ result.requeue!
191
+ end
192
+ reporter.record(result)
193
+ end
194
+
195
+ private
196
+
197
+ def rescue_run_errors(&block)
198
+ block.call
199
+ rescue Errno::EPIPE
200
+ # This happens when the heartbeat process dies
201
+ reopen_previous_step
202
+ puts red("The heartbeat process died. This worker is exiting early.")
203
+ exit!(41)
204
+ rescue CI::Queue::Error => error
205
+ reopen_previous_step
206
+ puts red("#{error.class}: #{error.message}")
207
+ error.backtrace.each do |frame|
208
+ puts red(frame)
209
+ end
210
+ exit!(41)
211
+ rescue => error
212
+ reopen_previous_step
213
+ Minitest.queue.report_worker_error(error)
214
+ puts red("This worker exited because of an uncaught application error:")
215
+ puts red("#{error.class}: #{error.message}")
216
+ error.backtrace.each do |frame|
217
+ puts red(frame)
218
+ end
219
+ exit!(42)
220
+ end
221
+ end
222
+
152
223
  class SingleExample
224
+ attr_reader :runnable, :method_name
153
225
 
154
226
  def initialize(runnable, method_name)
155
227
  @runnable = runnable
@@ -211,7 +283,7 @@ module Minitest
211
283
 
212
284
  def __run(*args)
213
285
  if queue
214
- run_from_queue(*args)
286
+ Queue.run(*args)
215
287
 
216
288
  if queue.config.circuit_breakers.any?(&:open?)
217
289
  STDERR.puts queue.config.circuit_breakers.map(&:message).join(' ').strip
@@ -224,65 +296,6 @@ module Minitest
224
296
  super
225
297
  end
226
298
  end
227
-
228
- def run_from_queue(reporter, *)
229
- queue.poll do |example|
230
- result = queue.with_heartbeat(example.id) do
231
- example.run
232
- end
233
-
234
- failed = !(result.passed? || result.skipped?)
235
-
236
- if example.flaky?
237
- result.mark_as_flaked!
238
- failed = false
239
- end
240
-
241
- if failed && queue.config.failing_test && queue.config.failing_test != example.id
242
- # When we do a bisect, we don't care about the result other than the test we're running the bisect on
243
- result.mark_as_flaked!
244
- failed = false
245
- elsif failed
246
- queue.report_failure!
247
- else
248
- queue.report_success!
249
- end
250
-
251
- if failed && CI::Queue.requeueable?(result) && queue.requeue(example)
252
- result.requeue!
253
- reporter.record(result)
254
- elsif queue.acknowledge(example)
255
- reporter.record(result)
256
- queue.increment_test_failed if failed
257
- elsif !failed
258
- # If the test was already acknowledged by another worker (we timed out)
259
- # Then we only record it if it is successful.
260
- reporter.record(result)
261
- end
262
- end
263
- queue.stop_heartbeat!
264
- rescue Errno::EPIPE
265
- # This happens when the heartbeat process dies
266
- reopen_previous_step
267
- puts red("The heartbeat process died. This worker is exiting early.")
268
- exit!(41)
269
- rescue CI::Queue::Error => error
270
- reopen_previous_step
271
- puts red("#{error.class}: #{error.message}")
272
- error.backtrace.each do |frame|
273
- puts red(frame)
274
- end
275
- exit!(41)
276
- rescue => error
277
- reopen_previous_step
278
- queue.report_worker_error(error)
279
- puts red("This worker exited because of an uncaught application error:")
280
- puts red("#{error.class}: #{error.message}")
281
- error.backtrace.each do |frame|
282
- puts red(frame)
283
- end
284
- exit!(42)
285
- end
286
299
  end
287
300
  end
288
301
 
data/lib/rspec/queue.rb CHANGED
@@ -224,13 +224,8 @@ module RSpec
224
224
  reporter.cancel_run!
225
225
  dup.mark_as_requeued!(reporter)
226
226
  return true
227
- elsif reporter.acknowledge || !@exception
228
- # If the test was already acknowledged by another worker (we timed out)
229
- # Then we only record it if it is successful.
230
- super(reporter)
231
227
  else
232
- reporter.cancel_run!
233
- return
228
+ super(reporter)
234
229
  end
235
230
  else
236
231
  super(reporter)
@@ -288,7 +283,7 @@ module RSpec
288
283
 
289
284
  unless supervisor.wait_for_workers
290
285
  unless supervisor.queue_initialized?
291
- abort! "No master was elected. Did all workers crash?"
286
+ abort! "No leader was elected. This typically means no worker was able to start. Were there any errors during application boot?"
292
287
  end
293
288
 
294
289
  unless supervisor.exhausted?
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ci-queue
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.66.0
4
+ version: 0.68.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jean Boussier
@@ -254,7 +254,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
254
254
  - !ruby/object:Gem::Version
255
255
  version: '0'
256
256
  requirements: []
257
- rubygems_version: 3.6.8
257
+ rubygems_version: 3.6.9
258
258
  specification_version: 4
259
259
  summary: Distribute tests over many workers using a queue
260
260
  test_files: []