megatest 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,459 @@
1
+ # frozen_string_literal: true
2
+
3
+ gem "redis-client", ">= 0.22"
4
+ require "redis-client"
5
+ require "rbconfig"
6
+
7
+ # :stopdoc:
8
+
9
+ module Megatest
10
+ # Data structures
11
+ #
12
+ # Note: All keys are prefixed by `build:<@build_id>:`
13
+ #
14
+ # - "leader-status": String, either `setup` or `ready`
15
+ #
16
+ # - "queue": List, contains the test ids that haven't been popped yet.
17
+ #
18
+ # - "running": SortedSet, members are the test ids currently being processed.
19
+ # Scores are the lease expiration timestamp. If the score is lower than
20
+ # current time, the test was lost and should be re-assigned.
21
+ #
22
+ # - "processed": Set, members are the ids of tests that were fully processed.
23
+ #
24
+ # - "owners": Hash, contains a mapping of currently being processed tests and the worker they are assigned to.
25
+ # Keys are test ids, values are the id of the worker the test is currently assigned to.
26
+ #
27
+ # - "worker:<@worker_id>:running": Set, test ids currently held by a worker.
28
+ #
29
+ # - "worker:<@worker_id>:failures": List, all the ids of failed tests processed by a worker.
30
+ # Used as the base for a new queue when retrying a job. May contain duplicates.
31
+ #
32
+ # - "results": List, inside are serialized TestCaseResult instances. Append only.
33
+ #
34
+ # - "requeues-count": Hash, keys are test ids, values are the number of times that particular test
35
+ # was retried. There is also the special "___total___" key.
36
+ class RedisQueue < AbstractQueue
37
# Holder for the queue a heartbeat monitor is attached to.
# Only the constructor is defined here; no other behavior exists yet.
class ExternalHeartbeatMonitor
  # queue - the queue instance to remember.
  def initialize(queue)
    @queue = queue
  end
end
42
+
43
class << self
  # Instantiates a queue for `config`. When that queue reports it is
  # retrying (see #retrying?), it is wrapped through RetryQueue.build
  # and the wrapper is returned instead.
  def build(config)
    queue = new(config)
    return queue unless queue.retrying?

    RetryQueue.build(config, queue)
  end
end
52
+
53
+ attr_reader :summary
54
+
55
# Sets up the Redis connection and per-build state.
#
# config - read for `queue_url`, `worker_id` and `build_id`; also handed
#          to the superclass constructor.
# ttl:   - expiry in seconds applied to the Redis keys this queue writes
#          (defaults to 24 hours).
def initialize(config, ttl: 24 * 60 * 60)
  super(config)

  @summary = Queue::Summary.new
  @redis = RedisClient.new(
    url: config.queue_url,
    # We retry quite aggressively in case the network
    # is spotty, we'd rather wait a bit than to crash
    # a worker.
    reconnect_attempts: [0, 0, 0.1, 0.5, 1, 3, 5],
  )
  @ttl = ttl
  @load_timeout = 30 # TODO: configurable
  @worker_id = config.worker_id
  @build_id = config.build_id
  @success = true
  @leader = nil
  # Must be the same ivar #load_script memoizes into.
  @scripts_cache = {}
end
75
+
76
# Truthy when a worker id is configured and that worker's "failures"
# list is not empty. Returns the falsy @worker_id itself when no worker
# id is set.
def retrying?
  return @worker_id unless @worker_id

  failures_count = @redis.call("llen", key("worker", worker_id, "failures"))
  !failures_count.zero?
end
79
+
80
# Returns the ids stored in this worker's "failures" list, de-duplicated
# (first occurrence wins) and then reversed.
#
# A nil reply is treated as an empty list, so the method always returns
# an Array.
def failed_test_ids
  test_ids = @redis.call("lrange", key("worker", worker_id, "failures"), 0, -1)
  Array(test_ids).uniq.reverse
end
85
+
86
# Best-effort removal of this build's Redis keys after a successful run.
#
# Does nothing (returns nil) unless @success is set. With a worker id,
# only that worker's "running" and "failures" keys are deleted; without
# one, the build-wide keys are deleted.
#
# Returns the DEL reply, or false when the connection fails.
def cleanup
  return unless @success

  if @worker_id
    @redis.call(
      "del",
      key("worker", worker_id, "running"),
      key("worker", worker_id, "failures"),
    )
  else
    @redis.call(
      "del",
      key("leader-status"),
      key("queue"),
      key("running"),
      key("processed"),
      key("owners"),
      key("results"),
      # Same name #attempt_to_retry writes to.
      key("requeues-count"),
    )
  end
rescue RedisClient::ConnectionError
  false # Cleanup is best effort
end
110
+
111
# Lua script behind #heartbeat.
#
# KEYS: running, processed, owners, worker running set.
# ARGV: worker id, current time.
#
# For every test in the worker's running set that is not yet processed
# and whose owner is still this worker, the score in "running" is set to
# the current time. Returns how many tests were bumped.
HEARTBEAT = <<~'LUA'
  local running_key = KEYS[1]
  local processed_key = KEYS[2]
  local owners_key = KEYS[3]
  local worker_running_key = KEYS[4]

  local worker_id = ARGV[1]
  local current_time = ARGV[2]

  local count = 0

  local tests = redis.call('smembers', worker_running_key)
  for index = 1, #tests do
    local test = tests[index]

    -- # already processed, we do not need to bump the timestamp
    if redis.call('sismember', processed_key, test) == 0 then
      -- # we're still the owner of the test, we can bump the timestamp
      local owner_id = redis.call('hget', owners_key, test)
      if owner_id == worker_id then
        redis.call('zadd', running_key, current_time, test)
        count = count + 1
      end
    end
  end

  return count
LUA
139
+
140
# Runs the HEARTBEAT script for this worker, passing Megatest.now as the
# current time.
#
# Returns true once the script ran, false when the Redis connection
# failed.
def heartbeat
  lease_keys = [
    key("running"),
    key("processed"),
    key("owners"),
    key("worker", worker_id, "running"),
  ]
  eval_script(HEARTBEAT, keys: lease_keys, argv: [worker_id, Megatest.now])
  true
rescue RedisClient::ConnectionError
  false # Heartbeat is best effort
end
158
+
159
# This queue implementation always reports itself as distributed.
def distributed?
  true
end
162
+
163
# True once the "leader-status" key holds the string "ready".
def populated?
  leader_status = @redis.call("get", key("leader-status"))
  leader_status == "ready"
end
166
+
167
# Whether this process won the leader election in #populate.
# Returns nil until #populate has run.
def leader?
  @leader
end
170
+
171
# Number of tests still queued plus number of tests in the "running"
# sorted set, both read inside a single MULTI transaction.
def remaining_size
  counts = @redis.multi do |transaction|
    transaction.call("llen", key("queue"))
    transaction.call("zcard", key("running"))
  end
  counts.inject(:+)
end
177
+
178
# True when #remaining_size is zero, i.e. nothing is queued or running.
def empty?
  remaining_size.zero?
end
181
+
182
# Lua script behind #reserve.
#
# KEYS: queue, running, processed, owners, worker running set.
# ARGV: worker id, current time, lease timeout.
#
# Tests whose score in "running" is older than `current_time - timeout`
# are considered lost: their expired lease is removed and, unless they
# were already processed, they are pushed back onto the queue. Removing
# the lease matters: a lost test left in "running" would be found and
# pushed again by every later call, leaving duplicates in the queue.
#
# One test is then popped from the queue, recorded as running for this
# worker and returned; nil is returned when the queue is empty.
RESERVE = <<~'LUA'
  local queue_key = KEYS[1]
  local running_key = KEYS[2]
  local processed_key = KEYS[3]
  local owners_key = KEYS[4]
  local worker_running_key = KEYS[5]

  local worker_id = ARGV[1]
  local current_time = ARGV[2]
  local timeout = ARGV[3]

  -- # First we requeue all timed out tests
  local lost_tests = redis.call('zrangebyscore', running_key, 0, current_time - timeout)
  for _, test in ipairs(lost_tests) do
    -- # Drop the expired lease so the test is requeued only once
    redis.call('zrem', running_key, test)
    if redis.call('sismember', processed_key, test) == 0 then
      redis.call('rpush', queue_key, test)
    end
  end

  local test = redis.call('rpop', queue_key)
  if test then
    redis.call('zadd', running_key, current_time, test)
    redis.call('sadd', worker_running_key, test)
    redis.call('hset', owners_key, test, worker_id)
    return test
  end

  return nil
LUA
211
+
212
# Runs the RESERVE script for this worker and returns the id of the test
# it handed out, or nil when nothing could be reserved.
#
# The lease timeout passed to the script is twice the configured
# heartbeat frequency.
def reserve
  load_script(RESERVE)
  lease_timeout = @config.heartbeat_frequency * 2
  reserved_id, = eval_script(
    RESERVE,
    keys: [
      key("queue"),
      key("running"),
      key("processed"),
      key("owners"),
      key("worker", worker_id, "running"),
    ],
    argv: [worker_id, Megatest.now, lease_timeout],
  )
  reserved_id
end
231
+
232
# Registers `test_cases` (via super) and makes sure the shared queue is
# filled exactly once per build.
#
# The first process to SETNX "leader-status" becomes the leader: it
# pushes every test id onto "queue" and flips the status to "ready" in
# one transaction. Every other process polls #populated? every 0.1s, for
# at most @load_timeout seconds.
#
# NOTE(review): when the wait times out the follower simply carries on
# without the queue being ready - confirm this is intended.
def populate(test_cases)
  super

  acquired, = @redis.pipelined do |pipeline|
    pipeline.call("setnx", key("leader-status"), "setup")
    pipeline.call("expire", key("leader-status"), @ttl)
  end
  @leader = acquired == 1

  if @leader
    @redis.multi do |transaction|
      transaction.call("lpush", key("queue"), test_cases.map(&:id)) unless test_cases.empty?
      transaction.call("expire", key("queue"), @ttl)
      transaction.call("set", key("leader-status"), "ready")
    end
  else
    polls = @load_timeout * 10
    polls.times do
      break if populated?

      sleep 0.1
    end
  end
end
257
+
258
# False once a failed result could not be retried (see #record_result),
# true otherwise.
def success?
  @success
end
261
+
262
# Reserves the next test id and returns the matching entry from
# `test_cases_index`, or nil when nothing could be reserved.
#
# Raises KeyError when the reserved id is not in the index.
def pop_test
  test_id = reserve
  test_cases_index.fetch(test_id) if test_id
end
267
+
268
# Lua script used by #record_result to mark a test as done.
#
# KEYS: running, processed, owners, worker running set.
# ARGV: test id.
#
# Removes the test from "running", from the worker's running set and
# from "owners", then adds it to "processed". Returns the SADD reply.
ACKNOWLEDGE = <<~'LUA'
  local running_key = KEYS[1]
  local processed_key = KEYS[2]
  local owners_key = KEYS[3]
  local worker_running_key = KEYS[4]

  local test = ARGV[1]

  redis.call('zrem', running_key, test)
  redis.call('srem', worker_running_key, test)
  redis.call('hdel', owners_key, test) -- # Doesn't matter if it was reclaimed by another workers
  return redis.call('sadd', processed_key, test)
LUA
281
+
282
# Records the outcome of one test case and returns the result that was
# actually stored.
#
# A failed result is first offered to #attempt_to_retry: when the requeue
# is accepted the result is swapped for `result.retry`, otherwise the
# whole run is flagged as unsuccessful. The result is added to the local
# summary and appended to the shared "results" list.
#
# Results that are not retries are also acknowledged through the
# ACKNOWLEDGE script, and the worker's "failures" list is updated:
# failures are appended, successes are removed.
def record_result(original_result)
  result = original_result
  if original_result.failed?
    if attempt_to_retry(original_result)
      result = original_result.retry
    else
      @success = false
    end
  end
  @summary.record_result(result)

  acknowledge = !result.retried?
  # The script has to be cached before entering the pipeline, because
  # #load_script talks to @redis directly.
  load_script(ACKNOWLEDGE) if acknowledge

  @redis.pipelined do |pipeline|
    if acknowledge
      eval_script(
        ACKNOWLEDGE,
        keys: [
          key("running"),
          key("processed"),
          key("owners"),
          key("worker", worker_id, "running"),
        ],
        argv: [result.test_id],
        redis: pipeline,
      )
      if result.failed?
        pipeline.call("rpush", key("worker", worker_id, "failures"), result.test_id)
        pipeline.call("expire", key("worker", worker_id, "failures"), @ttl)
      elsif result.success?
        pipeline.call("lrem", key("worker", worker_id, "failures"), 0, result.test_id)
      end
    end
    pipeline.call("rpush", key("results"), result.dump)
    pipeline.call("expire", key("results"), @ttl)
  end

  result
end
325
+
326
# Builds a summary from every payload stored in the shared "results"
# list, each one decoded with TestCaseResult.load. Returns an empty
# summary when Redis replies with nil.
def global_summary
  payloads = @redis.call("lrange", key("results"), 0, -1)
  return Queue::Summary.new unless payloads

  Queue::Summary.new(payloads.map { |payload| TestCaseResult.load(payload) })
end
333
+
334
+ private
335
+
336
# Lua script behind #attempt_to_retry.
#
# KEYS: processed, requeues-count, queue, running, owners.
# ARGV: worker id, per-test requeue limit, global requeue limit, test id,
#       insertion index.
#
# Releases ownership if this worker still owns the test, then refuses
# (returns false) when the test is already processed or when either the
# global or the per-test requeue counter reached its limit. Otherwise
# both counters are incremented, the test is inserted back into the
# queue, removed from "running", and true is returned.
#
# NOTE(review): with index 0 the LRANGE bounds are (-1, 0), which only
# matches on a single-element queue; in other cases the test is LPUSHed.
# Confirm that this is the intended placement.
REQUEUE = <<~'LUA'
  local processed_key = KEYS[1]
  local requeues_count_key = KEYS[2]
  local queue_key = KEYS[3]
  local running_key = KEYS[4]
  local owners_key = KEYS[5]

  local worker_id = ARGV[1]
  local max_requeues = tonumber(ARGV[2])
  local global_max_requeues = tonumber(ARGV[3])
  local test = ARGV[4]
  local index = ARGV[5]

  if redis.call('hget', owners_key, test) == worker_id then
    redis.call('hdel', owners_key, test)
  end

  if redis.call('sismember', processed_key, test) == 1 then
    return false
  end

  local global_requeues = tonumber(redis.call('hget', requeues_count_key, '___total___'))
  if global_requeues and global_requeues >= tonumber(global_max_requeues) then
    return false
  end

  local requeues = tonumber(redis.call('hget', requeues_count_key, test))
  if requeues and requeues >= max_requeues then
    return false
  end

  redis.call('hincrby', requeues_count_key, '___total___', 1)
  redis.call('hincrby', requeues_count_key, test, 1)

  local pivot = redis.call('lrange', queue_key, -1 - index, 0 - index)[1]
  if pivot then
    redis.call('linsert', queue_key, 'BEFORE', pivot, test)
  else
    redis.call('lpush', queue_key, test)
  end

  redis.call('zrem', running_key, test)

  return true
LUA
381
+
382
# Tries to put the test behind `result` back into the shared queue.
#
# Returns false right away when retries are disabled in the config.
# Otherwise a random insertion index between 0 and the current queue
# length is drawn from the config's random source and the REQUEUE script
# decides; the method returns true only when the script replies with 1.
def attempt_to_retry(result)
  return false unless @config.retries?

  queue_length = @redis.call("llen", key("queue"))
  index = @config.random.rand(0..queue_length)
  load_script(REQUEUE)
  reply = eval_script(
    REQUEUE,
    keys: [
      key("processed"),
      key("requeues-count"),
      key("queue"),
      key("running"),
      key("owners"),
    ],
    argv: [
      worker_id,
      @config.max_retries,
      @config.total_max_retries(@size),
      result.test_id,
      index,
    ],
  )
  reply == 1
end
405
+
406
# Runs a Lua script through EVALSHA and refreshes the TTL of every key
# it was given.
#
# script - the script source; loaded (and cached) through #load_script.
# keys:  - key names passed to the script, each one also EXPIREd.
# argv:  - extra arguments for the script.
# redis: - object the commands are issued on; wrapped in a pipeline when
#          it supports one (see #pipelined).
#
# Returns the first element of what #pipelined returned.
def eval_script(script, keys: [], argv: [], redis: @redis)
  sha = load_script(script)
  replies = pipelined(redis) do |pipeline|
    pipeline.call("evalsha", sha, keys.size, keys, argv)
    keys.each { |touched_key| pipeline.call("expire", touched_key, @ttl) }
  end
  script_reply, = replies
  script_reply
end
416
+
417
# Runs the block inside `redis.pipelined` when `redis` supports it;
# otherwise yields `redis` itself (e.g. when it already is a pipeline).
#
# Returns whatever the chosen path returns.
def pipelined(redis, &block)
  return redis.pipelined(&block) if redis.respond_to?(:pipelined)

  yield redis
end
424
+
425
# Returns the value of SCRIPT LOAD for `script`, asking Redis only the
# first time a given source is seen and serving the cached reply after.
def load_script(script)
  cache = (@scripts_cache ||= {})
  cache[script] ||= @redis.call("script", "load", script)
end
429
+
430
# Builds a Redis key name: "build", the build id and every given segment
# joined with ":".
def key(*args)
  segments = ["build", @build_id]
  segments.concat(args)
  segments.join(":")
end
433
+
434
# Returns the configured worker id.
#
# Raises Error when the queue was built without one.
def worker_id
  @worker_id || raise(Error, "RedisQueue not configured with a worker id")
end
437
+
438
# Local queue used when a worker is re-run: it only contains the tests
# that worker failed previously, as reported by the shared queue.
class RetryQueue < Queue
  # config       - passed to the superclass constructor.
  # global_queue - the shared queue that supplies the failed test ids and
  #                receives successful results.
  def initialize(config, global_queue)
    super(config)
    @global_queue = global_queue
  end

  # Indexes `test_cases` (via super), then narrows the queue down to the
  # test cases whose ids the shared queue lists as failed.
  #
  # Raises KeyError when a failed id has no matching test case.
  def populate(test_cases)
    super
    failed_test_ids = @global_queue.failed_test_ids
    @size = failed_test_ids.size
    @queue = failed_test_ids.map { |id| @test_cases_index.fetch(id) }
  end

  # Records the result locally (via super) and forwards it to the shared
  # queue when it is a success.
  #
  # Always returns the locally recorded result, in line with what
  # RedisQueue#record_result returns.
  def record_result(original_result)
    result = super
    @global_queue.record_result(result) if result.success?
    result
  end
end
458
+ end
459
+ end
@@ -0,0 +1,266 @@
1
+ # frozen_string_literal: true
2
+
3
+ # :stopdoc:
4
+
5
+ module Megatest
6
+ module Reporters
7
# Base class for reporters. Every hook is a no-op here; subclasses
# override the ones they need and write through @out.
class AbstractReporter
  # Presumably removed so that reporters cannot write around @out by
  # accident - TODO confirm.
  undef_method :puts, :print

  # config - reporter configuration (colors, backtrace cleaner, ...).
  # out    - the raw output, wrapped in an Output instance.
  def initialize(config, out)
    @config = config
    @out = Output.new(out, colors: config.colors)
  end

  # Hook: called with the executor and the queue. No-op here.
  def start(_executor, _queue)
  end

  # Hook: called with the queue and a test case. No-op here.
  def before_test_case(_queue, _test_case)
  end

  # Hook: called with the queue, a test case and its result. No-op here.
  def after_test_case(_queue, _test_case, _result)
  end

  # Hook: called with the executor, the queue and a summary. No-op here.
  def summary(_executor, _queue, _summary)
  end

  private

  # Headline label for each non-successful result status.
  LABELS = {
    retried: "Retried",
    error: "Error",
    failure: "Failure",
    skipped: "Skipped",
  }.freeze

  # Renders a non-successful result as text: a colored headline, the
  # error or failure message, the cleaned backtrace and, when `command`
  # is true, the command that re-runs the test.
  #
  # Raises KeyError when the result status has no entry in LABELS.
  def render_failure(result, command: true)
    headline = "#{LABELS.fetch(result.status)}: #{result.test_id}\n"
    warning = result.retried? || result.skipped?
    # Unary plus gives us a mutable string to append to.
    report = +(warning ? @out.yellow(headline) : @out.red(headline))
    report << "\n"

    if result.error?
      cause = result.failure.cause
      report << @out.indent("#{cause.name}: #{@out.colored(cause.message)}\n")
    elsif result.failed?
      report << @out.indent(@out.colored(result.failure.message.to_s))
    end
    report << "\n" unless report.end_with?("\n")
    report << "\n"

    frames = @config.backtrace.clean(result.failure.backtrace)
    frames&.each { |frame| report << " #{@out.cyan(frame)}\n" }

    if command
      report << "\n"
      report << @out.yellow(run_command(result))
    end

    report
  end

  # Command line that re-runs the test behind `result`.
  def run_command(result)
    location = Megatest.relative_path(result.test_location)
    "#{@config.program_name} #{location}"
  end
end
70
+
71
# Reporter printing one character per test case, followed by the
# rendered failures and run statistics.
class SimpleReporter < AbstractReporter
  # Announces how many test cases are queued and which seed is in use.
  def start(_executor, queue)
    @out.puts("Running #{queue.size} test cases with --seed #{@config.seed}")
    @out.puts
  end

  # Prints the progress marker for `result`: S (skipped), R (retried),
  # E (error), F (failed) or "." for anything else.
  def after_test_case(_queue, _test_case, result)
    marker =
      if result.skipped?
        @out.yellow("S")
      elsif result.retried?
        @out.yellow("R")
      elsif result.error?
        @out.red("E")
      elsif result.failed?
        @out.red("F")
      else
        @out.green(".")
      end
    @out.print(marker)
  end

  # Prints every non-skipped failure (sorted by test id, numbered from
  # 1), then timing figures when the executor reports a positive wall
  # time, then the overall counters.
  def summary(executor, _queue, summary)
    2.times { @out.puts }

    reportable = summary.failures.reject(&:skipped?).sort_by(&:test_id)
    reportable.each.with_index(1) do |failure, position|
      @out.print " #{position}) "
      @out.puts render_failure(failure)
      @out.puts
    end

    wall_time = executor.wall_time.to_f
    if wall_time > 0.0
      @out.puts format(
        "Finished in %.2fs, %d cases/s, %d assertions/s, %.2fs tests runtime.",
        wall_time,
        (summary.runs_count / wall_time).to_i,
        (summary.assertions_count / wall_time).to_i,
        summary.total_time,
      )
    end

    @out.puts format(
      "Ran %d cases, %d assertions, %d failures, %d errors, %d retries, %d skips",
      summary.runs_count,
      summary.assertions_count,
      summary.failures_count,
      summary.errors_count,
      summary.retries_count,
      summary.skips_count,
    )
  end
end
126
+
127
# SimpleReporter variant that prints each test id before its marker and
# renders bad results as soon as they happen.
class VerboseReporter < SimpleReporter
  # Prints the test id followed by " = ", leaving the line open for the
  # marker.
  def before_test_case(_queue, test_case)
    @out.print("#{test_case.id} = ")
  end

  # Prints the marker (via super), ends the line and, when the result is
  # bad, prints its rendered failure.
  def after_test_case(_queue, _test_case, result)
    super
    @out.puts
    return unless result.bad?

    @out.puts @out.colored(render_failure(result))
  end
end
140
+
141
# Reporter that only prints the id of each test case before it runs, one
# per line.
class OrderReporter < AbstractReporter
  def before_test_case(_queue, test_case)
    test_id = test_case.id
    @out.puts(test_id)
  end
end
146
+
147
+ class JUnitReporter < AbstractReporter
148
# Writes the whole report: the XML declaration, then one <testsuites>
# element (timed with the executor's wall time) wrapping one suite per
# group of results.
#
# Results are grouped by the part of their test id that precedes the
# first "#".
def summary(executor, _queue, summary)
  @depth = 0
  @out.puts(%{<?xml version="1.0" encoding="UTF-8"?>})

  named_results = summary.results.map { |result| [*result.test_id.split("#", 2), result] }
  results_by_suite = named_results.group_by(&:first)

  tag(:testsuites, { time: executor.wall_time }) do
    results_by_suite.each do |testsuite, suite_results|
      render_test_suite(testsuite, suite_results)
    end
  end
end
160
+
161
+ private
162
+
163
# Escapes `string` for use inside a double-quoted XML attribute value.
#
# "&", "<", ">" and the double quote become entity references. Tab,
# newline and carriage return become numeric character references,
# because a parser would otherwise normalize them to spaces.
def attr_escape(string)
  string.gsub(
    /[&<>"\t\n\r]/,
    "&" => "&amp;",
    "<" => "&lt;",
    ">" => "&gt;",
    '"' => "&quot;",
    "\t" => "&#9;",
    "\n" => "&#10;",
    "\r" => "&#13;",
  )
end
170
+
171
# Wraps `string` in a CDATA section.
#
# A literal "]]>" would end the section early, so it is split across two
# adjacent CDATA sections; parsers hand back the original text
# unchanged.
def cdata(string)
  "<![CDATA[#{string.gsub("]]>", "]]]]><![CDATA[>")}]]>"
end
175
+
176
+ using Compat::Tally unless Enumerable.method_defined?(:tally)
177
+
178
# Emits one <testsuite> element and, nested inside it, every test case
# of that suite.
#
# testsuite     - the suite name.
# named_results - rows of [suite name, test case name, result].
#
# The file path comes from the first result's location (the part before
# the first ":"); retried results are counted together with the skipped
# ones.
def render_test_suite(testsuite, named_results)
  results = named_results.map(&:last)
  counts = results.map(&:status).tally
  suite_file = results.first.test_location.split(":", 2).first

  attributes = {
    name: testsuite,
    filepath: Megatest.relative_path(suite_file),
    tests: results.size,
    assertions: results.sum(&:assertions_count),
    time: results.sum { |result| result.duration || 0.0 },
    failures: counts.fetch(:failure, 0),
    errors: counts.fetch(:error, 0),
    skipped: counts.fetch(:skipped, 0) + counts.fetch(:retried, 0),
  }

  tag(:testsuite, attributes) do
    named_results.each do |(_suite, testcase, result)|
      render_test_case(testsuite, testcase, result)
    end
  end
end
199
+
200
# Emits one JUnit <testcase> element for `result`.
#
# testsuite - suite name, written as the `classname` attribute.
# testcase  - test case name, written as the `name` attribute.
# result    - the result to describe.
#
# Successful results produce an empty element. Skipped and retried
# results get a nested <skipped>. Everything else gets a nested <error>
# or <failure> whose text is the rendered failure inside a CDATA section.
def render_test_case(testsuite, testcase, result)
  file, line = result.test_location.split(":", 2)
  # Drop everything from the first "~" onward. `line` is nil when the
  # location has no ":" part, in which case the attribute is left out.
  line = line&.sub(/~.*/, "")
  file = Megatest.relative_path(file)

  attributes = {
    name: testcase,
    classname: testsuite,
    file: file,
    line: line,
    assertions: result.assertions_count,
    time: result.duration || 0.0,
    "run-command": run_command(result),
  }

  # The element name is `testcase`, as the JUnit format spells it.
  if result.success?
    tag(:testcase, attributes)
  elsif result.skipped? || result.retried?
    tag(:testcase, attributes) do
      tag(:skipped, { message: result.failure.message })
    end
  else
    tag(:testcase, attributes) do
      if result.error?
        tag_name = :error
        message = result.failure.message
      else
        tag_name = :failure
        message = "Assertion Failure"
      end
      tag(tag_name, { type: result.failure.name, message: message }, text: cdata(render_failure(result, command: false)))
    end
  end
end
234
+
235
# Writes one XML element to @out at the current indentation depth.
#
# name  - element name.
# attrs - Hash of attributes (or nil); nil values are skipped, all other
#         values are stringified and passed through #attr_escape.
# text: - raw inner text, written as is. Ignored when a block is given.
#
# With a block the element is opened, the block runs one level deeper
# and the element is closed afterwards. Without block or text the
# element is self-closing.
def tag(name, attrs, text: nil)
  indent

  @out << "<#{name}"
  attrs&.each do |attr, value|
    next if value.nil?

    @out << %{ #{attr}="#{attr_escape(value.to_s)}"}
  end

  closing = "</#{name}>"
  if block_given?
    @out.puts(">")
    @depth += 1
    yield
    @depth -= 1
    indent
    @out.puts(closing)
  elsif text
    @out.print(">")
    @out.print(text)
    @out.puts(closing)
  else
    @out.puts("/>")
  end
end
260
+
261
# Appends one padding string to @out per level of the current depth.
def indent
  @depth.times do
    @out << " "
  end
end
264
+ end
265
+ end
266
+ end