test-queue-patched 0.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.travis.yml +18 -0
- data/Gemfile +5 -0
- data/Gemfile-cucumber1-3 +4 -0
- data/Gemfile-cucumber1-3.lock +33 -0
- data/Gemfile-cucumber2-4 +4 -0
- data/Gemfile-cucumber2-4.lock +37 -0
- data/Gemfile-minitest4 +3 -0
- data/Gemfile-minitest4.lock +19 -0
- data/Gemfile-minitest5 +3 -0
- data/Gemfile-minitest5.lock +19 -0
- data/Gemfile-rspec2-1 +3 -0
- data/Gemfile-rspec2-1.lock +27 -0
- data/Gemfile-rspec3-0 +3 -0
- data/Gemfile-rspec3-0.lock +31 -0
- data/Gemfile-rspec3-1 +3 -0
- data/Gemfile-rspec3-1.lock +31 -0
- data/Gemfile-rspec3-2 +3 -0
- data/Gemfile-rspec3-2.lock +32 -0
- data/Gemfile-testunit +3 -0
- data/Gemfile-testunit.lock +21 -0
- data/Gemfile.lock +41 -0
- data/README.md +126 -0
- data/bin/cucumber-queue +4 -0
- data/bin/minitest-queue +4 -0
- data/bin/rspec-queue +4 -0
- data/bin/testunit-queue +4 -0
- data/lib/test-queue.rb +1 -0
- data/lib/test_queue/iterator.rb +107 -0
- data/lib/test_queue/runner/cucumber.rb +115 -0
- data/lib/test_queue/runner/minitest.rb +21 -0
- data/lib/test_queue/runner/minitest4.rb +88 -0
- data/lib/test_queue/runner/minitest5.rb +87 -0
- data/lib/test_queue/runner/puppet_lint.rb +31 -0
- data/lib/test_queue/runner/rspec.rb +79 -0
- data/lib/test_queue/runner/rspec2.rb +44 -0
- data/lib/test_queue/runner/rspec3.rb +54 -0
- data/lib/test_queue/runner/sample.rb +74 -0
- data/lib/test_queue/runner/testunit.rb +74 -0
- data/lib/test_queue/runner.rb +632 -0
- data/lib/test_queue/stats.rb +95 -0
- data/lib/test_queue/test_framework.rb +29 -0
- data/lib/test_queue.rb +8 -0
- data/script/bootstrap +12 -0
- data/script/cibuild +19 -0
- data/script/spec +7 -0
- data/spec/stats_spec.rb +76 -0
- data/test/cucumber.bats +57 -0
- data/test/minitest4.bats +34 -0
- data/test/minitest5.bats +194 -0
- data/test/rspec.bats +46 -0
- data/test/samples/features/bad.feature +5 -0
- data/test/samples/features/sample.feature +25 -0
- data/test/samples/features/sample2.feature +29 -0
- data/test/samples/features/step_definitions/common.rb +19 -0
- data/test/samples/sample_minispec.rb +37 -0
- data/test/samples/sample_minitest4.rb +25 -0
- data/test/samples/sample_minitest5.rb +33 -0
- data/test/samples/sample_rspec_helper.rb +1 -0
- data/test/samples/sample_shared_examples_for_spec.rb +5 -0
- data/test/samples/sample_spec.rb +25 -0
- data/test/samples/sample_split_spec.rb +17 -0
- data/test/samples/sample_testunit.rb +25 -0
- data/test/samples/sample_use_shared_example1_spec.rb +8 -0
- data/test/samples/sample_use_shared_example2_spec.rb +8 -0
- data/test/sleepy_runner.rb +14 -0
- data/test/testlib.bash +89 -0
- data/test/testunit.bats +20 -0
- data/test-queue-patched.gemspec +21 -0
- metadata +117 -0
@@ -0,0 +1,632 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'socket'
|
3
|
+
require 'fileutils'
|
4
|
+
require 'securerandom'
|
5
|
+
require 'test_queue/stats'
|
6
|
+
require 'test_queue/test_framework'
|
7
|
+
|
8
|
+
module TestQueue
|
9
|
+
# Bookkeeping record for a single worker process: identity, timing,
# captured output and the suites it ran.
class Worker
  attr_accessor :pid, :status, :output, :num, :host,
                :start_time, :end_time,
                :summary, :failure_output

  # Array of TestQueue::Stats::Suite recording all the suites this worker ran.
  attr_reader :suites

  # pid - process id of the worker.
  # num - number assigned to the worker by the runner.
  #
  # The start time is taken at construction; output and suites start empty.
  def initialize(pid, num)
    @pid, @num = pid, num
    @start_time = Time.now
    @output = ''
    @suites = []
  end

  # Returns the captured output as an Array of Strings split on newlines.
  def lines
    output.split("\n")
  end
end
|
29
|
+
|
30
|
+
class Runner
|
31
|
+
attr_accessor :concurrency, :exit_when_done
|
32
|
+
attr_reader :stats
|
33
|
+
|
34
|
+
TOKEN_REGEX = /^TOKEN=(\w+)/
|
35
|
+
|
36
|
+
# Build a runner for the given test framework.
#
# test_framework - object answering all_suite_files.
# concurrency    - optional worker count; falls back to TEST_QUEUE_WORKERS,
#                  then to a CPU count read from the system, then to 2.
# socket         - optional master socket; falls back to TEST_QUEUE_SOCKET,
#                  then to a per-process path under /tmp.
# relay          - optional relay address; falls back to TEST_QUEUE_RELAY.
#
# Raises ArgumentError when TEST_QUEUE_EARLY_FAILURE_LIMIT is not an integer
# or when the resulting worker count is not greater than 0.
def initialize(test_framework, concurrency=nil, socket=nil, relay=nil)
  @test_framework = test_framework
  @stats = Stats.new(stats_file)

  failure_limit = ENV['TEST_QUEUE_EARLY_FAILURE_LIMIT']
  if failure_limit
    begin
      @early_failure_limit = Integer(failure_limit)
    rescue ArgumentError
      raise ArgumentError, 'TEST_QUEUE_EARLY_FAILURE_LIMIT could not be parsed as an integer'
    end
  end

  @procline = $0

  # Comma-separated suite names that restrict (and order) the run.
  forced = ENV['TEST_QUEUE_FORCE']
  @whitelist = (forced ? forced.split(/\s*,\s*/) : []).freeze

  # Seed the queue from recorded stats, longest suites first, keeping only
  # suites whose file the framework still reports.
  known_files = @test_framework.all_suite_files.to_set
  recorded = @stats.all_suites.select { |suite| known_files.include?(suite.path) }
  @queue = recorded
    .sort_by { |suite| -suite.duration }
    .map { |suite| [suite.name, suite.path] }

  unless @whitelist.empty?
    @queue.select! { |name, _path| @whitelist.include?(name) }
    @queue.sort_by! { |name, _path| @whitelist.index(name) }
  end

  @awaited_suites = Set.new(@whitelist)
  @original_queue = Set.new(@queue).freeze

  @workers = {}
  @completed = []

  @concurrency =
    if concurrency
      concurrency
    elsif ENV['TEST_QUEUE_WORKERS']
      ENV['TEST_QUEUE_WORKERS'].to_i
    elsif File.exist?('/proc/cpuinfo')
      File.read('/proc/cpuinfo').split("\n").grep(/processor/).size
    elsif RUBY_PLATFORM =~ /darwin/
      `/usr/sbin/sysctl -n hw.activecpu`.to_i
    else
      2
    end
  unless @concurrency > 0
    raise ArgumentError, "Worker count (#{@concurrency}) must be greater than 0"
  end

  relay_timeout = ENV['TEST_QUEUE_RELAY_TIMEOUT']
  @relay_connection_timeout = relay_timeout ? relay_timeout.to_i : 30

  @run_token = ENV['TEST_QUEUE_RELAY_TOKEN'] || SecureRandom.hex(8)

  @socket = socket || ENV['TEST_QUEUE_SOCKET'] || "/tmp/test_queue_#{$$}_#{object_id}.sock"
  @relay = relay || ENV['TEST_QUEUE_RELAY']

  if ENV.has_key?("TEST_QUEUE_REMOTE_MASTER_MESSAGE")
    @remote_master_message = ENV["TEST_QUEUE_REMOTE_MASTER_MESSAGE"]
  end

  if @relay == @socket
    STDERR.puts "*** Detected TEST_QUEUE_RELAY == TEST_QUEUE_SOCKET. Disabling relay mode."
    @relay = nil
  elsif @relay
    # In relay mode the local queue is emptied.
    @queue = []
  end

  @discovered_suites = Set.new
  @assignments = {}

  @exit_when_done = true

  @aborting = false
end
|
119
|
+
|
120
|
+
# Run the tests.
#
# When exit_when_done is true the process is terminated with exit! before
# this method returns. Otherwise the Integer status computed by
# summarize_internal is returned to the caller.
def execute
  $stdout.sync = true
  $stderr.sync = true
  @start_time = Time.now

  execute_internal
  exitstatus = summarize_internal
  return exitstatus unless exit_when_done

  exit! exitstatus
end
|
138
|
+
|
139
|
+
# Print the end-of-run summary, record and save suite statistics, and
# compute the exit status for the run.
#
# The status is the sum of every completed worker's exit code (1 when a
# worker has no exit code), plus 1 for each kind of scheduling anomaly
# detected on a non-relay master: suites discovered but never run, suites
# run without having been assigned, and suites run by a worker other than
# the one they were assigned to.
#
# Returns the Integer exit status, capped at 255.
def summarize_internal
  puts
  puts "==> Summary (#{@completed.size} workers in %.4fs)" % (Time.now-@start_time)
  puts

  estatus = 0
  misrun_suites = []
  unassigned_suites = []
  @failures = ''
  @completed.each do |worker|
    estatus += (worker.status.exitstatus || 1)
    @stats.record_suites(worker.suites)
    worker.suites.each do |suite|
      # Reconcile what the worker ran against what the master handed out.
      assignment = @assignments.delete([suite.name, suite.path])
      host = worker.host || Socket.gethostname
      if assignment.nil?
        unassigned_suites << [suite.name, suite.path]
      elsif assignment != [host, worker.pid]
        misrun_suites << [suite.name, suite.path] + assignment + [host, worker.pid]
      end
      @discovered_suites.delete([suite.name, suite.path])
    end

    summarize_worker(worker)

    @failures << worker.failure_output if worker.failure_output

    puts " [%2d] %60s %4d suites in %.4fs (%s %s)" % [
      worker.num,
      worker.summary,
      worker.suites.size,
      worker.end_time - worker.start_time,
      worker.status.to_s,
      worker.host && " on #{worker.host.split('.').first}"
    ]
  end

  unless @failures.empty?
    puts
    puts "==> Failures"
    puts
    puts @failures
  end

  if !relay?
    unless @discovered_suites.empty?
      estatus += 1
      puts
      puts "The following suites were discovered but were not run:"
      puts

      @discovered_suites.sort.each do |suite_name, path|
        puts "#{suite_name} - #{path}"
      end
    end
    unless unassigned_suites.empty?
      estatus += 1
      puts
      puts "The following suites were not discovered but were run anyway:"
      puts
      unassigned_suites.sort.each do |suite_name, path|
        puts "#{suite_name} - #{path}"
      end
    end
    unless misrun_suites.empty?
      estatus += 1
      puts
      puts "The following suites were run on the wrong workers:"
      puts
      misrun_suites.each do |suite_name, path, target_host, target_pid, actual_host, actual_pid|
        puts "#{suite_name} - #{path}: #{actual_host} (#{actual_pid}) - assigned to #{target_host} (#{target_pid})"
      end
    end
  end

  puts

  @stats.save

  summarize

  # estatus combines the workers' exit codes with the anomaly penalties
  # added above; it must not be recomputed from the workers alone, or a run
  # that skipped suites could exit 0.
  [estatus, 255].min
end
|
223
|
+
|
224
|
+
# Hook invoked by summarize_internal after the stats have been saved.
# The default implementation does nothing.
def summarize
  nil
end
|
226
|
+
|
227
|
+
# Returns the String path of the stats file: TEST_QUEUE_STATS when set,
# otherwise '.test_queue_stats'.
def stats_file
  ENV.fetch('TEST_QUEUE_STATS', '.test_queue_stats')
end
|
231
|
+
|
232
|
+
# Drive one full run: start the master, call the prepare hook, connect to
# the relay when in relay mode, start suite discovery, fork the workers and
# serve the queue. The master is stopped and all subprocesses are killed
# afterwards, even when a step raises.
def execute_internal
  begin
    start_master
    prepare(@concurrency)
    @prepared_time = Time.now
    start_relay if relay?
    discover_suites
    spawn_workers
    distribute_queue
  ensure
    stop_master

    kill_subprocesses
  end
end
|
245
|
+
|
246
|
+
# Open the listening socket (unless relaying) and announce the master.
#
# A @socket of the form "[host:]port" opens a TCPServer, binding to
# 0.0.0.0 when no host is given; anything else is treated as a filesystem
# path for a UNIXServer, replacing any file already at that path. The
# process title ($0) is updated to describe the master.
def start_master
  unless relay?
    tcp = /^(?:(.+):)?(\d+)$/.match(@socket)
    if tcp
      host, port = tcp[1], tcp[2]
      @socket = "#{host}:#{port}"
      @server = TCPServer.new(host || '0.0.0.0', port.to_i)
    else
      FileUtils.rm(@socket) if File.exist?(@socket)
      @server = UNIXServer.new(@socket)
    end
  end

  target = relay? ? "relaying to #{@relay}" : @socket
  desc = "test-queue master (#{target})"
  puts "Starting #{desc}"
  $0 = "#{desc} - #{@procline}"
end
|
263
|
+
|
264
|
+
# Register this remote master with the central master.
#
# Sends the run token followed by a "REMOTE MASTER" line carrying the local
# worker count, hostname and optional message, then expects "OK" back.
#
# Exits the process with status 1 when the relay cannot be reached or the
# master does not answer "OK" (including when it hangs up without replying).
def start_relay
  return unless relay?

  sock = connect_to_relay
  message = @remote_master_message ? " #{@remote_master_message}" : ""
  message = message.gsub(/(\r|\n)/, "") # Our "protocol" is newline-separated
  sock.puts("TOKEN=#{@run_token}")
  sock.puts("REMOTE MASTER #{@concurrency} #{Socket.gethostname} #{message}")

  # gets returns nil when the master closes the connection without
  # answering; treat that as an empty (non-OK) response instead of crashing.
  response = sock.gets.to_s.strip
  sock.close
  return if response == "OK"

  STDERR.puts "*** Got non-OK response from master: #{response}"
  exit! 1
rescue Errno::ECONNREFUSED
  STDERR.puts "*** Unable to connect to relay #{@relay}. Aborting.."
  exit! 1
end
|
283
|
+
|
284
|
+
# Shut down the listening socket. Does nothing in relay mode.
#
# Removes the socket file when the server is a UNIXServer, closes the
# server while ignoring any StandardError from close, and clears both
# @socket and @server.
def stop_master
  return if relay?

  unix_server = @server.is_a?(UNIXServer)
  FileUtils.rm_f(@socket) if @socket && unix_server

  if @server
    begin
      @server.close
    rescue StandardError
      nil
    end
  end

  @socket = @server = nil
end
|
291
|
+
|
292
|
+
# Fork @concurrency worker processes, numbered from 1.
#
# Each child closes its copy of the server socket, builds an Iterator
# pointed at the relay (in relay mode) or the local socket, runs the
# after-fork hooks and run_worker, then exits with run_worker's result
# (0 when it returns nil or false). The parent records a Worker per child
# in @workers, keyed by pid.
def spawn_workers
  @concurrency.times do |index|
    worker_num = index + 1

    child_pid = fork do
      @server.close if @server

      target = relay? ? @relay : @socket
      iterator = Iterator.new(@test_framework, target, method(:around_filter), early_failure_limit: @early_failure_limit, run_token: @run_token)
      after_fork_internal(worker_num, iterator)
      exit_code = run_worker(iterator) || 0
      cleanup_worker
      Kernel.exit! exit_code
    end

    @workers[child_pid] = Worker.new(child_pid, worker_num)
  end
end
|
309
|
+
|
310
|
+
# Fork a process that walks every suite file and reports each suite to the
# master over the server socket as a "NEW SUITE" message.
#
# Skipped on remote masters (the central master discovers and distributes
# suites) and when a whitelist is in effect and nothing is still awaited.
# The child exits with status 0 when finished, or at the next suite after
# receiving INT.
#
# NOTE(review): the payload is raw Marshal data sent with puts on a
# newline-delimited protocol; a payload that happens to contain "\n" would
# presumably be cut short by the reader's gets -- confirm.
def discover_suites
  return if relay?
  return if @whitelist.any? && @awaited_suites.empty?

  @discovering_suites_pid = fork do
    interrupted = false
    Signal.trap("INT") { interrupted = true }

    $0 = "test-queue suite discovery process"

    @test_framework.all_suite_files.each do |path|
      @test_framework.suites_from_file(path).each do |suite_name, _suite|
        Kernel.exit!(0) if interrupted

        # One short-lived connection per discovered suite.
        @server.connect_address.connect do |sock|
          sock.puts("TOKEN=#{@run_token}")
          sock.puts("NEW SUITE #{Marshal.dump([suite_name, path])}")
        end
      end
    end

    Kernel.exit! 0
  end
end
|
339
|
+
|
340
|
+
# Should workers be told to wait rather than be handed a suite?
#
# Returns true while whitelisted suites are still awaited (so they can be
# run in whitelist order), or while the queue is empty and the discovery
# process is still recorded as running. Returns false otherwise.
def awaiting_suites?
  return true if @awaited_suites.any?

  discovery_running = @discovering_suites_pid ? true : false
  @queue.empty? && discovery_running
end
|
354
|
+
|
355
|
+
# Handle a suite reported by the discovery process.
#
# Suites outside a non-empty whitelist are ignored. Every other suite is
# recorded in @discovered_suites. A suite that was already in the original
# queue is only struck from the awaited set. A new suite goes to the front
# of the queue: its run time is unknown, and running a fast suite early
# costs less than running a slow suite late. Once the last awaited suite
# arrives, the queue is sorted into whitelist order and the discovery
# process is sent INT.
def enqueue_discovered_suite(suite_name, path)
  return if @whitelist.any? && !@whitelist.include?(suite_name)

  @discovered_suites << [suite_name, path]

  if @original_queue.include?([suite_name, path])
    # Already queued from the stats file.
    @awaited_suites.delete(suite_name)
    return
  end

  @queue.unshift([suite_name, path])

  was_last_awaited = @awaited_suites.delete?(suite_name) && @awaited_suites.empty?
  return unless was_last_awaited

  @queue.sort_by! { |name, _path| @whitelist.index(name) }

  kill_suite_discovery_process("INT")
end
|
381
|
+
|
382
|
+
# Set up a freshly forked worker before it starts pulling suites.
#
# Reseeds the random number generator, redirects stdout and stderr to the
# worker's output file under /tmp, sets the process title, prints a start
# banner and finally calls the after_fork hook.
#
# num      - worker number, used in the process title.
# iterator - the worker's iterator; its sock is shown in the banner.
def after_fork_internal(num, iterator)
  srand

  log = File.open("/tmp/test_queue_worker_#{$$}_output", 'w')

  $stdout.reopen(log)
  $stderr.reopen($stdout)
  [$stderr, $stdout].each { |stream| stream.sync = true }

  $0 = "test-queue worker [#{num}]"
  puts
  puts "==> Starting #$0 (#{Process.pid} on #{Socket.gethostname}) - iterating over #{iterator.sock}"
  puts

  after_fork(num)
end
|
398
|
+
|
399
|
+
# Hook run in the master before any worker is forked, e.g. to create
# `concurrency` copies of databases the test workers need.
#
# concurrency - the number of workers about to be started.
#
# The default implementation does nothing.
def prepare(concurrency)
  nil
end
|
404
|
+
|
405
|
+
# Hook wrapped around a suite; handed to the Iterator as a Method object.
# The default implementation simply yields to the given block.
#
# suite - the suite being wrapped (unused by the default implementation).
def around_filter(suite)
  yield
end
|
408
|
+
|
409
|
+
# Hook run inside each worker after the fork, to prepare it for executing
# jobs. The default implementation does nothing.
#
# num - the worker's number.
def after_fork(num)
  nil
end
|
412
|
+
|
413
|
+
# Entry point for internal runner implementations. The iterator yields jobs
# from the shared queue on the master; this default implementation only
# prints each job.
#
# Returns an Integer number of failures, used as the worker's exit status.
def run_worker(iterator)
  iterator.each { |item| puts " #{item.inspect}" }

  0 # exit status
end
|
424
|
+
|
425
|
+
# Hook run inside a worker after run_worker returns and before the worker
# exits. The default implementation does nothing.
def cleanup_worker
  nil
end
|
427
|
+
|
428
|
+
# Fill in the summary fields of a completed worker. The default
# implementation sets both summary and failure_output to empty strings
# (two separate String objects).
#
# worker - the completed worker to summarize.
def summarize_worker(worker)
  worker.summary = ''
  worker.failure_output = ''
end
|
432
|
+
|
433
|
+
# Wait on worker processes and finalize those that have exited.
#
# blocking - when true, wait for each worker in turn; when false, only
#            collect workers that have already exited.
#
# Each reaped worker gets its status and end time set, its data files
# collected, is relayed to the central master in relay mode, and is passed
# to worker_completed before being removed from @workers.
def reap_workers(blocking=true)
  wait_flags = blocking ? 0 : Process::WNOHANG

  @workers.delete_if do |_pid, worker|
    _, exit_status = Process.waitpid2(worker.pid, wait_flags)

    if exit_status.nil?
      # Still running; keep it in the table.
      false
    else
      worker.status = exit_status
      worker.end_time = Time.now

      collect_worker_data(worker)
      relay_to_master(worker) if relay?
      worker_completed(worker)

      true
    end
  end
end
|
449
|
+
|
450
|
+
# Pull in the files a worker left behind under /tmp, deleting each one
# after it is read.
#
# The "_output" file replaces worker.output with its raw bytes. The
# "_suites" file holds Marshal data whose contents replace worker.suites.
# A missing file is skipped.
def collect_worker_data(worker)
  output_path = "/tmp/test_queue_worker_#{worker.pid}_output"
  if File.exist?(output_path)
    worker.output = IO.binread(output_path)
    FileUtils.rm(output_path)
  end

  suites_path = "/tmp/test_queue_worker_#{worker.pid}_suites"
  return unless File.exist?(suites_path)

  worker.suites.replace(Marshal.load(IO.binread(suites_path)))
  FileUtils.rm(suites_path)
end
|
461
|
+
|
462
|
+
# Record a finished worker, unless the run is being aborted.
#
# The worker's output is printed when TEST_QUEUE_VERBOSE is set or the
# worker did not exit with status 0.
def worker_completed(worker)
  return if @aborting

  @completed << worker
  show_output = ENV['TEST_QUEUE_VERBOSE'] || worker.status.exitstatus != 0
  puts worker.output if show_output
end
|
467
|
+
|
468
|
+
# Serve the queue to workers until there is nothing left to hand out and
# every remote worker has reported back. Not run on remote masters.
#
# Each connection carries a "TOKEN=<run token>" line followed by one
# command line:
#
#   POP <host> <pid>              - hand out the next suite, or "WAIT"
#   REMOTE MASTER <n> <host> [m]  - a remote master with n workers joined
#   WORKER <bytes>                - a remote worker finished (Marshal data)
#   NEW SUITE <marshal>           - the discovery process found a suite
#   KABOOM                        - too many failures; stop distributing
#
# Connections with a missing or foreign token are answered "WRONG RUN".
# Every accepted connection is closed before the next one is served,
# whichever path handled it. The master is stopped and the local workers
# are reaped when the loop ends, however it ends.
def distribute_queue
  return if relay?
  remote_workers = 0

  until !awaiting_suites? && @queue.empty? && remote_workers == 0
    queue_status(@start_time, @queue.size, @workers.size, remote_workers)

    if status = reap_suite_discovery_process(false)
      abort("Discovering suites failed.") unless status.success?
      abort("Failed to discover #{@awaited_suites.sort.join(", ")} specified in TEST_QUEUE_FORCE") if @awaited_suites.any?
    end

    if IO.select([@server], nil, nil, 0.1).nil?
      reap_workers(false) # check for worker deaths
      next
    end

    sock = @server.accept
    begin
      # gets returns nil when the peer disconnects early; an empty string
      # then falls through to the "no token" handling below.
      token = sock.gets.to_s.strip
      cmd = sock.gets.to_s.strip

      token = token[TOKEN_REGEX, 1]
      # If we have a remote master from a different test run, respond with "WRONG RUN", and it will consider the test run done.
      if token != @run_token
        message = token.nil? ? "Worker sent no token to master" : "Worker from run #{token} connected to master"
        STDERR.puts "*** #{message} for run #{@run_token}; ignoring."
        sock.write("WRONG RUN\n")
        next
      end

      case cmd
      when /^POP (\S+) (\d+)/
        hostname = $1
        pid = Integer($2)
        if awaiting_suites?
          sock.write(Marshal.dump("WAIT"))
        elsif obj = @queue.shift
          data = Marshal.dump(obj)
          sock.write(data)
          @assignments[obj] = [hostname, pid]
        end
      when /^REMOTE MASTER (\d+) ([\w\.-]+)(?: (.+))?/
        num = $1.to_i
        remote_master = $2
        remote_master_message = $3

        sock.write("OK\n")
        remote_workers += num

        message = "*** #{num} workers connected from #{remote_master} after #{Time.now-@start_time}s"
        message << " " + remote_master_message if remote_master_message
        STDERR.puts message
      when /^WORKER (\d+)/
        data = sock.read($1.to_i)
        # NOTE: Marshal.load on data read from the socket. The run token is
        # the only check on the sender; do not expose the master socket to
        # untrusted peers.
        worker = Marshal.load(data)
        worker_completed(worker)
        remote_workers -= 1
      when /^NEW SUITE (.+)/
        # NOTE: same Marshal.load caveat as for WORKER above.
        suite_name, path = Marshal.load($1)
        enqueue_discovered_suite(suite_name, path)
      when /^KABOOM/
        # worker reporting an abnormal number of test failures;
        # stop everything immediately and report the results.
        break
      else
        STDERR.puts("Ignoring unrecognized command: \"#{cmd}\"")
      end
    ensure
      # Runs on the normal path and on next/break, so the descriptor is
      # released and the peer sees EOF.
      sock.close
    end
  end
ensure
  stop_master
  reap_workers
end
|
540
|
+
|
541
|
+
# Returns true when a relay address is configured, false otherwise.
def relay?
  @relay ? true : false
end
|
544
|
+
|
545
|
+
# Open a TCP connection to the relay address, retrying every half second
# while the connection is refused.
#
# Returns the connected TCPSocket.
# Raises Errno::ECONNREFUSED when a refusal occurs after more than
# @relay_connection_timeout seconds of trying.
def connect_to_relay
  started_at = Time.now
  puts "Attempting to connect for #{@relay_connection_timeout}s..."

  loop do
    begin
      return TCPSocket.new(*@relay.split(':'))
    rescue Errno::ECONNREFUSED
      raise if Time.now - started_at > @relay_connection_timeout
      puts "Master not yet available, sleeping..."
      sleep 0.5
    end
  end
end
|
560
|
+
|
561
|
+
# Send a finished worker to the central master.
#
# Stamps the worker with this machine's hostname, then sends the run token,
# a "WORKER <byte count>" line and the Marshal-dumped worker over a fresh
# relay connection. The connection is closed even when sending fails.
def relay_to_master(worker)
  worker.host = Socket.gethostname
  payload = Marshal.dump(worker)

  begin
    sock = connect_to_relay
    sock.puts("TOKEN=#{@run_token}")
    sock.puts("WORKER #{payload.bytesize}")
    sock.write(payload)
  ensure
    sock.close if sock
  end
end
|
572
|
+
|
573
|
+
# Kill every child process of this master: first the workers, then the
# suite discovery process.
def kill_subprocesses
  kill_workers
  kill_suite_discovery_process
end
|
577
|
+
|
578
|
+
# Send KILL to every worker still in @workers, then reap them.
def kill_workers
  @workers.each_key { |pid| Process.kill('KILL', pid) }

  reap_workers
end
|
585
|
+
|
586
|
+
# Signal the suite discovery process, if one is recorded, and wait for it.
#
# signal - signal name to send (default: "KILL").
#
# Returns nil when no discovery process is recorded, otherwise the result
# of reap_suite_discovery_process.
def kill_suite_discovery_process(signal="KILL")
  discovery_pid = @discovering_suites_pid
  return unless discovery_pid

  Process.kill(signal, discovery_pid)
  reap_suite_discovery_process
end
|
591
|
+
|
592
|
+
# Collect the exit status of the suite discovery process.
#
# blocking - when true, wait for the process to exit; when false, return
#            immediately if it is still running.
#
# Returns the Process::Status once the process has exited (and forgets its
# pid), or nil when there is no discovery process or it is still running.
def reap_suite_discovery_process(blocking=true)
  return unless @discovering_suites_pid

  wait_flags = blocking ? 0 : Process::WNOHANG
  _pid, exit_status = Process.waitpid2(@discovering_suites_pid, wait_flags)
  return unless exit_status

  @discovering_suites_pid = nil
  exit_status
end
|
600
|
+
|
601
|
+
# Stop the test run immediately.
#
# Marks the run as aborting (so workers reaped from here on are not
# recorded as completed), kills all subprocesses and exits via
# Kernel::abort.
#
# message - String message to print to the console when exiting.
#
# Doesn't return.
def abort(message)
  @aborting = true
  kill_subprocesses

  Kernel::abort("Aborting: #{message}")
end
|
611
|
+
|
612
|
+
# Hook for subclasses that want to monitor the state of the queue, for
# example to record how quickly remote workers connect or to abort the
# build when too few of them do.
#
# Called on every pass of the distribution loop, so implementations must
# not do anything expensive or blocking. It is only called on the central
# master, never on remote masters.
#
# start_time          - Time when the test run began
# queue_size          - Integer number of suites left in the queue
# local_worker_count  - Integer number of active local workers
# remote_worker_count - Integer number of active remote workers
#
# Returns nothing.
def queue_status(start_time, queue_size, local_worker_count, remote_worker_count)
  nil
end
|
631
|
+
end
|
632
|
+
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module TestQueue
|
2
|
+
# Suite timing records kept between runs, stored as Marshal data in a file.
class Stats
  # Immutable record of one suite: its name, file path, last recorded
  # duration and the time it was last seen.
  class Suite
    attr_reader :name, :path, :duration, :last_seen_at

    def initialize(name, path, duration, last_seen_at)
      @name = name
      @path = path
      @duration = duration
      @last_seen_at = last_seen_at

      freeze
    end

    # Two suites are equal when all four attributes match. Comparing with
    # anything that is not a Suite returns false instead of raising.
    def ==(other)
      other.is_a?(Suite) &&
        name == other.name &&
        path == other.path &&
        duration == other.duration &&
        last_seen_at == other.last_seen_at
    end
    alias_method :eql?, :==

    # Defined alongside eql? so that equal suites collapse to a single entry
    # when used as Hash keys, Set members or with Array#uniq.
    def hash
      [name, path, duration, last_seen_at].hash
    end

    # Returns a Hash representation; last_seen_at is stored as whole seconds.
    def to_h
      { :name => name, :path => path, :duration => duration, :last_seen_at => last_seen_at.to_i }
    end

    # Rebuild a Suite from the Hash produced by to_h.
    #
    # Raises KeyError when a key is missing.
    def self.from_hash(hash)
      self.new(hash.fetch(:name),
               hash.fetch(:path),
               hash.fetch(:duration),
               Time.at(hash.fetch(:last_seen_at)))
    end
  end

  # path - String path of the stats file. Existing data is loaded at once;
  #        a missing, unreadable or differently versioned file is ignored.
  def initialize(path)
    @path = path
    @suites = {}
    load
  end

  # Returns an Array of every known Suite.
  def all_suites
    @suites.values
  end

  # Returns the Suite recorded under name, or nil.
  def suite(name)
    @suites[name]
  end

  # Record the given suites, replacing earlier records with the same name.
  def record_suites(suites)
    suites.each do |suite|
      @suites[suite.name] = suite
    end
  end

  # Drop suites not seen for eight days, then write the rest to the file.
  def save
    prune

    File.open(@path, "wb") do |f|
      Marshal.dump(to_h, f)
    end
  end

  private

  CURRENT_VERSION = 2

  def to_h
    suites = @suites.each_value.map(&:to_h)

    { :version => CURRENT_VERSION, :suites => suites }
  end

  def load
    data = begin
      File.open(@path, "rb") { |f| Marshal.load(f) }
    rescue Errno::ENOENT, EOFError, TypeError, ArgumentError
      # No usable stats file; start with an empty record.
      nil
    end
    return unless data.is_a?(Hash) && data[:version] == CURRENT_VERSION

    data[:suites].each do |suite_hash|
      suite = Suite.from_hash(suite_hash)
      @suites[suite.name] = suite
    end
  end

  EIGHT_DAYS_S = 8 * 24 * 60 * 60

  def prune
    earliest = Time.now - EIGHT_DAYS_S
    @suites.delete_if do |name, suite|
      suite.last_seen_at < earliest
    end
  end
end
|
95
|
+
end
|