test-queue-patched 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.travis.yml +18 -0
- data/Gemfile +5 -0
- data/Gemfile-cucumber1-3 +4 -0
- data/Gemfile-cucumber1-3.lock +33 -0
- data/Gemfile-cucumber2-4 +4 -0
- data/Gemfile-cucumber2-4.lock +37 -0
- data/Gemfile-minitest4 +3 -0
- data/Gemfile-minitest4.lock +19 -0
- data/Gemfile-minitest5 +3 -0
- data/Gemfile-minitest5.lock +19 -0
- data/Gemfile-rspec2-1 +3 -0
- data/Gemfile-rspec2-1.lock +27 -0
- data/Gemfile-rspec3-0 +3 -0
- data/Gemfile-rspec3-0.lock +31 -0
- data/Gemfile-rspec3-1 +3 -0
- data/Gemfile-rspec3-1.lock +31 -0
- data/Gemfile-rspec3-2 +3 -0
- data/Gemfile-rspec3-2.lock +32 -0
- data/Gemfile-testunit +3 -0
- data/Gemfile-testunit.lock +21 -0
- data/Gemfile.lock +41 -0
- data/README.md +126 -0
- data/bin/cucumber-queue +4 -0
- data/bin/minitest-queue +4 -0
- data/bin/rspec-queue +4 -0
- data/bin/testunit-queue +4 -0
- data/lib/test-queue.rb +1 -0
- data/lib/test_queue/iterator.rb +107 -0
- data/lib/test_queue/runner/cucumber.rb +115 -0
- data/lib/test_queue/runner/minitest.rb +21 -0
- data/lib/test_queue/runner/minitest4.rb +88 -0
- data/lib/test_queue/runner/minitest5.rb +87 -0
- data/lib/test_queue/runner/puppet_lint.rb +31 -0
- data/lib/test_queue/runner/rspec.rb +79 -0
- data/lib/test_queue/runner/rspec2.rb +44 -0
- data/lib/test_queue/runner/rspec3.rb +54 -0
- data/lib/test_queue/runner/sample.rb +74 -0
- data/lib/test_queue/runner/testunit.rb +74 -0
- data/lib/test_queue/runner.rb +632 -0
- data/lib/test_queue/stats.rb +95 -0
- data/lib/test_queue/test_framework.rb +29 -0
- data/lib/test_queue.rb +8 -0
- data/script/bootstrap +12 -0
- data/script/cibuild +19 -0
- data/script/spec +7 -0
- data/spec/stats_spec.rb +76 -0
- data/test/cucumber.bats +57 -0
- data/test/minitest4.bats +34 -0
- data/test/minitest5.bats +194 -0
- data/test/rspec.bats +46 -0
- data/test/samples/features/bad.feature +5 -0
- data/test/samples/features/sample.feature +25 -0
- data/test/samples/features/sample2.feature +29 -0
- data/test/samples/features/step_definitions/common.rb +19 -0
- data/test/samples/sample_minispec.rb +37 -0
- data/test/samples/sample_minitest4.rb +25 -0
- data/test/samples/sample_minitest5.rb +33 -0
- data/test/samples/sample_rspec_helper.rb +1 -0
- data/test/samples/sample_shared_examples_for_spec.rb +5 -0
- data/test/samples/sample_spec.rb +25 -0
- data/test/samples/sample_split_spec.rb +17 -0
- data/test/samples/sample_testunit.rb +25 -0
- data/test/samples/sample_use_shared_example1_spec.rb +8 -0
- data/test/samples/sample_use_shared_example2_spec.rb +8 -0
- data/test/sleepy_runner.rb +14 -0
- data/test/testlib.bash +89 -0
- data/test/testunit.bats +20 -0
- data/test-queue-patched.gemspec +21 -0
- metadata +117 -0
@@ -0,0 +1,632 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'socket'
|
3
|
+
require 'fileutils'
|
4
|
+
require 'securerandom'
|
5
|
+
require 'test_queue/stats'
|
6
|
+
require 'test_queue/test_framework'
|
7
|
+
|
8
|
+
module TestQueue
|
9
|
+
# Bookkeeping for a single forked test-worker process: its identity,
# timing, captured output, and the results the master collects once
# the child exits.
class Worker
  attr_accessor :pid, :status, :output, :num, :host,
                :start_time, :end_time,
                :summary, :failure_output

  # Array of TestQueue::Stats::Suite recording all the suites this worker ran.
  attr_reader :suites

  def initialize(pid, num)
    @pid        = pid
    @num        = num
    @start_time = Time.now
    @output     = ''
    @suites     = []
  end

  # The captured output, split into individual lines.
  def lines
    @output.split("\n")
  end
end
|
29
|
+
|
30
|
+
class Runner
|
31
|
+
attr_accessor :concurrency, :exit_when_done
|
32
|
+
attr_reader :stats
|
33
|
+
|
34
|
+
TOKEN_REGEX = /^TOKEN=(\w+)/
|
35
|
+
|
36
|
+
# Build a Runner.
#
# test_framework - framework adapter used to enumerate and run suites.
# concurrency   - Integer worker count (default: TEST_QUEUE_WORKERS env,
#                 else detected CPU count, else 2).
# socket        - master socket spec ("host:port", "port", or UNIX path).
# relay         - address of a central master to relay to, if any.
def initialize(test_framework, concurrency=nil, socket=nil, relay=nil)
  @test_framework = test_framework
  @stats = Stats.new(stats_file)

  # Optional cap on failures before a worker sends KABOOM to the master.
  if ENV['TEST_QUEUE_EARLY_FAILURE_LIMIT']
    begin
      @early_failure_limit = Integer(ENV['TEST_QUEUE_EARLY_FAILURE_LIMIT'])
    rescue ArgumentError
      raise ArgumentError, 'TEST_QUEUE_EARLY_FAILURE_LIMIT could not be parsed as an integer'
    end
  end

  # Remember the original procline so we can append it to ours later.
  @procline = $0

  # TEST_QUEUE_FORCE restricts the run to a comma-separated suite list.
  @whitelist = if forced = ENV['TEST_QUEUE_FORCE']
    forced.split(/\s*,\s*/)
  else
    []
  end
  @whitelist.freeze

  # Seed the queue from saved stats: only files that still exist,
  # slowest suites first so long suites start early.
  all_files = @test_framework.all_suite_files.to_set
  @queue = @stats.all_suites
    .select { |suite| all_files.include?(suite.path) }
    .sort_by { |suite| -suite.duration }
    .map { |suite| [suite.name, suite.path] }

  if @whitelist.any?
    @queue.select! { |suite_name, path| @whitelist.include?(suite_name) }
    @queue.sort_by! { |suite_name, path| @whitelist.index(suite_name) }
  end

  # Whitelisted suites we still need discovery to find.
  @awaited_suites = Set.new(@whitelist)
  @original_queue = Set.new(@queue).freeze

  @workers = {}     # pid => Worker (live children)
  @completed = []   # Workers that have finished

  @concurrency =
    concurrency ||
    (ENV['TEST_QUEUE_WORKERS'] && ENV['TEST_QUEUE_WORKERS'].to_i) ||
    if File.exist?('/proc/cpuinfo')
      File.read('/proc/cpuinfo').split("\n").grep(/processor/).size
    elsif RUBY_PLATFORM =~ /darwin/
      `/usr/sbin/sysctl -n hw.activecpu`.to_i
    else
      2
    end
  unless @concurrency > 0
    raise ArgumentError, "Worker count (#{@concurrency}) must be greater than 0"
  end

  @relay_connection_timeout =
    (ENV['TEST_QUEUE_RELAY_TIMEOUT'] && ENV['TEST_QUEUE_RELAY_TIMEOUT'].to_i) ||
    30

  # Token distinguishing this run from stale masters/workers.
  @run_token = ENV['TEST_QUEUE_RELAY_TOKEN'] || SecureRandom.hex(8)

  @socket =
    socket ||
    ENV['TEST_QUEUE_SOCKET'] ||
    "/tmp/test_queue_#{$$}_#{object_id}.sock"

  @relay =
    relay ||
    ENV['TEST_QUEUE_RELAY']

  @remote_master_message = ENV["TEST_QUEUE_REMOTE_MASTER_MESSAGE"] if ENV.has_key?("TEST_QUEUE_REMOTE_MASTER_MESSAGE")

  if @relay == @socket
    STDERR.puts "*** Detected TEST_QUEUE_RELAY == TEST_QUEUE_SOCKET. Disabling relay mode."
    @relay = nil
  elsif @relay
    # Remote masters don't keep a local queue; the central master feeds them.
    @queue = []
  end

  @discovered_suites = Set.new   # [name, path] pairs discovery reported
  @assignments = {}              # [name, path] => [host, pid] it was handed to

  @exit_when_done = true

  @aborting = false
end
|
119
|
+
|
120
|
+
# Top-level entry point: run the whole queue and report.
#
# When exit_when_done is true the process terminates via exit! using
# the summary's exit status; otherwise that Integer failure count is
# returned to the caller.
def execute
  $stderr.sync = true
  $stdout.sync = true
  @start_time = Time.now

  execute_internal
  failures = summarize_internal

  exit!(failures) if exit_when_done
  failures
end
|
138
|
+
|
139
|
+
# Print the end-of-run summary, persist stats, and compute the exit
# status.
#
# The exit status is the sum of every worker's exit status plus one
# for each consistency problem detected (suites discovered but never
# run, run but never assigned, or run on the wrong worker), capped at
# 255 (the largest representable process exit code).
#
# Returns an Integer exit status.
def summarize_internal
  puts
  puts "==> Summary (#{@completed.size} workers in %.4fs)" % (Time.now-@start_time)
  puts

  estatus = 0
  misrun_suites = []
  unassigned_suites = []
  @failures = ''
  @completed.each do |worker|
    # A worker killed by a signal has no exitstatus; count it as 1.
    estatus += (worker.status.exitstatus || 1)
    @stats.record_suites(worker.suites)
    worker.suites.each do |suite|
      assignment = @assignments.delete([suite.name, suite.path])
      host = worker.host || Socket.gethostname
      if assignment.nil?
        unassigned_suites << [suite.name, suite.path]
      elsif assignment != [host, worker.pid]
        misrun_suites << [suite.name, suite.path] + assignment + [host, worker.pid]
      end
      @discovered_suites.delete([suite.name, suite.path])
    end

    summarize_worker(worker)

    @failures << worker.failure_output if worker.failure_output

    puts " [%2d] %60s %4d suites in %.4fs (%s %s)" % [
      worker.num,
      worker.summary,
      worker.suites.size,
      worker.end_time - worker.start_time,
      worker.status.to_s,
      worker.host && " on #{worker.host.split('.').first}"
    ]
  end

  unless @failures.empty?
    puts
    puts "==> Failures"
    puts
    puts @failures
  end

  # Consistency checks only make sense on the central master; remote
  # masters don't see the whole picture.
  if !relay?
    unless @discovered_suites.empty?
      estatus += 1
      puts
      puts "The following suites were discovered but were not run:"
      puts

      @discovered_suites.sort.each do |suite_name, path|
        puts "#{suite_name} - #{path}"
      end
    end
    unless unassigned_suites.empty?
      estatus += 1
      puts
      puts "The following suites were not discovered but were run anyway:"
      puts
      unassigned_suites.sort.each do |suite_name, path|
        puts "#{suite_name} - #{path}"
      end
    end
    unless misrun_suites.empty?
      estatus += 1
      puts
      puts "The following suites were run on the wrong workers:"
      puts
      misrun_suites.each do |suite_name, path, target_host, target_pid, actual_host, actual_pid|
        puts "#{suite_name} - #{path}: #{actual_host} (#{actual_pid}) - assigned to #{target_host} (#{target_pid})"
      end
    end
  end

  puts

  @stats.save

  summarize

  # BUG FIX: estatus was previously recomputed here from worker exit
  # statuses alone, silently discarding the +1 penalties added above
  # for discovered/unassigned/misrun suites. Keep the accumulated
  # value instead.
  [estatus, 255].min
end
|
223
|
+
|
224
|
+
# Hook for subclasses to print additional end-of-run reporting.
# No-op by default.
def summarize
end
|
226
|
+
|
227
|
+
# Path of the persisted stats file, overridable via TEST_QUEUE_STATS.
def stats_file
  ENV.fetch('TEST_QUEUE_STATS', '.test_queue_stats')
end
|
231
|
+
|
232
|
+
# Full master lifecycle: bind the queue socket, run the prepare hook,
# then feed suites to workers until the queue drains. The server and
# all child processes are cleaned up even when a phase raises.
def execute_internal
  start_master
  prepare(@concurrency)
  @prepared_time = Time.now
  start_relay if relay?
  discover_suites
  spawn_workers
  distribute_queue
ensure
  stop_master

  kill_subprocesses
end
|
245
|
+
|
246
|
+
# Bind the master's listening socket (central master only).
#
# A socket spec that looks like "host:port" or just "port" gets a
# TCPServer (host defaults to 0.0.0.0); anything else is treated as a
# UNIX socket path, removing a stale socket file first. Remote
# masters (relay?) skip binding and only update the procline.
def start_master
  if !relay?
    if @socket =~ /^(?:(.+):)?(\d+)$/
      address = $1 || '0.0.0.0'
      port = $2.to_i
      # NOTE(review): with no host given, $1 is nil and this becomes
      # ":<port>" — presumably downstream consumers tolerate that.
      @socket = "#$1:#$2"
      @server = TCPServer.new(address, port)
    else
      FileUtils.rm(@socket) if File.exist?(@socket)
      @server = UNIXServer.new(@socket)
    end
  end

  desc = "test-queue master (#{relay?? "relaying to #{@relay}" : @socket})"
  puts "Starting #{desc}"
  $0 = "#{desc} - #{@procline}"
end
|
263
|
+
|
264
|
+
# Announce this remote master to the central master over the relay
# socket: token line, then a "REMOTE MASTER" line advertising our
# worker count and hostname. Any response other than "OK" — or a
# refused connection — terminates the process with status 1.
def start_relay
  return unless relay?

  sock = connect_to_relay
  message = @remote_master_message ? " #{@remote_master_message}" : ""
  message.gsub!(/(\r|\n)/, "") # Our "protocol" is newline-separated
  sock.puts("TOKEN=#{@run_token}")
  sock.puts("REMOTE MASTER #{@concurrency} #{Socket.gethostname} #{message}")
  response = sock.gets.strip
  unless response == "OK"
    STDERR.puts "*** Got non-OK response from master: #{response}"
    sock.close
    exit! 1
  end
  sock.close
rescue Errno::ECONNREFUSED
  STDERR.puts "*** Unable to connect to relay #{@relay}. Aborting.."
  exit! 1
end
|
283
|
+
|
284
|
+
# Tear down the listening socket (central master only): remove the
# UNIX socket file if we created one, close the server (ignoring
# errors), and clear both ivars so repeated calls are no-ops.
def stop_master
  return if relay?

  FileUtils.rm_f(@socket) if @socket && @server.is_a?(UNIXServer)
  @server.close rescue nil if @server
  @socket = @server = nil
end
|
291
|
+
|
292
|
+
# Fork @concurrency worker processes. Each child closes its copy of
# the master's server socket, builds its own queue Iterator, runs
# the framework's worker loop, and exits with that loop's failure
# count. The parent records a Worker per pid for later reaping.
def spawn_workers
  @concurrency.times do |i|
    num = i+1

    pid = fork do
      @server.close if @server

      iterator = Iterator.new(@test_framework, relay?? @relay : @socket, method(:around_filter), early_failure_limit: @early_failure_limit, run_token: @run_token)
      after_fork_internal(num, iterator)
      ret = run_worker(iterator) || 0
      cleanup_worker
      # exit! skips at_exit handlers inherited from the master.
      Kernel.exit! ret
    end

    @workers[pid] = Worker.new(pid, num)
  end
end
|
309
|
+
|
310
|
+
# Fork a child that enumerates every suite in every suite file and
# reports each back to the master socket as a "NEW SUITE" message.
# The child exits cleanly on SIGINT so the master can cut discovery
# short once all whitelisted suites have been found.
def discover_suites
  # Remote masters don't discover suites; the central master does and
  # distributes them to remote masters.
  return if relay?

  # No need to discover suites if all whitelisted suites are already
  # queued.
  return if @whitelist.any? && @awaited_suites.empty?

  @discovering_suites_pid = fork do
    terminate = false
    Signal.trap("INT") { terminate = true }

    $0 = "test-queue suite discovery process"

    @test_framework.all_suite_files.each do |path|
      @test_framework.suites_from_file(path).each do |suite_name, suite|
        Kernel.exit!(0) if terminate

        # One short-lived connection per suite keeps the protocol
        # simple: a token line, then the marshalled [name, path] pair.
        @server.connect_address.connect do |sock|
          sock.puts("TOKEN=#{@run_token}")
          sock.puts("NEW SUITE #{Marshal.dump([suite_name, path])}")
        end
      end
    end

    Kernel.exit! 0
  end
end
|
339
|
+
|
340
|
+
# True while the queue's contents are not yet settled and workers
# should wait rather than drain it.
def awaiting_suites?
  # Still hunting for whitelisted suites, which must run in the
  # whitelist's order.
  return true if @awaited_suites.any?

  # Nothing queued yet, but the discovery process is still running.
  return true if @queue.empty? && !!@discovering_suites_pid

  # It's fine to run any queued suites now.
  false
end
|
354
|
+
|
355
|
+
# Record a suite reported by the discovery process and, unless it was
# already queued from saved stats, push it onto the front of the
# queue (unknown durations are better run early than late).
def enqueue_discovered_suite(suite_name, path)
  return if @whitelist.any? && !@whitelist.include?(suite_name)

  key = [suite_name, path]
  @discovered_suites << key

  if @original_queue.include?(key)
    # This suite was already added to the queue some other way.
    @awaited_suites.delete(suite_name)
    return
  end

  @queue.unshift(key)

  was_awaited = @awaited_suites.delete?(suite_name)
  if was_awaited && @awaited_suites.empty?
    # Every whitelisted suite has been found; order the queue like the
    # whitelist and stop discovery early.
    @queue.sort_by! { |name, _| @whitelist.index(name) }

    kill_suite_discovery_process("INT")
  end
end
|
381
|
+
|
382
|
+
# Child-side setup common to every worker: reseed the RNG (forked
# children inherit the parent's seed), redirect stdout/stderr into a
# per-pid file the master collects after exit, set the procline, and
# finally run the user-overridable after_fork hook.
def after_fork_internal(num, iterator)
  srand

  output = File.open("/tmp/test_queue_worker_#{$$}_output", 'w')

  $stdout.reopen(output)
  $stderr.reopen($stdout)
  $stdout.sync = $stderr.sync = true

  $0 = "test-queue worker [#{num}]"
  puts
  puts "==> Starting #$0 (#{Process.pid} on #{Socket.gethostname}) - iterating over #{iterator.sock}"
  puts

  after_fork(num)
end
|
398
|
+
|
399
|
+
# Master-side hook invoked once before any workers are forked.
# Override to set up per-worker resources, e.g. one database copy per
# worker. No-op by default.
def prepare(concurrency)
end
|
404
|
+
|
405
|
+
# Wraps the execution of each suite on a worker; override to add
# setup/teardown around the block. Straight pass-through by default.
def around_filter(_suite)
  yield
end
|
408
|
+
|
409
|
+
# Per-worker hook run inside the forked child before it starts
# pulling suites from the queue. No-op by default.
def after_fork(num)
end
|
412
|
+
|
413
|
+
# Entry point for internal runner implementations. The iterator
# yields jobs from the shared queue on the master; this default
# implementation just drains it, printing each job.
#
# Returns an Integer number of failures.
def run_worker(iterator)
  iterator.each { |item| puts " #{item.inspect}" }

  0 # exit status
end
|
424
|
+
|
425
|
+
# Hook run in the child after the worker loop finishes, just before
# the process exits. No-op by default.
def cleanup_worker
end
|
427
|
+
|
428
|
+
# Populate worker.summary and worker.failure_output from the worker's
# captured output. The base implementation records empty strings;
# framework-specific runners parse real results out of the output.
def summarize_worker(worker)
  worker.failure_output = ''
  worker.summary = ''
end
|
432
|
+
|
433
|
+
# Harvest exited worker processes, removing them from @workers.
# With blocking=true this waits for every remaining worker; otherwise
# WNOHANG is used and only already-dead children are collected. Each
# reaped worker has its output and suite stats collected, is relayed
# to the central master when we're a remote master, and is recorded
# as completed.
def reap_workers(blocking=true)
  @workers.delete_if do |_, worker|
    if Process.waitpid(worker.pid, blocking ? 0 : Process::WNOHANG).nil?
      # Still running (non-blocking path): keep it in @workers.
      next false
    end

    # $? holds the Process::Status from the waitpid call just above.
    worker.status = $?
    worker.end_time = Time.now

    collect_worker_data(worker)
    relay_to_master(worker) if relay?
    worker_completed(worker)

    true
  end
end
|
449
|
+
|
450
|
+
# Pull a dead worker's results out of its temp files: the captured
# stdout/stderr stream and the marshalled suite stats written by the
# worker process. Each file is deleted once read.
def collect_worker_data(worker)
  if File.exist?(file = "/tmp/test_queue_worker_#{worker.pid}_output")
    worker.output = IO.binread(file)
    FileUtils.rm(file)
  end

  if File.exist?(file = "/tmp/test_queue_worker_#{worker.pid}_suites")
    # NOTE: Marshal.load here trusts the (same-user) worker process.
    worker.suites.replace(Marshal.load(IO.binread(file)))
    FileUtils.rm(file)
  end
end
|
461
|
+
|
462
|
+
# Record a finished worker and echo its output when it failed (or
# always, under TEST_QUEUE_VERBOSE). Skipped entirely while aborting.
def worker_completed(worker)
  return if @aborting

  @completed << worker

  verbose = ENV['TEST_QUEUE_VERBOSE']
  puts worker.output if verbose || worker.status.exitstatus != 0
end
|
467
|
+
|
468
|
+
# Serve the central queue until every suite has been handed out and
# all remote workers have reported back. This is the master's accept
# loop: each connection sends a token line plus one command (POP,
# REMOTE MASTER, WORKER, NEW SUITE, or KABOOM). Remote masters skip
# this entirely. The server and local workers are always cleaned up
# on the way out.
def distribute_queue
  return if relay?
  remote_workers = 0

  until !awaiting_suites? && @queue.empty? && remote_workers == 0
    queue_status(@start_time, @queue.size, @workers.size, remote_workers)

    if status = reap_suite_discovery_process(false)
      abort("Discovering suites failed.") unless status.success?
      abort("Failed to discover #{@awaited_suites.sort.join(", ")} specified in TEST_QUEUE_FORCE") if @awaited_suites.any?
    end

    if IO.select([@server], nil, nil, 0.1).nil?
      reap_workers(false) # check for worker deaths
    else
      sock = @server.accept
      begin
        token = sock.gets.strip
        cmd = sock.gets.strip

        token = token[TOKEN_REGEX, 1]
        # If we have a remote master from a different test run, respond with "WRONG RUN", and it will consider the test run done.
        if token != @run_token
          message = token.nil? ? "Worker sent no token to master" : "Worker from run #{token} connected to master"
          STDERR.puts "*** #{message} for run #{@run_token}; ignoring."
          sock.write("WRONG RUN\n")
          next
        end

        case cmd
        when /^POP (\S+) (\d+)/
          hostname = $1
          pid = Integer($2)
          if awaiting_suites?
            sock.write(Marshal.dump("WAIT"))
          elsif obj = @queue.shift
            data = Marshal.dump(obj)
            sock.write(data)
            @assignments[obj] = [hostname, pid]
          end
        when /^REMOTE MASTER (\d+) ([\w\.-]+)(?: (.+))?/
          num = $1.to_i
          remote_master = $2
          remote_master_message = $3

          sock.write("OK\n")
          remote_workers += num

          message = "*** #{num} workers connected from #{remote_master} after #{Time.now-@start_time}s"
          message << " " + remote_master_message if remote_master_message
          STDERR.puts message
        when /^WORKER (\d+)/
          data = sock.read($1.to_i)
          worker = Marshal.load(data)
          worker_completed(worker)
          remote_workers -= 1
        when /^NEW SUITE (.+)/
          suite_name, path = Marshal.load($1)
          enqueue_discovered_suite(suite_name, path)
        when /^KABOOM/
          # worker reporting an abnormal number of test failures;
          # stop everything immediately and report the results.
          break
        else
          STDERR.puts("Ignoring unrecognized command: \"#{cmd}\"")
        end
      ensure
        # BUG FIX: the WRONG RUN `next` and the KABOOM `break`
        # previously skipped the trailing sock.close, leaking one
        # file descriptor per such connection.
        sock.close
      end
    end
  end
ensure
  stop_master
  reap_workers
end
|
540
|
+
|
541
|
+
# True when this process is a remote master forwarding results to a
# central master rather than serving the queue itself.
def relay?
  @relay ? true : false
end
|
544
|
+
|
545
|
+
# Open a TCP connection to the central master, retrying every 0.5s
# while the connection is refused. Once @relay_connection_timeout
# seconds have elapsed the last ECONNREFUSED is re-raised.
#
# Returns the connected TCPSocket.
def connect_to_relay
  sock = nil
  start = Time.now
  puts "Attempting to connect for #{@relay_connection_timeout}s..."
  while sock.nil?
    begin
      sock = TCPSocket.new(*@relay.split(':'))
    rescue Errno::ECONNREFUSED => e
      raise e if Time.now - start > @relay_connection_timeout
      puts "Master not yet available, sleeping..."
      sleep 0.5
    end
  end
  sock
end
|
560
|
+
|
561
|
+
# Forward one completed worker's results to the central master:
# token line, a "WORKER <bytes>" header, then the marshalled Worker
# object itself. The socket is closed even if sending raises.
def relay_to_master(worker)
  worker.host = Socket.gethostname
  data = Marshal.dump(worker)

  sock = connect_to_relay
  sock.puts("TOKEN=#{@run_token}")
  sock.puts("WORKER #{data.bytesize}")
  sock.write(data)
ensure
  sock.close if sock
end
|
572
|
+
|
573
|
+
# Terminate every child this master spawned: the test workers first,
# then the suite discovery process.
def kill_subprocesses
  kill_workers
  kill_suite_discovery_process
end
|
577
|
+
|
578
|
+
# Forcibly kill all live workers (SIGKILL), then reap them so their
# exit statuses and captured output are still collected.
def kill_workers
  @workers.each do |pid, worker|
    Process.kill 'KILL', pid
  end

  reap_workers
end
|
585
|
+
|
586
|
+
# Signal the discovery child and wait for it. The default SIGKILL is
# immediate; callers pass "INT" to let it finish its current suite
# and exit cleanly. No-op when discovery isn't running.
def kill_suite_discovery_process(signal="KILL")
  return unless @discovering_suites_pid
  Process.kill signal, @discovering_suites_pid
  reap_suite_discovery_process
end
|
591
|
+
|
592
|
+
# Wait on the discovery child. In non-blocking mode this returns nil
# while the child is still running. Once reaped, the pid is cleared
# and the child's Process::Status is returned.
def reap_suite_discovery_process(blocking=true)
  return unless @discovering_suites_pid
  _, status = Process.waitpid2(@discovering_suites_pid, blocking ? 0 : Process::WNOHANG)
  return unless status

  @discovering_suites_pid = nil
  status
end
|
600
|
+
|
601
|
+
# Stop the test run immediately.
#
# message - String message to print to the console when exiting.
#
# Doesn't return.
def abort(message)
  # Set the flag first so worker_completed ignores results from the
  # workers we're about to kill.
  @aborting = true
  kill_subprocesses
  Kernel::abort("Aborting: #{message}")
end
|
611
|
+
|
612
|
+
# Monitoring hook for subclasses — observe queue progress, record
# metrics on how quickly remote workers connect, or abort the build
# when too few show up.
#
# Called very frequently during the run (every pass of the master's
# accept loop), so implementations must be cheap and non-blocking.
# Not called on remote masters; only the central master sees it.
#
# start_time          - Time when the test run began
# queue_size          - Integer number of suites left in the queue
# local_worker_count  - Integer number of active local workers
# remote_worker_count - Integer number of active remote workers
#
# Returns nothing.
def queue_status(start_time, queue_size, local_worker_count, remote_worker_count)
end
|
631
|
+
end
|
632
|
+
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module TestQueue
|
2
|
+
class Stats
|
3
|
+
# Immutable record of one test suite run: its identity (name/path)
# plus how long it took and when it was last seen.
class Suite
  attr_reader :name, :path, :duration, :last_seen_at

  def initialize(name, path, duration, last_seen_at)
    @name = name
    @path = path
    @duration = duration
    @last_seen_at = last_seen_at

    freeze
  end

  # Value equality over all four fields.
  def ==(other)
    other &&
      name == other.name &&
      path == other.path &&
      duration == other.duration &&
      last_seen_at == other.last_seen_at
  end
  alias_method :eql?, :==

  # BUG FIX: eql? was overridden without a matching #hash, breaking
  # the eql?/hash contract — equal Suites could land in different
  # Hash buckets or duplicate inside a Set.
  def hash
    [name, path, duration, last_seen_at].hash
  end

  # Serializable representation; the timestamp is stored as epoch seconds.
  def to_h
    { :name => name, :path => path, :duration => duration, :last_seen_at => last_seen_at.to_i }
  end

  # Rebuild a Suite from the hash shape produced by #to_h.
  def self.from_hash(hash)
    self.new(hash.fetch(:name),
             hash.fetch(:path),
             hash.fetch(:duration),
             Time.at(hash.fetch(:last_seen_at)))
  end
end
|
35
|
+
|
36
|
+
# path - String file where stats are persisted between runs.
#
# Any previously saved stats are loaded immediately.
def initialize(path)
  @path = path
  @suites = {}   # suite name => Suite
  load
end
|
41
|
+
|
42
|
+
# Every known Suite record, in no particular order.
def all_suites
  @suites.each_value.to_a
end
|
45
|
+
|
46
|
+
# Look up a single Suite by name; nil when unknown.
def suite(name)
  @suites.fetch(name, nil)
end
|
49
|
+
|
50
|
+
# Merge freshly-run suites into the table, keyed (and overwritten)
# by name so the newest data wins.
def record_suites(suites)
  suites.each { |suite| @suites[suite.name] = suite }
end
|
55
|
+
|
56
|
+
# Persist the stats to @path in Marshal format, dropping stale
# entries first (see #prune).
def save
  prune

  File.open(@path, "wb") do |f|
    Marshal.dump(to_h, f)
  end
end
|
63
|
+
|
64
|
+
private

# Bump whenever the on-disk format changes; older formats are simply
# discarded on load.
CURRENT_VERSION = 2

# On-disk representation: format version plus one hash per suite.
def to_h
  { :version => CURRENT_VERSION, :suites => @suites.each_value.map(&:to_h) }
end
|
73
|
+
|
74
|
+
# Read previously saved stats from @path. Missing, truncated, or
# corrupt files — and any format version other than CURRENT_VERSION —
# are silently ignored, leaving @suites empty.
def load
  data = begin
    File.open(@path, "rb") { |f| Marshal.load(f) }
  rescue Errno::ENOENT, EOFError, TypeError, ArgumentError
    # Absent or corrupt stats aren't fatal; start fresh.
  end
  return unless data && data.is_a?(Hash) && data[:version] == CURRENT_VERSION
  data[:suites].each do |suite_hash|
    suite = Suite.from_hash(suite_hash)
    @suites[suite.name] = suite
  end
end
|
85
|
+
|
86
|
+
# Retention window, in seconds: stats older than this are dropped at
# save time.
EIGHT_DAYS_S = 8 * 24 * 60 * 60

# Forget suites that haven't been seen within the retention window.
def prune
  cutoff = Time.now - EIGHT_DAYS_S
  @suites.delete_if { |_name, suite| suite.last_seen_at < cutoff }
end
|
94
|
+
end
|
95
|
+
end
|