hive-runner 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,400 @@
1
+ require 'yaml'
2
+
3
+ require 'hive'
4
+ require 'hive/file_system'
5
+ require 'hive/execution_script'
6
+ require 'hive/diagnostic_runner'
7
+ require 'hive/messages'
8
+ require 'hive/port_allocator'
9
+ require 'code_cache'
10
+ require 'res'
11
+
12
+ module Hive
13
+ # The generic worker class
14
+ class Worker
15
# Raised by reserve_job when a reserved job fails its validity check
class InvalidJobReservationError < StandardError; end

# Raised by diagnostics when the device status is not 'idle'
class DeviceNotReady < StandardError; end

# Not raised in this file; presumably used by port allocation code elsewhere
class NoPortsAvailable < StandardError; end
23
+
24
+ # Device API Object for device associated with this worker
25
+ attr_accessor :device_api, :queues
26
+
27
# Set up the worker and then run the main worker loop.
#
# NOTE: the loop runs inside the constructor, so this call does not return
# until keep_running? reports false or the process exits on TERM.
#
# options - Hash with String keys. Keys read here: 'parent_pid', 'id',
#           'device_identity', 'name_stub', 'queues' and 'port_allocator'.
def initialize(options)
  @options = options
  @parent_pid = @options['parent_pid']
  @device_id = @options['id']
  @device_identity = @options['device_identity'] || 'unknown-device'
  pid = Process.pid
  $PROGRAM_NAME = "#{@options['name_stub'] || 'WORKER'}.#{pid}"
  @log = Hive::Log.new
  @log.add_logger(
    "#{LOG_DIRECTORY}/#{pid}.#{@device_identity}.log",
    Hive.config.logging.worker_level || 'INFO'
  )
  # Only default the flag when nothing has set it before this point
  @devicedb_register = true if @devicedb_register.nil?

  # Anything that is not an Array (including a missing key) means "no queues"
  @queues = @options['queues'].is_a?(Array) ? @options['queues'] : []
  update_queue_log

  @port_allocator = @options.fetch('port_allocator') { Hive::PortAllocator.new(ports: []) }

  # The third '::'-separated component of the class name is used as the
  # platform. The generic Hive::Worker has no third component, so it runs
  # without a diagnostic runner instead of failing on nil.downcase.
  platform = self.class.to_s.scan(/[^:]+/)[2]
  unless platform.nil? || Hive.config["diagnostics"].nil?
    @diagnostic_runner = Hive::DiagnosticRunner.new(@options, Hive.config.diagnostics, platform.downcase)
  end

  Hive::Messages.configure do |config|
    config.base_path = Hive.config.network.scheduler
    config.pem_file = Hive.config.network.cert
    # NOTE(review): certificate verification is disabled here - confirm intended
    config.ssl_verify_mode = OpenSSL::SSL::VERIFY_NONE
  end

  Signal.trap('TERM') do
    @log.info("Worker terminated")
    exit
  end

  @log.info('Starting worker')
  while keep_running?
    begin
      diagnostics
      update_queues
      poll_queue
    rescue DeviceNotReady => e
      # An unready device is expected from time to time; just try again later
      @log.info("#{e.message}\n")
    rescue StandardError => e
      @log.warn("Worker loop aborted: #{e.message}\n : #{e.backtrace.join("\n : ")}")
    end
    sleep Hive.config.timings.worker_loop_interval
  end
  @log.info('Exiting worker')
end
76
+
77
# Try to reserve a job; when one is found, run it and then clean up.
# Errors raised while running the job are logged and not re-raised.
def poll_queue
  @job = reserve_job
  return @log.info('No job found') if @job.nil?

  @log.info('Job starting')
  begin
    execute_job
  rescue => error
    @log.info("Error running test: #{error.message}\n : #{error.backtrace.join("\n :")}")
  end
  cleanup
end
92
+
93
# Ask the job class to reserve a job from this worker's queues.
#
# Returns the reserved job, or nil when nothing was reserved.
# Raises InvalidJobReservationError when a job is returned but is not valid.
def reserve_job
  @log.info("Trying to reserve job for queues: #{@queues.join(', ')}")
  reserved = job_message_klass.reserve(@queues, reservation_details)
  @log.debug("Job: #{reserved.inspect}")
  return reserved if reserved.nil? || reserved.valid?

  raise InvalidJobReservationError.new("Invalid Job Reserved")
end
101
+
102
# The class used to reserve jobs (see reserve_job).
# The generic worker uses Hive::Messages::Job; child classes are expected to
# override this.
def job_message_klass
  @log.info('Generic job class')
  Hive::Messages::Job
end
108
+
109
# Details passed as the second argument when reserving a job: the hive id
# and the pid of this worker process.
def reservation_details
  details = {}
  details[:hive_id] = Hive.id
  details[:worker_pid] = Process.pid
  details
end
112
+
113
# Execute the job held in @job.
#
# Creates the job's file system and execution script, runs the script, runs
# the post-execution hook, uploads artifacts and results and reports the
# outcome to the job. While the job is in progress a TERM signal runs the
# signal-safe cleanup and the artifact upload before the worker exits.
#
# Returns true when the execution script's exit value is 0, false otherwise.
# Any exception raised during setup or execution is re-raised after the job
# has been marked as errored and the worker is back in its idle state.
def execute_job
  # Drop state left over from a previous job so that an early TERM cannot
  # terminate or upload the wrong job's script and files
  @file_system = nil
  @script = nil

  # Ensure that a killed worker cleans up correctly
  Signal.trap('TERM') do
    Signal.trap('TERM') {} # Prevent retry signals
    @log.info "Caught TERM signal"
    @log.info "Terminating script, if running"
    @script.terminate if @script
    # The file system only exists once job setup has started; before that
    # there is nothing to clean up or upload
    if @file_system
      @log.info "Post-execution cleanup"
      signal_safe_post_script(@job, @file_system, @script)

      # Upload results
      @file_system.finalise_results_directory
      upload_files(@job, @file_system.results_path, @file_system.logs_path)
      write_job_info('completed')
    end
    @job.error('Worker killed')
    @log.info "Worker terminated"
    exit
  end

  @log.info('Job starting')
  @job.prepare(@device_id)

  exception = nil
  exit_value = nil
  begin
    @log.info "Setting job paths"
    @file_system = Hive::FileSystem.new(@job.job_id, Hive.config.logging.home, @log)
    write_job_info('running')

    unless @job.repository.to_s.empty?
      @log.info "Checking out the repository"
      @log.debug "  #{@job.repository}"
      @log.debug "  #{@file_system.testbed_path}"
      checkout_code(@job.repository, @file_system.testbed_path)
    end

    @log.info "Initialising execution script"
    @script = Hive::ExecutionScript.new(
      file_system: @file_system,
      log: @log,
      keep_running: ->() { self.keep_running? }
    )
    @script.append_bash_cmd "mkdir -p #{@file_system.testbed_path}/#{@job.execution_directory}"
    @script.append_bash_cmd "cd #{@file_system.testbed_path}/#{@job.execution_directory}"

    @log.info "Setting the execution variables in the environment"
    @script.set_env 'HIVE_RESULTS', @file_system.results_path
    # Array values are skipped here; retry_urns and tests are handled below
    @job.execution_variables.to_h.each_pair do |var, val|
      @script.set_env "HIVE_#{var}".upcase, val unless val.kind_of?(Array)
    end
    if @job.execution_variables.retry_urns && !@job.execution_variables.retry_urns.empty?
      @script.set_env "RETRY_URNS", @job.execution_variables.retry_urns
    end
    if @job.execution_variables.tests && @job.execution_variables.tests != [""]
      @script.set_env "TEST_NAMES", @job.execution_variables.tests
    end

    @log.info "Appending test script to execution script"
    @script.append_bash_cmd @job.command

    @job.start

    @log.info "Pre-execution setup"
    pre_script(@job, @file_system, @script)

    @log.info "Running execution script"
    exit_value = @script.run
    @job.end(exit_value)
  rescue => e
    # Remember the failure; cleanup and uploads below still have to run
    exception = e
  end

  begin
    @log.info "Post-execution cleanup"
    post_script(@job, @file_system, @script)

    # Upload results
    @file_system.finalise_results_directory
    upload_files(@job, @file_system.results_path, @file_system.logs_path)
    upload_results(@job, @file_system.testbed_path, @file_system.results_path)
  rescue => e
    @log.error( "Post execution failed: " + e.message)
    @log.error("  : #{e.backtrace.join("\n  : ")}")
  end

  begin
    if exception
      @job.error( exception.message )
    else
      @job.complete
    end
  ensure
    # The job is over whether it passed or failed: go back to the plain
    # handler so that a later TERM does not redo this job's cleanup
    Signal.trap('TERM') do
      @log.info("Worker terminated")
      exit
    end
  end

  # Written on failure too, so job_info never keeps a stale "running" state
  write_job_info('completed') if @file_system

  raise exception if exception

  exit_value == 0
end

# Record the state of the current job next to this worker's pid in the
# job_info file of the job's home directory.
def write_job_info(state)
  File.open("#{@file_system.home_path}/job_info", 'w') do |f|
    f.puts "#{Process.pid} #{state}"
  end
end
private :write_job_info
220
+
221
# Run the diagnostic runner (when one is configured) and make sure the device
# is idle. A 'busy' device is set back to 'idle' first.
# Raises DeviceNotReady for any resulting status other than 'idle'.
# To be extended in child classes, as required.
def diagnostics
  @diagnostic_runner.run unless @diagnostic_runner.nil?
  current = device_status
  current = set_device_status('idle') if current == 'busy'
  return if current == 'idle'

  raise DeviceNotReady.new("Current device status: '#{current}'")
end
228
+
229
# Current status of the device; 'idle' until a status has been set.
# This method should be replaced in child classes, as appropriate.
def device_status
  @device_status = 'idle' unless @device_status
  @device_status
end
234
+
235
# Store the given status for the device and return it.
# This method should be replaced in child classes, as appropriate.
def set_device_status(status)
  @device_status = status
  @device_status
end
240
+
241
# Refresh the queue list from the device's DeviceDB record.
# Does nothing unless @devicedb_register is set. The queue log file is
# rewritten whenever the record contains a queue list, changed or not.
def update_queues
  return unless @devicedb_register

  details = Hive.devicedb('Device').find(@options['id'])
  @log.debug("Device details: #{details.inspect}")

  device_queues = details['device_queues']
  return @log.warn("Queue list missing from DeviceDB response") unless device_queues

  latest = device_queues.map { |queue_details| queue_details['name'] }
  unless @queues.sort == latest.sort
    @log.info("Updated queue list: #{latest.join(', ')}")
    @queues = latest
  end
  update_queue_log
end
260
+
261
# Write the current queue list, as YAML, to this process's queue log file
def update_queue_log
  File.write("#{LOG_DIRECTORY}/#{Process.pid}.queues.yml", @queues.to_yaml)
end
264
+
265
# Upload every entry found directly inside each given directory as an
# artifact of the job.
#
# job   - object responding to report_artifact(path)
# paths - directories to scan (not recursive)
#
# Failures are logged, never raised: an entry that fails to upload does not
# stop the others, and a directory that cannot be read does not stop the
# remaining directories from being uploaded.
def upload_files(job, *paths)
  @log.info("Uploading assets")
  paths.each do |path|
    @log.info("Uploading files from #{path}")
    begin
      # '.' and '..' are not artifacts, so drop them before anything is logged
      entries = Dir.entries(path) - %w[. ..]
    rescue SystemCallError => e
      @log.error("Unable to read #{path}: #{e.message}")
      next
    end
    entries.each do |item|
      @log.info("File: #{item}")
      begin
        artifact = job.report_artifact("#{path}/#{item}")
        @log.info("Artifact uploaded: #{artifact.attributes.to_s}")
      rescue => e
        @log.error("Error uploading artifact #{item}: #{e.message}")
        @log.error("  : #{e.backtrace.join("\n  : ")}")
      end
    end
  end
end
283
+
284
# Submit the job's results through Res.
#
# Uses the first .res file in results_dir or, failing that, one generated
# from xunit output. The results are always submitted to the :hive reporter
# and, when the checkout contains a testmine config file, to the :testmine
# reporter as well. Each submission failure is logged as a warning and does
# not stop the other submission.
def upload_results(job, checkout, results_dir)
  res_file = detect_res_file(results_dir) || process_xunit_results(results_dir)
  return unless res_file

  @log.info("Res file found")

  begin
    Res.submit_results(reporter: :hive, ir: res_file, job_id: job.job_id)
  rescue => e
    @log.warn("Res Hive upload failed #{e.message}")
  end

  begin
    conf_file = testmine_config(checkout)
    if conf_file
      Res.submit_results(
        reporter: :testmine,
        ir: res_file,
        config_file: conf_file,
        hive_job_id: job.job_id,
        version: job.execution_variables.version,
        target: job.execution_variables.queue_name
      )
    end
  rescue => e
    @log.warn("Res Testmine upload failed #{e.message}")
  end
  # TODO Add in Testrail upload
end
322
+
323
# Path of the first *.res file directly inside results_dir, or nil
def detect_res_file(results_dir)
  Dir["#{results_dir}/*.res"].first
end
326
+
327
# Convert xunit output into a Res file.
#
# Parses the first *.xml file directly inside results_dir with the :junit
# parser and copies the parser's output into "#{results_dir}/xunit.res".
#
# Returns the (already closed) File for xunit.res, or nil when results_dir
# contains no *.xml file.
def process_xunit_results(results_dir)
  xunit_file = Dir.glob("#{results_dir}/*.xml").first
  return nil unless xunit_file

  xunit_output = Res.parse_results(parser: :junit, file: xunit_file)
  # Block forms close both handles even when reading or writing raises
  contents = File.open(xunit_output.io, "rb") { |res_output| res_output.read }
  File.open("#{results_dir}/xunit.res", "w+") do |res|
    res.puts contents
    res
  end
end
339
+
340
# Path of the first .testmine.yml / .testmite.yml file directly inside the
# checkout, or nil when there is none
def testmine_config(checkout)
  matches = Dir.glob("#{checkout}/.testmi{n,t}e.yml")
  matches.first
end
343
+
344
# Check out the head of the repository into checkout_directory.
# Returns the checkout's result; raises RuntimeError when that result is
# nil or false.
def checkout_code(repository, checkout_directory)
  checked_out = CodeCache.repo(repository).checkout(:head, checkout_directory)
  raise "Unable to checkout repository #{repository}" unless checked_out

  checked_out
end
348
+
349
# Determine whether to keep the worker running.
# True while the process group of the parent pid can be looked up; any error
# from that lookup counts as "parent gone".
def keep_running?
  Process.getpgid(@parent_pid)
  true
rescue
  false
end
359
+
360
# Hook for any setup required before the execution script runs.
# Does nothing in the generic worker.
def pre_script(job, file_system, script); end
363
+
364
# Hook for device specific steps immediately after the execution script.
# The generic worker only runs the signal-safe steps.
def post_script(job, file_system, script)
  signal_safe_post_script(job, file_system, script)
end
368
+
369
# Hook for device specific steps immediately after the execution script that
# can safely be run inside a Signal.trap.
# This should be called by post_script. Does nothing in the generic worker.
def signal_safe_post_script(job, file_system, script); end
374
+
375
# Hook for whatever device cleanup is required after a job.
# Does nothing in the generic worker.
def cleanup; end
378
+
379
# Allocate a port.
# Deprecated: logs two warnings, then delegates to @port_allocator.
def allocate_port
  [
    "Using deprecated 'Hive::Worker.allocate_port' method",
    "Use @port_allocator.allocate_port instead"
  ].each { |message| @log.warn(message) }
  @port_allocator.allocate_port
end
385
+
386
# Release a port.
# Deprecated: logs two warnings, then delegates to @port_allocator.
def release_port(p)
  [
    "Using deprecated 'Hive::Worker.release_port' method",
    "Use @port_allocator.release_port instead"
  ].each { |message| @log.warn(message) }
  @port_allocator.release_port(p)
end
392
+
393
# Release all ports.
# Deprecated: logs two warnings, then delegates to @port_allocator.
def release_all_ports
  [
    "Using deprecated 'Hive::Worker.release_all_ports' method",
    "Use @port_allocator.release_all_ports instead"
  ].each { |message| @log.warn(message) }
  @port_allocator.release_all_ports
end
399
+ end
400
+ end
data/lib/hive.rb ADDED
@@ -0,0 +1,118 @@
1
+ require 'chamber'
2
+ require 'hive/log'
3
+ require 'hive/register'
4
+ require 'devicedb_comms'
5
+ require 'macaddr'
6
+ require 'socket'
7
+
8
+ # The Hive automated testing framework
9
+ module Hive
10
# Load the configuration from HIVE_CONFIG (default './config/') for the
# environment named by HIVE_ENVIRONMENT (default 'test')
Chamber.load(basepath: ENV['HIVE_CONFIG'] || './config/',
             namespaces: { environment: ENV['HIVE_ENVIRONMENT'] || 'test' })

# Daemon name taken from the configuration, 'HIVE' when not configured
DAEMON_NAME = if Chamber.env.daemon_name?
                Chamber.env.daemon_name
              else
                'HIVE'
              end
17
+
18
# The logging section and its directory are mandatory
fail 'Missing logging section in configuration file' unless Chamber.env.logging?
fail 'Missing log directory' unless Chamber.env.logging.directory?

LOG_DIRECTORY = Chamber.env.logging.directory
# The pids directory falls back to the log directory when not configured
PIDS_DIRECTORY = Chamber.env.logging.pids? ? Chamber.env.logging.pids : LOG_DIRECTORY
32
+
33
# Point the DeviceDB client at the configured server and certificate.
# NOTE(review): VERIFY_NONE disables certificate verification - confirm that
# this is intended.
DeviceDBComms.configure do |comms|
  comms.url = Chamber.env.network.devicedb
  comms.pem_file = Chamber.env.network.cert
  comms.ssl_verify_mode = OpenSSL::SSL::VERIFY_NONE
end
38
+
39
# The loaded configuration (Chamber.env)
def self.config
  Chamber.env
end
42
+
43
# The shared hive logger, created on first use.
# Logs to the configured main log file (level 'INFO' unless main_level is
# configured) and, when console_level is configured, to STDOUT.
def self.logger
  return @logger if @logger

  @logger = Hive::Log.new
  if Hive.config.logging.main_filename?
    main_log = "#{LOG_DIRECTORY}/#{Hive.config.logging.main_filename}"
    main_level = Chamber.env.logging.main_level? ? Chamber.env.logging.main_level : 'INFO'
    @logger.add_logger(main_log, main_level)
  end
  @logger.add_logger(STDOUT, Hive.config.logging.console_level) if Hive.config.logging.console_level?
  @logger
end
56
+
57
# The DeviceDBComms client for the named section (for example 'Device' or
# 'Hive'). One client per section is created on first use and then reused.
def self.devicedb(section)
  @devicedb = {} unless @devicedb.is_a?(Hash)
  return @devicedb[section] if @devicedb[section]

  @devicedb[section] = Object.const_get('DeviceDBComms').const_get(section).new
end
61
+
62
# The shared Hive::Register instance, created on first use
def self.register
  @register = Hive::Register.new unless @register
  @register
end
65
+
66
# Get the id of the hive from the device database.
# Registers the hive on first use and remembers the id it is given. Returns
# -1 while registration has not succeeded; the next call then tries again.
def self.id
  return @devicedb_id if @devicedb_id

  Hive.logger.info "Attempting to register the hive as #{Hive.hostname}"
  response = self.devicedb('Hive').register(Hive.hostname, Hive.mac_address, Hive.ip_address)
  if response['error'].present?
    Hive.logger.warn 'Hive failed to register'
    Hive.logger.warn "  - #{response['error']}"
  else
    Hive.logger.info "Hive registered with id #{response['id']}"
    @devicedb_id = response['id']
  end
  @devicedb_id || -1
end
81
+
82
# Poll the device database on behalf of this hive.
# Polling is skipped while the hive has no positive id.
def self.poll
  hive_id = self.id
  unless hive_id && hive_id > 0
    # NOTE(review): self.id falls back to -1, so the "Unable to poll hive"
    # branch looks unreachable - confirm before removing
    return hive_id ? Hive.logger.debug("Skipping polling of hive") : Hive.logger.warn("Unable to poll hive")
  end

  Hive.logger.debug "Polling hive: #{hive_id}"
  rtn = Hive.devicedb('Hive').poll(hive_id)
  Hive.logger.debug "Return data: #{rtn}"
  if rtn['error'].present?
    Hive.logger.warn "Hive polling failed: #{rtn['error']}"
  else
    Hive.logger.info "Successfully polled hive"
  end
end
102
+
103
# Get the IP address of the Hive.
#
# Prefers a private IPv4 address and falls back to the first non-loopback
# IPv4 address, so a host without a private address no longer fails with
# NoMethodError on nil.
#
# Returns the address as a String, or nil when the host has no usable IPv4
# address.
def self.ip_address
  ipv4 = Socket.ip_address_list.select(&:ipv4?)
  chosen = ipv4.find(&:ipv4_private?) || ipv4.reject(&:ipv4_loopback?).first
  chosen && chosen.ip_address
end
108
+
109
# Get the MAC address of the Hive, as reported by Mac.addr
def self.mac_address
  Mac.addr
end
113
+
114
# Get the hostname of the Hive: the part of the system hostname before the
# first '.'
def self.hostname
  short_name, = Socket.gethostname.split('.')
  short_name
end
118
+ end
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env bash
2
+
3
# Concatenate the $RETRY_URNS array into a single space-separated string
# (no trailing newline). The expansion is quoted so that URNs are printed
# verbatim instead of being glob-expanded or parsed as echo options.
function retry_args {
  local IFS=' ' # "${array[*]}" joins on the first character of IFS
  printf '%s' "${RETRY_URNS[*]}"
}
7
+
8
# Turn each entry of $TEST_NAMES into a cucumber name argument of the form
# "-n^<name>$ ", with every space in the name replaced by \s.
# The name is passed to printf as data, never as the format string, so names
# containing '%' are printed as they are. The loop variable is local so the
# caller's variables are left alone.
function cucumber_testname_args {
  local name
  for name in "${TEST_NAMES[@]}"
  do
    name=${name// /\\s}
    printf -- '-n^%s$ ' "$name"
  done
}