atex 0.9-py3-none-any.whl → 0.11-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atex/aggregator/__init__.py +62 -0
- atex/aggregator/json.py +279 -0
- atex/cli/__init__.py +14 -1
- atex/cli/fmf.py +7 -7
- atex/cli/libvirt.py +3 -2
- atex/cli/testingfarm.py +74 -3
- atex/connection/podman.py +2 -4
- atex/connection/ssh.py +7 -14
- atex/executor/executor.py +21 -20
- atex/executor/scripts.py +5 -3
- atex/executor/testcontrol.py +1 -1
- atex/orchestrator/__init__.py +76 -3
- atex/orchestrator/{orchestrator.py → adhoc.py} +246 -108
- atex/orchestrator/contest.py +94 -0
- atex/{provision → provisioner}/__init__.py +48 -52
- atex/{provision → provisioner}/libvirt/libvirt.py +34 -15
- atex/{provision → provisioner}/libvirt/locking.py +3 -1
- atex/provisioner/podman/__init__.py +2 -0
- atex/provisioner/podman/podman.py +169 -0
- atex/{provision → provisioner}/testingfarm/api.py +56 -48
- atex/{provision → provisioner}/testingfarm/testingfarm.py +43 -45
- atex/util/log.py +62 -67
- atex/util/subprocess.py +46 -12
- atex/util/threads.py +7 -0
- atex-0.11.dist-info/METADATA +86 -0
- atex-0.11.dist-info/RECORD +45 -0
- {atex-0.9.dist-info → atex-0.11.dist-info}/WHEEL +1 -1
- atex/orchestrator/aggregator.py +0 -111
- atex/provision/podman/__init__.py +0 -1
- atex/provision/podman/podman.py +0 -274
- atex-0.9.dist-info/METADATA +0 -178
- atex-0.9.dist-info/RECORD +0 -43
- /atex/{provision → provisioner}/libvirt/VM_PROVISION +0 -0
- /atex/{provision → provisioner}/libvirt/__init__.py +0 -0
- /atex/{provision → provisioner}/libvirt/setup-libvirt.sh +0 -0
- /atex/{provision → provisioner}/testingfarm/__init__.py +0 -0
- {atex-0.9.dist-info → atex-0.11.dist-info}/entry_points.txt +0 -0
- {atex-0.9.dist-info → atex-0.11.dist-info}/licenses/COPYING.txt +0 -0
atex/orchestrator/{orchestrator.py → adhoc.py}

@@ -1,24 +1,19 @@
-import time
 import tempfile
-import traceback
-import concurrent
 import collections
+import concurrent.futures
 from pathlib import Path

 from .. import util, executor
-
-
-class OrchestratorError(Exception):
-    pass
+from . import Orchestrator, OrchestratorError


 class FailedSetupError(OrchestratorError):
     pass


-class Orchestrator:
+class AdHocOrchestrator(Orchestrator):
     """
-
+    TODO: document function specific to this reference, ie. run_setup(), etc.
     """

     class SetupInfo(
@@ -55,13 +50,17 @@ class Orchestrator:
             # exception class instance if running the test failed
             # (None if no exception happened (exit_code is defined))
             "exception",
+            # Path of a 'results' JSON file with test-reported results
+            "results",
+            # Path of a 'files' directory with test-uploaded files
+            "files",
         ),
     ):
         pass

     def __init__(
         self, platform, fmf_tests, provisioners, aggregator, tmp_dir, *,
-        max_reruns=2, max_failed_setups=10, env=None,
+        max_remotes=1, max_spares=0, max_reruns=2, max_failed_setups=10, env=None,
     ):
         """
         'platform' is a string with platform name.
@@ -76,6 +75,15 @@ class Orchestrator:
         storing per-test results and uploaded files before being ingested
         by the aggregator. Can be safely shared by Orchestrator instances.

+        'max_remotes' is how many Remotes to hold reserved at any given time,
+        eg. how many tests to run in parallel. Clamped to the number of
+        to-be-run tests given as 'fmf_tests'.
+
+        'max_spares' is how many set-up Remotes to hold reserved and unused,
+        ready to replace a Remote destroyed by test. Values above 0 greatly
+        speed up test reruns as Remote reservation happens asynchronously
+        to test execution. Spares are reserved on top of 'max_remotes'.
+
         'max_reruns' is an integer of how many times to re-try running a failed
         test (which exited with non-0 or caused an Executor exception).

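For orientation, a minimal construction sketch of the renamed class follows. Only the keyword names come from the `__init__` signature above; the concrete values and the provisioner/aggregator objects are placeholders, not part of this package diff.

```python
# Hypothetical usage sketch; placeholder variables, not package code.
orch = AdHocOrchestrator(
    platform="example-platform",       # placeholder platform string
    fmf_tests=fmf_tests,               # discovered fmf tests (placeholder variable)
    provisioners=[some_provisioner],   # e.g. instances from atex.provisioner.*
    aggregator=some_aggregator,        # e.g. an aggregator from atex.aggregator
    tmp_dir="/tmp/atex-example",       # placeholder scratch directory
    max_remotes=4,                     # run up to 4 tests in parallel
    max_spares=1,                      # keep one extra set-up remote for reruns
)
```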
@@ -91,36 +99,23 @@ class Orchestrator:
         self.aggregator = aggregator
         self.tmp_dir = tmp_dir
         self.failed_setups_left = max_failed_setups
+        self.max_remotes = max_remotes
+        self.max_spares = max_spares
         # indexed by test name, value being integer of how many times
         self.reruns = collections.defaultdict(lambda: max_reruns)
         self.env = env
         # tests still waiting to be run
         self.to_run = set(fmf_tests.tests)
-        # running setup functions, as a list of SetupInfo items
-        self.running_setups = []
         # running tests as a dict, indexed by test name, with RunningInfo values
         self.running_tests = {}
         # thread queue for actively running tests
         self.test_queue = util.ThreadQueue(daemon=False)
         # thread queue for remotes being set up (uploading tests, etc.)
         self.setup_queue = util.ThreadQueue(daemon=True)
-        #
-
-
-
-    def run_setup(sinfo):
-        """
-        Set up a newly acquired class Remote instance for test execution.
-
-        'sinfo' is a SetupInfo instance with the (fully connected) remote.
-        """
-        sinfo.executor.setup()
-        sinfo.executor.upload_tests()
-        sinfo.executor.plan_prepare()
-        # NOTE: we never run executor.plan_finish() or even executor.cleanup()
-        # anywhere - instead, we assume the remote (and its connection)
-        # was invalidated by the test, so we just rely on remote.release()
-        # destroying the system
+        # thread queue for remotes being released
+        self.release_queue = util.ThreadQueue(daemon=True)
+        # thread queue for results being ingested
+        self.ingest_queue = util.ThreadQueue(daemon=False)

     def _run_new_test(self, info):
         """
@@ -132,7 +127,7 @@ class Orchestrator:
         next_test_name = self.next_test(self.to_run, self.fmf_tests.tests, info)
         assert next_test_name in self.to_run, "next_test() returned valid test name"

-        util.info(f"starting '{next_test_name}'
+        util.info(f"{info.remote}: starting '{next_test_name}'")

         self.to_run.remove(next_test_name)

@@ -147,6 +142,7 @@ class Orchestrator:
         )

         tmp_dir_path = Path(rinfo.tmp_dir.name)
+        tmp_dir_path.chmod(0o755)
         self.test_queue.start_thread(
             target=info.executor.run_test,
             target_args=(
@@ -162,57 +158,74 @@ class Orchestrator:
         """
         'finfo' is a FinishedInfo instance.
         """
+        test_data = self.fmf_tests.tests[finfo.test_name]
+
+        # TODO: somehow move logging from was_successful and should_be_rerun here,
+        # probably print just some generic info from those functions that doesn't
+        # imply any outcome, ie.
+        # {remote_with_test} threw {exception}
+        # {remote_with_test} exited with {code}
+        # {remote_with_test} has {N} reruns left
+        # {remote_with_test} has 0 reruns left
+        # and then log the decision separately, here below, such as
+        # {remote_with_test} failed, re-running
+        # {remote_with_test} completed, ingesting result
+        # {remote_with_test} was destructive, releasing remote
+        # {remote_with_test} ...., running next test
+        # That allows the user to override the functions, while keeping critical
+        # flow reliably logged here.
+
         remote_with_test = f"{finfo.remote}: '{finfo.test_name}'"

-
-
-
-
-
+        if not self.was_successful(finfo, test_data) and self.should_be_rerun(finfo, test_data):
+            # re-run the test
+            util.info(f"{remote_with_test} failed, re-running")
+            self.to_run.add(finfo.test_name)
+        else:
+            # ingest the result
+            #
+            # a condition just in case Executor code itself threw an exception
             # and didn't even report the fallback 'infra' result
-            if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            elif finfo.exit_code != 0:
-                msg = f"{remote_with_test} exited with non-zero: {finfo.exit_code}"
-                #finfo.remote.release()
-                if (reruns_left := self.reruns[finfo.test_name]) > 0:
-                    util.info(f"{msg}, re-running ({reruns_left} reruns left)")
-                    self.reruns[finfo.test_name] -= 1
-                    self.to_run.add(finfo.test_name)
-                else:
-                    util.info(f"{msg}, reruns exceeded, giving up")
-                    # record the final result anyway
-                    ingest_result()
+            if finfo.results is not None and finfo.files is not None:
+                util.info(f"{remote_with_test} completed, ingesting result")
+
+                def ingest_and_cleanup(ingest, args, cleanup):
+                    ingest(*args)
+                    # also delete the tmpdir housing these
+                    cleanup()
+
+                self.ingest_queue.start_thread(
+                    ingest_and_cleanup,
+                    target_args=(
+                        # ingest func itself
+                        self.aggregator.ingest,
+                        # args for ingest
+                        (self.platform, finfo.test_name, finfo.results, finfo.files),
+                        # cleanup func itself
+                        finfo.tmp_dir.cleanup,
+                    ),
+                    test_name=finfo.test_name,
+                )

-
-
-
-
+            # ingesting destroys these
+            finfo = self.FinishedInfo._from(
+                finfo,
+                results=None,
+                files=None,
+                tmp_dir=None,
+            )

-        # if destroyed, release the remote
+        # if destroyed, release the remote and request a replacement
         # (Executor exception is always considered destructive)
-        test_data = self.fmf_tests.tests[finfo.test_name]
         if finfo.exception or self.destructive(finfo, test_data):
             util.debug(f"{remote_with_test} was destructive, releasing remote")
-
+            self.release_queue.start_thread(
+                finfo.remote.release,
+                remote=finfo.remote,
+            )
+            # TODO: should this be conditioned by 'self.to_run:' ? to not uselessly fall
+            # into setup spares and get immediately released after setup?
+            finfo.provisioner.provision(1)

         # if still not destroyed, run another test on it
         # (without running plan setup, re-using already set up remote)
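The ingest call above fixes the interface the orchestrator expects from an aggregator: `ingest(platform, test_name, results, files)`, where `results` is a Path to the test's results JSON file and `files` a Path to its uploaded-files directory. A hypothetical stand-in that merely copies those artifacts could look like the sketch below; the real implementation ships in atex/aggregator/json.py and is not reproduced here.

```python
import shutil
from pathlib import Path

class CopyAggregator:
    """Hypothetical aggregator: copies each test's artifacts to a destination tree."""

    def __init__(self, dest):
        self.dest = Path(dest)

    def ingest(self, platform, test_name, results, files):
        # fmf test names typically look like '/some/test'; strip the leading '/'
        target = self.dest / platform / test_name.lstrip("/")
        target.mkdir(parents=True, exist_ok=True)
        if results is not None:
            shutil.copy(results, target / "results")
        if files is not None:
            shutil.copytree(files, target / "files", dirs_exist_ok=True)
```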
@@ -220,6 +233,14 @@ class Orchestrator:
             util.debug(f"{remote_with_test} was non-destructive, running next test")
             self._run_new_test(finfo)

+        # no more tests to run, release the remote
+        else:
+            util.debug(f"{finfo.remote} no longer useful, releasing it")
+            self.release_queue.start_thread(
+                finfo.remote.release,
+                remote=finfo.remote,
+            )
+
     def serve_once(self):
         """
         Run the orchestration logic, processing any outstanding requests
@@ -229,10 +250,6 @@ class Orchestrator:
         Returns True to indicate that it should be called again by the user
         (more work to be done), False once all testing is concluded.
         """
-        util.debug(
-            f"to_run: {len(self.to_run)} tests / "
-            f"running: {len(self.running_tests)} tests, {len(self.running_setups)} setups",
-        )
         # all done
         if not self.to_run and not self.running_tests:
             return False
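A plausible driving loop for serve_once(), assuming the start()/stop() methods shown elsewhere in this diff; the polling interval and the try/finally wrapper are illustrative, not taken from the package.

```python
import time

# 'orch' is an AdHocOrchestrator built as in the earlier sketch
orch.start()
try:
    while orch.serve_once():      # True means more work remains
        time.sleep(1)             # polling interval chosen arbitrarily for the sketch
finally:
    orch.stop()
```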
@@ -248,10 +265,16 @@ class Orchestrator:
             rinfo = treturn.rinfo
             del self.running_tests[rinfo.test_name]

+            tmp_dir_path = Path(rinfo.tmp_dir.name)
+            results_path = tmp_dir_path / "results"
+            files_path = tmp_dir_path / "files"
+
             finfo = self.FinishedInfo(
                 **rinfo,
                 exit_code=treturn.returned,
                 exception=treturn.exception,
+                results=results_path if results_path.exists() else None,
+                files=files_path if files_path.exists() else None,
             )
             self._process_finished_test(finfo)

@@ -264,22 +287,38 @@ class Orchestrator:
                     break

                 sinfo = treturn.sinfo
-                self.running_setups.remove(sinfo)

                 if treturn.exception:
-
-
-
-
+                    exc_str = f"{type(treturn.exception).__name__}({treturn.exception})"
+                    msg = f"{sinfo.remote}: setup failed with {exc_str}"
+                    self.release_queue.start_thread(
+                        sinfo.remote.release,
+                        remote=sinfo.remote,
+                    )
                     if (reruns_left := self.failed_setups_left) > 0:
-                        util.warning(f"{msg}, re-trying ({reruns_left} setup retries left)
+                        util.warning(f"{msg}, re-trying ({reruns_left} setup retries left)")
                         self.failed_setups_left -= 1
+                        sinfo.provisioner.provision(1)
                     else:
-                        util.warning(f"{msg}, setup retries exceeded, giving up
+                        util.warning(f"{msg}, setup retries exceeded, giving up")
                         raise FailedSetupError("setup retries limit exceeded, broken infra?")
                 else:
                     self._run_new_test(sinfo)

+        # release any extra Remotes being held as set-up when we know we won't
+        # use them for any tests (because to_run is empty)
+        else:
+            while self.setup_queue.qsize() > self.max_spares:
+                try:
+                    treturn = self.setup_queue.get_raw(block=False)
+                except util.ThreadQueue.Empty:
+                    break
+                util.debug(f"releasing extraneous set-up {treturn.sinfo.remote}")
+                self.release_queue.start_thread(
+                    treturn.sinfo.remote.release,
+                    remote=treturn.sinfo.remote,
+                )
+
         # try to get new remotes from Provisioners - if we get some, start
         # running setup on them
         for provisioner in self.provisioners:
@@ -295,48 +334,96 @@ class Orchestrator:
                     target_args=(sinfo,),
                     sinfo=sinfo,
                 )
-                self.running_setups.append(sinfo)
                 util.info(f"{provisioner}: running setup on new {remote}")

-
+        # gather returns from Remote.release() functions - check for exceptions
+        # thrown, re-report them as warnings as they are not typically critical
+        # for operation
+        while True:
+            try:
+                treturn = self.release_queue.get_raw(block=False)
+            except util.ThreadQueue.Empty:
+                break
+            else:
+                if treturn.exception:
+                    exc_str = f"{type(treturn.exception).__name__}({treturn.exception})"
+                    util.warning(f"{treturn.remote} release failed: {exc_str}")
+                else:
+                    util.debug(f"{treturn.remote} release completed")

-
-
-
-
-
-
+        # gather returns from Aggregator.ingest() calls - check for exceptions
+        while True:
+            try:
+                treturn = self.ingest_queue.get_raw(block=False)
+            except util.ThreadQueue.Empty:
+                break
+            else:
+                if treturn.exception:
+                    exc_str = f"{type(treturn.exception).__name__}({treturn.exception})"
+                    util.warning(f"'{treturn.test_name}' ingesting failed: {exc_str}")
+                else:
+                    util.debug(f"'{treturn.test_name}' ingesting completed")
+
+        return True

     def start(self):
         # start all provisioners
         for prov in self.provisioners:
             prov.start()
-
+
+        # start up initial reservations, balanced evenly across all available
+        # provisioner instances
+        count = min(self.max_remotes, len(self.fmf_tests.tests)) + self.max_spares
+        provisioners = self.provisioners[:count]
+        for idx, prov in enumerate(provisioners):
+            if count % len(provisioners) > idx:
+                prov.provision((count // len(provisioners)) + 1)
+            else:
+                prov.provision(count // len(provisioners))

     def stop(self):
         # cancel all running tests and wait for them to clean up (up to 0.1sec)
         for rinfo in self.running_tests.values():
             rinfo.executor.cancel()
-        self.test_queue.join()
+        self.test_queue.join()  # also ignore any exceptions raised
+
+        # wait for all running ingestions to finish, print exceptions
+        # (we would rather stop provisioners further below than raise here)
+        while True:
+            try:
+                treturn = self.ingest_queue.get_raw(block=False)
+            except util.ThreadQueue.Empty:
+                break
+            else:
+                if treturn.exception:
+                    exc_str = f"{type(treturn.exception).__name__}({treturn.exception})"
+                    util.warning(f"'{treturn.test_name}' ingesting failed: {exc_str}")
+                else:
+                    util.debug(f"'{treturn.test_name}' ingesting completed")
+        self.ingest_queue.join()

         # stop all provisioners, also releasing all remotes
+        # - parallelize up to 10 provisioners at a time
         if self.provisioners:
-            workers = min(len(self.provisioners),
+            workers = min(len(self.provisioners), 10)
             with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as ex:
                 for provisioner in self.provisioners:
-
-                    ex.submit(func)
+                    ex.submit(provisioner.stop)

-
-
-
-
-    except Exception:
-        self.stop()
-        raise
+    @staticmethod
+    def run_setup(sinfo):
+        """
+        Set up a newly acquired class Remote instance for test execution.

-
-
+        'sinfo' is a SetupInfo instance with the (fully connected) remote.
+        """
+        sinfo.executor.start()
+        sinfo.executor.upload_tests()
+        sinfo.executor.plan_prepare()
+        # NOTE: we never run executor.plan_finish() or even executor.stop()
+        # anywhere - instead, we assume the remote (and its connection)
+        # was invalidated by the test, so we just rely on remote.release()
+        # destroying the system

     @staticmethod
     def next_test(to_run, all_tests, previous):  # noqa: ARG004
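The new start() reserves `count = min(max_remotes, len(fmf_tests.tests)) + max_spares` remotes and splits them round-robin over at most `count` provisioners, handing the remainder to the first few. A small arithmetic check of that split, independent of any atex code:

```python
# count = 5 reservations over 2 provisioners -> base share 5 // 2 = 2,
# remainder 5 % 2 = 1 extra handed to the first provisioner
count, n_prov = 5, 2
shares = [
    count // n_prov + (1 if count % n_prov > idx else 0)
    for idx in range(n_prov)
]
assert shares == [3, 2] and sum(shares) == count
```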
@@ -381,5 +468,56 @@ class Orchestrator:
             return True
         # otherwise we good
         return False
-
-
+
+    @staticmethod
+    def was_successful(info, test_data):  # noqa: ARG004
+        """
+        Return a boolean result whether a finished test was successful.
+        Returning False might cause it to be re-run (per should_be_rerun()).
+
+        'info' is Orchestrator.FinishedInfo namedtuple of the test.
+
+        'test_data' is a dict of fully resolved fmf test metadata of that test.
+        """
+        remote_with_test = f"{info.remote}: '{info.test_name}'"
+
+        # executor (or test) threw exception
+        if info.exception:
+            exc_str = f"{type(info.exception).__name__}({info.exception})"
+            util.info(f"{remote_with_test} threw {exc_str} during test runtime")
+            return False
+
+        # the test exited as non-0
+        if info.exit_code != 0:
+            util.info(f"{remote_with_test} exited with non-zero: {info.exit_code}")
+            return False
+
+        # otherwise we good
+        return True
+
+    # TODO: @staticmethod and remove ARG002
+    #@staticmethod
+    def should_be_rerun(self, info, test_data):  # noqa: ARG004, ARG002
+        """
+        Return a boolean result whether a finished test failed in a way
+        that another execution attempt might succeed, due to race conditions
+        in the test or other non-deterministic factors.
+
+        'info' is Orchestrator.FinishedInfo namedtuple of the test.
+
+        'test_data' is a dict of fully resolved fmf test metadata of that test.
+        """
+        remote_with_test = f"{info.remote}: '{info.test_name}'"
+
+        # TODO: remove self.reruns and the whole X-reruns logic from AdHocOrchestrator,
+        # leave it up to the user to wrap should_be_rerun() with an external dict
+        # of tests, counting reruns for each
+        # - allows the user to adjust counts per-test (ie. test_data metadata)
+        # - allows this template to be @staticmethod
+        reruns_left = self.reruns[info.test_name]
+        util.info(f"{remote_with_test}: {reruns_left} reruns left")
+        if reruns_left > 0:
+            self.reruns[info.test_name] -= 1
+            return True
+        else:
+            return False
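The TODO above suggests keeping rerun accounting outside the orchestrator. A hypothetical subclass doing exactly that is sketched below; the 'extra-reruns' fmf key is invented for the illustration and is not part of the package.

```python
from atex.orchestrator.adhoc import AdHocOrchestrator

class ExternalRerunOrchestrator(AdHocOrchestrator):
    """Hypothetical subclass: rerun budget kept outside the base class."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.rerun_budget = {}   # test name -> remaining reruns

    def should_be_rerun(self, info, test_data):
        # 'extra-reruns' is an invented fmf key used only for this sketch
        budget = self.rerun_budget.setdefault(
            info.test_name, test_data.get("extra-reruns", 2),
        )
        if budget > 0:
            self.rerun_budget[info.test_name] = budget - 1
            return True
        return False
```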
atex/orchestrator/contest.py

@@ -0,0 +1,94 @@
+from .. import util
+from .adhoc import AdHocOrchestrator
+
+
+# copy/pasted from the Contest repo, lib/virt.py
+def calculate_guest_tag(tags):
+    if "snapshottable" not in tags:
+        return None
+    name = "default"
+    if "with-gui" in tags:
+        name += "_gui"
+    if "uefi" in tags:
+        name += "_uefi"
+    if "fips" in tags:
+        name += "_fips"
+    return name
+
+
+class ContestOrchestrator(AdHocOrchestrator):
+    """
+    Orchestrator for the Contest test suite:
+    https://github.com/RHSecurityCompliance/contest
+
+    Includes SCAP content upload via rsync and other Contest-specific
+    optimizations (around VM snapshots and scheduling).
+    """
+    content_dir_on_remote = "/root/upstream-content"
+
+    def __init__(self, *args, content_dir, **kwargs):
+        self.content_dir = content_dir
+        super().__init__(*args, **kwargs)
+
+    def run_setup(self, sinfo):
+        super().run_setup(sinfo)
+        # upload pre-built content
+        sinfo.remote.rsync(
+            "-r", "--delete", "--exclude=.git/",
+            f"{self.content_dir}/",
+            f"remote:{self.content_dir_on_remote}",
+            func=util.subprocess_log,
+        )
+
+    @classmethod
+    def next_test(cls, to_run, all_tests, previous):
+        # fresh remote, prefer running destructive tests (which likely need
+        # clean OS) to get them out of the way and prevent them from running
+        # on a tainted OS later
+        if type(previous) is AdHocOrchestrator.SetupInfo:
+            for next_name in to_run:
+                next_tags = all_tests[next_name].get("tag", ())
+                util.debug(f"considering next_test for destructivity: {next_name}")
+                if "destructive" in next_tags:
+                    util.debug(f"chosen next_test: {next_name}")
+                    return next_name
+
+        # previous test was run and finished non-destructively,
+        # try to find a next test with the same Contest lib.virt guest tags
+        # as the previous one, allowing snapshot reuse by Contest
+        elif type(previous) is AdHocOrchestrator.FinishedInfo:
+            finished_tags = all_tests[previous.test_name].get("tag", ())
+            util.debug(f"previous finished test on {previous.remote}: {previous.test_name}")
+            # if Guest tag is None, don't bother searching
+            if finished_guest_tag := calculate_guest_tag(finished_tags):
+                for next_name in to_run:
+                    util.debug(f"considering next_test with tags {finished_tags}: {next_name}")
+                    next_tags = all_tests[next_name].get("tag", ())
+                    next_guest_tag = calculate_guest_tag(next_tags)
+                    if next_guest_tag and finished_guest_tag == next_guest_tag:
+                        util.debug(f"chosen next_test: {next_name}")
+                        return next_name
+
+        # fallback to the default next_test()
+        return super().next_test(to_run, all_tests, previous)
+
+    @classmethod
+    def destructive(cls, info, test_data):
+        # if Executor ended with an exception (ie. duration exceeded),
+        # consider the test destructive
+        if info.exception:
+            return True
+
+        # if the test returned non-0 exit code, it could have thrown
+        # a python exception of its own, or (if bash) aborted abruptly
+        # due to 'set -e', don't trust the remote, consider it destroyed
+        # (0 = pass, 2 = fail, anything else = bad)
+        if info.exit_code not in [0, 2]:
+            return True
+
+        # if the test was destructive, assume the remote is destroyed
+        tags = test_data.get("tag", ())
+        if "destructive" in tags:
+            return True
+
+        return False