atex 0.9-py3-none-any.whl → 0.10-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atex/aggregator/__init__.py +60 -0
- atex/{orchestrator/aggregator.py → aggregator/json.py} +6 -21
- atex/cli/__init__.py +11 -1
- atex/cli/libvirt.py +3 -2
- atex/cli/testingfarm.py +48 -3
- atex/connection/podman.py +2 -4
- atex/connection/ssh.py +7 -14
- atex/executor/executor.py +18 -17
- atex/executor/scripts.py +5 -3
- atex/executor/testcontrol.py +1 -1
- atex/orchestrator/__init__.py +76 -3
- atex/orchestrator/{orchestrator.py → adhoc.py} +183 -103
- atex/{provision → provisioner}/__init__.py +49 -37
- atex/{provision → provisioner}/libvirt/libvirt.py +21 -14
- atex/{provision → provisioner}/libvirt/locking.py +3 -1
- atex/provisioner/podman/__init__.py +2 -0
- atex/provisioner/podman/podman.py +169 -0
- atex/{provision → provisioner}/testingfarm/api.py +53 -44
- atex/{provision → provisioner}/testingfarm/testingfarm.py +17 -23
- atex/util/log.py +62 -67
- atex/util/subprocess.py +46 -12
- atex/util/threads.py +7 -0
- atex-0.10.dist-info/METADATA +86 -0
- atex-0.10.dist-info/RECORD +44 -0
- atex/provision/podman/__init__.py +0 -1
- atex/provision/podman/podman.py +0 -274
- atex-0.9.dist-info/METADATA +0 -178
- atex-0.9.dist-info/RECORD +0 -43
- /atex/{provision → provisioner}/libvirt/VM_PROVISION +0 -0
- /atex/{provision → provisioner}/libvirt/__init__.py +0 -0
- /atex/{provision → provisioner}/libvirt/setup-libvirt.sh +0 -0
- /atex/{provision → provisioner}/testingfarm/__init__.py +0 -0
- {atex-0.9.dist-info → atex-0.10.dist-info}/WHEEL +0 -0
- {atex-0.9.dist-info → atex-0.10.dist-info}/entry_points.txt +0 -0
- {atex-0.9.dist-info → atex-0.10.dist-info}/licenses/COPYING.txt +0 -0
@@ -1,24 +1,19 @@
-import time
 import tempfile
-import traceback
 import concurrent
 import collections
 from pathlib import Path
 
 from .. import util, executor
-
-
-class OrchestratorError(Exception):
-    pass
+from . import Orchestrator, OrchestratorError
 
 
 class FailedSetupError(OrchestratorError):
     pass
 
 
-class Orchestrator:
+class AdHocOrchestrator(Orchestrator):
     """
-
+    TODO: document function specific to this reference, ie. run_setup(), etc.
     """
 
     class SetupInfo(
@@ -55,13 +50,17 @@ class Orchestrator:
             # exception class instance if running the test failed
             # (None if no exception happened (exit_code is defined))
             "exception",
+            # Path of a 'results' JSON file with test-reported results
+            "results",
+            # Path of a 'files' directory with test-uploaded files
+            "files",
         ),
     ):
         pass
 
     def __init__(
         self, platform, fmf_tests, provisioners, aggregator, tmp_dir, *,
-        max_reruns=2, max_failed_setups=10, env=None,
+        max_remotes=1, max_spares=0, max_reruns=2, max_failed_setups=10, env=None,
     ):
         """
         'platform' is a string with platform name.
@@ -76,6 +75,15 @@ class Orchestrator:
         storing per-test results and uploaded files before being ingested
         by the aggregator. Can be safely shared by Orchestrator instances.
 
+        'max_remotes' is how many Remotes to hold reserved at any given time,
+        eg. how many tests to run in parallel. Clamped to the number of
+        to-be-run tests given as 'fmf_tests'.
+
+        'max_spares' is how many set-up Remotes to hold reserved and unused,
+        ready to replace a Remote destroyed by test. Values above 0 greatly
+        speed up test reruns as Remote reservation happens asynchronously
+        to test execution. Spares are reserved on top of 'max_remotes'.
+
         'max_reruns' is an integer of how many times to re-try running a failed
         test (which exited with non-0 or caused an Executor exception).
 
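
For orientation, a minimal driver sketch showing where the new 'max_remotes' and 'max_spares' knobs fit; the platform, fmf_tests, provisioners, aggregator and tmp_dir objects are placeholders assumed to come from the surrounding atex APIs, and only the keyword arguments and the start()/serve_once()/stop() calls are taken from this diff:

    import time

    orch = AdHocOrchestrator(
        platform, fmf_tests, provisioners, aggregator, tmp_dir,
        max_remotes=4,   # run up to 4 tests in parallel
        max_spares=1,    # keep one extra set-up Remote ready to replace a destroyed one
    )
    orch.start()
    try:
        # serve_once() returns False once all testing is concluded
        while orch.serve_once():
            time.sleep(1)
    finally:
        orch.stop()
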
@@ -91,36 +99,21 @@ class Orchestrator:
         self.aggregator = aggregator
         self.tmp_dir = tmp_dir
         self.failed_setups_left = max_failed_setups
+        self.max_remotes = max_remotes
+        self.max_spares = max_spares
         # indexed by test name, value being integer of how many times
         self.reruns = collections.defaultdict(lambda: max_reruns)
         self.env = env
         # tests still waiting to be run
         self.to_run = set(fmf_tests.tests)
-        # running setup functions, as a list of SetupInfo items
-        self.running_setups = []
         # running tests as a dict, indexed by test name, with RunningInfo values
         self.running_tests = {}
         # thread queue for actively running tests
         self.test_queue = util.ThreadQueue(daemon=False)
         # thread queue for remotes being set up (uploading tests, etc.)
         self.setup_queue = util.ThreadQueue(daemon=True)
-        #
-
-
-    @staticmethod
-    def run_setup(sinfo):
-        """
-        Set up a newly acquired class Remote instance for test execution.
-
-        'sinfo' is a SetupInfo instance with the (fully connected) remote.
-        """
-        sinfo.executor.setup()
-        sinfo.executor.upload_tests()
-        sinfo.executor.plan_prepare()
-        # NOTE: we never run executor.plan_finish() or even executor.cleanup()
-        # anywhere - instead, we assume the remote (and its connection)
-        # was invalidated by the test, so we just rely on remote.release()
-        # destroying the system
+        # thread queue for remotes being released
+        self.release_queue = util.ThreadQueue(daemon=True)
 
     def _run_new_test(self, info):
         """
@@ -162,57 +155,59 @@ class Orchestrator:
         """
         'finfo' is a FinishedInfo instance.
         """
-
+        test_data = self.fmf_tests.tests[finfo.test_name]
 
-
-
-
-
-
-
-
-
-
+        # TODO: somehow move logging from was_successful and should_be_rerun here,
+        # probably print just some generic info from those functions that doesn't
+        # imply any outcome, ie.
+        # {remote_with_test} threw {exception}
+        # {remote_with_test} exited with {code}
+        # {remote_with_test} has {N} reruns left
+        # {remote_with_test} has 0 reruns left
+        # and then log the decision separately, here below, such as
+        # {remote_with_test} failed, re-running
+        # {remote_with_test} completed, ingesting result
+        # {remote_with_test} was destructive, releasing remote
+        # {remote_with_test} ...., running next test
+        # That allows the user to override the functions, while keeping critical
+        # flow reliably logged here.
 
-
-        if finfo.exception:
-            exc_name = type(finfo.exception).__name__
-            exc_tb = "".join(traceback.format_exception(finfo.exception)).rstrip("\n")
-            msg = f"{remote_with_test} threw {exc_name} during test runtime"
-            #finfo.remote.release()
-            if (reruns_left := self.reruns[finfo.test_name]) > 0:
-                util.info(f"{msg}, re-running ({reruns_left} reruns left):\n{exc_tb}")
-                self.reruns[finfo.test_name] -= 1
-                self.to_run.add(finfo.test_name)
-            else:
-                util.info(f"{msg}, reruns exceeded, giving up:\n{exc_tb}")
-                # record the final result anyway
-                ingest_result()
-
-        # if the test exited as non-0, try a re-run
-        elif finfo.exit_code != 0:
-            msg = f"{remote_with_test} exited with non-zero: {finfo.exit_code}"
-            #finfo.remote.release()
-            if (reruns_left := self.reruns[finfo.test_name]) > 0:
-                util.info(f"{msg}, re-running ({reruns_left} reruns left)")
-                self.reruns[finfo.test_name] -= 1
-                self.to_run.add(finfo.test_name)
-            else:
-                util.info(f"{msg}, reruns exceeded, giving up")
-                # record the final result anyway
-                ingest_result()
+        remote_with_test = f"{finfo.remote}: '{finfo.test_name}'"
 
-
+        if not self.was_successful(finfo, test_data) and self.should_be_rerun(finfo, test_data):
+            # re-run the test
+            self.to_run.add(finfo.test_name)
         else:
-
-
+            # ingest the result
+            #
+            # a condition just in case Executor code itself threw an exception
+            # and didn't even report the fallback 'infra' result
+            if finfo.results is not None and finfo.files is not None:
+                self.aggregator.ingest(
+                    self.platform,
+                    finfo.test_name,
+                    finfo.results,
+                    finfo.files,
+                )
+                # also delete the tmpdir housing these
+                finfo.tmp_dir.cleanup()
+            # ingesting destroyed these
+            finfo = self.FinishedInfo._from(
+                finfo,
+                results=None,
+                files=None,
+                tmp_dir=None,
+            )
 
-        # if destroyed, release the remote
+        # if destroyed, release the remote and request a replacement
         # (Executor exception is always considered destructive)
-        test_data = self.fmf_tests.tests[finfo.test_name]
         if finfo.exception or self.destructive(finfo, test_data):
             util.debug(f"{remote_with_test} was destructive, releasing remote")
-
+            self.release_queue.start_thread(
+                finfo.remote.release,
+                remote=finfo.remote,
+            )
+            finfo.provisioner.provision(1)
 
         # if still not destroyed, run another test on it
         # (without running plan setup, re-using already set up remote)
@@ -229,12 +224,8 @@ class Orchestrator:
         Returns True to indicate that it should be called again by the user
         (more work to be done), False once all testing is concluded.
         """
-        util.debug(
-            f"to_run: {len(self.to_run)} tests / "
-            f"running: {len(self.running_tests)} tests, {len(self.running_setups)} setups",
-        )
         # all done
-        if not self.to_run and not self.running_tests:
+        if not self.to_run and not self.running_tests and self.release_queue.qsize() == 0:
             return False
 
         # process all finished tests, potentially reusing remotes for executing
@@ -248,10 +239,16 @@ class Orchestrator:
             rinfo = treturn.rinfo
             del self.running_tests[rinfo.test_name]
 
+            tmp_dir_path = Path(rinfo.tmp_dir.name)
+            results_path = tmp_dir_path / "results"
+            files_path = tmp_dir_path / "files"
+
             finfo = self.FinishedInfo(
                 **rinfo,
                 exit_code=treturn.returned,
                 exception=treturn.exception,
+                results=results_path if results_path.exists() else None,
+                files=files_path if files_path.exists() else None,
             )
             self._process_finished_test(finfo)
 
@@ -264,22 +261,36 @@ class Orchestrator:
                     break
 
                 sinfo = treturn.sinfo
-                self.running_setups.remove(sinfo)
 
                 if treturn.exception:
-
-
-
-
+                    msg = f"{sinfo.remote}: setup failed with {repr(treturn.exception)}"
+                    self.release_queue.start_thread(
+                        sinfo.remote.release,
+                        remote=sinfo.remote,
+                    )
                     if (reruns_left := self.failed_setups_left) > 0:
-                        util.warning(f"{msg}, re-trying ({reruns_left} setup retries left)
+                        util.warning(f"{msg}, re-trying ({reruns_left} setup retries left)")
                         self.failed_setups_left -= 1
+                        sinfo.provisioner.provision(1)
                     else:
-                        util.warning(f"{msg}, setup retries exceeded, giving up
+                        util.warning(f"{msg}, setup retries exceeded, giving up")
                         raise FailedSetupError("setup retries limit exceeded, broken infra?")
                 else:
                     self._run_new_test(sinfo)
 
+        # release any extra Remotes being held as set-up when we know we won't
+        # use them for any tests (because to_run is empty)
+        else:
+            while self.setup_queue.qsize() > self.max_spares:
+                try:
+                    treturn = self.setup_queue.get_raw(block=False)
+                except util.ThreadQueue.Empty:
+                    break
+                self.release_queue.start_thread(
+                    treturn.sinfo.remote.release,
+                    remote=treturn.sinfo.remote,
+                )
+
         # try to get new remotes from Provisioners - if we get some, start
         # running setup on them
         for provisioner in self.provisioners:
@@ -295,23 +306,37 @@ class Orchestrator:
                 target_args=(sinfo,),
                 sinfo=sinfo,
             )
-            self.running_setups.append(sinfo)
             util.info(f"{provisioner}: running setup on new {remote}")
 
-
+        # gather returns from Remote.release() functions - check for exceptions
+        # thrown, re-report them as warnings as they are not typically critical
+        # for operation
+        try:
+            treturn = self.release_queue.get_raw(block=False)
+        except util.ThreadQueue.Empty:
+            pass
+        else:
+            if treturn.exception:
+                util.warning(f"{treturn.remote} release failed: {repr(treturn.exception)}")
+            else:
+                util.debug(f"{treturn.remote}: completed .release()")
 
-
-        """
-        Run the orchestration logic, blocking until all testing is concluded.
-        """
-        while self.serve_once():
-            time.sleep(1)
+        return True
 
     def start(self):
         # start all provisioners
         for prov in self.provisioners:
             prov.start()
-
+
+        # start up initial reservations, balanced evenly across all available
+        # provisioner instances
+        count = min(self.max_remotes, len(self.fmf_tests.tests)) + self.max_spares
+        provisioners = self.provisioners[:count]
+        for idx, prov in enumerate(provisioners):
+            if count % len(provisioners) > idx:
+                prov.provision((count // len(provisioners)) + 1)
+            else:
+                prov.provision(count // len(provisioners))
 
     def stop(self):
         # cancel all running tests and wait for them to clean up (up to 0.1sec)
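
As a worked example of the balancing above: with max_remotes=4, max_spares=1 and at least four tests, count is 5; across three provisioners the first two each receive 2 initial reservations ((5 // 3) + 1, because 5 % 3 is greater than their index) and the third receives 1 (5 // 3), for 5 reservations in total.
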
@@ -320,6 +345,7 @@ class Orchestrator:
         self.test_queue.join()  # also ignore any exceptions raised
 
         # stop all provisioners, also releasing all remotes
+        # TODO: don't parallelize here, remove .stop_defer() and parallelize in provisioners
         if self.provisioners:
             workers = min(len(self.provisioners), 20)
             with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as ex:
@@ -327,16 +353,20 @@ class Orchestrator:
                     for func in provisioner.stop_defer():
                         ex.submit(func)
 
-
-
-
-
-        except Exception:
-            self.stop()
-            raise
+    @staticmethod
+    def run_setup(sinfo):
+        """
+        Set up a newly acquired class Remote instance for test execution.
 
-
-
+        'sinfo' is a SetupInfo instance with the (fully connected) remote.
+        """
+        sinfo.executor.start()
+        sinfo.executor.upload_tests()
+        sinfo.executor.plan_prepare()
+        # NOTE: we never run executor.plan_finish() or even executor.stop()
+        # anywhere - instead, we assume the remote (and its connection)
+        # was invalidated by the test, so we just rely on remote.release()
+        # destroying the system
 
     @staticmethod
     def next_test(to_run, all_tests, previous):  # noqa: ARG004
@@ -381,5 +411,55 @@ class Orchestrator:
             return True
         # otherwise we good
         return False
-
-
+
+    @staticmethod
+    def was_successful(info, test_data):  # noqa: ARG004
+        """
+        Return a boolean result whether a finished test was successful.
+        Returning False might cause it to be re-run (per should_be_rerun()).
+
+        'info' is Orchestrator.FinishedInfo namedtuple of the test.
+
+        'test_data' is a dict of fully resolved fmf test metadata of that test.
+        """
+        remote_with_test = f"{info.remote}: '{info.test_name}'"
+
+        # executor (or test) threw exception
+        if info.exception:
+            util.info(f"{remote_with_test} threw {repr(info.exception)} during test runtime")
+            return False
+
+        # the test exited as non-0
+        if info.exit_code != 0:
+            util.info(f"{remote_with_test} exited with non-zero: {info.exit_code}")
+            return False
+
+        # otherwise we good
+        return True
+
+    # TODO: @staticmethod and remove ARG002
+    #@staticmethod
+    def should_be_rerun(self, info, test_data):  # noqa: ARG004, ARG002
+        """
+        Return a boolean result whether a finished test failed in a way
+        that another execution attempt might succeed, due to race conditions
+        in the test or other non-deterministic factors.
+
+        'info' is Orchestrator.FinishedInfo namedtuple of the test.
+
+        'test_data' is a dict of fully resolved fmf test metadata of that test.
+        """
+        remote_with_test = f"{info.remote}: '{info.test_name}'"
+
+        # TODO: remove self.reruns and the whole X-reruns logic from AdHocOrchestrator,
+        # leave it up to the user to wrap should_be_rerun() with an external dict
+        # of tests, counting reruns for each
+        # - allows the user to adjust counts per-test (ie. test_data metadata)
+        # - allows this template to be @staticmethod
+        if (reruns_left := self.reruns[info.test_name]) > 0:
+            util.info(f"{remote_with_test}: re-running ({reruns_left} reruns left)")
+            self.reruns[info.test_name] -= 1
+            return True
+        else:
+            util.info(f"{remote_with_test}: reruns exceeded, giving up")
+            return False
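
One way these hooks could be used (an illustrative sketch, not part of the package): subclass AdHocOrchestrator and override should_be_rerun() with an external rerun budget, roughly as the TODO comment above suggests. The 'no-rerun' metadata key is a hypothetical example, not a field defined by atex:

    import collections

    class MyOrchestrator(AdHocOrchestrator):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # external per-test rerun counter, allowing one rerun per test
            self.rerun_budget = collections.defaultdict(lambda: 1)

        def should_be_rerun(self, info, test_data):
            if test_data.get("no-rerun"):   # hypothetical custom fmf key
                return False
            if self.rerun_budget[info.test_name] > 0:
                self.rerun_budget[info.test_name] -= 1
                return True
            return False
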
@@ -4,41 +4,78 @@ import pkgutil as _pkgutil
 from .. import connection as _connection
 
 
+class Remote(_connection.Connection):
+    """
+    Representation of a provisioned (reserved) remote system, providing
+    a Connection-like API in addition to system management helpers.
+
+    An instance of Remote is typically prepared by a Provisioner and returned
+    to the caller for use and an eventual .release().
+
+    Also note that Remote can be used via Context Manager, but does not
+    do automatic .release(), the manager only handles the built-in Connection.
+    The intention is for a Provisioner to run via its own Contest Manager and
+    release all Remotes upon exit.
+    If you need automatic release of one Remote, use a try/finally block.
+    """
+
+    def release(self):
+        """
+        Release (de-provision) the remote resource.
+        """
+        raise NotImplementedError(f"'release' not implemented for {self.__class__.__name__}")
+
+
 class Provisioner:
     """
     A remote resource (machine/system) provider.
 
-    The
-
-
-
+    The idea is to request machines (a.k.a. Remotes, or class Remote instances)
+    to be reserved via a non-blocking .provision() and for them to be retrieved
+    through blocking / non-blocking .get_remote() when they become available.
+
+    Each Remote has its own .release() for freeing (de-provisioning) it once
+    the user doesn't need it anymore. The Provisioner does this automatically
+    to all Remotes during .stop() or context manager exit.
 
         p = Provisioner()
         p.start()
+        p.provision(count=1)
         remote = p.get_remote()
         remote.cmd(["ls", "/"])
         remote.release()
         p.stop()
 
         with Provisioner() as p:
-
+            p.provision(count=2)
+            remote1 = p.get_remote()
+            remote2 = p.get_remote()
             ...
-            remote.release()
 
-
-
-
+    Note that .provision() is a hint expressed by the caller, not a guarantee
+    that .get_remote() will ever return a Remote. Ie. the caller can call
+    .provision(count=math.inf) to receive as many remotes as the Provisioner
+    can possibly supply.
 
+    TODO: remove .defer_stop() (or stop_defer) and mention this below:
     Note that .stop() or .defer_stop() may be called from a different
     thread, asynchronously to any other functions.
     """
 
+    def provision(self, count=1):
+        """
+        Request that 'count' machines be provisioned (reserved) for use,
+        to be returned at a later point by .get_remote().
+        """
+        raise NotImplementedError(f"'provision' not implemented for {self.__class__.__name__}")
+
     def get_remote(self, block=True):
         """
-
+        Return a connected class Remote instance of a previously .provision()ed
+        remote system.
 
-        If 'block' is True, wait for the
-        otherwise return None if there is
+        If 'block' is True, wait for the Remote to be available and connected,
+        otherwise return None if there is none available yet.
         """
         raise NotImplementedError(f"'get_remote' not implemented for {self.__class__.__name__}")
 
@@ -80,31 +117,6 @@ class Provisioner:
         self.stop()
 
 
-class Remote(_connection.Connection):
-    """
-    Representation of a provisioned (reserved) remote system, providing
-    a Connection-like API in addition to system management helpers.
-
-    An instance of Remote is typically prepared by a Provisioner and lent out
-    for further use, to be .release()d by the user (if destroyed).
-    It is not meant for repeated reserve/release cycles, hence the lack
-    of .reserve().
-
-    Also note that Remote can be used via Context Manager, but does not
-    do automatic .release(), the manager only handles the built-in Connection.
-    The intention is for a Provisioner to run via its own Contest Manager and
-    release all Remotes upon exit.
-    If you need automatic release of one Remote, use a contextlib.ExitStack
-    with a callback, or a try/finally block.
-    """
-
-    def release(self):
-        """
-        Release (de-provision) the remote resource.
-        """
-        raise NotImplementedError(f"'release' not implemented for {self.__class__.__name__}")
-
-
 _submodules = [
     info.name for info in _pkgutil.iter_modules(__spec__.submodule_search_locations)
 ]