atex-0.7-py3-none-any.whl → atex-0.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. atex/cli/fmf.py +143 -0
  2. atex/cli/libvirt.py +127 -0
  3. atex/cli/testingfarm.py +35 -13
  4. atex/connection/__init__.py +13 -19
  5. atex/connection/podman.py +63 -0
  6. atex/connection/ssh.py +34 -52
  7. atex/executor/__init__.py +2 -0
  8. atex/executor/duration.py +60 -0
  9. atex/executor/executor.py +402 -0
  10. atex/executor/reporter.py +101 -0
  11. atex/{minitmt → executor}/scripts.py +37 -25
  12. atex/{minitmt → executor}/testcontrol.py +54 -42
  13. atex/fmf.py +237 -0
  14. atex/orchestrator/__init__.py +3 -59
  15. atex/orchestrator/aggregator.py +82 -134
  16. atex/orchestrator/orchestrator.py +385 -0
  17. atex/provision/__init__.py +74 -105
  18. atex/provision/libvirt/__init__.py +2 -24
  19. atex/provision/libvirt/libvirt.py +465 -0
  20. atex/provision/libvirt/locking.py +168 -0
  21. atex/provision/libvirt/setup-libvirt.sh +21 -1
  22. atex/provision/podman/__init__.py +1 -0
  23. atex/provision/podman/podman.py +274 -0
  24. atex/provision/testingfarm/__init__.py +2 -29
  25. atex/provision/testingfarm/api.py +123 -65
  26. atex/provision/testingfarm/testingfarm.py +234 -0
  27. atex/util/__init__.py +1 -6
  28. atex/util/libvirt.py +18 -0
  29. atex/util/log.py +31 -8
  30. atex/util/named_mapping.py +158 -0
  31. atex/util/path.py +16 -0
  32. atex/util/ssh_keygen.py +14 -0
  33. atex/util/threads.py +99 -0
  34. atex-0.9.dist-info/METADATA +178 -0
  35. atex-0.9.dist-info/RECORD +43 -0
  36. atex/cli/minitmt.py +0 -175
  37. atex/minitmt/__init__.py +0 -23
  38. atex/minitmt/executor.py +0 -348
  39. atex/minitmt/fmf.py +0 -202
  40. atex/provision/nspawn/README +0 -74
  41. atex/provision/podman/README +0 -59
  42. atex/provision/podman/host_container.sh +0 -74
  43. atex/provision/testingfarm/foo.py +0 -1
  44. atex-0.7.dist-info/METADATA +0 -102
  45. atex-0.7.dist-info/RECORD +0 -32
  46. {atex-0.7.dist-info → atex-0.9.dist-info}/WHEEL +0 -0
  47. {atex-0.7.dist-info → atex-0.9.dist-info}/entry_points.txt +0 -0
  48. {atex-0.7.dist-info → atex-0.9.dist-info}/licenses/COPYING.txt +0 -0
atex/orchestrator/__init__.py
@@ -1,59 +1,3 @@
- import importlib as _importlib
- import pkgutil as _pkgutil
- #import threading as _threading
-
-
- class Orchestrator:
-     """
-     A scheduler for parallel execution on multiple resources (machines/systems).
-
-     Given a list of Provisioner-derived class instances, it attempts to reserve
-     resources and uses them on-demand as they become available, calling run()
-     on each.
-
-     Note that run() and report() always run in a separate threads (are allowed
-     to block), and may access instance attributes, which are transparently
-     guarded by a thread-aware mutex.
-
-     """
-
-     def __init__(self):
-         pass
-         # TODO: configure via args, max workers, etc.
-
-     # def reserve(self, provisioner):
-     #     # call provisioner.reserve(), return its return
-     #     ...
-
-     def add_provisioner(self, provisioner):
-         # add to a self.* list of provisioners to be used for getting machines
-         ...
-
-     def run(self, provisioner):
-         # run tests, if destructive, call provisioner.release()
-         # returns anything
-         ...
-
-     def report(self):
-         # gets return from run
-         # writes it out to somewhere else
-         ...
-
-
- _submodules = [
-     info.name for info in _pkgutil.iter_modules(__spec__.submodule_search_locations)
- ]
-
- __all__ = [*_submodules, Orchestrator.__name__] # noqa: PLE0604
-
-
- def __dir__():
-     return __all__
-
-
- # lazily import submodules
- def __getattr__(attr):
-     if attr in _submodules:
-         return _importlib.import_module(f".{attr}", __name__)
-     else:
-         raise AttributeError(f"module '{__name__}' has no attribute '{attr}'")
+ #from .aggregator import CSVAggregator, JSONAggregator # noqa: F401
+ from .aggregator import JSONAggregator # noqa: F401
+ from .orchestrator import Orchestrator, OrchestratorError, FailedSetupError # noqa: F401
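With the lazy-import stub above removed, the orchestrator package now re-exports its public names directly, so (per the three added import lines) callers would pull them in as, for example:

    from atex.orchestrator import JSONAggregator, Orchestrator, OrchestratorError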
atex/orchestrator/aggregator.py
@@ -1,163 +1,111 @@
- """
- Functions and utilities for persistently storing test results and files (logs).
-
- There is a global aggregator (ie. CSVAggregator) that handles all the results
- from all platforms (arches and distros), and several per-platform aggregators
- that are used by test execution logic.
-
-     with CSVAggregator("results.csv.gz", "file/storage/dir") as global_aggr:
-         reporter = global_aggr.for_platform("rhel-9@x86_64")
-         reporter.report({"name": "/some/test", "status": "pass"})
-         with reporter.open_tmpfile() as fd:
-             os.write(fd, "some contents")
-             reporter.link_tmpfile_to("/some/test", "test.log", fd)
- """
-
- import os
- import csv
  import gzip
- import ctypes
- import ctypes.util
+ import json
+ import shutil
  import threading
- import contextlib
  from pathlib import Path


- libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)
-
- # int linkat(int olddirfd, const char *oldpath, int newdirfd, const char *newpath, int flags)
- libc.linkat.argtypes = (
-     ctypes.c_int,
-     ctypes.c_char_p,
-     ctypes.c_int,
-     ctypes.c_char_p,
-     ctypes.c_int,
- )
- libc.linkat.restype = ctypes.c_int
-
- # fcntl.h:#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
- AT_EMPTY_PATH = 0x1000
-
- # fcntl.h:#define AT_FDCWD -100 /* Special value used to indicate
- AT_FDCWD = -100
-
+ class JSONAggregator:
+     """
+     Collects reported results as a GZIP-ed line-JSON and files (logs) from
+     multiple test runs under a shared directory.

- def linkat(*args):
-     if (ret := libc.linkat(*args)) == -1:
-         errno = ctypes.get_errno()
-         raise OSError(errno, os.strerror(errno))
-     return ret
+     Note that the aggregated JSON file *does not* use the test-based JSON format
+     described by executor/RESULTS.md - both use JSON, but are very different.

+     This aggergated format uses a top-level array (on each line) with a fixed
+     field order:

- def _normalize_path(path):
-     # the magic here is to treat any dangerous path as starting at /
-     # and resolve any weird constructs relative to /, and then simply
-     # strip off the leading / and use it as a relative path
-     path = path.lstrip("/")
-     path = os.path.normpath(f"/{path}")
-     return path[1:]
+         platform, status, test name, subtest name, files, note

+     All these are strings except 'files', which is another (nested) array
+     of strings.

- class CSVAggregator:
-     """
-     Collects reported results as a GZIP-ed CSV and files (logs) under a related
-     directory.
+     If a field is missing in the source result, it is translated to a null
+     value.
      """

-     class _ExcelWithUnixNewline(csv.excel):
-         lineterminator = "\n"
+     def __init__(self, json_file, storage_dir):
+         """
+         'json_file' is a string/Path to a .json.gz file with aggregated results.

-     def __init__(self, results_file, storage_dir):
+         'storage_dir' is a string/Path of the top-level parent for all
+         per-platform / per-test files uploaded by tests.
+         """
          self.lock = threading.RLock()
          self.storage_dir = Path(storage_dir)
-         self.results_file = Path(results_file)
-         self.csv_writer = None
-         self.results_gzip_handle = None
+         self.json_file = Path(json_file)
+         self.json_gzip_fobj = None

-     def __enter__(self):
-         if self.results_file.exists():
-             raise FileExistsError(f"{self.results_file} already exists")
-         f = gzip.open(self.results_file, "wt", newline="")
-         try:
-             self.csv_writer = csv.writer(f, dialect=self._ExcelWithUnixNewline)
-         except:
-             f.close()
-             raise
-         self.results_gzip_handle = f
+     def open(self):
+         if self.json_file.exists():
+             raise FileExistsError(f"{self.json_file} already exists")
+         self.json_gzip_fobj = gzip.open(self.json_file, "wt", newline="\n")

          if self.storage_dir.exists():
              raise FileExistsError(f"{self.storage_dir} already exists")
          self.storage_dir.mkdir()

-         return self
-
-     def __exit__(self, exc_type, exc_value, traceback):
-         self.results_gzip_handle.close()
-         self.results_gzip_handle = None
-         self.csv_writer = None
-
-     def report(self, platform, status, name, note, *files):
-         with self.lock:
-             self.csv_writer.writerow((platform, status, name, note, *files))
-
-     def for_platform(self, platform_string):
-         """
-         Return a ResultAggregator instance that writes results into this
-         CSVAgreggator instance.
-         """
-         def report(result_line):
-             file_names = []
-             if "testout" in result_line:
-                 file_names.append(result_line["testout"])
-             if "files" in result_line:
-                 file_names += (f["name"] for f in result_line["files"])
-             self.report(
-                 platform_string, result_line["status"], result_line["name"],
-                 result_line.get("note", ""), *file_names,
-             )
-         platform_dir = self.storage_dir / platform_string
-         platform_dir.mkdir(exist_ok=True)
-         return ResultAggregator(report, platform_dir)
+     def close(self):
+         if self.json_gzip_fobj:
+             self.json_gzip_fobj.close()
+             self.json_gzip_fobj = None

+     def __enter__(self):
+         try:
+             self.open()
+             return self
+         except Exception:
+             self.close()
+             raise

- class ResultAggregator:
-     """
-     Collects reported results (in a format specified by RESULTS.md) for
-     a specific platform, storing them persistently.
-     """
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.close()

-     def __init__(self, callback, storage_dir):
+     def ingest(self, platform, test_name, results_file, files_dir):
          """
-         'callback' is a function to call to record a result, with the
-         result dict passed as an argument.
-
-         'storage_dir' is a directory for storing uploaded files.
+         Process 'results_file' (string/Path) for reported results and append
+         them to the overall aggregated line-JSON file, recursively copying over
+         the dir structure under 'files_dir' (string/Path) under the respective
+         platform and test name in the aggregated storage dir.
          """
-         self.report = callback
-         self.storage_dir = storage_dir
+         platform_dir = self.storage_dir / platform
+         test_dir = platform_dir / test_name.lstrip("/")
+         if test_dir.exists():
+             raise FileExistsError(f"{test_dir} already exists for {test_name}")
+
+         # parse the results separately, before writing any aggregated output,
+         # to ensure that either all results from the test are ingested, or none
+         # at all (ie. if one of the result lines contains JSON errors)
+         output_lines = []
+         with open(results_file) as results_fobj:
+             for raw_line in results_fobj:
+                 result_line = json.loads(raw_line)
+
+                 file_names = []
+                 if "testout" in result_line:
+                     file_names.append(result_line["testout"])
+                 if "files" in result_line:
+                     file_names += (f["name"] for f in result_line["files"])
+
+                 output_line = (
+                     platform,
+                     result_line["status"],
+                     test_name,
+                     result_line.get("name"),
+                     file_names,
+                     result_line.get("note"),
+                 )
+                 encoded = json.dumps(output_line, indent=None)
+                 output_lines.append(encoded)
+
+         output_str = "\n".join(output_lines) + "\n"

-     @contextlib.contextmanager
-     def open_tmpfile(self, open_mode=os.O_WRONLY):
-         """
-         Open an anonymous (name-less) file for writing and yield its file
-         descriptor (int) as context, closing it when the context is exited.
-         """
-         flags = open_mode | os.O_TMPFILE
-         fd = os.open(self.storage_dir, flags, 0o644)
-         try:
-             yield fd
-         finally:
-             os.close(fd)
+         with self.lock:
+             self.json_gzip_fobj.write(output_str)
+             self.json_gzip_fobj.flush()

-     def link_tmpfile_to(self, result_name, file_name, fd):
-         """
-         Store a file named 'file_name' in a directory relevant to 'result_name'
-         whose 'fd' (a file descriptor) was created by .open_tmpfile().
+         Path(results_file).unlink()

-         This function can be called multiple times with the same 'fd', and
-         does not close or otherwise alter the descriptor.
-         """
-         # /path/to/all/logs / some/test/name / path/to/file.log
-         file_path = self.storage_dir / result_name.lstrip("/") / _normalize_path(file_name)
-         file_path.parent.mkdir(parents=True, exist_ok=True)
-         linkat(fd, b"", AT_FDCWD, bytes(file_path), AT_EMPTY_PATH)
+         platform_dir.mkdir(exist_ok=True)
+         shutil.move(files_dir, test_dir)
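The JSONAggregator that replaces the CSV/linkat machinery above fixes the per-line field order (platform, status, test name, subtest name, files, note) and exposes ingest() plus a context manager. A minimal usage sketch follows; the platform string, test name, and paths are invented for illustration, and only calls shown in the hunk above are used:

    # hypothetical usage of the JSONAggregator added above; the platform string,
    # test name and tmp paths are made up for illustration
    from atex.orchestrator import JSONAggregator

    with JSONAggregator("results.json.gz", "storage") as aggr:
        # 'tmp/results' is the line-JSON results file produced by one test run,
        # 'tmp/files' the matching directory of uploaded logs; ingest() appends
        # to results.json.gz and moves the files under storage/<platform>/<test>
        aggr.ingest("rhel-9@x86_64", "/some/test", "tmp/results", "tmp/files")

    # each appended line of results.json.gz is a flat JSON array, e.g.:
    # ["rhel-9@x86_64", "pass", "/some/test", null, ["output.txt"], null]
    # ["rhel-9@x86_64", "fail", "/some/test", "cleanup", ["output.txt", "debug.log"], "timed out"]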
atex/orchestrator/orchestrator.py
@@ -0,0 +1,385 @@
+ import time
+ import tempfile
+ import traceback
+ import concurrent
+ import collections
+ from pathlib import Path
+
+ from .. import util, executor
+
+
+ class OrchestratorError(Exception):
+     pass
+
+
+ class FailedSetupError(OrchestratorError):
+     pass
+
+
+ class Orchestrator:
+     """
+     A scheduler for parallel execution on multiple resources (machines/systems).
+     """
+
+     class SetupInfo(
+         util.NamedMapping,
+         required=(
+             # class Provisioner instance this machine is provided by
+             # (for logging purposes)
+             "provisioner",
+             # class Remote instance returned by the Provisioner
+             "remote",
+             # class Executor instance uploading tests / running setup or tests
+             "executor",
+         ),
+     ):
+         pass
+
+     class RunningInfo(
+         SetupInfo,
+         required=(
+             # string with /test/name
+             "test_name",
+             # class tempfile.TemporaryDirectory instance passed to Executor
+             "tmp_dir",
+         ),
+     ):
+         pass
+
+     class FinishedInfo(
+         RunningInfo,
+         required=(
+             # integer with exit code of the test
+             # (None if exception happened)
+             "exit_code",
+             # exception class instance if running the test failed
+             # (None if no exception happened (exit_code is defined))
+             "exception",
+         ),
+     ):
+         pass
+
+     def __init__(
+         self, platform, fmf_tests, provisioners, aggregator, tmp_dir, *,
+         max_reruns=2, max_failed_setups=10, env=None,
+     ):
+         """
+         'platform' is a string with platform name.
+
+         'fmf_tests' is a class FMFTests instance of the tests to run.
+
+         'provisioners' is an iterable of class Provisioner instances.
+
+         'aggregator' is a class CSVAggregator instance.
+
+         'tmp_dir' is a string/Path to a temporary directory, to be used for
+         storing per-test results and uploaded files before being ingested
+         by the aggregator. Can be safely shared by Orchestrator instances.
+
+         'max_reruns' is an integer of how many times to re-try running a failed
+         test (which exited with non-0 or caused an Executor exception).
+
+         'max_failed_setups' is an integer of how many times an Executor's
+         plan setup (uploading tests, running prepare scripts, etc.) may fail
+         before FailedSetupError is raised.
+
+         'env' is a dict of extra environment variables to pass to Executor.
+         """
+         self.platform = platform
+         self.fmf_tests = fmf_tests
+         self.provisioners = tuple(provisioners)
+         self.aggregator = aggregator
+         self.tmp_dir = tmp_dir
+         self.failed_setups_left = max_failed_setups
+         # indexed by test name, value being integer of how many times
+         self.reruns = collections.defaultdict(lambda: max_reruns)
+         self.env = env
+         # tests still waiting to be run
+         self.to_run = set(fmf_tests.tests)
+         # running setup functions, as a list of SetupInfo items
+         self.running_setups = []
+         # running tests as a dict, indexed by test name, with RunningInfo values
+         self.running_tests = {}
+         # thread queue for actively running tests
+         self.test_queue = util.ThreadQueue(daemon=False)
+         # thread queue for remotes being set up (uploading tests, etc.)
+         self.setup_queue = util.ThreadQueue(daemon=True)
+         # NOTE: running_setups and test_running are just for debugging and
+         # cancellation, the execution flow itself uses ThreadQueues
+
+     @staticmethod
+     def run_setup(sinfo):
+         """
+         Set up a newly acquired class Remote instance for test execution.
+
+         'sinfo' is a SetupInfo instance with the (fully connected) remote.
+         """
+         sinfo.executor.setup()
+         sinfo.executor.upload_tests()
+         sinfo.executor.plan_prepare()
+         # NOTE: we never run executor.plan_finish() or even executor.cleanup()
+         # anywhere - instead, we assume the remote (and its connection)
+         # was invalidated by the test, so we just rely on remote.release()
+         # destroying the system
+
+     def _run_new_test(self, info):
+         """
+         'info' can be either
+         - SetupInfo instance with Remote/Executor to run the new test.
+         - FinishedInfo instance of a previously executed test
+           (reusing Remote/Executor for a new test).
+         """
+         next_test_name = self.next_test(self.to_run, self.fmf_tests.tests, info)
+         assert next_test_name in self.to_run, "next_test() returned valid test name"
+
+         util.info(f"starting '{next_test_name}' on {info.remote}")
+
+         self.to_run.remove(next_test_name)
+
+         rinfo = self.RunningInfo._from(
+             info,
+             test_name=next_test_name,
+             tmp_dir=tempfile.TemporaryDirectory(
+                 prefix=next_test_name.strip("/").replace("/","-") + "-",
+                 dir=self.tmp_dir,
+                 delete=False,
+             ),
+         )
+
+         tmp_dir_path = Path(rinfo.tmp_dir.name)
+         self.test_queue.start_thread(
+             target=info.executor.run_test,
+             target_args=(
+                 next_test_name,
+                 tmp_dir_path,
+             ),
+             rinfo=rinfo,
+         )
+
+         self.running_tests[next_test_name] = rinfo
+
+     def _process_finished_test(self, finfo):
+         """
+         'finfo' is a FinishedInfo instance.
+         """
+         remote_with_test = f"{finfo.remote}: '{finfo.test_name}'"
+
+         def ingest_result():
+             tmp_dir_path = Path(finfo.tmp_dir.name)
+             results_file = tmp_dir_path / "results"
+             files_dir = tmp_dir_path / "files"
+             # in case Executor code itself threw an unrecoverable exception
+             # and didn't even report the fallback 'infra' result
+             if results_file.exists() and files_dir.exists():
+                 self.aggregator.ingest(self.platform, finfo.test_name, results_file, files_dir)
+             finfo.tmp_dir.cleanup()
+
+         # if executor (or test) threw exception, schedule a re-run
+         if finfo.exception:
+             exc_name = type(finfo.exception).__name__
+             exc_tb = "".join(traceback.format_exception(finfo.exception)).rstrip("\n")
+             msg = f"{remote_with_test} threw {exc_name} during test runtime"
+             #finfo.remote.release()
+             if (reruns_left := self.reruns[finfo.test_name]) > 0:
+                 util.info(f"{msg}, re-running ({reruns_left} reruns left):\n{exc_tb}")
+                 self.reruns[finfo.test_name] -= 1
+                 self.to_run.add(finfo.test_name)
+             else:
+                 util.info(f"{msg}, reruns exceeded, giving up:\n{exc_tb}")
+                 # record the final result anyway
+                 ingest_result()
+
+         # if the test exited as non-0, try a re-run
+         elif finfo.exit_code != 0:
+             msg = f"{remote_with_test} exited with non-zero: {finfo.exit_code}"
+             #finfo.remote.release()
+             if (reruns_left := self.reruns[finfo.test_name]) > 0:
+                 util.info(f"{msg}, re-running ({reruns_left} reruns left)")
+                 self.reruns[finfo.test_name] -= 1
+                 self.to_run.add(finfo.test_name)
+             else:
+                 util.info(f"{msg}, reruns exceeded, giving up")
+                 # record the final result anyway
+                 ingest_result()
+
+         # test finished successfully - ingest its results
+         else:
+             util.info(f"{remote_with_test} finished successfully")
+             ingest_result()
+
+         # if destroyed, release the remote
+         # (Executor exception is always considered destructive)
+         test_data = self.fmf_tests.tests[finfo.test_name]
+         if finfo.exception or self.destructive(finfo, test_data):
+             util.debug(f"{remote_with_test} was destructive, releasing remote")
+             finfo.remote.release()
+
+         # if still not destroyed, run another test on it
+         # (without running plan setup, re-using already set up remote)
+         elif self.to_run:
+             util.debug(f"{remote_with_test} was non-destructive, running next test")
+             self._run_new_test(finfo)
+
+     def serve_once(self):
+         """
+         Run the orchestration logic, processing any outstanding requests
+         (for provisioning, new test execution, etc.) and returning once these
+         are taken care of.
+
+         Returns True to indicate that it should be called again by the user
+         (more work to be done), False once all testing is concluded.
+         """
+         util.debug(
+             f"to_run: {len(self.to_run)} tests / "
+             f"running: {len(self.running_tests)} tests, {len(self.running_setups)} setups",
+         )
+         # all done
+         if not self.to_run and not self.running_tests:
+             return False
+
+         # process all finished tests, potentially reusing remotes for executing
+         # further tests
+         while True:
+             try:
+                 treturn = self.test_queue.get_raw(block=False)
+             except util.ThreadQueue.Empty:
+                 break
+
+             rinfo = treturn.rinfo
+             del self.running_tests[rinfo.test_name]
+
+             finfo = self.FinishedInfo(
+                 **rinfo,
+                 exit_code=treturn.returned,
+                 exception=treturn.exception,
+             )
+             self._process_finished_test(finfo)
+
+         # process any remotes with finished plan setup (uploaded tests,
+         # plan-defined pkgs / prepare scripts), start executing tests on them
+         while self.to_run:
+             try:
+                 treturn = self.setup_queue.get_raw(block=False)
+             except util.ThreadQueue.Empty:
+                 break
+
+             sinfo = treturn.sinfo
+             self.running_setups.remove(sinfo)
+
+             if treturn.exception:
+                 exc_name = type(treturn.exception).__name__
+                 exc_tb = "".join(traceback.format_exception(treturn.exception)).rstrip("\n")
+                 msg = f"{sinfo.remote}: setup failed with {exc_name}"
+                 sinfo.remote.release()
+                 if (reruns_left := self.failed_setups_left) > 0:
+                     util.warning(f"{msg}, re-trying ({reruns_left} setup retries left):\n{exc_tb}")
+                     self.failed_setups_left -= 1
+                 else:
+                     util.warning(f"{msg}, setup retries exceeded, giving up:\n{exc_tb}")
+                     raise FailedSetupError("setup retries limit exceeded, broken infra?")
+             else:
+                 self._run_new_test(sinfo)
+
+         # try to get new remotes from Provisioners - if we get some, start
+         # running setup on them
+         for provisioner in self.provisioners:
+             while (remote := provisioner.get_remote(block=False)) is not None:
+                 ex = executor.Executor(self.fmf_tests, remote, env=self.env)
+                 sinfo = self.SetupInfo(
+                     provisioner=provisioner,
+                     remote=remote,
+                     executor=ex,
+                 )
+                 self.setup_queue.start_thread(
+                     target=self.run_setup,
+                     target_args=(sinfo,),
+                     sinfo=sinfo,
+                 )
+                 self.running_setups.append(sinfo)
+                 util.info(f"{provisioner}: running setup on new {remote}")
+
+         return True
+
+     def serve_forever(self):
+         """
+         Run the orchestration logic, blocking until all testing is concluded.
+         """
+         while self.serve_once():
+             time.sleep(1)
+
+     def start(self):
+         # start all provisioners
+         for prov in self.provisioners:
+             prov.start()
+         return self
+
+     def stop(self):
+         # cancel all running tests and wait for them to clean up (up to 0.1sec)
+         for rinfo in self.running_tests.values():
+             rinfo.executor.cancel()
+         self.test_queue.join() # also ignore any exceptions raised
+
+         # stop all provisioners, also releasing all remotes
+         if self.provisioners:
+             workers = min(len(self.provisioners), 20)
+             with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as ex:
+                 for provisioner in self.provisioners:
+                     for func in provisioner.stop_defer():
+                         ex.submit(func)
+
+     def __enter__(self):
+         try:
+             self.start()
+             return self
+         except Exception:
+             self.stop()
+             raise
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.stop()
+
+     @staticmethod
+     def next_test(to_run, all_tests, previous): # noqa: ARG004
+         """
+         Return a test name (string) to be executed next.
+
+         'to_run' is a set of test names to pick from. The returned test name
+         must be chosen from this set.
+
+         'tests' is a dict indexed by test name (string), with values being
+         fully resolved fmf test metadata (dicts) of all possible tests.
+
+         'previous' can be either
+         - Orchestrator.SetupInfo instance (first test to be run)
+         - Orchestrator.FinishedInfo instance (previous executed test)
+
+         This method must not modify any of its arguments, it must treat them
+         as read-only, eg. don't remove the returned test name from 'to_run'.
+         """
+         # default to simply picking any available test
+         return next(iter(to_run))
+
+     @staticmethod
+     def destructive(info, test_data): # noqa: ARG004
+         """
+         Return a boolean result whether a finished test was destructive
+         to a class Remote instance, indicating that the Remote instance
+         should not be used for further test execution.
+
+         'info' is Orchestrator.FinishedInfo namedtuple of the test.
+
+         'test_data' is a dict of fully resolved fmf test metadata of that test.
+         """
+         # if Executor ended with an exception (ie. duration exceeded),
+         # consider the test destructive
+         if info.exception:
+             return True
+         # if the test returned non-0 exit code, it could have thrown
+         # a python exception of its own, or (if bash) aborted abruptly
+         # due to 'set -e', don't trust the remote, consider it destroyed
+         if info.exit_code != 0:
+             return True
+         # otherwise we good
+         return False
+         # TODO: override with additional 'extra-contest: destructive: True' fmf metadata
+         # destructive = test_data.get("extra-contest", {}).get("destructive", False)
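Putting the pieces together, the Orchestrator above is driven through its context manager and serve_forever(). A hypothetical end-to-end sketch, based only on the signatures visible in this diff; how FMFTests and the provisioners are constructed is an assumption (those APIs live in atex/fmf.py and atex/provision/*, not shown here):

    # hypothetical wiring of the classes added in this diff; the FMFTests and
    # provisioner construction lines are assumptions, not verified API
    from atex import fmf
    from atex.orchestrator import JSONAggregator, Orchestrator

    fmf_tests = fmf.FMFTests(...)   # assumed to live in atex/fmf.py; arguments not shown here
    provisioners = [...]            # assumed: Provisioner instances from atex.provision.*

    with JSONAggregator("results.json.gz", "storage") as aggregator:
        with Orchestrator(
            "rhel-9@x86_64",            # platform, also used as the storage subdir by ingest()
            fmf_tests, provisioners, aggregator,
            tmp_dir="/var/tmp/atex",    # scratch space, shareable between Orchestrator instances
            max_reruns=2,               # the documented default
        ) as orch:
            orch.serve_forever()        # blocks until all testing is concluded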