atex-0.7-py3-none-any.whl → atex-0.9-py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- atex/cli/fmf.py +143 -0
- atex/cli/libvirt.py +127 -0
- atex/cli/testingfarm.py +35 -13
- atex/connection/__init__.py +13 -19
- atex/connection/podman.py +63 -0
- atex/connection/ssh.py +34 -52
- atex/executor/__init__.py +2 -0
- atex/executor/duration.py +60 -0
- atex/executor/executor.py +402 -0
- atex/executor/reporter.py +101 -0
- atex/{minitmt → executor}/scripts.py +37 -25
- atex/{minitmt → executor}/testcontrol.py +54 -42
- atex/fmf.py +237 -0
- atex/orchestrator/__init__.py +3 -59
- atex/orchestrator/aggregator.py +82 -134
- atex/orchestrator/orchestrator.py +385 -0
- atex/provision/__init__.py +74 -105
- atex/provision/libvirt/__init__.py +2 -24
- atex/provision/libvirt/libvirt.py +465 -0
- atex/provision/libvirt/locking.py +168 -0
- atex/provision/libvirt/setup-libvirt.sh +21 -1
- atex/provision/podman/__init__.py +1 -0
- atex/provision/podman/podman.py +274 -0
- atex/provision/testingfarm/__init__.py +2 -29
- atex/provision/testingfarm/api.py +123 -65
- atex/provision/testingfarm/testingfarm.py +234 -0
- atex/util/__init__.py +1 -6
- atex/util/libvirt.py +18 -0
- atex/util/log.py +31 -8
- atex/util/named_mapping.py +158 -0
- atex/util/path.py +16 -0
- atex/util/ssh_keygen.py +14 -0
- atex/util/threads.py +99 -0
- atex-0.9.dist-info/METADATA +178 -0
- atex-0.9.dist-info/RECORD +43 -0
- atex/cli/minitmt.py +0 -175
- atex/minitmt/__init__.py +0 -23
- atex/minitmt/executor.py +0 -348
- atex/minitmt/fmf.py +0 -202
- atex/provision/nspawn/README +0 -74
- atex/provision/podman/README +0 -59
- atex/provision/podman/host_container.sh +0 -74
- atex/provision/testingfarm/foo.py +0 -1
- atex-0.7.dist-info/METADATA +0 -102
- atex-0.7.dist-info/RECORD +0 -32
- {atex-0.7.dist-info → atex-0.9.dist-info}/WHEEL +0 -0
- {atex-0.7.dist-info → atex-0.9.dist-info}/entry_points.txt +0 -0
- {atex-0.7.dist-info → atex-0.9.dist-info}/licenses/COPYING.txt +0 -0
atex/orchestrator/__init__.py
CHANGED
@@ -1,59 +1,3 @@
-import pkgutil as _pkgutil
-import importlib as _importlib
-
-
-
-class Orchestrator:
-    """
-    A scheduler for parallel execution on multiple resources (machines/systems).
-
-    Given a list of Provisioner-derived class instances, it attempts to reserve
-    resources and uses them on-demand as they become available, calling run()
-    on each.
-
-    Note that run() and report() always run in a separate threads (are allowed
-    to block), and may access instance attributes, which are transparently
-    guarded by a thread-aware mutex.
-
-    """
-
-    def __init__(self):
-        pass
-        # TODO: configure via args, max workers, etc.
-
-    # def reserve(self, provisioner):
-    #     # call provisioner.reserve(), return its return
-    #     ...
-
-    def add_provisioner(self, provisioner):
-        # add to a self.* list of provisioners to be used for getting machines
-        ...
-
-    def run(self, provisioner):
-        # run tests, if destructive, call provisioner.release()
-        # returns anything
-        ...
-
-    def report(self):
-        # gets return from run
-        # writes it out to somewhere else
-        ...
-
-
-_submodules = [
-    info.name for info in _pkgutil.iter_modules(__spec__.submodule_search_locations)
-]
-
-__all__ = [*_submodules, Orchestrator.__name__]  # noqa: PLE0604
-
-
-def __dir__():
-    return __all__
-
-
-# lazily import submodules
-def __getattr__(attr):
-    if attr in _submodules:
-        return _importlib.import_module(f".{attr}", __name__)
-    else:
-        raise AttributeError(f"module '{__name__}' has no attribute '{attr}'")
+#from .aggregator import CSVAggregator, JSONAggregator  # noqa: F401
+from .aggregator import JSONAggregator  # noqa: F401
+from .orchestrator import Orchestrator, OrchestratorError, FailedSetupError  # noqa: F401
atex/orchestrator/aggregator.py
CHANGED
@@ -1,163 +1,111 @@
-"""
-Functions and utilities for persistently storing test results and files (logs).
-
-There is a global aggregator (ie. CSVAggregator) that handles all the results
-from all platforms (arches and distros), and several per-platform aggregators
-that are used by test execution logic.
-
-    with CSVAggregator("results.csv.gz", "file/storage/dir") as global_aggr:
-        reporter = global_aggr.for_platform("rhel-9@x86_64")
-        reporter.report({"name": "/some/test", "status": "pass"})
-        with reporter.open_tmpfile() as fd:
-            os.write(fd, "some contents")
-        reporter.link_tmpfile_to("/some/test", "test.log", fd)
-"""
-
-import os
-import csv
 import gzip
-import ctypes
-import
+import json
+import shutil
 import threading
-import contextlib
 from pathlib import Path


-
-
-
-
-    ctypes.c_int,
-    ctypes.c_char_p,
-    ctypes.c_int,
-    ctypes.c_char_p,
-    ctypes.c_int,
-)
-libc.linkat.restype = ctypes.c_int
-
-# fcntl.h:#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
-AT_EMPTY_PATH = 0x1000
-
-# fcntl.h:#define AT_FDCWD -100 /* Special value used to indicate
-AT_FDCWD = -100
-
+class JSONAggregator:
+    """
+    Collects reported results as a GZIP-ed line-JSON and files (logs) from
+    multiple test runs under a shared directory.

-
-
-        errno = ctypes.get_errno()
-        raise OSError(errno, os.strerror(errno))
-    return ret
+    Note that the aggregated JSON file *does not* use the test-based JSON format
+    described by executor/RESULTS.md - both use JSON, but are very different.

+    This aggergated format uses a top-level array (on each line) with a fixed
+    field order:

-
-    # the magic here is to treat any dangerous path as starting at /
-    # and resolve any weird constructs relative to /, and then simply
-    # strip off the leading / and use it as a relative path
-    path = path.lstrip("/")
-    path = os.path.normpath(f"/{path}")
-    return path[1:]
+        platform, status, test name, subtest name, files, note

+    All these are strings except 'files', which is another (nested) array
+    of strings.

-
-
-    Collects reported results as a GZIP-ed CSV and files (logs) under a related
-    directory.
+    If a field is missing in the source result, it is translated to a null
+    value.
     """

-
-
+    def __init__(self, json_file, storage_dir):
+        """
+        'json_file' is a string/Path to a .json.gz file with aggregated results.

-
+        'storage_dir' is a string/Path of the top-level parent for all
+        per-platform / per-test files uploaded by tests.
+        """
         self.lock = threading.RLock()
         self.storage_dir = Path(storage_dir)
-        self.
-        self.
-        self.results_gzip_handle = None
+        self.json_file = Path(json_file)
+        self.json_gzip_fobj = None

-    def
-        if self.
-            raise FileExistsError(f"{self.
-
-        try:
-            self.csv_writer = csv.writer(f, dialect=self._ExcelWithUnixNewline)
-        except:
-            f.close()
-            raise
-        self.results_gzip_handle = f
+    def open(self):
+        if self.json_file.exists():
+            raise FileExistsError(f"{self.json_file} already exists")
+        self.json_gzip_fobj = gzip.open(self.json_file, "wt", newline="\n")

         if self.storage_dir.exists():
             raise FileExistsError(f"{self.storage_dir} already exists")
         self.storage_dir.mkdir()

-
-
-
-
-        self.results_gzip_handle = None
-        self.csv_writer = None
-
-    def report(self, platform, status, name, note, *files):
-        with self.lock:
-            self.csv_writer.writerow((platform, status, name, note, *files))
-
-    def for_platform(self, platform_string):
-        """
-        Return a ResultAggregator instance that writes results into this
-        CSVAgreggator instance.
-        """
-        def report(result_line):
-            file_names = []
-            if "testout" in result_line:
-                file_names.append(result_line["testout"])
-            if "files" in result_line:
-                file_names += (f["name"] for f in result_line["files"])
-            self.report(
-                platform_string, result_line["status"], result_line["name"],
-                result_line.get("note", ""), *file_names,
-            )
-        platform_dir = self.storage_dir / platform_string
-        platform_dir.mkdir(exist_ok=True)
-        return ResultAggregator(report, platform_dir)
+    def close(self):
+        if self.json_gzip_fobj:
+            self.json_gzip_fobj.close()
+            self.json_gzip_fobj = None

+    def __enter__(self):
+        try:
+            self.open()
+            return self
+        except Exception:
+            self.close()
+            raise

-
-
-    Collects reported results (in a format specified by RESULTS.md) for
-    a specific platform, storing them persistently.
-    """
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()

-    def
+    def ingest(self, platform, test_name, results_file, files_dir):
         """
-        '
-
-
-
+        Process 'results_file' (string/Path) for reported results and append
+        them to the overall aggregated line-JSON file, recursively copying over
+        the dir structure under 'files_dir' (string/Path) under the respective
+        platform and test name in the aggregated storage dir.
         """
-        self.
-
+        platform_dir = self.storage_dir / platform
+        test_dir = platform_dir / test_name.lstrip("/")
+        if test_dir.exists():
+            raise FileExistsError(f"{test_dir} already exists for {test_name}")
+
+        # parse the results separately, before writing any aggregated output,
+        # to ensure that either all results from the test are ingested, or none
+        # at all (ie. if one of the result lines contains JSON errors)
+        output_lines = []
+        with open(results_file) as results_fobj:
+            for raw_line in results_fobj:
+                result_line = json.loads(raw_line)
+
+                file_names = []
+                if "testout" in result_line:
+                    file_names.append(result_line["testout"])
+                if "files" in result_line:
+                    file_names += (f["name"] for f in result_line["files"])
+
+                output_line = (
+                    platform,
+                    result_line["status"],
+                    test_name,
+                    result_line.get("name"),
+                    file_names,
+                    result_line.get("note"),
+                )
+                encoded = json.dumps(output_line, indent=None)
+                output_lines.append(encoded)
+
+        output_str = "\n".join(output_lines) + "\n"

-
-
-
-        Open an anonymous (name-less) file for writing and yield its file
-        descriptor (int) as context, closing it when the context is exited.
-        """
-        flags = open_mode | os.O_TMPFILE
-        fd = os.open(self.storage_dir, flags, 0o644)
-        try:
-            yield fd
-        finally:
-            os.close(fd)
+        with self.lock:
+            self.json_gzip_fobj.write(output_str)
+            self.json_gzip_fobj.flush()

-
-        """
-        Store a file named 'file_name' in a directory relevant to 'result_name'
-        whose 'fd' (a file descriptor) was created by .open_tmpfile().
+        Path(results_file).unlink()

-
-
-        """
-        # /path/to/all/logs / some/test/name / path/to/file.log
-        file_path = self.storage_dir / result_name.lstrip("/") / _normalize_path(file_name)
-        file_path.parent.mkdir(parents=True, exist_ok=True)
-        linkat(fd, b"", AT_FDCWD, bytes(file_path), AT_EMPTY_PATH)
+        platform_dir.mkdir(exist_ok=True)
+        shutil.move(files_dir, test_dir)
atex/orchestrator/orchestrator.py
ADDED
@@ -0,0 +1,385 @@
+import time
+import tempfile
+import traceback
+import concurrent
+import collections
+from pathlib import Path
+
+from .. import util, executor
+
+
+class OrchestratorError(Exception):
+    pass
+
+
+class FailedSetupError(OrchestratorError):
+    pass
+
+
+class Orchestrator:
+    """
+    A scheduler for parallel execution on multiple resources (machines/systems).
+    """
+
+    class SetupInfo(
+        util.NamedMapping,
+        required=(
+            # class Provisioner instance this machine is provided by
+            # (for logging purposes)
+            "provisioner",
+            # class Remote instance returned by the Provisioner
+            "remote",
+            # class Executor instance uploading tests / running setup or tests
+            "executor",
+        ),
+    ):
+        pass
+
+    class RunningInfo(
+        SetupInfo,
+        required=(
+            # string with /test/name
+            "test_name",
+            # class tempfile.TemporaryDirectory instance passed to Executor
+            "tmp_dir",
+        ),
+    ):
+        pass
+
+    class FinishedInfo(
+        RunningInfo,
+        required=(
+            # integer with exit code of the test
+            # (None if exception happened)
+            "exit_code",
+            # exception class instance if running the test failed
+            # (None if no exception happened (exit_code is defined))
+            "exception",
+        ),
+    ):
+        pass
+
+    def __init__(
+        self, platform, fmf_tests, provisioners, aggregator, tmp_dir, *,
+        max_reruns=2, max_failed_setups=10, env=None,
+    ):
+        """
+        'platform' is a string with platform name.
+
+        'fmf_tests' is a class FMFTests instance of the tests to run.
+
+        'provisioners' is an iterable of class Provisioner instances.
+
+        'aggregator' is a class CSVAggregator instance.
+
+        'tmp_dir' is a string/Path to a temporary directory, to be used for
+        storing per-test results and uploaded files before being ingested
+        by the aggregator. Can be safely shared by Orchestrator instances.
+
+        'max_reruns' is an integer of how many times to re-try running a failed
+        test (which exited with non-0 or caused an Executor exception).
+
+        'max_failed_setups' is an integer of how many times an Executor's
+        plan setup (uploading tests, running prepare scripts, etc.) may fail
+        before FailedSetupError is raised.
+
+        'env' is a dict of extra environment variables to pass to Executor.
+        """
+        self.platform = platform
+        self.fmf_tests = fmf_tests
+        self.provisioners = tuple(provisioners)
+        self.aggregator = aggregator
+        self.tmp_dir = tmp_dir
+        self.failed_setups_left = max_failed_setups
+        # indexed by test name, value being integer of how many times
+        self.reruns = collections.defaultdict(lambda: max_reruns)
+        self.env = env
+        # tests still waiting to be run
+        self.to_run = set(fmf_tests.tests)
+        # running setup functions, as a list of SetupInfo items
+        self.running_setups = []
+        # running tests as a dict, indexed by test name, with RunningInfo values
+        self.running_tests = {}
+        # thread queue for actively running tests
+        self.test_queue = util.ThreadQueue(daemon=False)
+        # thread queue for remotes being set up (uploading tests, etc.)
+        self.setup_queue = util.ThreadQueue(daemon=True)
+        # NOTE: running_setups and test_running are just for debugging and
+        # cancellation, the execution flow itself uses ThreadQueues
+
+    @staticmethod
+    def run_setup(sinfo):
+        """
+        Set up a newly acquired class Remote instance for test execution.
+
+        'sinfo' is a SetupInfo instance with the (fully connected) remote.
+        """
+        sinfo.executor.setup()
+        sinfo.executor.upload_tests()
+        sinfo.executor.plan_prepare()
+        # NOTE: we never run executor.plan_finish() or even executor.cleanup()
+        # anywhere - instead, we assume the remote (and its connection)
+        # was invalidated by the test, so we just rely on remote.release()
+        # destroying the system
+
+    def _run_new_test(self, info):
+        """
+        'info' can be either
+        - SetupInfo instance with Remote/Executor to run the new test.
+        - FinishedInfo instance of a previously executed test
+          (reusing Remote/Executor for a new test).
+        """
+        next_test_name = self.next_test(self.to_run, self.fmf_tests.tests, info)
+        assert next_test_name in self.to_run, "next_test() returned valid test name"
+
+        util.info(f"starting '{next_test_name}' on {info.remote}")
+
+        self.to_run.remove(next_test_name)
+
+        rinfo = self.RunningInfo._from(
+            info,
+            test_name=next_test_name,
+            tmp_dir=tempfile.TemporaryDirectory(
+                prefix=next_test_name.strip("/").replace("/","-") + "-",
+                dir=self.tmp_dir,
+                delete=False,
+            ),
+        )
+
+        tmp_dir_path = Path(rinfo.tmp_dir.name)
+        self.test_queue.start_thread(
+            target=info.executor.run_test,
+            target_args=(
+                next_test_name,
+                tmp_dir_path,
+            ),
+            rinfo=rinfo,
+        )
+
+        self.running_tests[next_test_name] = rinfo
+
+    def _process_finished_test(self, finfo):
+        """
+        'finfo' is a FinishedInfo instance.
+        """
+        remote_with_test = f"{finfo.remote}: '{finfo.test_name}'"
+
+        def ingest_result():
+            tmp_dir_path = Path(finfo.tmp_dir.name)
+            results_file = tmp_dir_path / "results"
+            files_dir = tmp_dir_path / "files"
+            # in case Executor code itself threw an unrecoverable exception
+            # and didn't even report the fallback 'infra' result
+            if results_file.exists() and files_dir.exists():
+                self.aggregator.ingest(self.platform, finfo.test_name, results_file, files_dir)
+            finfo.tmp_dir.cleanup()
+
+        # if executor (or test) threw exception, schedule a re-run
+        if finfo.exception:
+            exc_name = type(finfo.exception).__name__
+            exc_tb = "".join(traceback.format_exception(finfo.exception)).rstrip("\n")
+            msg = f"{remote_with_test} threw {exc_name} during test runtime"
+            #finfo.remote.release()
+            if (reruns_left := self.reruns[finfo.test_name]) > 0:
+                util.info(f"{msg}, re-running ({reruns_left} reruns left):\n{exc_tb}")
+                self.reruns[finfo.test_name] -= 1
+                self.to_run.add(finfo.test_name)
+            else:
+                util.info(f"{msg}, reruns exceeded, giving up:\n{exc_tb}")
+                # record the final result anyway
+                ingest_result()
+
+        # if the test exited as non-0, try a re-run
+        elif finfo.exit_code != 0:
+            msg = f"{remote_with_test} exited with non-zero: {finfo.exit_code}"
+            #finfo.remote.release()
+            if (reruns_left := self.reruns[finfo.test_name]) > 0:
+                util.info(f"{msg}, re-running ({reruns_left} reruns left)")
+                self.reruns[finfo.test_name] -= 1
+                self.to_run.add(finfo.test_name)
+            else:
+                util.info(f"{msg}, reruns exceeded, giving up")
+                # record the final result anyway
+                ingest_result()
+
+        # test finished successfully - ingest its results
+        else:
+            util.info(f"{remote_with_test} finished successfully")
+            ingest_result()
+
+        # if destroyed, release the remote
+        # (Executor exception is always considered destructive)
+        test_data = self.fmf_tests.tests[finfo.test_name]
+        if finfo.exception or self.destructive(finfo, test_data):
+            util.debug(f"{remote_with_test} was destructive, releasing remote")
+            finfo.remote.release()
+
+        # if still not destroyed, run another test on it
+        # (without running plan setup, re-using already set up remote)
+        elif self.to_run:
+            util.debug(f"{remote_with_test} was non-destructive, running next test")
+            self._run_new_test(finfo)
+
+    def serve_once(self):
+        """
+        Run the orchestration logic, processing any outstanding requests
+        (for provisioning, new test execution, etc.) and returning once these
+        are taken care of.
+
+        Returns True to indicate that it should be called again by the user
+        (more work to be done), False once all testing is concluded.
+        """
+        util.debug(
+            f"to_run: {len(self.to_run)} tests / "
+            f"running: {len(self.running_tests)} tests, {len(self.running_setups)} setups",
+        )
+        # all done
+        if not self.to_run and not self.running_tests:
+            return False
+
+        # process all finished tests, potentially reusing remotes for executing
+        # further tests
+        while True:
+            try:
+                treturn = self.test_queue.get_raw(block=False)
+            except util.ThreadQueue.Empty:
+                break
+
+            rinfo = treturn.rinfo
+            del self.running_tests[rinfo.test_name]
+
+            finfo = self.FinishedInfo(
+                **rinfo,
+                exit_code=treturn.returned,
+                exception=treturn.exception,
+            )
+            self._process_finished_test(finfo)
+
+        # process any remotes with finished plan setup (uploaded tests,
+        # plan-defined pkgs / prepare scripts), start executing tests on them
+        while self.to_run:
+            try:
+                treturn = self.setup_queue.get_raw(block=False)
+            except util.ThreadQueue.Empty:
+                break
+
+            sinfo = treturn.sinfo
+            self.running_setups.remove(sinfo)
+
+            if treturn.exception:
+                exc_name = type(treturn.exception).__name__
+                exc_tb = "".join(traceback.format_exception(treturn.exception)).rstrip("\n")
+                msg = f"{sinfo.remote}: setup failed with {exc_name}"
+                sinfo.remote.release()
+                if (reruns_left := self.failed_setups_left) > 0:
+                    util.warning(f"{msg}, re-trying ({reruns_left} setup retries left):\n{exc_tb}")
+                    self.failed_setups_left -= 1
+                else:
+                    util.warning(f"{msg}, setup retries exceeded, giving up:\n{exc_tb}")
+                    raise FailedSetupError("setup retries limit exceeded, broken infra?")
+            else:
+                self._run_new_test(sinfo)
+
+        # try to get new remotes from Provisioners - if we get some, start
+        # running setup on them
+        for provisioner in self.provisioners:
+            while (remote := provisioner.get_remote(block=False)) is not None:
+                ex = executor.Executor(self.fmf_tests, remote, env=self.env)
+                sinfo = self.SetupInfo(
+                    provisioner=provisioner,
+                    remote=remote,
+                    executor=ex,
+                )
+                self.setup_queue.start_thread(
+                    target=self.run_setup,
+                    target_args=(sinfo,),
+                    sinfo=sinfo,
+                )
+                self.running_setups.append(sinfo)
+                util.info(f"{provisioner}: running setup on new {remote}")
+
+        return True
+
+    def serve_forever(self):
+        """
+        Run the orchestration logic, blocking until all testing is concluded.
+        """
+        while self.serve_once():
+            time.sleep(1)
+
+    def start(self):
+        # start all provisioners
+        for prov in self.provisioners:
+            prov.start()
+        return self
+
+    def stop(self):
+        # cancel all running tests and wait for them to clean up (up to 0.1sec)
+        for rinfo in self.running_tests.values():
+            rinfo.executor.cancel()
+        self.test_queue.join()  # also ignore any exceptions raised
+
+        # stop all provisioners, also releasing all remotes
+        if self.provisioners:
+            workers = min(len(self.provisioners), 20)
+            with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as ex:
+                for provisioner in self.provisioners:
+                    for func in provisioner.stop_defer():
+                        ex.submit(func)
+
+    def __enter__(self):
+        try:
+            self.start()
+            return self
+        except Exception:
+            self.stop()
+            raise
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.stop()
+
+    @staticmethod
+    def next_test(to_run, all_tests, previous):  # noqa: ARG004
+        """
+        Return a test name (string) to be executed next.
+
+        'to_run' is a set of test names to pick from. The returned test name
+        must be chosen from this set.
+
+        'tests' is a dict indexed by test name (string), with values being
+        fully resolved fmf test metadata (dicts) of all possible tests.
+
+        'previous' can be either
+        - Orchestrator.SetupInfo instance (first test to be run)
+        - Orchestrator.FinishedInfo instance (previous executed test)
+
+        This method must not modify any of its arguments, it must treat them
+        as read-only, eg. don't remove the returned test name from 'to_run'.
+        """
+        # default to simply picking any available test
+        return next(iter(to_run))
+
+    @staticmethod
+    def destructive(info, test_data):  # noqa: ARG004
+        """
+        Return a boolean result whether a finished test was destructive
+        to a class Remote instance, indicating that the Remote instance
+        should not be used for further test execution.
+
+        'info' is Orchestrator.FinishedInfo namedtuple of the test.
+
+        'test_data' is a dict of fully resolved fmf test metadata of that test.
+        """
+        # if Executor ended with an exception (ie. duration exceeded),
+        # consider the test destructive
+        if info.exception:
+            return True
+        # if the test returned non-0 exit code, it could have thrown
+        # a python exception of its own, or (if bash) aborted abruptly
+        # due to 'set -e', don't trust the remote, consider it destroyed
+        if info.exit_code != 0:
+            return True
+        # otherwise we good
+        return False
+        # TODO: override with additional 'extra-contest: destructive: True' fmf metadata
+        # destructive = test_data.get("extra-contest", {}).get("destructive", False)