atex 0.7__py3-none-any.whl → 0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,83 +1,37 @@
- """
- Functions and utilities for persistently storing test results and files (logs).
-
- There is a global aggregator (ie. CSVAggregator) that handles all the results
- from all platforms (arches and distros), and several per-platform aggregators
- that are used by test execution logic.
-
-     with CSVAggregator("results.csv.gz", "file/storage/dir") as global_aggr:
-         reporter = global_aggr.for_platform("rhel-9@x86_64")
-         reporter.report({"name": "/some/test", "status": "pass"})
-         with reporter.open_tmpfile() as fd:
-             os.write(fd, b"some contents")
-             reporter.link_tmpfile_to("/some/test", "test.log", fd)
- """
-
- import os
  import csv
  import gzip
- import ctypes
- import ctypes.util
+ import json
+ import shutil
  import threading
- import contextlib
  from pathlib import Path
 
 
- libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)
-
- # int linkat(int olddirfd, const char *oldpath, int newdirfd, const char *newpath, int flags)
- libc.linkat.argtypes = (
-     ctypes.c_int,
-     ctypes.c_char_p,
-     ctypes.c_int,
-     ctypes.c_char_p,
-     ctypes.c_int,
- )
- libc.linkat.restype = ctypes.c_int
-
- # fcntl.h:#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
- AT_EMPTY_PATH = 0x1000
-
- # fcntl.h:#define AT_FDCWD -100 /* Special value used to indicate
- AT_FDCWD = -100
-
-
- def linkat(*args):
-     if (ret := libc.linkat(*args)) == -1:
-         errno = ctypes.get_errno()
-         raise OSError(errno, os.strerror(errno))
-     return ret
-
-
- def _normalize_path(path):
-     # the magic here is to treat any dangerous path as starting at /
-     # and resolve any weird constructs relative to /, and then simply
-     # strip off the leading / and use it as a relative path
-     path = path.lstrip("/")
-     path = os.path.normpath(f"/{path}")
-     return path[1:]
-
-
  class CSVAggregator:
      """
-     Collects reported results as a GZIP-ed CSV and files (logs) under a related
-     directory.
+     Collects reported results as a GZIP-ed CSV and files (logs) from multiple
+     test runs under a shared directory.
      """
 
      class _ExcelWithUnixNewline(csv.excel):
          lineterminator = "\n"
 
-     def __init__(self, results_file, storage_dir):
+     def __init__(self, csv_file, storage_dir):
+         """
+         'csv_file' is a string/Path to a .csv.gz file with aggregated results.
+
+         'storage_dir' is a string/Path of the top-level parent for all
+         per-platform / per-test files uploaded by tests.
+         """
          self.lock = threading.RLock()
          self.storage_dir = Path(storage_dir)
-         self.results_file = Path(results_file)
+         self.csv_file = Path(csv_file)
          self.csv_writer = None
          self.results_gzip_handle = None
 
-     def __enter__(self):
-         if self.results_file.exists():
-             raise FileExistsError(f"{self.results_file} already exists")
-         f = gzip.open(self.results_file, "wt", newline="")
+     def open(self):
+         if self.csv_file.exists():
+             raise FileExistsError(f"{self.csv_file} already exists")
+         f = gzip.open(self.csv_file, "wt", newline="")
          try:
              self.csv_writer = csv.writer(f, dialect=self._ExcelWithUnixNewline)
          except:
@@ -89,75 +43,64 @@ class CSVAggregator:
              raise FileExistsError(f"{self.storage_dir} already exists")
          self.storage_dir.mkdir()
 
-         return self
-
-     def __exit__(self, exc_type, exc_value, traceback):
+     def close(self):
          self.results_gzip_handle.close()
          self.results_gzip_handle = None
          self.csv_writer = None
 
-     def report(self, platform, status, name, note, *files):
-         with self.lock:
-             self.csv_writer.writerow((platform, status, name, note, *files))
-
-     def for_platform(self, platform_string):
-         """
-         Return a ResultAggregator instance that writes results into this
-         CSVAggregator instance.
-         """
-         def report(result_line):
-             file_names = []
-             if "testout" in result_line:
-                 file_names.append(result_line["testout"])
-             if "files" in result_line:
-                 file_names += (f["name"] for f in result_line["files"])
-             self.report(
-                 platform_string, result_line["status"], result_line["name"],
-                 result_line.get("note", ""), *file_names,
-             )
-         platform_dir = self.storage_dir / platform_string
-         platform_dir.mkdir(exist_ok=True)
-         return ResultAggregator(report, platform_dir)
-
+     def __enter__(self):
+         self.open()
+         return self
 
- class ResultAggregator:
-     """
-     Collects reported results (in a format specified by RESULTS.md) for
-     a specific platform, storing them persistently.
-     """
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.close()
 
-     def __init__(self, callback, storage_dir):
+     def ingest(self, platform, test_name, json_file, files_dir):
          """
-         'callback' is a function to call to record a result, with the
-         result dict passed as an argument.
-
-         'storage_dir' is a directory for storing uploaded files.
+         Process 'json_file' (string/Path) for reported results and append them
+         to the overall aggregated CSV file, recursively copying over the dir
+         structure under 'files_dir' (string/Path) under the respective platform
+         and test name in the aggregated files storage dir.
          """
-         self.report = callback
-         self.storage_dir = storage_dir
+         # parse the JSON separately, before writing any CSV lines, to ensure
+         # that either all results from the test are ingested, or none at all
+         # (if one of the lines contains JSON errors)
+         csv_lines = []
+         with open(json_file) as json_fobj:
+             for raw_line in json_fobj:
+                 result_line = json.loads(raw_line)
+
+                 result_name = result_line.get("name")
+                 if result_name:
+                     # sub-result; prefix test name
+                     result_name = f"{test_name}/{result_name}"
+                 else:
+                     # result for the test itself; use test name
+                     result_name = test_name
+
+                 file_names = []
+                 if "testout" in result_line:
+                     file_names.append(result_line["testout"])
+                 if "files" in result_line:
+                     file_names += (f["name"] for f in result_line["files"])
+
+                 csv_lines.append((
+                     platform,
+                     result_line["status"],
+                     result_name,
+                     result_line.get("note", ""),
+                     *file_names,
+                 ))
 
-     @contextlib.contextmanager
-     def open_tmpfile(self, open_mode=os.O_WRONLY):
-         """
-         Open an anonymous (name-less) file for writing and yield its file
-         descriptor (int) as context, closing it when the context is exited.
-         """
-         flags = open_mode | os.O_TMPFILE
-         fd = os.open(self.storage_dir, flags, 0o644)
-         try:
-             yield fd
-         finally:
-             os.close(fd)
+         with self.lock:
+             self.csv_writer.writerows(csv_lines)
+             self.results_gzip_handle.flush()
 
-     def link_tmpfile_to(self, result_name, file_name, fd):
-         """
-         Store a file named 'file_name' in a directory relevant to 'result_name'
-         whose 'fd' (a file descriptor) was created by .open_tmpfile().
+         Path(json_file).unlink()
 
-         This function can be called multiple times with the same 'fd', and
-         does not close or otherwise alter the descriptor.
-         """
-         # /path/to/all/logs / some/test/name / path/to/file.log
-         file_path = self.storage_dir / result_name.lstrip("/") / _normalize_path(file_name)
-         file_path.parent.mkdir(parents=True, exist_ok=True)
-         linkat(fd, b"", AT_FDCWD, bytes(file_path), AT_EMPTY_PATH)
+         platform_dir = self.storage_dir / platform
+         platform_dir.mkdir(exist_ok=True)
+         test_dir = platform_dir / test_name.lstrip("/")
+         if test_dir.exists():
+             raise FileExistsError(f"{test_dir} already exists for {test_name}")
+         shutil.move(files_dir, test_dir, copy_function=shutil.copy)
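
In 0.8, per-result reporting (ResultAggregator with its O_TMPFILE/linkat
anonymous-file trick) is replaced by a single CSVAggregator.ingest() call that
consumes a file of JSON lines plus a directory of uploaded files, both written
by the test executor. A minimal usage sketch of the new flow, inferred from the
diff above (the paths, the platform string and the result line contents are
illustrative, and the import path is hypothetical):

    from atex.aggregator import CSVAggregator  # hypothetical module path

    # each line of the JSON file is one result, e.g.
    #   {"status": "pass", "name": "sub/result", "note": "", "testout": "out.log"}
    # a line without "name" reports the status of the test itself
    with CSVAggregator("results.csv.gz", "storage") as aggr:
        # appends one CSV row per JSON line (all-or-nothing), deletes the
        # JSON file, then moves files_dir to storage/rhel-9@x86_64/some/test
        aggr.ingest("rhel-9@x86_64", "/some/test", "results.json", "files_dir")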
@@ -0,0 +1,324 @@
+ import time
+ import tempfile
+ import traceback
+ import concurrent.futures
+ import collections
+ from pathlib import Path
+
+ from .. import util, executor
+
+
+ class Orchestrator:
+     """
+     A scheduler for parallel execution on multiple resources (machines/systems).
+     """
+
+     SetupInfo = collections.namedtuple(
+         "SetupInfo",
+         (
+             # class Provisioner instance this machine is provided by
+             # (for logging purposes)
+             "provisioner",
+             # class Remote instance returned by the Provisioner
+             "remote",
+             # class Executor instance uploading tests / running setup or tests
+             "executor",
+         ),
+     )
+     RunningInfo = collections.namedtuple(
+         "RunningInfo",
+         (
+             # "inherit" from SetupInfo
+             *SetupInfo._fields,
+             # string with /test/name
+             "test_name",
+             # class tempfile.TemporaryDirectory instance with 'json_file' and 'files_dir'
+             "tmp_dir",
+         ),
+     )
+     FinishedInfo = collections.namedtuple(
+         "FinishedInfo",
+         (
+             # "inherit" from RunningInfo
+             *RunningInfo._fields,
+             # integer with the exit code of the test
+             # (None if an exception happened)
+             "exit_code",
+             # exception class instance if running the test failed
+             # (None if no exception happened and exit_code is defined)
+             "exception",
+         ),
+     )
+     def __init__(self, platform, fmf_tests, provisioners, aggregator, tmp_dir, *, max_reruns=2):
+         """
+         'platform' is a string with the platform name.
+
+         'fmf_tests' is a class FMFTests instance of the tests to run.
+
+         'provisioners' is an iterable of class Provisioner instances.
+
+         'aggregator' is a class CSVAggregator instance.
+
+         'tmp_dir' is a string/Path to a temporary directory, to be used for
+         storing per-test results and uploaded files before being ingested
+         by the aggregator. Can be safely shared by Orchestrator instances.
+         """
+         self.platform = platform
+         self.fmf_tests = fmf_tests
+         self.provisioners = tuple(provisioners)
+         self.aggregator = aggregator
+         self.tmp_dir = tmp_dir
+         # tests still waiting to be run
+         self.to_run = set(fmf_tests.tests)
+         # running setup functions, as a list of SetupInfo items
+         self.running_setups = []
+         # running tests as a dict, indexed by test name, with RunningInfo values
+         self.running_tests = {}
+         # reruns remaining per test, indexed by test name
+         self.reruns = collections.defaultdict(lambda: max_reruns)
+         # thread queue for actively running tests
+         self.test_queue = util.ThreadQueue(daemon=False)
+         # thread queue for remotes being set up (uploading tests, etc.)
+         self.setup_queue = util.ThreadQueue(daemon=True)
+         # NOTE: running_setups and running_tests are just for debugging and
+         #       cancellation, the execution flow itself uses ThreadQueues
+     @staticmethod
+     def _run_setup(sinfo):
+         sinfo.executor.setup()
+         sinfo.executor.upload_tests()
+         sinfo.executor.setup_plan()
+         # NOTE: we never run executor.cleanup() anywhere - instead, we assume
+         #       the remote (and its connection) was invalidated by the test,
+         #       so we just rely on remote.release() destroying the system
+         return sinfo
+
+     @classmethod
+     def _wrap_test(cls, rinfo, func, *args, **kwargs):
+         """
+         Wrap 'func' (test execution function) to preserve extra metadata
+         ('rinfo') and return it with the function return value.
+         """
+         try:
+             return cls.FinishedInfo(*rinfo, func(*args, **kwargs), None)
+         except Exception as e:
+             return cls.FinishedInfo(*rinfo, None, e)
+
+     def _run_new_test(self, sinfo):
+         """
+         'sinfo' is a SetupInfo instance.
+         """
+         next_test_name = self.next_test(self.to_run, self.fmf_tests)
+         assert next_test_name in self.to_run, "next_test() returned valid test name"
+
+         self.to_run.remove(next_test_name)
+
+         rinfo = self.RunningInfo(
+             *sinfo,
+             test_name=next_test_name,
+             tmp_dir=tempfile.TemporaryDirectory(
+                 prefix=next_test_name.strip("/").replace("/", "-") + "-",
+                 dir=self.tmp_dir,
+                 delete=False,
+             ),
+         )
+
+         tmp_dir_path = Path(rinfo.tmp_dir.name)
+         self.test_queue.start_thread(
+             target=self._wrap_test,
+             args=(
+                 rinfo,
+                 sinfo.executor.run_test,
+                 next_test_name,
+                 tmp_dir_path / "json_file",
+                 tmp_dir_path / "files_dir",
+             ),
+         )
+
+         self.running_tests[next_test_name] = rinfo
+     def _process_finished_test(self, finfo):
+         """
+         'finfo' is a FinishedInfo instance.
+         """
+         test_id = f"'{finfo.test_name}' on '{finfo.remote}'"
+         tmp_dir_path = Path(finfo.tmp_dir.name)
+
+         # NOTE: we intentionally don't .cleanup() the executor below - we rely
+         #       on remote .release() destroying the OS, because we don't want
+         #       to risk .cleanup() blocking on dead ssh into the remote after
+         #       executing a destructive test
+
+         destructive = False
+
+         # if the executor (or test) threw an exception, schedule a re-run
+         if finfo.exception:
+             destructive = True
+             exc_str = "".join(traceback.format_exception(finfo.exception)).rstrip("\n")
+             util.info(f"unexpected exception happened while running {test_id}:\n{exc_str}")
+             finfo.remote.release()
+             if self.reruns[finfo.test_name] > 0:
+                 self.reruns[finfo.test_name] -= 1
+                 self.to_run.add(finfo.test_name)
+             else:
+                 util.info(f"reruns for {test_id} exceeded, ignoring it")
+
+         # if the test exited as non-0, try a re-run
+         elif finfo.exit_code != 0:
+             destructive = True
+             finfo.remote.release()
+             if self.reruns[finfo.test_name] > 0:
+                 util.info(
+                     f"{test_id} exited with non-zero: {finfo.exit_code}, re-running "
+                     f"({self.reruns[finfo.test_name]} reruns left)",
+                 )
+                 self.reruns[finfo.test_name] -= 1
+                 self.to_run.add(finfo.test_name)
+             else:
+                 util.info(
+                     f"{test_id} exited with non-zero: {finfo.exit_code}, "
+                     "all reruns exceeded, giving up",
+                 )
+                 # record the final result anyway
+                 self.aggregator.ingest(
+                     self.platform,
+                     finfo.test_name,
+                     tmp_dir_path / "json_file",
+                     tmp_dir_path / "files_dir",
+                 )
+                 finfo.tmp_dir.cleanup()
+
+         # test finished successfully - ingest its results
+         else:
+             util.info(f"{test_id} finished successfully")
+             self.aggregator.ingest(
+                 self.platform,
+                 finfo.test_name,
+                 tmp_dir_path / "json_file",
+                 tmp_dir_path / "files_dir",
+             )
+             finfo.tmp_dir.cleanup()
+
+         # if the remote was not destroyed by a traceback / failing test,
+         # check if the test always destroys it (even on success)
+         if not destructive:
+             test_data = self.fmf_tests.tests[finfo.test_name]
+             destructive = test_data.get("extra-atex", {}).get("destructive", False)
+
+         # if destroyed, release the remote
+         if destructive:
+             util.debug(f"{test_id} was destructive, releasing remote")
+             finfo.remote.release()
+
+         # if still not destroyed, run another test on it
+         # (without running plan setup, re-using the already set up remote)
+         elif self.to_run:
+             sinfo = self.SetupInfo(
+                 provisioner=finfo.provisioner,
+                 remote=finfo.remote,
+                 executor=finfo.executor,
+             )
+             util.debug(f"{test_id} was non-destructive, running next test")
+             self._run_new_test(sinfo)
+     def serve_once(self):
+         """
+         Run the orchestration logic, processing any outstanding requests
+         (for provisioning, new test execution, etc.) and returning once these
+         are taken care of.
+
+         Returns True to indicate that it should be called again by the user
+         (more work to be done), False once all testing is concluded.
+         """
+         util.debug(
+             f"to_run: {len(self.to_run)} tests / "
+             f"running: {len(self.running_tests)} tests, {len(self.running_setups)} setups",
+         )
+         # all done
+         if not self.to_run and not self.running_tests:
+             return False
+
+         # process all finished tests, potentially reusing remotes for executing
+         # further tests
+         while True:
+             try:
+                 finfo = self.test_queue.get(block=False)
+             except util.ThreadQueue.Empty:
+                 break
+             del self.running_tests[finfo.test_name]
+             self._process_finished_test(finfo)
+
+         # process any remotes with finished plan setup (uploaded tests,
+         # plan-defined pkgs / prepare scripts), start executing tests on them
+         while True:
+             try:
+                 sinfo = self.setup_queue.get(block=False)
+             except util.ThreadQueue.Empty:
+                 break
+             util.debug(f"setup finished for '{sinfo.remote}', running first test")
+             self.running_setups.remove(sinfo)
+             self._run_new_test(sinfo)
+
+         # try to get new remotes from Provisioners - if we get some, start
+         # running setup on them
+         for provisioner in self.provisioners:
+             while (remote := provisioner.get_remote(block=False)) is not None:
+                 ex = executor.Executor(self.fmf_tests, remote)
+                 sinfo = self.SetupInfo(
+                     provisioner=provisioner,
+                     remote=remote,
+                     executor=ex,
+                 )
+                 self.setup_queue.start_thread(
+                     target=self._run_setup,
+                     args=(sinfo,),
+                 )
+                 self.running_setups.append(sinfo)
+                 util.debug(f"got remote '{remote}' from '{provisioner}', running setup")
+
+         return True
+
+     def serve_forever(self):
+         """
+         Run the orchestration logic, blocking until all testing is concluded.
+         """
+         while self.serve_once():
+             time.sleep(1)
+
+     def __enter__(self):
+         # start all provisioners
+         for prov in self.provisioners:
+             prov.start()
+         return self
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         # cancel all running tests and wait for them to clean up (up to 0.1sec)
+         for rinfo in self.running_tests.values():
+             rinfo.executor.cancel()
+         self.test_queue.join()  # also ignore any exceptions raised
+
+         # stop all provisioners, also releasing all remotes
+         with concurrent.futures.ThreadPoolExecutor(max_workers=20) as ex:
+             for provisioner in self.provisioners:
+                 for func in provisioner.stop_defer():
+                     ex.submit(func)
+
+     def next_test(self, tests, fmf_tests):  # noqa: ARG002, PLR6301
+         """
+         Return a test name (string) from a set of 'tests' (set of test name
+         strings) to be run next.
+
+         'fmf_tests' is a class FMFTests instance with additional test metadata.
+
+         This method is user-overridable, ie. by subclassing Orchestrator:
+
+             class CustomOrchestrator(Orchestrator):
+                 @staticmethod
+                 def next_test(tests, fmf_tests):
+                     ...
+         """
+         # TODO: more advanced algorithm
+         #
+         # simple:
+         return next(iter(tests))
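
The new orchestrator module ties these pieces together: provisioners hand out
remotes, each remote runs plan setup once, then executes tests one after
another until a test destroys it, with failed tests retried up to 'max_reruns'
times and results handed to CSVAggregator.ingest(). A rough usage sketch,
assuming FMFTests and Provisioner instances are constructed as elsewhere in
the package (not shown in this diff; all names below are illustrative):

    # pick tests in deterministic (alphabetical) order instead of set order
    class CustomOrchestrator(Orchestrator):
        @staticmethod
        def next_test(tests, fmf_tests):
            return min(tests)

    with CSVAggregator("results.csv.gz", "storage") as aggr:
        with CustomOrchestrator(
            "rhel-9@x86_64", fmf_tests, (provisioner,), aggr, "/var/tmp/atex",
        ) as orch:
            orch.serve_forever()  # blocks until all testing is concluded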