atex-0.8-py3-none-any.whl → atex-0.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. atex/aggregator/__init__.py +60 -0
  2. atex/aggregator/json.py +96 -0
  3. atex/cli/__init__.py +11 -1
  4. atex/cli/fmf.py +73 -23
  5. atex/cli/libvirt.py +128 -0
  6. atex/cli/testingfarm.py +60 -3
  7. atex/connection/__init__.py +13 -11
  8. atex/connection/podman.py +61 -0
  9. atex/connection/ssh.py +38 -47
  10. atex/executor/executor.py +144 -119
  11. atex/executor/reporter.py +66 -71
  12. atex/executor/scripts.py +13 -5
  13. atex/executor/testcontrol.py +43 -30
  14. atex/fmf.py +94 -74
  15. atex/orchestrator/__init__.py +76 -2
  16. atex/orchestrator/adhoc.py +465 -0
  17. atex/{provision → provisioner}/__init__.py +54 -42
  18. atex/provisioner/libvirt/__init__.py +2 -0
  19. atex/provisioner/libvirt/libvirt.py +472 -0
  20. atex/provisioner/libvirt/locking.py +170 -0
  21. atex/{provision → provisioner}/libvirt/setup-libvirt.sh +21 -1
  22. atex/provisioner/podman/__init__.py +2 -0
  23. atex/provisioner/podman/podman.py +169 -0
  24. atex/{provision → provisioner}/testingfarm/api.py +121 -69
  25. atex/{provision → provisioner}/testingfarm/testingfarm.py +44 -52
  26. atex/util/libvirt.py +18 -0
  27. atex/util/log.py +53 -43
  28. atex/util/named_mapping.py +158 -0
  29. atex/util/subprocess.py +46 -12
  30. atex/util/threads.py +71 -20
  31. atex-0.10.dist-info/METADATA +86 -0
  32. atex-0.10.dist-info/RECORD +44 -0
  33. atex/orchestrator/aggregator.py +0 -106
  34. atex/orchestrator/orchestrator.py +0 -324
  35. atex/provision/libvirt/__init__.py +0 -24
  36. atex/provision/podman/README +0 -59
  37. atex/provision/podman/host_container.sh +0 -74
  38. atex-0.8.dist-info/METADATA +0 -197
  39. atex-0.8.dist-info/RECORD +0 -37
  40. /atex/{provision → provisioner}/libvirt/VM_PROVISION +0 -0
  41. /atex/{provision → provisioner}/testingfarm/__init__.py +0 -0
  42. {atex-0.8.dist-info → atex-0.10.dist-info}/WHEEL +0 -0
  43. {atex-0.8.dist-info → atex-0.10.dist-info}/entry_points.txt +0 -0
  44. {atex-0.8.dist-info → atex-0.10.dist-info}/licenses/COPYING.txt +0 -0
@@ -0,0 +1,169 @@
1
+ import tempfile
2
+ import threading
3
+ import subprocess
4
+
5
+ from ... import connection, util
6
+ from .. import Provisioner, Remote
7
+
8
+
9
+ class PodmanRemote(Remote, connection.podman.PodmanConnection):
10
+ """
11
+ Built on the official Remote API, pulling in the Connection API
12
+ as implemented by ManagedSSHConnection.
13
+ """
14
+
15
+ def __init__(self, image, container, *, release_hook):
16
+ """
17
+ 'image' is an image tag (used for repr()).
18
+
19
+ 'container' is a podman container id / name.
20
+
21
+ 'release_hook' is a callable called on .release() in addition
22
+ to disconnecting the connection.
23
+ """
24
+ super().__init__(container=container)
25
+ self.lock = threading.RLock()
26
+ self.image = image
27
+ self.container = container
28
+ self.release_called = False
29
+ self.release_hook = release_hook
30
+
31
+ def release(self):
32
+ with self.lock:
33
+ if self.release_called:
34
+ return
35
+ else:
36
+ self.release_called = True
37
+ self.release_hook(self)
38
+ self.disconnect()
39
+ util.subprocess_run(
40
+ ("podman", "container", "rm", "-f", "-t", "0", self.container),
41
+ check=False, # ignore if it fails
42
+ stdout=subprocess.DEVNULL,
43
+ )
44
+
45
+ # not /technically/ a valid repr(), but meh
46
+ def __repr__(self):
47
+ class_name = self.__class__.__name__
48
+
49
+ if "/" in self.image:
50
+ image = self.image.rsplit("/",1)[1]
51
+ elif len(self.image) > 20:
52
+ image = f"{self.image[:17]}..."
53
+ else:
54
+ image = self.image
55
+
56
+ name = f"{self.container[:17]}..." if len(self.container) > 20 else self.container
57
+
58
+ return f"{class_name}({image}, {name})"
59
+
60
+
61
+ class PodmanProvisioner(Provisioner):
62
+ def __init__(self, image, run_options=None):
63
+ """
64
+ 'image' is a string of image tag/id to create containers from.
65
+ It can be a local identifier or an URL.
66
+
67
+ 'run_options' is an iterable with additional CLI options passed
68
+ to 'podman container run'.
69
+ """
70
+ self.lock = threading.RLock()
71
+ self.image = image
72
+ self.run_options = run_options or ()
73
+
74
+ # created PodmanRemote instances, ready to be handed over to the user,
75
+ # or already in use by the user
76
+ self.remotes = []
77
+ self.to_create = 0
78
+
79
+ def start(self):
80
+ if not self.image:
81
+ raise ValueError("image cannot be empty")
82
+
83
+ def stop(self):
84
+ with self.lock:
85
+ while self.remotes:
86
+ self.remotes.pop().release()
87
+
88
+ def provision(self, count=1):
89
+ with self.lock:
90
+ self.to_create += count
91
+
92
+ def get_remote(self, block=True):
93
+ if self.to_create <= 0:
94
+ if block:
95
+ raise RuntimeError("no .provision() requested, would block forever")
96
+ else:
97
+ return None
98
+
99
+ proc = util.subprocess_run(
100
+ (
101
+ "podman", "container", "run", "--quiet", "--detach", "--pull", "never",
102
+ *self.run_options, self.image, "sleep", "inf",
103
+ ),
104
+ check=True,
105
+ text=True,
106
+ stdout=subprocess.PIPE,
107
+ )
108
+ container_id = proc.stdout.rstrip("\n")
109
+
110
+ def release_hook(remote):
111
+ # remove from the list of remotes inside this Provisioner
112
+ with self.lock:
113
+ try:
114
+ self.remotes.remove(remote)
115
+ except ValueError:
116
+ pass
117
+
118
+ remote = PodmanRemote(
119
+ self.image,
120
+ container_id,
121
+ release_hook=release_hook,
122
+ )
123
+
124
+ with self.lock:
125
+ self.remotes.append(remote)
126
+ self.to_create -= 1
127
+
128
+ return remote
129
+
130
+ # not /technically/ a valid repr(), but meh
131
+ def __repr__(self):
132
+ class_name = self.__class__.__name__
133
+ return (
134
+ f"{class_name}({self.image}, {len(self.remotes)} remotes, {hex(id(self))})"
135
+ )
136
+
137
+
138
+ def pull_image(origin):
139
+ proc = util.subprocess_run(
140
+ ("podman", "image", "pull", "-q", origin),
141
+ check=True,
142
+ text=True,
143
+ stdout=subprocess.PIPE,
144
+ )
145
+ return proc.stdout.rstrip("\n")
146
+
147
+
148
+ def build_container_with_deps(origin, tag=None, *, extra_pkgs=None):
149
+ tag_args = ("-t", tag) if tag else ()
150
+
151
+ pkgs = ["rsync"]
152
+ if extra_pkgs:
153
+ pkgs += extra_pkgs
154
+ pkgs_str = " ".join(pkgs)
155
+
156
+ with tempfile.NamedTemporaryFile("w+t", delete_on_close=False) as tmpf:
157
+ tmpf.write(util.dedent(fr"""
158
+ FROM {origin}
159
+ RUN dnf -y -q --setopt=install_weak_deps=False install {pkgs_str} >/dev/null
160
+ RUN dnf -y -q clean packages >/dev/null
161
+ """))
162
+ tmpf.close()
163
+ proc = util.subprocess_run(
164
+ ("podman", "image", "build", "-q", "-f", tmpf.name, *tag_args, "."),
165
+ check=True,
166
+ text=True,
167
+ stdout=subprocess.PIPE,
168
+ )
169
+ return proc.stdout.rstrip("\n")
@@ -16,16 +16,11 @@ import urllib3
16
16
 
17
17
  DEFAULT_API_URL = "https://api.testing-farm.io/v0.1"
18
18
 
19
- # how many seconds to sleep for during API polling
20
- API_QUERY_DELAY = 30
21
-
22
- RESERVE_TASK = {
23
- "fmf": {
24
- "url": "https://github.com/RHSecurityCompliance/atex",
25
- "ref": "main",
26
- "path": "tmt_tests",
27
- "name": "/plans/reserve",
28
- },
19
+ DEFAULT_RESERVE_TEST = {
20
+ "url": "https://github.com/RHSecurityCompliance/atex-reserve",
21
+ "ref": "v0.10",
22
+ "path": ".",
23
+ "name": "/plans/reserve",
29
24
  }
30
25
 
31
26
  # final states of a request,
@@ -35,7 +30,19 @@ END_STATES = ("error", "complete", "canceled")
35
30
  # always have at most 10 outstanding HTTP requests to every given API host,
36
31
  # shared by all instances of all classes here, to avoid flooding the host
37
32
  # by multi-threaded users
38
- _http = urllib3.PoolManager(maxsize=10, block=True)
33
+ _http = urllib3.PoolManager(
34
+ maxsize=10,
35
+ block=True,
36
+ retries=urllib3.Retry(
37
+ total=10,
38
+ # account for API restarts / short outages
39
+ backoff_factor=60,
40
+ backoff_max=600,
41
+ # retry on API server errors too, not just connection issues
42
+ status=10,
43
+ status_forcelist={403,404,408,429,500,502,503,504},
44
+ ),
45
+ )
39
46
 
40
47
 
41
48
  class TestingFarmError(Exception):
@@ -78,12 +85,13 @@ class TestingFarmAPI:
78
85
  self.api_url = url
79
86
  self.api_token = token or os.environ.get("TESTING_FARM_API_TOKEN")
80
87
 
81
- def _query(self, method, path, *args, headers=None, **kwargs):
88
+ def _query(self, method, path, *args, headers=None, auth=True, **kwargs):
82
89
  url = f"{self.api_url}{path}"
83
- if headers is not None:
84
- headers["Authorization"] = f"Bearer {self.api_token}"
85
- else:
86
- headers = {"Authorization": f"Bearer {self.api_token}"}
90
+ if self.api_token and auth:
91
+ if headers is not None:
92
+ headers["Authorization"] = f"Bearer {self.api_token}"
93
+ else:
94
+ headers = {"Authorization": f"Bearer {self.api_token}"}
87
95
 
88
96
  reply = _http.request(method, url, *args, headers=headers, preload_content=False, **kwargs)
89
97
 
@@ -170,7 +178,7 @@ class TestingFarmAPI:
170
178
  fields["token_id"] = self.whoami()["token"]["id"]
171
179
  fields["user_id"] = self.whoami()["user"]["id"]
172
180
 
173
- return self._query("GET", "/requests", fields=fields)
181
+ return self._query("GET", "/requests", fields=fields, auth=mine)
174
182
 
175
183
  def get_request(self, request_id):
176
184
  """
@@ -200,19 +208,22 @@ class Request:
200
208
  request.
201
209
  """
202
210
 
203
- # TODO: maintain internal time.monotonic() clock and call .update() from
204
- # functions like .alive() if last update is > API_QUERY_DELAY
211
+ # actually query the TestingFarm API at most every X seconds,
212
+ # re-using cached state between updates
213
+ api_query_limit = 30
205
214
 
206
215
  def __init__(self, id=None, api=None, initial_data=None):
207
216
  """
208
217
  'id' is a Testing Farm request UUID
218
+
209
219
  'api' is a TestingFarmAPI instance - if unspecified, a sensible default
210
- 'initial_data' (dict) can be used to pre-fill an initial Request state
211
- will be used.
220
+
221
+ 'initial_data' (dict) can be used to pre-fill an initial Request state.
212
222
  """
213
223
  self.id = id
214
224
  self.api = api or TestingFarmAPI()
215
225
  self.data = initial_data or {}
226
+ self.next_query = 0
216
227
 
217
228
  def submit(self, spec):
218
229
  """
@@ -224,16 +235,12 @@ class Request:
224
235
  self.data = self.api.submit_request(spec)
225
236
  self.id = self.data["id"]
226
237
 
227
- def update(self):
228
- """
229
- Query Testing Farm API to get a more up-to-date version of the request
230
- metadata. Do not call too frequently.
231
- This function is also used internally by others, you do not need to
232
- always call it manually.
233
- """
234
- self.data = self.api.get_request(self.id)
235
- # TODO: refresh internal time.monotonic() timer
236
- return self.data
238
+ def _refresh(self):
239
+ if not self.id:
240
+ return
241
+ if time.monotonic() > self.next_query:
242
+ self.data = self.api.get_request(self.id)
243
+ self.next_query = time.monotonic() + self.api_query_limit
237
244
 
238
245
  def cancel(self):
239
246
  if not self.id:
@@ -244,35 +251,44 @@ class Request:
244
251
  return data
245
252
 
246
253
  def alive(self):
247
- if "state" not in self.data:
248
- self.update()
254
+ if not self.id:
255
+ return False
256
+ self._refresh()
249
257
  return self.data["state"] not in END_STATES
250
258
 
251
259
  def assert_alive(self):
252
260
  if not self.alive():
253
261
  state = self.data["state"]
254
- raise GoneAwayError(f"request {self.data['id']} not alive anymore, entered: {state}")
262
+ raise GoneAwayError(f"request {self.id} not alive anymore, entered: {state}")
255
263
 
256
264
  def wait_for_state(self, state):
257
- if "state" not in self.data:
258
- self.update()
259
- self.assert_alive()
260
- while self.data["state"] != state:
261
- time.sleep(API_QUERY_DELAY)
262
- self.update()
263
- self.assert_alive()
265
+ """
266
+ 'state' is a str or a tuple of states to wait for.
267
+ """
268
+ watched = (state,) if isinstance(state, str) else state
269
+ while True:
270
+ self._refresh()
271
+ if self.data["state"] in watched:
272
+ break
273
+ # if the request ended in one of END_STATES and the above condition
274
+ # did not catch it, the wait will never end
275
+ if self.data["state"] in END_STATES:
276
+ raise GoneAwayError(f"request {self.id} ended with {self.data['state']}")
264
277
 
265
278
  def __repr__(self):
266
279
  return f"Request(id={self.id})"
267
280
 
268
281
  def __str__(self):
282
+ self._refresh()
269
283
  # python has no better dict-pretty-printing logic
270
284
  return json.dumps(self.data, sort_keys=True, indent=4)
271
285
 
272
286
  def __contains__(self, item):
287
+ self._refresh()
273
288
  return item in self.data
274
289
 
275
290
  def __getitem__(self, key):
291
+ self._refresh()
276
292
  return self.data[key]
277
293
 
278
294
 
@@ -282,6 +298,10 @@ class PipelineLogStreamer:
282
298
  to "stream" its contents over time (over many requests), never having to
283
299
  re-read old pipeline.log content.
284
300
  """
301
+
302
+ # how frequently to check for pipeline.log updates (seconds)
303
+ pipeline_query_limit = 30
304
+
285
305
  def __init__(self, request):
286
306
  self.request = request
287
307
 
@@ -314,8 +334,7 @@ class PipelineLogStreamer:
314
334
  return log
315
335
 
316
336
  finally:
317
- time.sleep(API_QUERY_DELAY)
318
- self.request.update()
337
+ time.sleep(self.pipeline_query_limit)
319
338
 
320
339
  def __iter__(self):
321
340
  url = self._wait_for_entry()
@@ -346,8 +365,7 @@ class PipelineLogStreamer:
346
365
  buffer = buffer[index+1:]
347
366
 
348
367
  finally:
349
- time.sleep(API_QUERY_DELAY)
350
- self.request.update()
368
+ time.sleep(self.pipeline_query_limit)
351
369
 
352
370
 
353
371
  class Reserve:
@@ -370,7 +388,9 @@ class Reserve:
370
388
 
371
389
  def __init__(
372
390
  self, *, compose, arch="x86_64", pool=None, hardware=None, kickstart=None,
373
- timeout=60, ssh_key=None, source_host=None, api=None,
391
+ timeout=60, ssh_key=None, source_host=None,
392
+ reserve_test=None, variables=None, secrets=None,
393
+ api=None,
374
394
  ):
375
395
  """
376
396
  'compose' (str) is the OS to install, chosen from the composes supported
@@ -403,18 +423,31 @@ class Reserve:
403
423
  facing address of the current system.
404
424
  Ignored on the 'redhat' ranch.
405
425
 
426
+ 'reserve_test' is a dict with a fmf test specification to be run on the
427
+ target system to reserve it, ie.:
428
+ {
429
+ "url": "https://some-host/path/to/repo",
430
+ "ref": "main",
431
+ "name": "/plans/reserve",
432
+ }
433
+
434
+ 'variables' and 'secrets' are dicts with environment variable key/values
435
+ exported for the reserve test - variables are visible via TF API,
436
+ secrets are not (but can still be extracted from pipeline log).
437
+
406
438
  'api' is a TestingFarmAPI instance - if unspecified, a sensible default
407
439
  will be used.
408
440
  """
409
- util.info(f"Will reserve compose:{compose} on arch:{arch} for {timeout}min")
441
+ util.info(f"will reserve compose:{compose} on arch:{arch} for {timeout}min")
410
442
  spec = {
411
- "test": RESERVE_TASK,
443
+ "test": {
444
+ "fmf": reserve_test or DEFAULT_RESERVE_TEST,
445
+ },
412
446
  "environments": [{
413
447
  "arch": arch,
414
448
  "os": {
415
449
  "compose": compose,
416
450
  },
417
- "pool": pool,
418
451
  "settings": {
419
452
  "pipeline": {
420
453
  "skip_guest_setup": True,
@@ -423,10 +456,8 @@ class Reserve:
423
456
  "tags": {
424
457
  "ArtemisUseSpot": "false",
425
458
  },
426
- "security_group_rules_ingress": [],
427
459
  },
428
460
  },
429
- "secrets": {},
430
461
  }],
431
462
  "settings": {
432
463
  "pipeline": {
@@ -434,10 +465,16 @@ class Reserve:
434
465
  },
435
466
  },
436
467
  }
468
+ spec_env = spec["environments"][0]
469
+ if pool:
470
+ spec_env["pool"] = pool
437
471
  if hardware:
438
- spec["environments"][0]["hardware"] = hardware
472
+ spec_env["hardware"] = hardware
439
473
  if kickstart:
440
- spec["environments"][0]["kickstart"] = kickstart
474
+ spec_env["kickstart"] = kickstart
475
+ if variables:
476
+ spec_env["variables"] = variables
477
+ spec_env["secrets"] = secrets.copy() if secrets else {} # we need it for ssh pubkey
441
478
 
442
479
  self._spec = spec
443
480
  self._ssh_key = Path(ssh_key) if ssh_key else None
@@ -465,20 +502,25 @@ class Reserve:
465
502
  raise RuntimeError("reservation already in progress")
466
503
 
467
504
  spec = self._spec.copy()
505
+ spec_env = spec["environments"][0]
468
506
 
469
- try:
470
- # add source_host firewall filter
507
+ # add source_host firewall filter on the public ranch
508
+ if self.api.whoami()["token"]["ranch"] == "public":
471
509
  source_host = self._source_host or f"{self._guess_host_ipv4()}/32"
472
- ingress = \
473
- spec["environments"][0]["settings"]["provisioning"]["security_group_rules_ingress"]
474
- ingress.append({
510
+ ingress_rule = {
475
511
  "type": "ingress",
476
512
  "protocol": "-1",
477
513
  "cidr": source_host,
478
514
  "port_min": 0,
479
515
  "port_max": 65535,
480
- })
516
+ }
517
+ provisioning = spec_env["settings"]["provisioning"]
518
+ if "security_group_rules_ingress" in provisioning:
519
+ provisioning["security_group_rules_ingress"].append(ingress_rule)
520
+ else:
521
+ provisioning["security_group_rules_ingress"] = [ingress_rule]
481
522
 
523
+ try:
482
524
  # read user-provided ssh key, or generate one
483
525
  ssh_key = self._ssh_key
484
526
  if ssh_key:
@@ -491,22 +533,30 @@ class Reserve:
491
533
  ssh_key, ssh_pubkey = util.ssh_keygen(self._tmpdir.name)
492
534
 
493
535
  pubkey_contents = ssh_pubkey.read_text().strip()
494
- secrets = spec["environments"][0]["secrets"]
495
- secrets["RESERVE_SSH_PUBKEY"] = pubkey_contents
536
+ # TODO: split ^^^ into 3 parts (key type, hash, comment), assert it,
537
+ # and anonymize comment in case it contains a secret user/hostname
538
+ spec_env["secrets"]["RESERVE_SSH_PUBKEY"] = pubkey_contents
496
539
 
497
540
  with self.lock:
498
541
  self.request = Request(api=self.api)
499
542
  self.request.submit(spec)
500
- util.debug(f"submitted request:\n{textwrap.indent(str(self.request), ' ')}")
543
+ util.debug(f"submitted request {self.request.id}")
544
+ util.extradebug(
545
+ f"request {self.request.id}:\n{textwrap.indent(str(self.request), ' ')}",
546
+ )
501
547
 
502
548
  # wait for user/host to ssh to
503
549
  ssh_user = ssh_host = None
504
550
  for line in PipelineLogStreamer(self.request):
505
551
  # the '\033[0m' is to reset colors sometimes left in a bad
506
552
  # state by pipeline.log
507
- util.debug(f"pipeline: {line}\033[0m")
553
+ util.extradebug(f"{line}\033[0m")
508
554
  # find hidden login details
509
- m = re.search(r"\] Guest is ready: ArtemisGuest\([^,]+, (\w+)@([0-9\.]+), ", line)
555
+ m = re.search(
556
+ # host address can be an IP address or a hostname
557
+ r"\] Guest is ready: ArtemisGuest\([^,]+, (\w+)@([^,]+), arch=",
558
+ line,
559
+ )
510
560
  if m:
511
561
  ssh_user, ssh_host = m.groups()
512
562
  continue
@@ -520,14 +570,12 @@ class Reserve:
520
570
  # (it will be failing to login for a while, until the reserve test
521
571
  # installs our ssh pubkey into authorized_keys)
522
572
  ssh_attempt_cmd = (
523
- "ssh", "-q", "-i", ssh_key, f"-oConnectionAttempts={API_QUERY_DELAY}",
573
+ "ssh", "-q", "-i", ssh_key.absolute(), "-oConnectionAttempts=60",
524
574
  "-oStrictHostKeyChecking=no", "-oUserKnownHostsFile=/dev/null",
525
575
  f"{ssh_user}@{ssh_host}", "exit 123",
526
576
  )
527
577
  while True:
528
- # wait for API_QUERY_DELAY between ssh retries, seems like GEFN sleep time
529
- time.sleep(API_QUERY_DELAY)
530
- self.request.update()
578
+ time.sleep(1)
531
579
  self.request.assert_alive()
532
580
 
533
581
  proc = util.subprocess_run(
@@ -564,7 +612,11 @@ class Reserve:
564
612
  self._tmpdir = None
565
613
 
566
614
  def __enter__(self):
567
- return self.reserve()
615
+ try:
616
+ return self.reserve()
617
+ except Exception:
618
+ self.release()
619
+ raise
568
620
 
569
621
  def __exit__(self, exc_type, exc_value, traceback):
570
622
  self.release()