atex 0.7__tar.gz → 0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. atex-0.9/DEVEL.md +43 -0
  2. atex-0.9/PKG-INFO +178 -0
  3. atex-0.9/README.md +162 -0
  4. atex-0.9/TODO +302 -0
  5. atex-0.9/aggrtest-combined.py +74 -0
  6. atex-0.9/aggrtest.py +41 -0
  7. atex-0.9/atex/cli/fmf.py +143 -0
  8. atex-0.9/atex/cli/libvirt.py +127 -0
  9. {atex-0.7 → atex-0.9}/atex/cli/testingfarm.py +35 -13
  10. {atex-0.7 → atex-0.9}/atex/connection/__init__.py +13 -19
  11. atex-0.9/atex/connection/podman.py +63 -0
  12. {atex-0.7 → atex-0.9}/atex/connection/ssh.py +34 -52
  13. atex-0.9/atex/executor/README.md +102 -0
  14. {atex-0.7/atex/minitmt → atex-0.9/atex/executor}/RESULTS.md +32 -39
  15. {atex-0.7/atex/minitmt → atex-0.9/atex/executor}/TEST_CONTROL.md +2 -4
  16. atex-0.9/atex/executor/__init__.py +2 -0
  17. atex-0.9/atex/executor/duration.py +60 -0
  18. atex-0.9/atex/executor/executor.py +402 -0
  19. atex-0.9/atex/executor/reporter.py +101 -0
  20. {atex-0.7/atex/minitmt → atex-0.9/atex/executor}/scripts.py +37 -25
  21. {atex-0.7/atex/minitmt → atex-0.9/atex/executor}/testcontrol.py +54 -42
  22. atex-0.9/atex/fmf.py +237 -0
  23. atex-0.9/atex/orchestrator/__init__.py +3 -0
  24. atex-0.9/atex/orchestrator/aggregator.py +111 -0
  25. atex-0.9/atex/orchestrator/orchestrator.py +385 -0
  26. atex-0.9/atex/provision/__init__.py +124 -0
  27. atex-0.9/atex/provision/libvirt/__init__.py +2 -0
  28. atex-0.9/atex/provision/libvirt/libvirt.py +465 -0
  29. atex-0.9/atex/provision/libvirt/locking.py +168 -0
  30. {atex-0.7 → atex-0.9}/atex/provision/libvirt/setup-libvirt.sh +21 -1
  31. atex-0.9/atex/provision/podman/__init__.py +1 -0
  32. atex-0.9/atex/provision/podman/podman.py +274 -0
  33. atex-0.9/atex/provision/testingfarm/__init__.py +2 -0
  34. {atex-0.7 → atex-0.9}/atex/provision/testingfarm/api.py +123 -65
  35. atex-0.9/atex/provision/testingfarm/testingfarm.py +234 -0
  36. {atex-0.7 → atex-0.9}/atex/util/__init__.py +1 -6
  37. atex-0.9/atex/util/libvirt.py +18 -0
  38. {atex-0.7 → atex-0.9}/atex/util/log.py +31 -8
  39. atex-0.9/atex/util/named_mapping.py +158 -0
  40. atex-0.9/atex/util/path.py +16 -0
  41. atex-0.9/atex/util/ssh_keygen.py +14 -0
  42. atex-0.9/atex/util/threads.py +99 -0
  43. atex-0.9/contest.py +153 -0
  44. atex-0.9/orch.py +40 -0
  45. atex-0.9/prov.py +39 -0
  46. {atex-0.7 → atex-0.9}/pyproject.toml +17 -4
  47. atex-0.9/runtest.py +74 -0
  48. atex-0.9/ssh.py +86 -0
  49. atex-0.9/tests/conftest.py +15 -0
  50. atex-0.9/tests/fmf/fmf_tree/adjusted.fmf +23 -0
  51. atex-0.9/tests/fmf/fmf_tree/disabled.fmf +2 -0
  52. atex-0.9/tests/fmf/fmf_tree/environment.fmf +3 -0
  53. atex-0.9/tests/fmf/fmf_tree/filters.fmf +12 -0
  54. atex-0.9/tests/fmf/fmf_tree/inherit/child/main.fmf +3 -0
  55. atex-0.9/tests/fmf/fmf_tree/inherit/main.fmf +4 -0
  56. atex-0.9/tests/fmf/fmf_tree/listlike.fmf +3 -0
  57. atex-0.9/tests/fmf/fmf_tree/manual.fmf +2 -0
  58. atex-0.9/tests/fmf/fmf_tree/nontest.fmf +1 -0
  59. atex-0.9/tests/fmf/fmf_tree/plans/filtered.fmf +21 -0
  60. atex-0.9/tests/fmf/fmf_tree/plans/listlike.fmf +7 -0
  61. atex-0.9/tests/fmf/fmf_tree/plans/scripts.fmf +20 -0
  62. atex-0.9/tests/fmf/fmf_tree/plans/with_env.fmf +5 -0
  63. atex-0.9/tests/fmf/fmf_tree/simple/main.fmf +4 -0
  64. atex-0.9/tests/fmf/fmf_tree/story.fmf +11 -0
  65. atex-0.9/tests/fmf/fmf_tree/virtual.fmf +7 -0
  66. atex-0.9/tests/fmf/test_fmf.py +151 -0
  67. atex-0.9/tests/provision/shared.py +122 -0
  68. atex-0.9/tests/provision/test_podman.py +86 -0
  69. atex-0.9/tests/provision/test_testingfarm.py +87 -0
  70. atex-0.9/tests/testutil/__init__.py +44 -0
  71. atex-0.9/tests/testutil/timeout.py +52 -0
  72. atex-0.9/tmt_tests/.fmf/version +1 -0
  73. atex-0.9/tmt_tests/reserve/main.fmf +11 -0
  74. atex-0.9/tmt_tests/reserve/test.sh +115 -0
  75. atex-0.9/utils/finished_excludes.py +22 -0
  76. atex-0.7/PKG-INFO +0 -102
  77. atex-0.7/README.md +0 -87
  78. atex-0.7/TODO +0 -59
  79. atex-0.7/atex/cli/minitmt.py +0 -175
  80. atex-0.7/atex/minitmt/README.md +0 -180
  81. atex-0.7/atex/minitmt/__init__.py +0 -23
  82. atex-0.7/atex/minitmt/executor.py +0 -348
  83. atex-0.7/atex/minitmt/fmf.py +0 -202
  84. atex-0.7/atex/orchestrator/__init__.py +0 -59
  85. atex-0.7/atex/orchestrator/aggregator.py +0 -163
  86. atex-0.7/atex/provision/__init__.py +0 -155
  87. atex-0.7/atex/provision/libvirt/__init__.py +0 -24
  88. atex-0.7/atex/provision/nspawn/README +0 -74
  89. atex-0.7/atex/provision/podman/README +0 -59
  90. atex-0.7/atex/provision/podman/host_container.sh +0 -74
  91. atex-0.7/atex/provision/testingfarm/__init__.py +0 -29
  92. atex-0.7/atex/provision/testingfarm/foo.py +0 -1
  93. atex-0.7/logtest.py +0 -19
  94. atex-0.7/ssh.py +0 -48
  95. atex-0.7/tests/PYTEST.md +0 -11
  96. atex-0.7/tests/conftest.py +0 -50
  97. atex-0.7/tests/test_another.py +0 -4
  98. atex-0.7/tests/test_foobar.py +0 -13
  99. atex-0.7/tmt_tests/reserve/main.fmf +0 -5
  100. atex-0.7/tmt_tests/reserve/test.sh +0 -72
  101. {atex-0.7 → atex-0.9}/.editorconfig +0 -0
  102. {atex-0.7 → atex-0.9}/.gitignore +0 -0
  103. {atex-0.7 → atex-0.9}/COPYING.txt +0 -0
  104. {atex-0.7 → atex-0.9}/atex/__init__.py +0 -0
  105. {atex-0.7 → atex-0.9}/atex/cli/__init__.py +0 -0
  106. {atex-0.7 → atex-0.9}/atex/provision/libvirt/VM_PROVISION +0 -0
  107. {atex-0.7 → atex-0.9}/atex/util/README.md +0 -0
  108. {atex-0.7 → atex-0.9}/atex/util/dedent.py +0 -0
  109. {atex-0.7 → atex-0.9}/atex/util/subprocess.py +0 -0
  110. {atex-0.7 → atex-0.9}/reporter.py +0 -0
  111. {atex-0.7/tmt_tests → atex-0.9/tests/fmf/fmf_tree}/.fmf/version +0 -0
  112. {atex-0.7 → atex-0.9}/tf.py +0 -0
  113. {atex-0.7 → atex-0.9}/tmt_tests/plans/reserve.fmf +0 -0
atex-0.9/DEVEL.md ADDED
@@ -0,0 +1,43 @@
1
+ # Misc development notes
2
+
3
+ ## Contributing
4
+
5
+ TODO - coding style
6
+
7
+ ## Release workflow
8
+
9
+ NEVER commit these to git, they are ONLY for the PyPI release.
10
+
11
+ 1. Increase `version = ` in `pyproject.toml`
12
+ 1. Tag a new version in the `atex-reserve` repo, push the tag
13
+ 1. Point to that tag from `atex/provisioner/testingfarm/api.py`,
14
+ `DEFAULT_RESERVE_TEST`
15
+ 1. ...
16
+
17
+ ## Blocking functions
18
+
19
+ - this is about `get_remote(block=True/False)` and similar ones
20
+ - the key difference is that `True` blocks until the function has something
21
+ to return, or until Exception, and `False` does not
22
+ - it does NOT mean that `False` cannot block on any IO
23
+ - ie. `False` can still wait 1 second for HTTP GET to finish, effectively
24
+ "blocking" the parent process, but the important part is that it doesn't
25
+ block all the way until a Remote is provisioned
26
+ - best practice for `False`: never block on IO, offload any IO requests
27
+ to internal threads, incl. URL retrieval, on-disk file read/writes, etc.,
28
+ have the `False`-called code only check python variables on whether XYZ
29
+ is ready or not
30
+ - but, often, best practice != reality, and code complexity also has to be
31
+ considered
32
+ - finally, `False` is not a guarantee, just a wish of the caller; if a function
33
+ cannot be implemented non-blocking, it should behave as if called with `True`
34
+ rather than throwing an error
35
+ - any code using `False` should still theoretically work given that `False`
36
+ provides no guarantees on how quickly it returns, it will just work more
37
+ slowly
38
+
39
+ - TODO: `Remote.release(block=True/False)`
40
+ - dictates whether to block until the remote is successfully released or
41
+ the release fails with an Exception (`True`), or whether the caller doesn't
42
+ care and wants to fire off the release, to be handled in some background
43
+ thread (set up by the Remote/Provisioner)
atex-0.9/PKG-INFO ADDED
@@ -0,0 +1,178 @@
1
+ Metadata-Version: 2.4
2
+ Name: atex
3
+ Version: 0.9
4
+ Summary: Ad-hoc Test EXecutor
5
+ Project-URL: Homepage, https://github.com/RHSecurityCompliance/atex
6
+ License-Expression: GPL-3.0-or-later
7
+ License-File: COPYING.txt
8
+ Classifier: Operating System :: POSIX :: Linux
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Topic :: Software Development :: Testing
11
+ Requires-Python: >=3.11
12
+ Requires-Dist: fmf>=1.6
13
+ Requires-Dist: pyyaml
14
+ Requires-Dist: urllib3<3,>=2
15
+ Description-Content-Type: text/markdown
16
+
17
+ # ATEX = Ad-hoc Test EXecutor
18
+
19
+ A collection of Python APIs to provision operating systems, collect
20
+ and execute [FMF](https://github.com/teemtee/fmf/)-style tests, gather
21
+ and organize their results and generate reports from those results.
22
+
23
+ The name comes from a (fairly unique to FMF/TMT ecosystem) approach that
24
+ allows provisioning a pool of systems and scheduling tests on them as one would
25
+ on an ad-hoc pool of thread/process workers - once a worker becomes free,
26
+ it receives a test to run.
27
+ This is in contrast to splitting a large list of N tests onto M workers
28
+ like N/M, which yields significant time penalties due to tests having
29
+ very varied runtimes.
30
+
31
+ Above all, this project is meant to be a toolbox, not a silver-platter solution.
32
+ Use its Python APIs to build a CLI tool for your specific use case.
33
+ The CLI tool provided here is just for demonstration / testing, not for serious
34
+ use - we want to avoid huge modular CLIs for Every Possible Scenario. That's
35
+ the job of the Python API. Any CLI should be simple by nature.
36
+
37
+ ---
38
+
39
+ THIS PROJECT IS HEAVILY WIP, THINGS WILL MOVE AROUND, CHANGE AND OTHERWISE
40
+ BREAK. DO NOT USE IT (for now).
41
+
42
+ ---
43
+
44
+ ## License
45
+
46
+ Unless specified otherwise, any content within this repository is distributed
47
+ under the GNU GPLv3 license, see the [COPYING.txt](COPYING.txt) file for more.
48
+
49
+ ## Testing this project
50
+
51
+ There are some limited sanity tests provided via `pytest`, although:
52
+
53
+ * Some require additional variables (ie. Testing Farm) and will ERROR
54
+ without them.
55
+ * Some take a long time (ie. Testing Farm) due to system provisioning
56
+ taking a long time, so install `pytest-xdist` and run with a large `-n`.
57
+
58
+ Currently, the recommended approach is to split the execution:
59
+
60
+ ```
61
+ # synchronously, because podman CLI has concurrency issues
62
+ pytest tests/provision/test_podman.py
63
+
64
+ # in parallel, because provisioning takes a long time
65
+ export TESTING_FARM_API_TOKEN=...
66
+ export TESTING_FARM_COMPOSE=...
67
+ pytest -n 20 tests/provision/test_testingfarm.py
68
+
69
+ # fast enough for synchronous execution
70
+ pytest tests/fmf
71
+ ```
72
+
73
+ ## Parallelism and cleanup
74
+
75
+ There are effectively 3 methods of running things in parallel in Python:
76
+
77
+ - `threading.Thread` (and related `concurrent.futures` classes)
78
+ - `multiprocessing.Process` (and related `concurrent.futures` classes)
79
+ - `asyncio`
80
+
81
+ and there is no clear winner (in terms of cleanup on `SIGTERM` or Ctrl-C):
82
+
83
+ - `Thread` has signal handlers only in the main thread and is unable to
84
+ interrupt any running threads without super ugly workarounds like `sleep(1)`
85
+ in every thread, checking some "pls exit" variable
86
+ - `Process` is too heavyweight and makes sharing native Python objects hard,
87
+ but it does handle signals in each process individually
88
+ - `asyncio` handles interrupting perfectly (every `try`/`except`/`finally`
89
+ completes just fine, `KeyboardInterrupt` is raised in every async context),
90
+ but async python is still (3.14) too weird and unsupported
91
+ - `asyncio` effectively re-implements `subprocess` with a slightly different
92
+ API, same with `asyncio.Transport` and derivatives reimplementing `socket`
93
+ - 3rd party libraries like `requests` or `urllib3` don't support it, one needs
94
+ to resort to spawning these in separate threads anyway
95
+ - same with `os.*` functions and syscalls
96
+ - everything exposed via API needs to have 2 copies - async and non-async,
97
+ making it unbearable
98
+ - other stdlib bugs, ie. "large" reads returning BlockingIOError sometimes
99
+
100
+ The approach chosen by this project was to use `threading.Thread`, and
101
+ implement thread safety for classes and their functions that need it.
102
+ For example:
103
+
104
+ ```python
105
+ class MachineReserver:
106
+ def __init__(self):
107
+ self.lock = threading.RLock()
108
+ self.job = None
109
+ self.proc = None
110
+
111
+ def reserve(self, ...):
112
+ try:
113
+ ...
114
+ job = schedule_new_job_on_external_service()
115
+ with self.lock:
116
+ self.job = job
117
+ ...
118
+ while not reserved(self.job):
119
+ time.sleep(60)
120
+ ...
121
+ with self.lock:
122
+ self.proc = subprocess.Popen(["ssh", f"{user}@{host}", ...])
123
+ ...
124
+ return machine
125
+ except Exception:
126
+ self.abort()
127
+ raise
128
+
129
+ def abort(self):
130
+ with self.lock:
131
+ if self.job:
132
+ cancel_external_service(self.job)
133
+ self.job = None
134
+ if self.proc:
135
+ self.proc.kill()
136
+ self.proc = None
137
+ ```
138
+
139
+ Here, it is expected for `.reserve()` to be called in a long-running thread that
140
+ provisions a new machine on some external service, waits for it to be installed
141
+ and reserved, connects an ssh session to it and returns it back.
142
+
143
+ But equally, `.abort()` can be called from an external thread and clean up any
144
+ non-pythonic resources (external jobs, processes, temporary files, etc.) at
145
+ which point **we don't care what happens to .reserve()**, it will probably fail
146
+ with some exception, but doesn't do any harm.
147
+
148
+ Here is where `daemon=True` threads come in handy - we can simply call `.abort()`
149
+ from a `KeyboardInterrupt` (or `SIGTERM`) handler in the main thread, and just
150
+ exit, automatically killing any leftover threads that are uselessly sleeping.
151
+ (Realistically, we might want to spawn new threads to run many `.abort()`s in
152
+ parallel, but the main thread can wait for those just fine.)
153
+
154
+ It is not perfect, but it's probably the best Python can do.
155
+
156
+ Note that races can still occur between a resource being reserved and written
157
+ to `self.*` for `.abort()` to free, so resource de-allocation is not 100%
158
+ guaranteed, but single-threaded interrupting has the same issue.
159
+ Do have fallbacks (ie. max reserve times on the external service).
160
+
161
+ Also note that `.reserve()` and `.abort()` could be also called by a context
162
+ manager as `__enter__` and `__exit__`, ie. by a non-threaded caller (running
163
+ everything in the main thread).
164
+
165
+
166
+ ## Unsorted notes
167
+
168
+ TODO: codestyle from contest
169
+
170
+ ```
171
+ - this is not tmt, the goal is to make a python toolbox *for* making runcontest
172
+ style tools easily, not to replace those tools with tmt-style CLI syntax
173
+
174
+ - the whole point is to make usecase-targeted easy-to-use tools that don't
175
+ intimidate users with 1 KB long command line, and runcontest is a nice example
176
+
177
+ - TL;DR - use a modular pythonic approach, not a gluetool-style long CLI
178
+ ```
atex-0.9/README.md ADDED
@@ -0,0 +1,162 @@
1
+ # ATEX = Ad-hoc Test EXecutor
2
+
3
+ A collection of Python APIs to provision operating systems, collect
4
+ and execute [FMF](https://github.com/teemtee/fmf/)-style tests, gather
5
+ and organize their results and generate reports from those results.
6
+
7
+ The name comes from a (fairly unique to FMF/TMT ecosystem) approach that
8
+ allows provisioning a pool of systems and scheduling tests on them as one would
9
+ on an ad-hoc pool of thread/process workers - once a worker becomes free,
10
+ it receives a test to run.
11
+ This is in contrast to splitting a large list of N tests onto M workers
12
+ like N/M, which yields significant time penalties due to tests having
13
+ very varied runtimes.
14
+
15
+ Above all, this project is meant to be a toolbox, not a silver-platter solution.
16
+ Use its Python APIs to build a CLI tool for your specific use case.
17
+ The CLI tool provided here is just for demonstration / testing, not for serious
18
+ use - we want to avoid huge modular CLIs for Every Possible Scenario. That's
19
+ the job of the Python API. Any CLI should be simple by nature.
20
+
21
+ ---
22
+
23
+ THIS PROJECT IS HEAVILY WIP, THINGS WILL MOVE AROUND, CHANGE AND OTHERWISE
24
+ BREAK. DO NOT USE IT (for now).
25
+
26
+ ---
27
+
28
+ ## License
29
+
30
+ Unless specified otherwise, any content within this repository is distributed
31
+ under the GNU GPLv3 license, see the [COPYING.txt](COPYING.txt) file for more.
32
+
33
+ ## Testing this project
34
+
35
+ There are some limited sanity tests provided via `pytest`, although:
36
+
37
+ * Some require additional variables (ie. Testing Farm) and will ERROR
38
+ without them.
39
+ * Some take a long time (ie. Testing Farm) due to system provisioning
40
+ taking a long time, so install `pytest-xdist` and run with a large `-n`.
41
+
42
+ Currently, the recommended approach is to split the execution:
43
+
44
+ ```
45
+ # synchronously, because podman CLI has concurrency issues
46
+ pytest tests/provision/test_podman.py
47
+
48
+ # in parallel, because provisioning takes a long time
49
+ export TESTING_FARM_API_TOKEN=...
50
+ export TESTING_FARM_COMPOSE=...
51
+ pytest -n 20 tests/provision/test_testingfarm.py
52
+
53
+ # fast enough for synchronous execution
54
+ pytest tests/fmf
55
+ ```
56
+
57
+ ## Parallelism and cleanup
58
+
59
+ There are effectively 3 methods of running things in parallel in Python:
60
+
61
+ - `threading.Thread` (and related `concurrent.futures` classes)
62
+ - `multiprocessing.Process` (and related `concurrent.futures` classes)
63
+ - `asyncio`
64
+
65
+ and there is no clear winner (in terms of cleanup on `SIGTERM` or Ctrl-C):
66
+
67
+ - `Thread` has signal handlers only in the main thread and is unable to
68
+ interrupt any running threads without super ugly workarounds like `sleep(1)`
69
+ in every thread, checking some "pls exit" variable
70
+ - `Process` is too heavyweight and makes sharing native Python objects hard,
71
+ but it does handle signals in each process individually
72
+ - `asyncio` handles interrupting perfectly (every `try`/`except`/`finally`
73
+ completes just fine, `KeyboardInterrupt` is raised in every async context),
74
+ but async python is still (3.14) too weird and unsupported
75
+ - `asyncio` effectively re-implements `subprocess` with a slightly different
76
+ API, same with `asyncio.Transport` and derivatives reimplementing `socket`
77
+ - 3rd party libraries like `requests` or `urllib3` don't support it, one needs
78
+ to resort to spawning these in separate threads anyway
79
+ - same with `os.*` functions and syscalls
80
+ - everything exposed via API needs to have 2 copies - async and non-async,
81
+ making it unbearable
82
+ - other stdlib bugs, ie. "large" reads returning BlockingIOError sometimes
83
+
84
+ The approach chosen by this project was to use `threading.Thread`, and
85
+ implement thread safety for classes and their functions that need it.
86
+ For example:
87
+
88
+ ```python
89
+ class MachineReserver:
90
+ def __init__(self):
91
+ self.lock = threading.RLock()
92
+ self.job = None
93
+ self.proc = None
94
+
95
+ def reserve(self, ...):
96
+ try:
97
+ ...
98
+ job = schedule_new_job_on_external_service()
99
+ with self.lock:
100
+ self.job = job
101
+ ...
102
+ while not reserved(self.job):
103
+ time.sleep(60)
104
+ ...
105
+ with self.lock:
106
+ self.proc = subprocess.Popen(["ssh", f"{user}@{host}", ...])
107
+ ...
108
+ return machine
109
+ except Exception:
110
+ self.abort()
111
+ raise
112
+
113
+ def abort(self):
114
+ with self.lock:
115
+ if self.job:
116
+ cancel_external_service(self.job)
117
+ self.job = None
118
+ if self.proc:
119
+ self.proc.kill()
120
+ self.proc = None
121
+ ```
122
+
123
+ Here, it is expected for `.reserve()` to be called in a long-running thread that
124
+ provisions a new machine on some external service, waits for it to be installed
125
+ and reserved, connects an ssh session to it and returns it back.
126
+
127
+ But equally, `.abort()` can be called from an external thread and clean up any
128
+ non-pythonic resources (external jobs, processes, temporary files, etc.) at
129
+ which point **we don't care what happens to .reserve()**, it will probably fail
130
+ with some exception, but doesn't do any harm.
131
+
132
+ Here is where `daemon=True` threads come in handy - we can simply call `.abort()`
133
+ from a `KeyboardInterrupt` (or `SIGTERM`) handler in the main thread, and just
134
+ exit, automatically killing any leftover threads that are uselessly sleeping.
135
+ (Realistically, we might want to spawn new threads to run many `.abort()`s in
136
+ parallel, but the main thread can wait for those just fine.)
137
+
138
+ It is not perfect, but it's probably the best Python can do.
139
+
140
+ Note that races can still occur between a resource being reserved and written
141
+ to `self.*` for `.abort()` to free, so resource de-allocation is not 100%
142
+ guaranteed, but single-threaded interrupting has the same issue.
143
+ Do have fallbacks (ie. max reserve times on the external service).
144
+
145
+ Also note that `.reserve()` and `.abort()` could be also called by a context
146
+ manager as `__enter__` and `__exit__`, ie. by a non-threaded caller (running
147
+ everything in the main thread).
148
+
149
+
150
+ ## Unsorted notes
151
+
152
+ TODO: codestyle from contest
153
+
154
+ ```
155
+ - this is not tmt, the goal is to make a python toolbox *for* making runcontest
156
+ style tools easily, not to replace those tools with tmt-style CLI syntax
157
+
158
+ - the whole point is to make usecase-targeted easy-to-use tools that don't
159
+ intimidate users with 1 KB long command line, and runcontest is a nice example
160
+
161
+ - TL;DR - use a modular pythonic approach, not a gluetool-style long CLI
162
+ ```
atex-0.9/TODO ADDED
@@ -0,0 +1,302 @@
1
+
2
+ - proper cleaning of tmpdirs on the host system ?
3
+
4
+ - delete tmt_tests (it's now in its own branch)
5
+
6
+ - get rid of testout_fobj, replace it with testout_fd
7
+ - and os.open() in .start() / os.close() in .stop()
8
+ - have some reporter function to close it manually, close_testout()
9
+ - and call it from executor after doing Popen, to avoid opened fd hanging around
10
+ in the main python process when we don't need it
11
+ - make sure to open with O_WRONLY | O_CREAT | O_APPEND, so reconnects don't override the log
12
+ - verify by 'ls -l /proc/$pyproc/fd' to ensure there are no .../testout.temp fds open
13
+
14
+ - test special cases; ie.
15
+
16
+ - Executor running test and the remote loses connection
17
+ (ie. iptables -I INPUT 1 -j DROP)
18
+
19
+ - ssh after reboot doesn't actually work (ssh.ConnectError)
20
+ - might need generalization of all Connection exceptions
21
+ - does the test result get saved anywhere? .. as 'infra' ?
22
+
23
+ - are non-0 exit codes and exceptions raised by orchestrator, like
24
+ atex: unexpected exception happened while running ...
25
+ logged anywhere aside from ./contest.py stderr?
26
+
27
+ - raise non-0 exit code and unexpected exceptions to util.warning
28
+
29
+ - orchestrator is still calling Remote.release() directly, which may block
30
+ for a long time; have some worker/queue that does it in the background
31
+ - probably as some new BackgroundRemoteReleaser class
32
+ - would be .start()ed from orchestrator start
33
+ - orchestrator would end it from .stop()
34
+ - the class would have a worker function running in the thread,
35
+ reading from a SimpleQueue and calling .release()
36
+ - if it reads None, it ends
37
+ - the class would have some .terminate(), which would push None
38
+ to the queue and wait for .join()
39
+ - orchestrator could return that waiting-for-join function
40
+ as a callable in stop_defer()
41
+
42
+ --> actually, do it differently:
43
+ - make existing ThreadQueue more similar to ThreadPoolExecutor
44
+ by having a configurable 'max_workers' argument, default = infinity
45
+ (and thus start_thread() gets renamed to submit())
46
+
47
+ - then make a simplified version of it that doesn't need to
48
+ return anything, just runs functions pushed to queue
49
+ - and then use it for .release() with max_workers=2 or so
50
+
51
+ - (make sure to self.lock the ThreadPoolExecutor for actions that need it)
52
+ - and have a semaphore for tracking how many threads are active,
53
+ giving .submit() a clue whether to spawn a new one
54
+ - have threads self-shutdown themselves by .get(block=False) checking
55
+ whether the queue is empty - if it is, shut down the worker
56
+
57
+ - contest bug?, reporting log with full path
58
+ - :238: PASS / [report.html, scan-arf.xml.gz, /var/lib/libvirt/images/contest-osbuild.txt]
59
+ - does it upload correctly with name: contest-osbuild.txt ?
60
+
61
+ - priority system for contest
62
+ - 'extra-priority' or something like that
63
+ - run problematic tests (ie. image-builder) first, so they can
64
+ rerun while others run
65
+
66
+ - per-test rerun counts ('extra-reruns')
67
+ - root-level main.fmf could set it to 1, except RHEL-10 (because OpenGPG dnf bug) to 3
68
+ - image-builder test could set it to 5
69
+
70
+ - make testing farm point to an ATEX repo tag when downloading the
71
+ reserve test, to freeze given ATEX versions in time (so they're not
72
+ broken by future git commits)
73
+
74
+ - also parametrize reserve test via module-level constants or TestingFarmProvisioner args
75
+
76
+ - make it python3.12 (RHEL-10) compatible, ideally 3.11 (RHEL-9)
77
+
78
+ - in the CLI tool (for contest), block further SIGINT/SIGTERM
79
+ while already running cleanup (regular one, not induced by ctrl-c)
80
+
81
+ - got hit by this in podman provision; 'podman container rm -t 0 -f'
82
+ was already removing a container (waiting for kill) when the user
83
+ issued SIGINT and it killed the 'rm', leaving container behind
84
+
85
+ - notable TODOs
86
+ - testingfarm and automatically limiting 'class Request' refreshes
87
+ - testingfarm provisioner scaling up/down remotes, to avoid 40 "empty"
88
+ remotes being used up while waiting for the last 1-3 tests to finish
89
+ - atex/util/threads.py documentation
90
+ - generic ConnectError-ish for all Connections
91
+
92
+ - interactive mode for Executor (without redirecting stderr to file,
93
+ and with stdin routed through)
94
+
95
+ - enable gpgchck=1 on TestingFarm RHEL-based systems
96
+ - TODO: check RHEL-8 and 10 too, are they the same?
97
+ - /etc/yum.repos.d/rhel.repo
98
+ - search for "^name=rhel-BaseOS$" to check the file is not OS default
99
+ - replace all "^gpgcheck=0$" with 1
100
+
101
+ - appending to a previous results.json.gz + files_dir
102
+ - gzip should be able to append a new gz header, and we can reuse a files_dir easily
103
+ - maybe append=False param for Orchestrator/Aggregator that would
104
+ - return error if False and one of the two exists
105
+ - append to them if True
106
+ - add test for it
107
+
108
+ - testingfarm failure backoff cooldown
109
+ - if provisioning fails, retry getting new machines in increasing intervals;
110
+ ie. wait 1min, 2min, 4min, 8min, 16min, etc.
111
+ - maybe ditch the concept of an "infra retry" for provisioning, and just always
112
+ expect infinite retries based on ^^^, or use a high number like 8 (backoffs)
113
+ or an absolute giveup time
114
+
115
+ - different approach: add .provision(count=1) to the Provisioner API
116
+ - allows the user to signal to the Provisioner how many Remotes to provision
117
+ and (eventually) return via .get_remote()
118
+ - maybe raise some unique Exception if .get_remote() is called and there is no
119
+ provisioning in progress (it will never return a remote, user has to call
120
+ .provision() first)
121
+ - gets rid of static max_remotes=20 and lets the user request remotes on-the-fly
122
+ - there would probably be absolute_max_remotes=100 or a similar safety fallback
123
+ - have it as class attribute (constant), not __init__ argument
124
+ - Orchestrator could .provision(20) when first starting up, and call .provision()
125
+ to replace a destroyed Remote, BUT CRITICALLY, it could simply not call it
126
+ when to_run is empty and there's >= 2 Remotes already ready in setup queue
127
+ (for use by reruns)
128
+ - this effectively "shuts down" the reservations as tests wind down
129
+ - .provision() should probably internally clamp count= to absolute max, not raise
130
+ an Exception, to allow the user to say "as many as possible" by 'math.inf'
131
+
132
+ - centralized TF API querying for one Provisioner and its remotes
133
+ - possibly do that in one class TestingFarmAPI instance - throttle API queries
134
+ globally to ie. 1/sec, maybe via some queue
135
+ - or a derived RateLimitedTestingFarmAPI instance ?
136
+
137
+ - some interface on SIGUSR1 (?) on the state of the orchestration
138
+ - what tests are running / how many setups running / how many remotes / etc.
139
+ - how long have the running tests been running?
140
+ - what tests are still in to_run
141
+
142
+ - more tests
143
+ - testcontrol (incl. reporting many results)
144
+ - incl. reporting after reboot
145
+ - testingfarm API directly
146
+ - API functions
147
+ - class Request
148
+ - class Reserve + manual use by ssh(1), see tf.py
149
+ - ssh connection tests (both Standalone and Managed) using
150
+ systemd-based podman container (PodmanProvisioner with extra run opts)
151
+ - reporter (part of executor)
152
+ - executor
153
+ - incl. corner cases (see above)
154
+ - shared_dir across multiple parallel Executor instances
155
+ - reboot
156
+ - partial results preserved across reboots
157
+ - disconnect without requested reconnect --> error
158
+ - etc.
159
+ - aggregators
160
+ - orchestrator
161
+ - incl. corner cases like setup failing and being retried
162
+ - provisioners (see TODO at the end of shared.py)
163
+ - start() and stop()
164
+ - stop_defer()
165
+ - testingfarm simple sanity reserve for centos stream 8 9 10
166
+ - to catch distro-specific bugs
167
+
168
+ - clarify, in docstrings for API definitions, what functions may block
169
+ and which ones may be run in different threads
170
+
171
+ - demo examples
172
+ - parallel Executors on one system with multiple Connections
173
+ - Aggregator in append=True mode, re-running tests that never finished
174
+ (building up excludes=[] from finished results.json.gz)
175
+ - image mode, implemented as additional run_setup() Orchestrator hook,
176
+ that collects required packages from all to_run tests, installs them,
177
+ does bootc switch, and that's it
178
+ - additional waiving logic on aggregated JSON results, before rendering
179
+ HTML
180
+
181
+ - formalize coding style in some Markdown doc
182
+ - mention that class member variables and functions that are for internal
183
+ use only (private) start with _ (underscore), everything else is considered
184
+ public, incl. attributes / variables
185
+ - TODO: actually revise the codebase, applying this
186
+
187
+ - make rsync install in podman provisioner optional
188
+ - maybe check with 'rpm -q' and install only if missing?
189
+ - maybe install_rsync=True or install_deps=True __init__ arg?
190
+
191
+ - detailed logging.DEBUG log of everything, for debugging
192
+ - to be redirected to a file, with INFO on the console
193
+ - maybe without pipeline output, that seems useless for debugging
194
+ - but with every ssh attempt, every reconnection, every provisioning query, etc.
195
+
196
+ - properly document which parts of various APIs need to be thread safe,
197
+ which functions are safe, which are not
198
+
199
+ - mention in Orchestrator documentation that it is explicitly about running
200
+ tests on **ONE** platform (string)
201
+
202
+ - reevaluate which func helpers (ie. _private_helper()) make sense on a module
203
+ level vs inside a class
204
+
205
+ - document that Connection MUST use some subprocess-style commands
206
+ for cmd and rsync, and support func=
207
+ - because Executor uses func=Popen to run a test in the background
208
+
209
+ - probably rename ManagedSSHConn -> ManagedSSHConnection
210
+ - for all Conn classes
211
+
212
+ - get rid of skip_frames=
213
+ - this would make Connection methods using func= fully subprocess-compatible,
214
+ ie. with func=subprocess.Popen instead of having to use util.*
215
+ - instead, have a list of names to skip on top of util/log.py,
216
+ UNWIND = [
217
+ "util.subprocess_run",
218
+ ...
219
+ ]
220
+ - mention the list is iterated multiple times until nothing matches, so that
221
+ two levels of wrappers can be unwound just by including both wrappers in the list
222
+
223
+ - add an 'EXTRADEBUG = 5' to util/extra_debug.py and use it for
224
+ - pipeline.log
225
+ - printing domain XMLs
226
+ - etc.
227
+
228
+ - atex tf sr
229
+ - make --state optional, have it be action='append'
230
+ - have the default --state be all non-end states and print '(running)' or '(queued)'
231
+ etc. on each line
232
+
233
+ - rename atex/provision --> atex/provisioner
234
+
235
+ - move aggregator out of orchestrator
236
+
237
+ - remove Provisioner.stop_defer(), have just a blocking .stop() that may internally
238
+ parallelize resource releasing if necessary
239
+ - stop_defer() made sense for TF and other state-less releases, but doesn't for
240
+ libvirt, which needs to release all first, and *then* close the connection
241
+ - give .stop() a block=True/False similar to Remote.release(block=True/False)
242
+ - "just stop/release it in the background, I don't care about return"
243
+ - useful when calling .stop() on many Provisioners from the main program
244
+ - these should be daemon=False threads; to make sure they finish before exit
245
+
246
+ --------------------------
247
+
248
+ atex: unexpected exception happened while running '/hardening/host-os/oscap/stig'
249
+
250
+ 2025-06-17 07:53:03 atex: unexpected exception happened while running '/per-rule/12/oscap' on TestingFarmRemote(RHEL-9.7.0-Nightly @ x86_64, 0x7f0e5cf46df0):
251
+ atex.executor.testcontrol.BadReportJSONError: file 'out.txt' already exists
252
+
253
+ 2025-06-17 08:23:13 atex: unexpected exception happened while running '/hardening/host-os/oscap/stig' on TestingFarmRemote(RHEL-9.7.0-Nightly @ x86_64, 0x7f0e5cef3610):
254
+ atex.connection.ssh.ConnectError: SSH ControlMaster failed to start on /tmp/atex-ssh-de1w7ayl with 255:
255
+ b'kex_exchange_identification: read: Connection reset by peer\r\n'
256
+
257
+
258
+
259
+ 2025-07-23 02:15:47 atex: TestingFarmRemote(root@10.0.178.211:22@/tmp/tmp_86glufe/key_rsa, dd48242e-3956-4ca3-bc59-a00b1b6a1a93): '/static-checks/html-links' threw NotConnectedError during test runtime, reruns exceeded, giving up:
260
+ atex.connection.ssh.NotConnectedError: SSH ControlMaster is not running
261
+
262
+
263
+ ---> IDENTIFIED: the problem is that orchestrator releases the remote on ANY non-0 exit code,
264
+
265
+ elif finfo.exit_code != 0:
266
+ msg = f"{remote_with_test} exited with non-zero: {finfo.exit_code}"
267
+ finfo.remote.release()
268
+
269
+ but then REUSES it if the test was not destructive - by default, any non-0
270
+ is destructive, but custom ContestOrchestrator allows reuse on 'exit 1'
271
+ as it doesn't consider regular test fail destructive
272
+ ---> but the remote is already dead, connection disconnected
273
+
274
+
275
+ - TODO: maybe have some destructive sanity check between tests?
276
+
277
+ - TODO: maybe check if there *was* a SSH ControlMaster running, but it exited with some error?
278
+
279
+ - it's /static-checks/html-links somehow destroying the system by successfully exiting with FAIL:
280
+
281
+ 2025-07-23 02:15:46 atex: TestingFarmRemote(root@10.0.178.211:22@/tmp/tmp_86glufe/key_rsa, dd48242e-3956-4ca3-bc59-a00b1b6a1a93): '/static-checks/html-links' exited with non-zero: 2, re-running (1 reruns left)
282
+ 2025-07-23 02:15:46 atex: starting '/static-checks/html-links' on TestingFarmRemote(root@10.0.178.211:22@/tmp/tmp_86glufe/key_rsa, dd48242e-3956-4ca3-bc59-a00b1b6a1a93)
283
+ 2025-07-23 02:15:47 atex: TestingFarmRemote(root@10.0.178.211:22@/tmp/tmp_86glufe/key_rsa, dd48242e-3956-4ca3-bc59-a00b1b6a1a93): '/static-checks/html-links' threw NotConnectedError during test runtime, reruns exceeded, giving up:
284
+ Traceback (most recent call last):
285
+ File "/root/atex/atex/util/threads.py", line 26, in _wrapper
286
+ ret = func(*func_args, **func_kwargs)
287
+ File "/root/atex/atex/executor/executor.py", line 233, in run_test
288
+ self.conn.cmd(("bash",), input=setup_script, text=True, check=True)
289
+ ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
290
+ File "/root/atex/atex/connection/ssh.py", line 366, in cmd
291
+ self.assert_master()
292
+ ~~~~~~~~~~~~~~~~~~^^
293
+ File "/root/atex/atex/connection/ssh.py", line 254, in assert_master
294
+ raise NotConnectedError("SSH ControlMaster is not running")
295
+ atex.connection.ssh.NotConnectedError: SSH ControlMaster is not running
296
+
297
+ ...
298
+ 2025-07-23 00:15:30 test.py:23: lib.results.report_plain:238: PASS http://www.avahi.org
299
+ 2025-07-23 00:15:30 test.py:23: lib.results.report_plain:238: PASS https://chrony-project.org/
300
+ 2025-07-23 00:15:41 test.py:21: lib.results.report_plain:238: FAIL https://www.iso.org/contents/data/standard/05/45/54534.html (HTTPSConnectionPool(host='www.iso.org', port=443): Read timed out. (read timeout=10))
301
+ 2025-07-23 00:15:44 test.py:23: lib.results.report_plain:238: PASS https://public.cyber.mil/stigs/downloads/?_dl_facet_stigs=container-platform
302
+ 2025-07-23 00:15:44 test.py:25: lib.results.report_plain:238: FAIL /