atex 0.7__tar.gz → 0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. atex-0.9/DEVEL.md +43 -0
  2. atex-0.9/PKG-INFO +178 -0
  3. atex-0.9/README.md +162 -0
  4. atex-0.9/TODO +302 -0
  5. atex-0.9/aggrtest-combined.py +74 -0
  6. atex-0.9/aggrtest.py +41 -0
  7. atex-0.9/atex/cli/fmf.py +143 -0
  8. atex-0.9/atex/cli/libvirt.py +127 -0
  9. {atex-0.7 → atex-0.9}/atex/cli/testingfarm.py +35 -13
  10. {atex-0.7 → atex-0.9}/atex/connection/__init__.py +13 -19
  11. atex-0.9/atex/connection/podman.py +63 -0
  12. {atex-0.7 → atex-0.9}/atex/connection/ssh.py +34 -52
  13. atex-0.9/atex/executor/README.md +102 -0
  14. {atex-0.7/atex/minitmt → atex-0.9/atex/executor}/RESULTS.md +32 -39
  15. {atex-0.7/atex/minitmt → atex-0.9/atex/executor}/TEST_CONTROL.md +2 -4
  16. atex-0.9/atex/executor/__init__.py +2 -0
  17. atex-0.9/atex/executor/duration.py +60 -0
  18. atex-0.9/atex/executor/executor.py +402 -0
  19. atex-0.9/atex/executor/reporter.py +101 -0
  20. {atex-0.7/atex/minitmt → atex-0.9/atex/executor}/scripts.py +37 -25
  21. {atex-0.7/atex/minitmt → atex-0.9/atex/executor}/testcontrol.py +54 -42
  22. atex-0.9/atex/fmf.py +237 -0
  23. atex-0.9/atex/orchestrator/__init__.py +3 -0
  24. atex-0.9/atex/orchestrator/aggregator.py +111 -0
  25. atex-0.9/atex/orchestrator/orchestrator.py +385 -0
  26. atex-0.9/atex/provision/__init__.py +124 -0
  27. atex-0.9/atex/provision/libvirt/__init__.py +2 -0
  28. atex-0.9/atex/provision/libvirt/libvirt.py +465 -0
  29. atex-0.9/atex/provision/libvirt/locking.py +168 -0
  30. {atex-0.7 → atex-0.9}/atex/provision/libvirt/setup-libvirt.sh +21 -1
  31. atex-0.9/atex/provision/podman/__init__.py +1 -0
  32. atex-0.9/atex/provision/podman/podman.py +274 -0
  33. atex-0.9/atex/provision/testingfarm/__init__.py +2 -0
  34. {atex-0.7 → atex-0.9}/atex/provision/testingfarm/api.py +123 -65
  35. atex-0.9/atex/provision/testingfarm/testingfarm.py +234 -0
  36. {atex-0.7 → atex-0.9}/atex/util/__init__.py +1 -6
  37. atex-0.9/atex/util/libvirt.py +18 -0
  38. {atex-0.7 → atex-0.9}/atex/util/log.py +31 -8
  39. atex-0.9/atex/util/named_mapping.py +158 -0
  40. atex-0.9/atex/util/path.py +16 -0
  41. atex-0.9/atex/util/ssh_keygen.py +14 -0
  42. atex-0.9/atex/util/threads.py +99 -0
  43. atex-0.9/contest.py +153 -0
  44. atex-0.9/orch.py +40 -0
  45. atex-0.9/prov.py +39 -0
  46. {atex-0.7 → atex-0.9}/pyproject.toml +17 -4
  47. atex-0.9/runtest.py +74 -0
  48. atex-0.9/ssh.py +86 -0
  49. atex-0.9/tests/conftest.py +15 -0
  50. atex-0.9/tests/fmf/fmf_tree/adjusted.fmf +23 -0
  51. atex-0.9/tests/fmf/fmf_tree/disabled.fmf +2 -0
  52. atex-0.9/tests/fmf/fmf_tree/environment.fmf +3 -0
  53. atex-0.9/tests/fmf/fmf_tree/filters.fmf +12 -0
  54. atex-0.9/tests/fmf/fmf_tree/inherit/child/main.fmf +3 -0
  55. atex-0.9/tests/fmf/fmf_tree/inherit/main.fmf +4 -0
  56. atex-0.9/tests/fmf/fmf_tree/listlike.fmf +3 -0
  57. atex-0.9/tests/fmf/fmf_tree/manual.fmf +2 -0
  58. atex-0.9/tests/fmf/fmf_tree/nontest.fmf +1 -0
  59. atex-0.9/tests/fmf/fmf_tree/plans/filtered.fmf +21 -0
  60. atex-0.9/tests/fmf/fmf_tree/plans/listlike.fmf +7 -0
  61. atex-0.9/tests/fmf/fmf_tree/plans/scripts.fmf +20 -0
  62. atex-0.9/tests/fmf/fmf_tree/plans/with_env.fmf +5 -0
  63. atex-0.9/tests/fmf/fmf_tree/simple/main.fmf +4 -0
  64. atex-0.9/tests/fmf/fmf_tree/story.fmf +11 -0
  65. atex-0.9/tests/fmf/fmf_tree/virtual.fmf +7 -0
  66. atex-0.9/tests/fmf/test_fmf.py +151 -0
  67. atex-0.9/tests/provision/shared.py +122 -0
  68. atex-0.9/tests/provision/test_podman.py +86 -0
  69. atex-0.9/tests/provision/test_testingfarm.py +87 -0
  70. atex-0.9/tests/testutil/__init__.py +44 -0
  71. atex-0.9/tests/testutil/timeout.py +52 -0
  72. atex-0.9/tmt_tests/.fmf/version +1 -0
  73. atex-0.9/tmt_tests/reserve/main.fmf +11 -0
  74. atex-0.9/tmt_tests/reserve/test.sh +115 -0
  75. atex-0.9/utils/finished_excludes.py +22 -0
  76. atex-0.7/PKG-INFO +0 -102
  77. atex-0.7/README.md +0 -87
  78. atex-0.7/TODO +0 -59
  79. atex-0.7/atex/cli/minitmt.py +0 -175
  80. atex-0.7/atex/minitmt/README.md +0 -180
  81. atex-0.7/atex/minitmt/__init__.py +0 -23
  82. atex-0.7/atex/minitmt/executor.py +0 -348
  83. atex-0.7/atex/minitmt/fmf.py +0 -202
  84. atex-0.7/atex/orchestrator/__init__.py +0 -59
  85. atex-0.7/atex/orchestrator/aggregator.py +0 -163
  86. atex-0.7/atex/provision/__init__.py +0 -155
  87. atex-0.7/atex/provision/libvirt/__init__.py +0 -24
  88. atex-0.7/atex/provision/nspawn/README +0 -74
  89. atex-0.7/atex/provision/podman/README +0 -59
  90. atex-0.7/atex/provision/podman/host_container.sh +0 -74
  91. atex-0.7/atex/provision/testingfarm/__init__.py +0 -29
  92. atex-0.7/atex/provision/testingfarm/foo.py +0 -1
  93. atex-0.7/logtest.py +0 -19
  94. atex-0.7/ssh.py +0 -48
  95. atex-0.7/tests/PYTEST.md +0 -11
  96. atex-0.7/tests/conftest.py +0 -50
  97. atex-0.7/tests/test_another.py +0 -4
  98. atex-0.7/tests/test_foobar.py +0 -13
  99. atex-0.7/tmt_tests/reserve/main.fmf +0 -5
  100. atex-0.7/tmt_tests/reserve/test.sh +0 -72
  101. {atex-0.7 → atex-0.9}/.editorconfig +0 -0
  102. {atex-0.7 → atex-0.9}/.gitignore +0 -0
  103. {atex-0.7 → atex-0.9}/COPYING.txt +0 -0
  104. {atex-0.7 → atex-0.9}/atex/__init__.py +0 -0
  105. {atex-0.7 → atex-0.9}/atex/cli/__init__.py +0 -0
  106. {atex-0.7 → atex-0.9}/atex/provision/libvirt/VM_PROVISION +0 -0
  107. {atex-0.7 → atex-0.9}/atex/util/README.md +0 -0
  108. {atex-0.7 → atex-0.9}/atex/util/dedent.py +0 -0
  109. {atex-0.7 → atex-0.9}/atex/util/subprocess.py +0 -0
  110. {atex-0.7 → atex-0.9}/reporter.py +0 -0
  111. {atex-0.7/tmt_tests → atex-0.9/tests/fmf/fmf_tree}/.fmf/version +0 -0
  112. {atex-0.7 → atex-0.9}/tf.py +0 -0
  113. {atex-0.7 → atex-0.9}/tmt_tests/plans/reserve.fmf +0 -0
atex-0.9/DEVEL.md ADDED
@@ -0,0 +1,43 @@
1
+ # Misc development notes
2
+
3
+ ## Contributing
4
+
5
+ TODO - coding style
6
+
7
+ ## Release workflow
8
+
9
+ NEVER commit these to git, they are ONLY for the PyPI release.
10
+
11
+ 1. Increase `version = ` in `pyproject.toml`
12
+ 1. Tag a new version in the `atex-reserve` repo, push the tag
13
+ 1. Point to that tag from `atex/provisioner/testingfarm/api.py`,
14
+ `DEFAULT_RESERVE_TEST`
15
+ 1. ...
16
+
17
+ ## Blocking functions
18
+
19
+ - this is about `get_remote(block=True/False)` and similar ones
20
+ - the key difference is that `True` blocks until the function has something
21
+ to return, or until Exception, and `False` does not
22
+ - it does NOT mean that `False` cannot block on any IO
23
+ - ie. `False` can still wait 1 second for HTTP GET to finish, effectively
24
+ "blocking" the parent process, but the important part is that it doesn't
25
+ block all the way until a Remote is provisioned
26
+ - best practice for `False`: never block on IO, offload any IO requests
27
+ to internal threads, incl. URL retrieval, on-disk file read/writes, etc.,
28
+ have the `False`-called code only check python variables on whether XYZ
29
+ is ready or not
30
+ - but, often, best practice != reality, and code complexity also has to be
31
+ considered
32
+ - finally, `False` is not a guarantee, just a wish of the caller; if a function
33
+ cannot be implemented non-blocking, it should behave as if called with `True`
34
+ rather than throwing an error
35
+ - any code using `False` should still theoretically work given that `False`
36
+ provides no guarantees on how quickly it returns, it will just work more
37
+ slowly
38
+
39
+ - TODO: `Remote.release(block=True/False)`
40
+ - dictates whether to block until the remote is successfully released or
41
+ the release fails with an Exception (`True`), or whether the caller doesn't
42
+ care and wants to fire off the release, to be handled in some background
43
+ thread (set up by the Remote/Provisioner)
atex-0.9/PKG-INFO ADDED
@@ -0,0 +1,178 @@
1
+ Metadata-Version: 2.4
2
+ Name: atex
3
+ Version: 0.9
4
+ Summary: Ad-hoc Test EXecutor
5
+ Project-URL: Homepage, https://github.com/RHSecurityCompliance/atex
6
+ License-Expression: GPL-3.0-or-later
7
+ License-File: COPYING.txt
8
+ Classifier: Operating System :: POSIX :: Linux
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Topic :: Software Development :: Testing
11
+ Requires-Python: >=3.11
12
+ Requires-Dist: fmf>=1.6
13
+ Requires-Dist: pyyaml
14
+ Requires-Dist: urllib3<3,>=2
15
+ Description-Content-Type: text/markdown
16
+
17
+ # ATEX = Ad-hoc Test EXecutor
18
+
19
+ A collection of Python APIs to provision operating systems, collect
20
+ and execute [FMF](https://github.com/teemtee/fmf/)-style tests, gather
21
+ and organize their results and generate reports from those results.
22
+
23
+ The name comes from a (fairly unique to FMF/TMT ecosystem) approach that
24
+ allows provisioning a pool of systems and scheduling tests on them as one would
25
+ on an ad-hoc pool of thread/process workers - once a worker becomes free,
26
+ it receives a test to run.
27
+ This is in contrast to splitting a large list of N tests onto M workers
28
+ like N/M, which yields significant time penalties due to tests having
29
+ very varied runtimes.
30
+
31
+ Above all, this project is meant to be a toolbox, not a silver-platter solution.
32
+ Use its Python APIs to build a CLI tool for your specific use case.
33
+ The CLI tool provided here is just for demonstration / testing, not for serious
34
+ use - we want to avoid huge modular CLIs for Every Possible Scenario. That's
35
+ the job of the Python API. Any CLI should be simple by nature.
36
+
37
+ ---
38
+
39
+ THIS PROJECT IS HEAVILY WIP, THINGS WILL MOVE AROUND, CHANGE AND OTHERWISE
40
+ BREAK. DO NOT USE IT (for now).
41
+
42
+ ---
43
+
44
+ ## License
45
+
46
+ Unless specified otherwise, any content within this repository is distributed
47
+ under the GNU GPLv3 license, see the [COPYING.txt](COPYING.txt) file for more.
48
+
49
+ ## Testing this project
50
+
51
+ There are some limited sanity tests provided via `pytest`, although:
52
+
53
+ * Some require additional variables (ie. Testing Farm) and will ERROR
54
+ without them.
55
+ * Some take a long time (ie. Testing Farm) due to system provisioning
56
+ taking a long time, so install `pytest-xdist` and run with a large `-n`.
57
+
58
+ Currently, the recommended approach is to split the execution:
59
+
60
+ ```
61
+ # synchronously, because podman CLI has concurrency issues
62
+ pytest tests/provision/test_podman.py
63
+
64
+ # in parallel, because provisioning takes a long time
65
+ export TESTING_FARM_API_TOKEN=...
66
+ export TESTING_FARM_COMPOSE=...
67
+ pytest -n 20 tests/provision/test_testingfarm.py
68
+
69
+ # fast enough for synchronous execution
70
+ pytest tests/fmf
71
+ ```
72
+
73
+ ## Parallelism and cleanup
74
+
75
+ There are effectively 3 methods of running things in parallel in Python:
76
+
77
+ - `threading.Thread` (and related `concurrent.futures` classes)
78
+ - `multiprocessing.Process` (and related `concurrent.futures` classes)
79
+ - `asyncio`
80
+
81
+ and there is no clear winner (in terms of cleanup on `SIGTERM` or Ctrl-C):
82
+
83
+ - `Thread` has signal handlers only in the main thread and is unable to
84
+ interrupt any running threads without super ugly workarounds like `sleep(1)`
85
+ in every thread, checking some "pls exit" variable
86
+ - `Process` is too heavyweight and makes sharing native Python objects hard,
87
+ but it does handle signals in each process individually
88
+ - `asyncio` handles interrupting perfectly (every `try`/`except`/`finally`
89
+ completes just fine, `KeyboardInterrupt` is raised in every async context),
90
+ but async python is still (3.14) too weird and unsupported
91
+ - `asyncio` effectively re-implements `subprocess` with a slightly different
92
+ API, same with `asyncio.Transport` and derivatives reimplementing `socket`
93
+ - 3rd party libraries like `requests` or `urllib3` don't support it, one needs
94
+ to resort to spawning these in separate threads anyway
95
+ - same with `os.*` functions and syscalls
96
+ - everything exposed via API needs to have 2 copies - async and non-async,
97
+ making it unbearable
98
+ - other stdlib bugs, ie. "large" reads returning BlockingIOError sometimes
99
+
100
+ The approach chosen by this project was to use `threading.Thread`, and
101
+ implement thread safety for classes and their functions that need it.
102
+ For example:
103
+
104
+ ```python
105
+ class MachineReserver:
106
+ def __init__(self):
107
+ self.lock = threading.RLock()
108
+ self.job = None
109
+ self.proc = None
110
+
111
+ def reserve(self, ...):
112
+ try:
113
+ ...
114
+ job = schedule_new_job_on_external_service()
115
+ with self.lock:
116
+ self.job = job
117
+ ...
118
+ while not reserved(self.job):
119
+ time.sleep(60)
120
+ ...
121
+ with self.lock:
122
+ self.proc = subprocess.Popen(["ssh", f"{user}@{host}", ...])
123
+ ...
124
+ return machine
125
+ except Exception:
126
+ self.abort()
127
+ raise
128
+
129
+ def abort(self):
130
+ with self.lock:
131
+ if self.job:
132
+ cancel_external_service(self.job)
133
+ self.job = None
134
+ if self.proc:
135
+ self.proc.kill()
136
+ self.proc = None
137
+ ```
138
+
139
+ Here, it is expected for `.reserve()` to be called in a long-running thread that
140
+ provisions a new machine on some external service, waits for it to be installed
141
+ and reserved, connects an ssh session to it and returns it back.
142
+
143
+ But equally, `.abort()` can be called from an external thread and clean up any
144
+ non-pythonic resources (external jobs, processes, temporary files, etc.) at
145
+ which point **we don't care what happens to .reserve()**, it will probably fail
146
+ with some exception, but doesn't do any harm.
147
+
148
+ Here is where `daemon=True` threads come in handy - we can simply call `.abort()`
149
+ from a `KeyboardInterrupt` (or `SIGTERM`) handler in the main thread, and just
150
+ exit, automatically killing any leftover threads that are uselessly sleeping.
151
+ (Realistically, we might want to spawn new threads to run many `.abort()`s in
152
+ parallel, but the main thread can wait for those just fine.)
153
+
154
+ It is not perfect, but it's probably the best Python can do.
155
+
156
+ Note that races can still occur between a resource being reserved and written
157
+ to `self.*` for `.abort()` to free, so resource de-allocation is not 100%
158
+ guaranteed, but single-threaded interrupting has the same issue.
159
+ Do have fallbacks (ie. max reserve times on the external service).
160
+
161
+ Also note that `.reserve()` and `.abort()` could be also called by a context
162
+ manager as `__enter__` and `__exit__`, ie. by a non-threaded caller (running
163
+ everything in the main thread).
164
+
165
+
166
+ ## Unsorted notes
167
+
168
+ TODO: codestyle from contest
169
+
170
+ ```
171
+ - this is not tmt, the goal is to make a python toolbox *for* making runcontest
172
+ style tools easily, not to replace those tools with tmt-style CLI syntax
173
+
174
+ - the whole point is to make usecase-targeted easy-to-use tools that don't
175
+ intimidate users with 1 KB long command line, and runcontest is a nice example
176
+
177
+ - TL;DR - use a modular pythonic approach, not a gluetool-style long CLI
178
+ ```
atex-0.9/README.md ADDED
@@ -0,0 +1,162 @@
1
+ # ATEX = Ad-hoc Test EXecutor
2
+
3
+ A collection of Python APIs to provision operating systems, collect
4
+ and execute [FMF](https://github.com/teemtee/fmf/)-style tests, gather
5
+ and organize their results and generate reports from those results.
6
+
7
+ The name comes from a (fairly unique to FMF/TMT ecosystem) approach that
8
+ allows provisioning a pool of systems and scheduling tests on them as one would
9
+ on an ad-hoc pool of thread/process workers - once a worker becomes free,
10
+ it receives a test to run.
11
+ This is in contrast to splitting a large list of N tests onto M workers
12
+ like N/M, which yields significant time penalties due to tests having
13
+ very varied runtimes.
14
+
15
+ Above all, this project is meant to be a toolbox, not a silver-platter solution.
16
+ Use its Python APIs to build a CLI tool for your specific use case.
17
+ The CLI tool provided here is just for demonstration / testing, not for serious
18
+ use - we want to avoid huge modular CLIs for Every Possible Scenario. That's
19
+ the job of the Python API. Any CLI should be simple by nature.
20
+
21
+ ---
22
+
23
+ THIS PROJECT IS HEAVILY WIP, THINGS WILL MOVE AROUND, CHANGE AND OTHERWISE
24
+ BREAK. DO NOT USE IT (for now).
25
+
26
+ ---
27
+
28
+ ## License
29
+
30
+ Unless specified otherwise, any content within this repository is distributed
31
+ under the GNU GPLv3 license, see the [COPYING.txt](COPYING.txt) file for more.
32
+
33
+ ## Testing this project
34
+
35
+ There are some limited sanity tests provided via `pytest`, although:
36
+
37
+ * Some require additional variables (ie. Testing Farm) and will ERROR
38
+ without them.
39
+ * Some take a long time (ie. Testing Farm) due to system provisioning
40
+ taking a long time, so install `pytest-xdist` and run with a large `-n`.
41
+
42
+ Currently, the recommended approach is to split the execution:
43
+
44
+ ```
45
+ # synchronously, because podman CLI has concurrency issues
46
+ pytest tests/provision/test_podman.py
47
+
48
+ # in parallel, because provisioning takes a long time
49
+ export TESTING_FARM_API_TOKEN=...
50
+ export TESTING_FARM_COMPOSE=...
51
+ pytest -n 20 tests/provision/test_testingfarm.py
52
+
53
+ # fast enough for synchronous execution
54
+ pytest tests/fmf
55
+ ```
56
+
57
+ ## Parallelism and cleanup
58
+
59
+ There are effectively 3 methods of running things in parallel in Python:
60
+
61
+ - `threading.Thread` (and related `concurrent.futures` classes)
62
+ - `multiprocessing.Process` (and related `concurrent.futures` classes)
63
+ - `asyncio`
64
+
65
+ and there is no clear winner (in terms of cleanup on `SIGTERM` or Ctrl-C):
66
+
67
+ - `Thread` has signal handlers only in the main thread and is unable to
68
+ interrupt any running threads without super ugly workarounds like `sleep(1)`
69
+ in every thread, checking some "pls exit" variable
70
+ - `Process` is too heavyweight and makes sharing native Python objects hard,
71
+ but it does handle signals in each process individually
72
+ - `asyncio` handles interrupting perfectly (every `try`/`except`/`finally`
73
+ completes just fine, `KeyboardInterrupt` is raised in every async context),
74
+ but async python is still (3.14) too weird and unsupported
75
+ - `asyncio` effectively re-implements `subprocess` with a slightly different
76
+ API, same with `asyncio.Transport` and derivatives reimplementing `socket`
77
+ - 3rd party libraries like `requests` or `urllib3` don't support it, one needs
78
+ to resort to spawning these in separate threads anyway
79
+ - same with `os.*` functions and syscalls
80
+ - everything exposed via API needs to have 2 copies - async and non-async,
81
+ making it unbearable
82
+ - other stdlib bugs, ie. "large" reads returning BlockingIOError sometimes
83
+
84
+ The approach chosen by this project was to use `threading.Thread`, and
85
+ implement thread safety for classes and their functions that need it.
86
+ For example:
87
+
88
+ ```python
89
+ class MachineReserver:
90
+ def __init__(self):
91
+ self.lock = threading.RLock()
92
+ self.job = None
93
+ self.proc = None
94
+
95
+ def reserve(self, ...):
96
+ try:
97
+ ...
98
+ job = schedule_new_job_on_external_service()
99
+ with self.lock:
100
+ self.job = job
101
+ ...
102
+ while not reserved(self.job):
103
+ time.sleep(60)
104
+ ...
105
+ with self.lock:
106
+ self.proc = subprocess.Popen(["ssh", f"{user}@{host}", ...])
107
+ ...
108
+ return machine
109
+ except Exception:
110
+ self.abort()
111
+ raise
112
+
113
+ def abort(self):
114
+ with self.lock:
115
+ if self.job:
116
+ cancel_external_service(self.job)
117
+ self.job = None
118
+ if self.proc:
119
+ self.proc.kill()
120
+ self.proc = None
121
+ ```
122
+
123
+ Here, it is expected for `.reserve()` to be called in a long-running thread that
124
+ provisions a new machine on some external service, waits for it to be installed
125
+ and reserved, connects an ssh session to it and returns it back.
126
+
127
+ But equally, `.abort()` can be called from an external thread and clean up any
128
+ non-pythonic resources (external jobs, processes, temporary files, etc.) at
129
+ which point **we don't care what happens to .reserve()**, it will probably fail
130
+ with some exception, but doesn't do any harm.
131
+
132
+ Here is where `daemon=True` threads come in handy - we can simply call `.abort()`
133
+ from a `KeyboardInterrupt` (or `SIGTERM`) handler in the main thread, and just
134
+ exit, automatically killing any leftover threads that are uselessly sleeping.
135
+ (Realistically, we might want to spawn new threads to run many `.abort()`s in
136
+ parallel, but the main thread can wait for those just fine.)
137
+
138
+ It is not perfect, but it's probably the best Python can do.
139
+
140
+ Note that races can still occur between a resource being reserved and written
141
+ to `self.*` for `.abort()` to free, so resource de-allocation is not 100%
142
+ guaranteed, but single-threaded interrupting has the same issue.
143
+ Do have fallbacks (ie. max reserve times on the external service).
144
+
145
+ Also note that `.reserve()` and `.abort()` could be also called by a context
146
+ manager as `__enter__` and `__exit__`, ie. by a non-threaded caller (running
147
+ everything in the main thread).
148
+
149
+
150
+ ## Unsorted notes
151
+
152
+ TODO: codestyle from contest
153
+
154
+ ```
155
+ - this is not tmt, the goal is to make a python toolbox *for* making runcontest
156
+ style tools easily, not to replace those tools with tmt-style CLI syntax
157
+
158
+ - the whole point is to make usecase-targeted easy-to-use tools that don't
159
+ intimidate users with 1 KB long command line, and runcontest is a nice example
160
+
161
+ - TL;DR - use a modular pythonic approach, not a gluetool-style long CLI
162
+ ```
atex-0.9/TODO ADDED
@@ -0,0 +1,302 @@
1
+
2
+ - proper cleaning of tmpdirs on the host system ?
3
+
4
+ - delete tmt_tests (it's now in its own branch)
5
+
6
+ - get rid of testout_fobj, replace it with testout_fd
7
+ - and os.open() in .start() / os.close() in .stop()
8
+ - have some reporter function to close it manually, close_testout()
9
+ - and call it from executor after doing Popen, to avoid opened fd hanging around
10
+ in the main python process when we don't need it
11
+ - make sure to open with O_WRONLY | O_CREAT | O_APPEND, so reconnects don't override the log
12
+ - verify by 'ls -l /proc/$pyproc/fd' to ensure there are no .../testout.temp fds open
13
+
14
+ - test special cases; ie.
15
+
16
+ - Executor running test and the remote loses connection
17
+ (ie. iptables -I INPUT 1 -j DROP)
18
+
19
+ - ssh after reboot doesn't actually work (ssh.ConnectError)
20
+ - might need generalization of all Connection exceptions
21
+ - does the test result get saved anywhere? .. as 'infra' ?
22
+
23
+ - are non-0 exit codes and exceptions raised by orchestrator, like
24
+ atex: unexpected exception happened while running ...
25
+ logged anywhere aside from ./contest.py stderr?
26
+
27
+ - raise non-0 exit code and unexpected exceptions to util.warning
28
+
29
+ - orchestrator is still calling Remote.release() directly, which may block
30
+ for a long time; have some worker/queue that does it in the background
31
+ - probably as some new BackgroundRemoteReleaser class
32
+ - would be .start()ed from orchestrator start
33
+ - orchestrator would end it from .stop()
34
+ - the class would have a worker function running in the thread,
35
+ reading from a SimpleQueue and calling .release()
36
+ - if it reads None, it ends
37
+ - the class would have some .terminate(), which would push None
38
+ to the queue and wait for .join()
39
+ - orchestrator could return that waiting-for-join function
40
+ as a callable in stop_defer()
41
+
42
+ --> actually, do it differently:
43
+ - make existing ThreadQueue more similar to ThreadPoolExecutor
44
+ by having a configurable 'max_workers' argument, default = infinity
45
+ (and thus start_thread() gets renamed to submit())
46
+
47
+ - then make a simplified version of it that doesn't need to
48
+ return anything, just runs functions pushed to queue
49
+ - and then use it for .release() with max_workers=2 or so
50
+
51
+ - (make sure to self.lock the ThreadPoolExecutor for actions that need it)
52
+ - and have a semaphore for tracking how many threads are active,
53
+ giving .submit() a clue whether to spawn a new one
54
+ - have threads self-shutdown themselves by .get(block=False) checking
55
+ whether the queue is empty - if it is, shut down the worker
56
+
57
+ - contest bug?, reporting log with full path
58
+ - :238: PASS / [report.html, scan-arf.xml.gz, /var/lib/libvirt/images/contest-osbuild.txt]
59
+ - does it upload correctly with name: contest-osbuild.txt ?
60
+
61
+ - priority system for contest
62
+ - 'extra-priority' or something like that
63
+ - run problematic tests (ie. image-builder) first, so they can
64
+ rerun while others run
65
+
66
+ - per-test rerun counts ('extra-reruns')
67
+ - root-level main.fmf could set it to 1, except RHEL-10 (because OpenGPG dnf bug) to 3
68
+ - image-builder test could set it to 5
69
+
70
+ - make testing farm point to an ATEX repo tag when downloading the
71
+ reserve test, to freeze given ATEX versions in time (so they're not
72
+ broken by future git commits)
73
+
74
+ - also parametrize reserve test via module-level constants or TestingFarmProvisioner args
75
+
76
+ - make it python3.12 (RHEL-10) compatible, ideally 3.11 (RHEL-9)
77
+
78
+ - in the CLI tool (for contest), block further SIGINT/SIGTERM
79
+ while already running cleanup (regular one, not induced by ctrl-c)
80
+
81
+ - got hit by this in podman provision; 'podman container rm -t 0 -f'
82
+ was already removing a container (waiting for kill) when the user
83
+ issued SIGINT and it killed the 'rm', leaving container behind
84
+
85
+ - notable TODOs
86
+ - testingfarm and automatically limiting 'class Request' refreshes
87
+ - testingfarm provisioner scaling up/down remotes, to avoid 40 "empty"
88
+ remotes being used up while waiting for the last 1-3 tests to finish
89
+ - atex/util/threads.py documentation
90
+ - generic ConnectError-ish for all Connections
91
+
92
+ - interactive mode for Executor (without redirecting stderr to file,
93
+ and with stdin routed through)
94
+
95
+ - enable gpgchck=1 on TestingFarm RHEL-based systems
96
+ - TODO: check RHEL-8 and 10 too, are they the same?
97
+ - /etc/yum.repos.d/rhel.repo
98
+ - search for "^name=rhel-BaseOS$" to check the file is not OS default
99
+ - replace all "^gpgcheck=0$" with 1
100
+
101
+ - appending to a previous results.json.gz + files_dir
102
+ - gzip should be able to append a new gz header, and we can reuse a files_dir easily
103
+ - maybe append=False param for Orchestrator/Aggregator that would
104
+ - return error if False and one of the two exists
105
+ - append to them if True
106
+ - add test for it
107
+
108
+ - testingfarm failure backoff cooldown
109
+ - if provisioning fails, retry getting new machines in increasing intervals;
110
+ ie. wait 1min, 2min, 4min, 8min, 16min, etc.
111
+ - maybe ditch the concept of an "infra retry" for provisioning, and just always
112
+ expect infinite retries based on ^^^, or use a high number like 8 (backoffs)
113
+ or an absolute giveup time
114
+
115
+ - different approach: add .provision(count=1) to the Provisioner API
116
+ - allows the user to signal to the Provisioner how many Remotes to provision
117
+ and (eventually) return via .get_remote()
118
+ - maybe raise some unique Exception if .get_remote() is called and there is no
119
+ provisioning in progress (it will never return a remote, user has to call
120
+ .provision() first)
121
+ - gets rid of static max_remotes=20 and lets the user request remotes on-the-fly
122
+ - there would probably be absolute_max_remotes=100 or a similar safety fallback
123
+ - have it as class attribute (constant), not __init__ argument
124
+ - Orchestrator could .provision(20) when first starting up, and call .provision()
125
+ to replace a destroyed Remote, BUT CRITICALLY, it could simply not call it
126
+ when to_run is empty and there's >= 2 Remotes already ready in setup queue
127
+ (for use by reruns)
128
+ - this effectively "shuts down" the reservations as tests wind down
129
+ - .provision() should probably internally clamp count= to absolute max, not raise
130
+ an Exception, to allow the user to say "as many as possible" by 'math.inf'
131
+
132
+ - centralized TF API querying for one Provisioner and its remotes
133
+ - possibly do that in one class TestingFarmAPI instance - throttle API queries
134
+ globally to ie. 1/sec, maybe via some queue
135
+ - or a derived RateLimitedTestingFarmAPI instance ?
136
+
137
+ - some interface on SIGUSR1 (?) on the state of the orchestration
138
+ - what tests are running / how many setups running / how many remotes / etc.
139
+ - how long have the running tests been running?
140
+ - what tests are still in to_run
141
+
142
+ - more tests
143
+ - testcontrol (incl. reporting many results)
144
+ - incl. reporting after reboot
145
+ - testingfarm API directly
146
+ - API functions
147
+ - class Request
148
+ - class Reserve + manual use by ssh(1), see tf.py
149
+ - ssh connection tests (both Standalone and Managed) using
150
+ systemd-based podman container (PodmanProvisioner with extra run opts)
151
+ - reporter (part of executor)
152
+ - executor
153
+ - incl. corner cases (see above)
154
+ - shared_dir across multiple parallel Executor instances
155
+ - reboot
156
+ - partial results preserved across reboots
157
+ - disconnect without requested reconnect --> error
158
+ - etc.
159
+ - aggregators
160
+ - orchestrator
161
+ - incl. corner cases like setup failing and being retried
162
+ - provisioners (see TODO at the end of shared.py)
163
+ - start() and stop()
164
+ - stop_defer()
165
+ - testingfarm simple sanity reserve for centos stream 8 9 10
166
+ - to catch distro-specific bugs
167
+
168
+ - clarify, in docstrings for API definitions, what functions may block
169
+ and which ones may be run in different threads
170
+
171
+ - demo examples
172
+ - parallel Executors on one system with multiple Connections
173
+ - Aggregator in append=True mode, re-running tests that never finished
174
+ (building up excludes=[] from finished results.json.gz)
175
+ - image mode, implemented as additional run_setup() Orchestrator hook,
176
+ that collects required packages from all to_run tests, installs them,
177
+ does bootc switch, and that's it
178
+ - additional waiving logic on aggregated JSON results, before rendering
179
+ HTML
180
+
181
+ - formalize coding style in some Markdown doc
182
+ - mention that class member variables and functions that are for internal
183
+ use only (private) start with _ (underscore), everything else is considered
184
+ public, incl. attributes / variables
185
+ - TODO: actually revise the codebase, applying this
186
+
187
+ - make rsync install in podman provisioner optional
188
+ - maybe check with 'rpm -q' and install only if missing?
189
+ - maybe install_rsync=True or install_deps=True __init__ arg?
190
+
191
+ - detailed logging.DEBUG log of everything, for debugging
192
+ - to be redirected to a file, with INFO on the console
193
+ - maybe without pipeline output, that seems useless for debugging
194
+ - but with every ssh attempt, every reconnection, every provisioning query, etc.
195
+
196
+ - properly document which parts of various APIs need to be thread safe,
197
+ which functions are safe, which are not
198
+
199
+ - mention in Orchestrator documentation that it is explicitly about running
200
+ tests on **ONE** platform (string)
201
+
202
+ - reevaluate which func helpers (ie. _private_helper()) make sense on a module
203
+ level vs inside a class
204
+
205
+ - document that Connection MUST use some subprocess-style commands
206
+ for cmd and rsync, and support func=
207
+ - because Executor uses func=Popen to run a test in the background
208
+
209
+ - probably rename ManagedSSHConn -> ManagedSSHConnection
210
+ - for all Conn classes
211
+
212
+ - get rid of skip_frames=
213
+ - this would make Connection methods using func= fully subprocess-compatible,
214
+ ie. with func=subprocess.Popen instead of having to use util.*
215
+ - instead, have a list of names to skip on top of util/log.py,
216
+ UNWIND = [
217
+ "util.subprocess_run",
218
+ ...
219
+ ]
220
+ - mention the list is iterated multiple times until nothing matches, so that
221
+ two levels of wrappers can be unwound just by including both wrappers in the list
222
+
223
+ - add an 'EXTRADEBUG = 5' to util/extra_debug.py and use it for
224
+ - pipeline.log
225
+ - printing domain XMLs
226
+ - etc.
227
+
228
+ - atex tf sr
229
+ - make --state optional, have it be action='append'
230
+ - have the default --state be all non-end states and print '(running)' or '(queued)'
231
+ etc. on each line
232
+
233
+ - rename atex/provision --> atex/provisioner
234
+
235
+ - move aggregator out of orchestrator
236
+
237
+ - remove Provisioner.stop_defer(), have just a blocking .stop() that may internally
238
+ parallelize resource releasing if necessary
239
+ - stop_defer() made sense for TF and other state-less releases, but doesn't for
240
+ libvirt, which needs to release all first, and *then* close the connection
241
+ - give .stop() a block=True/False similar to Remote.release(block=True/False)
242
+ - "just stop/release it in the background, I don't care about return"
243
+ - useful when calling .stop() on many Provisioners from the main program
244
+ - these should be daemon=False threads; to make sure they finish before exit
245
+
246
+ --------------------------
247
+
248
+ atex: unexpected exception happened while running '/hardening/host-os/oscap/stig'
249
+
250
+ 2025-06-17 07:53:03 atex: unexpected exception happened while running '/per-rule/12/oscap' on TestingFarmRemote(RHEL-9.7.0-Nightly @ x86_64, 0x7f0e5cf46df0):
251
+ atex.executor.testcontrol.BadReportJSONError: file 'out.txt' already exists
252
+
253
+ 2025-06-17 08:23:13 atex: unexpected exception happened while running '/hardening/host-os/oscap/stig' on TestingFarmRemote(RHEL-9.7.0-Nightly @ x86_64, 0x7f0e5cef3610):
254
+ atex.connection.ssh.ConnectError: SSH ControlMaster failed to start on /tmp/atex-ssh-de1w7ayl with 255:
255
+ b'kex_exchange_identification: read: Connection reset by peer\r\n'
256
+
257
+
258
+
259
+ 2025-07-23 02:15:47 atex: TestingFarmRemote(root@10.0.178.211:22@/tmp/tmp_86glufe/key_rsa, dd48242e-3956-4ca3-bc59-a00b1b6a1a93): '/static-checks/html-links' threw NotConnectedError during test runtime, reruns exceeded, giving up:
260
+ atex.connection.ssh.NotConnectedError: SSH ControlMaster is not running
261
+
262
+
263
+ ---> IDENTIFIED: the problem is that orchestrator releases the remote on ANY non-0 exit code,
264
+
265
+ elif finfo.exit_code != 0:
266
+ msg = f"{remote_with_test} exited with non-zero: {finfo.exit_code}"
267
+ finfo.remote.release()
268
+
269
+ but then REUSES it if the test was not destructive - by default, any non-0
270
+ is destructive, but custom ContestOrchestrator allows reuse on 'exit 1'
271
+ as it doesn't consider regular test fail destructive
272
+ ---> but the remote is already dead, connection disconnected
273
+
274
+
275
+ - TODO: maybe have some destructive sanity check between tests?
276
+
277
+ - TODO: maybe check if there *was* a SSH ControlMaster running, but it exited with some error?
278
+
279
+ - it's /static-checks/html-links somehow destroying the system by successfully exiting with FAIL:
280
+
281
+ 2025-07-23 02:15:46 atex: TestingFarmRemote(root@10.0.178.211:22@/tmp/tmp_86glufe/key_rsa, dd48242e-3956-4ca3-bc59-a00b1b6a1a93): '/static-checks/html-links' exited with non-zero: 2, re-running (1 reruns left)
282
+ 2025-07-23 02:15:46 atex: starting '/static-checks/html-links' on TestingFarmRemote(root@10.0.178.211:22@/tmp/tmp_86glufe/key_rsa, dd48242e-3956-4ca3-bc59-a00b1b6a1a93)
283
+ 2025-07-23 02:15:47 atex: TestingFarmRemote(root@10.0.178.211:22@/tmp/tmp_86glufe/key_rsa, dd48242e-3956-4ca3-bc59-a00b1b6a1a93): '/static-checks/html-links' threw NotConnectedError during test runtime, reruns exceeded, giving up:
284
+ Traceback (most recent call last):
285
+ File "/root/atex/atex/util/threads.py", line 26, in _wrapper
286
+ ret = func(*func_args, **func_kwargs)
287
+ File "/root/atex/atex/executor/executor.py", line 233, in run_test
288
+ self.conn.cmd(("bash",), input=setup_script, text=True, check=True)
289
+ ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
290
+ File "/root/atex/atex/connection/ssh.py", line 366, in cmd
291
+ self.assert_master()
292
+ ~~~~~~~~~~~~~~~~~~^^
293
+ File "/root/atex/atex/connection/ssh.py", line 254, in assert_master
294
+ raise NotConnectedError("SSH ControlMaster is not running")
295
+ atex.connection.ssh.NotConnectedError: SSH ControlMaster is not running
296
+
297
+ ...
298
+ 2025-07-23 00:15:30 test.py:23: lib.results.report_plain:238: PASS http://www.avahi.org
299
+ 2025-07-23 00:15:30 test.py:23: lib.results.report_plain:238: PASS https://chrony-project.org/
300
+ 2025-07-23 00:15:41 test.py:21: lib.results.report_plain:238: FAIL https://www.iso.org/contents/data/standard/05/45/54534.html (HTTPSConnectionPool(host='www.iso.org', port=443): Read timed out. (read timeout=10))
301
+ 2025-07-23 00:15:44 test.py:23: lib.results.report_plain:238: PASS https://public.cyber.mil/stigs/downloads/?_dl_facet_stigs=container-platform
302
+ 2025-07-23 00:15:44 test.py:25: lib.results.report_plain:238: FAIL /