atex 0.7__py3-none-any.whl → 0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atex/cli/fmf.py +93 -0
- atex/cli/testingfarm.py +23 -13
- atex/connection/__init__.py +0 -8
- atex/connection/ssh.py +3 -19
- atex/executor/__init__.py +2 -0
- atex/executor/duration.py +60 -0
- atex/executor/executor.py +378 -0
- atex/executor/reporter.py +106 -0
- atex/{minitmt → executor}/scripts.py +30 -24
- atex/{minitmt → executor}/testcontrol.py +16 -17
- atex/{minitmt/fmf.py → fmf.py} +49 -34
- atex/orchestrator/__init__.py +2 -59
- atex/orchestrator/aggregator.py +66 -123
- atex/orchestrator/orchestrator.py +324 -0
- atex/provision/__init__.py +68 -99
- atex/provision/testingfarm/__init__.py +2 -29
- atex/provision/testingfarm/api.py +55 -40
- atex/provision/testingfarm/testingfarm.py +236 -0
- atex/util/__init__.py +1 -6
- atex/util/log.py +8 -0
- atex/util/path.py +16 -0
- atex/util/ssh_keygen.py +14 -0
- atex/util/threads.py +55 -0
- {atex-0.7.dist-info → atex-0.8.dist-info}/METADATA +97 -2
- atex-0.8.dist-info/RECORD +37 -0
- atex/cli/minitmt.py +0 -175
- atex/minitmt/__init__.py +0 -23
- atex/minitmt/executor.py +0 -348
- atex/provision/nspawn/README +0 -74
- atex/provision/testingfarm/foo.py +0 -1
- atex-0.7.dist-info/RECORD +0 -32
- {atex-0.7.dist-info → atex-0.8.dist-info}/WHEEL +0 -0
- {atex-0.7.dist-info → atex-0.8.dist-info}/entry_points.txt +0 -0
- {atex-0.7.dist-info → atex-0.8.dist-info}/licenses/COPYING.txt +0 -0
atex/provision/__init__.py
CHANGED
|
@@ -7,60 +7,27 @@ from .. import connection as _connection
|
|
|
7
7
|
|
|
8
8
|
class Provisioner:
|
|
9
9
|
"""
|
|
10
|
-
A resource (machine/system) provider.
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
any time and need to handle it safely.
|
|
32
|
-
Ie. once released(), an instance must never return alive() == True.
|
|
33
|
-
|
|
34
|
-
# explicit method calls
|
|
35
|
-
res = Provisioner(...)
|
|
36
|
-
res.reserve()
|
|
37
|
-
conn = res.connection()
|
|
38
|
-
conn.connect()
|
|
39
|
-
conn.ssh('ls /')
|
|
40
|
-
conn.disconnect()
|
|
41
|
-
res.release()
|
|
42
|
-
|
|
43
|
-
# via a context manager
|
|
44
|
-
with Provisioner(...) as res:
|
|
45
|
-
with res.connection() as conn:
|
|
46
|
-
conn.ssh('ls /')
|
|
47
|
-
|
|
48
|
-
If a Provisioner class needs additional configuration, it should do so via
|
|
49
|
-
class (not instance) attributes, allowing it to be instantiated many times.
|
|
50
|
-
|
|
51
|
-
class ConfiguredProvisioner(Provisioner):
|
|
52
|
-
resource_hub = 'https://...'
|
|
53
|
-
login = 'joe'
|
|
54
|
-
|
|
55
|
-
# or dynamically
|
|
56
|
-
name = 'joe'
|
|
57
|
-
cls = type(
|
|
58
|
-
f'Provisioner_for_{name}',
|
|
59
|
-
(Provisioner,),
|
|
60
|
-
{'resource_hub': 'https://...', 'login': name},
|
|
61
|
-
)
|
|
62
|
-
|
|
63
|
-
These attributes can then be accessed from __init__ or any other function.
|
|
10
|
+
A remote resource (machine/system) provider.
|
|
11
|
+
|
|
12
|
+
The main interface is .get_remote() that returns a connected class Remote
|
|
13
|
+
instance for use by the user, to be .release()d when not needed anymore,
|
|
14
|
+
with Provisioner automatically getting a replacement for it, to be returned
|
|
15
|
+
via .get_remote() later.
|
|
16
|
+
|
|
17
|
+
p = Provisioner()
|
|
18
|
+
p.start()
|
|
19
|
+
remote = p.get_remote()
|
|
20
|
+
remote.cmd(["ls", "/"])
|
|
21
|
+
remote.release()
|
|
22
|
+
p.stop()
|
|
23
|
+
|
|
24
|
+
with Provisioner() as p:
|
|
25
|
+
remote = p.get_remote()
|
|
26
|
+
...
|
|
27
|
+
remote.release()
|
|
28
|
+
|
|
29
|
+
Note that .stop() or .stop_defer() may be called from a different
|
|
30
|
+
thread, asynchronously to any other functions.
|
|
64
31
|
"""
|
|
65
32
|
|
|
66
33
|
def __init__(self):
|
|
@@ -70,41 +37,58 @@ class Provisioner:
|
|
|
70
37
|
"""
|
|
71
38
|
self.lock = _threading.RLock()
|
|
72
39
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
40
|
+
def get_remote(self, block=True):
|
|
41
|
+
"""
|
|
42
|
+
Get a connected class Remote instance.
|
|
43
|
+
|
|
44
|
+
If 'block' is True, wait for the remote to be available and connected,
|
|
45
|
+
otherwise return None if there is no Remote available yet.
|
|
46
|
+
"""
|
|
47
|
+
raise NotImplementedError(f"'get_remote' not implemented for {self.__class__.__name__}")
|
|
48
|
+
|
|
49
|
+
def start(self):
|
|
50
|
+
"""
|
|
51
|
+
Start the Provisioner instance, start any provisioning-related
|
|
52
|
+
processes that lead to systems being reserved.
|
|
53
|
+
"""
|
|
54
|
+
raise NotImplementedError(f"'start' not implemented for {self.__class__.__name__}")
|
|
55
|
+
|
|
56
|
+
def stop(self):
|
|
57
|
+
"""
|
|
58
|
+
Stop the Provisioner instance, freeing all reserved resources,
|
|
59
|
+
calling .release() on all Remote instances that were created.
|
|
60
|
+
"""
|
|
61
|
+
raise NotImplementedError(f"'stop' not implemented for {self.__class__.__name__}")
|
|
62
|
+
|
|
63
|
+
def stop_defer(self):
|
|
64
|
+
"""
|
|
65
|
+
Enable an external caller to stop the Provisioner instance,
|
|
66
|
+
deferring resource deallocation to the caller.
|
|
67
|
+
|
|
68
|
+
Return an iterable of argument-free thread-safe callables that can be
|
|
69
|
+
called, possibly in parallel, to free up resources.
|
|
70
|
+
Ie. a list of 200 .release() functions, to be called in a thread pool
|
|
71
|
+
by the user, speeding up cleanup.
|
|
72
|
+
"""
|
|
73
|
+
return self.stop
|
|
74
|
+
|
|
75
|
+
def __enter__(self):
|
|
76
|
+
self.start()
|
|
77
|
+
return self
|
|
78
|
+
|
|
79
|
+
def __exit__(self, exc_type, exc_value, traceback):
|
|
80
|
+
self.stop()
|
|
98
81
|
|
|
99
82
|
|
|
100
83
|
class Remote(_connection.Connection):
|
|
101
84
|
"""
|
|
102
85
|
Representation of a provisioned (reserved) remote system, providing
|
|
103
|
-
a Connection-like API in addition system management helpers.
|
|
86
|
+
a Connection-like API in addition to system management helpers.
|
|
104
87
|
|
|
105
|
-
An instance of Remote is typically prepared by a Provisioner and
|
|
106
|
-
|
|
107
|
-
for repeated reserve/release cycles, hence the lack
|
|
88
|
+
An instance of Remote is typically prepared by a Provisioner and lent out
|
|
89
|
+
for further use, to be .release()d by the user (if destroyed).
|
|
90
|
+
It is not meant for repeated reserve/release cycles, hence the lack
|
|
91
|
+
of .reserve().
|
|
108
92
|
|
|
109
93
|
Also note that Remote can be used via Context Manager, but does not
|
|
110
94
|
do automatic .release(), the manager only handles the built-in Connection.
|
|
@@ -114,27 +98,12 @@ class Remote(_connection.Connection):
|
|
|
114
98
|
with a callback, or a try/finally block.
|
|
115
99
|
"""
|
|
116
100
|
|
|
117
|
-
# TODO: pass platform as arg ?
|
|
118
|
-
#def __init__(self, platform, *args, **kwargs):
|
|
119
|
-
# """
|
|
120
|
-
# Initialize a new Remote instance based on a Connection instance.
|
|
121
|
-
# If extending __init__, always call 'super().__init__(conn)' at the top.
|
|
122
|
-
# """
|
|
123
|
-
# self.lock = _threading.RLock()
|
|
124
|
-
# self.platform = platform
|
|
125
|
-
|
|
126
101
|
def release(self):
|
|
127
102
|
"""
|
|
128
|
-
Release (de-provision) the remote resource
|
|
103
|
+
Release (de-provision) the remote resource.
|
|
129
104
|
"""
|
|
130
105
|
raise NotImplementedError(f"'release' not implemented for {self.__class__.__name__}")
|
|
131
106
|
|
|
132
|
-
def alive(self):
|
|
133
|
-
"""
|
|
134
|
-
Return True if the remote resource is still valid and reserved.
|
|
135
|
-
"""
|
|
136
|
-
raise NotImplementedError(f"'alive' not implemented for {self.__class__.__name__}")
|
|
137
|
-
|
|
138
107
|
|
|
139
108
|
_submodules = [
|
|
140
109
|
info.name for info in _pkgutil.iter_modules(__spec__.submodule_search_locations)
|
|
@@ -1,29 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
from .. import Provisioner, Remote
|
|
4
|
-
|
|
5
|
-
#from . import api
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class TestingFarmRemote(Remote):
|
|
9
|
-
def __init__(self, connection, request):
|
|
10
|
-
"""
|
|
11
|
-
'connection' is a class Connection instance.
|
|
12
|
-
|
|
13
|
-
'request' is a testing farm Request class instance.
|
|
14
|
-
"""
|
|
15
|
-
super().__init__(connection)
|
|
16
|
-
self.request = request
|
|
17
|
-
self.valid = True
|
|
18
|
-
|
|
19
|
-
def release(self):
|
|
20
|
-
self.disconnect()
|
|
21
|
-
self.request.cancel()
|
|
22
|
-
self.valid = False
|
|
23
|
-
|
|
24
|
-
def alive(self):
|
|
25
|
-
return self.valid
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class TestingFarmProvisioner(Provisioner):
|
|
29
|
-
pass
|
|
1
|
+
from . import api # noqa: F401
|
|
2
|
+
from .testingfarm import TestingFarmProvisioner, TestingFarmRemote # noqa: F401
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import os
|
|
2
|
-
import sys
|
|
3
2
|
import re
|
|
4
3
|
import time
|
|
5
4
|
import tempfile
|
|
6
5
|
import textwrap
|
|
6
|
+
import threading
|
|
7
7
|
import subprocess
|
|
8
8
|
import collections
|
|
9
9
|
|
|
@@ -17,7 +17,7 @@ import urllib3
|
|
|
17
17
|
DEFAULT_API_URL = "https://api.testing-farm.io/v0.1"
|
|
18
18
|
|
|
19
19
|
# how many seconds to sleep for during API polling
|
|
20
|
-
API_QUERY_DELAY =
|
|
20
|
+
API_QUERY_DELAY = 30
|
|
21
21
|
|
|
22
22
|
RESERVE_TASK = {
|
|
23
23
|
"fmf": {
|
|
@@ -32,10 +32,10 @@ RESERVE_TASK = {
|
|
|
32
32
|
# https://gitlab.com/testing-farm/nucleus/-/blob/main/api/src/tft/nucleus/api/core/schemes/test_request.py
|
|
33
33
|
END_STATES = ("error", "complete", "canceled")
|
|
34
34
|
|
|
35
|
-
# always have at most
|
|
35
|
+
# always have at most 10 outstanding HTTP requests to every given API host,
|
|
36
36
|
# shared by all instances of all classes here, to avoid flooding the host
|
|
37
37
|
# by multi-threaded users
|
|
38
|
-
_http = urllib3.PoolManager(maxsize=
|
|
38
|
+
_http = urllib3.PoolManager(maxsize=10, block=True)
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
class TestingFarmError(Exception):
|
|
@@ -132,15 +132,20 @@ class TestingFarmAPI:
|
|
|
132
132
|
return self._query("GET", f"/composes/{ranch}")
|
|
133
133
|
|
|
134
134
|
def search_requests(
|
|
135
|
-
self, state,
|
|
135
|
+
self, *, state, ranch=None,
|
|
136
|
+
mine=True, user_id=None, token_id=None,
|
|
137
|
+
created_before=None, created_after=None,
|
|
136
138
|
):
|
|
137
139
|
"""
|
|
138
140
|
'state' is one of 'running', 'queued', etc., and is required by the API.
|
|
139
141
|
|
|
142
|
+
'ranch' is 'public' or 'redhat', or (probably?) all if left empty.
|
|
143
|
+
|
|
140
144
|
If 'mine' is True and a token was given, return only requests for that
|
|
141
145
|
token (user), otherwise return *all* requests (use extra filters pls).
|
|
142
146
|
|
|
143
|
-
'
|
|
147
|
+
'user_id' and 'token_id' are search API parameters - if not given and
|
|
148
|
+
'mine' is True, these are extracted from a user-provided token.
|
|
144
149
|
|
|
145
150
|
'created_*' take ISO 8601 formatted strings, as returned by the API
|
|
146
151
|
elsewhere, ie. 'YYYY-MM-DD' or 'YYYY-MM-DDTHH:MM:SS' (or with '.MS'),
|
|
@@ -154,7 +159,12 @@ class TestingFarmAPI:
|
|
|
154
159
|
if created_after:
|
|
155
160
|
fields["created_after"] = created_after
|
|
156
161
|
|
|
157
|
-
if
|
|
162
|
+
if user_id or token_id:
|
|
163
|
+
if user_id:
|
|
164
|
+
fields["user_id"] = user_id
|
|
165
|
+
if token_id:
|
|
166
|
+
fields["token_id"] = token_id
|
|
167
|
+
elif mine:
|
|
158
168
|
if not self.api_token:
|
|
159
169
|
raise ValueError("search_requests(mine=True) requires an auth token")
|
|
160
170
|
fields["token_id"] = self.whoami()["token"]["id"]
|
|
@@ -289,9 +299,12 @@ class PipelineLogStreamer:
|
|
|
289
299
|
|
|
290
300
|
log = f"{artifacts}/pipeline.log"
|
|
291
301
|
reply = _http.request("HEAD", log)
|
|
292
|
-
# TF has a race condition of adding the .log entry without
|
|
293
|
-
|
|
294
|
-
|
|
302
|
+
# 404: TF has a race condition of adding the .log entry without
|
|
303
|
+
# it being created
|
|
304
|
+
# 403: happens on internal OSCI artifacts server, probably
|
|
305
|
+
# due to similar reasons (folder exists without log)
|
|
306
|
+
if reply.status in (404,403):
|
|
307
|
+
util.debug(f"got {reply.status} for {log}, retrying")
|
|
295
308
|
continue
|
|
296
309
|
elif reply.status != 200:
|
|
297
310
|
raise APIError(f"got HTTP {reply.status} on HEAD {log}", reply)
|
|
@@ -431,6 +444,7 @@ class Reserve:
|
|
|
431
444
|
self._source_host = source_host
|
|
432
445
|
self.api = api or TestingFarmAPI()
|
|
433
446
|
|
|
447
|
+
self.lock = threading.RLock()
|
|
434
448
|
self.request = None
|
|
435
449
|
self._tmpdir = None
|
|
436
450
|
|
|
@@ -445,17 +459,11 @@ class Reserve:
|
|
|
445
459
|
r = _http.request("GET", "https://ifconfig.co", headers=curl_agent)
|
|
446
460
|
return r.data.decode().strip()
|
|
447
461
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
("ssh-keygen", "-t", "rsa", "-N", "", "-f", tmpdir / "key_rsa"),
|
|
453
|
-
stdout=subprocess.DEVNULL,
|
|
454
|
-
check=True,
|
|
455
|
-
)
|
|
456
|
-
return (tmpdir / "key_rsa", tmpdir / "key_rsa.pub")
|
|
462
|
+
def reserve(self):
|
|
463
|
+
with self.lock:
|
|
464
|
+
if self.request:
|
|
465
|
+
raise RuntimeError("reservation already in progress")
|
|
457
466
|
|
|
458
|
-
def __enter__(self):
|
|
459
467
|
spec = self._spec.copy()
|
|
460
468
|
|
|
461
469
|
try:
|
|
@@ -478,21 +486,25 @@ class Reserve:
|
|
|
478
486
|
raise FileNotFoundError(f"{ssh_key} specified, but does not exist")
|
|
479
487
|
ssh_pubkey = Path(f"{ssh_key}.pub")
|
|
480
488
|
else:
|
|
481
|
-
self.
|
|
482
|
-
|
|
489
|
+
with self.lock:
|
|
490
|
+
self._tmpdir = tempfile.TemporaryDirectory()
|
|
491
|
+
ssh_key, ssh_pubkey = util.ssh_keygen(self._tmpdir.name)
|
|
483
492
|
|
|
484
493
|
pubkey_contents = ssh_pubkey.read_text().strip()
|
|
485
494
|
secrets = spec["environments"][0]["secrets"]
|
|
486
495
|
secrets["RESERVE_SSH_PUBKEY"] = pubkey_contents
|
|
487
496
|
|
|
488
|
-
|
|
489
|
-
|
|
497
|
+
with self.lock:
|
|
498
|
+
self.request = Request(api=self.api)
|
|
499
|
+
self.request.submit(spec)
|
|
490
500
|
util.debug(f"submitted request:\n{textwrap.indent(str(self.request), ' ')}")
|
|
491
501
|
|
|
492
502
|
# wait for user/host to ssh to
|
|
493
503
|
ssh_user = ssh_host = None
|
|
494
504
|
for line in PipelineLogStreamer(self.request):
|
|
495
|
-
|
|
505
|
+
# the '\033[0m' is to reset colors sometimes left in a bad
|
|
506
|
+
# state by pipeline.log
|
|
507
|
+
util.debug(f"pipeline: {line}\033[0m")
|
|
496
508
|
# find hidden login details
|
|
497
509
|
m = re.search(r"\] Guest is ready: ArtemisGuest\([^,]+, (\w+)@([0-9\.]+), ", line)
|
|
498
510
|
if m:
|
|
@@ -534,22 +546,25 @@ class Reserve:
|
|
|
534
546
|
)
|
|
535
547
|
|
|
536
548
|
except:
|
|
537
|
-
self.
|
|
549
|
+
self.release()
|
|
538
550
|
raise
|
|
539
551
|
|
|
540
|
-
def
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
552
|
+
def release(self):
|
|
553
|
+
with self.lock:
|
|
554
|
+
if self.request:
|
|
555
|
+
try:
|
|
556
|
+
self.request.cancel()
|
|
557
|
+
except APIError:
|
|
558
|
+
pass
|
|
559
|
+
finally:
|
|
560
|
+
self.request = None
|
|
548
561
|
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
562
|
+
if self._tmpdir:
|
|
563
|
+
self._tmpdir.cleanup()
|
|
564
|
+
self._tmpdir = None
|
|
552
565
|
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
566
|
+
def __enter__(self):
|
|
567
|
+
return self.reserve()
|
|
568
|
+
|
|
569
|
+
def __exit__(self, exc_type, exc_value, traceback):
|
|
570
|
+
self.release()
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import tempfile
|
|
3
|
+
import threading
|
|
4
|
+
|
|
5
|
+
from ... import connection, util
|
|
6
|
+
from .. import Provisioner, Remote
|
|
7
|
+
|
|
8
|
+
from . import api
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestingFarmRemote(Remote, connection.ssh.ManagedSSHConn):
|
|
12
|
+
"""
|
|
13
|
+
Built on the official Remote API, pulling in the Connection API
|
|
14
|
+
as implemented by ManagedSSHConn.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
def __init__(self, ssh_options, *, release_hook, provisioner):
|
|
18
|
+
"""
|
|
19
|
+
'ssh_options' are a dict, passed to ManagedSSHConn __init__().
|
|
20
|
+
|
|
21
|
+
'release_hook' is a callable called on .release() in addition
|
|
22
|
+
to disconnecting the connection.
|
|
23
|
+
"""
|
|
24
|
+
# pass the given ssh options on to the underlying connection class
|
|
25
|
+
super().__init__(options=ssh_options)
|
|
26
|
+
self.release_hook = release_hook
|
|
27
|
+
self.provisioner = provisioner
|
|
28
|
+
self.lock = threading.RLock()
|
|
29
|
+
self.release_called = False
|
|
30
|
+
|
|
31
|
+
def release(self):
|
|
32
|
+
with self.lock:
|
|
33
|
+
if not self.release_called:
|
|
34
|
+
self.release_called = True
|
|
35
|
+
else:
|
|
36
|
+
return
|
|
37
|
+
self.release_hook(self)
|
|
38
|
+
self.disconnect()
|
|
39
|
+
|
|
40
|
+
# not /technically/ a valid repr(), but meh
|
|
41
|
+
def __repr__(self):
|
|
42
|
+
class_name = self.__class__.__name__
|
|
43
|
+
compose = self.provisioner.compose
|
|
44
|
+
arch = self.provisioner.arch
|
|
45
|
+
return f"{class_name}({compose} @ {arch}, {hex(id(self))})"
|
|
46
|
+
|
|
47
|
+
# def alive(self):
|
|
48
|
+
# return self.valid
|
|
49
|
+
|
|
50
|
+
# TODO: def __str__(self): as root@1.2.3.4 and arch, ranch, etc.
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class TestingFarmProvisioner(Provisioner):
|
|
54
|
+
# TODO: have max_systems as (min,default,max) tuple; have an algorithm that
|
|
55
|
+
# starts at default and scales up/down as needed
|
|
56
|
+
|
|
57
|
+
def __init__(self, compose, arch="x86_64", *, max_systems=1, timeout=60, max_retries=10):
|
|
58
|
+
"""
|
|
59
|
+
'compose' is a Testing Farm compose to prepare.
|
|
60
|
+
|
|
61
|
+
'arch' is an architecture associated with the compose.
|
|
62
|
+
|
|
63
|
+
'max_systems' is an int of how many systems to reserve (and keep
|
|
64
|
+
reserved) in an internal pool.
|
|
65
|
+
|
|
66
|
+
'timeout' is the maximum Testing Farm pipeline timeout (waiting for
|
|
67
|
+
a system + OS installation + reservation time).
|
|
68
|
+
|
|
69
|
+
'max_retries' is a maximum number of provisioning (Testing Farm) errors
|
|
70
|
+
that will be reprovisioned before giving up.
|
|
71
|
+
"""
|
|
72
|
+
super().__init__()
|
|
73
|
+
self.compose = compose # TODO: translate "centos9" to "CentOS-Stream-9"
|
|
74
|
+
self.arch = arch
|
|
75
|
+
self.max_systems = max_systems
|
|
76
|
+
self.timeout = timeout
|
|
77
|
+
self.retries = max_retries
|
|
78
|
+
self._tmpdir = None
|
|
79
|
+
self.ssh_key = self.ssh_pubkey = None
|
|
80
|
+
self.queue = util.ThreadQueue(daemon=True)
|
|
81
|
+
self.tf_api = api.TestingFarmAPI()
|
|
82
|
+
|
|
83
|
+
# TF Reserve instances (not Remotes) actively being provisioned,
|
|
84
|
+
# in case we need to call their .release() on abort
|
|
85
|
+
self.reserving = []
|
|
86
|
+
|
|
87
|
+
# active TestingFarmRemote instances, ready to be handed over to the user,
|
|
88
|
+
# or already in use by the user
|
|
89
|
+
self.remotes = []
|
|
90
|
+
|
|
91
|
+
def _wait_for_reservation(self, tf_reserve, initial_delay):
|
|
92
|
+
# assuming this function will be called many times, attempt to
|
|
93
|
+
# distribute load on TF servers
|
|
94
|
+
# (we can sleep here as this code is running in a separate thread)
|
|
95
|
+
if initial_delay:
|
|
96
|
+
util.debug(f"delaying for {initial_delay}s to distribute load")
|
|
97
|
+
time.sleep(initial_delay)
|
|
98
|
+
|
|
99
|
+
# 'machine' is api.Reserve.ReservedMachine namedtuple
|
|
100
|
+
machine = tf_reserve.reserve()
|
|
101
|
+
|
|
102
|
+
# connect our Remote to the machine via its class Connection API
|
|
103
|
+
ssh_options = {
|
|
104
|
+
"Hostname": machine.host,
|
|
105
|
+
"User": machine.user,
|
|
106
|
+
"Port": machine.port,
|
|
107
|
+
"IdentityFile": machine.ssh_key,
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
def release_hook(remote):
|
|
111
|
+
# remove from the list of remotes inside this Provisioner
|
|
112
|
+
with self.lock:
|
|
113
|
+
try:
|
|
114
|
+
self.remotes.remove(remote)
|
|
115
|
+
except ValueError:
|
|
116
|
+
pass
|
|
117
|
+
# call TF API, cancel the request, etc.
|
|
118
|
+
tf_reserve.release()
|
|
119
|
+
|
|
120
|
+
remote = TestingFarmRemote(
|
|
121
|
+
ssh_options,
|
|
122
|
+
release_hook=release_hook,
|
|
123
|
+
provisioner=self,
|
|
124
|
+
)
|
|
125
|
+
remote.connect()
|
|
126
|
+
|
|
127
|
+
# since the system is fully ready, stop tracking its reservation
|
|
128
|
+
# and return the finished Remote instance
|
|
129
|
+
with self.lock:
|
|
130
|
+
self.remotes.append(remote)
|
|
131
|
+
self.reserving.remove(tf_reserve)
|
|
132
|
+
|
|
133
|
+
return remote
|
|
134
|
+
|
|
135
|
+
def _schedule_one_reservation(self, initial_delay=None):
|
|
136
|
+
# instantiate a class Reserve from the Testing Farm api module
|
|
137
|
+
# (which typically provides context manager, but we use its .reserve()
|
|
138
|
+
# and .release() functions directly)
|
|
139
|
+
tf_reserve = api.Reserve(
|
|
140
|
+
compose=self.compose,
|
|
141
|
+
arch=self.arch,
|
|
142
|
+
timeout=self.timeout,
|
|
143
|
+
ssh_key=self.ssh_key,
|
|
144
|
+
api=self.tf_api,
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
# add it to self.reserving even before we schedule a provision,
|
|
148
|
+
# to avoid races on sudden abort
|
|
149
|
+
with self.lock:
|
|
150
|
+
self.reserving.append(tf_reserve)
|
|
151
|
+
|
|
152
|
+
# start a background wait
|
|
153
|
+
self.queue.start_thread(
|
|
154
|
+
target=self._wait_for_reservation,
|
|
155
|
+
args=(tf_reserve, initial_delay),
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
def start(self):
|
|
159
|
+
with self.lock:
|
|
160
|
+
self._tmpdir = tempfile.TemporaryDirectory()
|
|
161
|
+
self.ssh_key, self.ssh_pubkey = util.ssh_keygen(self._tmpdir.name)
|
|
162
|
+
# start up all initial reservations
|
|
163
|
+
for i in range(self.max_systems):
|
|
164
|
+
delay = (api.API_QUERY_DELAY / self.max_systems) * i
|
|
165
|
+
#self.queue.start_thread(target=self._schedule_one_reservation, args=(delay,))
|
|
166
|
+
self._schedule_one_reservation(delay)
|
|
167
|
+
|
|
168
|
+
def stop(self):
|
|
169
|
+
with self.lock:
|
|
170
|
+
# abort reservations in progress
|
|
171
|
+
for tf_reserve in self.reserving:
|
|
172
|
+
tf_reserve.release()
|
|
173
|
+
self.reserving = []
|
|
174
|
+
# cancel/release all Remotes ever created by us
|
|
175
|
+
for remote in self.remotes:
|
|
176
|
+
remote.release()
|
|
177
|
+
self.remotes = [] # just in case
|
|
178
|
+
# explicitly remove the tmpdir rather than relying on destructor
|
|
179
|
+
self._tmpdir.cleanup()
|
|
180
|
+
self._tmpdir = None
|
|
181
|
+
|
|
182
|
+
def stop_defer(self):
|
|
183
|
+
callables = []
|
|
184
|
+
with self.lock:
|
|
185
|
+
callables += (f.release for f in self.reserving)
|
|
186
|
+
self.reserving = []
|
|
187
|
+
callables += (r.release for r in self.remotes)
|
|
188
|
+
self.remotes = [] # just in case
|
|
189
|
+
callables.append(self._tmpdir.cleanup)
|
|
190
|
+
self._tmpdir = None
|
|
191
|
+
return callables
|
|
192
|
+
|
|
193
|
+
def get_remote(self, block=True):
|
|
194
|
+
# fill .release()d remotes back up with reservations
|
|
195
|
+
with self.lock:
|
|
196
|
+
deficit = self.max_systems - len(self.remotes) - len(self.reserving)
|
|
197
|
+
for i in range(deficit):
|
|
198
|
+
delay = (api.API_QUERY_DELAY / deficit) * i
|
|
199
|
+
self._schedule_one_reservation(delay)
|
|
200
|
+
|
|
201
|
+
while True:
|
|
202
|
+
# otherwise wait on a queue of Remotes being provisioned
|
|
203
|
+
try:
|
|
204
|
+
return self.queue.get(block=block) # thread-safe
|
|
205
|
+
except util.ThreadQueue.Empty:
|
|
206
|
+
# always non-blocking
|
|
207
|
+
return None
|
|
208
|
+
except (api.TestingFarmError, connection.ssh.SSHError) as e:
|
|
209
|
+
with self.lock:
|
|
210
|
+
if self.retries > 0:
|
|
211
|
+
util.warning(
|
|
212
|
+
f"caught while reserving a TF system: {repr(e)}, "
|
|
213
|
+
f"retrying ({self.retries} left)",
|
|
214
|
+
)
|
|
215
|
+
self.retries -= 1
|
|
216
|
+
self._schedule_one_reservation()
|
|
217
|
+
if block:
|
|
218
|
+
continue
|
|
219
|
+
else:
|
|
220
|
+
return None
|
|
221
|
+
else:
|
|
222
|
+
util.warning(
|
|
223
|
+
f"caught while reserving a TF system: {repr(e)}, "
|
|
224
|
+
"exhausted all retries, giving up",
|
|
225
|
+
)
|
|
226
|
+
raise
|
|
227
|
+
|
|
228
|
+
# not /technically/ a valid repr(), but meh
|
|
229
|
+
def __repr__(self):
|
|
230
|
+
class_name = self.__class__.__name__
|
|
231
|
+
reserving = len(self.reserving)
|
|
232
|
+
remotes = len(self.remotes)
|
|
233
|
+
return (
|
|
234
|
+
f"{class_name}({self.compose} @ {self.arch}, {reserving} reserving, "
|
|
235
|
+
f"{remotes} remotes, {hex(id(self))})"
|
|
236
|
+
)
|
atex/util/__init__.py
CHANGED
|
@@ -1,7 +1,3 @@
|
|
|
1
|
-
"""
|
|
2
|
-
TODO some description about utilities
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
1
|
import importlib as _importlib
|
|
6
2
|
import pkgutil as _pkgutil
|
|
7
3
|
import inspect as _inspect
|
|
@@ -39,8 +35,7 @@ def _import_submodules():
|
|
|
39
35
|
if _inspect.ismodule(attr):
|
|
40
36
|
continue
|
|
41
37
|
# do not override already processed objects (avoid duplicates)
|
|
42
|
-
|
|
43
|
-
raise AssertionError(f"tried to override already-imported '{key}'")
|
|
38
|
+
assert key not in __all__, f"tried to override already-imported '{key}'"
|
|
44
39
|
|
|
45
40
|
globals()[key] = attr
|
|
46
41
|
__all__.append(key)
|