atex-0.8-py3-none-any.whl → atex-0.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. atex/aggregator/__init__.py +60 -0
  2. atex/aggregator/json.py +96 -0
  3. atex/cli/__init__.py +11 -1
  4. atex/cli/fmf.py +73 -23
  5. atex/cli/libvirt.py +128 -0
  6. atex/cli/testingfarm.py +60 -3
  7. atex/connection/__init__.py +13 -11
  8. atex/connection/podman.py +61 -0
  9. atex/connection/ssh.py +38 -47
  10. atex/executor/executor.py +144 -119
  11. atex/executor/reporter.py +66 -71
  12. atex/executor/scripts.py +13 -5
  13. atex/executor/testcontrol.py +43 -30
  14. atex/fmf.py +94 -74
  15. atex/orchestrator/__init__.py +76 -2
  16. atex/orchestrator/adhoc.py +465 -0
  17. atex/{provision → provisioner}/__init__.py +54 -42
  18. atex/provisioner/libvirt/__init__.py +2 -0
  19. atex/provisioner/libvirt/libvirt.py +472 -0
  20. atex/provisioner/libvirt/locking.py +170 -0
  21. atex/{provision → provisioner}/libvirt/setup-libvirt.sh +21 -1
  22. atex/provisioner/podman/__init__.py +2 -0
  23. atex/provisioner/podman/podman.py +169 -0
  24. atex/{provision → provisioner}/testingfarm/api.py +121 -69
  25. atex/{provision → provisioner}/testingfarm/testingfarm.py +44 -52
  26. atex/util/libvirt.py +18 -0
  27. atex/util/log.py +53 -43
  28. atex/util/named_mapping.py +158 -0
  29. atex/util/subprocess.py +46 -12
  30. atex/util/threads.py +71 -20
  31. atex-0.10.dist-info/METADATA +86 -0
  32. atex-0.10.dist-info/RECORD +44 -0
  33. atex/orchestrator/aggregator.py +0 -106
  34. atex/orchestrator/orchestrator.py +0 -324
  35. atex/provision/libvirt/__init__.py +0 -24
  36. atex/provision/podman/README +0 -59
  37. atex/provision/podman/host_container.sh +0 -74
  38. atex-0.8.dist-info/METADATA +0 -197
  39. atex-0.8.dist-info/RECORD +0 -37
  40. /atex/{provision → provisioner}/libvirt/VM_PROVISION +0 -0
  41. /atex/{provision → provisioner}/testingfarm/__init__.py +0 -0
  42. {atex-0.8.dist-info → atex-0.10.dist-info}/WHEEL +0 -0
  43. {atex-0.8.dist-info → atex-0.10.dist-info}/entry_points.txt +0 -0
  44. {atex-0.8.dist-info → atex-0.10.dist-info}/licenses/COPYING.txt +0 -0
@@ -0,0 +1,169 @@
1
+ import tempfile
2
+ import threading
3
+ import subprocess
4
+
5
+ from ... import connection, util
6
+ from .. import Provisioner, Remote
7
+
8
+
9
+ class PodmanRemote(Remote, connection.podman.PodmanConnection):
10
+ """
11
+ Built on the official Remote API, pulling in the Connection API
12
+ as implemented by ManagedSSHConnection.
13
+ """
14
+
15
+ def __init__(self, image, container, *, release_hook):
16
+ """
17
+ 'image' is an image tag (used for repr()).
18
+
19
+ 'container' is a podman container id / name.
20
+
21
+ 'release_hook' is a callable called on .release() in addition
22
+ to disconnecting the connection.
23
+ """
24
+ super().__init__(container=container)
25
+ self.lock = threading.RLock()
26
+ self.image = image
27
+ self.container = container
28
+ self.release_called = False
29
+ self.release_hook = release_hook
30
+
31
+ def release(self):
32
+ with self.lock:
33
+ if self.release_called:
34
+ return
35
+ else:
36
+ self.release_called = True
37
+ self.release_hook(self)
38
+ self.disconnect()
39
+ util.subprocess_run(
40
+ ("podman", "container", "rm", "-f", "-t", "0", self.container),
41
+ check=False, # ignore if it fails
42
+ stdout=subprocess.DEVNULL,
43
+ )
44
+
45
+ # not /technically/ a valid repr(), but meh
46
+ def __repr__(self):
47
+ class_name = self.__class__.__name__
48
+
49
+ if "/" in self.image:
50
+ image = self.image.rsplit("/",1)[1]
51
+ elif len(self.image) > 20:
52
+ image = f"{self.image[:17]}..."
53
+ else:
54
+ image = self.image
55
+
56
+ name = f"{self.container[:17]}..." if len(self.container) > 20 else self.container
57
+
58
+ return f"{class_name}({image}, {name})"
59
+
60
+
61
+ class PodmanProvisioner(Provisioner):
62
+ def __init__(self, image, run_options=None):
63
+ """
64
+ 'image' is a string of image tag/id to create containers from.
65
+ It can be a local identifier or an URL.
66
+
67
+ 'run_options' is an iterable with additional CLI options passed
68
+ to 'podman container run'.
69
+ """
70
+ self.lock = threading.RLock()
71
+ self.image = image
72
+ self.run_options = run_options or ()
73
+
74
+ # created PodmanRemote instances, ready to be handed over to the user,
75
+ # or already in use by the user
76
+ self.remotes = []
77
+ self.to_create = 0
78
+
79
+ def start(self):
80
+ if not self.image:
81
+ raise ValueError("image cannot be empty")
82
+
83
+ def stop(self):
84
+ with self.lock:
85
+ while self.remotes:
86
+ self.remotes.pop().release()
87
+
88
+ def provision(self, count=1):
89
+ with self.lock:
90
+ self.to_create += count
91
+
92
+ def get_remote(self, block=True):
93
+ if self.to_create <= 0:
94
+ if block:
95
+ raise RuntimeError("no .provision() requested, would block forever")
96
+ else:
97
+ return None
98
+
99
+ proc = util.subprocess_run(
100
+ (
101
+ "podman", "container", "run", "--quiet", "--detach", "--pull", "never",
102
+ *self.run_options, self.image, "sleep", "inf",
103
+ ),
104
+ check=True,
105
+ text=True,
106
+ stdout=subprocess.PIPE,
107
+ )
108
+ container_id = proc.stdout.rstrip("\n")
109
+
110
+ def release_hook(remote):
111
+ # remove from the list of remotes inside this Provisioner
112
+ with self.lock:
113
+ try:
114
+ self.remotes.remove(remote)
115
+ except ValueError:
116
+ pass
117
+
118
+ remote = PodmanRemote(
119
+ self.image,
120
+ container_id,
121
+ release_hook=release_hook,
122
+ )
123
+
124
+ with self.lock:
125
+ self.remotes.append(remote)
126
+ self.to_create -= 1
127
+
128
+ return remote
129
+
130
+ # not /technically/ a valid repr(), but meh
131
+ def __repr__(self):
132
+ class_name = self.__class__.__name__
133
+ return (
134
+ f"{class_name}({self.image}, {len(self.remotes)} remotes, {hex(id(self))})"
135
+ )
136
+
137
+
138
+ def pull_image(origin):
139
+ proc = util.subprocess_run(
140
+ ("podman", "image", "pull", "-q", origin),
141
+ check=True,
142
+ text=True,
143
+ stdout=subprocess.PIPE,
144
+ )
145
+ return proc.stdout.rstrip("\n")
146
+
147
+
148
+ def build_container_with_deps(origin, tag=None, *, extra_pkgs=None):
149
+ tag_args = ("-t", tag) if tag else ()
150
+
151
+ pkgs = ["rsync"]
152
+ if extra_pkgs:
153
+ pkgs += extra_pkgs
154
+ pkgs_str = " ".join(pkgs)
155
+
156
+ with tempfile.NamedTemporaryFile("w+t", delete_on_close=False) as tmpf:
157
+ tmpf.write(util.dedent(fr"""
158
+ FROM {origin}
159
+ RUN dnf -y -q --setopt=install_weak_deps=False install {pkgs_str} >/dev/null
160
+ RUN dnf -y -q clean packages >/dev/null
161
+ """))
162
+ tmpf.close()
163
+ proc = util.subprocess_run(
164
+ ("podman", "image", "build", "-q", "-f", tmpf.name, *tag_args, "."),
165
+ check=True,
166
+ text=True,
167
+ stdout=subprocess.PIPE,
168
+ )
169
+ return proc.stdout.rstrip("\n")
@@ -16,16 +16,11 @@ import urllib3
16
16
 
17
17
  DEFAULT_API_URL = "https://api.testing-farm.io/v0.1"
18
18
 
19
- # how many seconds to sleep for during API polling
20
- API_QUERY_DELAY = 30
21
-
22
- RESERVE_TASK = {
23
- "fmf": {
24
- "url": "https://github.com/RHSecurityCompliance/atex",
25
- "ref": "main",
26
- "path": "tmt_tests",
27
- "name": "/plans/reserve",
28
- },
19
+ DEFAULT_RESERVE_TEST = {
20
+ "url": "https://github.com/RHSecurityCompliance/atex-reserve",
21
+ "ref": "v0.10",
22
+ "path": ".",
23
+ "name": "/plans/reserve",
29
24
  }
30
25
 
31
26
  # final states of a request,
@@ -35,7 +30,19 @@ END_STATES = ("error", "complete", "canceled")
35
30
  # always have at most 10 outstanding HTTP requests to every given API host,
36
31
  # shared by all instances of all classes here, to avoid flooding the host
37
32
  # by multi-threaded users
38
- _http = urllib3.PoolManager(maxsize=10, block=True)
33
+ _http = urllib3.PoolManager(
34
+ maxsize=10,
35
+ block=True,
36
+ retries=urllib3.Retry(
37
+ total=10,
38
+ # account for API restarts / short outages
39
+ backoff_factor=60,
40
+ backoff_max=600,
41
+ # retry on API server errors too, not just connection issues
42
+ status=10,
43
+ status_forcelist={403,404,408,429,500,502,503,504},
44
+ ),
45
+ )
39
46
 
40
47
 
41
48
  class TestingFarmError(Exception):
@@ -78,12 +85,13 @@ class TestingFarmAPI:
78
85
  self.api_url = url
79
86
  self.api_token = token or os.environ.get("TESTING_FARM_API_TOKEN")
80
87
 
81
- def _query(self, method, path, *args, headers=None, **kwargs):
88
+ def _query(self, method, path, *args, headers=None, auth=True, **kwargs):
82
89
  url = f"{self.api_url}{path}"
83
- if headers is not None:
84
- headers["Authorization"] = f"Bearer {self.api_token}"
85
- else:
86
- headers = {"Authorization": f"Bearer {self.api_token}"}
90
+ if self.api_token and auth:
91
+ if headers is not None:
92
+ headers["Authorization"] = f"Bearer {self.api_token}"
93
+ else:
94
+ headers = {"Authorization": f"Bearer {self.api_token}"}
87
95
 
88
96
  reply = _http.request(method, url, *args, headers=headers, preload_content=False, **kwargs)
89
97
 
@@ -170,7 +178,7 @@ class TestingFarmAPI:
170
178
  fields["token_id"] = self.whoami()["token"]["id"]
171
179
  fields["user_id"] = self.whoami()["user"]["id"]
172
180
 
173
- return self._query("GET", "/requests", fields=fields)
181
+ return self._query("GET", "/requests", fields=fields, auth=mine)
174
182
 
175
183
  def get_request(self, request_id):
176
184
  """
@@ -200,19 +208,22 @@ class Request:
200
208
  request.
201
209
  """
202
210
 
203
- # TODO: maintain internal time.monotonic() clock and call .update() from
204
- # functions like .alive() if last update is > API_QUERY_DELAY
211
+ # actually query the TestingFarm API at most every X seconds,
212
+ # re-using cached state between updates
213
+ api_query_limit = 30
205
214
 
206
215
  def __init__(self, id=None, api=None, initial_data=None):
207
216
  """
208
217
  'id' is a Testing Farm request UUID
218
+
209
219
  'api' is a TestingFarmAPI instance - if unspecified, a sensible default
210
- 'initial_data' (dict) can be used to pre-fill an initial Request state
211
- will be used.
220
+
221
+ 'initial_data' (dict) can be used to pre-fill an initial Request state.
212
222
  """
213
223
  self.id = id
214
224
  self.api = api or TestingFarmAPI()
215
225
  self.data = initial_data or {}
226
+ self.next_query = 0
216
227
 
217
228
  def submit(self, spec):
218
229
  """
@@ -224,16 +235,12 @@ class Request:
224
235
  self.data = self.api.submit_request(spec)
225
236
  self.id = self.data["id"]
226
237
 
227
- def update(self):
228
- """
229
- Query Testing Farm API to get a more up-to-date version of the request
230
- metadata. Do not call too frequently.
231
- This function is also used internally by others, you do not need to
232
- always call it manually.
233
- """
234
- self.data = self.api.get_request(self.id)
235
- # TODO: refresh internal time.monotonic() timer
236
- return self.data
238
+ def _refresh(self):
239
+ if not self.id:
240
+ return
241
+ if time.monotonic() > self.next_query:
242
+ self.data = self.api.get_request(self.id)
243
+ self.next_query = time.monotonic() + self.api_query_limit
237
244
 
238
245
  def cancel(self):
239
246
  if not self.id:
@@ -244,35 +251,44 @@ class Request:
244
251
  return data
245
252
 
246
253
  def alive(self):
247
- if "state" not in self.data:
248
- self.update()
254
+ if not self.id:
255
+ return False
256
+ self._refresh()
249
257
  return self.data["state"] not in END_STATES
250
258
 
251
259
  def assert_alive(self):
252
260
  if not self.alive():
253
261
  state = self.data["state"]
254
- raise GoneAwayError(f"request {self.data['id']} not alive anymore, entered: {state}")
262
+ raise GoneAwayError(f"request {self.id} not alive anymore, entered: {state}")
255
263
 
256
264
  def wait_for_state(self, state):
257
- if "state" not in self.data:
258
- self.update()
259
- self.assert_alive()
260
- while self.data["state"] != state:
261
- time.sleep(API_QUERY_DELAY)
262
- self.update()
263
- self.assert_alive()
265
+ """
266
+ 'state' is a str or a tuple of states to wait for.
267
+ """
268
+ watched = (state,) if isinstance(state, str) else state
269
+ while True:
270
+ self._refresh()
271
+ if self.data["state"] in watched:
272
+ break
273
+ # if the request ended in one of END_STATES and the above condition
274
+ # did not catch it, the wait will never end
275
+ if self.data["state"] in END_STATES:
276
+ raise GoneAwayError(f"request {self.id} ended with {self.data['state']}")
264
277
 
265
278
  def __repr__(self):
266
279
  return f"Request(id={self.id})"
267
280
 
268
281
  def __str__(self):
282
+ self._refresh()
269
283
  # python has no better dict-pretty-printing logic
270
284
  return json.dumps(self.data, sort_keys=True, indent=4)
271
285
 
272
286
  def __contains__(self, item):
287
+ self._refresh()
273
288
  return item in self.data
274
289
 
275
290
  def __getitem__(self, key):
291
+ self._refresh()
276
292
  return self.data[key]
277
293
 
278
294
 
@@ -282,6 +298,10 @@ class PipelineLogStreamer:
282
298
  to "stream" its contents over time (over many requests), never having to
283
299
  re-read old pipeline.log content.
284
300
  """
301
+
302
+ # how frequently to check for pipeline.log updates (seconds)
303
+ pipeline_query_limit = 30
304
+
285
305
  def __init__(self, request):
286
306
  self.request = request
287
307
 
@@ -314,8 +334,7 @@ class PipelineLogStreamer:
314
334
  return log
315
335
 
316
336
  finally:
317
- time.sleep(API_QUERY_DELAY)
318
- self.request.update()
337
+ time.sleep(self.pipeline_query_limit)
319
338
 
320
339
  def __iter__(self):
321
340
  url = self._wait_for_entry()
@@ -346,8 +365,7 @@ class PipelineLogStreamer:
346
365
  buffer = buffer[index+1:]
347
366
 
348
367
  finally:
349
- time.sleep(API_QUERY_DELAY)
350
- self.request.update()
368
+ time.sleep(self.pipeline_query_limit)
351
369
 
352
370
 
353
371
  class Reserve:
@@ -370,7 +388,9 @@ class Reserve:
370
388
 
371
389
  def __init__(
372
390
  self, *, compose, arch="x86_64", pool=None, hardware=None, kickstart=None,
373
- timeout=60, ssh_key=None, source_host=None, api=None,
391
+ timeout=60, ssh_key=None, source_host=None,
392
+ reserve_test=None, variables=None, secrets=None,
393
+ api=None,
374
394
  ):
375
395
  """
376
396
  'compose' (str) is the OS to install, chosen from the composes supported
@@ -403,18 +423,31 @@ class Reserve:
403
423
  facing address of the current system.
404
424
  Ignored on the 'redhat' ranch.
405
425
 
426
+ 'reserve_test' is a dict with a fmf test specification to be run on the
427
+ target system to reserve it, ie.:
428
+ {
429
+ "url": "https://some-host/path/to/repo",
430
+ "ref": "main",
431
+ "name": "/plans/reserve",
432
+ }
433
+
434
+ 'variables' and 'secrets' are dicts with environment variable key/values
435
+ exported for the reserve test - variables are visible via TF API,
436
+ secrets are not (but can still be extracted from pipeline log).
437
+
406
438
  'api' is a TestingFarmAPI instance - if unspecified, a sensible default
407
439
  will be used.
408
440
  """
409
- util.info(f"Will reserve compose:{compose} on arch:{arch} for {timeout}min")
441
+ util.info(f"will reserve compose:{compose} on arch:{arch} for {timeout}min")
410
442
  spec = {
411
- "test": RESERVE_TASK,
443
+ "test": {
444
+ "fmf": reserve_test or DEFAULT_RESERVE_TEST,
445
+ },
412
446
  "environments": [{
413
447
  "arch": arch,
414
448
  "os": {
415
449
  "compose": compose,
416
450
  },
417
- "pool": pool,
418
451
  "settings": {
419
452
  "pipeline": {
420
453
  "skip_guest_setup": True,
@@ -423,10 +456,8 @@ class Reserve:
423
456
  "tags": {
424
457
  "ArtemisUseSpot": "false",
425
458
  },
426
- "security_group_rules_ingress": [],
427
459
  },
428
460
  },
429
- "secrets": {},
430
461
  }],
431
462
  "settings": {
432
463
  "pipeline": {
@@ -434,10 +465,16 @@ class Reserve:
434
465
  },
435
466
  },
436
467
  }
468
+ spec_env = spec["environments"][0]
469
+ if pool:
470
+ spec_env["pool"] = pool
437
471
  if hardware:
438
- spec["environments"][0]["hardware"] = hardware
472
+ spec_env["hardware"] = hardware
439
473
  if kickstart:
440
- spec["environments"][0]["kickstart"] = kickstart
474
+ spec_env["kickstart"] = kickstart
475
+ if variables:
476
+ spec_env["variables"] = variables
477
+ spec_env["secrets"] = secrets.copy() if secrets else {} # we need it for ssh pubkey
441
478
 
442
479
  self._spec = spec
443
480
  self._ssh_key = Path(ssh_key) if ssh_key else None
@@ -465,20 +502,25 @@ class Reserve:
465
502
  raise RuntimeError("reservation already in progress")
466
503
 
467
504
  spec = self._spec.copy()
505
+ spec_env = spec["environments"][0]
468
506
 
469
- try:
470
- # add source_host firewall filter
507
+ # add source_host firewall filter on the public ranch
508
+ if self.api.whoami()["token"]["ranch"] == "public":
471
509
  source_host = self._source_host or f"{self._guess_host_ipv4()}/32"
472
- ingress = \
473
- spec["environments"][0]["settings"]["provisioning"]["security_group_rules_ingress"]
474
- ingress.append({
510
+ ingress_rule = {
475
511
  "type": "ingress",
476
512
  "protocol": "-1",
477
513
  "cidr": source_host,
478
514
  "port_min": 0,
479
515
  "port_max": 65535,
480
- })
516
+ }
517
+ provisioning = spec_env["settings"]["provisioning"]
518
+ if "security_group_rules_ingress" in provisioning:
519
+ provisioning["security_group_rules_ingress"].append(ingress_rule)
520
+ else:
521
+ provisioning["security_group_rules_ingress"] = [ingress_rule]
481
522
 
523
+ try:
482
524
  # read user-provided ssh key, or generate one
483
525
  ssh_key = self._ssh_key
484
526
  if ssh_key:
@@ -491,22 +533,30 @@ class Reserve:
491
533
  ssh_key, ssh_pubkey = util.ssh_keygen(self._tmpdir.name)
492
534
 
493
535
  pubkey_contents = ssh_pubkey.read_text().strip()
494
- secrets = spec["environments"][0]["secrets"]
495
- secrets["RESERVE_SSH_PUBKEY"] = pubkey_contents
536
+ # TODO: split ^^^ into 3 parts (key type, hash, comment), assert it,
537
+ # and anonymize comment in case it contains a secret user/hostname
538
+ spec_env["secrets"]["RESERVE_SSH_PUBKEY"] = pubkey_contents
496
539
 
497
540
  with self.lock:
498
541
  self.request = Request(api=self.api)
499
542
  self.request.submit(spec)
500
- util.debug(f"submitted request:\n{textwrap.indent(str(self.request), ' ')}")
543
+ util.debug(f"submitted request {self.request.id}")
544
+ util.extradebug(
545
+ f"request {self.request.id}:\n{textwrap.indent(str(self.request), ' ')}",
546
+ )
501
547
 
502
548
  # wait for user/host to ssh to
503
549
  ssh_user = ssh_host = None
504
550
  for line in PipelineLogStreamer(self.request):
505
551
  # the '\033[0m' is to reset colors sometimes left in a bad
506
552
  # state by pipeline.log
507
- util.debug(f"pipeline: {line}\033[0m")
553
+ util.extradebug(f"{line}\033[0m")
508
554
  # find hidden login details
509
- m = re.search(r"\] Guest is ready: ArtemisGuest\([^,]+, (\w+)@([0-9\.]+), ", line)
555
+ m = re.search(
556
+ # host address can be an IP address or a hostname
557
+ r"\] Guest is ready: ArtemisGuest\([^,]+, (\w+)@([^,]+), arch=",
558
+ line,
559
+ )
510
560
  if m:
511
561
  ssh_user, ssh_host = m.groups()
512
562
  continue
@@ -520,14 +570,12 @@ class Reserve:
520
570
  # (it will be failing to login for a while, until the reserve test
521
571
  # installs our ssh pubkey into authorized_keys)
522
572
  ssh_attempt_cmd = (
523
- "ssh", "-q", "-i", ssh_key, f"-oConnectionAttempts={API_QUERY_DELAY}",
573
+ "ssh", "-q", "-i", ssh_key.absolute(), "-oConnectionAttempts=60",
524
574
  "-oStrictHostKeyChecking=no", "-oUserKnownHostsFile=/dev/null",
525
575
  f"{ssh_user}@{ssh_host}", "exit 123",
526
576
  )
527
577
  while True:
528
- # wait for API_QUERY_DELAY between ssh retries, seems like GEFN sleep time
529
- time.sleep(API_QUERY_DELAY)
530
- self.request.update()
578
+ time.sleep(1)
531
579
  self.request.assert_alive()
532
580
 
533
581
  proc = util.subprocess_run(
@@ -564,7 +612,11 @@ class Reserve:
564
612
  self._tmpdir = None
565
613
 
566
614
  def __enter__(self):
567
- return self.reserve()
615
+ try:
616
+ return self.reserve()
617
+ except Exception:
618
+ self.release()
619
+ raise
568
620
 
569
621
  def __exit__(self, exc_type, exc_value, traceback):
570
622
  self.release()