gpustack-runtime 0.1.39__py3-none-any.whl → 0.1.39.post2__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- gpustack_runtime/__main__.py +6 -2
- gpustack_runtime/_version.py +2 -2
- gpustack_runtime/_version_appendix.py +1 -1
- gpustack_runtime/cmds/__init__.py +6 -2
- gpustack_runtime/cmds/deployer.py +187 -288
- gpustack_runtime/deployer/__init__.py +197 -0
- gpustack_runtime/deployer/__types__.py +382 -17
- gpustack_runtime/deployer/__utils__.py +34 -0
- gpustack_runtime/deployer/docker.py +312 -167
- gpustack_runtime/deployer/kuberentes.py +288 -45
- gpustack_runtime/deployer/podman.py +322 -167
- gpustack_runtime/detector/amd.py +1 -1
- gpustack_runtime/detector/hygon.py +1 -1
- gpustack_runtime/detector/pyhsa/__init__.py +7 -7
- gpustack_runtime/envs.py +29 -42
- {gpustack_runtime-0.1.39.dist-info → gpustack_runtime-0.1.39.post2.dist-info}/METADATA +2 -2
- {gpustack_runtime-0.1.39.dist-info → gpustack_runtime-0.1.39.post2.dist-info}/RECORD +20 -20
- {gpustack_runtime-0.1.39.dist-info → gpustack_runtime-0.1.39.post2.dist-info}/WHEEL +0 -0
- {gpustack_runtime-0.1.39.dist-info → gpustack_runtime-0.1.39.post2.dist-info}/entry_points.txt +0 -0
- {gpustack_runtime-0.1.39.dist-info → gpustack_runtime-0.1.39.post2.dist-info}/licenses/LICENSE +0 -0
gpustack_runtime/deployer/docker.py

@@ -1,12 +1,14 @@
 from __future__ import annotations
 
 import contextlib
+import io
 import json
 import logging
 import operator
 import os
 import socket
 import sys
+import tarfile
 from dataclasses import dataclass, field
 from functools import lru_cache, reduce
 from math import ceil
@@ -24,7 +26,7 @@ from docker.utils import parse_repository_tag
 from tqdm import tqdm
 
 from .. import envs
-from ..logging import debug_log_exception
+from ..logging import debug_log_exception, debug_log_warning
 from .__types__ import (
     Container,
     ContainerCheck,
@@ -32,7 +34,7 @@ from .__types__ import (
     ContainerMountModeEnum,
     ContainerProfileEnum,
     ContainerRestartPolicyEnum,
-
+    EndoscopicDeployer,
     OperationError,
     UnsupportedError,
     WorkloadExecStream,
@@ -44,7 +46,13 @@ from .__types__ import (
     WorkloadStatusOperation,
     WorkloadStatusStateEnum,
 )
-from .__utils__ import
+from .__utils__ import (
+    _MiB,
+    bytes_to_human_readable,
+    replace_image_with,
+    safe_json,
+    sensitive_env_var,
+)
 
 if TYPE_CHECKING:
     from collections.abc import Callable, Generator
@@ -139,7 +147,7 @@ class DockerWorkloadPlan(WorkloadPlan):
         super().validate_and_default()
 
         # Adjust default image namespace if needed.
-        if namespace := envs.
+        if namespace := envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_NAMESPACE:
             self.pause_image = replace_image_with(
                 image=self.pause_image,
                 namespace=namespace,
@@ -294,7 +302,7 @@ Name of the Docker deployer.
 """
 
 
-class DockerDeployer(
+class DockerDeployer(EndoscopicDeployer):
     """
     Deployer implementation for Docker containers.
     """
@@ -303,10 +311,6 @@ class DockerDeployer(Deployer):
     """
     Client for interacting with the Docker daemon.
     """
-    _container_ephemeral_files_dir: Path | None = None
-    """
-    Directory for ephemeral files inside containers, internal use only.
-    """
     _mutate_create_options: Callable[[dict[str, Any]], dict[str, Any]] | None = None
     """
     Function to handle mirrored deployment, internal use only.
@@ -383,48 +387,6 @@ class DockerDeployer(Deployer):
 
         return wrapper
 
-    @staticmethod
-    def _create_ephemeral_files(
-        workload: DockerWorkloadPlan,
-    ) -> dict[tuple[int, str], str]:
-        """
-        Create ephemeral files as local file for the workload.
-
-        Returns:
-            A mapping from (container index, configured path) to actual filename.
-
-        Raises:
-            OperationError:
-                If the ephemeral files fail to create.
-
-        """
-        # Map (container index, configured path) to actual filename.
-        ephemeral_filename_mapping: dict[tuple[int, str], str] = {}
-        ephemeral_files: list[tuple[str, str, int]] = []
-        for ci, c in enumerate(workload.containers):
-            for fi, f in enumerate(c.files or []):
-                if f.content is not None:
-                    fn = f"{workload.name}-{ci}-{fi}"
-                    ephemeral_filename_mapping[(ci, f.path)] = fn
-                    ephemeral_files.append((fn, f.content, f.mode))
-        if not ephemeral_filename_mapping:
-            return ephemeral_filename_mapping
-
-        # Create ephemeral files directory if not exists.
-        try:
-            for fn, fc, fm in ephemeral_files:
-                fp = envs.GPUSTACK_RUNTIME_DOCKER_EPHEMERAL_FILES_DIR.joinpath(fn)
-                with fp.open("w", encoding="utf-8") as f:
-                    f.write(fc)
-                    f.flush()
-                fp.chmod(fm)
-                logger.debug("Created local file %s with mode %s", fp, oct(fm))
-        except OSError as e:
-            msg = "Failed to create ephemeral files"
-            raise OperationError(msg) from e
-
-        return ephemeral_filename_mapping
-
     def _create_ephemeral_volumes(self, workload: DockerWorkloadPlan) -> dict[str, str]:
         """
         Create ephemeral volumes for the workload.
@@ -470,12 +432,12 @@ class DockerDeployer(Deployer):
         tag = tag or "latest"
         auth_config = None
         if (
-            envs.
-            and envs.
+            envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_USERNAME
+            and envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_PASSWORD
         ):
             auth_config = {
-                "username": envs.
-                "password": envs.
+                "username": envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_USERNAME,
+                "password": envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_PASSWORD,
             }
 
         logs = self._client.api.pull(
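For reference, the credentials assembled in the hunk above are handed to docker-py's low-level image pull. A minimal sketch of that call pattern follows; the repository, tag, and credentials are placeholders, not values taken from this package:

```python
import docker

# Sketch only: pull an image with explicit registry credentials, mirroring how
# the deployer builds auth_config from its environment variables.
client = docker.from_env()
for line in client.api.pull(
    "registry.example.com/acme/app",   # placeholder repository
    tag="latest",
    stream=True,
    decode=True,
    auth_config={"username": "example-user", "password": "example-pass"},
):
    # Each decoded line is a dict describing pull progress.
    print(line.get("status", ""))
```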
@@ -717,12 +679,10 @@ class DockerDeployer(Deployer):
         else:
             return d_container
 
+    @staticmethod
     def _append_container_mounts(
-        self,
         create_options: dict[str, Any],
         c: Container,
-        ci: int,
-        ephemeral_filename_mapping: dict[tuple[int, str] : str],
         ephemeral_volume_name_mapping: dict[str, str],
     ):
         """
@@ -738,17 +698,7 @@ class DockerDeployer(Deployer):
                 target="",
             )
 
-            if f.content is
-                # Ephemeral file, use from local ephemeral files directory.
-                if (ci, f.path) not in ephemeral_filename_mapping:
-                    continue
-                fn = ephemeral_filename_mapping[(ci, f.path)]
-                path = str(
-                    self._container_ephemeral_files_dir.joinpath(fn),
-                )
-                binding["Source"] = path
-                binding["Target"] = f"/{f.path.lstrip('/')}"
-            elif f.path:
+            if f.content is None and f.path:
                 # Host file, bind directly.
                 binding["Source"] = f.path
                 binding["Target"] = f.path
@@ -860,10 +810,39 @@ class DockerDeployer(Deployer):
 
         return healthcheck
 
+    @staticmethod
+    def _upload_ephemeral_files(
+        c: Container,
+        container: docker.models.containers.Container,
+    ):
+        if not c.files:
+            return
+
+        f_tar = io.BytesIO()
+        with tarfile.open(fileobj=f_tar, mode="w") as tar:
+            for f in c.files:
+                if f.content is None or not f.path:
+                    continue
+                fc_bytes = f.content.encode("utf-8")
+                info = tarfile.TarInfo(name=f.path.lstrip("/"))
+                info.size = len(fc_bytes)
+                info.mode = f.mode
+                tar.addfile(tarinfo=info, fileobj=io.BytesIO(fc_bytes))
+        if f_tar.getbuffer().nbytes == 0:
+            return
+
+        f_tar.seek(0)
+        uploaded = container.put_archive(
+            path="/",
+            data=f_tar.getvalue(),
+        )
+        if not uploaded:
+            msg = f"Failed to upload ephemeral files to container {container.name}"
+            raise OperationError(msg)
+
     def _create_containers(
         self,
         workload: DockerWorkloadPlan,
-        ephemeral_filename_mapping: dict[tuple[int, str] : str],
         ephemeral_volume_name_mapping: dict[str, str],
         pause_container: docker.models.containers.Container,
     ) -> (
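The new `_upload_ephemeral_files` above replaces the old on-disk ephemeral-files directory with an in-memory tar archive streamed into the freshly created container via docker-py's `put_archive`. A standalone sketch of the same pattern, with a placeholder container name, path, and content:

```python
import io
import tarfile

import docker

# Sketch only: write an in-memory file into a running container, as the hunk
# above does for ephemeral files. Container name, path, and content are placeholders.
client = docker.from_env()
container = client.containers.get("example-container")

buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w") as tar:
    data = b"key: value\n"
    info = tarfile.TarInfo(name="etc/example/config.yaml")  # relative to the extraction path
    info.size = len(data)
    info.mode = 0o600
    tar.addfile(tarinfo=info, fileobj=io.BytesIO(data))

# put_archive extracts the tar stream at the given path inside the container
# and returns True on success.
if not container.put_archive(path="/", data=buf.getvalue()):
    raise RuntimeError("failed to upload file into the container")
```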
@@ -1106,8 +1085,6 @@ class DockerDeployer(Deployer):
             self._append_container_mounts(
                 create_options,
                 c,
-                ci,
-                ephemeral_filename_mapping,
                 ephemeral_volume_name_mapping,
             )
 
@@ -1149,6 +1126,10 @@ class DockerDeployer(Deployer):
                 detach=detach,
                 **create_options,
             )
+
+            # Upload ephemeral files into the container.
+            self._upload_ephemeral_files(c, d_container)
+
         except docker.errors.APIError as e:
             msg = f"Failed to create container {container_name}{_detail_api_call_error(e)}"
             raise OperationError(msg) from e
@@ -1198,43 +1179,30 @@ class DockerDeployer(Deployer):
     def __init__(self):
         super().__init__(_NAME)
         self._client = self._get_client()
-        self._container_ephemeral_files_dir = (
-            envs.GPUSTACK_RUNTIME_DOCKER_EPHEMERAL_FILES_DIR
-        )
 
-    def
+    def _prepare_mirrored_deployment(self):
         """
-        Prepare for
+        Prepare for mirrored deployment.
 
         """
         # Prepare mirrored deployment if enabled.
         if self._mutate_create_options:
             return
         self._mutate_create_options = lambda o: o
-        if not envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT:
-            logger.debug("Mirrored deployment disabled")
-            return
 
         # Retrieve self-container info.
-        ## - Get Container name, default to hostname if not set.
-        self_container_id = envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME
-        if not self_container_id:
-            self_container_id = socket.gethostname()
-            logger.warning(
-                "Mirrored deployment enabled, but no Container name set, using hostname(%s) instead",
-                self_container_id,
-            )
         try:
-            self_container = self._find_self_container(
+            self_container = self._find_self_container()
+            if not self_container:
+                return
             logger.info(
                 "Mirrored deployment enabled, using self Container %s for options mirroring",
-                self_container.
+                self_container.short_id,
             )
             self_image = self_container.image
         except docker.errors.APIError:
             logger.exception(
-                "Mirrored deployment enabled, but failed to get self Container
-                self_container_id,
+                "Mirrored deployment enabled, but failed to get self Container, skipping",
             )
             return
 
@@ -1434,36 +1402,10 @@ class DockerDeployer(Deployer):
 
         self._mutate_create_options = mutate_create_options
 
-
-        if mirrored_mounts:
-            e_target = str(envs.GPUSTACK_RUNTIME_DOCKER_EPHEMERAL_FILES_DIR)
-            b_source = ""
-            b_target = ""
-            for m in mirrored_mounts:
-                c_target = m.get("Destination", "///")
-                if (
-                    e_target == c_target or e_target.startswith(f"{c_target}/")
-                ) and len(c_target) >= len(b_target):
-                    b_source = m.get("Source")
-                    b_target = c_target
-            if b_source:
-                result = Path(b_source)
-                if e_target != b_target:
-                    b_subpath = e_target.removeprefix(b_target)
-                    result = result.joinpath(b_subpath.lstrip("/"))
-                self._container_ephemeral_files_dir = result
-
-    def _find_self_container(
-        self,
-        self_container_id: str,
-    ) -> docker.models.containers.Container:
+    def _find_self_container(self) -> docker.models.containers.Container | None:
         """
         Find the current container if running inside a Docker container.
 
-        Args:
-            self_container_id:
-                The container name or ID to find.
-
         Returns:
             The Docker container if found, None otherwise.
 
@@ -1471,38 +1413,54 @@ class DockerDeployer(Deployer):
                 If failed to find itself.
 
         """
-        if envs.
-
-            return
-
-        # Find containers that matches the hostname.
-        containers: list[docker.models.containers.Container] = []
-        for c in self._client.containers.list():
-            # Ignore workload containers with host network enabled.
-            if _LABEL_WORKLOAD in c.labels:
-                continue
-            # Ignore containers that do not match the hostname.
-            if c.attrs["Config"].get("Hostname", "") != self_container_id:
-                continue
-            # Ignore containers that do not match the filter labels.
-            if envs.GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS and any(
-                c.labels.get(k) != v
-                for k, v in envs.GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS.items()
-            ):
-                continue
-            containers.append(c)
+        if not envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT:
+            logger.debug("Mirrored deployment disabled")
+            return None
 
-        #
-
-
-
-
-
-                "
+        # Get container ID or hostname.
+        self_container_id = envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME
+        if not self_container_id:
+            self_container_id = socket.gethostname()
+            debug_log_warning(
+                logger,
+                "Mirrored deployment enabled, but no Container name set, using hostname(%s) instead",
+                self_container_id,
             )
-        raise docker.errors.NotFound(msg)
 
-
+        if envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME:
+            # Directly get container.
+            self_container = self._client.containers.get(self_container_id)
+        else:
+            # Find containers that matches the hostname.
+            containers: list[docker.models.containers.Container] = []
+            for c in self._client.containers.list():
+                # Ignore workload containers with host network enabled.
+                if _LABEL_WORKLOAD in c.labels:
+                    continue
+                # Ignore containers that do not match the hostname.
+                if c.attrs["Config"].get("Hostname", "") != self_container_id:
+                    continue
+                # Ignore containers that do not match the filter labels.
+                if envs.GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS and any(
+                    c.labels.get(k) != v
+                    for k, v in envs.GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS.items()
+                ):
+                    continue
+                containers.append(c)
+
+            # Validate found containers.
+            if len(containers) != 1:
+                msg = (
+                    f"Found multiple Containers with the same hostname {self_container_id}, "
+                    if len(containers) > 1
+                    else f"Not found Container with hostname {self_container_id}, "
+                    "please use `--env GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME=...` to specify the exact Container name"
+                )
+                raise docker.errors.NotFound(msg)
+
+            self_container = containers[0]
+
+        return self_container
 
     @_supported
     def _create(self, workload: WorkloadPlan):
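The reworked `_find_self_container` above either resolves `GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME` directly or falls back to matching the process hostname against running containers. A rough standalone sketch of that hostname-matching fallback, without the workload-label and filter-label checks used in the package:

```python
import socket

import docker

# Sketch only: identify "our own" container by comparing the local hostname
# with each container's configured hostname. Outside a container this will
# normally match nothing.
client = docker.from_env()
hostname = socket.gethostname()

matches = [
    c
    for c in client.containers.list()
    if c.attrs["Config"].get("Hostname", "") == hostname
]
if len(matches) == 1:
    print("running inside container", matches[0].short_id)
else:
    print("no unique self container found:", len(matches))
```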
@@ -1528,7 +1486,7 @@ class DockerDeployer(Deployer):
             msg = f"Invalid workload type: {type(workload)}"
             raise TypeError(msg)
 
-        self.
+        self._prepare_mirrored_deployment()
 
         if isinstance(workload, WorkloadPlan):
            workload = DockerWorkloadPlan(**workload.__dict__)
@@ -1536,12 +1494,6 @@ class DockerDeployer(Deployer):
         if logger.isEnabledFor(logging.DEBUG):
             logger.debug("Creating workload:\n%s", workload.to_yaml())
 
-        # Create ephemeral file if needed,
-        # (container index, configured path): <actual filename>
-        ephemeral_filename_mapping: dict[tuple[int, str] : str] = (
-            self._create_ephemeral_files(workload)
-        )
-
         # Create ephemeral volumes if needed,
         # <configured volume name>: <actual volume name>
         ephemeral_volume_name_mapping: dict[str, str] = self._create_ephemeral_volumes(
@@ -1554,7 +1506,6 @@ class DockerDeployer(Deployer):
         # Create init/run containers.
         init_containers, run_containers = self._create_containers(
             workload,
-            ephemeral_filename_mapping,
             ephemeral_volume_name_mapping,
             pause_container,
         )
@@ -1696,17 +1647,6 @@ class DockerDeployer(Deployer):
             msg = f"Failed to delete volumes for workload {name}{_detail_api_call_error(e)}"
             raise OperationError(msg) from e
 
-        # Remove all ephemeral files for the workload.
-        try:
-            for fp in envs.GPUSTACK_RUNTIME_DOCKER_EPHEMERAL_FILES_DIR.glob(
-                f"{name}-*",
-            ):
-                if fp.is_file():
-                    fp.unlink(missing_ok=True)
-        except OSError as e:
-            msg = f"Failed to delete ephemeral files for workload {name}"
-            raise OperationError(msg) from e
-
         return workload
 
     @_supported
@@ -1946,6 +1886,211 @@ class DockerDeployer(Deployer):
             return output
         return DockerWorkloadExecStream(output)
 
+    @_supported
+    def _inspect(
+        self,
+        name: WorkloadName,
+        namespace: WorkloadNamespace | None = None,
+    ) -> str | None:
+        """
+        Inspect a Docker workload.
+
+        Args:
+            name:
+                The name of the workload.
+            namespace:
+                The namespace of the workload.
+
+        Returns:
+            The inspection result as a JSON string. None if not found.
+
+        Raises:
+            UnsupportedError:
+                If Docker is not supported in the current environment.
+            OperationError:
+                If the Docker workload fails to inspect.
+
+        """
+        workload = self._get(name=name, namespace=namespace)
+        if not workload:
+            return None
+
+        d_containers = getattr(workload, "_d_containers", [])
+        if not d_containers:
+            return None
+
+        result = []
+        for c in d_containers:
+            c_attrs = c.attrs
+            # Mask sensitive environment variables
+            if "Env" in c_attrs["Config"]:
+                for i, env in enumerate(c_attrs["Config"]["Env"] or []):
+                    env_name, _ = env.split("=", maxsplit=1)
+                    if sensitive_env_var(env_name):
+                        c_attrs["Config"]["Env"][i] = f"{env_name}=******"
+            result.append(c_attrs)
+        return safe_json(result, indent=2)
+
+    def _find_self_container_for_endoscopy(self) -> docker.models.containers.Container:
+        """
+        Find the self container for endoscopy.
+        Only works in mirrored deployment mode.
+
+        Returns:
+            The self container object.
+
+        Raises:
+            UnsupportedError:
+                If endoscopy is not supported in the current environment.
+
+        """
+        try:
+            self_container = self._find_self_container()
+        except docker.errors.APIError as e:
+            msg = "Endoscopy is not supported in the current environment: Mirrored deployment enabled, but failed to get self Container"
+            raise UnsupportedError(msg) from e
+        except Exception as e:
+            msg = "Endoscopy is not supported in the current environment: Failed to get self Container"
+            raise UnsupportedError(msg) from e
+
+        if not self_container:
+            msg = "Endoscopy is not supported in the current environment: Mirrored deployment disabled"
+            raise UnsupportedError(msg)
+        return self_container
+
+    def _endoscopic_logs(
+        self,
+        timestamps: bool = False,
+        tail: int | None = None,
+        since: int | None = None,
+        follow: bool = False,
+    ) -> Generator[bytes | str, None, None] | bytes | str:
+        """
+        Get the logs of the deployer itself.
+        Only works in mirrored deployment mode.
+
+        Args:
+            timestamps:
+                Show timestamps in the logs.
+            tail:
+                Number of lines to show from the end of the logs.
+            since:
+                Show logs since the given epoch in seconds.
+            follow:
+                Whether to follow the logs.
+
+        Returns:
+            The logs as a byte string or a generator yielding byte strings if follow is True.
+
+        Raises:
+            UnsupportedError:
+                If endoscopy is not supported in the current environment.
+            OperationError:
+                If the deployer fails to get logs.
+
+        """
+        self_container = self._find_self_container_for_endoscopy()
+
+        logs_options = {
+            "timestamps": timestamps,
+            "tail": tail if tail >= 0 else None,
+            "since": since,
+            "follow": follow,
+        }
+
+        try:
+            output = self_container.logs(
+                stream=follow,
+                **logs_options,
+            )
+        except docker.errors.APIError as e:
+            msg = f"Failed to fetch logs for self Container {self_container.short_id}{_detail_api_call_error(e)}"
+            raise OperationError(msg) from e
+        else:
+            return output
+
+    def _endoscopic_exec(
+        self,
+        detach: bool = True,
+        command: list[str] | None = None,
+        args: list[str] | None = None,
+    ) -> WorkloadExecStream | bytes | str:
+        """
+        Execute a command in the deployer itself.
+        Only works in mirrored deployment mode.
+
+        Args:
+            detach:
+                Whether to detach from the command.
+            command:
+                The command to execute.
+                If not specified, use /bin/sh and implicitly attach.
+            args:
+                The arguments to pass to the command.
+
+        Returns:
+            If detach is False, return a WorkloadExecStream.
+            otherwise, return the output of the command as a byte string or string.
+
+        Raises:
+            UnsupportedError:
+                If endoscopy is not supported in the current environment.
+            OperationError:
+                If the deployer fails to execute the command.
+
+        """
+        self_container = self._find_self_container_for_endoscopy()
+
+        attach = not detach or not command
+        exec_options = {
+            "stdout": True,
+            "stderr": True,
+            "stdin": attach,
+            "socket": attach,
+            "tty": attach,
+            "cmd": [*command, *(args or [])] if command else ["/bin/sh"],
+        }
+
+        try:
+            _, output = self_container.exec_run(
+                detach=False,
+                **exec_options,
+            )
+        except docker.errors.APIError as e:
+            msg = f"Failed to exec command in self Container {self_container.short_id}{_detail_api_call_error(e)}"
+            raise OperationError(msg) from e
+        else:
+            if not attach:
+                return output
+            return DockerWorkloadExecStream(output)
+
+    def _endoscopic_inspect(self) -> str:
+        """
+        Inspect the deployer itself.
+        Only works in mirrored deployment mode.
+
+        Returns:
+            The inspection result.
+
+        Raises:
+            UnsupportedError:
+                If endoscopy is not supported in the current environment.
+            OperationError:
+                If the deployer fails to execute the command.
+
+        """
+        self_container = self._find_self_container_for_endoscopy()
+
+        c_attrs = self_container.attrs
+        # Mask sensitive environment variables
+        if "Env" in c_attrs["Config"]:
+            for i, env in enumerate(c_attrs["Config"]["Env"] or []):
+                env_name, _ = env.split("=", maxsplit=1)
+                if sensitive_env_var(env_name):
+                    c_attrs["Config"]["Env"][i] = f"{env_name}=******"
+
+        return safe_json(c_attrs, indent=2)
+
 
     def _has_restart_policy(
         container: docker.models.containers.Container,
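The endoscopic `_inspect` and `_endoscopic_inspect` methods added in this hunk serialize container attributes but mask sensitive environment variables first. A simplified sketch of that masking step; the keyword list below stands in for the package's `sensitive_env_var` helper and is an assumption, not its actual rule set:

```python
import json

import docker

# Sketch only: mask likely-sensitive environment variable values before
# dumping a container's attributes. SENSITIVE_HINTS is a placeholder heuristic.
SENSITIVE_HINTS = ("PASSWORD", "TOKEN", "SECRET", "KEY")


def mask_env(attrs: dict) -> dict:
    env = attrs.get("Config", {}).get("Env") or []
    for i, item in enumerate(env):
        name, _, _ = item.partition("=")
        if any(hint in name.upper() for hint in SENSITIVE_HINTS):
            env[i] = f"{name}=******"
    return attrs


client = docker.from_env()
container = client.containers.get("example-container")  # placeholder name
print(json.dumps(mask_env(container.attrs), indent=2, default=str))
```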