gpustack-runtime 0.1.39__py3-none-any.whl → 0.1.39.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,17 +12,19 @@ from . import deployer, detector
12
12
  from ._version import commit_id, version
13
13
  from .cmds import (
14
14
  CopyImagesSubCommand,
15
- CreateRunnerWorkloadSubCommand,
16
15
  CreateWorkloadSubCommand,
17
16
  DeleteWorkloadsSubCommand,
18
17
  DeleteWorkloadSubCommand,
19
18
  DetectDevicesSubCommand,
19
+ ExecSelfSubCommand,
20
20
  ExecWorkloadSubCommand,
21
21
  GetDevicesTopologySubCommand,
22
22
  GetWorkloadSubCommand,
23
+ InspectSelfSubCommand,
23
24
  InspectWorkloadSubCommand,
24
25
  ListImagesSubCommand,
25
26
  ListWorkloadsSubCommand,
27
+ LogsSelfSubCommand,
26
28
  LogsWorkloadSubCommand,
27
29
  SaveImagesSubCommand,
28
30
  )
@@ -59,7 +61,6 @@ def main():
59
61
  subcommand_parser = parser.add_subparsers(
60
62
  help="gpustack-runtime command helpers",
61
63
  )
62
- CreateRunnerWorkloadSubCommand.register(subcommand_parser)
63
64
  CreateWorkloadSubCommand.register(subcommand_parser)
64
65
  DeleteWorkloadSubCommand.register(subcommand_parser)
65
66
  DeleteWorkloadsSubCommand.register(subcommand_parser)
@@ -73,6 +74,9 @@ def main():
73
74
  ListImagesSubCommand.register(subcommand_parser)
74
75
  SaveImagesSubCommand.register(subcommand_parser)
75
76
  CopyImagesSubCommand.register(subcommand_parser)
77
+ LogsSelfSubCommand.register(subcommand_parser)
78
+ ExecSelfSubCommand.register(subcommand_parser)
79
+ InspectSelfSubCommand.register(subcommand_parser)
76
80
 
77
81
  # Autocomplete
78
82
  argcomplete.autocomplete(parser)
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
27
27
  __commit_id__: COMMIT_ID
28
28
  commit_id: COMMIT_ID
29
29
 
30
- __version__ = version = '0.1.39'
31
- __version_tuple__ = version_tuple = (0, 1, 39)
30
+ __version__ = version = '0.1.39.post2'
31
+ __version_tuple__ = version_tuple = (0, 1, 39, 'post2')
32
32
  try:
33
33
  from ._version_appendix import git_commit
34
34
  __commit_id__ = commit_id = git_commit
@@ -1 +1 @@
1
- git_commit = "c8c93ed"
1
+ git_commit = "e044bab"
@@ -1,14 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from .deployer import (
4
- CreateRunnerWorkloadSubCommand,
5
4
  CreateWorkloadSubCommand,
6
5
  DeleteWorkloadsSubCommand,
7
6
  DeleteWorkloadSubCommand,
7
+ ExecSelfSubCommand,
8
8
  ExecWorkloadSubCommand,
9
9
  GetWorkloadSubCommand,
10
+ InspectSelfSubCommand,
10
11
  InspectWorkloadSubCommand,
11
12
  ListWorkloadsSubCommand,
13
+ LogsSelfSubCommand,
12
14
  LogsWorkloadSubCommand,
13
15
  )
14
16
  from .detector import DetectDevicesSubCommand, GetDevicesTopologySubCommand
@@ -23,17 +25,19 @@ from .images import (
23
25
 
24
26
  __all__ = [
25
27
  "CopyImagesSubCommand",
26
- "CreateRunnerWorkloadSubCommand",
27
28
  "CreateWorkloadSubCommand",
28
29
  "DeleteWorkloadSubCommand",
29
30
  "DeleteWorkloadsSubCommand",
30
31
  "DetectDevicesSubCommand",
32
+ "ExecSelfSubCommand",
31
33
  "ExecWorkloadSubCommand",
32
34
  "GetDevicesTopologySubCommand",
33
35
  "GetWorkloadSubCommand",
36
+ "InspectSelfSubCommand",
34
37
  "InspectWorkloadSubCommand",
35
38
  "ListImagesSubCommand",
36
39
  "ListWorkloadsSubCommand",
40
+ "LogsSelfSubCommand",
37
41
  "LogsWorkloadSubCommand",
38
42
  "PlatformedImage",
39
43
  "SaveImagesSubCommand",
@@ -23,14 +23,17 @@ from ..deployer import (
23
23
  WorkloadPlan,
24
24
  WorkloadStatus,
25
25
  WorkloadStatusStateEnum,
26
+ async_logs_self,
26
27
  async_logs_workload,
27
28
  create_workload,
28
29
  delete_workload,
30
+ exec_self,
29
31
  exec_workload,
30
32
  get_workload,
33
+ inspect_self,
34
+ inspect_workload,
31
35
  list_workloads,
32
36
  )
33
- from ..deployer.__utils__ import safe_json, safe_yaml
34
37
  from ..detector import supported_backends
35
38
  from .__types__ import SubCommand
36
39
 
@@ -78,251 +81,6 @@ _IGNORE_ENVS_SUFFIX = (
78
81
  "_DRIVER_CAPABILITIES",
79
82
  )
80
83
 
81
- _IGNORE_SENSITIVE_ENVS_SUFFIX = (
82
- "_KEY",
83
- "_key",
84
- "_TOKEN",
85
- "_token",
86
- "_SECRET",
87
- "_secret",
88
- "_PASSWORD",
89
- "_password",
90
- "_PASS",
91
- "_pass",
92
- )
93
-
94
-
95
- class CreateRunnerWorkloadSubCommand(SubCommand):
96
- """
97
- Command to create a runner workload deployment.
98
- """
99
-
100
- backend: str
101
- device: str
102
- command_script: str | None
103
- port: int
104
- host_network: bool
105
- check: bool
106
- namespace: str
107
- service: str
108
- version: str
109
- name: str
110
- volume: str
111
- extra_args: list[str]
112
-
113
- @staticmethod
114
- def register(parser: _SubParsersAction):
115
- deploy_parser = parser.add_parser(
116
- "create-runner",
117
- help="Create a runner workload deployment",
118
- )
119
-
120
- deploy_parser.add_argument(
121
- "--backend",
122
- type=str,
123
- help="Backend to use (default: detect from current environment)",
124
- choices=supported_backends(),
125
- )
126
-
127
- deploy_parser.add_argument(
128
- "--device",
129
- type=str,
130
- help="Device to use, multiple devices join by comma (default: all devices)",
131
- default="all",
132
- )
133
-
134
- deploy_parser.add_argument(
135
- "--command-script-file",
136
- type=str,
137
- help="Path of command script for the workload",
138
- )
139
-
140
- deploy_parser.add_argument(
141
- "--port",
142
- type=int,
143
- help="Port to expose",
144
- )
145
-
146
- deploy_parser.add_argument(
147
- "--host-network",
148
- action="store_true",
149
- help="Use host network (default: False)",
150
- default=False,
151
- )
152
-
153
- deploy_parser.add_argument(
154
- "--check",
155
- action="store_true",
156
- help="Enable health check, needs --port (default: False)",
157
- default=False,
158
- )
159
-
160
- deploy_parser.add_argument(
161
- "--namespace",
162
- type=str,
163
- help="Namespace of the runner",
164
- )
165
-
166
- deploy_parser.add_argument(
167
- "service",
168
- type=str,
169
- help="Service of the runner",
170
- )
171
-
172
- deploy_parser.add_argument(
173
- "version",
174
- type=str,
175
- help="Version of the runner",
176
- )
177
-
178
- deploy_parser.add_argument(
179
- "volume",
180
- type=str,
181
- help="Volume to mount",
182
- )
183
-
184
- deploy_parser.add_argument(
185
- "extra_args",
186
- nargs=REMAINDER,
187
- help="Extra arguments for the runner",
188
- )
189
-
190
- deploy_parser.set_defaults(func=CreateRunnerWorkloadSubCommand)
191
-
192
- def __init__(self, args: Namespace):
193
- self.backend = args.backend
194
- self.device = args.device
195
- self.command_script = None
196
- self.port = args.port
197
- self.host_network = args.host_network
198
- self.check = args.check
199
- self.namespace = args.namespace
200
- self.service = args.service
201
- self.version = args.version
202
- self.name = f"{args.service}-{args.version}".lower().replace(".", "-")
203
- self.volume = args.volume
204
- self.extra_args = args.extra_args
205
-
206
- if not self.name or not self.volume:
207
- msg = "The name and volume arguments are required."
208
- raise ValueError(msg)
209
-
210
- if args.command_script_file:
211
- command_script_file = Path(args.command_script_file)
212
- if not command_script_file.is_file():
213
- msg = f"The command script file '{command_script_file}' does not exist."
214
- raise ValueError(msg)
215
- self.command_script = command_script_file.read_text(
216
- encoding="utf-8",
217
- ).strip()
218
-
219
- def run(self):
220
- env = [
221
- ContainerEnv(
222
- name=name,
223
- value=value,
224
- )
225
- for name, value in os.environ.items()
226
- if not name.startswith(_IGNORE_ENVS_PREFIX)
227
- and not name.endswith(_IGNORE_ENVS_SUFFIX)
228
- ]
229
- if self.backend:
230
- resources = ContainerResources(
231
- **{
232
- v: self.device
233
- for k, v in envs.GPUSTACK_RUNTIME_DETECT_BACKEND_MAP_RESOURCE_KEY.items()
234
- if k == self.backend
235
- },
236
- )
237
- else:
238
- resources = ContainerResources(
239
- **{
240
- envs.GPUSTACK_RUNTIME_DEPLOY_AUTOMAP_RESOURCE_KEY: self.device,
241
- },
242
- )
243
- mounts = [
244
- ContainerMount(
245
- path=self.volume,
246
- ),
247
- ]
248
- execution = ContainerExecution(
249
- command_script=self.command_script,
250
- args=self.extra_args,
251
- )
252
- ports = (
253
- [
254
- ContainerPort(
255
- internal=self.port,
256
- ),
257
- ]
258
- if self.port
259
- else None
260
- )
261
- checks = (
262
- [
263
- ContainerCheck(
264
- delay=60,
265
- interval=10,
266
- timeout=5,
267
- retries=6,
268
- tcp=ContainerCheckTCP(port=self.port),
269
- teardown=True,
270
- ),
271
- ]
272
- if self.check and self.port
273
- else None
274
- )
275
- plan = WorkloadPlan(
276
- name=self.name,
277
- namespace=self.namespace,
278
- host_network=self.host_network,
279
- containers=[
280
- Container(
281
- restart_policy=(
282
- ContainerRestartPolicyEnum.NEVER
283
- if not self.check
284
- else ContainerRestartPolicyEnum.ALWAYS
285
- ),
286
- image=f"gpustack/runner:{self.backend if self.backend else 'Host'}X.Y-{self.service}{self.version}",
287
- name=self.name,
288
- envs=env,
289
- resources=resources,
290
- mounts=mounts,
291
- execution=execution,
292
- ports=ports,
293
- checks=checks,
294
- ),
295
- ],
296
- )
297
- create_workload(plan)
298
- print(f"Created workload '{self.name}'.")
299
-
300
- while True:
301
- st = get_workload(
302
- name=self.name,
303
- namespace=self.namespace,
304
- )
305
- if st and st.state not in (
306
- WorkloadStatusStateEnum.PENDING,
307
- WorkloadStatusStateEnum.INITIALIZING,
308
- ):
309
- break
310
- time.sleep(1)
311
-
312
- print("\033[2J\033[H", end="")
313
-
314
- async def stream_logs():
315
- logs_result = await async_logs_workload(
316
- name=self.name,
317
- namespace=self.namespace,
318
- tail=-1,
319
- follow=True,
320
- )
321
- async for line in logs_result:
322
- print(line.decode("utf-8").rstrip())
323
-
324
- asyncio.run(stream_logs())
325
-
326
84
 
327
85
  class CreateWorkloadSubCommand(SubCommand):
328
86
  """
@@ -358,8 +116,7 @@ class CreateWorkloadSubCommand(SubCommand):
358
116
  deploy_parser.add_argument(
359
117
  "--device",
360
118
  type=str,
361
- help="Device to use, multiple devices join by comma (default: all devices)",
362
- default="all",
119
+ help="Device to use, multiple devices join by comma, all for all devices",
363
120
  )
364
121
 
365
122
  deploy_parser.add_argument(
@@ -456,20 +213,22 @@ class CreateWorkloadSubCommand(SubCommand):
456
213
  if not name.startswith(_IGNORE_ENVS_PREFIX)
457
214
  and not name.endswith(_IGNORE_ENVS_SUFFIX)
458
215
  ]
459
- if self.backend:
460
- resources = ContainerResources(
461
- **{
462
- v: self.device
463
- for k, v in envs.GPUSTACK_RUNTIME_DETECT_BACKEND_MAP_RESOURCE_KEY.items()
464
- if k == self.backend
465
- },
466
- )
467
- else:
468
- resources = ContainerResources(
469
- **{
470
- envs.GPUSTACK_RUNTIME_DEPLOY_AUTOMAP_RESOURCE_KEY: self.device,
471
- },
472
- )
216
+ resources = None
217
+ if self.device:
218
+ if self.backend:
219
+ resources = ContainerResources(
220
+ **{
221
+ v: self.device
222
+ for k, v in envs.GPUSTACK_RUNTIME_DETECT_BACKEND_MAP_RESOURCE_KEY.items()
223
+ if k == self.backend
224
+ },
225
+ )
226
+ else:
227
+ resources = ContainerResources(
228
+ **{
229
+ envs.GPUSTACK_RUNTIME_DEPLOY_AUTOMAP_RESOURCE_KEY: self.device,
230
+ },
231
+ )
473
232
  mounts = [
474
233
  ContainerMount(
475
234
  path=self.volume,
@@ -1015,35 +774,175 @@ class InspectWorkloadSubCommand(SubCommand):
1015
774
  raise ValueError(msg)
1016
775
 
1017
776
  def run(self):
1018
- workload = get_workload(self.name, self.namespace)
1019
- if not workload:
777
+ result = inspect_workload(self.name, self.namespace)
778
+ if not result:
1020
779
  print(f"Workload '{self.name}' not found.")
1021
780
  return
1022
781
 
1023
- if hasattr(workload, "_d_containers"):
1024
- result = []
1025
- for c in workload._d_containers: # noqa: SLF001
1026
- c_attrs = c.attrs
1027
- # Mask sensitive environment variables
1028
- if "Env" in c_attrs["Config"]:
1029
- for i, env in enumerate(c_attrs["Config"]["Env"] or []):
1030
- env_name, _ = env.split("=", maxsplit=1)
1031
- if env_name.endswith(_IGNORE_SENSITIVE_ENVS_SUFFIX):
1032
- c_attrs["Config"]["Env"][i] = f"{env_name}=******"
1033
- result.append(c_attrs)
1034
- print(safe_json(result, indent=2))
1035
- elif hasattr(workload, "_k_pod"):
1036
- k_pod = workload._k_pod # noqa: SLF001
1037
- # Remove managed fields to reduce output size
1038
- k_pod.metadata.managed_fields = None
1039
- # Mask sensitive environment variables
1040
- for c in k_pod.spec.containers:
1041
- for env in c.env or []:
1042
- if env.name.endswith(_IGNORE_SENSITIVE_ENVS_SUFFIX):
1043
- env.value = "******"
1044
- print(safe_yaml(k_pod, indent=2, sort_keys=False))
1045
- else:
1046
- print("No detailed inspection information available for this workload.")
782
+ print(result)
783
+
784
+
785
class LogsSelfSubCommand(SubCommand):
    """
    Command to get the logs of the deployer itself.

    Registers the ``logs-self`` subcommand and, when run, prints the logs
    returned by ``async_logs_self`` either as a single dump or as a
    followed stream.
    """

    # Number of trailing log lines to request (CLI default: -1).
    tail: int
    # Whether to keep streaming log lines as they are produced.
    follow: bool

    @staticmethod
    def register(parser: _SubParsersAction):
        """Attach the ``logs-self`` subcommand and its options to ``parser``."""
        logs_parser = parser.add_parser(
            "logs-self",
            help="Get the logs of the deployer itself",
        )

        logs_parser.add_argument(
            "--tail",
            type=int,
            help="Number of lines to show from the end of the logs (default: -1)",
            default=-1,
        )

        logs_parser.add_argument(
            "--follow",
            "-f",
            action="store_true",
            help="Follow the logs in real-time",
        )

        logs_parser.set_defaults(func=LogsSelfSubCommand)

    def __init__(self, args: Namespace):
        """Copy the parsed ``--tail`` / ``--follow`` options onto the command."""
        self.tail = args.tail
        self.follow = args.follow

    @staticmethod
    def _to_text(chunk) -> str:
        """
        Normalise one log chunk to printable text without trailing whitespace.

        Accepts ``str`` as-is and decodes anything else as UTF-8. Undecodable
        bytes are replaced rather than raised, so a single malformed log line
        cannot abort the whole stream.
        """
        if isinstance(chunk, str):
            text = chunk
        else:
            text = chunk.decode("utf-8", errors="replace")
        return text.rstrip()

    def run(self):
        """Clear the terminal, then print (or follow) the deployer's logs."""
        # ANSI: erase the screen and move the cursor to the top-left corner.
        print("\033[2J\033[H", end="")

        async def stream_logs():
            logs_result = await async_logs_self(
                tail=self.tail,
                follow=self.follow,
            )
            if self.follow:
                # Followed logs arrive as an async iterator of chunks; each
                # chunk is normalised the same way as the one-shot result.
                async for line in logs_result:
                    print(self._to_text(line))
            else:
                print(self._to_text(logs_result))

        asyncio.run(stream_logs())
837
+
838
+
839
class ExecSelfSubCommand(SubCommand):
    """
    Command to execute a command in the deployer itself.

    Registers the ``exec-self`` subcommand. In non-interactive mode the
    result of ``exec_self`` is printed; in interactive mode the result is
    attached to the local terminal through ``dockerpty``.
    """

    # Whether to attach the local terminal to the exec session.
    interactive: bool
    # The command (and its arguments) to execute.
    command: list[str]

    @staticmethod
    def register(parser: _SubParsersAction):
        """Attach the ``exec-self`` subcommand and its options to ``parser``."""
        exec_parser = parser.add_parser(
            "exec-self",
            help="Execute a command in the deployer itself",
        )

        exec_parser.add_argument(
            "--interactive",
            "-i",
            action="store_true",
            help="Interactive mode",
        )

        exec_parser.add_argument(
            "command",
            nargs=REMAINDER,
            help="Command to execute in the deployer itself",
        )

        exec_parser.set_defaults(func=ExecSelfSubCommand)

    def __init__(self, args: Namespace):
        """Copy the parsed ``--interactive`` flag and command onto the command."""
        self.interactive = args.interactive
        self.command = args.command

    def run(self):
        """
        Execute the command and either print its output or attach a terminal.

        Exits the process with status 1 when interactive mode is requested
        but ``dockerpty`` cannot be imported.
        """
        # dockerpty is only needed (and only imported) for interactive mode.
        if self.interactive:
            try:
                from dockerpty import io, pty  # noqa: PLC0415
            except ImportError:
                print(
                    "dockerpty is required for interactive mode. "
                    "Please install it via 'pip install dockerpty'.",
                )
                sys.exit(1)

        # ANSI: erase the screen and move the cursor to the top-left corner.
        print("\033[2J\033[H", end="")
        exec_result = exec_self(
            detach=not self.interactive,
            command=self.command,
        )

        # Non-interactive mode: print the captured output and return.
        # NOTE(review): the executed command's exit status is not propagated
        # here; confirm whether exec_self exposes it.
        if not self.interactive:
            if isinstance(exec_result, bytes):
                # Replace undecodable bytes instead of crashing on the output.
                print(exec_result.decode("utf-8", errors="replace").rstrip())
            else:
                print(exec_result)
            return

        # Interactive mode: use dockerpty to attach to the exec session.
        # NOTE(review): presumably exec_result is a socket-like object here;
        # verify against exec_self.

        class ExecOperation(pty.Operation):
            """dockerpty operation that pumps stdin/stdout through ``sock``."""

            def __init__(self, sock):
                self.stdin = sys.stdin
                self.stdout = sys.stdout
                self.sock = io.Stream(sock)

            def israw(self, **_):
                return self.stdout.isatty()

            def start(self, **_):
                sock = self.sockets()
                return [
                    io.Pump(io.Stream(self.stdin), sock, wait_for_output=False),
                    io.Pump(sock, io.Stream(self.stdout), propagate_close=False),
                ]

            def resize(self, height, width, **_):
                # Terminal resize requests are intentionally ignored.
                pass

            def sockets(self):
                return self.sock

        exec_op = ExecOperation(exec_result)
        pty.PseudoTerminal(None, exec_op).start()
925
+
926
+
927
class InspectSelfSubCommand(SubCommand):
    """
    Command to diagnose the deployer itself.

    Registers the ``inspect-self`` subcommand, which takes no options and
    prints the value returned by ``inspect_self()``.
    """

    @staticmethod
    def register(parser: _SubParsersAction):
        """Attach the ``inspect-self`` subcommand to ``parser``."""
        self_parser = parser.add_parser("inspect-self", help="Inspect the deployer itself")
        self_parser.set_defaults(func=InspectSelfSubCommand)

    def __init__(self, args: Namespace):
        """Accept the parsed arguments; nothing from them is stored."""

    def run(self):
        """Print the result of inspecting the deployer itself."""
        report = inspect_self()
        print(report)
1047
946
 
1048
947
 
1049
948
  def format_workloads_json(sts: list[WorkloadStatus]) -> str: