gpustack-runtime 0.1.39.post2__py3-none-any.whl → 0.1.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. gpustack_runtime/__main__.py +7 -3
  2. gpustack_runtime/_version.py +2 -2
  3. gpustack_runtime/_version_appendix.py +1 -1
  4. gpustack_runtime/cmds/__init__.py +2 -0
  5. gpustack_runtime/cmds/deployer.py +84 -2
  6. gpustack_runtime/cmds/images.py +2 -0
  7. gpustack_runtime/deployer/__init__.py +2 -0
  8. gpustack_runtime/deployer/__types__.py +52 -28
  9. gpustack_runtime/deployer/__utils__.py +99 -112
  10. gpustack_runtime/deployer/cdi/__init__.py +81 -0
  11. gpustack_runtime/deployer/cdi/__types__.py +667 -0
  12. gpustack_runtime/deployer/cdi/thead.py +103 -0
  13. gpustack_runtime/deployer/docker.py +42 -24
  14. gpustack_runtime/deployer/kuberentes.py +8 -4
  15. gpustack_runtime/deployer/podman.py +41 -23
  16. gpustack_runtime/detector/__init__.py +62 -3
  17. gpustack_runtime/detector/__types__.py +11 -0
  18. gpustack_runtime/detector/__utils__.py +23 -0
  19. gpustack_runtime/detector/amd.py +17 -9
  20. gpustack_runtime/detector/hygon.py +6 -1
  21. gpustack_runtime/detector/iluvatar.py +20 -5
  22. gpustack_runtime/detector/mthreads.py +8 -12
  23. gpustack_runtime/detector/nvidia.py +365 -168
  24. gpustack_runtime/detector/pyacl/__init__.py +9 -1
  25. gpustack_runtime/detector/pyamdgpu/__init__.py +8 -0
  26. gpustack_runtime/detector/pycuda/__init__.py +9 -1
  27. gpustack_runtime/detector/pydcmi/__init__.py +9 -2
  28. gpustack_runtime/detector/pyhgml/__init__.py +5879 -0
  29. gpustack_runtime/detector/pyhgml/libhgml.so +0 -0
  30. gpustack_runtime/detector/pyhgml/libuki.so +0 -0
  31. gpustack_runtime/detector/pyhsa/__init__.py +9 -0
  32. gpustack_runtime/detector/pyixml/__init__.py +89 -164
  33. gpustack_runtime/detector/pyrocmcore/__init__.py +42 -24
  34. gpustack_runtime/detector/pyrocmsmi/__init__.py +141 -138
  35. gpustack_runtime/detector/thead.py +733 -0
  36. gpustack_runtime/envs.py +128 -55
  37. {gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.40.dist-info}/METADATA +4 -2
  38. gpustack_runtime-0.1.40.dist-info/RECORD +55 -0
  39. gpustack_runtime/detector/pymtml/__init__.py +0 -770
  40. gpustack_runtime-0.1.39.post2.dist-info/RECORD +0 -49
  41. {gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.40.dist-info}/WHEEL +0 -0
  42. {gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.40.dist-info}/entry_points.txt +0 -0
  43. {gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.40.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,667 @@
1
+ from __future__ import annotations as __future_annotations__
2
+
3
+ from abc import ABC, abstractmethod
4
+ from functools import lru_cache
5
+ from typing import TYPE_CHECKING, Literal
6
+
7
+ from ... import envs
8
+ from ...detector import (
9
+ ManufacturerEnum,
10
+ manufacturer_to_backend,
11
+ )
12
+ from ..__utils__ import load_yaml_or_json, safe_json, safe_yaml
13
+
14
+ if TYPE_CHECKING:
15
+ from pathlib import Path
16
+
17
+ from ...detector import Devices
18
+
19
+ # CDI spec version used when a config is built or loaded without an explicit `cdiVersion`.
+ _DEFAULT_CDI_VERSION = "0.5.0"
20
+
21
+
22
class ConfigDeviceNode(dict):
    """
    CDI device node configuration.

    The instance is a plain mapping keyed by the CDI field names
    (`path`, `hostPath`, `permissions`, `type`, `major`, `minor`, `uid`, `gid`);
    optional fields are only present when a value was supplied.

    """

    def __init__(
        self,
        path: str,
        host_path: str | None = None,
        permissions: str | None = None,
        type_: str = "c",
        major: int | None = None,
        minor: int | None = None,
        uid: int | None = None,
        gid: int | None = None,
    ):
        """
        Initialize a CDI device node configuration.

        Args:
            path:
                The path inside the container.
            host_path:
                The path on the host system. Optional.
            permissions:
                The permissions for the device. Optional.
            type_:
                The type of the device. Default is "c".
            major:
                The major number of the device. Optional.
                Only stored when `minor` is given as well.
            minor:
                The minor number of the device. Optional.
                Only stored when `major` is given as well.
            uid:
                The user ID for the device. Optional.
            gid:
                The group ID for the device. Optional.

        Raises:
            ValueError:
                If `path` is empty.

        """
        if not path:
            msg = "path cannot be empty"
            raise ValueError(msg)

        super().__init__()

        self["path"] = path
        if host_path is not None:
            self["hostPath"] = host_path
        if permissions is not None:
            self["permissions"] = permissions
        if type_ is not None:
            self["type"] = type_
        # Major and minor are recorded as a pair: a lone half is dropped.
        if major is not None and minor is not None:
            self["major"] = major
            self["minor"] = minor
        if uid is not None:
            self["uid"] = uid
        if gid is not None:
            self["gid"] = gid

    @property
    def path(self) -> str:
        """
        Return the path inside the container.

        Returns:
            The path inside the container.

        """
        return self["path"]

    @property
    def host_path(self) -> str | None:
        """
        Return the host path if present.

        Returns:
            The host path if present, else None.

        """
        return self.get("hostPath", None)

    @property
    def permissions(self) -> str | None:
        """
        Return the permissions if present.

        Returns:
            The permissions if present, else None.

        """
        return self.get("permissions", None)

    @property
    def type_(self) -> str | None:
        """
        Return the type of the device if present.

        Returns:
            The type of the device, or None when the node carries no `type`
            key (i.e. it was created with `type_=None`).

        """
        # The constructor skips the key for `type_=None`, so look it up
        # leniently like every other optional field instead of raising KeyError.
        return self.get("type", None)

    @property
    def major(self) -> int | None:
        """
        Return the major number if present.

        Returns:
            The major number if present, else None.

        """
        return self.get("major", None)

    @property
    def minor(self) -> int | None:
        """
        Return the minor number if present.

        Returns:
            The minor number if present, else None.

        """
        return self.get("minor", None)

    @property
    def uid(self) -> int | None:
        """
        Return the user ID if present.

        Returns:
            The user ID if present, else None.

        """
        return self.get("uid", None)

    @property
    def gid(self) -> int | None:
        """
        Return the group ID if present.

        Returns:
            The group ID if present, else None.

        """
        return self.get("gid", None)
169
+
170
+
171
class ConfigMount(dict):
    """
    CDI mount configuration.

    The instance is a plain mapping keyed by the CDI field names
    (`hostPath`, `containerPath`, `options`, `type`).

    """

    def __init__(
        self,
        host_path: str,
        container_path: str,
        options: list[str] | None = None,
        type_: str | None = None,
    ):
        """
        Build a mount entry from a host path and a container path.

        Args:
            host_path:
                The path on the host system.
            container_path:
                The path inside the container.
            options:
                The mount options. Optional.
            type_:
                The mount type. Optional.

        Raises:
            ValueError:
                If `host_path` or `container_path` is empty.

        """
        if not host_path:
            raise ValueError("host_path cannot be empty")
        if not container_path:
            raise ValueError("container_path cannot be empty")

        # Required fields first, so the key order stays stable when serialized.
        super().__init__(hostPath=host_path, containerPath=container_path)

        # Optional fields are only stored when a value was supplied.
        for key, value in (("options", options), ("type", type_)):
            if value is not None:
                self[key] = value

    @property
    def host_path(self) -> str:
        """
        Return the path on the host system.
        """
        return self["hostPath"]

    @property
    def container_path(self) -> str:
        """
        Return the path inside the container.
        """
        return self["containerPath"]

    @property
    def options(self) -> list[str] | None:
        """
        Return the mount options, or None when absent.
        """
        return self.get("options")

    @property
    def type_(self) -> str | None:
        """
        Return the mount type, or None when absent.
        """
        return self.get("type")
241
+
242
+
243
class ConfigHook(dict):
    """
    CDI hook configuration.

    The instance is a plain mapping keyed by the CDI field names
    (`hookName`, `path`, `args`, `env`, `timeout`).

    """

    def __init__(
        self,
        hook_name: str,
        path: str,
        args: list[str] | None = None,
        env: list[str] | None = None,
        timeout: int | None = None,
    ):
        """
        Build a hook entry from its name and executable path.

        Args:
            hook_name:
                The name of the hook.
            path:
                The path to the hook executable.
            args:
                The arguments for the hook. Optional.
            env:
                The environment variables for the hook. Optional.
            timeout:
                The timeout for the hook in seconds. Optional.

        Raises:
            ValueError:
                If `hook_name` or `path` is empty.

        """
        if not hook_name:
            raise ValueError("hook_name cannot be empty")
        if not path:
            raise ValueError("path cannot be empty")

        # Required fields first, so the key order stays stable when serialized.
        super().__init__(hookName=hook_name, path=path)

        # Optional fields are only stored when a value was supplied.
        for key, value in (("args", args), ("env", env), ("timeout", timeout)):
            if value is not None:
                self[key] = value

    @property
    def hook_name(self) -> str:
        """
        Return the name of the hook.
        """
        return self["hookName"]

    @property
    def path(self) -> str:
        """
        Return the path to the hook executable.
        """
        return self["path"]

    @property
    def args(self) -> list[str] | None:
        """
        Return the arguments for the hook, or None when absent.
        """
        return self.get("args")

    @property
    def env(self) -> list[str] | None:
        """
        Return the environment variables for the hook, or None when absent.
        """
        return self.get("env")

    @property
    def timeout(self) -> int | None:
        """
        Return the timeout for the hook in seconds, or None when absent.
        """
        return self.get("timeout")
325
+
326
+
327
class ConfigContainerEdits(dict):
    """
    CDI container edits configuration.

    The instance is a plain mapping keyed by the CDI field names
    (`env`, `deviceNodes`, `mounts`, `hooks`).

    """

    def __init__(
        self,
        env: list[str] | None = None,
        device_nodes: list[ConfigDeviceNode | str] | None = None,
        mounts: list[ConfigMount] | None = None,
        hooks: list[ConfigHook] | None = None,
    ):
        """
        Build the set of edits applied to a container.

        Args:
            env:
                The environment variables to set. Optional.
            device_nodes:
                The device nodes to add. Optional.
                A plain string is treated as a container path and
                wrapped into a `ConfigDeviceNode`.
            mounts:
                The mounts to add. Optional.
            hooks:
                The hooks to add. Optional.

        Raises:
            ValueError:
                If `device_nodes`, `mounts` and `hooks` are all empty or None.

        """
        # `env` alone is not enough: one of the other three must be non-empty.
        if not any((device_nodes, mounts, hooks)):
            raise ValueError(
                "At least one of device_nodes, mounts, or hooks must be provided",
            )

        super().__init__()

        if device_nodes is not None:
            device_nodes = [
                ConfigDeviceNode(node) if isinstance(node, str) else node
                for node in device_nodes
            ]

        # Insert in a fixed order, skipping whatever was not supplied.
        candidates = (
            ("env", env),
            ("deviceNodes", device_nodes),
            ("mounts", mounts),
            ("hooks", hooks),
        )
        for key, value in candidates:
            if value is not None:
                self[key] = value

    @property
    def env(self) -> list[str] | None:
        """
        Return the environment variables.

        Returns:
            The environment variables, or None when absent.

        """
        return self.get("env")

    @property
    def device_nodes(self) -> list[ConfigDeviceNode] | None:
        """
        Return the device nodes.

        Returns:
            The device nodes, or None when absent.

        """
        return self.get("deviceNodes")

    @property
    def mounts(self) -> list[ConfigMount] | None:
        """
        Return the mounts.

        Returns:
            The mounts, or None when absent.

        """
        return self.get("mounts")

    @property
    def hooks(self) -> list[ConfigHook] | None:
        """
        Return the hooks.

        Returns:
            The hooks, or None when absent.

        """
        return self.get("hooks")
415
+
416
+
417
class ConfigDevice(dict):
    """
    CDI device configuration.

    The instance is a plain mapping keyed by the CDI field names
    (`name`, `containerEdits`, `annotations`).

    """

    def __init__(
        self,
        name: str,
        container_edits: ConfigContainerEdits,
        annotations: dict[str, str] | None = None,
    ):
        """
        Build a device entry from its name and container edits.

        Args:
            name:
                The name of the device.
            container_edits:
                The container edits for the device.
            annotations:
                Optional annotations for the device.

        """
        super().__init__(name=name, containerEdits=container_edits)

        # Annotations are only stored when supplied.
        if annotations is not None:
            self["annotations"] = annotations

    @property
    def name(self) -> str:
        """
        Return the name of the device.

        Returns:
            The name of the device.

        """
        return self["name"]

    @property
    def container_edits(self) -> ConfigContainerEdits:
        """
        Return the container edits of the device.

        Returns:
            The container edits.

        """
        return self["containerEdits"]

    @property
    def annotations(self) -> dict[str, str] | None:
        """
        Return the annotations of the device.

        Returns:
            The annotations, or None when absent.

        """
        return self.get("annotations")
480
+
481
+
482
class Config(dict):
    """
    CDI configuration.

    The instance is a plain mapping keyed by the CDI field names
    (`cdiVersion`, `kind`, `devices`, `annotations`).
    """

    @classmethod
    def from_file(cls, path: str | Path, strict: bool = False) -> Config:
        """
        Load a CDI configuration from a file.

        The parsed `devices` value is stored as loaded;
        entries are not converted into `ConfigDevice` instances.

        Args:
            path:
                The path to the CDI configuration file.
            strict:
                Whether to enable strict mode.
                In strict mode a file holding more than one object is rejected;
                otherwise only the first object is used.

        Returns:
            The loaded CDI configuration.

        Raises:
            RuntimeError:
                If the parsed content is empty, is not an object,
                or holds multiple objects while `strict` is set.
            KeyError:
                If the parsed object lacks `kind` or `devices`.

        """
        data = load_yaml_or_json(path)
        if isinstance(data, list):
            data_size = len(data)
            if data_size == 0:
                msg = f"Parsed CDI config is empty, check the content of {path}"
                raise RuntimeError(msg)
            if data_size > 1 and strict:
                msg = f"Parsed CDI config has multiple objects, check the content of {path}"
                raise RuntimeError(msg)
            data = data[0]

        # Anything but a mapping (e.g. None or a bare scalar) cannot describe a
        # CDI config; report it like the other content problems instead of
        # failing with an opaque TypeError/AttributeError on the lookups below.
        if not isinstance(data, dict):
            msg = f"Parsed CDI config is not an object, check the content of {path}"
            raise RuntimeError(msg)

        return cls(
            kind=data["kind"],
            devices=data["devices"],
            cdi_version=data.get("cdiVersion", _DEFAULT_CDI_VERSION),
            annotations=data.get("annotations", None),
        )

    def __init__(
        self,
        kind: str,
        devices: list[ConfigDevice],
        cdi_version: str = _DEFAULT_CDI_VERSION,
        annotations: dict[str, str] | None = None,
    ):
        """
        Initialize a CDI configuration.

        Args:
            kind: The kind of the CDI configuration.
            devices: The list of devices in the CDI configuration.
            cdi_version: The CDI version. Default is "0.5.0".
            annotations: Optional annotations for the CDI configuration.

        """
        super().__init__()

        # Insertion order is the order used when serializing without key sorting.
        self["cdiVersion"] = cdi_version
        self["kind"] = kind
        self["devices"] = devices
        if annotations is not None:
            self["annotations"] = annotations

    @property
    def devices(self) -> list[ConfigDevice]:
        """
        Return the list of devices in the CDI configuration.

        Returns:
            The list of devices.

        """
        return self["devices"]

    @property
    def kind(self) -> str:
        """
        Return the kind of the CDI configuration.

        Returns:
            The kind of the CDI configuration.

        """
        return self["kind"]

    @property
    def cdi_version(self) -> str:
        """
        Return the CDI version of the configuration.

        Returns:
            The CDI version.

        """
        return self["cdiVersion"]

    @property
    def annotations(self) -> dict[str, str] | None:
        """
        Return the annotations of the CDI configuration.

        Returns:
            The annotations if present, else None.

        """
        return self.get("annotations", None)

    def stringify(self, _format: Literal["yaml", "json"] = "yaml") -> str:
        """
        Stringify the CDI configuration to the specified format.

        Args:
            _format:
                The format of the CDI configuration.
                Either "yaml" or "json". Default is "yaml".
                Any value other than "yaml" produces JSON.

        Returns:
            The string representation of the CDI configuration in the specified format.

        """
        if _format == "yaml":
            return safe_yaml(self, indent=2, sort_keys=False)
        return safe_json(self, indent=2, sort_keys=False)
605
+
606
+
607
@lru_cache
def manufacturer_to_config_kind(manufacturer: ManufacturerEnum) -> str | None:
    """
    Resolve the CDI config kind for a manufacturer.

    The lookup goes manufacturer -> backend -> resource key -> CDI kind,
    using the `GPUSTACK_RUNTIME_DETECT_BACKEND_MAP_RESOURCE_KEY`
    and `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_CDI` envs.
    Results are memoized per manufacturer.

    Args:
        manufacturer:
            The manufacturer enum.

    Returns:
        The corresponding CDI config kind as a string.
        None if the backend has no resource key,
        or the resource key has no CDI kind.

    """
    backend = manufacturer_to_backend(manufacturer)
    resource_key = envs.GPUSTACK_RUNTIME_DETECT_BACKEND_MAP_RESOURCE_KEY.get(backend)
    if resource_key:
        return envs.GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_CDI.get(resource_key)
    return None
629
+
630
+
631
class Generator(ABC):
    """
    Abstract base for CDI generators.

    Subclasses implement `generate` to produce a CDI `Config`.
    """

    # Manufacturer this generator is bound to; overwritten per instance in __init__.
    manufacturer: ManufacturerEnum = ManufacturerEnum.UNKNOWN

    def __init__(self, manufacturer: ManufacturerEnum):
        """
        Bind the generator to a manufacturer.

        Args:
            manufacturer: The manufacturer this generator handles.

        """
        self.manufacturer = manufacturer

    @property
    def name(self) -> str:
        """
        Return the name of the generator.

        Returns:
            The string form of the bound manufacturer.

        """
        return str(self.manufacturer)

    @abstractmethod
    def generate(self, devices: Devices | None = None) -> Config | None:
        """
        Generate the CDI specification.

        Args:
            devices: The devices to generate the CDI specification for.

        Returns:
            The Config object, or None if not supported.

        """
        raise NotImplementedError