konduktor-nightly 0.1.0.dev20250710105129__tar.gz → 0.1.0.dev20251103104940__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of konduktor-nightly might be problematic. Click here for more details.

Files changed (109)
  1. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/PKG-INFO +1 -1
  2. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/__init__.py +4 -7
  3. konduktor_nightly-0.1.0.dev20251103104940/konduktor/backends/__init__.py +6 -0
  4. konduktor_nightly-0.1.0.dev20251103104940/konduktor/backends/constants.py +21 -0
  5. konduktor_nightly-0.1.0.dev20251103104940/konduktor/backends/deployment.py +204 -0
  6. konduktor_nightly-0.1.0.dev20251103104940/konduktor/backends/deployment_utils.py +1351 -0
  7. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/backends/jobset.py +12 -8
  8. konduktor_nightly-0.1.0.dev20251103104940/konduktor/backends/jobset_utils.py +726 -0
  9. konduktor_nightly-0.1.0.dev20251103104940/konduktor/backends/pod_utils.py +499 -0
  10. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/cli.py +848 -66
  11. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/config.py +1 -1
  12. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/controller/launch.py +1 -1
  13. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/aws/s3.py +2 -1
  14. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/gcp/gcs.py +2 -5
  15. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/execution.py +5 -2
  16. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/kube_client.py +66 -6
  17. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/logging.py +6 -4
  18. konduktor_nightly-0.1.0.dev20251103104940/konduktor/manifests/aibrix-setup.yaml +430 -0
  19. konduktor_nightly-0.1.0.dev20251103104940/konduktor/manifests/apoxy-setup.yaml +184 -0
  20. konduktor_nightly-0.1.0.dev20251103104940/konduktor/manifests/apoxy-setup2.yaml +98 -0
  21. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/resource.py +44 -3
  22. konduktor_nightly-0.1.0.dev20251103104940/konduktor/serving.py +153 -0
  23. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/task.py +70 -1
  24. konduktor_nightly-0.1.0.dev20251103104940/konduktor/templates/deployment.yaml.j2 +191 -0
  25. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/templates/jobset.yaml.j2 +9 -2
  26. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/templates/pod.yaml.j2 +202 -17
  27. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/accelerator_registry.py +1 -1
  28. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/base64_utils.py +2 -0
  29. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/common_utils.py +1 -1
  30. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/kubernetes_utils.py +41 -9
  31. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/log_utils.py +217 -2
  32. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/loki_utils.py +13 -1
  33. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/schemas.py +82 -1
  34. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/ux_utils.py +36 -11
  35. konduktor_nightly-0.1.0.dev20251103104940/konduktor/utils/validator.py +461 -0
  36. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/pyproject.toml +1 -1
  37. konduktor_nightly-0.1.0.dev20250710105129/konduktor/backends/__init__.py +0 -8
  38. konduktor_nightly-0.1.0.dev20250710105129/konduktor/backends/jobset_utils.py +0 -591
  39. konduktor_nightly-0.1.0.dev20250710105129/konduktor/utils/validator.py +0 -91
  40. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/LICENSE +0 -0
  41. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/README.md +0 -0
  42. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/adaptors/__init__.py +0 -0
  43. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/adaptors/aws.py +0 -0
  44. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/adaptors/common.py +0 -0
  45. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/adaptors/gcp.py +0 -0
  46. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/authentication.py +0 -0
  47. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/backends/backend.py +0 -0
  48. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/check.py +0 -0
  49. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/constants.py +0 -0
  50. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/controller/__init__.py +0 -0
  51. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/controller/constants.py +0 -0
  52. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/controller/node.py +0 -0
  53. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/controller/parse.py +0 -0
  54. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/README.md +0 -0
  55. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/backend/main.py +0 -0
  56. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/backend/sockets.py +0 -0
  57. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/.eslintrc.json +0 -0
  58. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/.gitignore +0 -0
  59. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/api/jobs/route.js +0 -0
  60. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/api/namespaces/route.js +0 -0
  61. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/Grafana.jsx +0 -0
  62. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/JobsData.jsx +0 -0
  63. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/LogsData.jsx +0 -0
  64. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/NavMenu.jsx +0 -0
  65. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/NavTabs.jsx +0 -0
  66. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/NavTabs2.jsx +0 -0
  67. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/SelectBtn.jsx +0 -0
  68. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/lib/utils.js +0 -0
  69. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +0 -0
  70. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/ui/input.jsx +0 -0
  71. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +0 -0
  72. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/ui/select.jsx +0 -0
  73. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/favicon.ico +0 -0
  74. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/globals.css +0 -0
  75. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/jobs/page.js +0 -0
  76. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/layout.js +0 -0
  77. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/logs/page.js +0 -0
  78. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/page.js +0 -0
  79. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/jsconfig.json +0 -0
  80. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/next.config.mjs +0 -0
  81. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/package-lock.json +0 -0
  82. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/package.json +0 -0
  83. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/postcss.config.mjs +0 -0
  84. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/server.js +0 -0
  85. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/tailwind.config.js +0 -0
  86. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/__init__.py +0 -0
  87. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/aws/__init__.py +0 -0
  88. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/constants.py +0 -0
  89. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/data_utils.py +0 -0
  90. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/gcp/__init__.py +0 -0
  91. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/gcp/constants.py +0 -0
  92. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/gcp/utils.py +0 -0
  93. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/registry.py +0 -0
  94. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/storage.py +0 -0
  95. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/storage_utils.py +0 -0
  96. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/manifests/controller_deployment.yaml +0 -0
  97. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/manifests/dashboard_deployment.yaml +0 -0
  98. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/manifests/dmesg_daemonset.yaml +0 -0
  99. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/manifests/pod_cleanup_controller.yaml +0 -0
  100. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/usage/__init__.py +0 -0
  101. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/usage/constants.py +0 -0
  102. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/__init__.py +0 -0
  103. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/annotations.py +0 -0
  104. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/constants.py +0 -0
  105. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/env_options.py +0 -0
  106. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/exceptions.py +0 -0
  107. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/kubernetes_enums.py +0 -0
  108. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/rich_utils.py +0 -0
  109. {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/subprocess_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: konduktor-nightly
3
- Version: 0.1.0.dev20250710105129
3
+ Version: 0.1.0.dev20251103104940
4
4
  Summary: GPU Cluster Health Management
5
5
  Author: Andrew Aikawa
6
6
  Author-email: asai@berkeley.edu
@@ -5,16 +5,13 @@ import subprocess
5
5
 
6
6
  from konduktor.execution import launch
7
7
  from konduktor.resource import Resources
8
+ from konduktor.serving import Serving
8
9
  from konduktor.task import Task
9
10
 
10
- __all__ = [
11
- 'launch',
12
- 'Resources',
13
- 'Task',
14
- ]
11
+ __all__ = ['launch', 'Resources', 'Task', 'Serving']
15
12
 
16
13
  # Replaced with the current commit when building the wheels.
17
- _KONDUKTOR_COMMIT_SHA = '5760f2c18ed487270e9244d22c7209eee12821c1'
14
+ _KONDUKTOR_COMMIT_SHA = 'd5fddf4e144c4887227e1c6943c70bcd72d364d5'
18
15
  os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
19
16
 
20
17
 
@@ -48,5 +45,5 @@ def _get_git_commit():
48
45
 
49
46
 
50
47
  __commit__ = _get_git_commit()
51
- __version__ = '1.0.0.dev0.1.0.dev20250710105129'
48
+ __version__ = '1.0.0.dev0.1.0.dev20251103104940'
52
49
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
@@ -0,0 +1,6 @@
1
+ """Batch job backends"""
2
+
3
# `Backend` is listed in `__all__`, so it must be bound in this namespace;
# otherwise `from konduktor.backends import *` fails with AttributeError.
from konduktor.backends.backend import Backend
from konduktor.backends.deployment import DeploymentBackend
from konduktor.backends.jobset import JobsetBackend

__all__ = ['Backend', 'JobsetBackend', 'DeploymentBackend']
@@ -0,0 +1,21 @@
1
+ KONDUKTOR_SSH_PORT = 2222
2
+
3
+ # Common labels used across JobSets and Deployments
4
+ JOB_NAME_LABEL = 'trainy.ai/job-name'
5
+ DEPLOYMENT_NAME_LABEL = 'trainy.ai/deployment-name'
6
+ AIBRIX_NAME_LABEL = 'model.aibrix.ai/name'
7
+ USERID_LABEL = 'trainy.ai/user-id'
8
+ USER_LABEL = 'trainy.ai/username'
9
+ ACCELERATOR_LABEL = 'trainy.ai/accelerator'
10
+ NUM_ACCELERATORS_LABEL = 'trainy.ai/num-accelerators'
11
+ MAX_EXECUTION_TIME_LABEL = 'kueue.x-k8s.io/max-exec-time-seconds'
12
+
13
+ # Start/stop/status related labels
14
+ STOP_USERID_LABEL = 'trainy.ai/stop-userid'
15
+ STOP_USERNAME_LABEL = 'trainy.ai/stop-username'
16
+
17
+ # Secret labels
18
+ SECRET_BASENAME_LABEL = 'trainy.ai/secret-basename'
19
+ SECRET_KIND_LABEL = 'trainy.ai/secret-kind'
20
+ SECRET_OWNER_LABEL = 'trainy.ai/secret-owner'
21
+ ROOT_NAME = 'trainy.ai/root-name'
@@ -0,0 +1,204 @@
1
+ import time
2
+ import typing
3
+ from typing import Dict, Optional
4
+
5
+ import colorama
6
+
7
+ if typing.TYPE_CHECKING:
8
+ import konduktor
9
+ from konduktor.data import storage as storage_lib
10
+
11
+ from kubernetes.client.exceptions import ApiException
12
+
13
+ from konduktor import config, kube_client, logging
14
+ from konduktor.backends import backend, deployment_utils, pod_utils
15
+ from konduktor.utils import kubernetes_utils, rich_utils, ux_utils
16
+
17
+ Path = str
18
+ logger = logging.get_logger(__file__)
19
+
20
+ POLL_INTERVAL = 5
21
+ DEFAULT_ATTACH_TIMEOUT = 300
22
+
23
+
24
class DeploymentError(Exception):
    """Raised when the serving resources are missing or never become ready."""
26
+
27
+
28
def _wait_for_all_ready(namespace: str, name: str):
    """Wait for Deployment, Service, and Autoscaler readiness.

    Polls every ``POLL_INTERVAL`` seconds. The timeout is read from the
    ``kubernetes.provision_timeout`` config key and falls back to
    ``DEFAULT_ATTACH_TIMEOUT``; a value of ``-1`` disables the timeout.

    Args:
        namespace: Namespace the Deployment and Service are read from.
        name: Name used to look up the Deployment, Service and autoscaler.

    Raises:
        DeploymentError: If any component is reported as ``'missing'``, or
            if readiness is not reached within the timeout. In both cases
            the serving specs are deleted before the error is raised.
    """
    # NOTE(review): presumably gives the API server a moment to register the
    # freshly created objects before the first read -- confirm.
    time.sleep(2)
    start = time.time()
    timeout = config.get_nested(
        ('kubernetes', 'provision_timeout'),
        default_value=DEFAULT_ATTACH_TIMEOUT,
    )

    while True:
        context = kubernetes_utils.get_current_kube_config_context_name()

        # Directly read objects instead of listing everything
        try:
            deployment = kube_client.apps_api(context).read_namespaced_deployment(
                name=name, namespace=namespace
            )
            deployments_map = {name: deployment}
        except ApiException:
            # Reset explicitly so the autoscaler branch below never sees an
            # unbound name (first iteration) or a stale object (later ones).
            deployment = None
            deployments_map = {}

        try:
            service = kube_client.core_api(context).read_namespaced_service(
                name=name, namespace=namespace
            )
            services_map = {name: service}
        except ApiException:
            services_map = {}

        autoscalers_map = {}
        try:
            autoscaler_obj = deployment_utils.get_autoscaler(namespace, name)
            if autoscaler_obj:
                # detect aibrix vs general from deployment labels
                labels = (
                    (deployment.metadata.labels or {})
                    if deployment is not None
                    else {}
                )
                is_aibrix = deployment_utils.AIBRIX_NAME_LABEL in labels
                if is_aibrix:
                    autoscalers_map[name] = {'kpa': autoscaler_obj}
                else:
                    autoscalers_map[name] = {'hpa': autoscaler_obj}
        except ApiException:
            # Best effort: an unreadable autoscaler is treated as absent.
            pass

        status = deployment_utils.get_model_status(
            name, deployments_map, services_map, autoscalers_map
        )

        is_ready = (
            status['deployment'] == 'ready'
            and status['service'] == 'ready'
            and (status['autoscaler'] == 'ready' or status['autoscaler'] is None)
        )

        states = {
            'Deployment': status['deployment'],
            'Service': status['service'],
            'Autoscaler': status['autoscaler'],
        }

        # Figure out which components are missing
        missing_parts = [
            component for component, state in states.items() if state == 'missing'
        ]

        if missing_parts:
            # Clean up whatever was created before reporting the failure.
            deployment_utils.delete_serving_specs(name, namespace)
            missing_str = ', '.join(missing_parts)
            raise DeploymentError(
                f'Deployment failed. '
                f'The following components are missing: {missing_str}.'
            )

        if is_ready:
            logger.info(
                f'task {colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
                f'{name}{colorama.Style.RESET_ALL} ready'
            )
            return

        # A timeout of -1 means wait indefinitely.
        if timeout != -1 and time.time() - start > timeout:
            logger.error(
                f'{colorama.Style.BRIGHT}{colorama.Fore.RED}'
                f'Model timed out waiting for readiness.'
                f'{colorama.Style.RESET_ALL}'
                f'Final status:\n{status}'
            )
            deployment_utils.delete_serving_specs(name, namespace)
            raise DeploymentError(
                f'Model failed to become ready within {timeout} seconds.\n'
            )

        time.sleep(POLL_INTERVAL)
118
+
119
+
120
class DeploymentBackend(backend.Backend):
    """Backend that launches a task as a long-running Deployment."""

    NAME = 'deployment'

    def check_resources_fit_cluster(self, task: 'konduktor.Task') -> bool:
        """Return True unconditionally; no fit check is performed here."""
        return True

    def add_storage_objects(self, task: 'konduktor.Task') -> None:
        """No-op for this backend."""
        return None

    def register_info(self, **kwargs) -> None:
        """No-op for this backend."""
        return None

    def _sync_file_mounts(
        self,
        all_file_mounts: Optional[Dict[Path, Path]],
        storage_mounts: Optional[Dict[Path, 'storage_lib.Storage']],
    ) -> None:
        """No-op for this backend."""
        return None

    def _sync_workdir(self, workdir: str) -> None:
        """No-op for this backend."""
        return None

    def _post_execute(self) -> None:
        """No-op for this backend."""
        return None

    def _execute(
        self,
        task: 'konduktor.Task',
        detach_run: bool = False,
        dryrun: bool = False,
    ) -> Optional[str]:
        """Create the serving resources for ``task`` and optionally wait.

        When this is not a dry run and ``task.serving`` is set, the
        Deployment, Service, pod autoscaler and HTTP Add-on resources are
        created in that order. Unless ``dryrun`` or ``detach_run`` is set,
        the call then blocks until everything reports ready.

        Returns:
            ``task.name``.
        """
        spec = pod_utils.create_pod_spec(task)
        ctx = kubernetes_utils.get_current_kube_config_context_name()
        ns = kubernetes_utils.get_kube_config_context_namespace(ctx)

        if not dryrun and task.serving:
            # Keyword arguments shared by every create_* call below.
            shared = {'namespace': ns, 'task': task, 'dryrun': dryrun}

            logger.debug(f'[DEBUG] Creating deployment for task: {task.name}')
            deployment_utils.create_deployment(
                pod_spec=spec['kubernetes']['pod_config'], **shared
            )

            logger.debug(f'[DEBUG] Creating service for task: {task.name}')
            deployment_utils.create_service(**shared)

            # Create podautoscaler for non-general deployments
            logger.debug(f'[DEBUG] Creating podautoscaler for task: {task.name}')
            deployment_utils.create_pod_autoscaler(**shared)

            # HTTP Add-on resources for general deployments
            logger.debug(
                f'[DEBUG] Creating HTTP Add-on resources for task: {task.name}'
            )
            deployment_utils.create_http_addon_resources(**shared)

        if dryrun or detach_run:
            logger.info('detaching from run.')
            return task.name

        with ux_utils.print_exception_no_traceback(), rich_utils.safe_status(
            ux_utils.spinner_message('waiting for resources to be ready.\n')
        ):
            _wait_for_all_ready(ns, task.name)
        logger.info(
            f"Model '{task.name}' is ready. "
            f'Run `konduktor serve status` for details.'
        )
        return task.name