konduktor-nightly 0.1.0.dev20250710105129__tar.gz → 0.1.0.dev20251103104940__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of konduktor-nightly might be problematic. Click here for more details.
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/PKG-INFO +1 -1
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/__init__.py +4 -7
- konduktor_nightly-0.1.0.dev20251103104940/konduktor/backends/__init__.py +6 -0
- konduktor_nightly-0.1.0.dev20251103104940/konduktor/backends/constants.py +21 -0
- konduktor_nightly-0.1.0.dev20251103104940/konduktor/backends/deployment.py +204 -0
- konduktor_nightly-0.1.0.dev20251103104940/konduktor/backends/deployment_utils.py +1351 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/backends/jobset.py +12 -8
- konduktor_nightly-0.1.0.dev20251103104940/konduktor/backends/jobset_utils.py +726 -0
- konduktor_nightly-0.1.0.dev20251103104940/konduktor/backends/pod_utils.py +499 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/cli.py +848 -66
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/config.py +1 -1
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/controller/launch.py +1 -1
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/aws/s3.py +2 -1
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/gcp/gcs.py +2 -5
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/execution.py +5 -2
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/kube_client.py +66 -6
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/logging.py +6 -4
- konduktor_nightly-0.1.0.dev20251103104940/konduktor/manifests/aibrix-setup.yaml +430 -0
- konduktor_nightly-0.1.0.dev20251103104940/konduktor/manifests/apoxy-setup.yaml +184 -0
- konduktor_nightly-0.1.0.dev20251103104940/konduktor/manifests/apoxy-setup2.yaml +98 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/resource.py +44 -3
- konduktor_nightly-0.1.0.dev20251103104940/konduktor/serving.py +153 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/task.py +70 -1
- konduktor_nightly-0.1.0.dev20251103104940/konduktor/templates/deployment.yaml.j2 +191 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/templates/jobset.yaml.j2 +9 -2
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/templates/pod.yaml.j2 +202 -17
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/accelerator_registry.py +1 -1
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/base64_utils.py +2 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/common_utils.py +1 -1
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/kubernetes_utils.py +41 -9
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/log_utils.py +217 -2
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/loki_utils.py +13 -1
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/schemas.py +82 -1
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/ux_utils.py +36 -11
- konduktor_nightly-0.1.0.dev20251103104940/konduktor/utils/validator.py +461 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/pyproject.toml +1 -1
- konduktor_nightly-0.1.0.dev20250710105129/konduktor/backends/__init__.py +0 -8
- konduktor_nightly-0.1.0.dev20250710105129/konduktor/backends/jobset_utils.py +0 -591
- konduktor_nightly-0.1.0.dev20250710105129/konduktor/utils/validator.py +0 -91
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/README.md +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/adaptors/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/adaptors/aws.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/adaptors/common.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/adaptors/gcp.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/authentication.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/backends/backend.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/check.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/constants.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/controller/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/controller/constants.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/controller/node.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/controller/parse.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/README.md +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/backend/main.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/backend/sockets.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/.eslintrc.json +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/.gitignore +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/api/jobs/route.js +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/api/namespaces/route.js +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/Grafana.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/JobsData.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/LogsData.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/NavMenu.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/NavTabs.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/NavTabs2.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/SelectBtn.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/lib/utils.js +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/ui/input.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/components/ui/select.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/favicon.ico +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/globals.css +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/jobs/page.js +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/layout.js +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/logs/page.js +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/app/page.js +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/jsconfig.json +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/next.config.mjs +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/package-lock.json +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/package.json +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/postcss.config.mjs +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/server.js +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/dashboard/frontend/tailwind.config.js +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/aws/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/constants.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/data_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/gcp/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/gcp/constants.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/gcp/utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/registry.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/storage.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/data/storage_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/manifests/controller_deployment.yaml +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/manifests/dashboard_deployment.yaml +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/manifests/dmesg_daemonset.yaml +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/manifests/pod_cleanup_controller.yaml +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/usage/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/usage/constants.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/annotations.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/constants.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/env_options.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/exceptions.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/kubernetes_enums.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/rich_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250710105129 → konduktor_nightly-0.1.0.dev20251103104940}/konduktor/utils/subprocess_utils.py +0 -0
|
@@ -5,16 +5,13 @@ import subprocess
|
|
|
5
5
|
|
|
6
6
|
from konduktor.execution import launch
|
|
7
7
|
from konduktor.resource import Resources
|
|
8
|
+
from konduktor.serving import Serving
|
|
8
9
|
from konduktor.task import Task
|
|
9
10
|
|
|
10
|
-
__all__ = [
|
|
11
|
-
'launch',
|
|
12
|
-
'Resources',
|
|
13
|
-
'Task',
|
|
14
|
-
]
|
|
11
|
+
__all__ = ['launch', 'Resources', 'Task', 'Serving']
|
|
15
12
|
|
|
16
13
|
# Replaced with the current commit when building the wheels.
|
|
17
|
-
_KONDUKTOR_COMMIT_SHA = '
|
|
14
|
+
_KONDUKTOR_COMMIT_SHA = 'd5fddf4e144c4887227e1c6943c70bcd72d364d5'
|
|
18
15
|
os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
|
|
19
16
|
|
|
20
17
|
|
|
@@ -48,5 +45,5 @@ def _get_git_commit():
|
|
|
48
45
|
|
|
49
46
|
|
|
50
47
|
__commit__ = _get_git_commit()
|
|
51
|
-
__version__ = '1.0.0.dev0.1.0.
|
|
48
|
+
__version__ = '1.0.0.dev0.1.0.dev20251103104940'
|
|
52
49
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Shared constants for the konduktor backends: one port number plus the
# Kubernetes label keys referenced by the backends. Most keys live under the
# 'trainy.ai/' prefix; two use the 'model.aibrix.ai/' and 'kueue.x-k8s.io/'
# prefixes instead.

# NOTE(review): presumably the port used for SSH access to konduktor
# workloads; the consumer is not visible from this file - confirm.
KONDUKTOR_SSH_PORT = 2222

# Common labels used across JobSets and Deployments
JOB_NAME_LABEL = 'trainy.ai/job-name'
DEPLOYMENT_NAME_LABEL = 'trainy.ai/deployment-name'
AIBRIX_NAME_LABEL = 'model.aibrix.ai/name'
USERID_LABEL = 'trainy.ai/user-id'
USER_LABEL = 'trainy.ai/username'
ACCELERATOR_LABEL = 'trainy.ai/accelerator'
NUM_ACCELERATORS_LABEL = 'trainy.ai/num-accelerators'
# The key itself states the unit: the value is expressed in seconds.
MAX_EXECUTION_TIME_LABEL = 'kueue.x-k8s.io/max-exec-time-seconds'

# Start/stop/status related labels
STOP_USERID_LABEL = 'trainy.ai/stop-userid'
STOP_USERNAME_LABEL = 'trainy.ai/stop-username'

# Secret labels
SECRET_BASENAME_LABEL = 'trainy.ai/secret-basename'
SECRET_KIND_LABEL = 'trainy.ai/secret-kind'
SECRET_OWNER_LABEL = 'trainy.ai/secret-owner'
# NOTE(review): unlike its neighbours this name carries no `_LABEL` suffix;
# renaming it would break importers, so it is only flagged here.
ROOT_NAME = 'trainy.ai/root-name'
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import typing
|
|
3
|
+
from typing import Dict, Optional
|
|
4
|
+
|
|
5
|
+
import colorama
|
|
6
|
+
|
|
7
|
+
if typing.TYPE_CHECKING:
|
|
8
|
+
import konduktor
|
|
9
|
+
from konduktor.data import storage as storage_lib
|
|
10
|
+
|
|
11
|
+
from kubernetes.client.exceptions import ApiException
|
|
12
|
+
|
|
13
|
+
from konduktor import config, kube_client, logging
|
|
14
|
+
from konduktor.backends import backend, deployment_utils, pod_utils
|
|
15
|
+
from konduktor.utils import kubernetes_utils, rich_utils, ux_utils
|
|
16
|
+
|
|
17
|
+
# Alias used by the file-mount type hints in this module.
Path = str
logger = logging.get_logger(__file__)

# Seconds slept between readiness polls in `_wait_for_all_ready`.
POLL_INTERVAL = 5
# Readiness timeout in seconds, passed as the default when reading the
# ('kubernetes', 'provision_timeout') config key.
DEFAULT_ATTACH_TIMEOUT = 300
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DeploymentError(Exception):
    """Raised when a serving deployment fails to come up or is incomplete."""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _wait_for_all_ready(namespace: str, name: str) -> None:
    """Poll until the Deployment, Service, and autoscaler for `name` are ready.

    On every poll the Deployment and Service called `name` are read from
    `namespace`, the autoscaler is looked up via
    `deployment_utils.get_autoscaler`, and the three are summarised by
    `deployment_utils.get_model_status`. The function returns as soon as the
    Deployment and Service report 'ready' and the autoscaler reports either
    'ready' or None.

    Args:
        namespace: Kubernetes namespace holding the serving resources.
        name: Name shared by the Deployment and the Service.

    Raises:
        DeploymentError: If any component is reported as 'missing', or if
            readiness is not reached within the configured
            ('kubernetes', 'provision_timeout') value (default
            DEFAULT_ATTACH_TIMEOUT seconds; -1 disables the timeout). The
            serving specs are deleted before the error is raised.
    """
    # Short pause before the first poll; presumably lets the freshly created
    # objects become visible through the API - confirm.
    time.sleep(2)
    start = time.time()
    timeout = config.get_nested(
        ('kubernetes', 'provision_timeout'),
        default_value=DEFAULT_ATTACH_TIMEOUT,
    )

    while True:
        context = kubernetes_utils.get_current_kube_config_context_name()

        # Directly read objects instead of listing everything
        try:
            deployment = kube_client.apps_api(context).read_namespaced_deployment(
                name=name, namespace=namespace
            )
            deployments_map = {name: deployment}
        except ApiException as e:
            # Reset explicitly: without this, `deployment` is unbound on the
            # first poll (or stale from an earlier poll) when the autoscaler
            # branch below inspects its labels.
            deployment = None
            deployments_map = {}
            logger.debug(f'Could not read deployment {name}: {e}')

        try:
            service = kube_client.core_api(context).read_namespaced_service(
                name=name, namespace=namespace
            )
            services_map = {name: service}
        except ApiException as e:
            services_map = {}
            logger.debug(f'Could not read service {name}: {e}')

        autoscalers_map = {}
        try:
            autoscaler_obj = deployment_utils.get_autoscaler(namespace, name)
            if autoscaler_obj:
                # detect aibrix vs general from deployment labels
                labels = (deployment.metadata.labels or {}) if deployment else {}
                is_aibrix = deployment_utils.AIBRIX_NAME_LABEL in labels
                autoscaler_kind = 'kpa' if is_aibrix else 'hpa'
                autoscalers_map[name] = {autoscaler_kind: autoscaler_obj}
        except ApiException as e:
            # Best effort: a failed lookup leaves the autoscaler map empty.
            logger.debug(f'Could not read autoscaler for {name}: {e}')

        status = deployment_utils.get_model_status(
            name, deployments_map, services_map, autoscalers_map
        )

        is_ready = (
            status['deployment'] == 'ready'
            and status['service'] == 'ready'
            and (status['autoscaler'] == 'ready' or status['autoscaler'] is None)
        )

        states = {
            'Deployment': status['deployment'],
            'Service': status['service'],
            'Autoscaler': status['autoscaler'],
        }

        # Figure out which components are missing
        missing_parts = [
            component for component, state in states.items() if state == 'missing'
        ]

        if missing_parts:
            # Clean up whatever was created before reporting the failure.
            deployment_utils.delete_serving_specs(name, namespace)
            missing_str = ', '.join(missing_parts)
            raise DeploymentError(
                f'Deployment failed. '
                f'The following components are missing: {missing_str}.'
            )

        if is_ready:
            logger.info(
                f'task {colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
                f'{name}{colorama.Style.RESET_ALL} ready'
            )
            return

        # A timeout of -1 means wait indefinitely.
        if timeout != -1 and time.time() - start > timeout:
            logger.error(
                f'{colorama.Style.BRIGHT}{colorama.Fore.RED}'
                f'Model timed out waiting for readiness.'
                f'{colorama.Style.RESET_ALL} '
                f'Final status:\n{status}'
            )
            deployment_utils.delete_serving_specs(name, namespace)
            raise DeploymentError(
                f'Model failed to become ready within {timeout} seconds.\n'
            )

        time.sleep(POLL_INTERVAL)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class DeploymentBackend(backend.Backend):
    """Backend that launches a task as a long-running Kubernetes Deployment.

    Only `_execute` does real work here; the remaining methods either return
    a constant or do nothing.
    """

    NAME = 'deployment'

    def check_resources_fit_cluster(self, task: 'konduktor.Task') -> bool:
        """Always return True; no resource check is performed here."""
        return True

    def add_storage_objects(self, task: 'konduktor.Task') -> None:
        """Do nothing for this backend."""
        pass

    def register_info(self, **kwargs) -> None:
        """Do nothing for this backend."""
        pass

    def _sync_file_mounts(
        self,
        all_file_mounts: Optional[Dict[Path, Path]],
        storage_mounts: Optional[Dict[Path, 'storage_lib.Storage']],
    ) -> None:
        """Do nothing for this backend."""
        pass

    def _sync_workdir(self, workdir: str) -> None:
        """Do nothing for this backend."""
        pass

    def _post_execute(self) -> None:
        """Do nothing for this backend."""
        pass

    def _execute(
        self,
        task: 'konduktor.Task',
        detach_run: bool = False,
        dryrun: bool = False,
    ) -> Optional[str]:
        """Execute a task by launching a long-running Deployment.

        Args:
            task: Task to deploy; resources are only created when
                `task.serving` is truthy.
            detach_run: If True, return without waiting for readiness.
            dryrun: If True, build the pod spec but create nothing and do
                not wait.

        Returns:
            The task name.
        """

        pod_spec = pod_utils.create_pod_spec(task)
        context = kubernetes_utils.get_current_kube_config_context_name()
        namespace = kubernetes_utils.get_kube_config_context_namespace(context)

        if not dryrun and task.serving:
            logger.debug(f'[DEBUG] Creating deployment for task: {task.name}')
            deployment_utils.create_deployment(
                namespace=namespace,
                task=task,
                pod_spec=pod_spec['kubernetes']['pod_config'],
                dryrun=dryrun,
            )

            logger.debug(f'[DEBUG] Creating service for task: {task.name}')
            deployment_utils.create_service(
                namespace=namespace,
                task=task,
                dryrun=dryrun,
            )

            # NOTE(review): the two helpers below are both called
            # unconditionally; presumably each decides internally whether it
            # applies to this deployment type - confirm.
            # Create podautoscaler for non-general deployments
            logger.debug(f'[DEBUG] Creating podautoscaler for task: {task.name}')
            deployment_utils.create_pod_autoscaler(
                namespace=namespace,
                task=task,
                dryrun=dryrun,
            )

            # HTTP Add-on resources for general deployments
            logger.debug(
                f'[DEBUG] Creating HTTP Add-on resources for task: {task.name}'
            )
            deployment_utils.create_http_addon_resources(
                namespace=namespace,
                task=task,
                dryrun=dryrun,
            )

        if not dryrun and not detach_run:
            with ux_utils.print_exception_no_traceback():
                with rich_utils.safe_status(
                    ux_utils.spinner_message('waiting for resources to be ready.\n')
                ):
                    _wait_for_all_ready(namespace, task.name)
            # NOTE(review): the nesting of this log call relative to the two
            # context managers above could not be recovered from the
            # flattened source - confirm against the packaged file.
            logger.info(
                f"Model '{task.name}' is ready. "
                f'Run `konduktor serve status` for details.'
            )
        else:
            # This branch is also taken on a dry run, not only when detaching.
            logger.info('detaching from run.')

        return task.name
|