konduktor-nightly 0.1.0.dev20251128104812__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- konduktor/__init__.py +49 -0
- konduktor/adaptors/__init__.py +0 -0
- konduktor/adaptors/aws.py +221 -0
- konduktor/adaptors/common.py +118 -0
- konduktor/adaptors/gcp.py +126 -0
- konduktor/authentication.py +124 -0
- konduktor/backends/__init__.py +6 -0
- konduktor/backends/backend.py +86 -0
- konduktor/backends/constants.py +21 -0
- konduktor/backends/deployment.py +204 -0
- konduktor/backends/deployment_utils.py +1351 -0
- konduktor/backends/jobset.py +225 -0
- konduktor/backends/jobset_utils.py +726 -0
- konduktor/backends/pod_utils.py +501 -0
- konduktor/check.py +184 -0
- konduktor/cli.py +1945 -0
- konduktor/config.py +420 -0
- konduktor/constants.py +36 -0
- konduktor/controller/__init__.py +0 -0
- konduktor/controller/constants.py +56 -0
- konduktor/controller/launch.py +44 -0
- konduktor/controller/node.py +116 -0
- konduktor/controller/parse.py +111 -0
- konduktor/dashboard/README.md +30 -0
- konduktor/dashboard/backend/main.py +169 -0
- konduktor/dashboard/backend/sockets.py +154 -0
- konduktor/dashboard/frontend/.eslintrc.json +3 -0
- konduktor/dashboard/frontend/.gitignore +36 -0
- konduktor/dashboard/frontend/app/api/jobs/route.js +71 -0
- konduktor/dashboard/frontend/app/api/namespaces/route.js +69 -0
- konduktor/dashboard/frontend/app/components/Grafana.jsx +66 -0
- konduktor/dashboard/frontend/app/components/JobsData.jsx +197 -0
- konduktor/dashboard/frontend/app/components/LogsData.jsx +139 -0
- konduktor/dashboard/frontend/app/components/NavMenu.jsx +39 -0
- konduktor/dashboard/frontend/app/components/NavTabs.jsx +73 -0
- konduktor/dashboard/frontend/app/components/NavTabs2.jsx +30 -0
- konduktor/dashboard/frontend/app/components/SelectBtn.jsx +27 -0
- konduktor/dashboard/frontend/app/components/lib/utils.js +6 -0
- konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +78 -0
- konduktor/dashboard/frontend/app/components/ui/input.jsx +19 -0
- konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +104 -0
- konduktor/dashboard/frontend/app/components/ui/select.jsx +120 -0
- konduktor/dashboard/frontend/app/favicon.ico +0 -0
- konduktor/dashboard/frontend/app/globals.css +120 -0
- konduktor/dashboard/frontend/app/jobs/page.js +10 -0
- konduktor/dashboard/frontend/app/layout.js +22 -0
- konduktor/dashboard/frontend/app/logs/page.js +11 -0
- konduktor/dashboard/frontend/app/page.js +12 -0
- konduktor/dashboard/frontend/jsconfig.json +7 -0
- konduktor/dashboard/frontend/next.config.mjs +4 -0
- konduktor/dashboard/frontend/package-lock.json +6687 -0
- konduktor/dashboard/frontend/package.json +37 -0
- konduktor/dashboard/frontend/postcss.config.mjs +8 -0
- konduktor/dashboard/frontend/server.js +64 -0
- konduktor/dashboard/frontend/tailwind.config.js +17 -0
- konduktor/data/__init__.py +9 -0
- konduktor/data/aws/__init__.py +15 -0
- konduktor/data/aws/s3.py +1138 -0
- konduktor/data/constants.py +7 -0
- konduktor/data/data_utils.py +268 -0
- konduktor/data/gcp/__init__.py +19 -0
- konduktor/data/gcp/constants.py +42 -0
- konduktor/data/gcp/gcs.py +994 -0
- konduktor/data/gcp/utils.py +9 -0
- konduktor/data/registry.py +19 -0
- konduktor/data/storage.py +812 -0
- konduktor/data/storage_utils.py +535 -0
- konduktor/execution.py +447 -0
- konduktor/kube_client.py +237 -0
- konduktor/logging.py +111 -0
- konduktor/manifests/aibrix-setup.yaml +430 -0
- konduktor/manifests/apoxy-setup.yaml +184 -0
- konduktor/manifests/apoxy-setup2.yaml +98 -0
- konduktor/manifests/controller_deployment.yaml +69 -0
- konduktor/manifests/dashboard_deployment.yaml +131 -0
- konduktor/manifests/dmesg_daemonset.yaml +57 -0
- konduktor/manifests/pod_cleanup_controller.yaml +129 -0
- konduktor/resource.py +546 -0
- konduktor/serving.py +153 -0
- konduktor/task.py +949 -0
- konduktor/templates/deployment.yaml.j2 +191 -0
- konduktor/templates/jobset.yaml.j2 +43 -0
- konduktor/templates/pod.yaml.j2 +563 -0
- konduktor/usage/__init__.py +0 -0
- konduktor/usage/constants.py +21 -0
- konduktor/utils/__init__.py +0 -0
- konduktor/utils/accelerator_registry.py +17 -0
- konduktor/utils/annotations.py +62 -0
- konduktor/utils/base64_utils.py +95 -0
- konduktor/utils/common_utils.py +426 -0
- konduktor/utils/constants.py +5 -0
- konduktor/utils/env_options.py +55 -0
- konduktor/utils/exceptions.py +234 -0
- konduktor/utils/kubernetes_enums.py +8 -0
- konduktor/utils/kubernetes_utils.py +763 -0
- konduktor/utils/log_utils.py +467 -0
- konduktor/utils/loki_utils.py +102 -0
- konduktor/utils/rich_utils.py +123 -0
- konduktor/utils/schemas.py +625 -0
- konduktor/utils/subprocess_utils.py +273 -0
- konduktor/utils/ux_utils.py +247 -0
- konduktor/utils/validator.py +461 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/LICENSE +91 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/METADATA +98 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/RECORD +107 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/WHEEL +4 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,1351 @@
+"""Deployment utils: wraps CRUD operations for deployments"""
+
+import json
+import os
+import tempfile
+import typing
+from typing import Any, Dict, List, Optional, Tuple
+
+import colorama
+from kubernetes.client.exceptions import ApiException
+from rich import box
+from rich.console import Console
+from rich.table import Table
+from rich.text import Text
+
+import konduktor
+from konduktor import config as konduktor_config
+from konduktor import kube_client, logging
+from konduktor.backends import constants as backend_constants
+from konduktor.backends import pod_utils
+from konduktor.utils import (
+    common_utils,
+    kubernetes_utils,
+    validator,
+)
+
+if typing.TYPE_CHECKING:
+    pass
+
+logger = logging.get_logger(__name__)
+
+# Use shared constants from konduktor.backends.constants
+DEPLOYMENT_NAME_LABEL = backend_constants.DEPLOYMENT_NAME_LABEL
+DEPLOYMENT_USERID_LABEL = backend_constants.USERID_LABEL
+DEPLOYMENT_USER_LABEL = backend_constants.USER_LABEL
+DEPLOYMENT_ACCELERATOR_LABEL = backend_constants.ACCELERATOR_LABEL
+DEPLOYMENT_NUM_ACCELERATORS_LABEL = backend_constants.NUM_ACCELERATORS_LABEL
+AIBRIX_NAME_LABEL = backend_constants.AIBRIX_NAME_LABEL
+
+SECRET_BASENAME_LABEL = backend_constants.SECRET_BASENAME_LABEL
+
+_DEPLOYMENT_METADATA_LABELS = {
+    'deployment_name_label': DEPLOYMENT_NAME_LABEL,
+    'deployment_userid_label': DEPLOYMENT_USERID_LABEL,
+    'deployment_user_label': DEPLOYMENT_USER_LABEL,
+    'deployment_accelerator_label': DEPLOYMENT_ACCELERATOR_LABEL,
+    'deployment_num_accelerators_label': DEPLOYMENT_NUM_ACCELERATORS_LABEL,
+    'model_name_label': AIBRIX_NAME_LABEL,
+}
+
+
+def render_specs(
+    task: 'konduktor.Task',
+) -> Tuple[
+    Dict[str, Any], Dict[str, Any], List[Dict[str, Any]], Optional[Dict[str, Any]]
+]:
+    """Renders Kubernetes resource specifications from a Konduktor task.
+
+    Takes a Konduktor task and generates the necessary Kubernetes resource
+    specifications for deployment by filling the deployment.yaml.j2 template.
+    Automatically detects the deployment type (vLLM/Aibrix vs. general) based
+    on the task's run command.
+
+    Args:
+        task: A Konduktor Task object containing deployment configuration
+            including resources, serving settings, and run commands.
+
+    Returns:
+        A tuple containing:
+        - deployment_spec (Dict[str, Any]): Kubernetes Deployment specification
+        - service_spec (Dict[str, Any]): Kubernetes Service specification
+        - http_addon_resources (List[Dict[str, Any]]): List of HTTP add-on resources
+          (HTTPScaledObject and Ingress) for general deployments; empty for vLLM
+        - pa_resource (Optional[Dict[str, Any]]): PodAutoscaler specification for
+          vLLM deployments with autoscaling enabled; None otherwise (and always
+          None for general deployments)
+
+    Raises:
+        ValueError: If required specs are missing after template rendering or
+            if spec validation fails.
+    """
+    general = True
+    if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
+        general = False
+
+    # Calculate accelerator info for template
+    assert task.resources is not None
+    accelerator_type = task.resources.get_accelerator_type() or 'None'
+    # For Deployments: GPUs per pod (not total across replicas)
+    num_accelerators = task.resources.get_accelerator_count() or 0
+
+    if task.run:
+        task.run = task.run.replace('__KONDUKTOR_TASK_NAME__', task.name)
+    with tempfile.NamedTemporaryFile() as temp:
+        common_utils.fill_template(
+            'deployment.yaml.j2',
+            {
+                'name': task.name,
+                'user': common_utils.get_cleaned_username(),
+                'accelerator_type': accelerator_type,
+                'num_accelerators': str(num_accelerators),
+                'min_replicas': task.serving.min_replicas if task.serving else 1,
+                'max_replicas': task.serving.max_replicas if task.serving else 1,
+                'ports': task.serving.ports if task.serving else 8000,
+                'probe_path': (
+                    task.serving.get('probe', None) if task.serving else None
+                ),
+                'autoscaler': (
+                    'true'
+                    if (
+                        task.serving
+                        and task.serving.min_replicas != task.serving.max_replicas
+                    )
+                    else 'false'
+                ),
+                'general': general,
+                # Strip last 3 chars: backend Apoxy setup uses unique
+                # suffixes (3 random numbers) to avoid Apoxy bugs when
+                # deleting/creating TunnelNode resources with the same names too
+                # quickly, but we hide this complexity from user-facing endpoints
+                'general_base_host': (
+                    f'{get_unique_cluster_name_from_tunnel()[:-3]}2.trainy.us'
+                )
+                if general
+                else None,
+                **_DEPLOYMENT_METADATA_LABELS,
+            },
+            temp.name,
+        )
+        docs = common_utils.read_yaml_all(temp.name)
+
+    deployment_spec = None
+    service_spec = None
+    http_addon_resources = []  # For general deployments
+    pa_resource = None  # For aibrix deployments w autoscaling
+
+    for doc in docs:
+        kind = doc.get('kind')
+        if kind == 'Deployment':
+            deployment_spec = doc
+        elif kind == 'Service':
+            service_spec = doc
+        # HTTPScaledObject resource for general deployments w autoscaling only
+        elif kind == 'HTTPScaledObject':
+            http_addon_resources.append(doc)
+        # Ingress resource for all general deployments
+        elif kind == 'Ingress':
+            http_addon_resources.append(doc)
+        # PodAutoscaler resource for aibrix deployments w autoscaling only
+        elif kind == 'PodAutoscaler':
+            pa_resource = doc
+
+    if deployment_spec is None:
+        raise ValueError('Deployment manifest not found.')
+    if service_spec is None:
+        raise ValueError('Service manifest not found.')
+    if general and not http_addon_resources:
+        raise ValueError('General deployment manifests not found.')
+    if (
+        not general
+        and task.serving
+        and task.serving.min_replicas != task.serving.max_replicas
+        and pa_resource is None
+    ):
+        raise ValueError('Aibrix deployment PodAutoscaler manifest not found.')
+
+    # Validate specs before returning
+    try:
+        validator.validate_deployment_spec(deployment_spec)
+        validator.validate_service_spec(service_spec)
+    except ValueError as e:
+        raise ValueError(f'Spec validation failed: {e}')
+
+    return deployment_spec, service_spec, http_addon_resources, pa_resource
+
+
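As a reading aid, the kind-based routing above reduces to a small pure function. The sketch below is standalone Python with an illustrative `docs` list; it is not the template's actual output:

```python
from typing import Any, Dict, List, Optional, Tuple

def route_rendered_docs(
    docs: List[Dict[str, Any]],
) -> Tuple[Optional[dict], Optional[dict], List[dict], Optional[dict]]:
    """Standalone replica of the kind-based routing inside render_specs."""
    deployment = service = pa = None
    http_addons: List[dict] = []
    for doc in docs:
        kind = doc.get('kind')
        if kind == 'Deployment':
            deployment = doc
        elif kind == 'Service':
            service = doc
        elif kind in ('HTTPScaledObject', 'Ingress'):
            http_addons.append(doc)
        elif kind == 'PodAutoscaler':
            pa = doc
    return deployment, service, http_addons, pa

# A general deployment renders Deployment + Service + Ingress (values illustrative):
docs = [
    {'kind': 'Deployment', 'metadata': {'name': 'demo'}},
    {'kind': 'Service', 'metadata': {'name': 'demo'}},
    {'kind': 'Ingress', 'metadata': {'name': 'demo-ingress'}},
]
deployment, service, http_addons, pa = route_rendered_docs(docs)
assert deployment and service and len(http_addons) == 1 and pa is None
```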
+def create_pod_autoscaler(
+    namespace: str,
+    task: 'konduktor.Task',
+    dryrun: bool = False,
+) -> None:
+    """Creates Aibrix PodAutoscaler for non-general deployments."""
+
+    # Check if this is a non-general deployment
+    general = True
+    if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
+        general = False
+
+    # Only create PA for aibrix deployments w autoscaling
+    if general:
+        return
+
+    # Check if autoscaling is needed
+    if not task.serving or task.serving.min_replicas == task.serving.max_replicas:
+        logger.debug(
+            f'[DEBUG] No autoscaling needed: '
+            f'min={task.serving.min_replicas if task.serving else "None"}, '
+            f'max={task.serving.max_replicas if task.serving else "None"}'
+        )
+        return  # No autoscaling needed
+
+    logger.debug(
+        f'[DEBUG] PA autoscaling enabled: '
+        f'min={task.serving.min_replicas}, max={task.serving.max_replicas}'
+    )
+
+    # Get the PA spec from the rendered template
+    _, _, _, pa_spec = render_specs(task)
+
+    if not pa_spec:
+        logger.warning('[DEBUG] No PodAutoscaler found in rendered template')
+        return
+
+    if dryrun:
+        logger.debug(
+            f'[DRYRUN] Would create PA autoscaler: '
+            f'{pa_spec["metadata"].get("name", "<no-name>")}'
+        )
+        return
+
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    custom_api = kube_client.crd_api(context=context)
+
+    # Create KPA for aibrix deployments w autoscaling
+    name = pa_spec.get('metadata', {}).get('name', '<no-name>')
+    try:
+        custom_api.create_namespaced_custom_object(
+            group='autoscaling.aibrix.ai',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='podautoscalers',
+            body=pa_spec,
+        )
+        logger.info(f'Pod autoscaler {name} created')
+    except Exception as e:
+        if '409' in str(e) or 'AlreadyExists' in str(e):
+            logger.warning(f'Pod autoscaler {name} already exists, skipping')
+        else:
+            logger.error(f'Error creating pod autoscaler {name}: {e}')
+            raise
+
+
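The gate at the top of `create_pod_autoscaler` (and repeated in the other create helpers) can be summarized as a predicate. A minimal sketch, with hypothetical run strings and replica counts:

```python
from typing import Optional

def needs_pod_autoscaler(
    run: Optional[str], min_replicas: int, max_replicas: int
) -> bool:
    """Mirrors the gating above: Aibrix (vLLM) run command plus elastic replicas."""
    is_vllm = run is not None and 'vllm.entrypoints.openai.api_server' in run
    return is_vllm and min_replicas != max_replicas

assert needs_pod_autoscaler('python -m vllm.entrypoints.openai.api_server', 1, 4)
assert not needs_pod_autoscaler('python serve.py', 1, 4)  # general deployment
assert not needs_pod_autoscaler('python -m vllm.entrypoints.openai.api_server', 2, 2)
```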
+def create_deployment(
+    namespace: str,
+    task: 'konduktor.Task',
+    pod_spec: Dict[str, Any],
+    dryrun: bool = False,
+) -> Optional[Dict[str, Any]]:
+    """Creates a Kubernetes Deployment based on the task and pod spec."""
+
+    assert task.resources is not None, 'Task resources are undefined'
+
+    deployment_spec, _, _, _ = render_specs(task)
+
+    # Inject deployment-specific pod metadata
+    pod_utils.inject_deployment_pod_metadata(pod_spec, task)
+
+    # Inject pod spec directly (like jobset logic)
+    pod_utils.merge_pod_into_deployment_template(deployment_spec['spec'], pod_spec)
+
+    if dryrun:
+        logger.debug(f'[DRYRUN] Would create deployment:\n{deployment_spec}')
+        return deployment_spec
+
+    try:
+        context = kubernetes_utils.get_current_kube_config_context_name()
+        apps_api = kube_client.apps_api(context=context)
+        deployment = apps_api.create_namespaced_deployment(
+            namespace=namespace,
+            body=deployment_spec,
+        )
+        logger.info(
+            f'Deployment {colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
+            f'{task.name}{colorama.Style.RESET_ALL} created'
+        )
+
+        return deployment
+    except kube_client.api_exception() as err:
+        try:
+            error_body = json.loads(err.body)
+            error_message = error_body.get('message', '')
+            logger.error(f'Error creating deployment: {error_message}')
+        except json.JSONDecodeError:
+            logger.error(f'Error creating deployment: {err.body}')
+        raise err
+
+
+def create_service(
+    namespace: str,
+    task: 'konduktor.Task',
+    dryrun: bool = False,
+) -> Optional[Dict[str, Any]]:
+    """Creates a Kubernetes Service based on the task and pod spec."""
+
+    assert task.resources is not None, 'Task resources are undefined'
+
+    _, service_spec, _, _ = render_specs(task)
+
+    if dryrun:
+        logger.debug(f'[DRYRUN] Would create service:\n{service_spec}')
+        return service_spec
+
+    try:
+        context = kubernetes_utils.get_current_kube_config_context_name()
+        core_api = kube_client.core_api(context=context)
+        service = core_api.create_namespaced_service(
+            namespace=namespace,
+            body=service_spec,
+        )
+        logger.info(
+            f'Service {colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
+            f'{task.name}{colorama.Style.RESET_ALL} created'
+        )
+        return service
+    except kube_client.api_exception() as err:
+        try:
+            error_body = json.loads(err.body)
+            error_message = error_body.get('message', '')
+            logger.error(f'Error creating service: {error_message}')
+        except json.JSONDecodeError:
+            # Fall back to the raw body; error_message is unassigned here.
+            logger.error(f'Error creating service: {err.body}')
+        raise err
+
+
+def create_http_addon_resources(
+    namespace: str,
+    task: 'konduktor.Task',
+    dryrun: bool = False,
+) -> None:
+    """Creates HTTP Add-on resources for general deployments."""
+
+    # Check if this is a non-general deployment
+    general = True
+    if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
+        general = False
+
+    # Only create HTTP Add-on resources for general deployments
+    if not general:
+        return
+
+    _, _, http_addon_resources, _ = render_specs(task)
+
+    if not http_addon_resources:
+        logger.debug('[DEBUG] No HTTP Add-on resources to create')
+        return
+
+    if dryrun:
+        logger.debug(
+            f'[DRYRUN] Would create HTTP Add-on resources:\n{http_addon_resources}'
+        )
+        return
+
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    logger.debug(f'[DEBUG] Using Kubernetes context: {context}')
+
+    for resource in http_addon_resources:
+        kind = resource.get('kind')
+        name = resource['metadata']['name']
+
+        logger.debug(f'[DEBUG] Creating {kind}: {name}')
+
+        try:
+            if kind == 'HTTPScaledObject':
+                # Create HTTPScaledObject (only for autoscaling)
+                custom_api = kube_client.crd_api(context=context)
+                custom_api.create_namespaced_custom_object(
+                    group='http.keda.sh',
+                    version='v1alpha1',
+                    namespace=namespace,
+                    plural='httpscaledobjects',
+                    body=resource,
+                )
+                logger.info(f'HTTPScaledObject {name} created')
+
+            elif kind == 'Ingress':
+                # Create Ingress (always needed for external access)
+                networking_api = kube_client.networking_api(context=context)
+                networking_api.create_namespaced_ingress(
+                    namespace=namespace,
+                    body=resource,
+                )
+                logger.info(f'Ingress {name} created')
+
+        except Exception as e:
+            if '409' in str(e) or 'AlreadyExists' in str(e):
+                logger.warning(
+                    f'HTTP Add-on resource {kind} {name} already exists, skipping'
+                )
+            else:
+                logger.error(f'Error creating HTTP Add-on resource {kind} {name}: {e}')
+                raise
+
+
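The CRD-backed create paths (`create_pod_autoscaler` and the loop above) share the same conflict handling. A minimal sketch of that 409-tolerant pattern, using `print` in place of the module's logger:

```python
from typing import Callable

def create_if_absent(create_fn: Callable[[], None], what: str) -> None:
    """409-tolerant create, mirroring the conflict handling used above."""
    try:
        create_fn()
        print(f'{what} created')
    except Exception as e:
        # The Kubernetes client surfaces conflicts as an ApiException with
        # status 409; the string check matches the one used in this module.
        if '409' in str(e) or 'AlreadyExists' in str(e):
            print(f'{what} already exists, skipping')
        else:
            raise
```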
+def list_models(namespace: str) -> List[str]:
+    """
+    Returns a list of unique model names in the namespace,
+    based on label DEPLOYMENT_NAME_LABEL=`trainy.ai/deployment-name`.
+    """
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    apps = kube_client.apps_api(context)
+    core = kube_client.core_api(context)
+    crds = kube_client.crd_client(context)
+
+    label_selector = DEPLOYMENT_NAME_LABEL
+    model_names: set[str] = set()
+
+    # Deployments
+    for deploy in apps.list_namespaced_deployment(
+        namespace, label_selector=label_selector
+    ).items:
+        labels = getattr(deploy.metadata, 'labels', {}) or {}
+        name = labels.get(DEPLOYMENT_NAME_LABEL)
+        if name:
+            model_names.add(name)
+
+    # Services
+    for svc in core.list_namespaced_service(
+        namespace, label_selector=label_selector
+    ).items:
+        labels = getattr(svc.metadata, 'labels', {}) or {}
+        name = labels.get(DEPLOYMENT_NAME_LABEL)
+        if name:
+            model_names.add(name)
+
+    # Podautoscalers (KPA only)
+    try:
+        pa_list = crds.list_namespaced_custom_object(
+            group='autoscaling.aibrix.ai',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='podautoscalers',
+        )
+        for pa in pa_list.get('items', []):
+            labels = pa.get('metadata', {}).get('labels', {})
+            name = labels.get(DEPLOYMENT_NAME_LABEL)
+            if name:
+                model_names.add(name)
+    except ApiException as e:
+        if e.status != 404:
+            # re-raise if it's not just missing CRD
+            raise
+        # otherwise ignore, cluster just doesn't have Aibrix CRDs
+        logger.warning('Skipping PA lookup. Aibrix CRDs not found in cluster')
+
+    # HPA
+    autoscaling_api = kube_client.autoscaling_api(context=context)
+    hpa_list = autoscaling_api.list_namespaced_horizontal_pod_autoscaler(
+        namespace=namespace
+    )
+    for hpa in hpa_list.items:
+        labels = getattr(hpa.metadata, 'labels', {}) or {}
+        name = labels.get(DEPLOYMENT_NAME_LABEL)
+        if name:
+            model_names.add(name)
+
+    return sorted(model_names)
+
+
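The aggregation in `list_models` boils down to collecting one label's values across four resource types. A standalone sketch over plain dicts (the label key is the one named in the docstring):

```python
LABEL = 'trainy.ai/deployment-name'

def collect_names(objects: list) -> list:
    """Dedupe-and-sort over label values, as list_models does across resources."""
    names = {obj.get('labels', {}).get(LABEL) for obj in objects}
    return sorted(n for n in names if n)

assert collect_names([
    {'labels': {LABEL: 'llama'}},
    {'labels': {LABEL: 'llama'}},  # duplicates collapse via the set
    {'labels': {}},                # unlabeled resources are skipped
]) == ['llama']
```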
+def get_autoscaler_status_for_deployment(
+    name: str, autoscalers_map: dict, is_general: bool
+) -> bool:
+    """Return autoscaler readiness by deployment type.
+
+    - General: returns hpa_ready
+    - vLLM/Aibrix: returns kpa_ready
+    """
+
+    def _is_ready(obj: dict) -> bool:
+        try:
+            conditions = obj.get('status', {}).get('conditions') or []
+            kind = obj.get('kind') or ''
+
+            for cond in conditions:
+                if cond.get('type') == 'AbleToScale' and cond.get('status') == 'True':
+                    return True
+
+            if kind == 'HorizontalPodAutoscaler':
+                # Check for ScalingActive condition
+                for cond in conditions:
+                    if cond.get('type') == 'ScalingActive':
+                        # ScalingActive: True means actively scaling
+                        if cond.get('status') == 'True':
+                            return True
+                        # ScalingActive: False with ScalingDisabled reason
+                        # is normal for scale-to-zero
+                        if (
+                            cond.get('status') == 'False'
+                            and cond.get('reason') == 'ScalingDisabled'
+                        ):
+                            return True
+
+            # Treat existing HPA with no conditions as ready
+            return not conditions or any(
+                c.get('type') == 'AbleToScale' and c.get('status') == 'True'
+                for c in conditions
+            )
+        except Exception as e:
+            logger.warning(f'Error checking autoscaler readiness: {e}')
+            return False
+
+    kpa_ready = False
+    hpa_ready = False
+
+    dep_autos = autoscalers_map.get(name, {})
+
+    if is_general:
+        if 'hpa' in dep_autos:
+            hpa_ready = _is_ready(dep_autos['hpa'])
+            return hpa_ready
+        return False
+
+    if 'kpa' in dep_autos:
+        kpa_ready = _is_ready(dep_autos['kpa'])
+        return kpa_ready
+    return False
+
+
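Sample condition payloads help here. The dicts below are illustrative, shaped like what `_is_ready` inspects; the helper replicates only its first rule:

```python
hpa_scaling = {
    'kind': 'HorizontalPodAutoscaler',
    'status': {'conditions': [{'type': 'AbleToScale', 'status': 'True'}]},
}
hpa_scale_to_zero = {
    'kind': 'HorizontalPodAutoscaler',
    'status': {'conditions': [
        {'type': 'ScalingActive', 'status': 'False', 'reason': 'ScalingDisabled'},
    ]},
}

def able_to_scale(obj: dict) -> bool:
    """Replica of only the first rule in _is_ready (AbleToScale == True)."""
    conditions = obj.get('status', {}).get('conditions') or []
    return any(
        c.get('type') == 'AbleToScale' and c.get('status') == 'True'
        for c in conditions
    )

assert able_to_scale(hpa_scaling)
# The second payload fails this rule but still counts as ready in _is_ready,
# because ScalingActive=False with reason=ScalingDisabled is normal scale-to-zero.
assert not able_to_scale(hpa_scale_to_zero)
```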
+def _extract_min_max_from_autoscaler(autoscaler: dict) -> tuple[str, str]:
+    """Extract min/max replicas across PA/HPA/KEDA.
+
+    Returns (min_str, max_str). Unknowns as '?'.
+    """
+    try:
+        if not autoscaler:
+            return '?', '?'
+
+        spec = autoscaler.get('spec', {})
+
+        # Check for HTTPScaledObject format (replicas.min/max)
+        if 'replicas' in spec:
+            replicas = spec.get('replicas', {})
+            if 'min' in replicas or 'max' in replicas:
+                return (str(replicas.get('min', '?')), str(replicas.get('max', '?')))
+
+        # Check for KEDA ScaledObject format (minReplicaCount/maxReplicaCount)
+        if 'minReplicaCount' in spec or 'maxReplicaCount' in spec:
+            return (
+                str(spec.get('minReplicaCount', '?')),
+                str(spec.get('maxReplicaCount', '?')),
+            )
+
+        # Check for PA/HPA format (minReplicas/maxReplicas)
+        if 'minReplicas' in spec or 'maxReplicas' in spec:
+            return str(spec.get('minReplicas', '?')), str(spec.get('maxReplicas', '?'))
+    except Exception:
+        pass
+    return '?', '?'
+
+
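Assuming this module is importable as `konduktor.backends.deployment_utils`, the extractor handles the three spec shapes like so (values illustrative):

```python
from konduktor.backends.deployment_utils import _extract_min_max_from_autoscaler

http_scaled = {'spec': {'replicas': {'min': 1, 'max': 5}}}            # KEDA HTTPScaledObject
keda_scaled = {'spec': {'minReplicaCount': 0, 'maxReplicaCount': 3}}  # KEDA ScaledObject
pa_or_hpa = {'spec': {'minReplicas': 2, 'maxReplicas': 8}}            # Aibrix PA / HPA

assert _extract_min_max_from_autoscaler(http_scaled) == ('1', '5')
assert _extract_min_max_from_autoscaler(keda_scaled) == ('0', '3')
assert _extract_min_max_from_autoscaler(pa_or_hpa) == ('2', '8')
assert _extract_min_max_from_autoscaler({}) == ('?', '?')  # unknowns stay '?'
```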
+def build_autoscaler_map(namespace: str, context: str) -> dict[str, dict]:
+    """Fetch autoscalers and return a simple map keyed by deployment name.
+
+    Simplified model:
+    - Aibrix deployments: 1 PodAutoscaler (KPA) if autoscaling enabled
+    - General deployments: 1 HPA (created by KEDA) if autoscaling enabled
+    - No autoscaling: No autoscaler
+
+    Returns: {deployment_name: {'kpa': pa_obj} or {'hpa': hpa_obj}}
+    """
+    autoscalers: Dict[str, Dict[str, Any]] = {}
+
+    # --- Aibrix deployment KPA ---
+    try:
+        crd_api = kube_client.crd_api(context=context)
+        pa_list = crd_api.list_namespaced_custom_object(
+            group='autoscaling.aibrix.ai',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='podautoscalers',
+        )
+        for pa in pa_list.get('items', []):
+            labels = pa.get('metadata', {}).get('labels', {})
+            dep_name = labels.get(DEPLOYMENT_NAME_LABEL)
+            if not dep_name:
+                # Fallback to scaleTargetRef.name
+                spec = pa.get('spec', {})
+                scale_ref = spec.get('scaleTargetRef', {})
+                dep_name = scale_ref.get('name')
+            if dep_name:
+                autoscalers[dep_name] = {'kpa': pa}
+        if pa_list.get('items'):
+            logger.debug(f"Found {len(pa_list.get('items', []))} PodAutoscalers")
+    except Exception as e:
+        logger.warning(f'Error fetching PodAutoscalers: {e}')
+
+    # --- General deployment HPA ---
+    try:
+        autoscaling_api = kube_client.autoscaling_api(context=context)
+        hpa_list = autoscaling_api.list_namespaced_horizontal_pod_autoscaler(
+            namespace=namespace
+        )
+        for hpa in hpa_list.items:
+            labels = getattr(hpa.metadata, 'labels', {}) or {}
+            dep_name = labels.get(DEPLOYMENT_NAME_LABEL)
+            if not dep_name:
+                # Fallback to scaleTargetRef.name
+                spec = hpa.spec.to_dict() if hpa.spec else {}
+                scale_ref = spec.get('scale_target_ref', {})
+                dep_name = scale_ref.get('name')
+            if dep_name:
+                hpa_dict = hpa.to_dict()
+                hpa_dict['kind'] = 'HorizontalPodAutoscaler'
+                hpa_dict['apiVersion'] = 'autoscaling/v2'
+                autoscalers[dep_name] = {'hpa': hpa_dict}
+        if hpa_list.items:
+            logger.debug(f'Found {len(hpa_list.items)} HPAs')
+    except Exception as e:
+        logger.warning(f'Error fetching HPAs: {e}')
+
+    return autoscalers
+
+
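A hypothetical lookup over the returned map; the namespace and context names are placeholders, and a reachable cluster with the relevant CRDs is required:

```python
from konduktor.backends.deployment_utils import build_autoscaler_map

autoscalers = build_autoscaler_map('default', 'my-context')  # hypothetical args
for dep_name, entry in autoscalers.items():
    # Each value is a one-key dict: {'kpa': ...} for Aibrix, {'hpa': ...} for general.
    flavor = 'Aibrix KPA' if 'kpa' in entry else 'KEDA-managed HPA'
    print(f'{dep_name}: {flavor}')
```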
+def get_model_status(
+    name: str,
+    deployments: dict[str, Any],
+    services: dict[str, Any],
+    autoscalers: dict[str, dict],
+) -> Dict[str, Optional[str]]:
+    """Check the status of Deployment, Service, and Autoscaler."""
+    status = {
+        'deployment': 'missing',
+        'service': 'missing',
+        'autoscaler': None,
+    }
+
+    # --- Deployment ---
+    if name in deployments:
+        d = deployments[name]
+        ready = (d.status.ready_replicas or 0) if d.status else 0
+        desired = (d.spec.replicas or 0) if d.spec else 0
+
+        labels = d.metadata.labels or {}
+        is_aibrix = AIBRIX_NAME_LABEL in labels
+
+        if is_aibrix and name in autoscalers:
+            # For Aibrix deployments, get the original min replicas from
+            # deployment labels
+            original_min_replicas = 0
+            original_min_str = labels.get('trainy.ai/original-min-replicas')
+            if original_min_str:
+                try:
+                    original_min_replicas = int(original_min_str)
+                except (ValueError, TypeError):
+                    pass
+
+            # For Aibrix deployments, consider ready if:
+            # 1. Ready replicas >= original minimum replicas, OR
+            # 2. If original_min_replicas is 0 (scale-to-zero allowed),
+            #    then ready == desired
+            if original_min_replicas == 0:
+                status['deployment'] = 'ready' if ready == desired else 'pending'
+            else:
+                status['deployment'] = (
+                    'ready' if ready >= original_min_replicas else 'pending'
+                )
+        else:
+            # General deployments or no autoscaler: use simple ready == desired check
+            status['deployment'] = 'ready' if ready == desired else 'pending'
+
+    # --- Service ---
+    if name in services:
+        status['service'] = 'ready'
+    else:
+        status['service'] = 'missing'
+
+    # --- Autoscaler ---
+    if name in autoscalers:
+        # Check if this is a general deployment (not vLLM/Aibrix)
+        is_general = True
+        if deployments.get(name) and hasattr(deployments[name].metadata, 'labels'):
+            labels = deployments[name].metadata.labels or {}
+            if AIBRIX_NAME_LABEL in labels:
+                is_general = False
+
+        # Check actual autoscaler readiness
+        autoscaler_ready = get_autoscaler_status_for_deployment(
+            name, autoscalers, is_general
+        )
+        status['autoscaler'] = 'ready' if autoscaler_ready else 'pending'
+    else:
+        status['autoscaler'] = None
+
+    return status
+
+
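The Aibrix readiness rule above, restated as a pure function with illustrative replica counts:

```python
def aibrix_deployment_state(ready: int, desired: int, original_min: int) -> str:
    """Replica of the Aibrix branch in get_model_status (illustrative only)."""
    if original_min == 0:
        # Scale-to-zero allowed: ready must match whatever is currently desired.
        return 'ready' if ready == desired else 'pending'
    # Otherwise: ready once the original minimum replica count is met.
    return 'ready' if ready >= original_min else 'pending'

assert aibrix_deployment_state(ready=1, desired=3, original_min=1) == 'ready'
assert aibrix_deployment_state(ready=1, desired=3, original_min=2) == 'pending'
assert aibrix_deployment_state(ready=0, desired=0, original_min=0) == 'ready'
```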
+def get_deployment(namespace: str, job_name: str) -> Optional[Any]:
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    apps_api = kube_client.apps_api(context=context)
+    try:
+        return apps_api.read_namespaced_deployment(name=job_name, namespace=namespace)
+    except ApiException as e:
+        if e.status == 404:
+            return None
+        raise
+
+
+def get_service(namespace: str, job_name: str) -> Optional[Any]:
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    core_api = kube_client.core_api(context=context)
+    try:
+        return core_api.read_namespaced_service(name=job_name, namespace=namespace)
+    except ApiException as e:
+        if e.status == 404:
+            return None
+        raise
+
+
+def get_autoscaler(namespace: str, job_name: str) -> Optional[Any]:
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    # --- Try Aibrix PA first ---
+    crd_api = kube_client.crd_api(context=context)
+    try:
+        return crd_api.get_namespaced_custom_object(
+            group='autoscaling.aibrix.ai',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='podautoscalers',
+            name=f'{job_name}-pa',
+        )
+    except ApiException as e:
+        if e.status != 404:
+            raise
+        # Fall through to check HPA
+
+    # --- Try built-in Kubernetes HPA ---
+    try:
+        autoscaling_api = kube_client.autoscaling_api(context=context)
+        return autoscaling_api.read_namespaced_horizontal_pod_autoscaler(
+            name=f'{job_name}-hpa', namespace=namespace
+        ).to_dict()
+    except ApiException as e:
+        if e.status != 404:
+            raise
+
+    # --- Try KEDA ScaledObject ---
+    try:
+        return crd_api.get_namespaced_custom_object(
+            group='keda.sh',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='scaledobjects',
+            name=f'{job_name}-keda',
+        )
+    except ApiException as e:
+        if e.status == 404:
+            return None
+        raise
+
+
+def delete_deployment(namespace: str, name: str) -> Optional[Dict[str, Any]]:
+    """Deletes a Kubernetes Deployment in the given namespace.
+
+    Args:
+        namespace: Namespace where the deployment exists.
+        name: Name of the deployment to delete.
+
+    Returns:
+        Response from delete operation, or None on error.
+    """
+    try:
+        context = kubernetes_utils.get_current_kube_config_context_name()
+        response = kube_client.apps_api(context=context).delete_namespaced_deployment(
+            name=name,
+            namespace=namespace,
+        )
+        return response
+    except kube_client.api_exception() as err:
+        try:
+            error_body = json.loads(err.body)
+            error_message = error_body.get('message', '')
+            logger.error(f'Error deleting deployment: {error_message}')
+        except json.JSONDecodeError:
+            error_message = str(err.body)
+            logger.error(f'Error deleting deployment: {error_message}')
+        else:
+            raise err
+    return None
+
+
+def delete_service(namespace: str, name: str) -> Optional[Dict[str, Any]]:
+    """Deletes a Kubernetes Service in the given namespace.
+
+    Args:
+        namespace: Namespace where the service exists.
+        name: Name of the service to delete.
+
+    Returns:
+        Response from delete operation, or None on error.
+    """
+    try:
+        context = kubernetes_utils.get_current_kube_config_context_name()
+        response = kube_client.core_api(context=context).delete_namespaced_service(
+            name=name,
+            namespace=namespace,
+        )
+        return response
+    except kube_client.api_exception() as err:
+        try:
+            error_body = json.loads(err.body)
+            error_message = error_body.get('message', '')
+            logger.error(f'Error deleting service: {error_message}')
+        except json.JSONDecodeError:
+            logger.error(f'Error deleting service: {err.body}')
+        raise err
+    return None
+
+
+def delete_autoscaler(namespace: str, name: str) -> Optional[Dict[str, Any]]:
+    """Delete all autoscalers associated with a deployment name.
+
+    This includes:
+    - All Aibrix PodAutoscalers (e.g., "-pa", "-apa") targeting the deployment
+    - Any HorizontalPodAutoscaler named "<name>-hpa"
+    - Any KEDA ScaledObject named "<name>-keda"
+    """
+    context = kubernetes_utils.get_current_kube_config_context_name()
+
+    # --- Delete ALL PodAutoscalers that target this deployment ---
+    try:
+        custom_api = kube_client.crd_api(context=context)
+        pa_list = custom_api.list_namespaced_custom_object(
+            group='autoscaling.aibrix.ai',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='podautoscalers',
+        )
+        for pa in pa_list.get('items', []):
+            meta = pa.get('metadata', {})
+            spec = pa.get('spec', {})
+            pa_name = meta.get('name', '')
+            labels = meta.get('labels', {})
+            scale_ref = spec.get('scaleTargetRef', {}).get('name')
+            targets_deployment = (
+                labels.get(DEPLOYMENT_NAME_LABEL) == name
+                or scale_ref == name
+                or pa_name.startswith(f'{name}-')
+            )
+            if targets_deployment:
+                try:
+                    custom_api.delete_namespaced_custom_object(
+                        group='autoscaling.aibrix.ai',
+                        version='v1alpha1',
+                        namespace=namespace,
+                        plural='podautoscalers',
+                        name=pa_name,
+                    )
+                    logger.info(f'Deleted PodAutoscaler: {pa_name}')
+                except kube_client.api_exception() as err:
+                    if getattr(err, 'status', None) != 404:
+                        raise
+    except kube_client.api_exception() as err:
+        # If PA CRD is missing, skip; otherwise bubble up
+        if getattr(err, 'status', None) not in (404, None):
+            raise
+
+    # --- Delete HPA ---
+    try:
+        autoscaling_api = kube_client.autoscaling_api(context=context)
+        autoscaling_api.delete_namespaced_horizontal_pod_autoscaler(
+            name=f'{name}-hpa',
+            namespace=namespace,
+        )
+        logger.info(f'Deleted HPA: {name}-hpa')
+    except kube_client.api_exception() as err:
+        if getattr(err, 'status', None) not in (404, None):
+            try:
+                error_body = json.loads(err.body)
+                error_message = error_body.get('message', '')
+                logger.error(f'Error deleting HPA: {error_message}')
+            except json.JSONDecodeError:
+                logger.error(f'Error deleting HPA: {err.body}')
+            raise err
+
+    # --- Delete KEDA ScaledObject ---
+    try:
+        custom_api = kube_client.crd_api(context=context)
+        custom_api.delete_namespaced_custom_object(
+            group='keda.sh',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='scaledobjects',
+            name=f'{name}-keda',
+        )
+        logger.info(f'Deleted ScaledObject: {name}-keda')
+    except kube_client.api_exception() as err:
+        if getattr(err, 'status', None) not in (404, None):
+            try:
+                error_body = json.loads(err.body)
+                error_message = error_body.get('message', '')
+                logger.error(f'Error deleting KEDA ScaledObject: {error_message}')
+            except json.JSONDecodeError:
+                logger.error(f'Error deleting KEDA ScaledObject: {err.body}')
+            raise err
+
+    return None
+
+
+def delete_http_addon_resources(name: str, namespace: str) -> None:
+    """Deletes HTTP Add-on resources for general deployments."""
+    context = kubernetes_utils.get_current_kube_config_context_name()
+
+    # Delete HTTPScaledObject
+    try:
+        custom_api = kube_client.crd_api(context=context)
+        custom_api.delete_namespaced_custom_object(
+            group='http.keda.sh',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='httpscaledobjects',
+            name=f'{name}-httpscaledobject',
+        )
+        logger.info(f'Deleted HTTPScaledObject: {name}-httpscaledobject')
+    except kube_client.api_exception() as err:
+        if err.status != 404:
+            logger.debug(
+                f'Failed to delete HTTPScaledObject {name}-httpscaledobject: {err}'
+            )
+
+    # Delete Ingress
+    try:
+        networking_api = kube_client.networking_api(context=context)
+        networking_api.delete_namespaced_ingress(
+            name=f'{name}-ingress',
+            namespace=namespace,
+        )
+        logger.info(f'Deleted Ingress: {name}-ingress')
+    except kube_client.api_exception() as err:
+        if err.status != 404:
+            logger.debug(f'Failed to delete Ingress {name}-ingress: {err}')
+
+
+def delete_serving_specs(name: str, namespace: str) -> None:
+    for kind, delete_fn in [
+        ('deployment', delete_deployment),
+        ('service', delete_service),
+    ]:
+        try:
+            delete_fn(namespace, name)
+            logger.info(f'Deleted {kind}: {name}')
+        except Exception as e:
+            logger.debug(f'Failed to delete {kind} {name}: {e}')
+
+    # Delete autoscaler resources (Aibrix PA, HPA, or KEDA ScaledObject)
+    try:
+        delete_autoscaler(namespace=namespace, name=name)
+    except Exception as e:
+        logger.debug(f'Failed to delete autoscaler for {name}: {e}')
+
+    # Delete HTTP Add-on resources for general deployments
+    delete_http_addon_resources(name, namespace)
+
+
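For reference, the child-resource names the delete helpers above assume, derived from a deployment name; this is a sketch of the module's own naming conventions, not a Kubernetes standard:

```python
def expected_child_resources(name: str) -> dict:
    """Names delete_autoscaler/delete_http_addon_resources look for."""
    return {
        'aibrix_podautoscaler': f'{name}-pa',  # plus any '{name}-*' PA targeting it
        'hpa': f'{name}-hpa',
        'keda_scaledobject': f'{name}-keda',
        'httpscaledobject': f'{name}-httpscaledobject',
        'ingress': f'{name}-ingress',
    }

assert expected_child_resources('demo')['ingress'] == 'demo-ingress'
```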
+def _get_resource_summary(deployment) -> str:
+    """Extract and format pod resource information from a deployment.
+
+    Args:
+        deployment: Kubernetes deployment object
+
+    Returns:
+        Formatted string with resource information (GPU, CPU, memory)
+    """
+    if not deployment:
+        return '?'
+
+    try:
+        containers = deployment.spec.template.spec.containers
+        if not containers:
+            return '?'
+        container = containers[0]
+        res = container.resources.requests or {}
+
+        cpu = res.get('cpu', '?')
+        mem = res.get('memory', '?')
+        gpu = res.get('nvidia.com/gpu') or res.get('trainy.ai/gpu')
+
+        # Try to extract GPU type from deployment labels
+        labels = deployment.metadata.labels or {}
+        accelerator_type = labels.get('trainy.ai/accelerator', 'L4O')
+
+        gpu_str = f'{accelerator_type}:{gpu}' if gpu else 'None'
+        return f'{gpu_str}\n{cpu} CPU\n{mem}'
+    except Exception:
+        return '?'
+
+
+def get_envoy_external_ip() -> Optional[str]:
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    core_api = kube_client.core_api(context=context)
+    try:
+        services = core_api.list_namespaced_service(namespace='envoy-gateway-system')
+        for svc in services.items:
+            if svc.spec.type == 'LoadBalancer' and 'envoy' in svc.metadata.name:
+                ingress = svc.status.load_balancer.ingress
+                if ingress:
+                    return ingress[0].ip or ingress[0].hostname
+    except Exception:
+        pass
+    return None
+
+
+def get_ingress_nginx_external_ip() -> Optional[str]:
+    """Get the external IP of the keda-ingress-nginx-controller LoadBalancer."""
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    core_api = kube_client.core_api(context=context)
+    try:
+        # Look for keda-ingress-nginx-controller service in keda namespace
+        service = core_api.read_namespaced_service(
+            name='keda-ingress-nginx-controller', namespace='keda'
+        )
+        if service.spec.type == 'LoadBalancer':
+            ingress = service.status.load_balancer.ingress
+            if ingress:
+                return ingress[0].ip or ingress[0].hostname
+    except Exception:
+        pass
+    return None
+
+
+def get_unique_cluster_name_from_tunnel() -> str:
+    """Get cluster name from the apoxy deployment command."""
+    try:
+        context = kubernetes_utils.get_current_kube_config_context_name()
+        apps_api = kube_client.apps_api(context=context)
+
+        # Get the apoxy deployment
+        deployment = apps_api.read_namespaced_deployment(
+            name='apoxy', namespace='apoxy-system'
+        )
+
+        # Extract cluster name from the command
+        containers = deployment.spec.template.spec.containers
+        if containers and len(containers) > 0:
+            command = containers[0].command
+            if (
+                command
+                and len(command) >= 4
+                and command[1] == 'tunnel'
+                and command[2] == 'run'
+            ):
+                return command[3]  # The cluster name is the 4th argument
+
+        logger.warning('Could not extract cluster name from apoxy deployment command')
+
+    except Exception as e:
+        logger.warning(f'Error getting cluster name from apoxy deployment: {e}')
+
+    return 'default'
+
+
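The `[:-3]` suffix-stripping used with this function composes the user-facing hosts. An illustrative derivation; the cluster value is made up:

```python
cluster = 'mycluster123'  # illustrative tunnel name with a 3-digit random suffix
vllm_host = f'{cluster[:-3]}.trainy.us'      # -> 'mycluster.trainy.us'
general_host = f'{cluster[:-3]}2.trainy.us'  # -> 'mycluster2.trainy.us'
assert (vllm_host, general_host) == ('mycluster.trainy.us', 'mycluster2.trainy.us')
```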
+def get_endpoint_type_from_config() -> str:
+    """Get the endpoint type from konduktor config.
+
+    Returns:
+        'trainy' for Apoxy endpoints (default)
+        'direct' for LoadBalancer IP endpoints
+    """
+    try:
+        # Use the proper config system that handles the KONDUKTOR_CONFIG env var
+        endpoint_type = konduktor_config.get_nested(('serving', 'endpoint'), 'trainy')
+        logger.debug(f'[DEBUG] Config endpoint_type: {endpoint_type}')
+        return endpoint_type.lower()
+    except Exception as e:
+        logger.warning(f'Error reading endpoint config: {e}')
+
+    # Default to trainy if the config is not found or an error occurred
+    logger.debug('[DEBUG] Falling back to default endpoint type: trainy')
+    return 'trainy'
+
+
+def get_deployment_endpoint(
+    force_direct: bool = False, deployment_type: str = 'AIBRIX'
+) -> str:
+    """Get the endpoint for both vLLM/Aibrix and general deployments."""
+    if force_direct:
+        endpoint_type = 'direct'
+    else:
+        endpoint_type = get_endpoint_type_from_config()
+
+    if endpoint_type == 'direct':
+        # Check if this is a general deployment
+        if deployment_type == 'GENERAL':
+            # General deployments: ingress IP + Host header
+            ingress_ip = get_ingress_nginx_external_ip()
+            if ingress_ip:
+                return f'{ingress_ip}'
+            else:
+                return '<pending>'
+        else:
+            # vLLM/Aibrix deployments: envoy IP
+            try:
+                aibrix_endpoint = get_envoy_external_ip()
+                return aibrix_endpoint or '<pending>'
+            except Exception:
+                return '<pending>'
+    else:
+        # Use Apoxy (trainy.us)
+        try:
+            cluster_name = get_unique_cluster_name_from_tunnel()
+            # Strip the last 3 chars: the backend Apoxy setup uses unique
+            # suffixes (3 random numbers) to avoid Apoxy bugs when
+            # deleting/creating TunnelNode resources with the same names too
+            # quickly, but we hide this complexity from user-facing endpoints
+            if deployment_type == 'GENERAL':
+                return f'{cluster_name[:-3]}2.trainy.us'  # General deployments
+            else:
+                return f'{cluster_name[:-3]}.trainy.us'  # vLLM deployments
+        except Exception:
+            return '<pending>'
+
+
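The branches above reduce to a four-entry matrix. A summary sketch; the descriptions paraphrase the code, and the `<cluster>` placeholder stands for the suffix-stripped tunnel name:

```python
ENDPOINT_MATRIX = {
    ('direct', 'GENERAL'): 'ingress-nginx LoadBalancer IP (callers set a Host header)',
    ('direct', 'AIBRIX'): 'envoy-gateway LoadBalancer IP',
    ('trainy', 'GENERAL'): '<cluster>2.trainy.us',
    ('trainy', 'AIBRIX'): '<cluster>.trainy.us',
}
```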
+def show_status_table(namespace: str, all_users: bool, force_direct: bool = False):
+    """Display status of Konduktor Serve models."""
+    context = kubernetes_utils.get_current_kube_config_context_name()
+
+    # Build lookup maps (deployment_name -> object)
+    apps_api = kube_client.apps_api(context)
+    core_api = kube_client.core_api(context)
+
+    deployments_map = {}
+    for d in apps_api.list_namespaced_deployment(namespace=namespace).items:
+        name = (d.metadata.labels or {}).get(DEPLOYMENT_NAME_LABEL)
+        if name is not None:
+            deployments_map[name] = d
+
+    services_map = {}
+    for s in core_api.list_namespaced_service(namespace=namespace).items:
+        name = (s.metadata.labels or {}).get(DEPLOYMENT_NAME_LABEL)
+        if name is not None:
+            services_map[name] = s
+
+    autoscalers_map = build_autoscaler_map(namespace, context or '')
+
+    model_names = list_models(namespace)
+    if not model_names:
+        Console().print(
+            f'[yellow]No deployments found in namespace {namespace}.[/yellow]'
+        )
+        return
+
+    Console().print()
+    title = '[bold]KONDUKTOR SERVE[/bold]'
+    is_ci = os.environ.get('CI') or os.environ.get('BUILDKITE')
+
+    # Get the Aibrix endpoint once for all Aibrix deployments
+    aibrix_endpoint = get_deployment_endpoint(force_direct, 'AIBRIX')
+    # Get the General endpoint once for all General deployments
+    general_endpoint = get_deployment_endpoint(force_direct, 'GENERAL')
+
+    table = Table(title=title, box=box.ASCII if is_ci else box.ROUNDED)
+    if all_users:
+        table.add_column('User', style='magenta', no_wrap=True)
+    table.add_column('Name', style='cyan', no_wrap=True)
+    table.add_column('Status', no_wrap=True)
+    table.add_column('Summary', style='bold', no_wrap=True)
+    table.add_column('Endpoint', style='yellow', no_wrap=True)
+    table.add_column('Replicas', style='dim', no_wrap=True)
+    table.add_column('Resources', style='white', no_wrap=True)
+
+    unowned = 0
+
+    for idx, name in enumerate(model_names):
+        deployment = deployments_map.get(name)
+        service = services_map.get(name)
+        autoscaler = autoscalers_map.get(name)
+
+        # Extract owner
+        owner = None
+        for resource in [deployment, service, autoscaler]:
+            if not resource:
+                continue
+            metadata = (
+                resource.metadata
+                if hasattr(resource, 'metadata')
+                else resource.get('metadata', {})
+            )
+            labels = (
+                metadata.labels
+                if hasattr(metadata, 'labels')
+                else metadata.get('labels', {})
+            )
+            if labels:
+                owner = labels.get('trainy.ai/username')
+                if owner:
+                    break
+
+        if not all_users and owner != common_utils.get_cleaned_username():
+            unowned += 1
+            continue
+
+        # Status
+        status = get_model_status(name, deployments_map, services_map, autoscalers_map)
+        states = [status['deployment'], status['service'], status['autoscaler']]
+
+        def emoji_line(label: str, state: str) -> str:
+            emoji_map = {
+                'ready': '✅',
+                'pending': '❓',
+                'missing': '❌',
+            }
+            return f"{label}: {emoji_map.get(state, '❓')}"
+
+        # Check if this is a general deployment (not vLLM/Aibrix)
+        is_general = True
+        if deployment and hasattr(deployment.metadata, 'labels'):
+            labels = deployment.metadata.labels or {}
+            if AIBRIX_NAME_LABEL in labels:
+                is_general = False
+
+        summary_lines = [
+            emoji_line('Deploym', status['deployment'] or 'missing'),
+            emoji_line('Service', status['service'] or 'missing'),
+        ]
+
+        if is_general:
+            # Autoscaler for General: HPA only
+            hpa_ready = get_autoscaler_status_for_deployment(
+                name, autoscalers_map, is_general=True
+            )
+            if name in autoscalers_map:
+                summary_lines.append(f"AScaler: {'✅' if hpa_ready else '❓'}")
+        else:
+            # Autoscaler for vLLM: only KPA (APA no longer used)
+            if name in autoscalers_map:
+                kpa_ready = get_autoscaler_status_for_deployment(
+                    name, autoscalers_map, is_general=False
+                )
+                if 'kpa' in autoscalers_map.get(name, {}):
+                    summary_lines.append(f"AScaler: {'✅' if kpa_ready else '❓'}")
+        summary = '\n'.join(summary_lines)
+
+        # Overall status
+        if any(s == 'missing' for s in states):
+            status_text = Text('FAILED', style='red')
+        else:
+            if status['autoscaler'] is not None:
+                status_text = (
+                    Text('READY', style='green')
+                    if all(s == 'ready' for s in states)
+                    else Text('PENDING', style='yellow')
+                )
+            else:
+                status_text = (
+                    Text('READY', style='green')
+                    if (
+                        status['deployment'] == 'ready' and status['service'] == 'ready'
+                    )
+                    else Text('PENDING', style='yellow')
+                )
+
+        # Extract labels from the deployment or service, or fall back to an empty dict
+        labels = {}
+        if deployment and hasattr(deployment.metadata, 'labels'):
+            labels = deployment.metadata.labels or {}
+        elif service and hasattr(service.metadata, 'labels'):
+            labels = service.metadata.labels or {}
+        else:
+            labels = {}
+
+        endpoint_str = '<pending>'
+        if AIBRIX_NAME_LABEL in labels:
+            # Aibrix deployment
+            endpoint_type = get_endpoint_type_from_config()
+            if force_direct or endpoint_type == 'direct':
+                # Direct access: use http for IP endpoints
+                endpoint_str = (
+                    f'http://{aibrix_endpoint}'
+                    if aibrix_endpoint != '<pending>'
+                    else aibrix_endpoint
+                )
+            else:
+                # Apoxy access: use https for trainy.us endpoints
+                endpoint_str = (
+                    f'https://{aibrix_endpoint}'
+                    if aibrix_endpoint != '<pending>'
+                    else aibrix_endpoint
+                )
+        else:
+            # General deployment
+            endpoint_type = get_endpoint_type_from_config()
+            if force_direct or endpoint_type == 'direct':
+                # Direct access: IP + Host header
+                endpoint_str = f'http://{general_endpoint}\nHost: {name}'
+            else:
+                # Apoxy access: single host + path
+                endpoint_str = f'https://{general_endpoint}/{name}'
+
+        # Replicas
+        if deployment:
+            ready_replicas = str(deployment.status.ready_replicas or 0)
+            desired_replicas = str(deployment.spec.replicas or 0)
+        else:
+            ready_replicas = '?'
+            desired_replicas = '?'
+
+        replicas_text = Text()
+        replicas_text.append(
+            f'Ready: {ready_replicas}/{desired_replicas}\n', style='bold white'
+        )
+
+        if status['autoscaler']:
+            # Get min/max from deployment labels
+            min_r, max_r = '?', '?'
+
+            if deployment and hasattr(deployment.metadata, 'labels'):
+                labels = deployment.metadata.labels or {}
+                # All deployments with autoscaling get these labels from the template
+                original_min_str = labels.get('trainy.ai/original-min-replicas')
+                original_max_str = labels.get('trainy.ai/original-max-replicas')
+                if original_min_str and original_max_str:
+                    min_r, max_r = original_min_str, original_max_str
+                    logger.debug(
+                        f'[DEBUG] Got replicas from deployment labels: '
+                        f'min={min_r}, max={max_r}'
+                    )
+
+            replicas_text.append(f'Min : {min_r}\n', style='bold white')
+            replicas_text.append(f'Max : {max_r}', style='bold white')
+
+        # Resources
+        resources_text = _get_resource_summary(deployment)
+
+        # Row
+        if all_users:
+            table.add_row(
+                owner or '(unknown)',
+                name,
+                status_text,
+                summary,
+                endpoint_str,
+                replicas_text,
+                resources_text,
+            )
+        else:
+            table.add_row(
+                name, status_text, summary, endpoint_str, replicas_text, resources_text
+            )
+
+        if idx != len(model_names) - 1:
+            table.add_row(*([''] * len(table.columns)))
+
+    if len(model_names) == unowned:
+        Console().print(
+            f'[yellow]No deployments created by you found '
+            f'in namespace {namespace}. Try --all-users.[/yellow]'
+        )
+        return
+
+    Console().print(table)
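The per-row READY/PENDING/FAILED rollup in `show_status_table` restates as a pure function; a minimal verified sketch:

```python
from typing import Optional

def overall_status(deployment: str, service: str, autoscaler: Optional[str]) -> str:
    """Replica of the status rollup computed per table row above."""
    states = [deployment, service, autoscaler]
    if any(s == 'missing' for s in states):
        return 'FAILED'
    if autoscaler is not None:
        # With an autoscaler present, all three must be ready.
        return 'READY' if all(s == 'ready' for s in states) else 'PENDING'
    # Without an autoscaler, only deployment and service are considered.
    return 'READY' if deployment == 'ready' and service == 'ready' else 'PENDING'

assert overall_status('ready', 'ready', None) == 'READY'
assert overall_status('ready', 'ready', 'pending') == 'PENDING'
assert overall_status('ready', 'missing', None) == 'FAILED'
```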