konduktor-nightly 0.1.0.dev20250805105421__py3-none-any.whl → 0.1.0.dev20250807105334__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of konduktor-nightly has been flagged by the registry's automated checks.
- konduktor/__init__.py +4 -7
- konduktor/backends/__init__.py +2 -4
- konduktor/backends/constants.py +12 -0
- konduktor/backends/deployment.py +179 -0
- konduktor/backends/deployment_utils.py +835 -0
- konduktor/backends/jobset.py +2 -2
- konduktor/backends/jobset_utils.py +16 -266
- konduktor/backends/pod_utils.py +392 -0
- konduktor/cli.py +343 -8
- konduktor/controller/launch.py +1 -1
- konduktor/execution.py +5 -2
- konduktor/kube_client.py +8 -0
- konduktor/resource.py +20 -0
- konduktor/serving.py +149 -0
- konduktor/task.py +61 -0
- konduktor/templates/deployment.yaml.j2 +142 -0
- konduktor/templates/pod.yaml.j2 +36 -0
- konduktor/utils/accelerator_registry.py +1 -1
- konduktor/utils/log_utils.py +1 -1
- konduktor/utils/schemas.py +42 -0
- konduktor/utils/validator.py +51 -16
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/METADATA +1 -1
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/RECORD +26 -21
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/entry_points.txt +0 -0
konduktor/backends/deployment_utils.py
@@ -0,0 +1,835 @@
+"""Deployment utils: wraps CRUD operations for deployments"""
+
+import json
+import os
+import tempfile
+import typing
+from typing import Any, Dict, List, Optional, Tuple
+
+import colorama
+from kubernetes.client.exceptions import ApiException
+from rich import box
+from rich.console import Console
+from rich.table import Table
+from rich.text import Text
+
+import konduktor
+from konduktor import kube_client, logging
+from konduktor.backends import constants as backend_constants
+from konduktor.backends import pod_utils
+from konduktor.utils import (
+    common_utils,
+    kubernetes_utils,
+    validator,
+)
+
+if typing.TYPE_CHECKING:
+    pass
+
+logger = logging.get_logger(__name__)
+
+# Use shared constants from konduktor.backends.constants
+DEPLOYMENT_NAME_LABEL = backend_constants.DEPLOYMENT_NAME_LABEL
+DEPLOYMENT_USERID_LABEL = backend_constants.USERID_LABEL
+DEPLOYMENT_USER_LABEL = backend_constants.USER_LABEL
+DEPLOYMENT_ACCELERATOR_LABEL = backend_constants.ACCELERATOR_LABEL
+DEPLOYMENT_NUM_ACCELERATORS_LABEL = backend_constants.NUM_ACCELERATORS_LABEL
+AIBRIX_NAME_LABEL = backend_constants.AIBRIX_NAME_LABEL
+
+SECRET_BASENAME_LABEL = backend_constants.SECRET_BASENAME_LABEL
+
+_DEPLOYMENT_METADATA_LABELS = {
+    'deployment_name_label': DEPLOYMENT_NAME_LABEL,
+    'deployment_userid_label': DEPLOYMENT_USERID_LABEL,
+    'deployment_user_label': DEPLOYMENT_USER_LABEL,
+    'deployment_accelerator_label': DEPLOYMENT_ACCELERATOR_LABEL,
+    'deployment_num_accelerators_label': DEPLOYMENT_NUM_ACCELERATORS_LABEL,
+    'model_name_label': AIBRIX_NAME_LABEL,
+}
+
+_RUN_DURATION_ANNOTATION_KEY = 'kueue.x-k8s.io/maxRunDurationSeconds'
+
+
+def render_specs(
+    task: 'konduktor.Task',
+) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]:
+    general = True
+    if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
+        general = False
+
+    # Calculate accelerator info for template
+    assert task.resources is not None
+    accelerator_type = task.resources.get_accelerator_type() or 'None'
+    # For Deployments: GPUs per pod (not total across replicas)
+    num_accelerators = task.resources.get_accelerator_count() or 0
+
+    if task.run:
+        task.run = task.run.replace('__KONDUKTOR_TASK_NAME__', task.name)
+    with tempfile.NamedTemporaryFile() as temp:
+        common_utils.fill_template(
+            'deployment.yaml.j2',
+            {
+                'name': task.name,
+                'user': common_utils.get_cleaned_username(),
+                'accelerator_type': accelerator_type,
+                'num_accelerators': str(num_accelerators),
+                'min_replicas': task.serving.min_replicas if task.serving else 1,
+                'max_replicas': task.serving.max_replicas if task.serving else 1,
+                'ports': task.serving.ports if task.serving else 8000,
+                'autoscaler': (
+                    'true'
+                    if (
+                        task.serving
+                        and task.serving.min_replicas != task.serving.max_replicas
+                    )
+                    else 'false'
+                ),
+                'general': general,
+                **_DEPLOYMENT_METADATA_LABELS,
+            },
+            temp.name,
+        )
+        docs = common_utils.read_yaml_all(temp.name)
+
+    deployment_spec = None
+    service_spec = None
+    autoscaler_spec = None
+
+    for doc in docs:
+        kind = doc.get('kind')
+        if kind == 'Deployment':
+            deployment_spec = doc
+        elif kind == 'Service':
+            service_spec = doc
+        elif kind == 'PodAutoscaler' or kind == 'HorizontalPodAutoscaler':
+            autoscaler_spec = doc
+
+    # not every deployment + service will have podautoscaler
+    if task.serving and task.serving.min_replicas == task.serving.max_replicas:
+        autoscaler_spec = None
+
+    if deployment_spec is None:
+        raise ValueError('Deployment manifest not found.')
+    if service_spec is None:
+        raise ValueError('Service manifest not found.')
+
+    # Validate specs before returning
+    try:
+        validator.validate_deployment_spec(deployment_spec)
+        validator.validate_service_spec(service_spec)
+        # Only validate HPA if it exists (APA doesn't have official schema)
+        if autoscaler_spec and autoscaler_spec.get('kind') == 'HorizontalPodAutoscaler':
+            validator.validate_horizontalpodautoscaler_spec(autoscaler_spec)
+    except ValueError as e:
+        raise ValueError(f'Spec validation failed: {e}')
+
+    return deployment_spec, service_spec, autoscaler_spec or {}
+
+
+def create_deployment(
+    namespace: str,
+    task: 'konduktor.Task',
+    pod_spec: Dict[str, Any],
+    dryrun: bool = False,
+) -> Optional[Dict[str, Any]]:
+    """Creates a Kubernetes Deployment based on the task and pod spec."""
+
+    assert task.resources is not None, 'Task resources are undefined'
+
+    deployment_spec, _, _ = render_specs(task)
+
+    # Inject deployment-specific pod metadata
+    pod_utils.inject_deployment_pod_metadata(pod_spec, task)
+
+    # Inject pod spec directly (like jobset logic)
+    pod_utils.merge_pod_into_deployment_template(deployment_spec['spec'], pod_spec)
+
+    if dryrun:
+        logger.debug(f'[DRYRUN] Would create deployment:\n{deployment_spec}')
+        return deployment_spec
+
+    try:
+        context = kubernetes_utils.get_current_kube_config_context_name()
+        apps_api = kube_client.apps_api(context=context)
+        deployment = apps_api.create_namespaced_deployment(
+            namespace=namespace,
+            body=deployment_spec,
+        )
+        logger.info(
+            f'Deployment {colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
+            f'{task.name}{colorama.Style.RESET_ALL} created'
+        )
+
+        return deployment
+    except kube_client.api_exception() as err:
+        try:
+            error_body = json.loads(err.body)
+            error_message = error_body.get('message', '')
+            logger.error(f'Error creating deployment: {error_message}')
+        except json.JSONDecodeError:
+            logger.error(f'Error creating deployment: {err.body}')
+        raise err
+
+
+def create_service(
+    namespace: str,
+    task: 'konduktor.Task',
+    dryrun: bool = False,
+) -> Optional[Dict[str, Any]]:
+    """Creates a Kubernetes Service based on the task and pod spec."""
+
+    assert task.resources is not None, 'Task resources are undefined'
+
+    _, service_spec, _ = render_specs(task)
+
+    if dryrun:
+        logger.debug(f'[DRYRUN] Would create service:\n{service_spec}')
+        return service_spec
+
+    try:
+        context = kubernetes_utils.get_current_kube_config_context_name()
+        core_api = kube_client.core_api(context=context)
+        service = core_api.create_namespaced_service(
+            namespace=namespace,
+            body=service_spec,
+        )
+        logger.info(
+            f'Service {colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
+            f'{task.name}{colorama.Style.RESET_ALL} created'
+        )
+        return service
+    except kube_client.api_exception() as err:
+        try:
+            error_body = json.loads(err.body)
+            error_message = error_body.get('message', '')
+            logger.error(f'Error creating service: {error_message}')
+        except json.JSONDecodeError:
+            logger.error(f'Error creating service: {err.body}')
+        raise err
+
+
+def create_autoscaler(namespace: str, task: 'konduktor.Task', dryrun: bool = False):
+    _, _, autoscaler_spec = render_specs(task)
+
+    if not autoscaler_spec:
+        return
+
+    # Decide if it's APA or HPA by looking at autoscaler_spec["kind"]
+    kind = autoscaler_spec.get('kind')
+    context = kubernetes_utils.get_current_kube_config_context_name()
+
+    if dryrun:
+        logger.debug(f'[DRYRUN] Would create {kind}:\n{autoscaler_spec}')
+        return autoscaler_spec
+
+    if kind == 'PodAutoscaler':
+        custom_api = kube_client.crd_api(context=context)
+        return custom_api.create_namespaced_custom_object(
+            group='autoscaling.aibrix.ai',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='podautoscalers',
+            body=autoscaler_spec,
+        )
+    elif kind == 'HorizontalPodAutoscaler':
+        autoscaling_api = kube_client.autoscaling_api(context=context)
+        return autoscaling_api.create_namespaced_horizontal_pod_autoscaler(
+            namespace=namespace,
+            body=autoscaler_spec,
+        )
+
+
+def list_models(namespace: str) -> List[str]:
+    """
+    Returns a list of unique model names in the namespace,
+    based on label DEPLOYMENT_NAME_LABEL=`trainy.ai/deployment-name`.
+    """
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    apps = kube_client.apps_api(context)
+    core = kube_client.core_api(context)
+    crds = kube_client.crd_client(context)
+
+    label_selector = DEPLOYMENT_NAME_LABEL
+    model_names: set[str] = set()
+
+    # --- Deployments ---
+    for deploy in apps.list_namespaced_deployment(
+        namespace, label_selector=label_selector
+    ).items:
+        labels = getattr(deploy.metadata, 'labels', {}) or {}
+        name = labels.get(DEPLOYMENT_NAME_LABEL)
+        if name:
+            model_names.add(name)
+
+    # --- Services ---
+    for svc in core.list_namespaced_service(
+        namespace, label_selector=label_selector
+    ).items:
+        labels = getattr(svc.metadata, 'labels', {}) or {}
+        name = labels.get(DEPLOYMENT_NAME_LABEL)
+        if name:
+            model_names.add(name)
+
+    # --- PodAutoscalers ---
+    # APA
+    try:
+        apa_list = crds.list_namespaced_custom_object(
+            group='autoscaling.aibrix.ai',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='podautoscalers',
+        )
+        for apa in apa_list.get('items', []):
+            labels = apa.get('metadata', {}).get('labels', {}) or {}
+            name = labels.get(DEPLOYMENT_NAME_LABEL)
+            if name:
+                model_names.add(name)
+    except ApiException as e:
+        if e.status != 404:
+            # re-raise if it's not just missing CRD
+            raise
+        # otherwise ignore, cluster just doesn't have Aibrix CRDs
+        logger.warning('Skipping APA lookup. Aibrix CRDs not found in cluster')
+
+    # HPA
+    autoscaling_api = kube_client.autoscaling_api(context=context)
+    hpa_list = autoscaling_api.list_namespaced_horizontal_pod_autoscaler(
+        namespace=namespace
+    )
+    for hpa in hpa_list.items:
+        labels = getattr(hpa.metadata, 'labels', {}) or {}
+        name = labels.get(DEPLOYMENT_NAME_LABEL)
+        if name:
+            model_names.add(name)
+
+    return sorted(model_names)
+
+
+def is_autoscaler_ready(autoscaler_obj: dict) -> bool:
+    """
+    Returns True if the autoscaler (PodAutoscaler or HPA) is considered healthy.
+    For PodAutoscaler: AbleToScale == True.
+    For HPA: AbleToScale == True, or presence of the HPA is enough if no conditions.
+    """
+    try:
+        if hasattr(autoscaler_obj, 'to_dict'):
+            autoscaler_obj = autoscaler_obj.to_dict()
+        conditions = autoscaler_obj.get('status', {}).get('conditions', []) or []
+
+        # If conditions exist, look for AbleToScale == True
+        for cond in conditions:
+            cond_type = cond.get('type')
+            cond_status = cond.get('status')
+            if cond_type == 'AbleToScale' and cond_status == 'True':
+                return True
+
+        # If no conditions are present (common for HPAs), assume
+        # it's fine as soon as object exists
+        if not conditions:
+            return True
+
+    except Exception as e:
+        logger.warning(f'Error checking autoscaler readiness: {e}')
+    return False
+
+
+def build_autoscaler_map(namespace: str, context: str) -> dict[str, dict]:
+    """Fetch all APAs and HPAs and combine into 1 dict keyed by deployment name."""
+    autoscalers = {}
+
+    # --- Aibrix APAs ---
+    try:
+        crd_api = kube_client.crd_api(context=context)
+        apa_list = crd_api.list_namespaced_custom_object(
+            group='autoscaling.aibrix.ai',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='podautoscalers',
+        )
+        for apa in apa_list.get('items', []):
+            labels = apa.get('metadata', {}).get('labels', {}) or {}
+            dep_name = labels.get(DEPLOYMENT_NAME_LABEL)
+            if dep_name:
+                autoscalers[dep_name] = apa
+    except Exception as e:
+        logger.warning(f'Error fetching APAs: {e}')
+
+    # --- Standard HPAs ---
+    try:
+        autoscaling_api = kube_client.autoscaling_api(context=context)
+        hpa_list = autoscaling_api.list_namespaced_horizontal_pod_autoscaler(
+            namespace=namespace
+        )
+        for hpa in hpa_list.items:
+            labels = getattr(hpa.metadata, 'labels', {}) or {}
+            dep_name = labels.get(DEPLOYMENT_NAME_LABEL)
+            if dep_name and dep_name not in autoscalers:
+                autoscalers[dep_name] = hpa.to_dict()
+    except Exception as e:
+        logger.warning(f'Error fetching HPAs: {e}')
+
+    return autoscalers
+
+
+def get_model_status(
+    name: str,
+    deployments: dict[str, Any],
+    services: dict[str, Any],
+    autoscalers: dict[str, dict],
+) -> Dict[str, Optional[str]]:
+    """Check the status of Deployment, Service, and Autoscaler."""
+    status = {
+        'deployment': 'missing',
+        'service': 'missing',
+        'autoscaler': None,
+    }
+
+    # --- Deployment ---
+    if name in deployments:
+        d = deployments[name]
+        ready = (d.status.ready_replicas or 0) if d.status else 0
+        desired = (d.spec.replicas or 0) if d.spec else 0
+        status['deployment'] = 'ready' if ready == desired else 'pending'
+
+    # --- Service ---
+    if name in services:
+        s = services[name]
+        labels = getattr(s.metadata, 'labels', {}) or {}
+        is_vllm = AIBRIX_NAME_LABEL in labels
+
+        if is_vllm:
+            status['service'] = 'ready'
+        else:
+            lb_ready = False
+            if s.status and s.status.load_balancer and s.status.load_balancer.ingress:
+                ingress = s.status.load_balancer.ingress
+                if ingress and (ingress[0].ip or ingress[0].hostname):
+                    lb_ready = True
+            status['service'] = 'ready' if lb_ready else 'pending'
+
+    # --- Autoscaler ---
+    if name in autoscalers:
+        a = autoscalers[name]
+        status['autoscaler'] = 'ready' if is_autoscaler_ready(a) else 'pending'
+    else:
+        status['autoscaler'] = None
+
+    return status
+
+
+def get_deployment(namespace: str, job_name: str) -> Optional[Any]:
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    apps_api = kube_client.apps_api(context=context)
+    try:
+        return apps_api.read_namespaced_deployment(name=job_name, namespace=namespace)
+    except ApiException as e:
+        if e.status == 404:
+            return None
+        raise
+
+
+def get_service(namespace: str, job_name: str) -> Optional[Any]:
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    core_api = kube_client.core_api(context=context)
+    try:
+        return core_api.read_namespaced_service(name=job_name, namespace=namespace)
+    except ApiException as e:
+        if e.status == 404:
+            return None
+        raise
+
+
+def get_autoscaler(namespace: str, job_name: str) -> Optional[Any]:
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    # --- Try Aibrix APA first ---
+    crd_api = kube_client.crd_api(context=context)
+    try:
+        return crd_api.get_namespaced_custom_object(
+            group='autoscaling.aibrix.ai',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='podautoscalers',
+            name=f'{job_name}-apa',
+        )
+    except ApiException as e:
+        if e.status != 404:
+            raise
+        # Fall through to check HPA
+
+    # --- Try built-in Kubernetes HPA ---
+    try:
+        autoscaling_api = kube_client.autoscaling_api(context=context)
+        return autoscaling_api.read_namespaced_horizontal_pod_autoscaler(
+            name=f'{job_name}-hpa', namespace=namespace
+        ).to_dict()
+    except ApiException as e:
+        if e.status == 404:
+            return None
+        raise
+
+
+def delete_deployment(namespace: str, name: str) -> Optional[Dict[str, Any]]:
+    """Deletes a Kubernetes Deployment in the given namespace.
+
+    Args:
+        namespace: Namespace where the deployment exists.
+        name: Name of the deployment to delete.
+
+    Returns:
+        Response from delete operation, or None on error.
+    """
+    try:
+        context = kubernetes_utils.get_current_kube_config_context_name()
+        response = kube_client.apps_api(context=context).delete_namespaced_deployment(
+            name=name,
+            namespace=namespace,
+        )
+        return response
+    except kube_client.api_exception() as err:
+        try:
+            error_body = json.loads(err.body)
+            error_message = error_body.get('message', '')
+            logger.error(f'Error deleting deployment: {error_message}')
+        except json.JSONDecodeError:
+            error_message = str(err.body)
+            logger.error(f'Error deleting deployment: {error_message}')
+        else:
+            raise err
+    return None
+
+
+def delete_service(namespace: str, name: str) -> Optional[Dict[str, Any]]:
+    """Deletes a Kubernetes Service in the given namespace.
+
+    Args:
+        namespace: Namespace where the service exists.
+        name: Name of the service to delete.
+
+    Returns:
+        Response from delete operation, or None on error.
+    """
+    try:
+        context = kubernetes_utils.get_current_kube_config_context_name()
+        response = kube_client.core_api(context=context).delete_namespaced_service(
+            name=name,
+            namespace=namespace,
+        )
+        return response
+    except kube_client.api_exception() as err:
+        try:
+            error_body = json.loads(err.body)
+            error_message = error_body.get('message', '')
+            logger.error(f'Error deleting service: {error_message}')
+        except json.JSONDecodeError:
+            logger.error(f'Error deleting service: {err.body}')
+        raise err
+    return None
+
+
+def delete_autoscaler(namespace: str, name: str) -> Optional[Dict[str, Any]]:
+    """Deletes either an Aibrix PodAutoscaler or a HorizontalPodAutoscaler."""
+    context = kubernetes_utils.get_current_kube_config_context_name()
+
+    # --- Try delete APA first ---
+    try:
+        custom_api = kube_client.crd_api(context=context)
+        response = custom_api.delete_namespaced_custom_object(
+            group='autoscaling.aibrix.ai',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='podautoscalers',
+            name=f'{name}-apa',
+        )
+        return response
+    except kube_client.api_exception() as err:
+        # If not found, try HPA
+        try:
+            error_body = json.loads(err.body)
+            if err.status != 404:
+                raise
+        except Exception:
+            if getattr(err, 'status', None) != 404:
+                raise
+
+    # --- Try delete HPA ---
+    try:
+        autoscaling_api = kube_client.autoscaling_api(context=context)
+        return autoscaling_api.delete_namespaced_horizontal_pod_autoscaler(
+            name=f'{name}-hpa',
+            namespace=namespace,
+        )
+    except kube_client.api_exception() as err:
+        try:
+            error_body = json.loads(err.body)
+            error_message = error_body.get('message', '')
+            logger.error(f'Error deleting Pod Autoscaler: {error_message}')
+        except json.JSONDecodeError:
+            logger.error(f'Error deleting Pod Autoscaler: {err.body}')
+        raise err
+
+
+def delete_serving_specs(name: str, namespace: str) -> None:
+    for kind, delete_fn in [
+        ('deployment', delete_deployment),
+        ('service', delete_service),
+        ('podautoscaler', delete_autoscaler),
+    ]:
+        try:
+            delete_fn(namespace, name)
+            logger.info(f'Deleted {kind}: {name}')
+        except Exception as e:
+            logger.error(f'Failed to delete {kind} {name}: {e}')
+
+
+def _get_resource_summary(deployment) -> str:
+    """Extract and format pod resource information from a deployment.
+
+    Args:
+        deployment: Kubernetes deployment object
+
+    Returns:
+        Formatted string with resource information (GPU, CPU, memory)
+    """
+    if not deployment:
+        return '?'
+
+    try:
+        containers = deployment.spec.template.spec.containers
+        if not containers:
+            return '?'
+        container = containers[0]
+        res = container.resources.requests or {}
+
+        cpu = res.get('cpu', '?')
+        mem = res.get('memory', '?')
+        gpu = res.get('nvidia.com/gpu') or res.get('trainy.ai/gpu')
+
+        # Try to extract GPU type from deployment labels
+        labels = deployment.metadata.labels or {}
+        accelerator_type = labels.get('trainy.ai/accelerator', 'L40')
+
+        gpu_str = f'{accelerator_type}:{gpu}' if gpu else 'None'
+        return f'{gpu_str}\n{cpu} CPU\n{mem}'
+    except Exception:
+        return '?'
+
+
+def get_envoy_external_ip() -> Optional[str]:
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    core_api = kube_client.core_api(context=context)
+    try:
+        services = core_api.list_namespaced_service(namespace='envoy-gateway-system')
+        for svc in services.items:
+            if svc.spec.type == 'LoadBalancer' and 'envoy' in svc.metadata.name:
+                ingress = svc.status.load_balancer.ingress
+                if ingress:
+                    return ingress[0].ip or ingress[0].hostname
+    except Exception:
+        pass
+    return None
+
+
+def show_status_table(namespace: str, all_users: bool):
+    """Display status of Konduktor Serve models."""
+    context = kubernetes_utils.get_current_kube_config_context_name()
+
+    # Build lookup maps (deployment_name -> object)
+    apps_api = kube_client.apps_api(context)
+    core_api = kube_client.core_api(context)
+
+    deployments_map = {}
+    for d in apps_api.list_namespaced_deployment(namespace=namespace).items:
+        name = (d.metadata.labels or {}).get(DEPLOYMENT_NAME_LABEL)
+        if name is not None:
+            deployments_map[name] = d
+
+    services_map = {}
+    for s in core_api.list_namespaced_service(namespace=namespace).items:
+        name = (s.metadata.labels or {}).get(DEPLOYMENT_NAME_LABEL)
+        if name is not None:
+            services_map[name] = s
+
+    autoscalers_map = build_autoscaler_map(namespace, context or '')
+
+    model_names = list_models(namespace)
+    if not model_names:
+        Console().print(
+            f'[yellow]No deployments found in namespace {namespace}.[/yellow]'
+        )
+        return
+
+    Console().print()
+    external_ip = get_envoy_external_ip()
+    title = '[bold]KONDUKTOR SERVE[/bold]'
+    is_ci = os.environ.get('CI') or os.environ.get('BUILDKITE')
+
+    table = Table(title=title, box=box.ASCII if is_ci else box.ROUNDED)
+    if all_users:
+        table.add_column('User', style='magenta', no_wrap=True)
+    table.add_column('Name', style='cyan', no_wrap=True)
+    table.add_column('Status', no_wrap=True)
+    table.add_column('Summary', style='bold', no_wrap=True)
+    table.add_column('Endpoint', style='yellow', no_wrap=True)
+    table.add_column('Replicas', style='dim', no_wrap=True)
+    table.add_column('Resources', style='white', no_wrap=True)
+
+    unowned = 0
+
+    for idx, name in enumerate(model_names):
+        deployment = deployments_map.get(name)
+        service = services_map.get(name)
+        autoscaler = autoscalers_map.get(name)
+
+        # Extract owner
+        owner = None
+        for resource in [deployment, service, autoscaler]:
+            if not resource:
+                continue
+            metadata = (
+                resource.metadata
+                if hasattr(resource, 'metadata')
+                else resource.get('metadata', {})
+            )
+            labels = (
+                metadata.labels
+                if hasattr(metadata, 'labels')
+                else metadata.get('labels', {})
+            )
+            if labels:
+                owner = labels.get('trainy.ai/username')
+                if owner:
+                    break
+
+        if not all_users and owner != common_utils.get_cleaned_username():
+            unowned += 1
+            continue
+
+        # Status
+        status = get_model_status(name, deployments_map, services_map, autoscalers_map)
+        states = [status['deployment'], status['service'], status['autoscaler']]
+
+        def emoji_line(label: str, state: str) -> str:
+            emoji_map = {
+                'ready': '✅',
+                'pending': '❓',
+                'missing': '❌',
+            }
+            return f"{label}: {emoji_map.get(state, '❓')}"
+
+        summary_lines = [
+            emoji_line('Deploym', status['deployment'] or 'missing'),
+            emoji_line('Service', status['service'] or 'missing'),
+        ]
+        if status['autoscaler'] is not None:
+            summary_lines.append(
+                emoji_line('AScaler', status['autoscaler'] or 'missing')
+            )
+        summary = '\n'.join(summary_lines)
+
+        # Overall status
+        if any(s == 'missing' for s in states):
+            status_text = Text('FAILED', style='red')
+        else:
+            if status['autoscaler'] is not None:
+                status_text = (
+                    Text('READY', style='green')
+                    if all(s == 'ready' for s in states)
+                    else Text('PENDING', style='yellow')
+                )
+            else:
+                status_text = (
+                    Text('READY', style='green')
+                    if (
+                        status['deployment'] == 'ready' and status['service'] == 'ready'
+                    )
+                    else Text('PENDING', style='yellow')
+                )
+
+        # Type & endpoint
+        ip_str = '<pending>'
+        labels = (
+            (
+                deployment.metadata.labels
+                if deployment and hasattr(deployment.metadata, 'labels')
+                else {}
+            )
+            or (
+                service.metadata.labels
+                if service and hasattr(service.metadata, 'labels')
+                else {}
+            )
+            or {}
+        )
+        if AIBRIX_NAME_LABEL in labels:
+            ip_str = external_ip or '<pending>'
+        else:
+            if (
+                service
+                and service.status
+                and service.status.load_balancer
+                and service.status.load_balancer.ingress
+            ):
+                ing = service.status.load_balancer.ingress[0]
+                ip_str = ing.ip or ing.hostname or '<pending>'
+
+        # Port
+        port_str = ''
+        if service and service.spec and service.spec.ports:
+            port_obj = (
+                next((p for p in service.spec.ports if p.name == 'serve'), None)
+                or service.spec.ports[0]
+            )
+            if port_obj and port_obj.port:
+                port_str = str(port_obj.port)
+        endpoint_str = f'{ip_str}:{port_str}' if port_str else ip_str
+
+        # Replicas
+        ready_replicas = (
+            str(deployment.status.ready_replicas or 0) if deployment else '?'
+        )
+        desired_replicas = str(deployment.spec.replicas or 0) if deployment else '?'
+        replicas_text = Text()
+        replicas_text.append(
+            f'Ready: {ready_replicas}/{desired_replicas}\n', style='bold white'
+        )
+        if status['autoscaler']:
+            spec = (
+                autoscaler.get('spec', {})
+                if isinstance(autoscaler, dict)
+                else getattr(autoscaler, 'spec', {})
+            )
+            min_r = str(spec.get('minReplicas', spec.get('min_replicas', '?')))
+            max_r = str(spec.get('maxReplicas', spec.get('max_replicas', '?')))
+            replicas_text.append(f'Min : {min_r}\n', style='bold white')
+            replicas_text.append(f'Max : {max_r}', style='bold white')
+
+        # Resources
+        resources_text = _get_resource_summary(deployment)
+
+        # Row
+        if all_users:
+            table.add_row(
+                owner or '(unknown)',
+                name,
+                status_text,
+                summary,
+                endpoint_str,
+                replicas_text,
+                resources_text,
+            )
+        else:
+            table.add_row(
+                name, status_text, summary, endpoint_str, replicas_text, resources_text
+            )
+
+        if idx != len(model_names) - 1:
+            table.add_row(*([''] * len(table.columns)))
+
+    if len(model_names) == unowned:
+        Console().print(
+            f'[yellow]No deployments created by you found '
+            f'in namespace {namespace}. Try --all-users.[/yellow]'
+        )
+        return
+
+    Console().print(table)