konduktor-nightly 0.1.0.dev20250804105449__py3-none-any.whl → 0.1.0.dev20250806105405__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of konduktor-nightly might be problematic. Click here for more details.

@@ -0,0 +1,835 @@
1
+ """Deployment utils: wraps CRUD operations for deployments"""
2
+
3
+ import json
4
+ import os
5
+ import tempfile
6
+ import typing
7
+ from typing import Any, Dict, List, Optional, Tuple
8
+
9
+ import colorama
10
+ from kubernetes.client.exceptions import ApiException
11
+ from rich import box
12
+ from rich.console import Console
13
+ from rich.table import Table
14
+ from rich.text import Text
15
+
16
+ import konduktor
17
+ from konduktor import kube_client, logging
18
+ from konduktor.backends import constants as backend_constants
19
+ from konduktor.backends import pod_utils
20
+ from konduktor.utils import (
21
+ common_utils,
22
+ kubernetes_utils,
23
+ validator,
24
+ )
25
+
26
+ if typing.TYPE_CHECKING:
27
+ pass
28
+
29
+ logger = logging.get_logger(__name__)
30
+
31
# Use shared constants from konduktor.backends.constants
# Local aliases for the centrally-declared Kubernetes label keys, so this
# module never hard-codes label strings.
DEPLOYMENT_NAME_LABEL = backend_constants.DEPLOYMENT_NAME_LABEL  # ties resources to one deployment
DEPLOYMENT_USERID_LABEL = backend_constants.USERID_LABEL
DEPLOYMENT_USER_LABEL = backend_constants.USER_LABEL
DEPLOYMENT_ACCELERATOR_LABEL = backend_constants.ACCELERATOR_LABEL
DEPLOYMENT_NUM_ACCELERATORS_LABEL = backend_constants.NUM_ACCELERATORS_LABEL
AIBRIX_NAME_LABEL = backend_constants.AIBRIX_NAME_LABEL  # marks Aibrix/vLLM-managed models

SECRET_BASENAME_LABEL = backend_constants.SECRET_BASENAME_LABEL

# Mapping of jinja variable names (used by deployment.yaml.j2) to the label
# keys above; spread into the template context in render_specs().
_DEPLOYMENT_METADATA_LABELS = {
    'deployment_name_label': DEPLOYMENT_NAME_LABEL,
    'deployment_userid_label': DEPLOYMENT_USERID_LABEL,
    'deployment_user_label': DEPLOYMENT_USER_LABEL,
    'deployment_accelerator_label': DEPLOYMENT_ACCELERATOR_LABEL,
    'deployment_num_accelerators_label': DEPLOYMENT_NUM_ACCELERATORS_LABEL,
    'model_name_label': AIBRIX_NAME_LABEL,
}

# Kueue annotation key for capping a workload's run duration.
# NOTE(review): not referenced elsewhere in this module — presumably consumed
# by the template or by callers; confirm before removing.
_RUN_DURATION_ANNOTATION_KEY = 'kueue.x-k8s.io/maxRunDurationSeconds'
51
+
52
+
53
def render_specs(
    task: 'konduktor.Task',
) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]:
    """Render the Deployment, Service and autoscaler manifests for *task*.

    Fills the 'deployment.yaml.j2' template with task metadata and parses the
    resulting multi-document YAML into the three specs.

    Returns:
        (deployment_spec, service_spec, autoscaler_spec). autoscaler_spec is
        {} when no autoscaler applies (min_replicas == max_replicas).

    Raises:
        ValueError: if the Deployment or Service document is missing from the
            rendered template, or if spec validation fails.
    """
    # vLLM OpenAI API servers get the Aibrix-specific template path; anything
    # else is rendered as a "general" deployment.
    general = True
    if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
        general = False

    # Calculate accelerator info for template
    assert task.resources is not None
    accelerator_type = task.resources.get_accelerator_type() or 'None'
    # For Deployments: GPUs per pod (not total across replicas)
    num_accelerators = task.resources.get_accelerator_count() or 0

    # Substitute the task-name placeholder in the run command before it is
    # baked into the manifest.
    if task.run:
        task.run = task.run.replace('__KONDUKTOR_TASK_NAME__', task.name)
    # NOTE(review): NamedTemporaryFile re-opened by name — works on POSIX,
    # not on Windows; confirm supported platforms.
    with tempfile.NamedTemporaryFile() as temp:
        common_utils.fill_template(
            'deployment.yaml.j2',
            {
                'name': task.name,
                'user': common_utils.get_cleaned_username(),
                'accelerator_type': accelerator_type,
                'num_accelerators': str(num_accelerators),
                'min_replicas': task.serving.min_replicas if task.serving else 1,
                'max_replicas': task.serving.max_replicas if task.serving else 1,
                'ports': task.serving.ports if task.serving else 8000,
                # Autoscaler is only rendered when replica bounds differ.
                'autoscaler': (
                    'true'
                    if (
                        task.serving
                        and task.serving.min_replicas != task.serving.max_replicas
                    )
                    else 'false'
                ),
                'general': general,
                **_DEPLOYMENT_METADATA_LABELS,
            },
            temp.name,
        )
        docs = common_utils.read_yaml_all(temp.name)

    deployment_spec = None
    service_spec = None
    autoscaler_spec = None

    # Partition the rendered multi-doc YAML by resource kind.
    for doc in docs:
        kind = doc.get('kind')
        if kind == 'Deployment':
            deployment_spec = doc
        elif kind == 'Service':
            service_spec = doc
        elif kind == 'PodAutoscaler' or kind == 'HorizontalPodAutoscaler':
            autoscaler_spec = doc

    # not every deployment + service will have podautoscaler
    if task.serving and task.serving.min_replicas == task.serving.max_replicas:
        autoscaler_spec = None

    if deployment_spec is None:
        raise ValueError('Deployment manifest not found.')
    if service_spec is None:
        raise ValueError('Service manifest not found.')

    # Validate specs before returning
    try:
        validator.validate_deployment_spec(deployment_spec)
        validator.validate_service_spec(service_spec)
        # Only validate HPA if it exists (APA doesn't have official schema)
        if autoscaler_spec and autoscaler_spec.get('kind') == 'HorizontalPodAutoscaler':
            validator.validate_horizontalpodautoscaler_spec(autoscaler_spec)
    except ValueError as e:
        raise ValueError(f'Spec validation failed: {e}')

    return deployment_spec, service_spec, autoscaler_spec or {}
127
+
128
+
129
def create_deployment(
    namespace: str,
    task: 'konduktor.Task',
    pod_spec: Dict[str, Any],
    dryrun: bool = False,
) -> Optional[Dict[str, Any]]:
    """Creates a Kubernetes Deployment based on the task and pod spec.

    Renders the Deployment manifest for *task*, merges *pod_spec* into its
    pod template, and submits it to the cluster. With dryrun=True the
    merged spec is returned without contacting the API server.

    Raises the underlying API exception after logging the server's message.
    """
    assert task.resources is not None, 'Task resources are undefined'

    deployment_spec, _, _ = render_specs(task)

    # Deployment-specific pod metadata, then merge the pod spec into the
    # deployment template (mirrors the jobset code path).
    pod_utils.inject_deployment_pod_metadata(pod_spec, task)
    pod_utils.merge_pod_into_deployment_template(deployment_spec['spec'], pod_spec)

    if dryrun:
        logger.debug(f'[DRYRUN] Would create deployment:\n{deployment_spec}')
        return deployment_spec

    try:
        context = kubernetes_utils.get_current_kube_config_context_name()
        created = kube_client.apps_api(context=context).create_namespaced_deployment(
            namespace=namespace,
            body=deployment_spec,
        )
        logger.info(
            f'Deployment {colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
            f'{task.name}{colorama.Style.RESET_ALL} created'
        )
        return created
    except kube_client.api_exception() as err:
        # Prefer the structured API error message when the body is JSON.
        try:
            message = json.loads(err.body).get('message', '')
        except json.JSONDecodeError:
            logger.error(f'Error creating deployment: {err.body}')
        else:
            logger.error(f'Error creating deployment: {message}')
        raise err
172
+
173
+
174
def create_service(
    namespace: str,
    task: 'konduktor.Task',
    dryrun: bool = False,
) -> Optional[Dict[str, Any]]:
    """Creates a Kubernetes Service based on the task and pod spec.

    Renders the Service manifest for *task* and submits it. With
    dryrun=True the rendered spec is returned without contacting the
    API server.

    Raises the underlying API exception after logging the server's message.
    """
    assert task.resources is not None, 'Task resources are undefined'

    _, service_spec, _ = render_specs(task)

    if dryrun:
        logger.debug(f'[DRYRUN] Would create service:\n{service_spec}')
        return service_spec

    try:
        context = kubernetes_utils.get_current_kube_config_context_name()
        created = kube_client.core_api(context=context).create_namespaced_service(
            namespace=namespace,
            body=service_spec,
        )
        logger.info(
            f'Service {colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
            f'{task.name}{colorama.Style.RESET_ALL} created'
        )
        return created
    except kube_client.api_exception() as err:
        # Prefer the structured API error message when the body is JSON.
        try:
            message = json.loads(err.body).get('message', '')
        except json.JSONDecodeError:
            logger.error(f'Error creating service: {err.body}')
        else:
            logger.error(f'Error creating service: {message}')
        raise err
209
+
210
+
211
def create_autoscaler(namespace: str, task: 'konduktor.Task', dryrun: bool = False):
    """Create the autoscaler rendered for *task*, if any.

    Dispatches on the rendered manifest's kind: an Aibrix 'PodAutoscaler'
    goes through the custom-objects API, a 'HorizontalPodAutoscaler'
    through the autoscaling API.

    Returns:
        The created object (or the spec on dryrun), or None when the task
        has no autoscaler (min_replicas == max_replicas) or the rendered
        kind is unrecognized.
    """
    _, _, autoscaler_spec = render_specs(task)

    if not autoscaler_spec:
        # render_specs returns {} when no autoscaler applies.
        return None

    # Decide if it's APA or HPA by looking at autoscaler_spec["kind"]
    kind = autoscaler_spec.get('kind')

    if dryrun:
        # Check dryrun before resolving the kube context so dry runs never
        # touch the kubeconfig or cluster.
        logger.debug(f'[DRYRUN] Would create {kind}:\n{autoscaler_spec}')
        return autoscaler_spec

    context = kubernetes_utils.get_current_kube_config_context_name()

    if kind == 'PodAutoscaler':
        custom_api = kube_client.crd_api(context=context)
        return custom_api.create_namespaced_custom_object(
            group='autoscaling.aibrix.ai',
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
            body=autoscaler_spec,
        )
    elif kind == 'HorizontalPodAutoscaler':
        autoscaling_api = kube_client.autoscaling_api(context=context)
        return autoscaling_api.create_namespaced_horizontal_pod_autoscaler(
            namespace=namespace,
            body=autoscaler_spec,
        )

    # Previously an unknown kind fell through silently; make it visible.
    logger.warning(f'Unknown autoscaler kind {kind!r}; nothing created')
    return None
240
+
241
+
242
def list_models(namespace: str) -> List[str]:
    """
    Returns a list of unique model names in the namespace,
    based on label DEPLOYMENT_NAME_LABEL=`trainy.ai/deployment-name`.

    Scans Deployments, Services, Aibrix PodAutoscalers and HPAs, collecting
    the label's value from each; the result is sorted and de-duplicated.
    """
    context = kubernetes_utils.get_current_kube_config_context_name()
    apps = kube_client.apps_api(context)
    core = kube_client.core_api(context)
    # NOTE(review): every other function in this module uses
    # kube_client.crd_api(); confirm crd_client() is an equivalent accessor.
    crds = kube_client.crd_client(context)

    # Selecting on the bare label key matches any resource carrying the
    # label, regardless of its value.
    label_selector = DEPLOYMENT_NAME_LABEL
    model_names: set[str] = set()

    # --- Deployments ---
    for deploy in apps.list_namespaced_deployment(
        namespace, label_selector=label_selector
    ).items:
        labels = getattr(deploy.metadata, 'labels', {}) or {}
        name = labels.get(DEPLOYMENT_NAME_LABEL)
        if name:
            model_names.add(name)

    # --- Services ---
    for svc in core.list_namespaced_service(
        namespace, label_selector=label_selector
    ).items:
        labels = getattr(svc.metadata, 'labels', {}) or {}
        name = labels.get(DEPLOYMENT_NAME_LABEL)
        if name:
            model_names.add(name)

    # --- PodAutoscalers ---
    # APA
    try:
        apa_list = crds.list_namespaced_custom_object(
            group='autoscaling.aibrix.ai',
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
        )
        for apa in apa_list.get('items', []):
            labels = apa.get('metadata', {}).get('labels', {}) or {}
            name = labels.get(DEPLOYMENT_NAME_LABEL)
            if name:
                model_names.add(name)
    except ApiException as e:
        if e.status != 404:
            # re-raise if it's not just missing CRD
            raise
        # otherwise ignore, cluster just doesn't have Aibrix CRDs
        logger.warning('Skipping APA lookup. Aibrix CRDs not found in cluster')

    # HPA
    autoscaling_api = kube_client.autoscaling_api(context=context)
    hpa_list = autoscaling_api.list_namespaced_horizontal_pod_autoscaler(
        namespace=namespace
    )
    for hpa in hpa_list.items:
        labels = getattr(hpa.metadata, 'labels', {}) or {}
        name = labels.get(DEPLOYMENT_NAME_LABEL)
        if name:
            model_names.add(name)

    return sorted(model_names)
306
+
307
+
308
def is_autoscaler_ready(autoscaler_obj: dict) -> bool:
    """
    Returns True if the autoscaler (PodAutoscaler or HPA) is considered healthy.
    For PodAutoscaler: AbleToScale == True.
    For HPA: AbleToScale == True, or presence of the HPA is enough if no conditions.
    """
    try:
        # Normalize Kubernetes client models to plain dicts.
        obj = (
            autoscaler_obj.to_dict()
            if hasattr(autoscaler_obj, 'to_dict')
            else autoscaler_obj
        )
        conds = obj.get('status', {}).get('conditions', []) or []

        # No conditions at all (common for fresh HPAs): existing is enough.
        if not conds:
            return True

        # Otherwise require an AbleToScale condition reporting True.
        if any(
            c.get('type') == 'AbleToScale' and c.get('status') == 'True'
            for c in conds
        ):
            return True
    except Exception as e:
        logger.warning(f'Error checking autoscaler readiness: {e}')

    return False
334
+
335
+
336
def build_autoscaler_map(namespace: str, context: str) -> dict[str, dict]:
    """Fetch all APAs and HPAs and combine into 1 dict keyed by deployment name."""
    result: dict[str, dict] = {}

    # Aibrix APAs first — they win over a same-named HPA below.
    try:
        apa_list = kube_client.crd_api(context=context).list_namespaced_custom_object(
            group='autoscaling.aibrix.ai',
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
        )
        for item in apa_list.get('items', []):
            item_labels = item.get('metadata', {}).get('labels', {}) or {}
            target = item_labels.get(DEPLOYMENT_NAME_LABEL)
            if target:
                result[target] = item
    except Exception as e:
        logger.warning(f'Error fetching APAs: {e}')

    # Standard HPAs fill in any deployments not already covered by an APA.
    try:
        hpa_list = kube_client.autoscaling_api(
            context=context
        ).list_namespaced_horizontal_pod_autoscaler(namespace=namespace)
        for hpa in hpa_list.items:
            hpa_labels = getattr(hpa.metadata, 'labels', {}) or {}
            target = hpa_labels.get(DEPLOYMENT_NAME_LABEL)
            if target and target not in result:
                result[target] = hpa.to_dict()
    except Exception as e:
        logger.warning(f'Error fetching HPAs: {e}')

    return result
372
+
373
+
374
def get_model_status(
    name: str,
    deployments: dict[str, Any],
    services: dict[str, Any],
    autoscalers: dict[str, dict],
) -> Dict[str, Optional[str]]:
    """Check the status of Deployment, Service, and Autoscaler.

    Returns a dict with keys 'deployment' and 'service' (each 'ready',
    'pending' or 'missing') and 'autoscaler' ('ready'/'pending', or None
    when no autoscaler exists for *name*).
    """
    result: Dict[str, Optional[str]] = {
        'deployment': 'missing',
        'service': 'missing',
        'autoscaler': None,
    }

    # Deployment: ready once every desired replica reports ready.
    if name in deployments:
        dep = deployments[name]
        ready_count = (dep.status.ready_replicas or 0) if dep.status else 0
        want_count = (dep.spec.replicas or 0) if dep.spec else 0
        result['deployment'] = 'ready' if ready_count == want_count else 'pending'

    # Service: Aibrix/vLLM services count as ready by existence; plain
    # services need a LoadBalancer address.
    if name in services:
        svc = services[name]
        svc_labels = getattr(svc.metadata, 'labels', {}) or {}
        if AIBRIX_NAME_LABEL in svc_labels:
            result['service'] = 'ready'
        else:
            has_addr = False
            if (
                svc.status
                and svc.status.load_balancer
                and svc.status.load_balancer.ingress
            ):
                entries = svc.status.load_balancer.ingress
                if entries and (entries[0].ip or entries[0].hostname):
                    has_addr = True
            result['service'] = 'ready' if has_addr else 'pending'

    # Autoscaler: only reported when one exists for this deployment.
    if name in autoscalers:
        result['autoscaler'] = (
            'ready' if is_autoscaler_ready(autoscalers[name]) else 'pending'
        )

    return result
418
+
419
+
420
def get_deployment(namespace: str, job_name: str) -> Optional[Any]:
    """Fetch a Deployment by name; returns None when it does not exist."""
    context = kubernetes_utils.get_current_kube_config_context_name()
    api = kube_client.apps_api(context=context)
    try:
        return api.read_namespaced_deployment(name=job_name, namespace=namespace)
    except ApiException as err:
        # Missing deployment is an expected outcome, not an error.
        if err.status != 404:
            raise
        return None
429
+
430
+
431
def get_service(namespace: str, job_name: str) -> Optional[Any]:
    """Fetch a Service by name; returns None when it does not exist."""
    context = kubernetes_utils.get_current_kube_config_context_name()
    api = kube_client.core_api(context=context)
    try:
        return api.read_namespaced_service(name=job_name, namespace=namespace)
    except ApiException as err:
        # Missing service is an expected outcome, not an error.
        if err.status != 404:
            raise
        return None
440
+
441
+
442
def get_autoscaler(namespace: str, job_name: str) -> Optional[Any]:
    """Fetch the autoscaler for *job_name*: Aibrix APA first, then HPA.

    The APA is expected to be named '<job_name>-apa' and the HPA
    '<job_name>-hpa'. Returns a dict for either kind, or None when
    neither exists. Non-404 API errors propagate.
    """
    context = kubernetes_utils.get_current_kube_config_context_name()

    # --- Try Aibrix APA first ---
    try:
        return kube_client.crd_api(context=context).get_namespaced_custom_object(
            group='autoscaling.aibrix.ai',
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
            name=f'{job_name}-apa',
        )
    except ApiException as err:
        if err.status != 404:
            raise
        # 404: no APA; fall through to the built-in HPA.

    # --- Try built-in Kubernetes HPA ---
    try:
        hpa = kube_client.autoscaling_api(
            context=context
        ).read_namespaced_horizontal_pod_autoscaler(
            name=f'{job_name}-hpa', namespace=namespace
        )
    except ApiException as err:
        if err.status == 404:
            return None
        raise
    return hpa.to_dict()
469
+
470
+
471
def delete_deployment(namespace: str, name: str) -> Optional[Dict[str, Any]]:
    """Deletes a Kubernetes Deployment in the given namespace.

    Args:
        namespace: Namespace where the deployment exists.
        name: Name of the deployment to delete.

    Returns:
        Response from delete operation, or None on error.
    """
    try:
        context = kubernetes_utils.get_current_kube_config_context_name()
        response = kube_client.apps_api(context=context).delete_namespaced_deployment(
            name=name,
            namespace=namespace,
        )
        return response
    except kube_client.api_exception() as err:
        try:
            error_body = json.loads(err.body)
            error_message = error_body.get('message', '')
            logger.error(f'Error deleting deployment: {error_message}')
        except json.JSONDecodeError:
            error_message = str(err.body)
            logger.error(f'Error deleting deployment: {error_message}')
        else:
            # NOTE(review): a parseable API error is re-raised here, while an
            # unparseable body falls through to 'return None' — asymmetric with
            # delete_service and with the docstring's "None on error"; confirm
            # which behavior is intended.
            raise err
        return None
499
+
500
+
501
def delete_service(namespace: str, name: str) -> Optional[Dict[str, Any]]:
    """Deletes a Kubernetes Service in the given namespace.

    Args:
        namespace: Namespace where the service exists.
        name: Name of the service to delete.

    Returns:
        Response from delete operation, or None on error.
    """
    try:
        context = kubernetes_utils.get_current_kube_config_context_name()
        response = kube_client.core_api(context=context).delete_namespaced_service(
            name=name,
            namespace=namespace,
        )
        return response
    except kube_client.api_exception() as err:
        try:
            error_body = json.loads(err.body)
            error_message = error_body.get('message', '')
            logger.error(f'Error deleting service: {error_message}')
        except json.JSONDecodeError:
            logger.error(f'Error deleting service: {err.body}')
        # NOTE(review): every API error is re-raised after logging, so the
        # 'return None' below is unreachable and the docstring's "None on
        # error" does not hold — confirm intended.
        raise err
    return None
527
+
528
+
529
def delete_autoscaler(namespace: str, name: str) -> Optional[Dict[str, Any]]:
    """Deletes either an Aibrix PodAutoscaler or a HorizontalPodAutoscaler.

    Tries the Aibrix APA ('<name>-apa') first; on a 404 falls back to the
    built-in HPA ('<name>-hpa').

    Args:
        namespace: Namespace where the autoscaler exists.
        name: Deployment name the autoscaler belongs to.

    Returns:
        The delete response from whichever API succeeded.

    Raises:
        The underlying API exception for any non-404 APA error, or any HPA
        deletion error (after logging its message).
    """
    context = kubernetes_utils.get_current_kube_config_context_name()

    # --- Try delete APA first ---
    try:
        custom_api = kube_client.crd_api(context=context)
        return custom_api.delete_namespaced_custom_object(
            group='autoscaling.aibrix.ai',
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
            name=f'{name}-apa',
        )
    except kube_client.api_exception() as err:
        # The original parsed err.body into an unused variable and raised
        # inside a try whose own 'except Exception' swallowed the raise
        # before re-checking — the net effect was exactly this check.
        if getattr(err, 'status', None) != 404:
            raise
        # 404: no APA for this deployment; fall through to the HPA.

    # --- Try delete HPA ---
    try:
        autoscaling_api = kube_client.autoscaling_api(context=context)
        return autoscaling_api.delete_namespaced_horizontal_pod_autoscaler(
            name=f'{name}-hpa',
            namespace=namespace,
        )
    except kube_client.api_exception() as err:
        # Prefer the structured API error message when the body is JSON.
        try:
            error_message = json.loads(err.body).get('message', '')
            logger.error(f'Error deleting Pod Autoscaler: {error_message}')
        except json.JSONDecodeError:
            logger.error(f'Error deleting Pod Autoscaler: {err.body}')
        raise err
569
+
570
+
571
def delete_serving_specs(name: str, namespace: str) -> None:
    """Best-effort teardown of every serving resource for *name*.

    Deletes the deployment, service, and autoscaler in turn; a failure on
    one resource is logged and does not stop the others.
    """
    deleters = (
        ('deployment', delete_deployment),
        ('service', delete_service),
        ('podautoscaler', delete_autoscaler),
    )
    for kind, delete_fn in deleters:
        try:
            delete_fn(namespace, name)
        except Exception as e:
            logger.error(f'Failed to delete {kind} {name}: {e}')
        else:
            logger.info(f'Deleted {kind}: {name}')
582
+
583
+
584
+ def _get_resource_summary(deployment) -> str:
585
+ """Extract and format pod resource information from a deployment.
586
+
587
+ Args:
588
+ deployment: Kubernetes deployment object
589
+
590
+ Returns:
591
+ Formatted string with resource information (GPU, CPU, memory)
592
+ """
593
+ if not deployment:
594
+ return '?'
595
+
596
+ try:
597
+ containers = deployment.spec.template.spec.containers
598
+ if not containers:
599
+ return '?'
600
+ container = containers[0]
601
+ res = container.resources.requests or {}
602
+
603
+ cpu = res.get('cpu', '?')
604
+ mem = res.get('memory', '?')
605
+ gpu = res.get('nvidia.com/gpu') or res.get('trainy.ai/gpu')
606
+
607
+ # Try to extract GPU type from deployment labels
608
+ labels = deployment.metadata.labels or {}
609
+ accelerator_type = labels.get('trainy.ai/accelerator', 'L4O')
610
+
611
+ gpu_str = f'{accelerator_type}:{gpu}' if gpu else 'None'
612
+ return f'{gpu_str}\n{cpu} CPU\n{mem}'
613
+ except Exception:
614
+ return '?'
615
+
616
+
617
def get_envoy_external_ip() -> Optional[str]:
    """Return the external IP/hostname of the envoy gateway LoadBalancer.

    Scans services in the 'envoy-gateway-system' namespace; best-effort —
    any lookup failure yields None.
    """
    context = kubernetes_utils.get_current_kube_config_context_name()
    core_api = kube_client.core_api(context=context)
    try:
        for svc in core_api.list_namespaced_service(
            namespace='envoy-gateway-system'
        ).items:
            if svc.spec.type != 'LoadBalancer' or 'envoy' not in svc.metadata.name:
                continue
            ingress = svc.status.load_balancer.ingress
            if ingress:
                return ingress[0].ip or ingress[0].hostname
    except Exception:
        # Deliberate best-effort: missing namespace/permissions → no IP.
        pass
    return None
630
+
631
+
632
def show_status_table(namespace: str, all_users: bool):
    """Display status of Konduktor Serve models.

    Renders a rich table of every model in *namespace* showing overall
    status, per-resource summary, endpoint, replica counts, and resources.
    With all_users=False, rows not owned by the current user are skipped.
    """
    context = kubernetes_utils.get_current_kube_config_context_name()

    # Build lookup maps (deployment_name -> object)
    apps_api = kube_client.apps_api(context)
    core_api = kube_client.core_api(context)

    deployments_map = {}
    for d in apps_api.list_namespaced_deployment(namespace=namespace).items:
        name = (d.metadata.labels or {}).get(DEPLOYMENT_NAME_LABEL)
        if name is not None:
            deployments_map[name] = d

    services_map = {}
    for s in core_api.list_namespaced_service(namespace=namespace).items:
        name = (s.metadata.labels or {}).get(DEPLOYMENT_NAME_LABEL)
        if name is not None:
            services_map[name] = s

    autoscalers_map = build_autoscaler_map(namespace, context or '')

    model_names = list_models(namespace)
    if not model_names:
        Console().print(
            f'[yellow]No deployments found in namespace {namespace}.[/yellow]'
        )
        return

    Console().print()
    external_ip = get_envoy_external_ip()
    title = '[bold]KONDUKTOR SERVE[/bold]'
    # CI terminals often render unicode box-drawing badly; fall back to ASCII.
    is_ci = os.environ.get('CI') or os.environ.get('BUILDKITE')

    table = Table(title=title, box=box.ASCII if is_ci else box.ROUNDED)
    if all_users:
        table.add_column('User', style='magenta', no_wrap=True)
    table.add_column('Name', style='cyan', no_wrap=True)
    table.add_column('Status', no_wrap=True)
    table.add_column('Summary', style='bold', no_wrap=True)
    table.add_column('Endpoint', style='yellow', no_wrap=True)
    table.add_column('Replicas', style='dim', no_wrap=True)
    table.add_column('Resources', style='white', no_wrap=True)

    # Count of rows skipped because they belong to another user.
    unowned = 0

    for idx, name in enumerate(model_names):
        deployment = deployments_map.get(name)
        service = services_map.get(name)
        autoscaler = autoscalers_map.get(name)

        # Extract owner from the first resource carrying the username label;
        # resources may be client models (attrs) or raw dicts (APA).
        owner = None
        for resource in [deployment, service, autoscaler]:
            if not resource:
                continue
            metadata = (
                resource.metadata
                if hasattr(resource, 'metadata')
                else resource.get('metadata', {})
            )
            labels = (
                metadata.labels
                if hasattr(metadata, 'labels')
                else metadata.get('labels', {})
            )
            if labels:
                owner = labels.get('trainy.ai/username')
                if owner:
                    break

        if not all_users and owner != common_utils.get_cleaned_username():
            unowned += 1
            continue

        # Status
        status = get_model_status(name, deployments_map, services_map, autoscalers_map)
        states = [status['deployment'], status['service'], status['autoscaler']]

        # Per-resource emoji line for the Summary column.
        def emoji_line(label: str, state: str) -> str:
            emoji_map = {
                'ready': '✅',
                'pending': '❓',
                'missing': '❌',
            }
            return f"{label}: {emoji_map.get(state, '❓')}"

        summary_lines = [
            emoji_line('Deploym', status['deployment'] or 'missing'),
            emoji_line('Service', status['service'] or 'missing'),
        ]
        if status['autoscaler'] is not None:
            summary_lines.append(
                emoji_line('AScaler', status['autoscaler'] or 'missing')
            )
        summary = '\n'.join(summary_lines)

        # Overall status: any missing piece → FAILED; all tracked pieces
        # ready → READY; otherwise PENDING.
        if any(s == 'missing' for s in states):
            status_text = Text('FAILED', style='red')
        else:
            if status['autoscaler'] is not None:
                status_text = (
                    Text('READY', style='green')
                    if all(s == 'ready' for s in states)
                    else Text('PENDING', style='yellow')
                )
            else:
                status_text = (
                    Text('READY', style='green')
                    if (
                        status['deployment'] == 'ready' and status['service'] == 'ready'
                    )
                    else Text('PENDING', style='yellow')
                )

        # Type & endpoint: Aibrix models route through the shared envoy
        # gateway IP; plain services expose their own LoadBalancer address.
        ip_str = '<pending>'
        labels = (
            (
                deployment.metadata.labels
                if deployment and hasattr(deployment.metadata, 'labels')
                else {}
            )
            or (
                service.metadata.labels
                if service and hasattr(service.metadata, 'labels')
                else {}
            )
            or {}
        )
        if AIBRIX_NAME_LABEL in labels:
            ip_str = external_ip or '<pending>'
        else:
            if (
                service
                and service.status
                and service.status.load_balancer
                and service.status.load_balancer.ingress
            ):
                ing = service.status.load_balancer.ingress[0]
                ip_str = ing.ip or ing.hostname or '<pending>'

        # Port: prefer the port named 'serve', else the first declared port.
        port_str = ''
        if service and service.spec and service.spec.ports:
            port_obj = (
                next((p for p in service.spec.ports if p.name == 'serve'), None)
                or service.spec.ports[0]
            )
            if port_obj and port_obj.port:
                port_str = str(port_obj.port)
        endpoint_str = f'{ip_str}:{port_str}' if port_str else ip_str

        # Replicas: ready/desired, plus autoscaler min/max when present.
        ready_replicas = (
            str(deployment.status.ready_replicas or 0) if deployment else '?'
        )
        desired_replicas = str(deployment.spec.replicas or 0) if deployment else '?'
        replicas_text = Text()
        replicas_text.append(
            f'Ready: {ready_replicas}/{desired_replicas}\n', style='bold white'
        )
        if status['autoscaler']:
            # Autoscaler may be a raw dict (APA, camelCase keys) or a
            # client-model dump (snake_case keys); check both spellings.
            spec = (
                autoscaler.get('spec', {})
                if isinstance(autoscaler, dict)
                else getattr(autoscaler, 'spec', {})
            )
            min_r = str(spec.get('minReplicas', spec.get('min_replicas', '?')))
            max_r = str(spec.get('maxReplicas', spec.get('max_replicas', '?')))
            replicas_text.append(f'Min : {min_r}\n', style='bold white')
            replicas_text.append(f'Max : {max_r}', style='bold white')

        # Resources
        resources_text = _get_resource_summary(deployment)

        # Row
        if all_users:
            table.add_row(
                owner or '(unknown)',
                name,
                status_text,
                summary,
                endpoint_str,
                replicas_text,
                resources_text,
            )
        else:
            table.add_row(
                name, status_text, summary, endpoint_str, replicas_text, resources_text
            )

        # Blank spacer row between models (not after the last one).
        if idx != len(model_names) - 1:
            table.add_row(*([''] * len(table.columns)))

    if len(model_names) == unowned:
        Console().print(
            f'[yellow]No deployments created by you found '
            f'in namespace {namespace}. Try --all-users.[/yellow]'
        )
        return

    Console().print(table)