konduktor-nightly 0.1.0.dev20251128104812__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. konduktor/__init__.py +49 -0
  2. konduktor/adaptors/__init__.py +0 -0
  3. konduktor/adaptors/aws.py +221 -0
  4. konduktor/adaptors/common.py +118 -0
  5. konduktor/adaptors/gcp.py +126 -0
  6. konduktor/authentication.py +124 -0
  7. konduktor/backends/__init__.py +6 -0
  8. konduktor/backends/backend.py +86 -0
  9. konduktor/backends/constants.py +21 -0
  10. konduktor/backends/deployment.py +204 -0
  11. konduktor/backends/deployment_utils.py +1351 -0
  12. konduktor/backends/jobset.py +225 -0
  13. konduktor/backends/jobset_utils.py +726 -0
  14. konduktor/backends/pod_utils.py +501 -0
  15. konduktor/check.py +184 -0
  16. konduktor/cli.py +1945 -0
  17. konduktor/config.py +420 -0
  18. konduktor/constants.py +36 -0
  19. konduktor/controller/__init__.py +0 -0
  20. konduktor/controller/constants.py +56 -0
  21. konduktor/controller/launch.py +44 -0
  22. konduktor/controller/node.py +116 -0
  23. konduktor/controller/parse.py +111 -0
  24. konduktor/dashboard/README.md +30 -0
  25. konduktor/dashboard/backend/main.py +169 -0
  26. konduktor/dashboard/backend/sockets.py +154 -0
  27. konduktor/dashboard/frontend/.eslintrc.json +3 -0
  28. konduktor/dashboard/frontend/.gitignore +36 -0
  29. konduktor/dashboard/frontend/app/api/jobs/route.js +71 -0
  30. konduktor/dashboard/frontend/app/api/namespaces/route.js +69 -0
  31. konduktor/dashboard/frontend/app/components/Grafana.jsx +66 -0
  32. konduktor/dashboard/frontend/app/components/JobsData.jsx +197 -0
  33. konduktor/dashboard/frontend/app/components/LogsData.jsx +139 -0
  34. konduktor/dashboard/frontend/app/components/NavMenu.jsx +39 -0
  35. konduktor/dashboard/frontend/app/components/NavTabs.jsx +73 -0
  36. konduktor/dashboard/frontend/app/components/NavTabs2.jsx +30 -0
  37. konduktor/dashboard/frontend/app/components/SelectBtn.jsx +27 -0
  38. konduktor/dashboard/frontend/app/components/lib/utils.js +6 -0
  39. konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +78 -0
  40. konduktor/dashboard/frontend/app/components/ui/input.jsx +19 -0
  41. konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +104 -0
  42. konduktor/dashboard/frontend/app/components/ui/select.jsx +120 -0
  43. konduktor/dashboard/frontend/app/favicon.ico +0 -0
  44. konduktor/dashboard/frontend/app/globals.css +120 -0
  45. konduktor/dashboard/frontend/app/jobs/page.js +10 -0
  46. konduktor/dashboard/frontend/app/layout.js +22 -0
  47. konduktor/dashboard/frontend/app/logs/page.js +11 -0
  48. konduktor/dashboard/frontend/app/page.js +12 -0
  49. konduktor/dashboard/frontend/jsconfig.json +7 -0
  50. konduktor/dashboard/frontend/next.config.mjs +4 -0
  51. konduktor/dashboard/frontend/package-lock.json +6687 -0
  52. konduktor/dashboard/frontend/package.json +37 -0
  53. konduktor/dashboard/frontend/postcss.config.mjs +8 -0
  54. konduktor/dashboard/frontend/server.js +64 -0
  55. konduktor/dashboard/frontend/tailwind.config.js +17 -0
  56. konduktor/data/__init__.py +9 -0
  57. konduktor/data/aws/__init__.py +15 -0
  58. konduktor/data/aws/s3.py +1138 -0
  59. konduktor/data/constants.py +7 -0
  60. konduktor/data/data_utils.py +268 -0
  61. konduktor/data/gcp/__init__.py +19 -0
  62. konduktor/data/gcp/constants.py +42 -0
  63. konduktor/data/gcp/gcs.py +994 -0
  64. konduktor/data/gcp/utils.py +9 -0
  65. konduktor/data/registry.py +19 -0
  66. konduktor/data/storage.py +812 -0
  67. konduktor/data/storage_utils.py +535 -0
  68. konduktor/execution.py +447 -0
  69. konduktor/kube_client.py +237 -0
  70. konduktor/logging.py +111 -0
  71. konduktor/manifests/aibrix-setup.yaml +430 -0
  72. konduktor/manifests/apoxy-setup.yaml +184 -0
  73. konduktor/manifests/apoxy-setup2.yaml +98 -0
  74. konduktor/manifests/controller_deployment.yaml +69 -0
  75. konduktor/manifests/dashboard_deployment.yaml +131 -0
  76. konduktor/manifests/dmesg_daemonset.yaml +57 -0
  77. konduktor/manifests/pod_cleanup_controller.yaml +129 -0
  78. konduktor/resource.py +546 -0
  79. konduktor/serving.py +153 -0
  80. konduktor/task.py +949 -0
  81. konduktor/templates/deployment.yaml.j2 +191 -0
  82. konduktor/templates/jobset.yaml.j2 +43 -0
  83. konduktor/templates/pod.yaml.j2 +563 -0
  84. konduktor/usage/__init__.py +0 -0
  85. konduktor/usage/constants.py +21 -0
  86. konduktor/utils/__init__.py +0 -0
  87. konduktor/utils/accelerator_registry.py +17 -0
  88. konduktor/utils/annotations.py +62 -0
  89. konduktor/utils/base64_utils.py +95 -0
  90. konduktor/utils/common_utils.py +426 -0
  91. konduktor/utils/constants.py +5 -0
  92. konduktor/utils/env_options.py +55 -0
  93. konduktor/utils/exceptions.py +234 -0
  94. konduktor/utils/kubernetes_enums.py +8 -0
  95. konduktor/utils/kubernetes_utils.py +763 -0
  96. konduktor/utils/log_utils.py +467 -0
  97. konduktor/utils/loki_utils.py +102 -0
  98. konduktor/utils/rich_utils.py +123 -0
  99. konduktor/utils/schemas.py +625 -0
  100. konduktor/utils/subprocess_utils.py +273 -0
  101. konduktor/utils/ux_utils.py +247 -0
  102. konduktor/utils/validator.py +461 -0
  103. konduktor_nightly-0.1.0.dev20251128104812.dist-info/LICENSE +91 -0
  104. konduktor_nightly-0.1.0.dev20251128104812.dist-info/METADATA +98 -0
  105. konduktor_nightly-0.1.0.dev20251128104812.dist-info/RECORD +107 -0
  106. konduktor_nightly-0.1.0.dev20251128104812.dist-info/WHEEL +4 -0
  107. konduktor_nightly-0.1.0.dev20251128104812.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,1351 @@
1
+ """Deployment utils: wraps CRUD operations for deployments"""
2
+
3
+ import json
4
+ import os
5
+ import tempfile
6
+ import typing
7
+ from typing import Any, Dict, List, Optional, Tuple
8
+
9
+ import colorama
10
+ from kubernetes.client.exceptions import ApiException
11
+ from rich import box
12
+ from rich.console import Console
13
+ from rich.table import Table
14
+ from rich.text import Text
15
+
16
+ import konduktor
17
+ from konduktor import config as konduktor_config
18
+ from konduktor import kube_client, logging
19
+ from konduktor.backends import constants as backend_constants
20
+ from konduktor.backends import pod_utils
21
+ from konduktor.utils import (
22
+ common_utils,
23
+ kubernetes_utils,
24
+ validator,
25
+ )
26
+
27
+ if typing.TYPE_CHECKING:
28
+ pass
29
+
30
logger = logging.get_logger(__name__)

# Use shared constants from konduktor.backends.constants.
# Re-exported under deployment-specific names so this module and the
# deployment.yaml.j2 template can refer to them uniformly.
DEPLOYMENT_NAME_LABEL = backend_constants.DEPLOYMENT_NAME_LABEL
DEPLOYMENT_USERID_LABEL = backend_constants.USERID_LABEL
DEPLOYMENT_USER_LABEL = backend_constants.USER_LABEL
DEPLOYMENT_ACCELERATOR_LABEL = backend_constants.ACCELERATOR_LABEL
DEPLOYMENT_NUM_ACCELERATORS_LABEL = backend_constants.NUM_ACCELERATORS_LABEL
AIBRIX_NAME_LABEL = backend_constants.AIBRIX_NAME_LABEL

SECRET_BASENAME_LABEL = backend_constants.SECRET_BASENAME_LABEL

# Template variables passed into deployment.yaml.j2: maps the Jinja
# variable names to the actual Kubernetes label keys defined above.
_DEPLOYMENT_METADATA_LABELS = {
    'deployment_name_label': DEPLOYMENT_NAME_LABEL,
    'deployment_userid_label': DEPLOYMENT_USERID_LABEL,
    'deployment_user_label': DEPLOYMENT_USER_LABEL,
    'deployment_accelerator_label': DEPLOYMENT_ACCELERATOR_LABEL,
    'deployment_num_accelerators_label': DEPLOYMENT_NUM_ACCELERATORS_LABEL,
    'model_name_label': AIBRIX_NAME_LABEL,
}
50
+
51
+
52
def render_specs(
    task: 'konduktor.Task',
) -> Tuple[
    Dict[str, Any], Dict[str, Any], List[Dict[str, Any]], Optional[Dict[str, Any]]
]:
    """Renders Kubernetes resource specifications from a Konduktor task.

    Takes a Konduktor task and generates the necessary Kubernetes resource
    specifications for deployment by filling the deployment.yaml.j2 template.
    Automatically detects deployment type (vLLM/Aibrix vs General) based on
    the task's run command.

    Args:
        task: A Konduktor Task object containing deployment configuration
            including resources, serving settings, and run commands.

    Returns:
        A tuple containing:
        - deployment_spec (Dict[str, Any]): Kubernetes Deployment specification
        - service_spec (Dict[str, Any]): Kubernetes Service specification
        - http_addon_resources (List[Dict[str, Any]]): List of HTTP add-on resources
          (HTTPScaledObject and Ingress) for general deployments; empty for vLLM
        - pa_resource (Optional[Dict[str, Any]]): PodAutoscaler specification for
          vLLM deployments with autoscaling enabled, None otherwise; empty for general

    Raises:
        ValueError: If required specs are missing after template rendering or
            if spec validation fails.
    """
    # "General" = anything that is not a vLLM OpenAI API server command;
    # vLLM commands are routed through the Aibrix stack instead.
    general = True
    if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
        general = False

    # Calculate accelerator info for template
    assert task.resources is not None
    accelerator_type = task.resources.get_accelerator_type() or 'None'
    # For Deployments: GPUs per pod (not total across replicas)
    num_accelerators = task.resources.get_accelerator_count() or 0

    # NOTE(review): mutates task.run in place — callers observe the
    # substituted command after this function returns. Confirm intended.
    if task.run:
        task.run = task.run.replace('__KONDUKTOR_TASK_NAME__', task.name)
    # Render the template into a temp file, then load every YAML document
    # it produced. The temp file is deleted when the `with` block exits.
    with tempfile.NamedTemporaryFile() as temp:
        common_utils.fill_template(
            'deployment.yaml.j2',
            {
                'name': task.name,
                'user': common_utils.get_cleaned_username(),
                'accelerator_type': accelerator_type,
                'num_accelerators': str(num_accelerators),
                # Serving defaults when no serving section is given:
                # single replica, port 8000, no probe, no autoscaling.
                'min_replicas': task.serving.min_replicas if task.serving else 1,
                'max_replicas': task.serving.max_replicas if task.serving else 1,
                'ports': task.serving.ports if task.serving else 8000,
                # NOTE(review): serving is accessed both via attributes
                # (.min_replicas) and via .get('probe') — presumably a
                # dict-like object with attribute access; verify.
                'probe_path': (
                    task.serving.get('probe', None) if task.serving else None
                ),
                # Autoscaling is on only when min != max replicas.
                'autoscaler': (
                    'true'
                    if (
                        task.serving
                        and task.serving.min_replicas != task.serving.max_replicas
                    )
                    else 'false'
                ),
                'general': general,
                # Strip last 3 chars: backend Apoxy setup uses unique
                # suffixes (3 random numbers)to avoid Apoxy bugs when
                # deleting/creating TunnelNode resources with same names too
                # quickly, but we hide this complexity from user-facing endpoints
                'general_base_host': (
                    f'{get_unique_cluster_name_from_tunnel()[:-3]}2.trainy.us'
                )
                if general
                else None,
                **_DEPLOYMENT_METADATA_LABELS,
            },
            temp.name,
        )
        docs = common_utils.read_yaml_all(temp.name)

    deployment_spec = None
    service_spec = None
    http_addon_resources = []  # For general deployments
    pa_resource = None  # For aibrix deployments w autoscaling

    # Sort the rendered documents into their slots by Kubernetes kind.
    for doc in docs:
        kind = doc.get('kind')
        if kind == 'Deployment':
            deployment_spec = doc
        elif kind == 'Service':
            service_spec = doc
        # HTTPScaledObject resource for general deployments w autoscaling only
        elif kind == 'HTTPScaledObject':
            http_addon_resources.append(doc)
        # Ingress resource for all general deployments
        elif kind == 'Ingress':
            http_addon_resources.append(doc)
        # PodAutoscaler resource for aibrix deployments w autoscaling only
        elif kind == 'PodAutoscaler':
            pa_resource = doc

    # Sanity-check that the template produced everything this deployment
    # type requires; a miss indicates a template/flag mismatch.
    if deployment_spec is None:
        raise ValueError('Deployment manifest not found.')
    if service_spec is None:
        raise ValueError('Service manifest not found.')
    if general and not http_addon_resources:
        raise ValueError('General deployment manifests not found.')
    if (
        not general
        and task.serving
        and task.serving.min_replicas != task.serving.max_replicas
        and pa_resource is None
    ):
        raise ValueError('Aibrix deployment PodAutoscaler manifest not found.')

    # Validate specs before returning
    try:
        validator.validate_deployment_spec(deployment_spec)
        validator.validate_service_spec(service_spec)
    except ValueError as e:
        raise ValueError(f'Spec validation failed: {e}')

    return deployment_spec, service_spec, http_addon_resources, pa_resource
174
+
175
+
176
def create_pod_autoscaler(
    namespace: str,
    task: 'konduktor.Task',
    dryrun: bool = False,
) -> None:
    """Creates Aibrix PodAutoscaler for non-general deployments."""

    # Only vLLM/Aibrix deployments get a PodAutoscaler; "general"
    # deployments are handled by the KEDA HTTP Add-on instead.
    is_vllm = bool(task.run and 'vllm.entrypoints.openai.api_server' in task.run)
    if not is_vllm:
        return

    # Autoscaling is only in play when min and max replicas differ.
    if not task.serving or task.serving.min_replicas == task.serving.max_replicas:
        logger.debug(
            f'[DEBUG] No autoscaling needed: '
            f'min={task.serving.min_replicas if task.serving else "None"}, '
            f'max={task.serving.max_replicas if task.serving else "None"}'
        )
        return

    logger.debug(
        f'[DEBUG] PA autoscaling enabled: '
        f'min={task.serving.min_replicas}, max={task.serving.max_replicas}'
    )

    # Pull the PodAutoscaler document out of the rendered template.
    _, _, _, pa_spec = render_specs(task)
    if not pa_spec:
        logger.warning('[DEBUG] No PodAutoscaler found in rendered template')
        return

    if dryrun:
        logger.debug(
            f'[DRYRUN] Would create PA autoscaler: '
            f'{pa_spec["metadata"].get("name", "<no-name>")}'
        )
        return

    ctx = kubernetes_utils.get_current_kube_config_context_name()
    crd_api = kube_client.crd_api(context=ctx)
    pa_name = pa_spec.get('metadata', {}).get('name', '<no-name>')

    # Create the KPA; an already-existing autoscaler is not an error.
    try:
        crd_api.create_namespaced_custom_object(
            group='autoscaling.aibrix.ai',
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
            body=pa_spec,
        )
    except Exception as e:
        if '409' in str(e) or 'AlreadyExists' in str(e):
            logger.warning(f'Pod autoscaler {pa_name} already exists, skipping')
        else:
            logger.error(f'Error creating pod autoscaler {pa_name}: {e}')
            raise
    else:
        logger.info(f'Pod autoscaler {pa_name} created')
240
+
241
+
242
def create_deployment(
    namespace: str,
    task: 'konduktor.Task',
    pod_spec: Dict[str, Any],
    dryrun: bool = False,
) -> Optional[Dict[str, Any]]:
    """Creates a Kubernetes Deployment based on the task and pod spec."""

    assert task.resources is not None, 'Task resources are undefined'

    rendered, _, _, _ = render_specs(task)

    # Decorate the pod with deployment-specific metadata, then splice the
    # pod spec into the rendered Deployment template (mirrors jobset flow).
    pod_utils.inject_deployment_pod_metadata(pod_spec, task)
    pod_utils.merge_pod_into_deployment_template(rendered['spec'], pod_spec)

    if dryrun:
        logger.debug(f'[DRYRUN] Would create deployment:\n{rendered}')
        return rendered

    try:
        ctx = kubernetes_utils.get_current_kube_config_context_name()
        created = kube_client.apps_api(context=ctx).create_namespaced_deployment(
            namespace=namespace,
            body=rendered,
        )
        logger.info(
            f'Deployment {colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
            f'{task.name}{colorama.Style.RESET_ALL} created'
        )
        return created
    except kube_client.api_exception() as err:
        # Surface the API server's message when the error body is JSON;
        # otherwise fall back to logging the raw body.
        try:
            message = json.loads(err.body).get('message', '')
            logger.error(f'Error creating deployment: {message}')
        except json.JSONDecodeError:
            logger.error(f'Error creating deployment: {err.body}')
        raise err
285
+
286
+
287
def create_service(
    namespace: str,
    task: 'konduktor.Task',
    dryrun: bool = False,
) -> Optional[Dict[str, Any]]:
    """Creates a Kubernetes Service based on the task and pod spec.

    Args:
        namespace: Namespace to create the Service in.
        task: Task whose rendered Service spec is applied.
        dryrun: When True, log and return the rendered spec without
            creating anything.

    Returns:
        The created Service object, or the rendered spec on dryrun.

    Raises:
        The Kubernetes API exception when creation fails (after logging).
    """

    assert task.resources is not None, 'Task resources are undefined'

    _, service_spec, _, _ = render_specs(task)

    if dryrun:
        logger.debug(f'[DRYRUN] Would create service:\n{service_spec}')
        return service_spec

    try:
        context = kubernetes_utils.get_current_kube_config_context_name()
        core_api = kube_client.core_api(context=context)
        service = core_api.create_namespaced_service(
            namespace=namespace,
            body=service_spec,
        )
        logger.info(
            f'Service {colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
            f'{task.name}{colorama.Style.RESET_ALL} created'
        )
        return service
    except kube_client.api_exception() as err:
        try:
            error_body = json.loads(err.body)
            error_message = error_body.get('message', '')
            logger.error(f'Error creating service: {error_message}')
        except json.JSONDecodeError:
            # Bug fix: this branch previously logged `error_message`, which
            # is unbound when json.loads() raises, so a non-JSON error body
            # triggered a NameError that masked the real API error. Log the
            # raw body instead (consistent with create_deployment).
            logger.error(f'Error creating service: {err.body}')
        raise err
322
+
323
+
324
def create_http_addon_resources(
    namespace: str,
    task: 'konduktor.Task',
    dryrun: bool = False,
) -> None:
    """Creates HTTP Add-on resources for general deployments."""

    # Only "general" deployments get KEDA HTTP Add-on resources; vLLM
    # (Aibrix) deployments are autoscaled via PodAutoscaler instead.
    if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
        return

    _, _, addon_docs, _ = render_specs(task)
    if not addon_docs:
        logger.debug('[DEBUG] No HTTP Add-on resources to create')
        return

    if dryrun:
        logger.debug(
            f'[DRYRUN] Would create HTTP Add-on resources:\n' f'{addon_docs}'
        )
        return

    ctx = kubernetes_utils.get_current_kube_config_context_name()
    logger.debug(f'[DEBUG] Using Kubernetes context: {ctx}')

    for doc in addon_docs:
        doc_kind = doc.get('kind')
        doc_name = doc['metadata']['name']

        logger.debug(f'[DEBUG] Creating {doc_kind}: {doc_name}')

        try:
            if doc_kind == 'HTTPScaledObject':
                # Rendered only when autoscaling is enabled.
                kube_client.crd_api(context=ctx).create_namespaced_custom_object(
                    group='http.keda.sh',
                    version='v1alpha1',
                    namespace=namespace,
                    plural='httpscaledobjects',
                    body=doc,
                )
                logger.info(f'HTTPScaledObject {doc_name} created')
            elif doc_kind == 'Ingress':
                # Always rendered: provides external access.
                kube_client.networking_api(context=ctx).create_namespaced_ingress(
                    namespace=namespace,
                    body=doc,
                )
                logger.info(f'Ingress {doc_name} created')
        except Exception as e:
            # 409/AlreadyExists is benign (idempotent re-create).
            if '409' in str(e) or 'AlreadyExists' in str(e):
                logger.warning(
                    f'HTTP Add-on resource {doc_kind} {doc_name} already exists, skipping'
                )
            else:
                logger.error(
                    f'Error creating HTTP Add-on resource {doc_kind} {doc_name}: {e}'
                )
                raise
391
+
392
+
393
def list_models(namespace: str) -> List[str]:
    """
    Returns a list of unique model names in the namespace,
    based on label DEPLOYMENT_NAME_LABEL=`trainy.ai/deployment-name`.
    """
    context = kubernetes_utils.get_current_kube_config_context_name()
    apps = kube_client.apps_api(context)
    core = kube_client.core_api(context)
    crds = kube_client.crd_client(context)

    selector = DEPLOYMENT_NAME_LABEL
    names: set[str] = set()

    def _collect(labels) -> None:
        # Record the deployment-name label value when present and non-empty.
        value = (labels or {}).get(DEPLOYMENT_NAME_LABEL)
        if value:
            names.add(value)

    # Deployments
    for deploy in apps.list_namespaced_deployment(
        namespace, label_selector=selector
    ).items:
        _collect(getattr(deploy.metadata, 'labels', {}) or {})

    # Services
    for svc in core.list_namespaced_service(
        namespace, label_selector=selector
    ).items:
        _collect(getattr(svc.metadata, 'labels', {}) or {})

    # Podautoscalers (KPA only) — tolerate clusters without the Aibrix CRDs.
    try:
        pa_items = crds.list_namespaced_custom_object(
            group='autoscaling.aibrix.ai',
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
        ).get('items', [])
        for pa in pa_items:
            _collect(pa.get('metadata', {}).get('labels', {}))
    except ApiException as e:
        if e.status != 404:
            # re-raise if it's not just missing CRD
            raise
        # otherwise ignore, cluster just doesn't have Aibrix CRDs
        logger.warning('Skipping PA lookup. Aibrix CRDs not found in cluster')

    # HPA
    autoscaling_api = kube_client.autoscaling_api(context=context)
    for hpa in autoscaling_api.list_namespaced_horizontal_pod_autoscaler(
        namespace=namespace
    ).items:
        _collect(getattr(hpa.metadata, 'labels', {}) or {})

    return sorted(names)
456
+
457
+
458
def get_autoscaler_status_for_deployment(
    name: str, autoscalers_map: dict, is_general: bool
) -> bool:
    """Return autoscaler readiness by deployment type.

    - General: returns hpa_ready
    - vLLM/Aibrix: returns kpa_ready
    """

    def _is_ready(obj: dict) -> bool:
        try:
            conditions = obj.get('status', {}).get('conditions') or []
            kind = obj.get('kind') or ''

            # Any autoscaler reporting AbleToScale=True is considered ready.
            if any(
                c.get('type') == 'AbleToScale' and c.get('status') == 'True'
                for c in conditions
            ):
                return True

            if kind == 'HorizontalPodAutoscaler':
                for c in conditions:
                    if c.get('type') != 'ScalingActive':
                        continue
                    # ScalingActive=True means the HPA is actively scaling.
                    if c.get('status') == 'True':
                        return True
                    # ScalingActive=False with reason ScalingDisabled is the
                    # normal scale-to-zero state, not a failure.
                    if (
                        c.get('status') == 'False'
                        and c.get('reason') == 'ScalingDisabled'
                    ):
                        return True

            # An existing autoscaler that reports no conditions yet counts
            # as ready; one with conditions but no positive signal does not.
            return not conditions
        except Exception as e:
            logger.warning(f'Error checking autoscaler readiness: {e}')
            return False

    entry = autoscalers_map.get(name, {})
    key = 'hpa' if is_general else 'kpa'
    if key in entry:
        return _is_ready(entry[key])
    return False
515
+
516
+
517
+ def _extract_min_max_from_autoscaler(autoscaler: dict) -> tuple[str, str]:
518
+ """Extract min/max replicas across PA/HPA/KEDA.
519
+
520
+ Returns (min_str, max_str). Unknowns as '?'.
521
+ """
522
+ try:
523
+ if not autoscaler:
524
+ return '?', '?'
525
+
526
+ spec = autoscaler.get('spec', {})
527
+
528
+ # Check for HTTPScaledObject format (replicas.min/max)
529
+ if 'replicas' in spec:
530
+ replicas = spec.get('replicas', {})
531
+ if 'min' in replicas or 'max' in replicas:
532
+ return (str(replicas.get('min', '?')), str(replicas.get('max', '?')))
533
+
534
+ # Check for KEDA ScaledObject format (minReplicaCount/maxReplicaCount)
535
+ if 'minReplicaCount' in spec or 'maxReplicaCount' in spec:
536
+ return (
537
+ str(spec.get('minReplicaCount', '?')),
538
+ str(spec.get('maxReplicaCount', '?')),
539
+ )
540
+
541
+ # Check for PA/HPA format (minReplicas/maxReplicas)
542
+ if 'minReplicas' in spec or 'maxReplicas' in spec:
543
+ return str(spec.get('minReplicas', '?')), str(spec.get('maxReplicas', '?'))
544
+ except Exception:
545
+ pass
546
+ return '?', '?'
547
+
548
+
549
def build_autoscaler_map(namespace: str, context: str) -> dict[str, dict]:
    """Fetch autoscalers and return a simple map keyed by deployment name.

    Simplified model:
    - Aibrix deployments: 1 PodAutoscaler (KPA) if autoscaling enabled
    - General deployments: 1 HPA (created by KEDA) if autoscaling enabled
    - No autoscaling: No autoscaler

    Returns: {deployment_name: {'kpa': pa_obj} or {'hpa': hpa_obj}}
    """
    result: Dict[str, Dict[str, Any]] = {}

    # --- Aibrix deployment KPA ---
    try:
        crd_api = kube_client.crd_api(context=context)
        pa_items = crd_api.list_namespaced_custom_object(
            group='autoscaling.aibrix.ai',
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
        ).get('items', [])
        for pa in pa_items:
            target = (
                pa.get('metadata', {}).get('labels', {}).get(DEPLOYMENT_NAME_LABEL)
            )
            if not target:
                # Fallback: derive the deployment from scaleTargetRef.name.
                target = pa.get('spec', {}).get('scaleTargetRef', {}).get('name')
            if target:
                result[target] = {'kpa': pa}
        if pa_items:
            logger.debug(f'Found {len(pa_items)} PodAutoscalers')
    except Exception as e:
        logger.warning(f'Error fetching PodAutoscalers: {e}')

    # --- General deployment HPA ---
    try:
        autoscaling_api = kube_client.autoscaling_api(context=context)
        hpa_items = autoscaling_api.list_namespaced_horizontal_pod_autoscaler(
            namespace=namespace
        ).items
        for hpa in hpa_items:
            target = (getattr(hpa.metadata, 'labels', {}) or {}).get(
                DEPLOYMENT_NAME_LABEL
            )
            if not target:
                # Fallback: derive the deployment from scaleTargetRef.name.
                spec_dict = hpa.spec.to_dict() if hpa.spec else {}
                target = spec_dict.get('scale_target_ref', {}).get('name')
            if target:
                hpa_dict = hpa.to_dict()
                # The typed client drops kind/apiVersion on to_dict();
                # restore them so readiness checks can branch on kind.
                hpa_dict['kind'] = 'HorizontalPodAutoscaler'
                hpa_dict['apiVersion'] = 'autoscaling/v2'
                result[target] = {'hpa': hpa_dict}
        if hpa_items:
            logger.debug(f'Found {len(hpa_items)} HPAs')
    except Exception as e:
        logger.warning(f'Error fetching HPAs: {e}')

    return result
610
+
611
+
612
def get_model_status(
    name: str,
    deployments: dict[str, Any],
    services: dict[str, Any],
    autoscalers: dict[str, dict],
) -> Dict[str, Optional[str]]:
    """Check the status of Deployment, Service, and Autoscaler.

    Args:
        name: Deployment/model name to look up in each map.
        deployments: Map of name -> typed Deployment object.
        services: Map of name -> typed Service object.
        autoscalers: Map of name -> {'kpa': ...} or {'hpa': ...}
            (as produced by build_autoscaler_map).

    Returns:
        {'deployment': 'ready'|'pending'|'missing',
         'service': 'ready'|'missing',
         'autoscaler': 'ready'|'pending'|None (None = no autoscaler)}
    """
    status = {
        'deployment': 'missing',
        'service': 'missing',
        'autoscaler': None,
    }

    # --- Deployment ---
    if name in deployments:
        d = deployments[name]
        ready = (d.status.ready_replicas or 0) if d.status else 0
        desired = (d.spec.replicas or 0) if d.spec else 0

        labels = d.metadata.labels or {}
        # Presence of the Aibrix label marks a vLLM/Aibrix deployment.
        is_aibrix = AIBRIX_NAME_LABEL in labels

        if is_aibrix and name in autoscalers:
            # For Aibrix deployments, get the original min replicas from
            # deployment labels
            original_min_replicas = 0
            original_min_str = labels.get('trainy.ai/original-min-replicas')
            if original_min_str:
                try:
                    original_min_replicas = int(original_min_str)
                except (ValueError, TypeError):
                    # Malformed label value: fall back to 0 (scale-to-zero).
                    pass

            # For Aibrix deployments, consider ready if:
            # 1. Ready replicas >= original minimum replicas, OR
            # 2. If original_min_replicas is 0 (scale-to-zero allowed),
            #    then ready == desired
            if original_min_replicas == 0:
                status['deployment'] = 'ready' if ready == desired else 'pending'
            else:
                status['deployment'] = (
                    'ready' if ready >= original_min_replicas else 'pending'
                )
        else:
            # General deployments or no autoscaler: use simple ready == desired check
            status['deployment'] = 'ready' if ready == desired else 'pending'

    # --- Service ---
    # Service existence alone counts as ready (no condition inspection).
    if name in services:
        status['service'] = 'ready'
    else:
        status['service'] = 'missing'

    # --- Autoscaler ---
    if name in autoscalers:
        # Check if this is a general deployment (not vLLM/Aibrix)
        is_general = True
        if deployments.get(name) and hasattr(deployments[name].metadata, 'labels'):
            labels = deployments[name].metadata.labels or {}
            if AIBRIX_NAME_LABEL in labels:
                is_general = False

        # Check actual autoscaler readiness
        autoscaler_ready = get_autoscaler_status_for_deployment(
            name, autoscalers, is_general
        )
        status['autoscaler'] = 'ready' if autoscaler_ready else 'pending'
    else:
        status['autoscaler'] = None

    return status
683
+
684
+
685
def get_deployment(namespace: str, job_name: str) -> Optional[Any]:
    """Fetch a Deployment by name; returns None when it does not exist."""
    ctx = kubernetes_utils.get_current_kube_config_context_name()
    try:
        return kube_client.apps_api(context=ctx).read_namespaced_deployment(
            name=job_name, namespace=namespace
        )
    except ApiException as e:
        # 404 means "not found" — any other API failure propagates.
        if e.status != 404:
            raise
        return None
694
+
695
+
696
def get_service(namespace: str, job_name: str) -> Optional[Any]:
    """Fetch a Service by name; returns None when it does not exist."""
    ctx = kubernetes_utils.get_current_kube_config_context_name()
    try:
        return kube_client.core_api(context=ctx).read_namespaced_service(
            name=job_name, namespace=namespace
        )
    except ApiException as e:
        # 404 means "not found" — any other API failure propagates.
        if e.status != 404:
            raise
        return None
705
+
706
+
707
def get_autoscaler(namespace: str, job_name: str) -> Optional[Any]:
    """Look up the autoscaler for a deployment, trying each flavor in turn.

    Order: Aibrix PodAutoscaler ('<name>-pa'), built-in HPA ('<name>-hpa'),
    then KEDA ScaledObject ('<name>-keda'). Returns None when none exist;
    non-404 API errors propagate.
    """
    context = kubernetes_utils.get_current_kube_config_context_name()
    crd_api = kube_client.crd_api(context=context)

    # 1) Aibrix PodAutoscaler
    try:
        return crd_api.get_namespaced_custom_object(
            group='autoscaling.aibrix.ai',
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
            name=f'{job_name}-pa',
        )
    except ApiException as e:
        if e.status != 404:
            raise

    # 2) Built-in Kubernetes HorizontalPodAutoscaler
    try:
        hpa = kube_client.autoscaling_api(
            context=context
        ).read_namespaced_horizontal_pod_autoscaler(
            name=f'{job_name}-hpa', namespace=namespace
        )
        return hpa.to_dict()
    except ApiException as e:
        if e.status != 404:
            raise

    # 3) KEDA ScaledObject
    try:
        return crd_api.get_namespaced_custom_object(
            group='keda.sh',
            version='v1alpha1',
            namespace=namespace,
            plural='scaledobjects',
            name=f'{job_name}-keda',
        )
    except ApiException as e:
        if e.status != 404:
            raise
        return None
747
+
748
+
749
def delete_deployment(namespace: str, name: str) -> Optional[Dict[str, Any]]:
    """Deletes a Kubernetes Deployment in the given namespace.

    Args:
        namespace: Namespace where the deployment exists.
        name: Name of the deployment to delete.

    Returns:
        Response from delete operation, or None on error.
    """
    try:
        context = kubernetes_utils.get_current_kube_config_context_name()
        response = kube_client.apps_api(context=context).delete_namespaced_deployment(
            name=name,
            namespace=namespace,
        )
        return response
    except kube_client.api_exception() as err:
        try:
            error_body = json.loads(err.body)
            error_message = error_body.get('message', '')
            logger.error(f'Error deleting deployment: {error_message}')
        except json.JSONDecodeError:
            error_message = str(err.body)
            logger.error(f'Error deleting deployment: {error_message}')
        else:
            # NOTE(review): this `else` re-raises only when the error body
            # parsed as JSON; a non-JSON body is logged and swallowed
            # (falls through to `return None`). delete_service, by contrast,
            # always re-raises. Confirm which behavior is intended and make
            # the two delete paths consistent.
            raise err
        return None
777
+
778
+
779
def delete_service(namespace: str, name: str) -> Optional[Dict[str, Any]]:
    """Deletes a Kubernetes Service in the given namespace.

    Args:
        namespace: Namespace where the service exists.
        name: Name of the service to delete.

    Returns:
        Response from delete operation, or None on error.
    """
    try:
        context = kubernetes_utils.get_current_kube_config_context_name()
        return kube_client.core_api(context=context).delete_namespaced_service(
            name=name,
            namespace=namespace,
        )
    except kube_client.api_exception() as err:
        # A JSON error body is a normal API failure: log it and give up.
        # Anything else is unexpected, so propagate the original exception.
        try:
            parsed = json.loads(err.body)
        except json.JSONDecodeError:
            logger.error(f'Error deleting service: {err.body}')
            raise err
        logger.error(f"Error deleting service: {parsed.get('message', '')}")
        return None
805
+
806
+
807
def delete_autoscaler(namespace: str, name: str) -> Optional[Dict[str, Any]]:
    """Delete all autoscalers associated with a deployment name.

    This includes:
    - All Aibrix PodAutoscalers (e.g., "-pa", "-apa") targeting the deployment
    - Any HorizontalPodAutoscaler named "<name>-hpa"
    - Any KEDA ScaledObject named "<name>-keda"

    Args:
        namespace: Namespace the autoscalers live in.
        name: Deployment name the autoscalers were derived from.

    Returns:
        Always None. Missing resources (404) are tolerated; other API
        errors are logged (where a handler exists) and re-raised.
    """
    context = kubernetes_utils.get_current_kube_config_context_name()

    # --- Delete ALL PodAutoscalers that target this deployment ---
    try:
        custom_api = kube_client.crd_api(context=context)
        pa_list = custom_api.list_namespaced_custom_object(
            group='autoscaling.aibrix.ai',
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
        )
        for pa in pa_list.get('items', []):
            meta = pa.get('metadata', {})
            spec = pa.get('spec', {})
            pa_name = meta.get('name', '')
            labels = meta.get('labels', {})
            scale_ref = spec.get('scaleTargetRef', {}).get('name')
            # A PA targets this deployment when any of these match: our
            # ownership label, its scaleTargetRef, or a "<name>-" name prefix
            # (covers variants like "-pa" and "-apa").
            targets_deployment = (
                labels.get(DEPLOYMENT_NAME_LABEL) == name
                or scale_ref == name
                or pa_name.startswith(f'{name}-')
            )
            if targets_deployment:
                try:
                    custom_api.delete_namespaced_custom_object(
                        group='autoscaling.aibrix.ai',
                        version='v1alpha1',
                        namespace=namespace,
                        plural='podautoscalers',
                        name=pa_name,
                    )
                    logger.info(f'Deleted PodAutoscaler: {pa_name}')
                except kube_client.api_exception() as err:
                    # Already gone is fine; anything else bubbles up.
                    if getattr(err, 'status', None) != 404:
                        raise
    except kube_client.api_exception() as err:
        # If PA CRD is missing, skip; otherwise bubble up.
        # (None status means no usable HTTP code -- treated as skippable.)
        if getattr(err, 'status', None) not in (404, None):
            raise

    # --- Delete HPA ---
    try:
        autoscaling_api = kube_client.autoscaling_api(context=context)
        autoscaling_api.delete_namespaced_horizontal_pod_autoscaler(
            name=f'{name}-hpa',
            namespace=namespace,
        )
        logger.info(f'Deleted HPA: {name}-hpa')
    except kube_client.api_exception() as err:
        if getattr(err, 'status', None) not in (404, None):
            # Log a readable message (JSON body when available), then
            # re-raise the original exception.
            try:
                error_body = json.loads(err.body)
                error_message = error_body.get('message', '')
                logger.error(f'Error deleting HPA: {error_message}')
            except json.JSONDecodeError:
                logger.error(f'Error deleting HPA: {err.body}')
            raise err

    # --- Delete KEDA ScaledObject ---
    try:
        custom_api = kube_client.crd_api(context=context)
        custom_api.delete_namespaced_custom_object(
            group='keda.sh',
            version='v1alpha1',
            namespace=namespace,
            plural='scaledobjects',
            name=f'{name}-keda',
        )
        logger.info(f'Deleted ScaledObject: {name}-keda')
    except kube_client.api_exception() as err:
        if getattr(err, 'status', None) not in (404, None):
            # Same logging strategy as the HPA branch above.
            try:
                error_body = json.loads(err.body)
                error_message = error_body.get('message', '')
                logger.error(f'Error deleting KEDA ScaledObject: {error_message}')
            except json.JSONDecodeError:
                logger.error(f'Error deleting KEDA ScaledObject: {err.body}')
            raise err

    return None
895
+
896
+
897
def delete_http_addon_resources(name: str, namespace: str) -> None:
    """Tear down KEDA HTTP Add-on resources for a general deployment.

    Removes the HTTPScaledObject '<name>-httpscaledobject' and the Ingress
    '<name>-ingress'. Missing resources (404) are ignored silently; other
    API failures are logged at debug level and swallowed.
    """
    context = kubernetes_utils.get_current_kube_config_context_name()

    # HTTPScaledObject (CRD from the KEDA HTTP Add-on).
    scaled_object = f'{name}-httpscaledobject'
    try:
        kube_client.crd_api(context=context).delete_namespaced_custom_object(
            group='http.keda.sh',
            version='v1alpha1',
            namespace=namespace,
            plural='httpscaledobjects',
            name=scaled_object,
        )
        logger.info(f'Deleted HTTPScaledObject: {scaled_object}')
    except kube_client.api_exception() as err:
        if err.status != 404:
            logger.debug(
                f'Failed to delete HTTPScaledObject {scaled_object}: {err}'
            )

    # Ingress routing traffic into the deployment.
    ingress_name = f'{name}-ingress'
    try:
        kube_client.networking_api(context=context).delete_namespaced_ingress(
            name=ingress_name,
            namespace=namespace,
        )
        logger.info(f'Deleted Ingress: {ingress_name}')
    except kube_client.api_exception() as err:
        if err.status != 404:
            logger.debug(f'Failed to delete Ingress {ingress_name}: {err}')
929
+
930
+
931
def delete_serving_specs(name: str, namespace: str) -> None:
    """Best-effort teardown of every resource backing a served model.

    Deletes the Deployment, Service, any autoscalers, and the KEDA HTTP
    Add-on resources for `name`; individual failures are logged at debug
    level and never propagate.
    """
    resource_deleters = (
        ('deployment', delete_deployment),
        ('service', delete_service),
    )
    for kind, remove in resource_deleters:
        try:
            remove(namespace, name)
            logger.info(f'Deleted {kind}: {name}')
        except Exception as e:
            logger.debug(f'Failed to delete {kind} {name}: {e}')

    # Autoscaler resources (Aibrix PA, HPA, or KEDA ScaledObject).
    try:
        delete_autoscaler(namespace=namespace, name=name)
    except Exception as e:
        logger.debug(f'Failed to delete autoscaler for {name}: {e}')

    # HTTP Add-on resources used by general deployments
    # (handles its own errors internally).
    delete_http_addon_resources(name, namespace)
950
+
951
+
952
def _get_resource_summary(deployment) -> str:
    """Format a deployment's first-container resource requests for display.

    Args:
        deployment: Kubernetes deployment object (or None).

    Returns:
        A three-line string '<gpu>\\n<cpu> CPU\\n<memory>', or '?' when the
        information cannot be extracted.
    """
    if not deployment:
        return '?'

    try:
        pod_containers = deployment.spec.template.spec.containers
        if not pod_containers:
            return '?'
        requests = pod_containers[0].resources.requests or {}

        cpu_req = requests.get('cpu', '?')
        mem_req = requests.get('memory', '?')
        gpu_req = requests.get('nvidia.com/gpu') or requests.get('trainy.ai/gpu')

        # GPU model is taken from deployment labels when present.
        # NOTE(review): the fallback label value 'L4O' looks like a possible
        # typo for 'L4' -- confirm against the deployment template.
        deploy_labels = deployment.metadata.labels or {}
        accelerator = deploy_labels.get('trainy.ai/accelerator', 'L4O')

        gpu_line = f'{accelerator}:{gpu_req}' if gpu_req else 'None'
        return f'{gpu_line}\n{cpu_req} CPU\n{mem_req}'
    except Exception:
        # Any malformed/partial object degrades to a placeholder.
        return '?'
983
+
984
+
985
def get_envoy_external_ip() -> Optional[str]:
    """Return the external address of the Envoy gateway LoadBalancer.

    Scans services in the 'envoy-gateway-system' namespace for a
    LoadBalancer whose name contains 'envoy' and returns its first
    ingress IP (or hostname). Best effort: None on any failure.
    """
    context = kubernetes_utils.get_current_kube_config_context_name()
    core_api = kube_client.core_api(context=context)
    try:
        svc_list = core_api.list_namespaced_service(namespace='envoy-gateway-system')
        for svc in svc_list.items:
            if svc.spec.type != 'LoadBalancer' or 'envoy' not in svc.metadata.name:
                continue
            lb_ingress = svc.status.load_balancer.ingress
            if lb_ingress:
                return lb_ingress[0].ip or lb_ingress[0].hostname
    except Exception:
        # Any API/attribute error is treated as "no address yet".
        pass
    return None
998
+
999
+
1000
def get_ingress_nginx_external_ip() -> Optional[str]:
    """Get the external IP of the keda-ingress-nginx-controller LoadBalancer."""
    context = kubernetes_utils.get_current_kube_config_context_name()
    core_api = kube_client.core_api(context=context)
    try:
        # The controller service lives in the 'keda' namespace.
        svc = core_api.read_namespaced_service(
            name='keda-ingress-nginx-controller', namespace='keda'
        )
        if svc.spec.type != 'LoadBalancer':
            return None
        lb_ingress = svc.status.load_balancer.ingress
        if lb_ingress:
            return lb_ingress[0].ip or lb_ingress[0].hostname
    except Exception:
        # Best effort: any failure means the address is not available.
        pass
    return None
1016
+
1017
+
1018
def get_unique_cluster_name_from_tunnel() -> str:
    """Get cluster name from the apoxy deployment command.

    Reads the 'apoxy' Deployment in 'apoxy-system' and extracts the cluster
    name passed as `... tunnel run <cluster>`. Falls back to 'default' when
    the deployment or the expected command shape is unavailable.
    """
    try:
        context = kubernetes_utils.get_current_kube_config_context_name()
        apps_api = kube_client.apps_api(context=context)

        deployment = apps_api.read_namespaced_deployment(
            name='apoxy', namespace='apoxy-system'
        )

        pod_containers = deployment.spec.template.spec.containers
        if pod_containers and len(pod_containers) > 0:
            cmd = pod_containers[0].command
            # Expected shape: [<binary>, 'tunnel', 'run', <cluster-name>, ...]
            has_tunnel_run = (
                cmd
                and len(cmd) >= 4
                and cmd[1] == 'tunnel'
                and cmd[2] == 'run'
            )
            if has_tunnel_run:
                return cmd[3]  # the cluster name is the 4th argument

        logger.warning('Could not extract cluster name from apoxy deployment command')

    except Exception as e:
        logger.warning(f'Error getting cluster name from apoxy deployment: {e}')

    return 'default'
1047
+
1048
+
1049
def get_endpoint_type_from_config() -> str:
    """Get the endpoint type from konduktor config.

    Returns:
        'trainy' for Apoxy endpoints (default)
        'direct' for LoadBalancer IP endpoints
    """
    try:
        # konduktor_config handles the KONDUKTOR_CONFIG env var for us.
        configured = konduktor_config.get_nested(('serving', 'endpoint'), 'trainy')
        logger.debug(f'[DEBUG] Config endpoint_type: {configured}')
        return configured.lower()
    except Exception as e:
        logger.warning(f'Error reading endpoint config: {e}')

    # Config missing or unreadable: default to Apoxy endpoints.
    logger.debug('[DEBUG] Falling back to default endpoint type: trainy')
    return 'trainy'
1067
+
1068
+
1069
def get_deployment_endpoint(
    force_direct: bool = False, deployment_type: str = 'AIBRIX'
) -> str:
    """Get the endpoint for both vLLM/Aibrix and general deployments.

    Args:
        force_direct: When True, bypass config and use direct (IP) access.
        deployment_type: 'AIBRIX' for vLLM/Aibrix, 'GENERAL' otherwise.

    Returns:
        A bare host/IP (direct mode) or a trainy.us hostname (Apoxy mode);
        '<pending>' when the endpoint is not yet resolvable.
    """
    endpoint_type = 'direct' if force_direct else get_endpoint_type_from_config()

    if endpoint_type == 'direct':
        if deployment_type == 'GENERAL':
            # General deployments are reached via the ingress-nginx IP
            # (callers add a Host header for routing).
            ingress_ip = get_ingress_nginx_external_ip()
            return f'{ingress_ip}' if ingress_ip else '<pending>'
        # vLLM/Aibrix deployments are reached via the Envoy gateway IP.
        try:
            envoy_ip = get_envoy_external_ip()
        except Exception:
            return '<pending>'
        return envoy_ip or '<pending>'

    # Apoxy (trainy.us) endpoints.
    try:
        cluster_name = get_unique_cluster_name_from_tunnel()
    except Exception:
        return '<pending>'
    # Strip last 3 chars: the backend Apoxy setup appends 3 random digits
    # to avoid Apoxy bugs when TunnelNode resources with the same name are
    # deleted/created too quickly; user-facing endpoints hide that suffix.
    if deployment_type == 'GENERAL':
        return f'{cluster_name[:-3]}2.trainy.us'  # General deployments
    return f'{cluster_name[:-3]}.trainy.us'  # vLLM deployments
1112
+
1113
+
1114
def show_status_table(namespace: str, all_users: bool, force_direct: bool = False):
    """Display status of Konduktor Serve models.

    Renders one table row per model found in `namespace`, covering overall
    status, per-resource readiness, endpoint, replica counts and resources.

    Args:
        namespace: Namespace whose serving resources are listed.
        all_users: When True, show all rows plus a 'User' column; otherwise
            only rows owned by the current user.
        force_direct: Forwarded to get_deployment_endpoint to force
            LoadBalancer-IP endpoints instead of Apoxy hostnames.
    """
    context = kubernetes_utils.get_current_kube_config_context_name()

    # Build lookup maps (deployment_name -> object), keyed by the
    # DEPLOYMENT_NAME_LABEL each resource carries.
    apps_api = kube_client.apps_api(context)
    core_api = kube_client.core_api(context)

    deployments_map = {}
    for d in apps_api.list_namespaced_deployment(namespace=namespace).items:
        name = (d.metadata.labels or {}).get(DEPLOYMENT_NAME_LABEL)
        if name is not None:
            deployments_map[name] = d

    services_map = {}
    for s in core_api.list_namespaced_service(namespace=namespace).items:
        name = (s.metadata.labels or {}).get(DEPLOYMENT_NAME_LABEL)
        if name is not None:
            services_map[name] = s

    autoscalers_map = build_autoscaler_map(namespace, context or '')

    model_names = list_models(namespace)
    if not model_names:
        Console().print(
            f'[yellow]No deployments found in namespace {namespace}.[/yellow]'
        )
        return

    Console().print()
    title = '[bold]KONDUKTOR SERVE[/bold]'
    # CI terminals often mangle rounded box-drawing characters.
    is_ci = os.environ.get('CI') or os.environ.get('BUILDKITE')

    # Get Aibrix endpoint once for all Aibrix deployments
    aibrix_endpoint = get_deployment_endpoint(force_direct, 'AIBRIX')
    # Get General endpoint once for all General deployments
    general_endpoint = get_deployment_endpoint(force_direct, 'GENERAL')

    table = Table(title=title, box=box.ASCII if is_ci else box.ROUNDED)
    if all_users:
        table.add_column('User', style='magenta', no_wrap=True)
    table.add_column('Name', style='cyan', no_wrap=True)
    table.add_column('Status', no_wrap=True)
    table.add_column('Summary', style='bold', no_wrap=True)
    table.add_column('Endpoint', style='yellow', no_wrap=True)
    table.add_column('Replicas', style='dim', no_wrap=True)
    table.add_column('Resources', style='white', no_wrap=True)

    # Rows skipped because they belong to another user (single-user mode).
    unowned = 0

    for idx, name in enumerate(model_names):
        deployment = deployments_map.get(name)
        service = services_map.get(name)
        autoscaler = autoscalers_map.get(name)

        # Extract owner from the first resource that carries the username
        # label. Resources may be typed objects (attribute access) or plain
        # dicts (CRDs), hence the hasattr branching.
        owner = None
        for resource in [deployment, service, autoscaler]:
            if not resource:
                continue
            metadata = (
                resource.metadata
                if hasattr(resource, 'metadata')
                else resource.get('metadata', {})
            )
            labels = (
                metadata.labels
                if hasattr(metadata, 'labels')
                else metadata.get('labels', {})
            )
            if labels:
                owner = labels.get('trainy.ai/username')
                if owner:
                    break

        if not all_users and owner != common_utils.get_cleaned_username():
            unowned += 1
            continue

        # Per-resource status ('ready' / 'pending' / 'missing' / None).
        status = get_model_status(name, deployments_map, services_map, autoscalers_map)
        states = [status['deployment'], status['service'], status['autoscaler']]

        def emoji_line(label: str, state: str) -> str:
            # Unknown states render as '❓' via the dict default.
            emoji_map = {
                'ready': '✅',
                'pending': '❓',
                'missing': '❌',
            }
            return f"{label}: {emoji_map.get(state, '❓')}"

        # Check if this is a general deployment (not vLLM/Aibrix)
        is_general = True
        if deployment and hasattr(deployment.metadata, 'labels'):
            labels = deployment.metadata.labels or {}
            if AIBRIX_NAME_LABEL in labels:
                is_general = False

        summary_lines = [
            emoji_line('Deploym', status['deployment'] or 'missing'),
            emoji_line('Service', status['service'] or 'missing'),
        ]

        if is_general:
            # Autoscaler for General: HPA only
            hpa_ready = get_autoscaler_status_for_deployment(
                name, autoscalers_map, is_general=True
            )
            if name in autoscalers_map:
                summary_lines.append(f"AScaler: {'✅' if hpa_ready else '❓'}")
        else:
            # Autoscaler for vLLM: only KPA (APA no longer used)
            if name in autoscalers_map:
                kpa_ready = get_autoscaler_status_for_deployment(
                    name, autoscalers_map, is_general=False
                )
                if 'kpa' in autoscalers_map.get(name, {}):
                    summary_lines.append(f"AScaler: {'✅' if kpa_ready else '❓'}")
        summary = '\n'.join(summary_lines)

        # Overall status: any missing resource -> FAILED; otherwise READY
        # only when every tracked resource (autoscaler included when one
        # exists) reports 'ready'.
        if any(s == 'missing' for s in states):
            status_text = Text('FAILED', style='red')
        else:
            if status['autoscaler'] is not None:
                status_text = (
                    Text('READY', style='green')
                    if all(s == 'ready' for s in states)
                    else Text('PENDING', style='yellow')
                )
            else:
                status_text = (
                    Text('READY', style='green')
                    if (
                        status['deployment'] == 'ready' and status['service'] == 'ready'
                    )
                    else Text('PENDING', style='yellow')
                )

        # Extract labels from deployment, service, or fallback to empty dict
        labels = {}
        if deployment and hasattr(deployment.metadata, 'labels'):
            labels = deployment.metadata.labels or {}
        elif service and hasattr(service.metadata, 'labels'):
            labels = service.metadata.labels or {}
        else:
            labels = {}

        endpoint_str = '<pending>'
        if AIBRIX_NAME_LABEL in labels:
            # Aibrix deployment
            endpoint_type = get_endpoint_type_from_config()
            if force_direct or endpoint_type == 'direct':
                # Direct access: use http for IP endpoints
                endpoint_str = (
                    f'http://{aibrix_endpoint}'
                    if aibrix_endpoint != '<pending>'
                    else aibrix_endpoint
                )
            else:
                # Apoxy access: use https for trainy.us endpoints
                endpoint_str = (
                    f'https://{aibrix_endpoint}'
                    if aibrix_endpoint != '<pending>'
                    else aibrix_endpoint
                )
        else:
            # General deployment
            endpoint_type = get_endpoint_type_from_config()
            if force_direct or endpoint_type == 'direct':
                # Direct access: IP + Host header
                endpoint_str = f'http://{general_endpoint}\nHost: {name}'
            else:
                # Apoxy access: single host + path
                endpoint_str = f'https://{general_endpoint}/{name}'

        # Replicas
        if deployment:
            ready_replicas = str(deployment.status.ready_replicas or 0)
            desired_replicas = str(deployment.spec.replicas or 0)
        else:
            ready_replicas = '?'
            desired_replicas = '?'

        replicas_text = Text()
        replicas_text.append(
            f'Ready: {ready_replicas}/{desired_replicas}\n', style='bold white'
        )

        if status['autoscaler']:
            # Get min/max from deployment labels
            min_r, max_r = '?', '?'

            if deployment and hasattr(deployment.metadata, 'labels'):
                labels = deployment.metadata.labels or {}
                # All deployments with autoscaling get these labels from the template
                original_min_str = labels.get('trainy.ai/original-min-replicas')
                original_max_str = labels.get('trainy.ai/original-max-replicas')
                if original_min_str and original_max_str:
                    min_r, max_r = original_min_str, original_max_str
                    logger.debug(
                        f'[DEBUG] Got replicas from deployment labels: '
                        f'min={min_r}, max={max_r}'
                    )

            replicas_text.append(f'Min : {min_r}\n', style='bold white')
            replicas_text.append(f'Max : {max_r}', style='bold white')

        # Resources
        resources_text = _get_resource_summary(deployment)

        # Row
        if all_users:
            table.add_row(
                owner or '(unknown)',
                name,
                status_text,
                summary,
                endpoint_str,
                replicas_text,
                resources_text,
            )
        else:
            table.add_row(
                name, status_text, summary, endpoint_str, replicas_text, resources_text
            )

        # Blank spacer row between models (not after the last one).
        if idx != len(model_names) - 1:
            table.add_row(*([''] * len(table.columns)))

    # Every row was filtered out as owned by someone else.
    if len(model_names) == unowned:
        Console().print(
            f'[yellow]No deployments created by you found '
            f'in namespace {namespace}. Try --all-users.[/yellow]'
        )
        return

    Console().print(table)