konduktor-nightly 0.1.0.dev20250915104603__py3-none-any.whl → 0.1.0.dev20251107104752__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,6 @@
 
 import json
 import os
-import random
 import tempfile
 import typing
 from typing import Any, Dict, List, Optional, Tuple
@@ -50,35 +49,35 @@ _DEPLOYMENT_METADATA_LABELS = {
 }
 
 
-# actually just gets highest existing deployment number and adds 1
-def get_next_deployment_number(cluster_name: str) -> int:
-    """Get next number by counting existing Apoxy resources."""
-    try:
-        context = kubernetes_utils.get_current_kube_config_context_name()
-        custom_api = kube_client.crd_api(context=context)
-
-        # Count existing backends
-        backends = custom_api.list_cluster_custom_object(
-            group='core.apoxy.dev', version='v1alpha', plural='backends'
-        )
-
-        # Find the highest number
-        max_number = 0
-        for backend in backends.get('items', []):
-            name = backend['metadata']['name']
-            if name.startswith(f'{cluster_name}-backend-'):
-                number = int(name.split('-')[-1])
-                max_number = max(max_number, number)
+def render_specs(
+    task: 'konduktor.Task',
+) -> Tuple[
+    Dict[str, Any], Dict[str, Any], List[Dict[str, Any]], Optional[Dict[str, Any]]
+]:
+    """Renders Kubernetes resource specifications from a Konduktor task.
 
-        return max_number + 1
-    except Exception as e:
-        logger.warning(f'Error counting existing resources: {e}')
-        return random.randint(100, 999)
+    Takes a Konduktor task and generates the necessary Kubernetes resource
+    specifications for deployment by filling the deployment.yaml.j2 template.
+    Automatically detects deployment type (vLLM/Aibrix vs General) based on
+    the task's run command.
 
+    Args:
+        task: A Konduktor Task object containing deployment configuration
+            including resources, serving settings, and run commands.
 
-def render_specs(
-    task: 'konduktor.Task',
-) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]:
+    Returns:
+        A tuple containing:
+        - deployment_spec (Dict[str, Any]): Kubernetes Deployment specification
+        - service_spec (Dict[str, Any]): Kubernetes Service specification
+        - http_addon_resources (List[Dict[str, Any]]): List of HTTP add-on resources
+          (HTTPScaledObject and Ingress) for general deployments; empty for vLLM
+        - pa_resource (Optional[Dict[str, Any]]): PodAutoscaler specification for
+          vLLM deployments with autoscaling enabled, None otherwise
+
+    Raises:
+        ValueError: If required specs are missing after template rendering or
+            if spec validation fails.
+    """
     general = True
     if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
         general = False
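
Callers now unpack four values instead of three (compare `create_deployment` and `create_service` further down). A minimal sketch of the new contract; the `task` and `namespace` values are hypothetical, and the names assume the module shown in this diff is importable:

```python
# Sketch: consuming the new render_specs() four-tuple (hypothetical task/namespace).
deployment_spec, service_spec, http_addon_resources, pa_resource = render_specs(task)

if http_addon_resources:     # general deployment: HTTPScaledObject and/or Ingress
    create_http_addon_resources(namespace, task)
if pa_resource is not None:  # vLLM/Aibrix deployment with autoscaling enabled
    create_pod_autoscaler(namespace, task)
```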
@@ -102,6 +101,9 @@ def render_specs(
                 'min_replicas': task.serving.min_replicas if task.serving else 1,
                 'max_replicas': task.serving.max_replicas if task.serving else 1,
                 'ports': task.serving.ports if task.serving else 8000,
+                'probe_path': (
+                    task.serving.get('probe', None) if task.serving else None
+                ),
                 'autoscaler': (
                     'true'
                     if (
@@ -111,6 +113,15 @@ def render_specs(
                     else 'false'
                 ),
                 'general': general,
+                # Strip last 3 chars: backend Apoxy setup uses unique
+                # suffixes (3 random numbers) to avoid Apoxy bugs when
+                # deleting/creating TunnelNode resources with same names too
+                # quickly, but we hide this complexity from user-facing endpoints
+                'general_base_host': (
+                    f'{get_unique_cluster_name_from_tunnel()[:-3]}2.trainy.us'
+                )
+                if general
+                else None,
                 **_DEPLOYMENT_METADATA_LABELS,
             },
             temp.name,
@@ -119,7 +130,8 @@ def render_specs(
 
     deployment_spec = None
     service_spec = None
-    autoscaler_spec = None
+    http_addon_resources = []  # For general deployments
+    pa_resource = None  # For aibrix deployments w autoscaling
 
     for doc in docs:
         kind = doc.get('kind')
@@ -127,153 +139,104 @@ def render_specs(
             deployment_spec = doc
         elif kind == 'Service':
             service_spec = doc
-        elif kind == 'PodAutoscaler' or kind == 'HorizontalPodAutoscaler':
-            autoscaler_spec = doc
-
-    # not every deployment + service will have podautoscaler
-    if task.serving and task.serving.min_replicas == task.serving.max_replicas:
-        autoscaler_spec = None
+        # HTTPScaledObject resource for general deployments w autoscaling only
+        elif kind == 'HTTPScaledObject':
+            http_addon_resources.append(doc)
+        # Ingress resource for all general deployments
+        elif kind == 'Ingress':
+            http_addon_resources.append(doc)
+        # PodAutoscaler resource for aibrix deployments w autoscaling only
+        elif kind == 'PodAutoscaler':
+            pa_resource = doc
 
     if deployment_spec is None:
         raise ValueError('Deployment manifest not found.')
     if service_spec is None:
         raise ValueError('Service manifest not found.')
+    if general and not http_addon_resources:
+        raise ValueError('General deployment manifests not found.')
+    if (
+        not general
+        and task.serving
+        and task.serving.min_replicas != task.serving.max_replicas
+        and pa_resource is None
+    ):
+        raise ValueError('Aibrix deployment PodAutoscaler manifest not found.')
 
     # Validate specs before returning
     try:
         validator.validate_deployment_spec(deployment_spec)
         validator.validate_service_spec(service_spec)
-        # Only validate HPA if it exists (APA doesn't have official schema)
-        if autoscaler_spec and autoscaler_spec.get('kind') == 'HorizontalPodAutoscaler':
-            validator.validate_horizontalpodautoscaler_spec(autoscaler_spec)
     except ValueError as e:
         raise ValueError(f'Spec validation failed: {e}')
 
-    return deployment_spec, service_spec, autoscaler_spec or {}
+    return deployment_spec, service_spec, http_addon_resources, pa_resource
 
 
-# For general deployments, create resources as needed
-def render_apoxy_spec(task: 'konduktor.Task') -> List[Dict[str, Any]]:
-    """Renders the Apoxy specs for a general deployment."""
+def create_pod_autoscaler(
+    namespace: str,
+    task: 'konduktor.Task',
+    dryrun: bool = False,
+) -> None:
+    """Creates Aibrix PodAutoscaler for non-general deployments."""
+
+    # Check if this is a non-general deployment
     general = True
     if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
         general = False
 
-    if not general:
-        return []  # Only render for general deployments
-
-    if task.run:
-        task.run = task.run.replace('__KONDUKTOR_TASK_NAME__', task.name)
-
-    unique_cluster_name = get_unique_cluster_name_from_tunnel()
-    cluster_name = unique_cluster_name[:-3]
-    deployment_number = get_next_deployment_number(unique_cluster_name)
+    # Only create PA for aibrix deployments w autoscaling
+    if general:
+        return
 
-    with tempfile.NamedTemporaryFile() as temp:
-        common_utils.fill_template(
-            'apoxy-deployment.yaml.j2',
-            {
-                'name': task.name,
-                'user': common_utils.get_cleaned_username(),
-                'ports': task.serving.ports if task.serving else 8000,
-                'general': general,
-                'cluster_name': cluster_name,
-                'unique_cluster_name': unique_cluster_name,
-                'deployment_number': deployment_number,
-                **_DEPLOYMENT_METADATA_LABELS,
-            },
-            temp.name,
+    # Check if autoscaling is needed
+    if not task.serving or task.serving.min_replicas == task.serving.max_replicas:
+        logger.debug(
+            f'[DEBUG] No autoscaling needed: '
+            f'min={task.serving.min_replicas if task.serving else "None"}, '
+            f'max={task.serving.max_replicas if task.serving else "None"}'
         )
-        docs = common_utils.read_yaml_all(temp.name)
-        return docs
+        return  # No autoscaling needed
 
+    logger.debug(
+        f'[DEBUG] PA autoscaling enabled: '
+        f'min={task.serving.min_replicas}, max={task.serving.max_replicas}'
+    )
 
-def create_apoxy_resources(
-    namespace: str,
-    task: 'konduktor.Task',
-    dryrun: bool = False,
-) -> None:
-    """Creates Apoxy resources for a general deployment."""
-
-    apoxy_specs = render_apoxy_spec(task)
+    # Get the PA spec from the rendered template
+    _, _, _, pa_spec = render_specs(task)
 
-    if not apoxy_specs:
+    if not pa_spec:
+        logger.warning('[DEBUG] No PodAutoscaler found in rendered template')
         return
 
     if dryrun:
-        logger.debug(f'[DRYRUN] Would create Apoxy resources:\n{apoxy_specs}')
+        logger.debug(
+            f'[DRYRUN] Would create PA autoscaler: '
+            f'{pa_spec["metadata"].get("name", "<no-name>")}'
        )
         return
 
-    try:
-        context = kubernetes_utils.get_current_kube_config_context_name()
-        custom_api = kube_client.crd_api(context=context)
-
-        for spec in apoxy_specs:
-            kind = spec.get('kind')
-            name = spec['metadata']['name']
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    custom_api = kube_client.crd_api(context=context)
 
-            try:
-                if kind == 'Backend':
-                    custom_api.create_cluster_custom_object(
-                        group='core.apoxy.dev',
-                        version='v1alpha',
-                        plural='backends',
-                        body=spec,
-                    )
-                    logger.info(f'Apoxy Backend {name} created')
-                elif kind == 'HTTPRoute':
-                    custom_api.create_cluster_custom_object(
-                        group='gateway.apoxy.dev',
-                        version='v1',
-                        plural='httproutes',
-                        body=spec,
-                    )
-                    logger.info(f'Apoxy HTTPRoute {name} created')
-            except Exception as e:
-                if '409' in str(e) or 'AlreadyExists' in str(e):
-                    try:
-                        # Delete first, then create
-                        if kind == 'Backend':
-                            custom_api.delete_cluster_custom_object(
-                                group='core.apoxy.dev',
-                                version='v1alpha',
-                                plural='backends',
-                                name=name,
-                            )
-                            custom_api.create_cluster_custom_object(
-                                group='core.apoxy.dev',
-                                version='v1alpha',
-                                plural='backends',
-                                body=spec,
-                            )
-                        elif kind == 'HTTPRoute':
-                            custom_api.delete_cluster_custom_object(
-                                group='gateway.apoxy.dev',
-                                version='v1',
-                                plural='httproutes',
-                                name=name,
-                            )
-                            custom_api.create_cluster_custom_object(
-                                group='gateway.apoxy.dev',
-                                version='v1',
-                                plural='httproutes',
-                                body=spec,
-                            )
-                        logger.info(f'Apoxy {kind} {name} deleted and recreated')
-                    except Exception as delete_create_error:
-                        logger.error(
-                            f'Failed to delete and recreate {kind} {name}: '
-                            f'{delete_create_error}'
-                        )
-                        raise
-                elif '404' in str(e) or 'NotFound' in str(e):
-                    logger.warning(f'Apoxy CRD for {kind} not found. Skipping {name}.')
-                    logger.info('Make sure Apoxy is deployed and CRDs are ready.')
-                    continue
-                else:
-                    raise
+    # Create KPA for aibrix deployments w autoscaling
+    name = pa_spec.get('metadata', {}).get('name', '<no-name>')
+    try:
+        custom_api.create_namespaced_custom_object(
+            group='autoscaling.aibrix.ai',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='podautoscalers',
+            body=pa_spec,
+        )
+        logger.info(f'Pod autoscaler {name} created')
     except Exception as e:
-        logger.error(f'Error creating Apoxy resources: {e}')
+        if '409' in str(e) or 'AlreadyExists' in str(e):
+            logger.warning(f'Pod autoscaler {name} already exists, skipping')
+        else:
+            logger.error(f'Error creating pod autoscaler {name}: {e}')
+            raise
 
 
 def create_deployment(
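
The deleted Apoxy path created cluster-scoped `Backend`/`HTTPRoute` objects with delete-and-recreate retries; the new path creates one namespaced Aibrix PodAutoscaler. A sketch of the equivalent call with the stock `kubernetes` client; the manifest values are illustrative assumptions, not taken from the real template:

```python
from kubernetes import client, config

config.load_kube_config()

# Hypothetical PodAutoscaler body; konduktor renders the real one from
# deployment.yaml.j2 and submits it via kube_client.crd_api().
pa_spec = {
    'apiVersion': 'autoscaling.aibrix.ai/v1alpha1',
    'kind': 'PodAutoscaler',
    'metadata': {'name': 'my-model-pa'},
    'spec': {
        'scaleTargetRef': {'apiVersion': 'apps/v1', 'kind': 'Deployment', 'name': 'my-model'},
        'minReplicas': 1,
        'maxReplicas': 4,
    },
}
client.CustomObjectsApi().create_namespaced_custom_object(
    group='autoscaling.aibrix.ai',
    version='v1alpha1',
    namespace='default',
    plural='podautoscalers',
    body=pa_spec,
)
```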
@@ -286,7 +249,7 @@ def create_deployment(
 
     assert task.resources is not None, 'Task resources are undefined'
 
-    deployment_spec, _, _ = render_specs(task)
+    deployment_spec, _, _, _ = render_specs(task)
 
     # Inject deployment-specific pod metadata
     pod_utils.inject_deployment_pod_metadata(pod_spec, task)
@@ -330,7 +293,7 @@ def create_service(
 
     assert task.resources is not None, 'Task resources are undefined'
 
-    _, service_spec, _ = render_specs(task)
+    _, service_spec, _, _ = render_specs(task)
 
     if dryrun:
         logger.debug(f'[DRYRUN] Would create service:\n{service_spec}')
@@ -354,39 +317,77 @@ def create_service(
             error_message = error_body.get('message', '')
             logger.error(f'Error creating service: {error_message}')
         except json.JSONDecodeError:
-            logger.error(f'Error creating service: {err.body}')
+            logger.error(f'Error creating service: {error_message}')
         raise err
 
 
-def create_autoscaler(namespace: str, task: 'konduktor.Task', dryrun: bool = False):
-    _, _, autoscaler_spec = render_specs(task)
+def create_http_addon_resources(
+    namespace: str,
+    task: 'konduktor.Task',
+    dryrun: bool = False,
+) -> None:
+    """Creates HTTP Add-on resources for general deployments."""
+
+    # Check if this is a non-general deployment
+    general = True
+    if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
+        general = False
 
-    if not autoscaler_spec:
+    # Only create HTTP Add-on resources for general deployments
+    if not general:
         return
 
-    # Decide if it's APA or HPA by looking at autoscaler_spec["kind"]
-    kind = autoscaler_spec.get('kind')
-    context = kubernetes_utils.get_current_kube_config_context_name()
+    _, _, http_addon_resources, _ = render_specs(task)
 
-    if dryrun:
-        logger.debug(f'[DRYRUN] Would create {kind}:\n{autoscaler_spec}')
-        return autoscaler_spec
+    if not http_addon_resources:
+        logger.debug('[DEBUG] No HTTP Add-on resources to create')
+        return
 
-    if kind == 'PodAutoscaler':
-        custom_api = kube_client.crd_api(context=context)
-        return custom_api.create_namespaced_custom_object(
-            group='autoscaling.aibrix.ai',
-            version='v1alpha1',
-            namespace=namespace,
-            plural='podautoscalers',
-            body=autoscaler_spec,
-        )
-    elif kind == 'HorizontalPodAutoscaler':
-        autoscaling_api = kube_client.autoscaling_api(context=context)
-        return autoscaling_api.create_namespaced_horizontal_pod_autoscaler(
-            namespace=namespace,
-            body=autoscaler_spec,
+    if dryrun:
+        logger.debug(
+            f'[DRYRUN] Would create HTTP Add-on resources:\n' f'{http_addon_resources}'
         )
+        return
+
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    logger.debug(f'[DEBUG] Using Kubernetes context: {context}')
+
+    for resource in http_addon_resources:
+        kind = resource.get('kind')
+        name = resource['metadata']['name']
+
+        logger.debug(f'[DEBUG] Creating {kind}: {name}')
+
+        try:
+            if kind == 'HTTPScaledObject':
+                # Create HTTPScaledObject (only for autoscaling)
+                custom_api = kube_client.crd_api(context=context)
+                custom_api.create_namespaced_custom_object(
+                    group='http.keda.sh',
+                    version='v1alpha1',
+                    namespace=namespace,
+                    plural='httpscaledobjects',
+                    body=resource,
+                )
+                logger.info(f'HTTPScaledObject {name} created')
+
+            elif kind == 'Ingress':
+                # Create Ingress (always needed for external access)
+                networking_api = kube_client.networking_api(context=context)
+                networking_api.create_namespaced_ingress(
+                    namespace=namespace,
+                    body=resource,
+                )
+                logger.info(f'Ingress {name} created')
+
+        except Exception as e:
+            if '409' in str(e) or 'AlreadyExists' in str(e):
+                logger.warning(
+                    f'HTTP Add-on resource {kind} {name} already exists, skipping'
+                )
+            else:
+                logger.error(f'Error creating HTTP Add-on resource {kind} {name}: {e}')
+                raise
 
 
 def list_models(namespace: str) -> List[str]:
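
For orientation, a sketch of the HTTPScaledObject shape that `create_http_addon_resources` submits for general deployments. Only the group/version/plural and the `replicas.min/max` layout are confirmed elsewhere in this diff; the remaining fields are assumptions about the KEDA HTTP Add-on schema:

```python
# Hypothetical HTTPScaledObject, created via group='http.keda.sh',
# version='v1alpha1', plural='httpscaledobjects'.
http_scaled_object = {
    'apiVersion': 'http.keda.sh/v1alpha1',
    'kind': 'HTTPScaledObject',
    'metadata': {'name': 'my-app-httpscaledobject'},
    'spec': {
        'hosts': ['my-app'],
        'scaleTargetRef': {'name': 'my-app', 'service': 'my-app', 'port': 8000},
        # Same replicas.min/max layout that _extract_min_max_from_autoscaler reads.
        'replicas': {'min': 0, 'max': 4},
    },
}
```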
@@ -402,7 +403,7 @@ def list_models(namespace: str) -> List[str]:
     label_selector = DEPLOYMENT_NAME_LABEL
     model_names: set[str] = set()
 
-    # --- Deployments ---
+    # Deployments
     for deploy in apps.list_namespaced_deployment(
         namespace, label_selector=label_selector
     ).items:
@@ -411,7 +412,7 @@ def list_models(namespace: str) -> List[str]:
         if name:
             model_names.add(name)
 
-    # --- Services ---
+    # Services
     for svc in core.list_namespaced_service(
         namespace, label_selector=label_selector
     ).items:
@@ -420,17 +421,16 @@ def list_models(namespace: str) -> List[str]:
         if name:
             model_names.add(name)
 
-    # --- PodAutoscalers ---
-    # APA
+    # Podautoscalers (KPA only)
     try:
-        apa_list = crds.list_namespaced_custom_object(
+        pa_list = crds.list_namespaced_custom_object(
             group='autoscaling.aibrix.ai',
             version='v1alpha1',
             namespace=namespace,
             plural='podautoscalers',
         )
-        for apa in apa_list.get('items', []):
-            labels = apa.get('metadata', {}).get('labels', {}) or {}
+        for pa in pa_list.get('items', []):
+            labels = pa.get('metadata', {}).get('labels', {})
             name = labels.get(DEPLOYMENT_NAME_LABEL)
             if name:
                 model_names.add(name)
@@ -439,7 +439,7 @@ def list_models(namespace: str) -> List[str]:
             # re-raise if it's not just missing CRD
             raise
         # otherwise ignore, cluster just doesn't have Aibrix CRDs
-        logger.warning('Skipping APA lookup. Aibrix CRDs not found in cluster')
+        logger.warning('Skipping PA lookup. Aibrix CRDs not found in cluster')
 
     # HPA
     autoscaling_api = kube_client.autoscaling_api(context=context)
@@ -455,56 +455,134 @@ def list_models(namespace: str) -> List[str]:
     return sorted(model_names)
 
 
-def is_autoscaler_ready(autoscaler_obj: dict) -> bool:
+def get_autoscaler_status_for_deployment(
+    name: str, autoscalers_map: dict, is_general: bool
+) -> bool:
+    """Return autoscaler readiness by deployment type.
+
+    - General: returns hpa_ready
+    - vLLM/Aibrix: returns kpa_ready
     """
-    Returns True if the autoscaler (PodAutoscaler or HPA) is considered healthy.
-    For PodAutoscaler: AbleToScale == True.
-    For HPA: AbleToScale == True, or presence of the HPA is enough if no conditions.
+
+    def _is_ready(obj: dict) -> bool:
+        try:
+            conditions = obj.get('status', {}).get('conditions') or []
+            kind = obj.get('kind') or ''
+
+            for cond in conditions:
+                if cond.get('type') == 'AbleToScale' and cond.get('status') == 'True':
+                    return True
+
+            if kind == 'HorizontalPodAutoscaler':
+                # Check for ScalingActive condition
+                for cond in conditions:
+                    if cond.get('type') == 'ScalingActive':
+                        # ScalingActive: True means actively scaling
+                        if cond.get('status') == 'True':
+                            return True
+                        # ScalingActive: False with ScalingDisabled reason
+                        # is normal for scale-to-zero
+                        if (
+                            cond.get('status') == 'False'
+                            and cond.get('reason') == 'ScalingDisabled'
+                        ):
+                            return True
+
+            # Treat existing HPA with no conditions as ready
+            return not conditions or any(
+                c.get('type') == 'AbleToScale' and c.get('status') == 'True'
+                for c in conditions
+            )
+        except Exception as e:
+            logger.warning(f'Error checking autoscaler readiness: {e}')
+            return False
+
+    kpa_ready = False
+    hpa_ready = False
+
+    dep_autos = autoscalers_map.get(name, {})
+
+    if is_general:
+        if 'hpa' in dep_autos:
+            hpa_ready = _is_ready(dep_autos['hpa'])
+            return hpa_ready
+        return False
+
+    if 'kpa' in dep_autos:
+        kpa_ready = _is_ready(dep_autos['kpa'])
+        return kpa_ready
+    return False
+
+
+def _extract_min_max_from_autoscaler(autoscaler: dict) -> tuple[str, str]:
+    """Extract min/max replicas across PA/HPA/KEDA.
+
+    Returns (min_str, max_str). Unknowns as '?'.
     """
     try:
-        if hasattr(autoscaler_obj, 'to_dict'):
-            autoscaler_obj = autoscaler_obj.to_dict()
-        conditions = autoscaler_obj.get('status', {}).get('conditions', []) or []
-
-        # If conditions exist, look for AbleToScale == True
-        for cond in conditions:
-            cond_type = cond.get('type')
-            cond_status = cond.get('status')
-            if cond_type == 'AbleToScale' and cond_status == 'True':
-                return True
-
-        # If no conditions are present (common for HPAs), assume
-        # it's fine as soon as object exists
-        if not conditions:
-            return True
+        if not autoscaler:
+            return '?', '?'
+
+        spec = autoscaler.get('spec', {})
+
+        # Check for HTTPScaledObject format (replicas.min/max)
+        if 'replicas' in spec:
+            replicas = spec.get('replicas', {})
+            if 'min' in replicas or 'max' in replicas:
+                return (str(replicas.get('min', '?')), str(replicas.get('max', '?')))
+
+        # Check for KEDA ScaledObject format (minReplicaCount/maxReplicaCount)
+        if 'minReplicaCount' in spec or 'maxReplicaCount' in spec:
+            return (
+                str(spec.get('minReplicaCount', '?')),
+                str(spec.get('maxReplicaCount', '?')),
+            )
 
-    except Exception as e:
-        logger.warning(f'Error checking autoscaler readiness: {e}')
-    return False
+        # Check for PA/HPA format (minReplicas/maxReplicas)
+        if 'minReplicas' in spec or 'maxReplicas' in spec:
+            return str(spec.get('minReplicas', '?')), str(spec.get('maxReplicas', '?'))
+    except Exception:
+        pass
+    return '?', '?'
 
 
 def build_autoscaler_map(namespace: str, context: str) -> dict[str, dict]:
-    """Fetch all APAs and HPAs and combine into 1 dict keyed by deployment name."""
-    autoscalers = {}
+    """Fetch autoscalers and return a simple map keyed by deployment name.
+
+    Simplified model:
+    - Aibrix deployments: 1 PodAutoscaler (KPA) if autoscaling enabled
+    - General deployments: 1 HPA (created by KEDA) if autoscaling enabled
+    - No autoscaling: No autoscaler
 
-    # --- Aibrix APAs ---
+    Returns: {deployment_name: {'kpa': pa_obj} or {'hpa': hpa_obj}}
+    """
+    autoscalers: Dict[str, Dict[str, Any]] = {}
+
+    # --- Aibrix deployment KPA ---
     try:
         crd_api = kube_client.crd_api(context=context)
-        apa_list = crd_api.list_namespaced_custom_object(
+        pa_list = crd_api.list_namespaced_custom_object(
             group='autoscaling.aibrix.ai',
             version='v1alpha1',
             namespace=namespace,
             plural='podautoscalers',
         )
-        for apa in apa_list.get('items', []):
-            labels = apa.get('metadata', {}).get('labels', {}) or {}
+        for pa in pa_list.get('items', []):
+            labels = pa.get('metadata', {}).get('labels', {})
             dep_name = labels.get(DEPLOYMENT_NAME_LABEL)
+            if not dep_name:
+                # Fallback to scaleTargetRef.name
+                spec = pa.get('spec', {})
+                scale_ref = spec.get('scaleTargetRef', {})
+                dep_name = scale_ref.get('name')
             if dep_name:
-                autoscalers[dep_name] = apa
+                autoscalers[dep_name] = {'kpa': pa}
+        if pa_list.get('items'):
+            logger.debug(f"Found {len(pa_list.get('items', []))} PodAutoscalers")
     except Exception as e:
-        logger.warning(f'Error fetching APAs: {e}')
+        logger.warning(f'Error fetching PodAutoscalers: {e}')
 
-    # --- Standard HPAs ---
+    # --- General deployment HPA ---
     try:
         autoscaling_api = kube_client.autoscaling_api(context=context)
         hpa_list = autoscaling_api.list_namespaced_horizontal_pod_autoscaler(
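
The readiness helper keys off Kubernetes status conditions. Two illustrative objects (assumed shapes, as the API would return them) and how `_is_ready` treats each:

```python
# Ready via the generic check: AbleToScale == True.
kpa_obj = {
    'kind': 'PodAutoscaler',
    'status': {'conditions': [{'type': 'AbleToScale', 'status': 'True'}]},
}

# Also ready: for HPAs, ScalingActive == False with reason ScalingDisabled
# is treated as the normal scale-to-zero state rather than a failure.
hpa_scaled_to_zero = {
    'kind': 'HorizontalPodAutoscaler',
    'status': {'conditions': [
        {'type': 'ScalingActive', 'status': 'False', 'reason': 'ScalingDisabled'},
    ]},
}
```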
@@ -513,8 +591,18 @@ def build_autoscaler_map(namespace: str, context: str) -> dict[str, dict]:
         for hpa in hpa_list.items:
             labels = getattr(hpa.metadata, 'labels', {}) or {}
             dep_name = labels.get(DEPLOYMENT_NAME_LABEL)
-            if dep_name and dep_name not in autoscalers:
-                autoscalers[dep_name] = hpa.to_dict()
+            if not dep_name:
+                # Fallback to scaleTargetRef.name
+                spec = hpa.spec.to_dict() if hpa.spec else {}
+                scale_ref = spec.get('scale_target_ref', {})
+                dep_name = scale_ref.get('name')
+            if dep_name:
+                hpa_dict = hpa.to_dict()
+                hpa_dict['kind'] = 'HorizontalPodAutoscaler'
+                hpa_dict['apiVersion'] = 'autoscaling/v2'
+                autoscalers[dep_name] = {'hpa': hpa_dict}
+        if hpa_list.items:
+            logger.debug(f'Found {len(hpa_list.items)} HPAs')
     except Exception as e:
         logger.warning(f'Error fetching HPAs: {e}')
 
@@ -539,28 +627,55 @@ def get_model_status(
         d = deployments[name]
         ready = (d.status.ready_replicas or 0) if d.status else 0
         desired = (d.spec.replicas or 0) if d.spec else 0
-        status['deployment'] = 'ready' if ready == desired else 'pending'
+
+        labels = d.metadata.labels or {}
+        is_aibrix = AIBRIX_NAME_LABEL in labels
+
+        if is_aibrix and name in autoscalers:
+            # For Aibrix deployments, get the original min replicas from
+            # deployment labels
+            original_min_replicas = 0
+            original_min_str = labels.get('trainy.ai/original-min-replicas')
+            if original_min_str:
+                try:
+                    original_min_replicas = int(original_min_str)
+                except (ValueError, TypeError):
+                    pass
+
+            # For Aibrix deployments, consider ready if:
+            # 1. Ready replicas >= original minimum replicas, OR
+            # 2. If original_min_replicas is 0 (scale-to-zero allowed),
+            #    then ready == desired
+            if original_min_replicas == 0:
+                status['deployment'] = 'ready' if ready == desired else 'pending'
+            else:
+                status['deployment'] = (
+                    'ready' if ready >= original_min_replicas else 'pending'
+                )
+        else:
+            # General deployments or no autoscaler: use simple ready == desired check
+            status['deployment'] = 'ready' if ready == desired else 'pending'
 
     # --- Service ---
     if name in services:
-        s = services[name]
-        labels = getattr(s.metadata, 'labels', {}) or {}
-        is_vllm = AIBRIX_NAME_LABEL in labels
-
-        if is_vllm:
-            status['service'] = 'ready'
-        else:
-            lb_ready = False
-            if s.status and s.status.load_balancer and s.status.load_balancer.ingress:
-                ingress = s.status.load_balancer.ingress
-                if ingress and (ingress[0].ip or ingress[0].hostname):
-                    lb_ready = True
-            status['service'] = 'ready' if lb_ready else 'pending'
+        status['service'] = 'ready'
+    else:
+        status['service'] = 'missing'
 
     # --- Autoscaler ---
     if name in autoscalers:
-        a = autoscalers[name]
-        status['autoscaler'] = 'ready' if is_autoscaler_ready(a) else 'pending'
+        # Check if this is a general deployment (not vLLM/Aibrix)
+        is_general = True
+        if deployments.get(name) and hasattr(deployments[name].metadata, 'labels'):
+            labels = deployments[name].metadata.labels or {}
+            if AIBRIX_NAME_LABEL in labels:
+                is_general = False
+
+        # Check actual autoscaler readiness
+        autoscaler_ready = get_autoscaler_status_for_deployment(
+            name, autoscalers, is_general
+        )
+        status['autoscaler'] = 'ready' if autoscaler_ready else 'pending'
     else:
         status['autoscaler'] = None
 
@@ -591,7 +706,7 @@ def get_service(namespace: str, job_name: str) -> Optional[Any]:
 
 def get_autoscaler(namespace: str, job_name: str) -> Optional[Any]:
     context = kubernetes_utils.get_current_kube_config_context_name()
-    # --- Try Aibrix APA first ---
+    # --- Try Aibrix PA first ---
    crd_api = kube_client.crd_api(context=context)
    try:
        return crd_api.get_namespaced_custom_object(
@@ -599,7 +714,7 @@ def get_autoscaler(namespace: str, job_name: str) -> Optional[Any]:
             version='v1alpha1',
             namespace=namespace,
             plural='podautoscalers',
-            name=f'{job_name}-apa',
+            name=f'{job_name}-pa',
         )
     except ApiException as e:
         if e.status != 404:
@@ -612,6 +727,19 @@ def get_autoscaler(namespace: str, job_name: str) -> Optional[Any]:
         return autoscaling_api.read_namespaced_horizontal_pod_autoscaler(
             name=f'{job_name}-hpa', namespace=namespace
         ).to_dict()
+    except ApiException as e:
+        if e.status != 404:
+            raise
+
+    # --- Try KEDA ScaledObject ---
+    try:
+        return crd_api.get_namespaced_custom_object(
+            group='keda.sh',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='scaledobjects',
+            name=f'{job_name}-keda',
+        )
     except ApiException as e:
         if e.status == 404:
             return None
@@ -677,52 +805,133 @@ def delete_service(namespace: str, name: str) -> Optional[Dict[str, Any]]:
 
 
 def delete_autoscaler(namespace: str, name: str) -> Optional[Dict[str, Any]]:
-    """Deletes either an Aibrix PodAutoscaler or a HorizontalPodAutoscaler."""
+    """Delete all autoscalers associated with a deployment name.
+
+    This includes:
+    - All Aibrix PodAutoscalers (e.g., "-pa", "-apa") targeting the deployment
+    - Any HorizontalPodAutoscaler named "<name>-hpa"
+    - Any KEDA ScaledObject named "<name>-keda"
+    """
     context = kubernetes_utils.get_current_kube_config_context_name()
 
-    # --- Try delete APA first ---
+    # --- Delete ALL PodAutoscalers that target this deployment ---
     try:
         custom_api = kube_client.crd_api(context=context)
-        response = custom_api.delete_namespaced_custom_object(
+        pa_list = custom_api.list_namespaced_custom_object(
             group='autoscaling.aibrix.ai',
             version='v1alpha1',
             namespace=namespace,
             plural='podautoscalers',
-            name=f'{name}-apa',
         )
-        return response
+        for pa in pa_list.get('items', []):
+            meta = pa.get('metadata', {})
+            spec = pa.get('spec', {})
+            pa_name = meta.get('name', '')
+            labels = meta.get('labels', {})
+            scale_ref = spec.get('scaleTargetRef', {}).get('name')
+            targets_deployment = (
+                labels.get(DEPLOYMENT_NAME_LABEL) == name
+                or scale_ref == name
+                or pa_name.startswith(f'{name}-')
+            )
+            if targets_deployment:
+                try:
+                    custom_api.delete_namespaced_custom_object(
+                        group='autoscaling.aibrix.ai',
+                        version='v1alpha1',
+                        namespace=namespace,
+                        plural='podautoscalers',
+                        name=pa_name,
+                    )
+                    logger.info(f'Deleted PodAutoscaler: {pa_name}')
+                except kube_client.api_exception() as err:
+                    if getattr(err, 'status', None) != 404:
+                        raise
     except kube_client.api_exception() as err:
-        # If not found, try HPA
-        try:
-            error_body = json.loads(err.body)
-            if err.status != 404:
-                raise
-        except Exception:
-            if getattr(err, 'status', None) != 404:
-                raise
+        # If PA CRD is missing, skip; otherwise bubble up
+        if getattr(err, 'status', None) not in (404, None):
+            raise
 
-    # --- Try delete HPA ---
+    # --- Delete HPA ---
     try:
         autoscaling_api = kube_client.autoscaling_api(context=context)
-        return autoscaling_api.delete_namespaced_horizontal_pod_autoscaler(
+        autoscaling_api.delete_namespaced_horizontal_pod_autoscaler(
             name=f'{name}-hpa',
             namespace=namespace,
         )
+        logger.info(f'Deleted HPA: {name}-hpa')
     except kube_client.api_exception() as err:
-        try:
-            error_body = json.loads(err.body)
-            error_message = error_body.get('message', '')
-            logger.error(f'Error deleting Pod Autoscaler: {error_message}')
-        except json.JSONDecodeError:
-            logger.error(f'Error deleting Pod Autoscaler: {err.body}')
-        raise err
+        if getattr(err, 'status', None) not in (404, None):
+            try:
+                error_body = json.loads(err.body)
+                error_message = error_body.get('message', '')
+                logger.error(f'Error deleting HPA: {error_message}')
+            except json.JSONDecodeError:
+                logger.error(f'Error deleting HPA: {err.body}')
+            raise err
+
+    # --- Delete KEDA ScaledObject ---
+    try:
+        custom_api = kube_client.crd_api(context=context)
+        custom_api.delete_namespaced_custom_object(
+            group='keda.sh',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='scaledobjects',
+            name=f'{name}-keda',
+        )
+        logger.info(f'Deleted ScaledObject: {name}-keda')
+    except kube_client.api_exception() as err:
+        if getattr(err, 'status', None) not in (404, None):
+            try:
+                error_body = json.loads(err.body)
+                error_message = error_body.get('message', '')
+                logger.error(f'Error deleting KEDA ScaledObject: {error_message}')
+            except json.JSONDecodeError:
+                logger.error(f'Error deleting KEDA ScaledObject: {err.body}')
+            raise err
+
+    return None
+
+
+def delete_http_addon_resources(name: str, namespace: str) -> None:
+    """Deletes HTTP Add-on resources for general deployments."""
+    context = kubernetes_utils.get_current_kube_config_context_name()
+
+    # Delete HTTPScaledObject
+    try:
+        custom_api = kube_client.crd_api(context=context)
+        custom_api.delete_namespaced_custom_object(
+            group='http.keda.sh',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='httpscaledobjects',
+            name=f'{name}-httpscaledobject',
+        )
+        logger.info(f'Deleted HTTPScaledObject: {name}-httpscaledobject')
+    except kube_client.api_exception() as err:
+        if err.status != 404:
+            logger.debug(
+                f'Failed to delete HTTPScaledObject {name}-httpscaledobject: {err}'
+            )
+
+    # Delete Ingress
+    try:
+        networking_api = kube_client.networking_api(context=context)
+        networking_api.delete_namespaced_ingress(
+            name=f'{name}-ingress',
+            namespace=namespace,
+        )
+        logger.info(f'Deleted Ingress: {name}-ingress')
+    except kube_client.api_exception() as err:
+        if err.status != 404:
+            logger.debug(f'Failed to delete Ingress {name}-ingress: {err}')
 
 
 def delete_serving_specs(name: str, namespace: str) -> None:
     for kind, delete_fn in [
         ('deployment', delete_deployment),
         ('service', delete_service),
-        ('podautoscaler', delete_autoscaler),
     ]:
         try:
             delete_fn(namespace, name)
@@ -730,6 +939,15 @@ def delete_serving_specs(name: str, namespace: str) -> None:
         except Exception as e:
             logger.debug(f'Failed to delete {kind} {name}: {e}')
 
+    # Delete autoscaler resources (Aibrix PA, HPA, or KEDA ScaledObject)
+    try:
+        delete_autoscaler(namespace=namespace, name=name)
+    except Exception as e:
+        logger.debug(f'Failed to delete autoscaler for {name}: {e}')
+
+    # Delete HTTP Add-on resources for general deployments
+    delete_http_addon_resources(name, namespace)
+
 
 def _get_resource_summary(deployment) -> str:
     """Extract and format pod resource information from a deployment.
@@ -779,6 +997,24 @@ def get_envoy_external_ip() -> Optional[str]:
     return None
 
 
+def get_ingress_nginx_external_ip() -> Optional[str]:
+    """Get the external IP of the keda-ingress-nginx-controller LoadBalancer."""
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    core_api = kube_client.core_api(context=context)
+    try:
+        # Look for keda-ingress-nginx-controller service in keda namespace
+        service = core_api.read_namespaced_service(
+            name='keda-ingress-nginx-controller', namespace='keda'
+        )
+        if service.spec.type == 'LoadBalancer':
+            ingress = service.status.load_balancer.ingress
+            if ingress:
+                return ingress[0].ip or ingress[0].hostname
+    except Exception:
+        pass
+    return None
+
+
 def get_unique_cluster_name_from_tunnel() -> str:
     """Get cluster name from the apoxy deployment command."""
     try:
@@ -787,7 +1023,7 @@ def get_unique_cluster_name_from_tunnel() -> str:
 
         # Get the apoxy deployment
         deployment = apps_api.read_namespaced_deployment(
-            name='apoxy', namespace='default'
+            name='apoxy', namespace='apoxy-system'
         )
 
         # Extract cluster name from the command
@@ -820,112 +1056,59 @@ def get_endpoint_type_from_config() -> str:
     try:
         # Use the proper config system that handles KONDUKTOR_CONFIG env var
         endpoint_type = konduktor_config.get_nested(('serving', 'endpoint'), 'trainy')
+        logger.debug(f'[DEBUG] Config endpoint_type: {endpoint_type}')
         return endpoint_type.lower()
     except Exception as e:
         logger.warning(f'Error reading endpoint config: {e}')
 
     # Default to trainy if config not found or error
+    logger.debug('[DEBUG] Falling back to default endpoint type: trainy')
     return 'trainy'
 
 
-def _get_loadbalancer_endpoint_with_port(service_name: str) -> str:
-    """Helper function to get LoadBalancer endpoint with port."""
-    try:
-        context = kubernetes_utils.get_current_kube_config_context_name()
-        core_api = kube_client.core_api(context=context)
-
-        # Get the service
-        service = core_api.read_namespaced_service(
-            name=service_name, namespace='default'
-        )
-
-        # Check if it's LoadBalancer type
-        if service.spec.type == 'LoadBalancer':
-            ingress = service.status.load_balancer.ingress
-            if ingress and len(ingress) > 0:
-                ip = ingress[0].ip
-                if ip:
-                    return f'{ip}:{service.spec.ports[0].port}'
-
-        # If not LoadBalancer or no IP, return pending
-        return '<pending>'
-
-    except Exception:
-        return '<pending>'
-
-
-def get_vllm_deployment_endpoint(force_direct: bool = False) -> str:
-    """Get the endpoint for vLLM/Aibrix deployments based on config."""
+def get_deployment_endpoint(
+    force_direct: bool = False, deployment_type: str = 'AIBRIX'
+) -> str:
+    """Get the endpoint for both vLLM/Aibrix and general deployments."""
     if force_direct:
-        # Force direct endpoint display regardless of config
         endpoint_type = 'direct'
     else:
         endpoint_type = get_endpoint_type_from_config()
 
     if endpoint_type == 'direct':
-        try:
-            aibrix_endpoint = get_envoy_external_ip()
-            return aibrix_endpoint or '<pending>'
-        except Exception:
-            return '<pending>'
-    else:
-        try:
-            cluster_name = get_unique_cluster_name_from_tunnel()
-            return f'{cluster_name[:-3]}.trainy.us'
-        except Exception:
-            # Fallback to direct endpoint if trainy.us not available
+        # Check if this is a general deployment
+        if deployment_type == 'GENERAL':
+            # General deployments: ingress IP + Host header
+            ingress_ip = get_ingress_nginx_external_ip()
+            if ingress_ip:
+                return f'{ingress_ip}'
+            else:
+                return '<pending>'
+        else:
+            # vLLM/Aibrix deployments: envoy IP
             try:
                 aibrix_endpoint = get_envoy_external_ip()
-                if aibrix_endpoint:
-                    # Aibrix deployments route through Envoy Gateway on port 80
-                    return f'{aibrix_endpoint}'
+                return aibrix_endpoint or '<pending>'
             except Exception:
-                pass
-        return '<pending>'
-
-
-def get_general_deployment_endpoint(
-    service_name: str, force_direct: bool = False
-) -> str:
-    """Get the endpoint for a general deployment based on config."""
-    if force_direct:
-        # Force direct endpoint display regardless of config
-        endpoint_type = 'direct'
-    else:
-        endpoint_type = get_endpoint_type_from_config()
-
-    if endpoint_type == 'direct':
-        # Use LoadBalancer IP with port
-        return _get_loadbalancer_endpoint_with_port(service_name)
+                return '<pending>'
     else:
-        # Use Apoxy (trainy.us) - existing logic
+        # Use Apoxy (trainy.us)
         try:
-            context = kubernetes_utils.get_current_kube_config_context_name()
-            custom_api = kube_client.crd_api(context=context)
-
-            # Query route with label selector using the original task name
-            routes = custom_api.list_cluster_custom_object(
-                group='gateway.apoxy.dev',
-                version='v1',
-                plural='httproutes',
-                label_selector=f'task_name={service_name}',
-            )
-
-            # Extract endpoint_name from the route labels
-            if routes.get('items') and len(routes['items']) > 0:
-                route = routes['items'][0]  # Should only be one route with this label
-                labels = route.get('metadata', {}).get('labels', {})
-                endpoint_name = labels.get('endpoint_name')
-                if endpoint_name:
-                    return endpoint_name
-
-            # Fallback if no route found - try direct LoadBalancer endpoint
-            return _get_loadbalancer_endpoint_with_port(service_name)
-
-        except Exception as e:
-            logger.warning(f'Endpoint error for general deployment {service_name}: {e}')
-            # Fallback to direct LoadBalancer endpoint on error
-            return _get_loadbalancer_endpoint_with_port(service_name)
+            cluster_name = get_unique_cluster_name_from_tunnel()
+            if deployment_type == 'GENERAL':
+                # Strip last 3 chars: backend Apoxy setup uses unique
+                # suffixes (3 random numbers) to avoid Apoxy bugs when
+                # deleting/creating TunnelNode resources with same names too
+                # quickly, but we hide this complexity from user-facing endpoints
+                return f'{cluster_name[:-3]}2.trainy.us'  # General deployments
+            else:
+                # Strip last 3 chars: backend Apoxy setup uses unique
+                # suffixes (3 random numbers) to avoid Apoxy bugs when
+                # deleting/creating TunnelNode resources with same names too
+                # quickly, but we hide this complexity from user-facing endpoints
+                return f'{cluster_name[:-3]}.trainy.us'  # vLLM deployments
+        except Exception:
+            return '<pending>'
 
 
 def show_status_table(namespace: str, all_users: bool, force_direct: bool = False):
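
A worked example of the host derivation (the tunnel name is an assumption; only the 3-digit suffix convention comes from the comments above):

```python
unique_cluster_name = 'mycluster483'  # backend appends 3 random digits
base = unique_cluster_name[:-3]       # -> 'mycluster'
general_host = f'{base}2.trainy.us'   # general deployments
aibrix_host = f'{base}.trainy.us'     # vLLM/Aibrix deployments
```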
@@ -962,7 +1145,9 @@ def show_status_table(namespace: str, all_users: bool, force_direct: bool = False):
     is_ci = os.environ.get('CI') or os.environ.get('BUILDKITE')
 
     # Get Aibrix endpoint once for all Aibrix deployments
-    aibrix_endpoint = get_vllm_deployment_endpoint(force_direct)
+    aibrix_endpoint = get_deployment_endpoint(force_direct, 'AIBRIX')
+    # Get General endpoint once for all General deployments
+    general_endpoint = get_deployment_endpoint(force_direct, 'GENERAL')
 
     table = Table(title=title, box=box.ASCII if is_ci else box.ROUNDED)
     if all_users:
@@ -1017,14 +1202,33 @@
             }
             return f"{label}: {emoji_map.get(state, '❓')}"
 
+        # Check if this is a general deployment (not vLLM/Aibrix)
+        is_general = True
+        if deployment and hasattr(deployment.metadata, 'labels'):
+            labels = deployment.metadata.labels or {}
+            if AIBRIX_NAME_LABEL in labels:
+                is_general = False
+
         summary_lines = [
             emoji_line('Deploym', status['deployment'] or 'missing'),
             emoji_line('Service', status['service'] or 'missing'),
         ]
-        if status['autoscaler'] is not None:
-            summary_lines.append(
-                emoji_line('AScaler', status['autoscaler'] or 'missing')
+
+        if is_general:
+            # Autoscaler for General: HPA only
+            hpa_ready = get_autoscaler_status_for_deployment(
+                name, autoscalers_map, is_general=True
             )
+            if name in autoscalers_map:
+                summary_lines.append(f"AScaler: {'✅' if hpa_ready else '❓'}")
+        else:
+            # Autoscaler for vLLM: only KPA (APA no longer used)
+            if name in autoscalers_map:
+                kpa_ready = get_autoscaler_status_for_deployment(
+                    name, autoscalers_map, is_general=False
+                )
+                if 'kpa' in autoscalers_map.get(name, {}):
+                    summary_lines.append(f"AScaler: {'✅' if kpa_ready else '❓'}")
         summary = '\n'.join(summary_lines)
 
         # Overall status
@@ -1057,29 +1261,61 @@
 
         endpoint_str = '<pending>'
         if AIBRIX_NAME_LABEL in labels:
-            # Aibrix deployment - use the pre-computed endpoint
-            endpoint_str = aibrix_endpoint
+            # Aibrix deployment
+            endpoint_type = get_endpoint_type_from_config()
+            if force_direct or endpoint_type == 'direct':
+                # Direct access: use http for IP endpoints
+                endpoint_str = (
+                    f'http://{aibrix_endpoint}'
+                    if aibrix_endpoint != '<pending>'
+                    else aibrix_endpoint
+                )
+            else:
+                # Apoxy access: use https for trainy.us endpoints
+                endpoint_str = (
+                    f'https://{aibrix_endpoint}'
+                    if aibrix_endpoint != '<pending>'
+                    else aibrix_endpoint
+                )
         else:
             # General deployment
-            endpoint_str = get_general_deployment_endpoint(name, force_direct)
+            endpoint_type = get_endpoint_type_from_config()
+            if force_direct or endpoint_type == 'direct':
+                # Direct access: IP + Host header
+                endpoint_str = f'http://{general_endpoint}\nHost: {name}'
+            else:
+                # Apoxy access: single host + path
+                endpoint_str = f'https://{general_endpoint}/{name}'
 
         # Replicas
-        ready_replicas = (
-            str(deployment.status.ready_replicas or 0) if deployment else '?'
-        )
-        desired_replicas = str(deployment.spec.replicas or 0) if deployment else '?'
+        if deployment:
+            ready_replicas = str(deployment.status.ready_replicas or 0)
+            desired_replicas = str(deployment.spec.replicas or 0)
+        else:
+            ready_replicas = '?'
+            desired_replicas = '?'
+
         replicas_text = Text()
         replicas_text.append(
             f'Ready: {ready_replicas}/{desired_replicas}\n', style='bold white'
         )
+
         if status['autoscaler']:
-            spec = (
-                autoscaler.get('spec', {})
-                if isinstance(autoscaler, dict)
-                else getattr(autoscaler, 'spec', {})
-            )
-            min_r = str(spec.get('minReplicas', spec.get('min_replicas', '?')))
-            max_r = str(spec.get('maxReplicas', spec.get('max_replicas', '?')))
+            # Get min/max from deployment labels
+            min_r, max_r = '?', '?'
+
+            if deployment and hasattr(deployment.metadata, 'labels'):
+                labels = deployment.metadata.labels or {}
+                # All deployments with autoscaling get these labels from the template
+                original_min_str = labels.get('trainy.ai/original-min-replicas')
+                original_max_str = labels.get('trainy.ai/original-max-replicas')
+                if original_min_str and original_max_str:
+                    min_r, max_r = original_min_str, original_max_str
+                    logger.debug(
+                        f'[DEBUG] Got replicas from deployment labels: '
+                        f'min={min_r}, max={max_r}'
+                    )
+
             replicas_text.append(f'Min : {min_r}\n', style='bold white')
             replicas_text.append(f'Max : {max_r}', style='bold white')
 