konduktor-nightly 0.1.0.dev20250915104603__py3-none-any.whl → 0.1.0.dev20251107104752__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,6 @@
 
 import json
 import os
-import random
 import tempfile
 import typing
 from typing import Any, Dict, List, Optional, Tuple
@@ -50,35 +49,35 @@ _DEPLOYMENT_METADATA_LABELS = {
 }
 
 
-# actually just gets highest existing deployment number and adds 1
-def get_next_deployment_number(cluster_name: str) -> int:
-    """Get next number by counting existing Apoxy resources."""
-    try:
-        context = kubernetes_utils.get_current_kube_config_context_name()
-        custom_api = kube_client.crd_api(context=context)
-
-        # Count existing backends
-        backends = custom_api.list_cluster_custom_object(
-            group='core.apoxy.dev', version='v1alpha', plural='backends'
-        )
-
-        # Find the highest number
-        max_number = 0
-        for backend in backends.get('items', []):
-            name = backend['metadata']['name']
-            if name.startswith(f'{cluster_name}-backend-'):
-                number = int(name.split('-')[-1])
-                max_number = max(max_number, number)
+def render_specs(
+    task: 'konduktor.Task',
+) -> Tuple[
+    Dict[str, Any], Dict[str, Any], List[Dict[str, Any]], Optional[Dict[str, Any]]
+]:
+    """Renders Kubernetes resource specifications from a Konduktor task.
 
-        return max_number + 1
-    except Exception as e:
-        logger.warning(f'Error counting existing resources: {e}')
-        return random.randint(100, 999)
+    Takes a Konduktor task and generates the necessary Kubernetes resource
+    specifications for deployment by filling the deployment.yaml.j2 template.
+    Automatically detects deployment type (vLLM/Aibrix vs General) based on
+    the task's run command.
 
+    Args:
+        task: A Konduktor Task object containing deployment configuration
+            including resources, serving settings, and run commands.
 
-def render_specs(
-    task: 'konduktor.Task',
-) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]:
+    Returns:
+        A tuple containing:
+        - deployment_spec (Dict[str, Any]): Kubernetes Deployment specification
+        - service_spec (Dict[str, Any]): Kubernetes Service specification
+        - http_addon_resources (List[Dict[str, Any]]): List of HTTP add-on resources
+          (HTTPScaledObject and Ingress) for general deployments; empty for vLLM
+        - pa_resource (Optional[Dict[str, Any]]): PodAutoscaler specification for
+          vLLM deployments with autoscaling enabled, None otherwise
+
+    Raises:
+        ValueError: If required specs are missing after template rendering or
+            if spec validation fails.
+    """
     general = True
     if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
         general = False
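
Callers now unpack four values instead of three (compare `create_deployment` and `create_service` further down). A minimal sketch of the new contract; the `task` and `namespace` values are hypothetical, and the names assume the module shown in this diff is importable:

```python
# Sketch: consuming the new render_specs() four-tuple (hypothetical task/namespace).
deployment_spec, service_spec, http_addon_resources, pa_resource = render_specs(task)

if http_addon_resources:     # general deployment: HTTPScaledObject and/or Ingress
    create_http_addon_resources(namespace, task)
if pa_resource is not None:  # vLLM/Aibrix deployment with autoscaling enabled
    create_pod_autoscaler(namespace, task)
```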
@@ -102,6 +101,9 @@ def render_specs(
                 'min_replicas': task.serving.min_replicas if task.serving else 1,
                 'max_replicas': task.serving.max_replicas if task.serving else 1,
                 'ports': task.serving.ports if task.serving else 8000,
+                'probe_path': (
+                    task.serving.get('probe', None) if task.serving else None
+                ),
                 'autoscaler': (
                     'true'
                     if (
@@ -111,6 +113,15 @@ def render_specs(
                     else 'false'
                 ),
                 'general': general,
+                # Strip last 3 chars: backend Apoxy setup uses unique
+                # suffixes (3 random numbers) to avoid Apoxy bugs when
+                # deleting/creating TunnelNode resources with same names too
+                # quickly, but we hide this complexity from user-facing endpoints
+                'general_base_host': (
+                    f'{get_unique_cluster_name_from_tunnel()[:-3]}2.trainy.us'
+                )
+                if general
+                else None,
                 **_DEPLOYMENT_METADATA_LABELS,
             },
             temp.name,
@@ -119,7 +130,8 @@ def render_specs(
 
     deployment_spec = None
     service_spec = None
-    autoscaler_spec = None
+    http_addon_resources = []  # For general deployments
+    pa_resource = None  # For aibrix deployments w autoscaling
 
     for doc in docs:
         kind = doc.get('kind')
@@ -127,153 +139,104 @@ def render_specs(
             deployment_spec = doc
         elif kind == 'Service':
             service_spec = doc
-        elif kind == 'PodAutoscaler' or kind == 'HorizontalPodAutoscaler':
-            autoscaler_spec = doc
-
-    # not every deployment + service will have podautoscaler
-    if task.serving and task.serving.min_replicas == task.serving.max_replicas:
-        autoscaler_spec = None
+        # HTTPScaledObject resource for general deployments w autoscaling only
+        elif kind == 'HTTPScaledObject':
+            http_addon_resources.append(doc)
+        # Ingress resource for all general deployments
+        elif kind == 'Ingress':
+            http_addon_resources.append(doc)
+        # PodAutoscaler resource for aibrix deployments w autoscaling only
+        elif kind == 'PodAutoscaler':
+            pa_resource = doc
 
     if deployment_spec is None:
         raise ValueError('Deployment manifest not found.')
     if service_spec is None:
         raise ValueError('Service manifest not found.')
+    if general and not http_addon_resources:
+        raise ValueError('General deployment manifests not found.')
+    if (
+        not general
+        and task.serving
+        and task.serving.min_replicas != task.serving.max_replicas
+        and pa_resource is None
+    ):
+        raise ValueError('Aibrix deployment PodAutoscaler manifest not found.')
 
     # Validate specs before returning
     try:
         validator.validate_deployment_spec(deployment_spec)
         validator.validate_service_spec(service_spec)
-        # Only validate HPA if it exists (APA doesn't have official schema)
-        if autoscaler_spec and autoscaler_spec.get('kind') == 'HorizontalPodAutoscaler':
-            validator.validate_horizontalpodautoscaler_spec(autoscaler_spec)
     except ValueError as e:
         raise ValueError(f'Spec validation failed: {e}')
 
-    return deployment_spec, service_spec, autoscaler_spec or {}
+    return deployment_spec, service_spec, http_addon_resources, pa_resource
 
 
-# For general deployments, create resources as needed
-def render_apoxy_spec(task: 'konduktor.Task') -> List[Dict[str, Any]]:
-    """Renders the Apoxy specs for a general deployment."""
+def create_pod_autoscaler(
+    namespace: str,
+    task: 'konduktor.Task',
+    dryrun: bool = False,
+) -> None:
+    """Creates Aibrix PodAutoscaler for non-general deployments."""
+
+    # Check if this is a non-general deployment
     general = True
     if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
         general = False
 
-    if not general:
-        return []  # Only render for general deployments
-
-    if task.run:
-        task.run = task.run.replace('__KONDUKTOR_TASK_NAME__', task.name)
-
-    unique_cluster_name = get_unique_cluster_name_from_tunnel()
-    cluster_name = unique_cluster_name[:-3]
-    deployment_number = get_next_deployment_number(unique_cluster_name)
+    # Only create PA for aibrix deployments w autoscaling
+    if general:
+        return
 
-    with tempfile.NamedTemporaryFile() as temp:
-        common_utils.fill_template(
-            'apoxy-deployment.yaml.j2',
-            {
-                'name': task.name,
-                'user': common_utils.get_cleaned_username(),
-                'ports': task.serving.ports if task.serving else 8000,
-                'general': general,
-                'cluster_name': cluster_name,
-                'unique_cluster_name': unique_cluster_name,
-                'deployment_number': deployment_number,
-                **_DEPLOYMENT_METADATA_LABELS,
-            },
-            temp.name,
+    # Check if autoscaling is needed
+    if not task.serving or task.serving.min_replicas == task.serving.max_replicas:
+        logger.debug(
+            f'[DEBUG] No autoscaling needed: '
+            f'min={task.serving.min_replicas if task.serving else "None"}, '
+            f'max={task.serving.max_replicas if task.serving else "None"}'
         )
-        docs = common_utils.read_yaml_all(temp.name)
-        return docs
+        return  # No autoscaling needed
 
+    logger.debug(
+        f'[DEBUG] PA autoscaling enabled: '
+        f'min={task.serving.min_replicas}, max={task.serving.max_replicas}'
+    )
 
-def create_apoxy_resources(
-    namespace: str,
-    task: 'konduktor.Task',
-    dryrun: bool = False,
-) -> None:
-    """Creates Apoxy resources for a general deployment."""
-
-    apoxy_specs = render_apoxy_spec(task)
+    # Get the PA spec from the rendered template
+    _, _, _, pa_spec = render_specs(task)
 
-    if not apoxy_specs:
+    if not pa_spec:
+        logger.warning('[DEBUG] No PodAutoscaler found in rendered template')
         return
 
     if dryrun:
-        logger.debug(f'[DRYRUN] Would create Apoxy resources:\n{apoxy_specs}')
+        logger.debug(
+            f'[DRYRUN] Would create PA autoscaler: '
+            f'{pa_spec["metadata"].get("name", "<no-name>")}'
        )
         return
 
-    try:
-        context = kubernetes_utils.get_current_kube_config_context_name()
-        custom_api = kube_client.crd_api(context=context)
-
-        for spec in apoxy_specs:
-            kind = spec.get('kind')
-            name = spec['metadata']['name']
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    custom_api = kube_client.crd_api(context=context)
 
-            try:
-                if kind == 'Backend':
-                    custom_api.create_cluster_custom_object(
-                        group='core.apoxy.dev',
-                        version='v1alpha',
-                        plural='backends',
-                        body=spec,
-                    )
-                    logger.info(f'Apoxy Backend {name} created')
-                elif kind == 'HTTPRoute':
-                    custom_api.create_cluster_custom_object(
-                        group='gateway.apoxy.dev',
-                        version='v1',
-                        plural='httproutes',
-                        body=spec,
-                    )
-                    logger.info(f'Apoxy HTTPRoute {name} created')
-            except Exception as e:
-                if '409' in str(e) or 'AlreadyExists' in str(e):
-                    try:
-                        # Delete first, then create
-                        if kind == 'Backend':
-                            custom_api.delete_cluster_custom_object(
-                                group='core.apoxy.dev',
-                                version='v1alpha',
-                                plural='backends',
-                                name=name,
-                            )
-                            custom_api.create_cluster_custom_object(
-                                group='core.apoxy.dev',
-                                version='v1alpha',
-                                plural='backends',
-                                body=spec,
-                            )
-                        elif kind == 'HTTPRoute':
-                            custom_api.delete_cluster_custom_object(
-                                group='gateway.apoxy.dev',
-                                version='v1',
-                                plural='httproutes',
-                                name=name,
-                            )
-                            custom_api.create_cluster_custom_object(
-                                group='gateway.apoxy.dev',
-                                version='v1',
-                                plural='httproutes',
-                                body=spec,
-                            )
-                        logger.info(f'Apoxy {kind} {name} deleted and recreated')
-                    except Exception as delete_create_error:
-                        logger.error(
-                            f'Failed to delete and recreate {kind} {name}: '
-                            f'{delete_create_error}'
-                        )
-                        raise
-                elif '404' in str(e) or 'NotFound' in str(e):
-                    logger.warning(f'Apoxy CRD for {kind} not found. Skipping {name}.')
-                    logger.info('Make sure Apoxy is deployed and CRDs are ready.')
-                    continue
-                else:
-                    raise
+    # Create KPA for aibrix deployments w autoscaling
+    name = pa_spec.get('metadata', {}).get('name', '<no-name>')
+    try:
+        custom_api.create_namespaced_custom_object(
+            group='autoscaling.aibrix.ai',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='podautoscalers',
+            body=pa_spec,
+        )
+        logger.info(f'Pod autoscaler {name} created')
     except Exception as e:
-        logger.error(f'Error creating Apoxy resources: {e}')
+        if '409' in str(e) or 'AlreadyExists' in str(e):
+            logger.warning(f'Pod autoscaler {name} already exists, skipping')
+        else:
+            logger.error(f'Error creating pod autoscaler {name}: {e}')
+            raise
 
 
 def create_deployment(
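
The deleted Apoxy path created cluster-scoped `Backend`/`HTTPRoute` objects with delete-and-recreate retries; the new path creates one namespaced Aibrix PodAutoscaler. A sketch of the equivalent call with the stock `kubernetes` client; the manifest values are illustrative assumptions, not taken from the real template:

```python
from kubernetes import client, config

config.load_kube_config()

# Hypothetical PodAutoscaler body; konduktor renders the real one from
# deployment.yaml.j2 and submits it via kube_client.crd_api().
pa_spec = {
    'apiVersion': 'autoscaling.aibrix.ai/v1alpha1',
    'kind': 'PodAutoscaler',
    'metadata': {'name': 'my-model-pa'},
    'spec': {
        'scaleTargetRef': {'apiVersion': 'apps/v1', 'kind': 'Deployment', 'name': 'my-model'},
        'minReplicas': 1,
        'maxReplicas': 4,
    },
}
client.CustomObjectsApi().create_namespaced_custom_object(
    group='autoscaling.aibrix.ai',
    version='v1alpha1',
    namespace='default',
    plural='podautoscalers',
    body=pa_spec,
)
```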
@@ -286,7 +249,7 @@ def create_deployment(
 
     assert task.resources is not None, 'Task resources are undefined'
 
-    deployment_spec, _, _ = render_specs(task)
+    deployment_spec, _, _, _ = render_specs(task)
 
     # Inject deployment-specific pod metadata
     pod_utils.inject_deployment_pod_metadata(pod_spec, task)
@@ -330,7 +293,7 @@ def create_service(
 
     assert task.resources is not None, 'Task resources are undefined'
 
-    _, service_spec, _ = render_specs(task)
+    _, service_spec, _, _ = render_specs(task)
 
     if dryrun:
         logger.debug(f'[DRYRUN] Would create service:\n{service_spec}')
@@ -354,39 +317,77 @@ def create_service(
             error_message = error_body.get('message', '')
             logger.error(f'Error creating service: {error_message}')
         except json.JSONDecodeError:
-            logger.error(f'Error creating service: {err.body}')
+            logger.error(f'Error creating service: {error_message}')
         raise err
 
 
-def create_autoscaler(namespace: str, task: 'konduktor.Task', dryrun: bool = False):
-    _, _, autoscaler_spec = render_specs(task)
+def create_http_addon_resources(
+    namespace: str,
+    task: 'konduktor.Task',
+    dryrun: bool = False,
+) -> None:
+    """Creates HTTP Add-on resources for general deployments."""
+
+    # Check if this is a non-general deployment
+    general = True
+    if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
+        general = False
 
-    if not autoscaler_spec:
+    # Only create HTTP Add-on resources for general deployments
+    if not general:
         return
 
-    # Decide if it's APA or HPA by looking at autoscaler_spec["kind"]
-    kind = autoscaler_spec.get('kind')
-    context = kubernetes_utils.get_current_kube_config_context_name()
+    _, _, http_addon_resources, _ = render_specs(task)
 
-    if dryrun:
-        logger.debug(f'[DRYRUN] Would create {kind}:\n{autoscaler_spec}')
-        return autoscaler_spec
+    if not http_addon_resources:
+        logger.debug('[DEBUG] No HTTP Add-on resources to create')
+        return
 
-    if kind == 'PodAutoscaler':
-        custom_api = kube_client.crd_api(context=context)
-        return custom_api.create_namespaced_custom_object(
-            group='autoscaling.aibrix.ai',
-            version='v1alpha1',
-            namespace=namespace,
-            plural='podautoscalers',
-            body=autoscaler_spec,
-        )
-    elif kind == 'HorizontalPodAutoscaler':
-        autoscaling_api = kube_client.autoscaling_api(context=context)
-        return autoscaling_api.create_namespaced_horizontal_pod_autoscaler(
-            namespace=namespace,
-            body=autoscaler_spec,
+    if dryrun:
+        logger.debug(
+            f'[DRYRUN] Would create HTTP Add-on resources:\n' f'{http_addon_resources}'
         )
+        return
+
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    logger.debug(f'[DEBUG] Using Kubernetes context: {context}')
+
+    for resource in http_addon_resources:
+        kind = resource.get('kind')
+        name = resource['metadata']['name']
+
+        logger.debug(f'[DEBUG] Creating {kind}: {name}')
+
+        try:
+            if kind == 'HTTPScaledObject':
+                # Create HTTPScaledObject (only for autoscaling)
+                custom_api = kube_client.crd_api(context=context)
+                custom_api.create_namespaced_custom_object(
+                    group='http.keda.sh',
+                    version='v1alpha1',
+                    namespace=namespace,
+                    plural='httpscaledobjects',
+                    body=resource,
+                )
+                logger.info(f'HTTPScaledObject {name} created')
+
+            elif kind == 'Ingress':
+                # Create Ingress (always needed for external access)
+                networking_api = kube_client.networking_api(context=context)
+                networking_api.create_namespaced_ingress(
+                    namespace=namespace,
+                    body=resource,
+                )
+                logger.info(f'Ingress {name} created')
+
+        except Exception as e:
+            if '409' in str(e) or 'AlreadyExists' in str(e):
+                logger.warning(
+                    f'HTTP Add-on resource {kind} {name} already exists, skipping'
+                )
+            else:
+                logger.error(f'Error creating HTTP Add-on resource {kind} {name}: {e}')
+                raise
 
 
 def list_models(namespace: str) -> List[str]:
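
For orientation, a sketch of the HTTPScaledObject shape that `create_http_addon_resources` submits for general deployments. Only the group/version/plural and the `replicas.min/max` layout are confirmed elsewhere in this diff; the remaining fields are assumptions about the KEDA HTTP Add-on schema:

```python
# Hypothetical HTTPScaledObject, created via group='http.keda.sh',
# version='v1alpha1', plural='httpscaledobjects'.
http_scaled_object = {
    'apiVersion': 'http.keda.sh/v1alpha1',
    'kind': 'HTTPScaledObject',
    'metadata': {'name': 'my-app-httpscaledobject'},
    'spec': {
        'hosts': ['my-app'],
        'scaleTargetRef': {'name': 'my-app', 'service': 'my-app', 'port': 8000},
        # Same replicas.min/max layout that _extract_min_max_from_autoscaler reads.
        'replicas': {'min': 0, 'max': 4},
    },
}
```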
@@ -402,7 +403,7 @@ def list_models(namespace: str) -> List[str]:
     label_selector = DEPLOYMENT_NAME_LABEL
     model_names: set[str] = set()
 
-    # --- Deployments ---
+    # Deployments
     for deploy in apps.list_namespaced_deployment(
         namespace, label_selector=label_selector
     ).items:
@@ -411,7 +412,7 @@ def list_models(namespace: str) -> List[str]:
         if name:
             model_names.add(name)
 
-    # --- Services ---
+    # Services
     for svc in core.list_namespaced_service(
         namespace, label_selector=label_selector
     ).items:
@@ -420,17 +421,16 @@ def list_models(namespace: str) -> List[str]:
         if name:
             model_names.add(name)
 
-    # --- PodAutoscalers ---
-    # APA
+    # Podautoscalers (KPA only)
     try:
-        apa_list = crds.list_namespaced_custom_object(
+        pa_list = crds.list_namespaced_custom_object(
             group='autoscaling.aibrix.ai',
             version='v1alpha1',
             namespace=namespace,
             plural='podautoscalers',
         )
-        for apa in apa_list.get('items', []):
-            labels = apa.get('metadata', {}).get('labels', {}) or {}
+        for pa in pa_list.get('items', []):
+            labels = pa.get('metadata', {}).get('labels', {})
             name = labels.get(DEPLOYMENT_NAME_LABEL)
             if name:
                 model_names.add(name)
@@ -439,7 +439,7 @@ def list_models(namespace: str) -> List[str]:
             # re-raise if it's not just missing CRD
             raise
         # otherwise ignore, cluster just doesn't have Aibrix CRDs
-        logger.warning('Skipping APA lookup. Aibrix CRDs not found in cluster')
+        logger.warning('Skipping PA lookup. Aibrix CRDs not found in cluster')
 
     # HPA
     autoscaling_api = kube_client.autoscaling_api(context=context)
@@ -455,56 +455,134 @@ def list_models(namespace: str) -> List[str]:
     return sorted(model_names)
 
 
-def is_autoscaler_ready(autoscaler_obj: dict) -> bool:
+def get_autoscaler_status_for_deployment(
+    name: str, autoscalers_map: dict, is_general: bool
+) -> bool:
+    """Return autoscaler readiness by deployment type.
+
+    - General: returns hpa_ready
+    - vLLM/Aibrix: returns kpa_ready
     """
-    Returns True if the autoscaler (PodAutoscaler or HPA) is considered healthy.
-    For PodAutoscaler: AbleToScale == True.
-    For HPA: AbleToScale == True, or presence of the HPA is enough if no conditions.
+
+    def _is_ready(obj: dict) -> bool:
+        try:
+            conditions = obj.get('status', {}).get('conditions') or []
+            kind = obj.get('kind') or ''
+
+            for cond in conditions:
+                if cond.get('type') == 'AbleToScale' and cond.get('status') == 'True':
+                    return True
+
+            if kind == 'HorizontalPodAutoscaler':
+                # Check for ScalingActive condition
+                for cond in conditions:
+                    if cond.get('type') == 'ScalingActive':
+                        # ScalingActive: True means actively scaling
+                        if cond.get('status') == 'True':
+                            return True
+                        # ScalingActive: False with ScalingDisabled reason
+                        # is normal for scale-to-zero
+                        if (
+                            cond.get('status') == 'False'
+                            and cond.get('reason') == 'ScalingDisabled'
+                        ):
+                            return True
+
+            # Treat existing HPA with no conditions as ready
+            return not conditions or any(
+                c.get('type') == 'AbleToScale' and c.get('status') == 'True'
+                for c in conditions
+            )
+        except Exception as e:
+            logger.warning(f'Error checking autoscaler readiness: {e}')
+            return False
+
+    kpa_ready = False
+    hpa_ready = False
+
+    dep_autos = autoscalers_map.get(name, {})
+
+    if is_general:
+        if 'hpa' in dep_autos:
+            hpa_ready = _is_ready(dep_autos['hpa'])
+            return hpa_ready
+        return False
+
+    if 'kpa' in dep_autos:
+        kpa_ready = _is_ready(dep_autos['kpa'])
+        return kpa_ready
+    return False
+
+
+def _extract_min_max_from_autoscaler(autoscaler: dict) -> tuple[str, str]:
+    """Extract min/max replicas across PA/HPA/KEDA.
+
+    Returns (min_str, max_str). Unknowns as '?'.
     """
     try:
-        if hasattr(autoscaler_obj, 'to_dict'):
-            autoscaler_obj = autoscaler_obj.to_dict()
-        conditions = autoscaler_obj.get('status', {}).get('conditions', []) or []
-
-        # If conditions exist, look for AbleToScale == True
-        for cond in conditions:
-            cond_type = cond.get('type')
-            cond_status = cond.get('status')
-            if cond_type == 'AbleToScale' and cond_status == 'True':
-                return True
-
-        # If no conditions are present (common for HPAs), assume
-        # it's fine as soon as object exists
-        if not conditions:
-            return True
+        if not autoscaler:
+            return '?', '?'
+
+        spec = autoscaler.get('spec', {})
+
+        # Check for HTTPScaledObject format (replicas.min/max)
+        if 'replicas' in spec:
+            replicas = spec.get('replicas', {})
+            if 'min' in replicas or 'max' in replicas:
+                return (str(replicas.get('min', '?')), str(replicas.get('max', '?')))
+
+        # Check for KEDA ScaledObject format (minReplicaCount/maxReplicaCount)
+        if 'minReplicaCount' in spec or 'maxReplicaCount' in spec:
+            return (
+                str(spec.get('minReplicaCount', '?')),
+                str(spec.get('maxReplicaCount', '?')),
+            )
 
-    except Exception as e:
-        logger.warning(f'Error checking autoscaler readiness: {e}')
-    return False
+        # Check for PA/HPA format (minReplicas/maxReplicas)
+        if 'minReplicas' in spec or 'maxReplicas' in spec:
+            return str(spec.get('minReplicas', '?')), str(spec.get('maxReplicas', '?'))
+    except Exception:
+        pass
+    return '?', '?'
 
 
 def build_autoscaler_map(namespace: str, context: str) -> dict[str, dict]:
-    """Fetch all APAs and HPAs and combine into 1 dict keyed by deployment name."""
-    autoscalers = {}
+    """Fetch autoscalers and return a simple map keyed by deployment name.
+
+    Simplified model:
+    - Aibrix deployments: 1 PodAutoscaler (KPA) if autoscaling enabled
+    - General deployments: 1 HPA (created by KEDA) if autoscaling enabled
+    - No autoscaling: No autoscaler
 
-    # --- Aibrix APAs ---
+    Returns: {deployment_name: {'kpa': pa_obj} or {'hpa': hpa_obj}}
+    """
+    autoscalers: Dict[str, Dict[str, Any]] = {}
+
+    # --- Aibrix deployment KPA ---
     try:
         crd_api = kube_client.crd_api(context=context)
-        apa_list = crd_api.list_namespaced_custom_object(
+        pa_list = crd_api.list_namespaced_custom_object(
             group='autoscaling.aibrix.ai',
             version='v1alpha1',
             namespace=namespace,
             plural='podautoscalers',
         )
-        for apa in apa_list.get('items', []):
-            labels = apa.get('metadata', {}).get('labels', {}) or {}
+        for pa in pa_list.get('items', []):
+            labels = pa.get('metadata', {}).get('labels', {})
             dep_name = labels.get(DEPLOYMENT_NAME_LABEL)
+            if not dep_name:
+                # Fallback to scaleTargetRef.name
+                spec = pa.get('spec', {})
+                scale_ref = spec.get('scaleTargetRef', {})
+                dep_name = scale_ref.get('name')
             if dep_name:
-                autoscalers[dep_name] = apa
+                autoscalers[dep_name] = {'kpa': pa}
+        if pa_list.get('items'):
+            logger.debug(f"Found {len(pa_list.get('items', []))} PodAutoscalers")
     except Exception as e:
-        logger.warning(f'Error fetching APAs: {e}')
+        logger.warning(f'Error fetching PodAutoscalers: {e}')
 
-    # --- Standard HPAs ---
+    # --- General deployment HPA ---
     try:
         autoscaling_api = kube_client.autoscaling_api(context=context)
         hpa_list = autoscaling_api.list_namespaced_horizontal_pod_autoscaler(
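
The readiness helper keys off Kubernetes status conditions. Two illustrative objects (assumed shapes, as the API would return them) and how `_is_ready` treats each:

```python
# Ready via the generic check: AbleToScale == True.
kpa_obj = {
    'kind': 'PodAutoscaler',
    'status': {'conditions': [{'type': 'AbleToScale', 'status': 'True'}]},
}

# Also ready: for HPAs, ScalingActive == False with reason ScalingDisabled
# is treated as the normal scale-to-zero state rather than a failure.
hpa_scaled_to_zero = {
    'kind': 'HorizontalPodAutoscaler',
    'status': {'conditions': [
        {'type': 'ScalingActive', 'status': 'False', 'reason': 'ScalingDisabled'},
    ]},
}
```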
@@ -513,8 +591,18 @@ def build_autoscaler_map(namespace: str, context: str) -> dict[str, dict]:
         for hpa in hpa_list.items:
             labels = getattr(hpa.metadata, 'labels', {}) or {}
             dep_name = labels.get(DEPLOYMENT_NAME_LABEL)
-            if dep_name and dep_name not in autoscalers:
-                autoscalers[dep_name] = hpa.to_dict()
+            if not dep_name:
+                # Fallback to scaleTargetRef.name
+                spec = hpa.spec.to_dict() if hpa.spec else {}
+                scale_ref = spec.get('scale_target_ref', {})
+                dep_name = scale_ref.get('name')
+            if dep_name:
+                hpa_dict = hpa.to_dict()
+                hpa_dict['kind'] = 'HorizontalPodAutoscaler'
+                hpa_dict['apiVersion'] = 'autoscaling/v2'
+                autoscalers[dep_name] = {'hpa': hpa_dict}
+        if hpa_list.items:
+            logger.debug(f'Found {len(hpa_list.items)} HPAs')
     except Exception as e:
         logger.warning(f'Error fetching HPAs: {e}')
 
@@ -539,28 +627,55 @@ def get_model_status(
         d = deployments[name]
         ready = (d.status.ready_replicas or 0) if d.status else 0
         desired = (d.spec.replicas or 0) if d.spec else 0
-        status['deployment'] = 'ready' if ready == desired else 'pending'
+
+        labels = d.metadata.labels or {}
+        is_aibrix = AIBRIX_NAME_LABEL in labels
+
+        if is_aibrix and name in autoscalers:
+            # For Aibrix deployments, get the original min replicas from
+            # deployment labels
+            original_min_replicas = 0
+            original_min_str = labels.get('trainy.ai/original-min-replicas')
+            if original_min_str:
+                try:
+                    original_min_replicas = int(original_min_str)
+                except (ValueError, TypeError):
+                    pass
+
+            # For Aibrix deployments, consider ready if:
+            # 1. Ready replicas >= original minimum replicas, OR
+            # 2. If original_min_replicas is 0 (scale-to-zero allowed),
+            #    then ready == desired
+            if original_min_replicas == 0:
+                status['deployment'] = 'ready' if ready == desired else 'pending'
+            else:
+                status['deployment'] = (
+                    'ready' if ready >= original_min_replicas else 'pending'
+                )
+        else:
+            # General deployments or no autoscaler: use simple ready == desired check
+            status['deployment'] = 'ready' if ready == desired else 'pending'
 
     # --- Service ---
     if name in services:
-        s = services[name]
-        labels = getattr(s.metadata, 'labels', {}) or {}
-        is_vllm = AIBRIX_NAME_LABEL in labels
-
-        if is_vllm:
-            status['service'] = 'ready'
-        else:
-            lb_ready = False
-            if s.status and s.status.load_balancer and s.status.load_balancer.ingress:
-                ingress = s.status.load_balancer.ingress
-                if ingress and (ingress[0].ip or ingress[0].hostname):
-                    lb_ready = True
-            status['service'] = 'ready' if lb_ready else 'pending'
+        status['service'] = 'ready'
+    else:
+        status['service'] = 'missing'
 
     # --- Autoscaler ---
     if name in autoscalers:
-        a = autoscalers[name]
-        status['autoscaler'] = 'ready' if is_autoscaler_ready(a) else 'pending'
+        # Check if this is a general deployment (not vLLM/Aibrix)
+        is_general = True
+        if deployments.get(name) and hasattr(deployments[name].metadata, 'labels'):
+            labels = deployments[name].metadata.labels or {}
+            if AIBRIX_NAME_LABEL in labels:
+                is_general = False
+
+        # Check actual autoscaler readiness
+        autoscaler_ready = get_autoscaler_status_for_deployment(
+            name, autoscalers, is_general
+        )
+        status['autoscaler'] = 'ready' if autoscaler_ready else 'pending'
     else:
         status['autoscaler'] = None
 
@@ -591,7 +706,7 @@ def get_service(namespace: str, job_name: str) -> Optional[Any]:
 
 def get_autoscaler(namespace: str, job_name: str) -> Optional[Any]:
     context = kubernetes_utils.get_current_kube_config_context_name()
-    # --- Try Aibrix APA first ---
+    # --- Try Aibrix PA first ---
    crd_api = kube_client.crd_api(context=context)
    try:
        return crd_api.get_namespaced_custom_object(
@@ -599,7 +714,7 @@ def get_autoscaler(namespace: str, job_name: str) -> Optional[Any]:
             version='v1alpha1',
             namespace=namespace,
             plural='podautoscalers',
-            name=f'{job_name}-apa',
+            name=f'{job_name}-pa',
         )
     except ApiException as e:
         if e.status != 404:
@@ -612,6 +727,19 @@ def get_autoscaler(namespace: str, job_name: str) -> Optional[Any]:
         return autoscaling_api.read_namespaced_horizontal_pod_autoscaler(
             name=f'{job_name}-hpa', namespace=namespace
         ).to_dict()
+    except ApiException as e:
+        if e.status != 404:
+            raise
+
+    # --- Try KEDA ScaledObject ---
+    try:
+        return crd_api.get_namespaced_custom_object(
+            group='keda.sh',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='scaledobjects',
+            name=f'{job_name}-keda',
+        )
     except ApiException as e:
         if e.status == 404:
             return None
@@ -677,52 +805,133 @@ def delete_service(namespace: str, name: str) -> Optional[Dict[str, Any]]:
 
 
 def delete_autoscaler(namespace: str, name: str) -> Optional[Dict[str, Any]]:
-    """Deletes either an Aibrix PodAutoscaler or a HorizontalPodAutoscaler."""
+    """Delete all autoscalers associated with a deployment name.
+
+    This includes:
+    - All Aibrix PodAutoscalers (e.g., "-pa", "-apa") targeting the deployment
+    - Any HorizontalPodAutoscaler named "<name>-hpa"
+    - Any KEDA ScaledObject named "<name>-keda"
+    """
     context = kubernetes_utils.get_current_kube_config_context_name()
 
-    # --- Try delete APA first ---
+    # --- Delete ALL PodAutoscalers that target this deployment ---
     try:
         custom_api = kube_client.crd_api(context=context)
-        response = custom_api.delete_namespaced_custom_object(
+        pa_list = custom_api.list_namespaced_custom_object(
             group='autoscaling.aibrix.ai',
             version='v1alpha1',
             namespace=namespace,
             plural='podautoscalers',
-            name=f'{name}-apa',
         )
-        return response
+        for pa in pa_list.get('items', []):
+            meta = pa.get('metadata', {})
+            spec = pa.get('spec', {})
+            pa_name = meta.get('name', '')
+            labels = meta.get('labels', {})
+            scale_ref = spec.get('scaleTargetRef', {}).get('name')
+            targets_deployment = (
+                labels.get(DEPLOYMENT_NAME_LABEL) == name
+                or scale_ref == name
+                or pa_name.startswith(f'{name}-')
+            )
+            if targets_deployment:
+                try:
+                    custom_api.delete_namespaced_custom_object(
+                        group='autoscaling.aibrix.ai',
+                        version='v1alpha1',
+                        namespace=namespace,
+                        plural='podautoscalers',
+                        name=pa_name,
+                    )
+                    logger.info(f'Deleted PodAutoscaler: {pa_name}')
+                except kube_client.api_exception() as err:
+                    if getattr(err, 'status', None) != 404:
+                        raise
     except kube_client.api_exception() as err:
-        # If not found, try HPA
-        try:
-            error_body = json.loads(err.body)
-            if err.status != 404:
-                raise
-        except Exception:
-            if getattr(err, 'status', None) != 404:
-                raise
+        # If PA CRD is missing, skip; otherwise bubble up
+        if getattr(err, 'status', None) not in (404, None):
+            raise
 
-    # --- Try delete HPA ---
+    # --- Delete HPA ---
     try:
         autoscaling_api = kube_client.autoscaling_api(context=context)
-        return autoscaling_api.delete_namespaced_horizontal_pod_autoscaler(
+        autoscaling_api.delete_namespaced_horizontal_pod_autoscaler(
             name=f'{name}-hpa',
             namespace=namespace,
         )
+        logger.info(f'Deleted HPA: {name}-hpa')
     except kube_client.api_exception() as err:
-        try:
-            error_body = json.loads(err.body)
-            error_message = error_body.get('message', '')
-            logger.error(f'Error deleting Pod Autoscaler: {error_message}')
-        except json.JSONDecodeError:
-            logger.error(f'Error deleting Pod Autoscaler: {err.body}')
-        raise err
+        if getattr(err, 'status', None) not in (404, None):
+            try:
+                error_body = json.loads(err.body)
+                error_message = error_body.get('message', '')
+                logger.error(f'Error deleting HPA: {error_message}')
+            except json.JSONDecodeError:
+                logger.error(f'Error deleting HPA: {err.body}')
+            raise err
+
+    # --- Delete KEDA ScaledObject ---
+    try:
+        custom_api = kube_client.crd_api(context=context)
+        custom_api.delete_namespaced_custom_object(
+            group='keda.sh',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='scaledobjects',
+            name=f'{name}-keda',
+        )
+        logger.info(f'Deleted ScaledObject: {name}-keda')
+    except kube_client.api_exception() as err:
+        if getattr(err, 'status', None) not in (404, None):
+            try:
+                error_body = json.loads(err.body)
+                error_message = error_body.get('message', '')
+                logger.error(f'Error deleting KEDA ScaledObject: {error_message}')
+            except json.JSONDecodeError:
+                logger.error(f'Error deleting KEDA ScaledObject: {err.body}')
+            raise err
+
+    return None
+
+
+def delete_http_addon_resources(name: str, namespace: str) -> None:
+    """Deletes HTTP Add-on resources for general deployments."""
+    context = kubernetes_utils.get_current_kube_config_context_name()
+
+    # Delete HTTPScaledObject
+    try:
+        custom_api = kube_client.crd_api(context=context)
+        custom_api.delete_namespaced_custom_object(
+            group='http.keda.sh',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='httpscaledobjects',
+            name=f'{name}-httpscaledobject',
+        )
+        logger.info(f'Deleted HTTPScaledObject: {name}-httpscaledobject')
+    except kube_client.api_exception() as err:
+        if err.status != 404:
+            logger.debug(
+                f'Failed to delete HTTPScaledObject {name}-httpscaledobject: {err}'
+            )
+
+    # Delete Ingress
+    try:
+        networking_api = kube_client.networking_api(context=context)
+        networking_api.delete_namespaced_ingress(
+            name=f'{name}-ingress',
+            namespace=namespace,
+        )
+        logger.info(f'Deleted Ingress: {name}-ingress')
+    except kube_client.api_exception() as err:
+        if err.status != 404:
+            logger.debug(f'Failed to delete Ingress {name}-ingress: {err}')
 
 
 def delete_serving_specs(name: str, namespace: str) -> None:
     for kind, delete_fn in [
         ('deployment', delete_deployment),
         ('service', delete_service),
-        ('podautoscaler', delete_autoscaler),
     ]:
         try:
             delete_fn(namespace, name)
@@ -730,6 +939,15 @@ def delete_serving_specs(name: str, namespace: str) -> None:
         except Exception as e:
             logger.debug(f'Failed to delete {kind} {name}: {e}')
 
+    # Delete autoscaler resources (Aibrix PA, HPA, or KEDA ScaledObject)
+    try:
+        delete_autoscaler(namespace=namespace, name=name)
+    except Exception as e:
+        logger.debug(f'Failed to delete autoscaler for {name}: {e}')
+
+    # Delete HTTP Add-on resources for general deployments
+    delete_http_addon_resources(name, namespace)
+
 
 def _get_resource_summary(deployment) -> str:
     """Extract and format pod resource information from a deployment.
@@ -779,6 +997,24 @@ def get_envoy_external_ip() -> Optional[str]:
     return None
 
 
+def get_ingress_nginx_external_ip() -> Optional[str]:
+    """Get the external IP of the keda-ingress-nginx-controller LoadBalancer."""
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    core_api = kube_client.core_api(context=context)
+    try:
+        # Look for keda-ingress-nginx-controller service in keda namespace
+        service = core_api.read_namespaced_service(
+            name='keda-ingress-nginx-controller', namespace='keda'
+        )
+        if service.spec.type == 'LoadBalancer':
+            ingress = service.status.load_balancer.ingress
+            if ingress:
+                return ingress[0].ip or ingress[0].hostname
+    except Exception:
+        pass
+    return None
+
+
 def get_unique_cluster_name_from_tunnel() -> str:
     """Get cluster name from the apoxy deployment command."""
     try:
@@ -787,7 +1023,7 @@ def get_unique_cluster_name_from_tunnel() -> str:
 
         # Get the apoxy deployment
         deployment = apps_api.read_namespaced_deployment(
-            name='apoxy', namespace='default'
+            name='apoxy', namespace='apoxy-system'
         )
 
         # Extract cluster name from the command
@@ -820,112 +1056,59 @@ def get_endpoint_type_from_config() -> str:
     try:
         # Use the proper config system that handles KONDUKTOR_CONFIG env var
         endpoint_type = konduktor_config.get_nested(('serving', 'endpoint'), 'trainy')
+        logger.debug(f'[DEBUG] Config endpoint_type: {endpoint_type}')
         return endpoint_type.lower()
     except Exception as e:
         logger.warning(f'Error reading endpoint config: {e}')
 
     # Default to trainy if config not found or error
+    logger.debug('[DEBUG] Falling back to default endpoint type: trainy')
     return 'trainy'
 
 
-def _get_loadbalancer_endpoint_with_port(service_name: str) -> str:
-    """Helper function to get LoadBalancer endpoint with port."""
-    try:
-        context = kubernetes_utils.get_current_kube_config_context_name()
-        core_api = kube_client.core_api(context=context)
-
-        # Get the service
-        service = core_api.read_namespaced_service(
-            name=service_name, namespace='default'
-        )
-
-        # Check if it's LoadBalancer type
-        if service.spec.type == 'LoadBalancer':
-            ingress = service.status.load_balancer.ingress
-            if ingress and len(ingress) > 0:
-                ip = ingress[0].ip
-                if ip:
-                    return f'{ip}:{service.spec.ports[0].port}'
-
-        # If not LoadBalancer or no IP, return pending
-        return '<pending>'
-
-    except Exception:
-        return '<pending>'
-
-
-def get_vllm_deployment_endpoint(force_direct: bool = False) -> str:
-    """Get the endpoint for vLLM/Aibrix deployments based on config."""
+def get_deployment_endpoint(
+    force_direct: bool = False, deployment_type: str = 'AIBRIX'
+) -> str:
+    """Get the endpoint for both vLLM/Aibrix and general deployments."""
     if force_direct:
-        # Force direct endpoint display regardless of config
         endpoint_type = 'direct'
     else:
         endpoint_type = get_endpoint_type_from_config()
 
     if endpoint_type == 'direct':
-        try:
-            aibrix_endpoint = get_envoy_external_ip()
-            return aibrix_endpoint or '<pending>'
-        except Exception:
-            return '<pending>'
-    else:
-        try:
-            cluster_name = get_unique_cluster_name_from_tunnel()
-            return f'{cluster_name[:-3]}.trainy.us'
-        except Exception:
-            # Fallback to direct endpoint if trainy.us not available
+        # Check if this is a general deployment
+        if deployment_type == 'GENERAL':
+            # General deployments: ingress IP + Host header
+            ingress_ip = get_ingress_nginx_external_ip()
+            if ingress_ip:
+                return f'{ingress_ip}'
+            else:
+                return '<pending>'
+        else:
+            # vLLM/Aibrix deployments: envoy IP
             try:
                 aibrix_endpoint = get_envoy_external_ip()
-                if aibrix_endpoint:
-                    # Aibrix deployments route through Envoy Gateway on port 80
-                    return f'{aibrix_endpoint}'
+                return aibrix_endpoint or '<pending>'
             except Exception:
-                pass
-        return '<pending>'
-
-
-def get_general_deployment_endpoint(
-    service_name: str, force_direct: bool = False
-) -> str:
-    """Get the endpoint for a general deployment based on config."""
-    if force_direct:
-        # Force direct endpoint display regardless of config
-        endpoint_type = 'direct'
-    else:
-        endpoint_type = get_endpoint_type_from_config()
-
-    if endpoint_type == 'direct':
-        # Use LoadBalancer IP with port
-        return _get_loadbalancer_endpoint_with_port(service_name)
+                return '<pending>'
     else:
-        # Use Apoxy (trainy.us) - existing logic
+        # Use Apoxy (trainy.us)
         try:
-            context = kubernetes_utils.get_current_kube_config_context_name()
-            custom_api = kube_client.crd_api(context=context)
-
-            # Query route with label selector using the original task name
-            routes = custom_api.list_cluster_custom_object(
-                group='gateway.apoxy.dev',
-                version='v1',
-                plural='httproutes',
-                label_selector=f'task_name={service_name}',
-            )
-
-            # Extract endpoint_name from the route labels
-            if routes.get('items') and len(routes['items']) > 0:
-                route = routes['items'][0]  # Should only be one route with this label
-                labels = route.get('metadata', {}).get('labels', {})
-                endpoint_name = labels.get('endpoint_name')
-                if endpoint_name:
-                    return endpoint_name
-
-            # Fallback if no route found - try direct LoadBalancer endpoint
-            return _get_loadbalancer_endpoint_with_port(service_name)
-
-        except Exception as e:
-            logger.warning(f'Endpoint error for general deployment {service_name}: {e}')
-            # Fallback to direct LoadBalancer endpoint on error
-            return _get_loadbalancer_endpoint_with_port(service_name)
+            cluster_name = get_unique_cluster_name_from_tunnel()
+            if deployment_type == 'GENERAL':
+                # Strip last 3 chars: backend Apoxy setup uses unique
+                # suffixes (3 random numbers) to avoid Apoxy bugs when
+                # deleting/creating TunnelNode resources with same names too
+                # quickly, but we hide this complexity from user-facing endpoints
+                return f'{cluster_name[:-3]}2.trainy.us'  # General deployments
+            else:
+                # Strip last 3 chars: backend Apoxy setup uses unique
+                # suffixes (3 random numbers) to avoid Apoxy bugs when
+                # deleting/creating TunnelNode resources with same names too
+                # quickly, but we hide this complexity from user-facing endpoints
+                return f'{cluster_name[:-3]}.trainy.us'  # vLLM deployments
+        except Exception:
+            return '<pending>'
 
 
 def show_status_table(namespace: str, all_users: bool, force_direct: bool = False):
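
A worked example of the host derivation (the tunnel name is an assumption; only the 3-digit suffix convention comes from the comments above):

```python
unique_cluster_name = 'mycluster483'  # backend appends 3 random digits
base = unique_cluster_name[:-3]       # -> 'mycluster'
general_host = f'{base}2.trainy.us'   # general deployments
aibrix_host = f'{base}.trainy.us'     # vLLM/Aibrix deployments
```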
@@ -962,7 +1145,9 @@ def show_status_table(namespace: str, all_users: bool, force_direct: bool = False):
     is_ci = os.environ.get('CI') or os.environ.get('BUILDKITE')
 
     # Get Aibrix endpoint once for all Aibrix deployments
-    aibrix_endpoint = get_vllm_deployment_endpoint(force_direct)
+    aibrix_endpoint = get_deployment_endpoint(force_direct, 'AIBRIX')
+    # Get General endpoint once for all General deployments
+    general_endpoint = get_deployment_endpoint(force_direct, 'GENERAL')
 
     table = Table(title=title, box=box.ASCII if is_ci else box.ROUNDED)
     if all_users:
@@ -1017,14 +1202,33 @@
             }
             return f"{label}: {emoji_map.get(state, '❓')}"
 
+        # Check if this is a general deployment (not vLLM/Aibrix)
+        is_general = True
+        if deployment and hasattr(deployment.metadata, 'labels'):
+            labels = deployment.metadata.labels or {}
+            if AIBRIX_NAME_LABEL in labels:
+                is_general = False
+
         summary_lines = [
             emoji_line('Deploym', status['deployment'] or 'missing'),
             emoji_line('Service', status['service'] or 'missing'),
         ]
-        if status['autoscaler'] is not None:
-            summary_lines.append(
-                emoji_line('AScaler', status['autoscaler'] or 'missing')
+
+        if is_general:
+            # Autoscaler for General: HPA only
+            hpa_ready = get_autoscaler_status_for_deployment(
+                name, autoscalers_map, is_general=True
             )
+            if name in autoscalers_map:
+                summary_lines.append(f"AScaler: {'✅' if hpa_ready else '❓'}")
+        else:
+            # Autoscaler for vLLM: only KPA (APA no longer used)
+            if name in autoscalers_map:
+                kpa_ready = get_autoscaler_status_for_deployment(
+                    name, autoscalers_map, is_general=False
+                )
+                if 'kpa' in autoscalers_map.get(name, {}):
+                    summary_lines.append(f"AScaler: {'✅' if kpa_ready else '❓'}")
         summary = '\n'.join(summary_lines)
 
         # Overall status
@@ -1057,29 +1261,61 @@
 
         endpoint_str = '<pending>'
         if AIBRIX_NAME_LABEL in labels:
-            # Aibrix deployment - use the pre-computed endpoint
-            endpoint_str = aibrix_endpoint
+            # Aibrix deployment
+            endpoint_type = get_endpoint_type_from_config()
+            if force_direct or endpoint_type == 'direct':
+                # Direct access: use http for IP endpoints
+                endpoint_str = (
+                    f'http://{aibrix_endpoint}'
+                    if aibrix_endpoint != '<pending>'
+                    else aibrix_endpoint
+                )
+            else:
+                # Apoxy access: use https for trainy.us endpoints
+                endpoint_str = (
+                    f'https://{aibrix_endpoint}'
+                    if aibrix_endpoint != '<pending>'
+                    else aibrix_endpoint
+                )
         else:
             # General deployment
-            endpoint_str = get_general_deployment_endpoint(name, force_direct)
+            endpoint_type = get_endpoint_type_from_config()
+            if force_direct or endpoint_type == 'direct':
+                # Direct access: IP + Host header
+                endpoint_str = f'http://{general_endpoint}\nHost: {name}'
+            else:
+                # Apoxy access: single host + path
+                endpoint_str = f'https://{general_endpoint}/{name}'
 
         # Replicas
-        ready_replicas = (
-            str(deployment.status.ready_replicas or 0) if deployment else '?'
-        )
-        desired_replicas = str(deployment.spec.replicas or 0) if deployment else '?'
+        if deployment:
+            ready_replicas = str(deployment.status.ready_replicas or 0)
+            desired_replicas = str(deployment.spec.replicas or 0)
+        else:
+            ready_replicas = '?'
+            desired_replicas = '?'
+
         replicas_text = Text()
         replicas_text.append(
             f'Ready: {ready_replicas}/{desired_replicas}\n', style='bold white'
         )
+
         if status['autoscaler']:
-            spec = (
-                autoscaler.get('spec', {})
-                if isinstance(autoscaler, dict)
-                else getattr(autoscaler, 'spec', {})
-            )
-            min_r = str(spec.get('minReplicas', spec.get('min_replicas', '?')))
-            max_r = str(spec.get('maxReplicas', spec.get('max_replicas', '?')))
+            # Get min/max from deployment labels
+            min_r, max_r = '?', '?'
+
+            if deployment and hasattr(deployment.metadata, 'labels'):
+                labels = deployment.metadata.labels or {}
+                # All deployments with autoscaling get these labels from the template
+                original_min_str = labels.get('trainy.ai/original-min-replicas')
+                original_max_str = labels.get('trainy.ai/original-max-replicas')
+                if original_min_str and original_max_str:
+                    min_r, max_r = original_min_str, original_max_str
+                    logger.debug(
+                        f'[DEBUG] Got replicas from deployment labels: '
+                        f'min={min_r}, max={max_r}'
+                    )
+
             replicas_text.append(f'Min : {min_r}\n', style='bold white')
             replicas_text.append(f'Max : {max_r}', style='bold white')
 