konduktor-nightly 0.1.0.dev20250805105421__py3-none-any.whl → 0.1.0.dev20250807105334__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of konduktor-nightly might be problematic. Click here for more details.
- konduktor/__init__.py +4 -7
- konduktor/backends/__init__.py +2 -4
- konduktor/backends/constants.py +12 -0
- konduktor/backends/deployment.py +179 -0
- konduktor/backends/deployment_utils.py +835 -0
- konduktor/backends/jobset.py +2 -2
- konduktor/backends/jobset_utils.py +16 -266
- konduktor/backends/pod_utils.py +392 -0
- konduktor/cli.py +343 -8
- konduktor/controller/launch.py +1 -1
- konduktor/execution.py +5 -2
- konduktor/kube_client.py +8 -0
- konduktor/resource.py +20 -0
- konduktor/serving.py +149 -0
- konduktor/task.py +61 -0
- konduktor/templates/deployment.yaml.j2 +142 -0
- konduktor/templates/pod.yaml.j2 +36 -0
- konduktor/utils/accelerator_registry.py +1 -1
- konduktor/utils/log_utils.py +1 -1
- konduktor/utils/schemas.py +42 -0
- konduktor/utils/validator.py +51 -16
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/METADATA +1 -1
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/RECORD +26 -21
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/entry_points.txt +0 -0
konduktor/cli.py
CHANGED
|
@@ -51,7 +51,7 @@ from rich.progress import track
|
|
|
51
51
|
import konduktor
|
|
52
52
|
from konduktor import check as konduktor_check
|
|
53
53
|
from konduktor import logging
|
|
54
|
-
from konduktor.backends import jobset_utils
|
|
54
|
+
from konduktor.backends import deployment_utils, jobset_utils
|
|
55
55
|
from konduktor.utils import (
|
|
56
56
|
common_utils,
|
|
57
57
|
kubernetes_utils,
|
|
@@ -107,6 +107,10 @@ def _make_task_with_overrides(
|
|
|
107
107
|
disk_size: Optional[int] = None,
|
|
108
108
|
env: Optional[List[Tuple[str, str]]] = None,
|
|
109
109
|
field_to_ignore: Optional[List[str]] = None,
|
|
110
|
+
min_replicas: Optional[int] = None,
|
|
111
|
+
max_replicas: Optional[int] = None,
|
|
112
|
+
ports: Optional[int] = None,
|
|
113
|
+
probe: Optional[str] = None,
|
|
110
114
|
) -> konduktor.Task:
|
|
111
115
|
"""Creates a task from an entrypoint with overrides.
|
|
112
116
|
|
|
@@ -135,6 +139,14 @@ def _make_task_with_overrides(
|
|
|
135
139
|
disk_size=disk_size,
|
|
136
140
|
)
|
|
137
141
|
|
|
142
|
+
serving_override_params = _parse_serving_override_params(
|
|
143
|
+
num_nodes=num_nodes,
|
|
144
|
+
min_replicas=min_replicas,
|
|
145
|
+
max_replicas=max_replicas,
|
|
146
|
+
ports=ports,
|
|
147
|
+
probe=probe,
|
|
148
|
+
)
|
|
149
|
+
|
|
138
150
|
if field_to_ignore is not None:
|
|
139
151
|
_pop_and_ignore_fields_in_override_params(override_params, field_to_ignore)
|
|
140
152
|
|
|
@@ -147,6 +159,8 @@ def _make_task_with_overrides(
|
|
|
147
159
|
task.workdir = workdir
|
|
148
160
|
|
|
149
161
|
task.set_resources_override(override_params)
|
|
162
|
+
if task.serving:
|
|
163
|
+
task.set_serving_override(serving_override_params)
|
|
150
164
|
|
|
151
165
|
if max_restarts is not None:
|
|
152
166
|
assert task.resources is not None
|
|
@@ -299,6 +313,49 @@ _EXTRA_RESOURCES_OPTIONS = [
|
|
|
299
313
|
),
|
|
300
314
|
),
|
|
301
315
|
]
|
|
316
|
+
_EXTRA_SERVING_OPTIONS = [
|
|
317
|
+
click.option(
|
|
318
|
+
'--min-replicas',
|
|
319
|
+
required=False,
|
|
320
|
+
type=int,
|
|
321
|
+
help=(
|
|
322
|
+
'Minimum number of replicas to run for the service. '
|
|
323
|
+
'Overrides the "min_replicas" field in the YAML if both '
|
|
324
|
+
'are supplied.'
|
|
325
|
+
),
|
|
326
|
+
),
|
|
327
|
+
click.option(
|
|
328
|
+
'--max-replicas',
|
|
329
|
+
required=False,
|
|
330
|
+
type=int,
|
|
331
|
+
help=(
|
|
332
|
+
'Maximum number of replicas to allow for the service. '
|
|
333
|
+
'Overrides the "max_replicas" field in the YAML if both '
|
|
334
|
+
'are supplied.'
|
|
335
|
+
),
|
|
336
|
+
),
|
|
337
|
+
click.option(
|
|
338
|
+
'--ports',
|
|
339
|
+
required=False,
|
|
340
|
+
type=int,
|
|
341
|
+
help=(
|
|
342
|
+
'The container port on which your service will listen for HTTP '
|
|
343
|
+
'traffic. Overrides the "ports" field in the YAML if both '
|
|
344
|
+
'are supplied.'
|
|
345
|
+
),
|
|
346
|
+
),
|
|
347
|
+
click.option(
|
|
348
|
+
'--probe',
|
|
349
|
+
required=False,
|
|
350
|
+
type=str,
|
|
351
|
+
help=(
|
|
352
|
+
'The HTTP path to use for health checks (liveness, readiness, and '
|
|
353
|
+
'startup probes). Overrides the "probe" field in the YAML '
|
|
354
|
+
'if both are supplied. The service should respond with HTTP 200 on '
|
|
355
|
+
'this path when healthy.'
|
|
356
|
+
),
|
|
357
|
+
),
|
|
358
|
+
]
|
|
302
359
|
|
|
303
360
|
|
|
304
361
|
def _get_click_major_version():
|
|
@@ -354,12 +411,36 @@ def _parse_override_params(
|
|
|
354
411
|
return override_params
|
|
355
412
|
|
|
356
413
|
|
|
414
|
+
def _parse_serving_override_params(
|
|
415
|
+
num_nodes: Optional[int] = None,
|
|
416
|
+
min_replicas: Optional[int] = None,
|
|
417
|
+
max_replicas: Optional[int] = None,
|
|
418
|
+
ports: Optional[int] = None,
|
|
419
|
+
probe: Optional[str] = None,
|
|
420
|
+
) -> Dict[str, Any]:
|
|
421
|
+
"""Parses the relevant serving override parameters into a dictionary."""
|
|
422
|
+
override_params: Dict[str, Any] = {}
|
|
423
|
+
if num_nodes is not None:
|
|
424
|
+
override_params['num_nodes'] = num_nodes
|
|
425
|
+
if min_replicas is not None:
|
|
426
|
+
override_params['min_replicas'] = min_replicas
|
|
427
|
+
if max_replicas is not None:
|
|
428
|
+
override_params['max_replicas'] = max_replicas
|
|
429
|
+
if ports is not None:
|
|
430
|
+
override_params['ports'] = ports
|
|
431
|
+
if probe is not None:
|
|
432
|
+
override_params['probe'] = probe
|
|
433
|
+
|
|
434
|
+
return override_params
|
|
435
|
+
|
|
436
|
+
|
|
357
437
|
def _launch_with_confirm(
|
|
358
438
|
task: konduktor.Task,
|
|
359
439
|
*,
|
|
360
440
|
dryrun: bool,
|
|
361
441
|
detach_run: bool,
|
|
362
442
|
no_confirm: bool,
|
|
443
|
+
serving: bool,
|
|
363
444
|
):
|
|
364
445
|
"""Launch a cluster with a Task."""
|
|
365
446
|
|
|
@@ -367,17 +448,27 @@ def _launch_with_confirm(
|
|
|
367
448
|
if not no_confirm:
|
|
368
449
|
# Prompt if (1) --cluster is None, or (2) cluster doesn't exist, or (3)
|
|
369
450
|
# it exists but is STOPPED.
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
451
|
+
if serving:
|
|
452
|
+
prompt = (
|
|
453
|
+
f'Launching a new deployment {colorama.Style.BRIGHT}'
|
|
454
|
+
f'{colorama.Fore.GREEN}{task.name}{colorama.Style.RESET_ALL}. '
|
|
455
|
+
'Proceed?'
|
|
456
|
+
)
|
|
457
|
+
else:
|
|
458
|
+
prompt = (
|
|
459
|
+
f'Launching a new job {colorama.Style.BRIGHT}'
|
|
460
|
+
f'{colorama.Fore.GREEN}{task.name}{colorama.Style.RESET_ALL}. '
|
|
461
|
+
'Proceed?'
|
|
462
|
+
)
|
|
375
463
|
if prompt is not None:
|
|
376
464
|
confirm_shown = True
|
|
377
465
|
click.confirm(prompt, default=True, abort=True, show_default=True)
|
|
378
466
|
|
|
379
467
|
if not confirm_shown:
|
|
380
|
-
|
|
468
|
+
if serving:
|
|
469
|
+
click.secho(f'Creating deployment {task.name}...', fg='yellow')
|
|
470
|
+
else:
|
|
471
|
+
click.secho(f'Running task {task.name}...', fg='yellow')
|
|
381
472
|
return konduktor.launch(
|
|
382
473
|
task,
|
|
383
474
|
dryrun=dryrun,
|
|
@@ -675,6 +766,11 @@ def launch(
|
|
|
675
766
|
image_id=image_id,
|
|
676
767
|
env=env,
|
|
677
768
|
disk_size=disk_size,
|
|
769
|
+
# serving stuff
|
|
770
|
+
min_replicas=None,
|
|
771
|
+
max_replicas=None,
|
|
772
|
+
ports=None,
|
|
773
|
+
probe=None,
|
|
678
774
|
)
|
|
679
775
|
|
|
680
776
|
click.secho(
|
|
@@ -693,11 +789,18 @@ def launch(
|
|
|
693
789
|
)
|
|
694
790
|
print(table)
|
|
695
791
|
|
|
792
|
+
if task.serving:
|
|
793
|
+
raise click.UsageError(
|
|
794
|
+
'Serving information detected. Use '
|
|
795
|
+
'`konduktor serve launch` instead for serving.'
|
|
796
|
+
)
|
|
797
|
+
|
|
696
798
|
job_name = _launch_with_confirm(
|
|
697
799
|
task,
|
|
698
800
|
dryrun=dryrun,
|
|
699
801
|
detach_run=detach_run,
|
|
700
802
|
no_confirm=yes,
|
|
803
|
+
serving=bool(task.serving),
|
|
701
804
|
)
|
|
702
805
|
click.secho(
|
|
703
806
|
ux_utils.command_hint_messages(ux_utils.CommandHintType.JOB, job_name),
|
|
@@ -800,7 +903,7 @@ def down(
|
|
|
800
903
|
# Use fnmatch for both wildcard and exact pattern matching
|
|
801
904
|
pattern_matches = fnmatch.filter(all_job_names, job_pattern)
|
|
802
905
|
if not pattern_matches:
|
|
803
|
-
click.
|
|
906
|
+
click.secho(
|
|
804
907
|
f'Warning: No jobs found matching pattern "{job_pattern}"',
|
|
805
908
|
fg='yellow',
|
|
806
909
|
err=True,
|
|
@@ -1147,6 +1250,238 @@ def list_secrets(all_users: bool):
|
|
|
1147
1250
|
click.echo(f'{basename:30} kind={kind:10}')
|
|
1148
1251
|
|
|
1149
1252
|
|
|
1253
|
+
@cli.group(cls=_NaturalOrderGroup)
|
|
1254
|
+
def serve():
|
|
1255
|
+
"""Manage LLM serving with Konduktor.
|
|
1256
|
+
|
|
1257
|
+
USAGE: konduktor serve COMMAND
|
|
1258
|
+
|
|
1259
|
+
\b
|
|
1260
|
+
Use one of the following COMMANDS:
|
|
1261
|
+
launch
|
|
1262
|
+
down
|
|
1263
|
+
status
|
|
1264
|
+
|
|
1265
|
+
\b
|
|
1266
|
+
Examples:
|
|
1267
|
+
konduktor serve launch my-deployment
|
|
1268
|
+
konduktor serve down my-deployment
|
|
1269
|
+
konduktor serve status
|
|
1270
|
+
|
|
1271
|
+
\b
|
|
1272
|
+
For details on COMMAND ARGS:
|
|
1273
|
+
konduktor serve launch -h
|
|
1274
|
+
konduktor serve down -h
|
|
1275
|
+
konduktor serve status -h
|
|
1276
|
+
"""
|
|
1277
|
+
pass
|
|
1278
|
+
|
|
1279
|
+
|
|
1280
|
+
@serve.command(name='launch')
|
|
1281
|
+
@click.argument(
|
|
1282
|
+
'entrypoint',
|
|
1283
|
+
required=False,
|
|
1284
|
+
type=str,
|
|
1285
|
+
nargs=-1,
|
|
1286
|
+
)
|
|
1287
|
+
@click.option(
|
|
1288
|
+
'--dryrun',
|
|
1289
|
+
default=False,
|
|
1290
|
+
is_flag=True,
|
|
1291
|
+
help='If True, do not actually run the job.',
|
|
1292
|
+
)
|
|
1293
|
+
@click.option(
|
|
1294
|
+
'--detach-run',
|
|
1295
|
+
'-d',
|
|
1296
|
+
default=False,
|
|
1297
|
+
is_flag=True,
|
|
1298
|
+
help=(
|
|
1299
|
+
'If True, as soon as a job is submitted, return from this call '
|
|
1300
|
+
'and do not stream execution logs.'
|
|
1301
|
+
),
|
|
1302
|
+
)
|
|
1303
|
+
@_add_click_options(
|
|
1304
|
+
_TASK_OPTIONS_WITH_NAME + _EXTRA_RESOURCES_OPTIONS + _EXTRA_SERVING_OPTIONS
|
|
1305
|
+
)
|
|
1306
|
+
@click.option(
|
|
1307
|
+
'--yes',
|
|
1308
|
+
'-y',
|
|
1309
|
+
is_flag=True,
|
|
1310
|
+
default=False,
|
|
1311
|
+
required=False,
|
|
1312
|
+
# Disabling quote check here, as there seems to be a bug in pylint,
|
|
1313
|
+
# which incorrectly recognizes the help string as a docstring.
|
|
1314
|
+
# pylint: disable=bad-docstring-quotes
|
|
1315
|
+
help='Skip confirmation prompt.',
|
|
1316
|
+
)
|
|
1317
|
+
def serve_launch(
|
|
1318
|
+
entrypoint: Tuple[str, ...],
|
|
1319
|
+
dryrun: bool,
|
|
1320
|
+
detach_run: bool,
|
|
1321
|
+
name: Optional[str],
|
|
1322
|
+
workdir: Optional[str],
|
|
1323
|
+
cloud: Optional[str],
|
|
1324
|
+
gpus: Optional[str],
|
|
1325
|
+
cpus: Optional[str],
|
|
1326
|
+
memory: Optional[str],
|
|
1327
|
+
num_nodes: Optional[int],
|
|
1328
|
+
max_restarts: Optional[int],
|
|
1329
|
+
image_id: Optional[str],
|
|
1330
|
+
env_file: Optional[Dict[str, str]],
|
|
1331
|
+
env: List[Tuple[str, str]],
|
|
1332
|
+
disk_size: Optional[int],
|
|
1333
|
+
min_replicas: Optional[int],
|
|
1334
|
+
max_replicas: Optional[int],
|
|
1335
|
+
ports: Optional[int],
|
|
1336
|
+
probe: Optional[str],
|
|
1337
|
+
yes: bool,
|
|
1338
|
+
):
|
|
1339
|
+
"""Launch a deployment to serve.
|
|
1340
|
+
|
|
1341
|
+
If ENTRYPOINT points to a valid YAML file, it is read in as the task
|
|
1342
|
+
specification. Otherwise, it is interpreted as a bash command.
|
|
1343
|
+
"""
|
|
1344
|
+
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
|
|
1345
|
+
env = _merge_env_vars(env_file, env)
|
|
1346
|
+
|
|
1347
|
+
task = _make_task_with_overrides(
|
|
1348
|
+
entrypoint=entrypoint,
|
|
1349
|
+
name=name,
|
|
1350
|
+
workdir=workdir,
|
|
1351
|
+
cloud=cloud,
|
|
1352
|
+
gpus=gpus,
|
|
1353
|
+
cpus=cpus,
|
|
1354
|
+
memory=memory,
|
|
1355
|
+
num_nodes=num_nodes,
|
|
1356
|
+
max_restarts=max_restarts,
|
|
1357
|
+
image_id=image_id,
|
|
1358
|
+
env=env,
|
|
1359
|
+
disk_size=disk_size,
|
|
1360
|
+
min_replicas=min_replicas,
|
|
1361
|
+
max_replicas=max_replicas,
|
|
1362
|
+
ports=ports,
|
|
1363
|
+
probe=probe,
|
|
1364
|
+
)
|
|
1365
|
+
|
|
1366
|
+
click.secho(
|
|
1367
|
+
f'Considered resources ({task.num_nodes} nodes):', fg='green', bold=True
|
|
1368
|
+
)
|
|
1369
|
+
table_kwargs = {
|
|
1370
|
+
'hrules': prettytable.FRAME,
|
|
1371
|
+
'vrules': prettytable.NONE,
|
|
1372
|
+
'border': True,
|
|
1373
|
+
}
|
|
1374
|
+
headers = ['CPUs', 'Mem (GB)', 'GPUs']
|
|
1375
|
+
table = log_utils.create_table(headers, **table_kwargs)
|
|
1376
|
+
assert task.resources is not None
|
|
1377
|
+
table.add_row(
|
|
1378
|
+
[task.resources.cpus, task.resources.memory, task.resources.accelerators]
|
|
1379
|
+
)
|
|
1380
|
+
print(table)
|
|
1381
|
+
|
|
1382
|
+
if not task.serving:
|
|
1383
|
+
raise click.UsageError(
|
|
1384
|
+
'No serving information detected. '
|
|
1385
|
+
'Use `konduktor launch` instead for workloads.'
|
|
1386
|
+
)
|
|
1387
|
+
|
|
1388
|
+
job_name = _launch_with_confirm(
|
|
1389
|
+
task,
|
|
1390
|
+
dryrun=dryrun,
|
|
1391
|
+
detach_run=detach_run,
|
|
1392
|
+
no_confirm=yes,
|
|
1393
|
+
serving=bool(task.serving),
|
|
1394
|
+
)
|
|
1395
|
+
|
|
1396
|
+
click.secho(f'Deployment Name: {job_name}', fg='green', bold=True)
|
|
1397
|
+
|
|
1398
|
+
|
|
1399
|
+
@serve.command(name='down')
|
|
1400
|
+
@click.argument('names', nargs=-1, required=False)
|
|
1401
|
+
@click.option(
|
|
1402
|
+
'--all', '-a', default=False, is_flag=True, help='Tear down all deployments.'
|
|
1403
|
+
)
|
|
1404
|
+
@click.option(
|
|
1405
|
+
'--yes',
|
|
1406
|
+
'-y',
|
|
1407
|
+
is_flag=True,
|
|
1408
|
+
default=False,
|
|
1409
|
+
required=False,
|
|
1410
|
+
help='Skip confirmation prompt.',
|
|
1411
|
+
)
|
|
1412
|
+
def serve_down(
|
|
1413
|
+
names: List[str],
|
|
1414
|
+
all: bool,
|
|
1415
|
+
yes: bool,
|
|
1416
|
+
):
|
|
1417
|
+
"""Tear down deployments (Deployment, Service, PodAutoscaler).
|
|
1418
|
+
|
|
1419
|
+
Use --all or -a to tear down all deployments.
|
|
1420
|
+
|
|
1421
|
+
Examples:
|
|
1422
|
+
|
|
1423
|
+
\b
|
|
1424
|
+
konduktor serve down my-deployment
|
|
1425
|
+
konduktor serve down -a
|
|
1426
|
+
"""
|
|
1427
|
+
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
1428
|
+
namespace = kubernetes_utils.get_kube_config_context_namespace(context)
|
|
1429
|
+
|
|
1430
|
+
all_models = deployment_utils.list_models(namespace)
|
|
1431
|
+
|
|
1432
|
+
if all:
|
|
1433
|
+
names = all_models
|
|
1434
|
+
if not names:
|
|
1435
|
+
logger.warning(
|
|
1436
|
+
f'No deployments found in namespace '
|
|
1437
|
+
f'{namespace}, but continuing teardown.'
|
|
1438
|
+
)
|
|
1439
|
+
elif names:
|
|
1440
|
+
matched = []
|
|
1441
|
+
for pattern in names:
|
|
1442
|
+
matched.extend(fnmatch.filter(all_models, pattern))
|
|
1443
|
+
names = sorted(set(matched))
|
|
1444
|
+
if not names:
|
|
1445
|
+
raise click.ClickException(
|
|
1446
|
+
f'No matching deployments found. Check with: '
|
|
1447
|
+
f'{colorama.Style.BRIGHT}konduktor serve '
|
|
1448
|
+
f'status{colorama.Style.RESET_ALL}'
|
|
1449
|
+
)
|
|
1450
|
+
else:
|
|
1451
|
+
raise click.ClickException(
|
|
1452
|
+
'No deployments specified. Use --all to tear down all deployments '
|
|
1453
|
+
'or pass names/patterns.'
|
|
1454
|
+
)
|
|
1455
|
+
|
|
1456
|
+
if not yes:
|
|
1457
|
+
prompt = (
|
|
1458
|
+
f'Tearing down deployment(s) '
|
|
1459
|
+
f'{colorama.Style.BRIGHT}{colorama.Fore.GREEN}{names}'
|
|
1460
|
+
f'{colorama.Style.RESET_ALL}. '
|
|
1461
|
+
f'Proceed?'
|
|
1462
|
+
)
|
|
1463
|
+
click.confirm(prompt, default=True, abort=True, show_default=True)
|
|
1464
|
+
|
|
1465
|
+
for name in track(names, description='Tearing down deployment(s)...'):
|
|
1466
|
+
deployment_utils.delete_serving_specs(name, namespace)
|
|
1467
|
+
|
|
1468
|
+
|
|
1469
|
+
@serve.command(name='status')
|
|
1470
|
+
@click.option(
|
|
1471
|
+
'--all-users',
|
|
1472
|
+
'-u',
|
|
1473
|
+
default=False,
|
|
1474
|
+
is_flag=True,
|
|
1475
|
+
required=False,
|
|
1476
|
+
help='Show all deployments, including those not owned by the ' 'current user.',
|
|
1477
|
+
)
|
|
1478
|
+
def serve_status(all_users: bool):
|
|
1479
|
+
"""Show status of deployments launched via `konduktor serve launch`."""
|
|
1480
|
+
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
1481
|
+
namespace = kubernetes_utils.get_kube_config_context_namespace(context)
|
|
1482
|
+
deployment_utils.show_status_table(namespace, all_users=all_users)
|
|
1483
|
+
|
|
1484
|
+
|
|
1150
1485
|
def main():
|
|
1151
1486
|
return cli()
|
|
1152
1487
|
|
konduktor/controller/launch.py
CHANGED
konduktor/execution.py
CHANGED
|
@@ -12,7 +12,7 @@ if typing.TYPE_CHECKING:
|
|
|
12
12
|
|
|
13
13
|
from konduktor import config, constants
|
|
14
14
|
from konduktor import logging as konduktor_logging
|
|
15
|
-
from konduktor.backends import JobsetBackend
|
|
15
|
+
from konduktor.backends import DeploymentBackend, JobsetBackend
|
|
16
16
|
from konduktor.data import data_utils
|
|
17
17
|
from konduktor.data import registry as storage_registry
|
|
18
18
|
from konduktor.data import storage as storage_lib
|
|
@@ -42,7 +42,10 @@ def _execute(
|
|
|
42
42
|
the cluster.
|
|
43
43
|
"""
|
|
44
44
|
# (asaiacai): in the future we may support more backends but not likely
|
|
45
|
-
|
|
45
|
+
if task.serving:
|
|
46
|
+
backend = DeploymentBackend() # type: ignore
|
|
47
|
+
else:
|
|
48
|
+
backend = JobsetBackend() # type: ignore
|
|
46
49
|
# template the commands for syncing the contents within the shell command
|
|
47
50
|
# initialization of the pod
|
|
48
51
|
job_name = backend.execute(task, detach_run, dryrun=dryrun)
|
konduktor/kube_client.py
CHANGED
|
@@ -152,6 +152,14 @@ def crd_client(context: Optional[str] = None):
|
|
|
152
152
|
return kubernetes.client.CustomObjectsApi()
|
|
153
153
|
|
|
154
154
|
|
|
155
|
+
@_api_logging_decorator('urllib3', logging.ERROR)
|
|
156
|
+
@annotations.lru_cache(scope='request')
|
|
157
|
+
def autoscaling_api(context: Optional[str] = None):
|
|
158
|
+
"""Return the Kubernetes AutoscalingV2Api client."""
|
|
159
|
+
_load_config(context)
|
|
160
|
+
return kubernetes.client.AutoscalingV2Api()
|
|
161
|
+
|
|
162
|
+
|
|
155
163
|
def api_exception():
|
|
156
164
|
return kubernetes.client.rest.ApiException
|
|
157
165
|
|
konduktor/resource.py
CHANGED
|
@@ -399,6 +399,26 @@ class Resources:
|
|
|
399
399
|
return value
|
|
400
400
|
return None
|
|
401
401
|
|
|
402
|
+
def get_accelerator_type(self) -> Optional[str]:
|
|
403
|
+
"""Returns the first accelerator type from the accelerators dict.
|
|
404
|
+
|
|
405
|
+
Returns:
|
|
406
|
+
The accelerator type (e.g., 'V100', 'A100') or None if no accelerators
|
|
407
|
+
"""
|
|
408
|
+
if self.accelerators is None or not self.accelerators:
|
|
409
|
+
return None
|
|
410
|
+
return next(iter(self.accelerators.keys())) # type: ignore
|
|
411
|
+
|
|
412
|
+
def get_accelerator_count(self) -> Optional[int]:
|
|
413
|
+
"""Returns the count of the first accelerator type from the accelerators dict.
|
|
414
|
+
|
|
415
|
+
Returns:
|
|
416
|
+
The accelerator count (e.g., 1, 2) or None if no accelerators
|
|
417
|
+
"""
|
|
418
|
+
if self.accelerators is None or not self.accelerators:
|
|
419
|
+
return None
|
|
420
|
+
return next(iter(self.accelerators.values())) # type: ignore
|
|
421
|
+
|
|
402
422
|
def copy(self, **override) -> 'Resources':
|
|
403
423
|
"""Returns a copy of the given Resources."""
|
|
404
424
|
resources = Resources(
|
konduktor/serving.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# Proprietary Changes made for Trainy under the Trainy Software License
|
|
2
|
+
# Original source: skypilot: https://github.com/skypilot-org/skypilot
|
|
3
|
+
# which is Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
"""Serving: configuration for long-running serving deployments."""
|
|
14
|
+
|
|
15
|
+
from typing import Any, Dict, Optional, Union
|
|
16
|
+
|
|
17
|
+
from konduktor import logging
|
|
18
|
+
from konduktor.utils import common_utils, schemas, ux_utils
|
|
19
|
+
|
|
20
|
+
logger = logging.get_logger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Serving:
|
|
24
|
+
"""Serving: configuration for deployments.
|
|
25
|
+
|
|
26
|
+
Immutable once created. Use `copy()` to create a modified copy.
|
|
27
|
+
|
|
28
|
+
Used:
|
|
29
|
+
* to represent serving config in tasks
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
_VERSION = 1
|
|
33
|
+
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
min_replicas: Optional[int] = None,
|
|
37
|
+
max_replicas: Optional[int] = None,
|
|
38
|
+
ports: Optional[int] = 8000,
|
|
39
|
+
probe: Optional[str] = '/health',
|
|
40
|
+
):
|
|
41
|
+
self._version = self._VERSION
|
|
42
|
+
|
|
43
|
+
if min_replicas is None and max_replicas is None:
|
|
44
|
+
with ux_utils.print_exception_no_traceback():
|
|
45
|
+
raise ValueError(
|
|
46
|
+
'At least one of min_replicas or ' 'max_replicas must be specified.'
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
if min_replicas is None:
|
|
50
|
+
min_replicas = max_replicas
|
|
51
|
+
if max_replicas is None:
|
|
52
|
+
max_replicas = min_replicas
|
|
53
|
+
|
|
54
|
+
if min_replicas is not None and min_replicas <= 0:
|
|
55
|
+
with ux_utils.print_exception_no_traceback():
|
|
56
|
+
raise ValueError('min_replicas must be >= 1')
|
|
57
|
+
|
|
58
|
+
if (
|
|
59
|
+
max_replicas is not None
|
|
60
|
+
and min_replicas is not None
|
|
61
|
+
and max_replicas < min_replicas
|
|
62
|
+
):
|
|
63
|
+
with ux_utils.print_exception_no_traceback():
|
|
64
|
+
raise ValueError(
|
|
65
|
+
f'max_replicas ({max_replicas}) must '
|
|
66
|
+
f'be >= min_replicas ({min_replicas})'
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
self._min_replicas = min_replicas
|
|
70
|
+
self._max_replicas = max_replicas
|
|
71
|
+
self._ports = ports
|
|
72
|
+
self._probe = probe
|
|
73
|
+
|
|
74
|
+
@property
|
|
75
|
+
def min_replicas(self) -> int:
|
|
76
|
+
assert self._min_replicas is not None
|
|
77
|
+
return self._min_replicas
|
|
78
|
+
|
|
79
|
+
@property
|
|
80
|
+
def max_replicas(self) -> int:
|
|
81
|
+
assert self._max_replicas is not None
|
|
82
|
+
return self._max_replicas
|
|
83
|
+
|
|
84
|
+
@property
|
|
85
|
+
def ports(self) -> int:
|
|
86
|
+
assert self._ports is not None
|
|
87
|
+
return self._ports
|
|
88
|
+
|
|
89
|
+
@property
|
|
90
|
+
def probe(self) -> Optional[str]:
|
|
91
|
+
return self._probe
|
|
92
|
+
|
|
93
|
+
def get(self, key: str, default=None):
|
|
94
|
+
return {
|
|
95
|
+
'min_replicas': self._min_replicas,
|
|
96
|
+
'max_replicas': self._max_replicas,
|
|
97
|
+
'ports': self._ports,
|
|
98
|
+
'probe': self._probe,
|
|
99
|
+
}.get(key, default)
|
|
100
|
+
|
|
101
|
+
def copy(self, **override) -> 'Serving':
|
|
102
|
+
"""Returns a copy of this Serving with fields overridden."""
|
|
103
|
+
return Serving(
|
|
104
|
+
min_replicas=override.pop('min_replicas', self._min_replicas),
|
|
105
|
+
max_replicas=override.pop('max_replicas', self._max_replicas),
|
|
106
|
+
ports=override.pop('ports', self._ports),
|
|
107
|
+
probe=override.pop('probe', self._probe),
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
@classmethod
|
|
111
|
+
def from_yaml_config(
|
|
112
|
+
cls, config: Optional[Dict[str, Any]], task_run: Optional[str] = None
|
|
113
|
+
) -> Optional['Serving']:
|
|
114
|
+
if config is None:
|
|
115
|
+
return None
|
|
116
|
+
common_utils.validate_schema(
|
|
117
|
+
config,
|
|
118
|
+
schemas.get_serving_schema(),
|
|
119
|
+
'Invalid serving config YAML: ',
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
if 'min_replicas' not in config and 'max_replicas' not in config:
|
|
123
|
+
raise ValueError(
|
|
124
|
+
'At least one of min_replicas or '
|
|
125
|
+
'max_replicas must be specified in serving'
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
# Determine default probe based on deployment type
|
|
129
|
+
default_probe = None # No probing by default for general deployments
|
|
130
|
+
if task_run and 'vllm.entrypoints.openai.api_server' in task_run:
|
|
131
|
+
default_probe = '/health' # Aibrix deployments get /health by default
|
|
132
|
+
|
|
133
|
+
return cls(
|
|
134
|
+
min_replicas=config.get('min_replicas', None),
|
|
135
|
+
max_replicas=config.get('max_replicas', None),
|
|
136
|
+
ports=config.get('ports', 8000),
|
|
137
|
+
probe=config.get('probe', default_probe),
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
def to_yaml_config(self) -> Dict[str, Union[int, str]]:
|
|
141
|
+
config: Dict[str, Union[int, str]] = {
|
|
142
|
+
'min_replicas': self._min_replicas or 1,
|
|
143
|
+
'max_replicas': self._max_replicas or 1,
|
|
144
|
+
'ports': self._ports or 8000,
|
|
145
|
+
}
|
|
146
|
+
# Only include probe if it's not None
|
|
147
|
+
if self._probe is not None:
|
|
148
|
+
config['probe'] = self._probe
|
|
149
|
+
return config
|