nemo-evaluator-launcher 0.1.19__py3-none-any.whl → 0.1.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. nemo_evaluator_launcher/api/functional.py +159 -5
  2. nemo_evaluator_launcher/cli/logs.py +102 -0
  3. nemo_evaluator_launcher/cli/ls_task.py +280 -0
  4. nemo_evaluator_launcher/cli/ls_tasks.py +208 -55
  5. nemo_evaluator_launcher/cli/main.py +29 -2
  6. nemo_evaluator_launcher/cli/run.py +114 -16
  7. nemo_evaluator_launcher/cli/version.py +26 -23
  8. nemo_evaluator_launcher/common/container_metadata/__init__.py +61 -0
  9. nemo_evaluator_launcher/common/container_metadata/intermediate_repr.py +530 -0
  10. nemo_evaluator_launcher/common/container_metadata/loading.py +1126 -0
  11. nemo_evaluator_launcher/common/container_metadata/registries.py +824 -0
  12. nemo_evaluator_launcher/common/container_metadata/utils.py +63 -0
  13. nemo_evaluator_launcher/common/helpers.py +200 -51
  14. nemo_evaluator_launcher/common/logging_utils.py +16 -5
  15. nemo_evaluator_launcher/common/mapping.py +341 -155
  16. nemo_evaluator_launcher/common/printing_utils.py +25 -12
  17. nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
  18. nemo_evaluator_launcher/configs/deployment/trtllm.yaml +2 -3
  19. nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -1
  20. nemo_evaluator_launcher/configs/execution/slurm/default.yaml +14 -0
  21. nemo_evaluator_launcher/executors/base.py +31 -1
  22. nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +36 -1
  23. nemo_evaluator_launcher/executors/lepton/executor.py +107 -9
  24. nemo_evaluator_launcher/executors/local/executor.py +383 -24
  25. nemo_evaluator_launcher/executors/local/run.template.sh +54 -2
  26. nemo_evaluator_launcher/executors/slurm/executor.py +559 -64
  27. nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
  28. nemo_evaluator_launcher/exporters/utils.py +32 -46
  29. nemo_evaluator_launcher/package_info.py +1 -1
  30. nemo_evaluator_launcher/resources/all_tasks_irs.yaml +17016 -0
  31. nemo_evaluator_launcher/resources/mapping.toml +64 -315
  32. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/METADATA +4 -3
  33. nemo_evaluator_launcher-0.1.56.dist-info/RECORD +69 -0
  34. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/entry_points.txt +1 -0
  35. nemo_evaluator_launcher-0.1.19.dist-info/RECORD +0 -60
  36. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/WHEEL +0 -0
  37. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/licenses/LICENSE +0 -0
  38. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/top_level.txt +0 -0
nemo_evaluator_launcher/configs/deployment/trtllm.yaml

@@ -3,7 +3,7 @@ image: nvcr.io/nvidia/tensorrt-llm/release:1.0.0
 checkpoint_path: ???
 served_model_name: ???
 port: 8000
-tensor_parallel_size: 4
+tensor_parallel_size: 8
 pipeline_parallel_size: 1
 extra_args: ""

@@ -12,8 +12,7 @@ endpoints:
   completions: /v1/completions
   health: /health

-command:
-  mpirun --allow-run-as-root --oversubscribe
+command: mpirun --allow-run-as-root --oversubscribe
   trtllm-serve serve /checkpoint
   --tp_size=${deployment.tensor_parallel_size}
   --pp_size=${deployment.pipeline_parallel_size}
nemo_evaluator_launcher/configs/deployment/vllm.yaml

@@ -37,6 +37,5 @@ command: vllm serve ${oc.select:deployment.hf_model_handle,/checkpoint}
   --port ${deployment.port}
   --trust-remote-code
   --served-model-name ${deployment.served_model_name}
-  --enforce-eager
   --gpu-memory-utilization ${deployment.gpu_memory_utilization}
   ${deployment.extra_args}
nemo_evaluator_launcher/configs/execution/slurm/default.yaml

@@ -25,6 +25,12 @@ ntasks_per_node: 1
 gres: gpu:8
 walltime: 01:00:00
 subproject: nemo-evaluator-launcher
+sbatch_comment: null  # Optional comment for SLURM job (translates to #SBATCH --comment='...')
+
+# Deployment-specific SLURM configuration
+deployment:
+  n_tasks: 1  # Number of tasks for deployment srun (default: 1, for multi-instance set to num_nodes)
+
 env_vars:
   deployment: {}
   evaluation: {}

@@ -32,3 +38,11 @@ mounts:
   deployment: {}
   evaluation: {}
 mount_home: true
+
+proxy:
+  type: haproxy
+  image: haproxy:latest
+  config:
+    haproxy_port: 5009
+    health_check_path: /health
+    health_check_status: 200
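The new deployment and proxy blocks appear to be ordinary OmegaConf config (the launcher's configs use ${...} interpolation and DictConfig elsewhere in this diff), so they can be overridden like any other execution setting. A minimal sketch; the override values are illustrative, only the keys and defaults come from the diff above:

    from omegaconf import OmegaConf

    # Defaults mirroring the new keys in configs/execution/slurm/default.yaml (see diff above).
    defaults = OmegaConf.create(
        {
            "deployment": {"n_tasks": 1},
            "proxy": {
                "type": "haproxy",
                "image": "haproxy:latest",
                "config": {
                    "haproxy_port": 5009,
                    "health_check_path": "/health",
                    "health_check_status": 200,
                },
            },
        }
    )

    # Hypothetical user overrides: one deployment task per node, different proxy port.
    overrides = OmegaConf.from_dotlist(
        ["deployment.n_tasks=2", "proxy.config.haproxy_port=6000"]
    )
    print(OmegaConf.to_yaml(OmegaConf.merge(defaults, overrides)))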
nemo_evaluator_launcher/executors/base.py

@@ -21,10 +21,12 @@ Defines the abstract interface for all executor implementations and common statu
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from enum import Enum
-from typing import Any, Optional
+from typing import Any, Iterator, Optional, Tuple

 from omegaconf import DictConfig

+from nemo_evaluator_launcher.common.logging_utils import logger
+

 class ExecutionState(Enum):
     """Enumeration of possible execution states."""

@@ -118,3 +120,31 @@ class BaseExecutor(ABC):
             return f"Could not find or kill job {job_id} ({container_or_id}) - job was already killed"
         # Generic error message
         return f"Could not find or kill job {job_id} ({container_or_id})"
+
+    @staticmethod
+    def stream_logs(
+        id: str, executor_name: Optional[str] = None
+    ) -> Iterator[Tuple[str, str, str]]:
+        """Stream logs from a job or invocation group.
+
+        This is an optional method that executors can implement to provide log streaming.
+        If not implemented, it will log a warning and raise NotImplementedError.
+
+        Args:
+            id: Unique job identifier or invocation identifier.
+            executor_name: Optional executor name for warning messages. If not provided,
+                will attempt to infer from the calling context.
+
+        Yields:
+            Tuple[str, str, str]: Tuples of (job_id, task_name, log_line) for each log line.
+                Empty lines are yielded as empty strings.
+
+        Raises:
+            NotImplementedError: If the executor does not support log streaming.
+        """
+        executor_display_name = executor_name or "this executor"
+        logger.warning(
+            f"Log streaming is not yet implemented for executor '{executor_display_name}'. "
+            "Only 'local' executor currently supports log streaming."
+        )
+        raise NotImplementedError("This executor does not support log streaming")
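For orientation, here is a minimal sketch of what a concrete implementation of this new hook could look like in an executor subclass. The results/<id>/<task>/logs/stdout.log layout is an assumption made for the example, not the launcher's actual directory structure:

    from pathlib import Path
    from typing import Iterator, Optional, Tuple


    class FileTailExecutor:  # hypothetical executor, for illustration only
        @staticmethod
        def stream_logs(
            id: str, executor_name: Optional[str] = None
        ) -> Iterator[Tuple[str, str, str]]:
            # Assumed layout: results/<invocation_id>/<task_name>/logs/stdout.log
            base = Path("results") / id
            for log_file in sorted(base.glob("*/logs/stdout.log")):
                task_name = log_file.parent.parent.name
                with log_file.open() as fh:
                    for line in fh:
                        # Yield (job_id, task_name, log_line), matching the base signature.
                        yield id, task_name, line.rstrip("\n")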
nemo_evaluator_launcher/executors/lepton/deployment_helpers.py

@@ -19,6 +19,7 @@ Handles Lepton endpoint creation, management, and health checks.
 """

 import json
+import shlex
 import subprocess
 import time
 from pathlib import Path

@@ -27,6 +28,7 @@ from typing import Any, Dict, Optional
 # Import lepton dependencies
 from omegaconf import DictConfig

+from nemo_evaluator_launcher.common.helpers import _str_to_echo_command
 from nemo_evaluator_launcher.common.logging_utils import logger


@@ -235,6 +237,8 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
     Returns:
         Container specification for Lepton.
     """
+    # Extract pre_cmd from deployment_cfg
+    pre_cmd: str = deployment_cfg.get("pre_cmd") or ""
     container_spec = {
         "image": deployment_cfg.image,
         "ports": [{"container_port": deployment_cfg.port}],

@@ -258,6 +262,18 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
         if hasattr(deployment_cfg, "extra_args") and deployment_cfg.extra_args:
             command_parts.extend(deployment_cfg.extra_args.split())

+        # Wrap with pre_cmd if provided
+        if pre_cmd:
+            create_pre_script_cmd = _str_to_echo_command(
+                pre_cmd, filename="deployment_pre_cmd.sh"
+            )
+            original_cmd = " ".join(shlex.quote(str(c)) for c in command_parts)
+            command_parts = [
+                "/bin/bash",
+                "-c",
+                f"{create_pre_script_cmd.cmd} && source deployment_pre_cmd.sh && exec {original_cmd}",
+            ]
+
         container_spec["command"] = command_parts

     elif deployment_cfg.type == "sglang":

@@ -278,12 +294,31 @@ def _create_inference_container_spec(deployment_cfg: DictConfig) -> Dict[str, An
         if hasattr(deployment_cfg, "extra_args") and deployment_cfg.extra_args:
             command_parts.extend(deployment_cfg.extra_args.split())

+        # Wrap with pre_cmd if provided
+        if pre_cmd:
+            create_pre_script_cmd = _str_to_echo_command(
+                pre_cmd, filename="deployment_pre_cmd.sh"
+            )
+            original_cmd = " ".join(shlex.quote(str(c)) for c in command_parts)
+            command_parts = [
+                "/bin/bash",
+                "-c",
+                f"{create_pre_script_cmd.cmd} && source deployment_pre_cmd.sh && exec {original_cmd}",
+            ]
+
         container_spec["command"] = command_parts

     elif deployment_cfg.type == "nim":
         # NIM containers use their default entrypoint - no custom command needed
         # Configuration is handled via environment variables
-        pass
+        # pre_cmd is not supported for NIM deployments
+        if pre_cmd:
+            logger.error(
+                "pre_cmd is not supported for NIM deployments",
+                deployment_type="nim",
+                pre_cmd=pre_cmd,
+            )
+            raise ValueError("pre_cmd is not supported for NIM deployments")

     return container_spec

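The pre_cmd wrapping pattern used above is easier to see in isolation. Below is a rough, stdlib-only sketch of the same idea; _str_to_echo_command is approximated here with a printf redirect, which may differ from the launcher's actual helper:

    import shlex


    def wrap_with_pre_cmd(command_parts, pre_cmd):
        """Wrap an inference command so pre_cmd is materialized and sourced first."""
        if not pre_cmd:
            return command_parts
        # Approximation of _str_to_echo_command: write pre_cmd into a script file.
        write_script = f"printf '%s\\n' {shlex.quote(pre_cmd)} > deployment_pre_cmd.sh"
        original_cmd = " ".join(shlex.quote(str(c)) for c in command_parts)
        return [
            "/bin/bash",
            "-c",
            f"{write_script} && source deployment_pre_cmd.sh && exec {original_cmd}",
        ]


    # Example: run a pip install before launching a vLLM server.
    print(wrap_with_pre_cmd(["vllm", "serve", "/checkpoint", "--port", "8000"],
                            "pip install flash-attn"))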
nemo_evaluator_launcher/executors/lepton/executor.py

@@ -18,6 +18,7 @@
 Handles deployment and evaluation using Lepton endpoints with NIM containers.
 """

+import os
 import time
 from pathlib import Path
 from typing import List

@@ -33,9 +34,10 @@ from nemo_evaluator_launcher.common.execdb import (
 from nemo_evaluator_launcher.common.helpers import get_eval_factory_command
 from nemo_evaluator_launcher.common.logging_utils import logger
 from nemo_evaluator_launcher.common.mapping import (
-    get_task_from_mapping,
+    get_task_definition_for_job,
     load_tasks_mapping,
 )
+from nemo_evaluator_launcher.common.printing_utils import red
 from nemo_evaluator_launcher.executors.base import (
     BaseExecutor,
     ExecutionState,

@@ -88,6 +90,23 @@ class LeptonExecutor(BaseExecutor):
         # Generate invocation ID
         invocation_id = generate_invocation_id()

+        # TODO(agronskiy): the structure of this executor differs from others,
+        # so the best place to check for unsafe commands yelids a bit of duplication.
+        # We can't use the get_eval_factory_command here because the port is not yet
+        # populated.
+        # Refactor the whole thing.
+        is_potentially_unsafe = False
+        for idx, task in enumerate(cfg.evaluation.tasks):
+            pre_cmd: str = task.get("pre_cmd") or cfg.evaluation.get("pre_cmd") or ""
+            if pre_cmd:
+                is_potentially_unsafe = True
+                break
+
+        # Check for deployment pre_cmd
+        deployment_pre_cmd: str = cfg.deployment.get("pre_cmd") or ""
+        if deployment_pre_cmd:
+            is_potentially_unsafe = True
+
         # DRY-RUN mode
         if dry_run:
             output_dir = Path(cfg.execution.output_dir).absolute() / invocation_id
@@ -102,8 +121,34 @@ class LeptonExecutor(BaseExecutor):
             else:
                 print(f"with endpoint type '{cfg.deployment.type}'")

+            if is_potentially_unsafe:
+                print(
+                    red(
+                        "\nFound `pre_cmd` (evaluation or deployment) which carries security risk. When running without --dry-run "
+                        "make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1"
+                    )
+                )
+
             return invocation_id

+        if is_potentially_unsafe:
+            if os.environ.get("NEMO_EVALUATOR_TRUST_PRE_CMD", "") == "1":
+                logger.warning(
+                    "Found non-empty commands (e.g. `pre_cmd` in evaluation or deployment) and NEMO_EVALUATOR_TRUST_PRE_CMD "
+                    "is set, proceeding with caution."
+                )
+
+            else:
+                logger.error(
+                    "Found non-empty commands (e.g. `pre_cmd` in evaluation or deployment) and NEMO_EVALUATOR_TRUST_PRE_CMD "
+                    "is not set. This might carry security risk and unstable environments. "
+                    "To continue, make sure you trust the command and set NEMO_EVALUATOR_TRUST_PRE_CMD=1.",
+                )
+                raise AttributeError(
+                    "Untrusted command found in config, make sure you trust and "
+                    "set NEMO_EVALUATOR_TRUST_PRE_CMD=1."
+                )
+
         # For deployment: none, we use the existing endpoint for all tasks
         if cfg.deployment.type == "none":
             print("📌 Using existing endpoint (deployment: none)")
@@ -248,8 +293,10 @@ class LeptonExecutor(BaseExecutor):
                 return

         # Construct the full endpoint URL
-        task_definition = get_task_from_mapping(
-            task.name, tasks_mapping
+        task_definition = get_task_definition_for_job(
+            task_query=task.name,
+            base_mapping=tasks_mapping,
+            container=task.get("container"),
         )
         task_endpoint_type = task_definition["endpoint_type"]
         endpoint_path = cfg.deployment.endpoints[task_endpoint_type]

@@ -338,7 +385,11 @@ class LeptonExecutor(BaseExecutor):

         # Submit each evaluation task as a Lepton job
         for idx, task in enumerate(cfg.evaluation.tasks):
-            task_definition = get_task_from_mapping(task.name, tasks_mapping)
+            task_definition = get_task_definition_for_job(
+                task_query=task.name,
+                base_mapping=tasks_mapping,
+                container=task.get("container"),
+            )

             # Create job ID and Lepton job name (max 36 chars)
             job_id = generate_job_id(invocation_id, idx)
@@ -491,6 +542,33 @@ class LeptonExecutor(BaseExecutor):

             job_mounts.append(mount_dict)

+        # Handle dataset directory mounting if NEMO_EVALUATOR_DATASET_DIR is required
+        if "NEMO_EVALUATOR_DATASET_DIR" in task_definition.get(
+            "required_env_vars", []
+        ):
+            # Get dataset directory from task config
+            if "dataset_dir" in task:
+                dataset_mount_host = task["dataset_dir"]
+            else:
+                raise ValueError(
+                    f"{task.name} task requires a dataset_dir to be specified. "
+                    f"Add 'dataset_dir: /path/to/your/dataset' under the task configuration."
+                )
+            # Get container mount path (default to /datasets if not specified)
+            dataset_mount_container = task.get(
+                "dataset_mount_path", "/datasets"
+            )
+            # Add dataset mount to job mounts
+            # Lepton mount format: {"path": "/path/in/container", "mount_from": {"path": "/host/path"}}
+            job_mounts.append(
+                {
+                    "path": dataset_mount_container,
+                    "mount_from": {"path": dataset_mount_host},
+                }
+            )
+            # Add NEMO_EVALUATOR_DATASET_DIR environment variable
+            job_env_vars["NEMO_EVALUATOR_DATASET_DIR"] = dataset_mount_container
+
         print(
             f" - Storage: {len(job_mounts)} mount(s) with evaluation ID isolation"
         )
@@ -610,7 +688,7 @@ class LeptonExecutor(BaseExecutor):
             job_state = lepton_status.get("state", "Unknown")

             # Map Lepton job states to our execution states
-            if job_state == "Succeeded":
+            if job_state in ["Succeeded", "Completed"]:
                 state = ExecutionState.SUCCESS
             elif job_state in ["Running", "Pending", "Starting"]:
                 state = ExecutionState.RUNNING
@@ -817,9 +895,13 @@ def _dry_run_lepton(
 ) -> None:
     print("DRY RUN: Lepton job configurations prepared")
     try:
-        # validate tasks
+        # validate tasks (container overrides are supported)
         for task in cfg.evaluation.tasks:
-            get_task_from_mapping(task.name, tasks_mapping)
+            _ = get_task_definition_for_job(
+                task_query=task.name,
+                base_mapping=tasks_mapping,
+                container=task.get("container"),
+            )

         # nice-to-have checks (existing endpoint URL or endpoints mapping)
         if getattr(cfg.deployment, "type", None) == "none":

@@ -837,7 +919,11 @@
         else:
             endpoints_cfg = getattr(cfg.deployment, "endpoints", {}) or {}
             for task in cfg.evaluation.tasks:
-                td = get_task_from_mapping(task.name, tasks_mapping)
+                td = get_task_definition_for_job(
+                    task_query=task.name,
+                    base_mapping=tasks_mapping,
+                    container=task.get("container"),
+                )
                 etype = td.get("endpoint_type")
                 if etype not in endpoints_cfg:
                     raise ValueError(

@@ -856,9 +942,21 @@
         getattr(cfg, "target", {}).get("api_endpoint", {}), "api_key_name", None
     )
     for task in cfg.evaluation.tasks:
-        td = get_task_from_mapping(task.name, tasks_mapping)
+        td = get_task_definition_for_job(
+            task_query=task.name,
+            base_mapping=tasks_mapping,
+            container=task.get("container"),
+        )
         required = td.get("required_env_vars", []) or []
         for var in required:
+            # Skip NEMO_EVALUATOR_DATASET_DIR as it's handled by dataset mounting logic
+            if var == "NEMO_EVALUATOR_DATASET_DIR":
+                if "dataset_dir" not in task:
+                    raise ValueError(
+                        f"Task '{task.name}' requires dataset_dir to be specified. "
+                        f"Add 'dataset_dir: /path/to/your/dataset' under the task configuration."
+                    )
+                continue
             if var == "API_KEY":
                 if not (("API_KEY" in lepton_env_vars) or bool(api_key_name)):
                     raise ValueError(