PyPI - nemo-evaluator-launcher - Versions diffs - 0.1.12__tar.gz → 0.1.13__tar.gz - Mend

nemo-evaluator-launcher 0.1.12__tar.gz → 0.1.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nemo-evaluator-launcher might be problematic. Click here for more details.

Files changed (64) hide show

{nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nemo-evaluator-launcher
-Version: 0.1.12
+Version: 0.1.13
 Summary: Launcher for the evaluations provided by NeMo Evaluator containers with different runtime backends
 Author: NVIDIA
 Author-email: nemo-toolkit@nvidia.com

{nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/api/functional.py RENAMED Viewed

@@ -456,6 +456,7 @@ def export_results(
                                 yaml.safe_load(ypath_export.read_text(encoding="utf-8"))
                                 or {}
                             )
+                            # execution.auto_export contains auto-export destinations
                             exec_cfg = cfg_yaml.get("execution") or {}
                             auto_exp = (exp_yaml.get("execution") or {}).get(
                                 "auto_export"
@@ -463,15 +464,39 @@ def export_results(
                             if auto_exp is not None:
                                 exec_cfg["auto_export"] = auto_exp
                                 cfg_yaml["execution"] = exec_cfg
+                            # top-level export block contains exporter config
+                            if "export" in exp_yaml:
+                                cfg_yaml["export"] = exp_yaml["export"]
+                            # Merge evaluation.tasks from export_config (Slurm writes it there)
+                            if "evaluation" in exp_yaml and exp_yaml["evaluation"]:
+                                eval_cfg = cfg_yaml.get("evaluation") or {}
+                                eval_cfg.update(exp_yaml["evaluation"])
+                                cfg_yaml["evaluation"] = eval_cfg
                         # metadata
+                        executor_name = (cfg_yaml.get("execution") or {}).get(
+                            "type", "local"
+                        )
                         md_job_data = JobData(
                             invocation_id=single_id.split(".")[0],
                             job_id=single_id,
                             timestamp=0.0,
-                            executor="local",  #
-                            data={"output_dir": str(Path.cwd().parent)},
+                            executor=executor_name,
+                            data={
+                                "output_dir": str(Path.cwd().parent),
+                                "storage_type": "remote_local",
+                            },
                             config=cfg_yaml,
                         )
+                        # DEBUG: print what we loaded
+                        print(f"DEBUG: cfg_yaml keys: {list(cfg_yaml.keys())}")
+                        if "evaluation" in cfg_yaml:
+                            print(
+                                f"DEBUG: evaluation.tasks: {cfg_yaml.get('evaluation', {}).get('tasks')}"
+                            )
                     except Exception:
                         md_job_data = None
                 # fallback to execDB only
@@ -492,6 +517,7 @@ def export_results(
                             "success": job_result.success,
                             "message": job_result.message,
                             "metadata": job_result.metadata or {},
+                            "dest": getattr(job_result, "dest", None),
                         }
                     },
                     "metadata": job_result.metadata or {},

nemo_evaluator_launcher-0.1.13/src/nemo_evaluator_launcher/cli/export.py ADDED Viewed

@@ -0,0 +1,267 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Export evaluation results to specified target."""
+from dataclasses import dataclass
+from typing import Any, List, Optional
+from simple_parsing import field
+@dataclass
+class ExportCmd:
+    """Export evaluation results."""
+    # Short usage examples will show up in -h as the class docstring:
+    # Examples:
+    #   nemo-evaluator-launcher export 8abcd123 --dest local --format json --out .
+    #   nemo-evaluator-launcher export 8abcd123.0 9ef01234 --dest local --format csv --out results/ -fname processed_results.csv
+    #   nemo-evaluator-launcher export 8abcd123 --dest jet
+    invocation_ids: List[str] = field(
+        positional=True,
+        help="IDs to export (space-separated). Accepts invocation IDs (xxxxxxxx) and job IDs (xxxxxxxx.n); mixture of both allowed.",
+    )
+    dest: str = field(
+        default="local",
+        alias=["--dest"],
+        choices=["local", "wandb", "mlflow", "gsheets", "jet"],
+        help="Export destination.",
+    )
+    # overrides for exporter config; use -o similar to run command
+    override: List[str] = field(
+        default_factory=list,
+        action="append",
+        nargs="?",
+        alias=["-o", "--override"],
+        help="Hydra-style overrides for exporter config. Use `export.<dest>.key=value` (e.g., -o export.wandb.entity=org-name).",
+    )
+    output_dir: Optional[str] = field(
+        default=".",
+        alias=["--output-dir", "-out"],
+        help="Output directory (default: current directory).",
+    )
+    output_filename: Optional[str] = field(
+        default=None,
+        alias=["--output-filename", "-fname"],
+        help="Summary filename (default: processed_results.json/csv based on --format).",
+    )
+    format: Optional[str] = field(
+        default=None,
+        alias=["--format"],
+        choices=["json", "csv"],
+        help="Summary format for --dest local. Omit to only copy artifacts.",
+    )
+    copy_logs: bool = field(
+        default=False,
+        alias=["--copy-logs"],
+        help="Include logs when copying locally (default: False).",
+    )
+    log_metrics: List[str] = field(
+        default_factory=list,
+        alias=["--log-metrics"],
+        help="Filter metrics by name (repeatable). Examples: score, f1, mmlu_score_micro.",
+    )
+    only_required: Optional[bool] = field(
+        default=None,
+        alias=["--only-required"],
+        help="Copy only required+optional artifacts (default: True). Set to False to copy all available artifacts.",
+    )
+    def execute(self) -> None:
+        """Execute export."""
+        # Import heavy dependencies only when needed
+        from omegaconf import OmegaConf
+        from nemo_evaluator_launcher.api.functional import export_results
+        # Validation: ensure IDs are provided
+        if not self.invocation_ids:
+            print("Error: No IDs provided. Specify one or more invocation or job IDs.")
+            print(
+                "Usage: nemo-evaluator-launcher export <id> [<id>...] --dest <destination>"
+            )
+            return
+        config: dict[str, Any] = {
+            "copy_logs": self.copy_logs,
+        }
+        # Output handling
+        if self.output_dir:
+            config["output_dir"] = self.output_dir
+        if self.output_filename:
+            config["output_filename"] = self.output_filename
+        # Format and filters
+        if self.format:
+            config["format"] = self.format
+        if self.log_metrics:
+            config["log_metrics"] = self.log_metrics
+        # Add only_required if explicitly passed via CLI
+        if self.only_required is not None:
+            config["only_required"] = self.only_required
+        # Parse and validate overrides
+        if self.override:
+            # Flatten possible list-of-lists from parser
+            flat_overrides: list[str] = []
+            for item in self.override:
+                if isinstance(item, list):
+                    flat_overrides.extend(str(x) for x in item)
+                else:
+                    flat_overrides.append(str(item))
+            try:
+                self._validate_overrides(flat_overrides, self.dest)
+            except ValueError as e:
+                print(f"Error: {e}")
+                return
+            # Expand env vars in override vals ($VAR / ${VAR})
+            import os
+            from omegaconf import OmegaConf
+            expanded_overrides: list[str] = []
+            for ov in flat_overrides:
+                if "=" in ov:
+                    k, v = ov.split("=", 1)
+                    expanded_overrides.append(f"{k}={os.path.expandvars(v)}")
+                else:
+                    expanded_overrides.append(os.path.expandvars(ov))
+            dot_cfg = OmegaConf.from_dotlist(expanded_overrides)
+            as_dict = OmegaConf.to_container(dot_cfg, resolve=True) or {}
+            if isinstance(as_dict, dict) and "export" in as_dict:
+                export_map = as_dict.get("export") or {}
+                if isinstance(export_map, dict) and self.dest in export_map:
+                    config.update(export_map[self.dest] or {})
+                else:
+                    config.update(as_dict)
+            else:
+                config.update(as_dict)
+        if self.format and self.dest != "local":
+            print(
+                "Note: --format is only used by --dest local. It will be ignored for other destinations."
+            )
+        if "only_required" in config and self.only_required is True:
+            config.pop("only_required", None)
+        print(
+            f"Exporting {len(self.invocation_ids)} {'invocations' if len(self.invocation_ids) > 1 else 'invocation'} to {self.dest}..."
+        )
+        result = export_results(self.invocation_ids, self.dest, config)
+        if not result.get("success", False):
+            err = result.get("error", "Unknown error")
+            print(f"\nExport failed: {err}")
+            # Provide actionable guidance for common configuration issues
+            if self.dest == "mlflow":
+                if "tracking_uri" in str(err).lower():
+                    print("\nMLflow requires 'tracking_uri' to be configured.")
+                    print(
+                        "Set it via: -o export.mlflow.tracking_uri=http://mlflow-server:5000"
+                    )
+                elif "not installed" in str(err).lower():
+                    print("\nMLflow package not installed.")
+                    print("Install via: pip install nemo-evaluator-launcher[mlflow]")
+            elif self.dest == "wandb":
+                if "entity" in str(err).lower() or "project" in str(err).lower():
+                    print("\nW&B requires 'entity' and 'project' to be configured.")
+                    print(
+                        "Set via: -o export.wandb.entity=my-org -o export.wandb.project=my-proj"
+                    )
+                elif "not installed" in str(err).lower():
+                    print("\nW&B package not installed.")
+                    print("Install via: pip install nemo-evaluator-launcher[wandb]")
+            elif self.dest == "gsheets":
+                if "not installed" in str(err).lower():
+                    print("\nGoogle Sheets package not installed.")
+                    print("Install via: pip install nemo-evaluator-launcher[gsheets]")
+            return
+        # Success path
+        if len(self.invocation_ids) == 1:
+            # Single invocation
+            invocation_id = self.invocation_ids[0]
+            print(f"Export completed for {invocation_id}")
+            for job_id, job_result in result["jobs"].items():
+                if job_result.get("success"):
+                    print(f"  {job_id}: {job_result.get('message', '')}")
+                    metadata = job_result.get("metadata", {})
+                    if metadata.get("run_url"):
+                        print(f"    URL: {metadata['run_url']}")
+                    if metadata.get("summary_path"):
+                        print(f"    Summary: {metadata['summary_path']}")
+                    path_hint = job_result.get("dest") or metadata.get("output_dir")
+                    if self.dest == "local" and path_hint:
+                        print(f"    Path: {path_hint}")
+                else:
+                    print(f"  {job_id} failed: {job_result.get('message', '')}")
+        else:
+            # Multiple invocations
+            metadata = result.get("metadata", {})
+            print(
+                f"Export completed: {metadata.get('successful_invocations', 0)}/{metadata.get('total_invocations', 0)} successful"
+            )
+            # Show summary path if available
+            if metadata.get("summary_path"):
+                print(f"Summary: {metadata['summary_path']}")
+            # Show per-invocation status
+            for invocation_id, inv_result in result["invocations"].items():
+                if inv_result.get("success"):
+                    job_count = len(inv_result.get("jobs", {}))
+                    print(f"  {invocation_id}: {job_count} jobs")
+                else:
+                    print(
+                        f"  {invocation_id}: failed, {inv_result.get('error', 'Unknown error')}"
+                    )
+    def _validate_overrides(self, overrides: List[str], dest: str) -> None:
+        """Validate override list for destination consistency.
+        Raises:
+            ValueError: If overrides specify wrong destination or have other issues.
+        """
+        if not overrides:
+            return  # nothing to validate
+        # Check each override for destination mismatch
+        for override_str in overrides:
+            if override_str.startswith(
+                "export."
+            ):  # check if override starts with export.
+                # Extract destination from override path
+                try:
+                    key_part = override_str.split("=")[0]  # Get left side before =
+                    parts = key_part.split(".")
+                    if len(parts) >= 2:
+                        override_dest = parts[1]
+                        if override_dest != dest:
+                            raise ValueError(
+                                f"Override destination mismatch: override specifies 'export.{override_dest}' but --dest is '{dest}'. "
+                                f"Either change --dest to '{override_dest}' or use 'export.{dest}' in overrides."
+                            )
+                except (IndexError, AttributeError):
+                    # malformed override -> OmegaConf handles this
+                    pass

{nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/cli/run.py RENAMED Viewed

@@ -98,7 +98,17 @@ class Cmd:
                 config_dir=self.config_dir,
             )
-        invocation_id = run_eval(config, self.dry_run)
+        try:
+            invocation_id = run_eval(config, self.dry_run)
+        except Exception as e:
+            print(f"\033[31m✗ Job submission failed | Error: {e}\033[0m")
+            raise
+        # Print general success message with invocation ID
+        if invocation_id is not None and not self.dry_run:
+            print(
+                f"\033[32m✓ Job submission successful | Invocation ID: {invocation_id}\033[0m"
+            )
         # Save the complete configuration
         if not self.dry_run and invocation_id is not None:
@@ -146,6 +156,15 @@ class Cmd:
         if invocation_id is not None:
             print(f"to check status: nemo-evaluator-launcher status {invocation_id}")
             print(f"to kill all jobs: nemo-evaluator-launcher kill {invocation_id}")
-            print(
-                f"to kill individual jobs: nemo-evaluator-launcher kill <job_id> (e.g., {invocation_id}.0)"
+            # Show actual job IDs and task names
+            print("to kill individual jobs:")
+            # Access tasks - will work after normalization in run_eval
+            tasks = (
+                config.evaluation.tasks
+                if hasattr(config.evaluation, "tasks")
+                else config.evaluation
             )
+            for idx, task in enumerate(tasks):
+                job_id = f"{invocation_id}.{idx}"
+                print(f"  nemo-evaluator-launcher kill {job_id}  # {task.name}")

{nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/cli/status.py RENAMED Viewed

@@ -102,6 +102,8 @@ class Cmd:
             status = job.get("status", "")
             formatted_status = self._format_status_with_indicators(status)
+            # Extract task name
             rows.append(
                 [
                     job.get("job_id", ""),
@@ -144,7 +146,7 @@ class Cmd:
             ExecutionState.SUCCESS.value: "\033[32m✓ SUCCESS\033[0m",  # Green Unicode checkmark
             ExecutionState.FAILED.value: "\033[31m✗ FAILED\033[0m",  # Red Unicode X
             ExecutionState.RUNNING.value: "\033[33m▶ RUNNING\033[0m",  # Yellow Unicode play button
-            ExecutionState.PENDING.value: "\033[36m⏳ PENDING\033[0m",  # Cyan Unicode hourglass
+            ExecutionState.PENDING.value: "\033[36m⧗ PENDING\033[0m",  # Cyan Unicode hourglass (U+29D7)
             ExecutionState.KILLED.value: "\033[35m✗ KILLED\033[0m",  # Magenta Unicode X
             # Additional states for error handling
             "not_found": "\033[90m? NOT FOUND\033[0m",  # Gray question mark

{nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/lepton/deployment_helpers.py RENAMED Viewed

@@ -428,14 +428,34 @@ def create_lepton_endpoint(cfg: DictConfig, endpoint_name: str) -> bool:
             print(f"✅ Successfully created Lepton endpoint: {endpoint_name}")
             return True
         else:
-            print(f"❌ Failed to create Lepton endpoint: {result.stderr}")
+            error_msg = result.stderr.strip() if result.stderr else ""
+            output_msg = result.stdout.strip() if result.stdout else ""
+            print(
+                f"✗ Failed to create Lepton endpoint | Endpoint: {endpoint_name} | Return code: {result.returncode}"
+            )
+            if error_msg:
+                print(f"   stderr: {error_msg}")
+            if output_msg:
+                print(f"   stdout: {output_msg}")
             return False
-    except subprocess.TimeoutExpired:
-        print(f"❌ Timeout creating Lepton endpoint: {endpoint_name}")
+    except subprocess.TimeoutExpired as e:
+        print(
+            f"✗ Timeout creating Lepton endpoint | Endpoint: {endpoint_name} | Timeout: 300s"
+        )
+        if hasattr(e, "stderr") and e.stderr:
+            print(f"   stderr: {e.stderr}")
+        if hasattr(e, "stdout") and e.stdout:
+            print(f"   stdout: {e.stdout}")
         return False
     except subprocess.CalledProcessError as e:
-        print(f"❌ Error creating Lepton endpoint: {e}")
+        print(
+            f"✗ Error creating Lepton endpoint | Endpoint: {endpoint_name} | Error: {e}"
+        )
+        if hasattr(e, "stderr") and e.stderr:
+            print(f"   stderr: {e.stderr}")
+        if hasattr(e, "stdout") and e.stdout:
+            print(f"   stdout: {e.stdout}")
         return False
     finally:
         # Clean up temporary file

{nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/lepton/executor.py RENAMED Viewed

@@ -482,7 +482,8 @@ class LeptonExecutor(BaseExecutor):
                 if not job_success:
                     raise RuntimeError(
-                        f"Failed to submit Lepton job for task: {task.name}. Error: {error_msg}"
+                        f"Failed to submit Lepton job | Task: {task.name} | Job ID: {job_id} | "
+                        f"Lepton job name: {lepton_job_name} | Error: {error_msg}"
                     )
                 # Store job metadata in database (with task-specific endpoint info)
@@ -504,8 +505,6 @@ class LeptonExecutor(BaseExecutor):
                     )
                 )
-                print(f"✅ Task {task.name}: Submitted evaluation job {job_id}")
             # Jobs submitted successfully - return immediately (non-blocking)
             print(
                 f"\n✅ Successfully submitted {len(lepton_job_names)} evaluation jobs to Lepton"
@@ -536,9 +535,8 @@ class LeptonExecutor(BaseExecutor):
             return invocation_id
-        except Exception as e:
+        except Exception:
             # Clean up any created endpoints on failure
-            print(f"❌ Error during evaluation: {e}")
             if cfg.deployment.type != "none" and "endpoint_names" in locals():
                 for endpoint_name in endpoint_names:
                     if endpoint_name:

{nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/local/executor.py RENAMED Viewed

@@ -23,6 +23,7 @@ import os
 import pathlib
 import platform
 import shlex
+import shutil
 import subprocess
 import time
 from typing import List, Optional
@@ -76,6 +77,13 @@ class LocalExecutor(BaseExecutor):
                 f"type {cfg.deployment.type} is not implemented -- add deployment support"
             )
+        # Check if docker is available (skip in dry_run mode)
+        if not dry_run and shutil.which("docker") is None:
+            raise RuntimeError(
+                "Docker is not installed or not in PATH. "
+                "Please install Docker to run local evaluations."
+            )
         # Generate invocation ID for this evaluation run
         invocation_id = generate_invocation_id()
@@ -233,35 +241,48 @@ class LocalExecutor(BaseExecutor):
         # To ensure subprocess continues after python exits:
         # - on Unix-like systems, to fully detach the subprocess
         #   so it does not die when Python exits, pass start_new_session=True;
-        # - on Widnows use creationflags=subprocess.CREATE_NEW_PROCESS_GROUP flag.
+        # - on Windows use creationflags=subprocess.CREATE_NEW_PROCESS_GROUP flag.
         os_name = platform.system()
+        processes = []
         if is_execution_mode_sequential:
             if os_name == "Windows":
-                subprocess.Popen(
+                proc = subprocess.Popen(
                     shlex.split("bash run_all.sequential.sh"),
                     cwd=output_dir,
                     creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
                 )
             else:
-                subprocess.Popen(
+                proc = subprocess.Popen(
                     shlex.split("bash run_all.sequential.sh"),
                     cwd=output_dir,
                     start_new_session=True,
                 )
+            processes.append(("run_all.sequential.sh", proc, output_dir))
         else:
             for task in cfg.evaluation.tasks:
                 if os_name == "Windows":
-                    subprocess.Popen(
+                    proc = subprocess.Popen(
                         shlex.split("bash run.sh"),
                         cwd=output_dir / task.name,
                         creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
                     )
                 else:
-                    subprocess.Popen(
+                    proc = subprocess.Popen(
                         shlex.split("bash run.sh"),
                         cwd=output_dir / task.name,
                         start_new_session=True,
                     )
+                processes.append((task.name, proc, output_dir / task.name))
+        # Wait briefly and check if bash scripts exited immediately (which means error)
+        time.sleep(0.3)
+        for name, proc, work_dir in processes:
+            exit_code = proc.poll()
+            if exit_code is not None and exit_code != 0:
+                error_msg = f"Script for {name} exited with code {exit_code}"
+                raise RuntimeError(f"Job startup failed | {error_msg}")
         print("\nCommands for real-time monitoring:")
         for job_id, evaluation_task in zip(job_ids, evaluation_tasks):

nemo-evaluator-launcher 0.1.12__tar.gz → 0.1.13__tar.gz

Potentially problematic release.

nemo-evaluator-launcher 0.1.12__tar.gz → 0.1.13__tar.gz