PyPI - nemo-evaluator-launcher - Versions diffs - 0.1.17__tar.gz → 0.1.18__tar.gz - Mend

nemo-evaluator-launcher 0.1.17tar.gz → 0.1.18tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nemo-evaluator-launcher might be problematic. Click here for more details.

Files changed (65) hide show

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nemo-evaluator-launcher
-Version: 0.1.17
+Version: 0.1.18
 Summary: Launcher for the evaluations provided by NeMo Evaluator containers with different runtime backends
 Author: NVIDIA
 Author-email: nemo-toolkit@nvidia.com

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/src/nemo_evaluator_launcher/api/types.py RENAMED Viewed

@@ -19,9 +19,18 @@ This module defines data structures and helpers for configuration and type safet
 """
 import os
+import warnings
 from dataclasses import dataclass
 from typing import cast
+# ruff: noqa: E402
+# Later when adding optional module to hydra, since the internal package is optional,
+# will generate a hydra warning. We suppress it as distraction and bad UX, before hydra gets invoked.
+warnings.filterwarnings(
+    "ignore",
+    message="provider=hydra.searchpath.*path=nemo_evaluator_launcher_internal.*is not available\\.",
+)
 import hydra
 from hydra.core.global_hydra import GlobalHydra
 from omegaconf import DictConfig, OmegaConf

nemo_evaluator_launcher-0.1.17/src/nemo_evaluator_launcher/cli/debug.py → nemo_evaluator_launcher-0.1.18/src/nemo_evaluator_launcher/cli/info.py RENAMED Viewed

@@ -14,16 +14,16 @@
 # limitations under the License.
 #
-"""Debugging helper functionalities for nemo-evaluator-launcher."""
+"""Job information helper functionalities for nemo-evaluator-launcher."""
+import sys
 from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Tuple
 from simple_parsing import field
-from nemo_evaluator_launcher.cli.export import ExportCmd
 from nemo_evaluator_launcher.cli.version import Cmd as VersionCmd
 from nemo_evaluator_launcher.common.execdb import EXEC_DB_FILE, ExecutionDB, JobData
 from nemo_evaluator_launcher.common.logging_utils import logger
@@ -35,52 +35,60 @@ _EXPORT_HELPER = LocalExporter({})
 @dataclass
-class DebugCmd(ExportCmd):
-    """Debugging functionalities for nemo-evaluator-launcher.
+class InfoCmd:
+    """Job information functionalities for nemo-evaluator-launcher.
     Examples:
-      nemo-evaluator-launcher debug <inv>                 # Full debug info
-      nemo-evaluator-launcher debug <inv> --config        # Show stored job config (YAML)
-      nemo-evaluator-launcher debug <inv> --artifacts     # Show artifact locations
-      nemo-evaluator-launcher debug <inv> --logs          # Show log locations
-      nemo-evaluator-launcher debug <inv> --copy-logs <path>       # Copy logs (default: current dir)
-      nemo-evaluator-launcher debug <inv> --copy-artifacts <path>   # Copy artifacts (default: current dir)
+      nemo-evaluator-launcher info <inv>                 # Full job info
+      nemo-evaluator-launcher info <inv> --config        # Show stored job config (YAML)
+      nemo-evaluator-launcher info <inv> --artifacts     # Show artifact locations and key files
+      nemo-evaluator-launcher info <inv> --logs          # Show log locations and key files
+      nemo-evaluator-launcher info <inv> --copy-logs <DIR>       # Copy logs to <DIR>
+      nemo-evaluator-launcher info <inv> --copy-artifacts <DIR>  # Copy artifacts to <DIR>
     Notes:
-      - Supports invocation IDs and job IDs
+      - Supports invocation IDs and job IDs (space-separated)
       - Shows local or remote paths depending on executor (local/slurm/lepton)
+      - Copy operations work for both local and remote jobs (expect longer time for remote jobs)
+      - Copy operations are not supported for Lepton executor (yet).
     """
-    # local exporter destination defaults to local
-    dest: str = field(default="local", init=False)
+    invocation_ids: List[str] = field(
+        positional=True,
+        help="IDs to show info for (space-separated). Accepts invocation IDs or/and job IDs.",
+    )
-    # debug modes
-    config: bool = field(default=False, help="Show job configuration")
-    artifacts: bool = field(default=False, help="Show artifact locations")
-    logs: bool = field(default=False, help="Show log locations")
+    # info modes
+    config: bool = field(
+        default=False, action="store_true", help="Show job configuration"
+    )
+    artifacts: bool = field(
+        default=False, action="store_true", help="Show artifact locations and key files"
+    )
+    logs: bool = field(
+        default=False, action="store_true", help="Show log locations and key files"
+    )
-    # copy operations
-    copy_logs: Optional[str] = field(
+    # copy operations - work for both local and remote jobs
+    copy_logs: str | None = field(
         default=None,
         alias=["--copy-logs"],
-        nargs="?",
-        help="Copy logs to local directory (default: current dir)",
+        help="Copy logs to a local directory",
+        metavar="DIR",
     )
-    copy_artifacts: Optional[str] = field(
+    copy_artifacts: str | None = field(
         default=None,
         alias=["--copy-artifacts"],
-        nargs="?",
-        help="Copy artifacts to local directory (default: current dir)",
+        help="Copy artifacts to a local directory",
+        metavar="DIR",
     )
     def execute(self) -> None:
-        # show version
         VersionCmd().execute()
-        logger.info("Debug command started", invocation_ids=self.invocation_ids)
+        logger.info("Info command started", invocation_ids=self.invocation_ids)
         if not self.invocation_ids:
-            logger.error("No invocation IDs provided")
+            logger.error("No job or invocation IDs provided.")
             raise ValueError("No job or invocation IDs provided.")
         jobs = self._resolve_jobs()
@@ -96,48 +104,63 @@ class DebugCmd(ExportCmd):
                 "No valid jobs found (jobs may have been deleted or IDs may be incorrect)."
             )
             print(
-                "No valid jobs found (jobs may have been deletedd or IDs may be incorrect)."
+                "No valid jobs found (jobs may have been deleted or IDs may be incorrect)."
             )
             return
+        # show ops
         if self.config:
-            logger.info("Showing job configuration", job_count=len(jobs))
             self._show_config_info(jobs)
-        elif self.logs:
-            logger.info("Showing job logs locations", job_count=len(jobs))
+        if self.logs:
             self._show_logs_info(jobs)
-        elif self.artifacts:
-            logger.info("Showing artifacts locations", job_count=len(jobs))
+        if self.artifacts:
             self._show_artifacts_info(jobs)
-        elif self.copy_logs is not None:
-            dest = self.copy_logs or "."
-            if not self.copy_logs:
-                print(
-                    "No destination provided for --copy-logs; defaulting to current dir"
-                )
+        # copy ops
+        args = sys.argv[1:]
+        copy_logs_flag = "--copy-logs" in args
+        copy_artifacts_flag = "--copy-artifacts" in args
+        if copy_logs_flag:
+            if self.copy_logs is None:
+                raise ValueError("--copy-logs requires a directory path")
+            if not self.copy_logs.strip():
+                raise ValueError("--copy-logs requires a directory path")
             logger.info(
-                "Copying logs to local directory", dest_dir=dest, job_count=len(jobs)
+                "Copying logs to local directory",
+                dest_dir=self.copy_logs,
+                job_count=len(jobs),
             )
-            self._copy_logs(jobs, dest)
-        elif self.copy_artifacts is not None:
-            dest = self.copy_artifacts or "."
-            if not self.copy_artifacts:
-                print(
-                    "No destination provided for --copy-artifacts; defaulting to current dir)"
-                )
+            self._copy_logs(jobs, self.copy_logs)
+        if copy_artifacts_flag:
+            if self.copy_artifacts is None:
+                raise ValueError("--copy-artifacts requires a directory path")
+            if not self.copy_artifacts.strip():
+                raise ValueError("--copy-artifacts requires a directory path")
             logger.info(
                 "Copying artifacts to local directory",
-                dest_dir=dest,
+                dest_dir=self.copy_artifacts,
                 job_count=len(jobs),
             )
-            self._copy_artifacts(jobs, dest)
-        else:
+            self._copy_artifacts(jobs, self.copy_artifacts)
+        # default view when no flags
+        if not any(
+            [
+                self.config,
+                self.logs,
+                self.artifacts,
+                self.copy_logs,
+                self.copy_artifacts,
+            ]
+        ):
             logger.info(
                 "Job metadata details",
                 invocation_id=jobs[0][1].invocation_id if jobs else None,
                 jobs=len(jobs),
             )
-            self._show_invocation_debug_info(jobs)
+            self._show_invocation_info(jobs)
     def _resolve_jobs(self) -> List[Tuple[str, JobData]]:
         """Resolve jobs from ExecDB using IDs (job IDs and/or invocation IDs)."""
@@ -160,15 +183,15 @@ class DebugCmd(ExportCmd):
                 uniq.append((jid, jd))
         return sorted(uniq, key=lambda p: p[0])
-    def _show_invocation_debug_info(self, jobs: List[Tuple[str, JobData]]) -> None:
+    def _show_invocation_info(self, jobs: List[Tuple[str, JobData]]) -> None:
         inv = jobs[0][1].invocation_id if jobs else None
-        logger.info("Debug information", jobs=len(jobs), invocation=inv)
+        logger.info("Job information", jobs=len(jobs), invocation=inv)
         print(
-            f"Debug information for {len(jobs)} job(s){f' under invocation {inv}' if inv else ''}:\n"
+            f"Job information for {len(jobs)} job(s){f' under invocation {inv}' if inv else ''}:\n"
         )
         for job_id, job_data in jobs:
-            self._show_job_debug_info(job_id, job_data)
+            self._show_job_info(job_id, job_data)
             print()
         # footer hint: where to find more metadata
@@ -184,10 +207,14 @@ class DebugCmd(ExportCmd):
         print("  - Use --logs to show log locations.")
         print("  - Use --artifacts to show artifact locations.")
         print("  - Use --config to show stored job configuration (YAML).")
-        print("  - Use --copy-logs [DIR] to copy logs to a local directory.")
-        print("  - Use --copy-artifacts [DIR] to copy artifacts to a local directory.")
+        print(
+            "  - Use --copy-logs [DIR] to copy logs to a local directory (works for local and remote jobs)."
+        )
+        print(
+            "  - Use --copy-artifacts [DIR] to copy artifacts to a local directory (works for local and remote jobs)."
+        )
-    def _show_job_debug_info(self, job_id: str, job_data: JobData) -> None:
+    def _show_job_info(self, job_id: str, job_data: JobData) -> None:
         logger.info("Job", job_id=job_id)
         print(f"Job {job_id}")
@@ -208,14 +235,22 @@ class DebugCmd(ExportCmd):
             logger.info("Task", job_id=job_id, name=task_name)
             print(f"├── Task: {task_name}")
+        # Determine executor type for file descriptions
+        cfg_exec_type = ((job_data.config or {}).get("execution") or {}).get("type")
+        exec_type = (job_data.executor or cfg_exec_type or "").lower()
         # locations via exporter helper
         paths = _EXPORT_HELPER.get_job_paths(job_data)
-        # Artifacts
+        # Artifacts with file descriptions
+        artifacts_list = _get_artifacts_file_list()
         if paths.get("storage_type") == "remote_ssh":
             artifacts_path = f"{paths['username']}@{paths['hostname']}:{paths['remote_path']}/artifacts"
             logger.info("Artifacts", job_id=job_id, path=artifacts_path, remote=True)
             print(f"├── Artifacts: {artifacts_path} (remote)")
+            print("│   └── Key files:")
+            for filename, desc in artifacts_list:
+                print(f"│       ├── {filename} - {desc}")
         else:
             ap = paths.get("artifacts_dir")
             if ap:
@@ -224,14 +259,21 @@ class DebugCmd(ExportCmd):
                     "Artifacts", job_id=job_id, path=str(ap), exists_indicator=exists
                 )
                 print(f"├── Artifacts: {ap} {exists} (local)")
+                print("│   └── Key files:")
+                for filename, desc in artifacts_list:
+                    print(f"│       ├── {filename} - {desc}")
-        # Logs
+        # Logs with file descriptions
+        logs_list = _get_log_file_list(exec_type)
         if paths.get("storage_type") == "remote_ssh":
             logs_path = (
                 f"{paths['username']}@{paths['hostname']}:{paths['remote_path']}/logs"
             )
             logger.info("Logs", job_id=job_id, path=logs_path, remote=True)
             print(f"├── Logs: {logs_path} (remote)")
+            print("│   └── Key files:")
+            for filename, desc in logs_list:
+                print(f"│       ├── {filename} - {desc}")
         else:
             lp = paths.get("logs_dir")
             if lp:
@@ -240,6 +282,9 @@ class DebugCmd(ExportCmd):
                     "Logs", job_id=job_id, path=str(lp), exists_indicator=exists
                 )
                 print(f"├── Logs: {lp} {exists} (local)")
+                print("│   └── Key files:")
+                for filename, desc in logs_list:
+                    print(f"│       ├── {filename} - {desc}")
         # executor-specific
         d = job_data.data or {}
@@ -264,17 +309,23 @@ class DebugCmd(ExportCmd):
             eu = d.get("endpoint_url")
             if eu:
                 print(f"├── Endpoint URL: {eu}")
-        # local and others: paths already displayed above; no extra fields needed
     def _show_logs_info(self, jobs: List[Tuple[str, JobData]]) -> None:
         logger.info("Log locations")
         print("Log locations:\n")
         for job_id, job_data in jobs:
             paths = _EXPORT_HELPER.get_job_paths(job_data)
+            cfg_exec_type = ((job_data.config or {}).get("execution") or {}).get("type")
+            exec_type = (job_data.executor or cfg_exec_type or "").lower()
+            logs_list = _get_log_file_list(exec_type)
             if paths.get("storage_type") == "remote_ssh":
                 logs_path = f"ssh://{paths['username']}@{paths['hostname']}{paths['remote_path']}/logs"
                 logger.info("Logs", job_id=job_id, path=logs_path, remote=True)
                 print(f"{job_id}: {logs_path} (remote)")
+                print("  └── Key files:")
+                for filename, desc in logs_list:
+                    print(f"      ├── {filename} - {desc}")
             else:
                 lp = paths.get("logs_dir")
                 if lp:
@@ -283,18 +334,26 @@ class DebugCmd(ExportCmd):
                         "Logs", job_id=job_id, path=str(lp), exists_indicator=exists
                     )
                     print(f"{job_id}: {lp} {exists} (local)")
+                    print("  └── Key files:")
+                    for filename, desc in logs_list:
+                        print(f"      ├── {filename} - {desc}")
     def _show_artifacts_info(self, jobs: List[Tuple[str, JobData]]) -> None:
         logger.info("Artifact locations")
         print("Artifact locations:\n")
         for job_id, job_data in jobs:
             paths = _EXPORT_HELPER.get_job_paths(job_data)
+            artifacts_list = _get_artifacts_file_list()
             if paths.get("storage_type") == "remote_ssh":
                 artifacts_path = f"ssh://{paths['username']}@{paths['hostname']}{paths['remote_path']}/artifacts"
                 logger.info(
                     "Artifacts", job_id=job_id, path=artifacts_path, remote=True
                 )
                 print(f"{job_id}: {artifacts_path} (remote)")
+                print("  └── Key files:")
+                for filename, desc in artifacts_list:
+                    print(f"      ├── {filename} - {desc}")
             else:
                 ap = paths.get("artifacts_dir")
                 if ap:
@@ -306,6 +365,9 @@ class DebugCmd(ExportCmd):
                         exists_indicator=exists,
                     )
                     print(f"{job_id}: {ap} {exists} (local)")
+                    print("  └── Key files:")
+                    for filename, desc in artifacts_list:
+                        print(f"      ├── {filename} - {desc}")
     def _show_config_info(self, jobs: List[Tuple[str, JobData]]) -> None:
         for job_id, job_data in jobs:
@@ -383,6 +445,9 @@ class DebugCmd(ExportCmd):
                         print(
                             f"{jid}: Failed - {job_result.get('message', 'Unknown error')}"
                         )
+            # Show full destination path
+            full_dest_path = Path(dest_dir).resolve()
+            print(f"Copied to: {full_dest_path}")
         else:
             err = result.get("error", "Unknown error")
             logger.warning("Content copy failed", error=err, dest_dir=dest_dir)
@@ -403,3 +468,45 @@ class DebugCmd(ExportCmd):
         except Exception:
             pass
         return ""
+# Helper functions for file descriptions (based on actual code and content analysis)
+def _get_artifacts_file_list() -> list[tuple[str, str]]:
+    """Files generated in artifacts/."""
+    return [
+        (
+            "results.yml",
+            "Benchmark scores, task results and resolved run configuration.",
+        ),
+        (
+            "eval_factory_metrics.json",
+            "Response + runtime stats (latency, tokens count, memory)",
+        ),
+        ("metrics.json", "Harness/benchmark metric and configuration"),
+        ("report.html", "Request-Response Pairs samples in HTML format (if enabled)"),
+        ("report.json", "Report data in json format, if enabled"),
+    ]
+def _get_log_file_list(executor_type: str) -> list[tuple[str, str]]:
+    """Files actually generated in logs/ - executor-specific."""
+    et = (executor_type or "local").lower()
+    if et == "slurm":
+        return [
+            ("client-{SLURM_JOB_ID}.out", "Evaluation container/process output"),
+            (
+                "slurm-{SLURM_JOB_ID}.out",
+                "SLURM scheduler stdout/stderr (batch submission, export steps).",
+            ),
+            (
+                "server-{SLURM_JOB_ID}.out",
+                "Model server logs when a deployment is used.",
+            ),
+        ]
+    # local executor
+    return [
+        (
+            "stdout.log",
+            "Complete evaluation output (timestamps, resolved config, run/export messages).",
+        ),
+    ]

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/src/nemo_evaluator_launcher/cli/main.py RENAMED Viewed

@@ -19,8 +19,8 @@ import os
 from simple_parsing import ArgumentParser
-import nemo_evaluator_launcher.cli.debug as debug
 import nemo_evaluator_launcher.cli.export as export
+import nemo_evaluator_launcher.cli.info as info
 import nemo_evaluator_launcher.cli.kill as kill
 import nemo_evaluator_launcher.cli.ls_runs as ls_runs
 import nemo_evaluator_launcher.cli.ls_tasks as ls_tasks
@@ -42,12 +42,12 @@ def is_verbose_enabled(args) -> bool:
     subcommands = [
         "run",
         "status",
+        "info",
         "kill",
         "tasks_alias",
         "tasks",
         "runs",
         "export",
-        "debug",
     ]
     for subcmd in subcommands:
         if hasattr(args, subcmd) and hasattr(getattr(args, subcmd), "verbose"):
@@ -163,16 +163,16 @@ def create_parser() -> ArgumentParser:
     )
     export_parser.add_arguments(export.ExportCmd, dest="export")
-    # Debug helper subcommand
-    debug_parser = subparsers.add_parser(
-        "debug",
+    # Info subcommand
+    info_parser = subparsers.add_parser(
+        "info",
         help="Display evaluation job information",
-        description="Debug helper functionalities for nemo-evaluator-launcher",
+        description="Info functionalities for nemo-evaluator-launcher",
     )
-    debug_parser.add_argument(
+    info_parser.add_argument(
         "-v", "--verbose", action="store_true", help="Enable verbose logging"
     )
-    debug_parser.add_arguments(debug.DebugCmd, dest="debug")
+    info_parser.add_arguments(info.InfoCmd, dest="info")
     return parser
@@ -218,8 +218,8 @@ def main() -> None:
             args.runs.execute()
     elif args.command == "export":
         args.export.execute()
-    elif args.command == "debug":
-        args.debug.execute()
+    elif args.command == "info":
+        args.info.execute()
 if __name__ == "__main__":

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/src/nemo_evaluator_launcher/cli/run.py RENAMED Viewed

@@ -19,6 +19,15 @@ from dataclasses import dataclass
 from simple_parsing import field
+from nemo_evaluator_launcher.common.logging_utils import logger
+from nemo_evaluator_launcher.common.printing_utils import (
+    bold,
+    cyan,
+    green,
+    magenta,
+    red,
+)
 @dataclass
 class Cmd:
@@ -101,15 +110,10 @@ class Cmd:
         try:
             invocation_id = run_eval(config, self.dry_run)
         except Exception as e:
-            print(f"\033[31m✗ Job submission failed | Error: {e}\033[0m")
+            print(red(f"✗ Job submission failed, see logs | Error: {e}"))
+            logger.error("Job submission failed", error=e)
             raise
-        # Print general success message with invocation ID
-        if invocation_id is not None and not self.dry_run:
-            print(
-                f"\033[32m✓ Job submission successful | Invocation ID: {invocation_id}\033[0m"
-            )
         # Save the complete configuration
         if not self.dry_run and invocation_id is not None:
             # Determine config output directory
@@ -151,14 +155,22 @@ class Cmd:
                 f.write("#\n")
                 f.write(config_yaml)
-            print(f"Complete run config saved to: {config_path}")
+            print(bold(cyan("Complete run config saved to: ")) + f"\n  {config_path}\n")
+            logger.info("Saved complete config", path=config_path)
-        if invocation_id is not None:
-            print(f"to check status: nemo-evaluator-launcher status {invocation_id}")
-            print(f"to kill all jobs: nemo-evaluator-launcher kill {invocation_id}")
+        # Print general success message with invocation ID and helpful commands
+        if invocation_id is not None and not self.dry_run:
+            print(
+                bold(cyan("To check status: "))
+                + f"nemo-evaluator-launcher status {invocation_id}"
+            )
+            print(
+                bold(cyan("To kill all jobs: "))
+                + f"nemo-evaluator-launcher kill {invocation_id}"
+            )
             # Show actual job IDs and task names
-            print("to kill individual jobs:")
+            print(bold(cyan("To kill individual jobs:")))
             # Access tasks - will work after normalization in run_eval
             tasks = (
                 config.evaluation.tasks
@@ -168,7 +180,21 @@ class Cmd:
             for idx, task in enumerate(tasks):
                 job_id = f"{invocation_id}.{idx}"
                 print(f"  nemo-evaluator-launcher kill {job_id}  # {task.name}")
+            print(
+                magenta(
+                    "(all commands accept shortened IDs as long as there are no conflicts)"
+                )
+            )
             print(
-                "to print all jobs: nemo-evaluator-launcher ls runs"
+                bold(cyan("To print all jobs: ")) + "nemo-evaluator-launcher ls runs"
                 "\n  (--since 1d or --since 6h for time span, see --help)"
             )
+            print(
+                green(
+                    bold(
+                        f"✓ Job submission successful | Invocation ID: {invocation_id}"
+                    )
+                )
+            )

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/src/nemo_evaluator_launcher/cli/status.py RENAMED Viewed

@@ -17,6 +17,7 @@ from dataclasses import dataclass
 from simple_parsing import field
+import nemo_evaluator_launcher.common.printing_utils as pu
 from nemo_evaluator_launcher.executors.base import ExecutionState
@@ -143,17 +144,17 @@ class Cmd:
         """Format status with Unicode visual indicators only."""
         # Status mapping based on ExecutionState enum
         status_formats = {
-            ExecutionState.SUCCESS.value: "\033[32m✓ SUCCESS\033[0m",  # Green Unicode checkmark
-            ExecutionState.FAILED.value: "\033[31m✗ FAILED\033[0m",  # Red Unicode X
-            ExecutionState.RUNNING.value: "\033[33m▶ RUNNING\033[0m",  # Yellow Unicode play button
-            ExecutionState.PENDING.value: "\033[36m⧗ PENDING\033[0m",  # Cyan Unicode hourglass (U+29D7)
-            ExecutionState.KILLED.value: "\033[35m✗ KILLED\033[0m",  # Magenta Unicode X
+            ExecutionState.SUCCESS.value: pu.green("✓ SUCCESS"),
+            ExecutionState.FAILED.value: pu.red("✗ FAILED"),
+            ExecutionState.RUNNING.value: pu.yellow("▶ RUNNING"),
+            ExecutionState.PENDING.value: pu.cyan("⧗ PENDING"),
+            ExecutionState.KILLED.value: pu.magenta("✗ KILLED"),
             # Additional states for error handling
-            "not_found": "\033[90m? NOT FOUND\033[0m",  # Gray question mark
-            "error": "\033[31m✗ ERROR\033[0m",  # Red Unicode X
+            "not_found": pu.grey("? NOT FOUND"),
+            "error": pu.red("✗ ERROR"),
         }
-        return status_formats.get(status.lower(), f"\033[90m? {status.upper()}\033[0m")
+        return status_formats.get(status.lower(), pu.grey(status.upper()))
     def _strip_ansi_codes(self, text: str) -> str:
         """Remove ANSI color codes from text for length calculation."""

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/src/nemo_evaluator_launcher/common/helpers.py RENAMED Viewed

@@ -16,6 +16,7 @@
 import base64
 import copy
 import datetime
+from dataclasses import dataclass
 from typing import Optional
 import yaml
@@ -24,9 +25,36 @@ from omegaconf import DictConfig, OmegaConf
 from nemo_evaluator_launcher.common.logging_utils import logger
-def _yaml_to_echo_command(yaml_str: str, filename: str = "config_ef.yaml") -> str:
+@dataclass(frozen=True)
+class CmdAndReadableComment:
+    """See the comment to `_yaml_to_echo_command`."""
+    # Actual command. Might include hard-to-debug elements such as base64-encoded
+    # configs.
+    cmd: str
+    # A debuggable readable comment that can be passed along for accompanying
+    # the actual command
+    debug: str
+def _yaml_to_echo_command(
+    yaml_str: str, filename: str = "config_ef.yaml"
+) -> CmdAndReadableComment:
+    """Create a safe (see below) echo command saving a yaml to file.
+    Safety in this context means the ability to pass such echo command through the
+    `bash -c '...'` boundaries for example.
+    Naturally, encoding with base64 creates debuggability issues. For that, the second
+    output of the function is the yaml string with bash comment signs prepended.
+    """
     yaml_str_b64 = base64.b64encode(yaml_str.encode("utf-8")).decode("utf-8")
-    return f'echo "{yaml_str_b64}" | base64 -d > {filename}'
+    debug_str = "\n".join(
+        [f"# Contents of {filename}"] + ["# " + s for s in yaml_str.splitlines()]
+    )
+    return CmdAndReadableComment(
+        cmd=f'echo "{yaml_str_b64}" | base64 -d > {filename}', debug=debug_str
+    )
 def get_eval_factory_config(
@@ -55,7 +83,7 @@ def get_eval_factory_config(
 def get_eval_factory_command(
     cfg: DictConfig, user_task_config: DictConfig, task_definition: dict
-) -> str:
+) -> CmdAndReadableComment:
     config_fields = get_eval_factory_config(cfg, user_task_config, task_definition)
     overrides = copy.deepcopy(dict(cfg.evaluation.get("overrides", {})))
@@ -80,7 +108,11 @@ def get_eval_factory_command(
     if overrides:
         eval_command = f"{eval_command} --overrides {overrides_str}"
-    return create_file_cmd + " && " + "cat config_ef.yaml && " + eval_command
+    # We return both the command and the debugging base64-decoded strings, useful
+    # for exposing when building scripts.
+    return CmdAndReadableComment(
+        cmd=create_file_cmd.cmd + " && " + eval_command, debug=create_file_cmd.debug
+    )
 def get_endpoint_url(

nemo_evaluator_launcher-0.1.18/src/nemo_evaluator_launcher/common/printing_utils.py ADDED Viewed

@@ -0,0 +1,93 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Printing utils for more structured or visually appealing prints.
+NOTE: use printing only for main application output that matters. For logging,
+see `logging_utils.py`.
+USAGE:
+```
+  from nemo_evaluator_launcher.common.printing_utils import red, bold
+  print(bold(red("some red bold")))
+```
+"""
+import os
+# If this env var is set to a truthy value (e.g. "1", "yes", "true"), the ANSI
+# color codes below are blanked out and output is printed without colors.
+_DISABLE_COLOR_ENV_VAR = "NEMO_EVALUATOR_DISABLE_COLOR"
+def _is_color_disabled():
+    env_var = os.environ.get(_DISABLE_COLOR_ENV_VAR, "0").lower()
+    if "1" in env_var or "yes" in env_var or "y" in env_var or "true" in env_var:
+        return True
+    return False
+_CODES: dict[str, str] = dict(
+    green="\033[32m",
+    red="\033[31m",
+    red_bg="\033[41m",  # red background
+    cyan="\033[36m",
+    yellow="\033[33m",
+    magenta="\033[35m",
+    grey="\033[90m",
+    bold="\033[1m",
+    reset="\033[0m",
+)
+# If the colors are disabled, we null-out all the codes.
+if _is_color_disabled():
+    for c in _CODES.keys():
+        _CODES[c] = ""
+def green(s: str) -> str:
+    return _CODES["green"] + s + _CODES["reset"]
+def red(s: str) -> str:
+    return _CODES["red"] + s + _CODES["reset"]
+def red_bg(s: str) -> str:
+    return _CODES["red_bg"] + s + _CODES["reset"]
+def cyan(s: str) -> str:
+    return _CODES["cyan"] + s + _CODES["reset"]
+def yellow(s: str) -> str:
+    return _CODES["yellow"] + s + _CODES["reset"]
+def magenta(s: str) -> str:
+    return _CODES["magenta"] + s + _CODES["reset"]
+def grey(s: str) -> str:
+    return _CODES["grey"] + s + _CODES["reset"]
+def bold(s: str) -> str:
+    return _CODES["bold"] + s + _CODES["reset"]

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/src/nemo_evaluator_launcher/configs/execution/slurm/default.yaml RENAMED Viewed

@@ -14,16 +14,17 @@
 # limitations under the License.
 #
 # Each slurm cluster has its own flavour, below we provide some defaults that might meet one's needs.
-hostname: ???
-username: ${oc.env:USER}
-account: ???
+type: slurm              # Executor is chosen based on this field
+hostname: ???            # SLURM headnode (login) hostname (required)
+username: ${oc.env:USER} # Defaults to $USER env var
+account: ???             # SLURM account allocation (required)
+output_dir: ???          # Absolute path accessible on compute nodes (required)
 partition: batch
 num_nodes: 1
 ntasks_per_node: 1
 gres: gpu:8
 walltime: 01:00:00
 subproject: nemo-evaluator-launcher
-output_dir: ???
 env_vars:
   deployment: {}
   evaluation: {}

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/src/nemo_evaluator_launcher/executors/lepton/executor.py RENAMED Viewed

@@ -406,7 +406,12 @@ class LeptonExecutor(BaseExecutor):
                     cfg.target.api_endpoint.url = full_endpoint_url
                     # Generate command with the correct endpoint URL
-                    eval_command = get_eval_factory_command(cfg, task, task_definition)
+                    eval_command_struct = get_eval_factory_command(
+                        cfg, task, task_definition
+                    )
+                    eval_command = eval_command_struct.cmd
+                    # Debug string for explainability of some base64-parts of the command
+                    eval_command_debug_comment = eval_command_struct.debug
                 finally:
                     # Restore original URL and struct mode
@@ -431,6 +436,7 @@ class LeptonExecutor(BaseExecutor):
                     task_name=task.name,
                     invocation_id=invocation_id,
                     eval_command=eval_command,  # Pass the fixed command
+                    eval_command_debug_comment=eval_command_debug_comment,
                 )
                 # Prepare job command to run the launch script
@@ -734,6 +740,7 @@ def _create_evaluation_launch_script(
     task_name: str,
     invocation_id: str,
     eval_command: str,
+    eval_command_debug_comment: str,
 ) -> str:
     """Create bash script for running evaluation in Lepton job container.
@@ -747,6 +754,7 @@ def _create_evaluation_launch_script(
         task_name: Name of the evaluation task.
         invocation_id: Unique invocation identifier.
         eval_command: The evaluation command with correct endpoint URL.
+        eval_command_debug_comment: Debug comment to place into the script for easier debugging.
     Returns:
         String containing the bash launch script.
@@ -779,6 +787,8 @@ echo "Invocation ID: {invocation_id}"
 echo "Endpoint URL: {endpoint_url}"
 echo "Command: {eval_command_modified}"
+{eval_command_debug_comment}
 # Execute the evaluation with proper error handling
 set +e
 {eval_command_modified}

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/src/nemo_evaluator_launcher/executors/local/executor.py RENAMED Viewed

@@ -47,6 +47,7 @@ from nemo_evaluator_launcher.common.mapping import (
     get_task_from_mapping,
     load_tasks_mapping,
 )
+from nemo_evaluator_launcher.common.printing_utils import bold, cyan, grey
 from nemo_evaluator_launcher.executors.base import (
     BaseExecutor,
     ExecutionState,
@@ -155,6 +156,16 @@ class LocalExecutor(BaseExecutor):
             task_output_dir = output_dir / task.name
             task_output_dir.mkdir(parents=True, exist_ok=True)
+            eval_factory_command_struct = get_eval_factory_command(
+                cfg, task, task_definition
+            )
+            eval_factory_command = eval_factory_command_struct.cmd
+            # Debug comment to place into the script for easier debugging. It is
+            # needed (see `CmdAndReadableComment`) because the command currently
+            # passes its config base64-encoded and `echo`-ed into a file.
+            # TODO(agronskiy): a cleaner way is to encode everything with base64, not
+            # just some parts (like ef_config.yaml), and output it as logs somewhere.
+            eval_factory_command_debug_comment = eval_factory_command_struct.debug
             evaluation_task = {
                 "name": task.name,
                 "job_id": job_id,
@@ -162,9 +173,8 @@ class LocalExecutor(BaseExecutor):
                 "container_name": container_name,
                 "env_vars": env_vars,
                 "output_dir": task_output_dir,
-                "eval_factory_command": get_eval_factory_command(
-                    cfg, task, task_definition
-                ),
+                "eval_factory_command": eval_factory_command,
+                "eval_factory_command_debug_comment": eval_factory_command_debug_comment,
             }
             evaluation_tasks.append(evaluation_task)
@@ -198,23 +208,28 @@ class LocalExecutor(BaseExecutor):
         )
         if dry_run:
-            print("\n\n=============================================\n\n")
-            print(f"DRY RUN: Scripts prepared and saved to {output_dir}")
+            print(bold("\n\n=============================================\n\n"))
+            print(bold(cyan(f"DRY RUN: Scripts prepared and saved to {output_dir}")))
             if is_execution_mode_sequential:
                 print(
-                    "\n\n =========== Main script | run_all.sequential.sh ===================== \n\n"
+                    cyan(
+                        "\n\n=========== Main script | run_all.sequential.sh =====================\n\n"
+                    )
                 )
                 with open(output_dir / "run_all.sequential.sh", "r") as f:
-                    print(f.read())
+                    print(grey(f.read()))
             else:
                 for idx, task in enumerate(cfg.evaluation.tasks):
                     task_output_dir = output_dir / task.name
                     print(
-                        f"\n\n =========== Task script | {task.name}/run.sh ===================== \n\n"
+                        cyan(
+                            f"\n\n=========== Task script | {task.name}/run.sh =====================\n\n"
+                        )
                     )
                     with open(task_output_dir / "run.sh", "r") as f:
-                        print(f.read())
-            print("\nTo execute, run without --dry-run")
+                        print(grey(f.read()))
+            print(bold("\nTo execute, run without --dry-run"))
             return invocation_id
         # Save launched jobs metadata
@@ -284,13 +299,13 @@ class LocalExecutor(BaseExecutor):
                 error_msg = f"Script for {name} exited with code {exit_code}"
                 raise RuntimeError(f"Job startup failed | {error_msg}")
-        print("\nCommands for real-time monitoring:")
+        print(bold(cyan("\nCommands for real-time monitoring:")))
         for job_id, evaluation_task in zip(job_ids, evaluation_tasks):
             log_file = evaluation_task["output_dir"] / "logs" / "stdout.log"
             print(f"  tail -f {log_file}")
-        print("\nFollow all logs for this invocation:")
-        print(f"  tail -f {output_dir}/*/logs/stdout.log")
+        print(bold(cyan("\nFollow all logs for this invocation:")))
+        print(f"  tail -f {output_dir}/*/logs/stdout.log\n")
         return invocation_id

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/src/nemo_evaluator_launcher/executors/local/run.template.sh RENAMED Viewed

@@ -40,6 +40,9 @@ else
     # Create pre-start stage file
     echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$logs_dir/stage.pre-start"
+    # Debug contents of the eval factory command's config
+    {{ task.eval_factory_command_debug_comment | indent(4) }}
     # Docker run with eval factory command
     (
         echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$logs_dir/stage.running"
@@ -51,7 +54,7 @@ else
       {% endfor -%}
       {{ task.eval_image }} \
       bash -c '
-        {{ task.eval_factory_command }} ;
+        {{ task.eval_factory_command | indent(8) }} ;
         exit_code=$?
         chmod 777 -R /results;
         if [ "$exit_code" -ne 0 ]; then

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/src/nemo_evaluator_launcher/executors/slurm/executor.py RENAMED Viewed

@@ -50,6 +50,7 @@ from nemo_evaluator_launcher.common.mapping import (
     get_task_from_mapping,
     load_tasks_mapping,
 )
+from nemo_evaluator_launcher.common.printing_utils import bold, cyan, grey
 from nemo_evaluator_launcher.executors.base import (
     BaseExecutor,
     ExecutionState,
@@ -130,13 +131,13 @@ class SlurmExecutor(BaseExecutor):
                 remote_runsub_paths.append(remote_runsub_path)
             if dry_run:
-                print("\n\n=============================================\n\n")
-                print("DRY RUN: SLURM scripts prepared")
+                print(bold("\n\n=============================================\n\n"))
+                print(bold(cyan("DRY RUN: SLURM scripts prepared")))
                 for idx, local_runsub_path in enumerate(local_runsub_paths):
-                    print(f"\n\n =========== Task {idx} ===================== \n\n")
+                    print(cyan(f"\n\n=========== Task {idx} =====================\n\n"))
                     with open(local_runsub_path, "r") as f:
-                        print(f.read())
-                print("\nTo submit jobs, run the executor without --dry-run")
+                        print(grey(f.read()))
+                print(bold("To submit jobs") + ", run the executor without --dry-run")
                 return invocation_id
             socket = str(Path(tmpdirname) / "socket")
@@ -589,7 +590,20 @@ def _create_slurm_sbatch_script(
     ):
         evaluation_mounts_list.append(f"{source_mnt}:{target_mnt}")
+    eval_factory_command_struct = get_eval_factory_command(cfg, task, task_definition)
+    eval_factory_command = eval_factory_command_struct.cmd
+    # Debug comment to place into the script for easier debugging. It is
+    # needed (see `CmdAndReadableComment`) because the command currently
+    # passes its config base64-encoded and `echo`-ed into a file.
+    # TODO(agronskiy): a cleaner way is to encode everything with base64, not
+    # just some parts (like ef_config.yaml), and output it as logs somewhere.
+    eval_factory_command_debug_comment = eval_factory_command_struct.debug
     # add evaluation srun command
+    s += "# Debug contents of the eval factory command's config\n"
+    s += eval_factory_command_debug_comment
+    s += "\n\n"
     s += "# evaluation client\n"
     s += "srun --mpi pmix --overlap "
     s += "--container-image {} ".format(eval_image)
@@ -600,10 +614,11 @@ def _create_slurm_sbatch_script(
         s += "--container-env {} ".format(",".join(evaluation_env_var_names))
     if not cfg.execution.get("mounts", {}).get("mount_home", True):
         s += "--no-container-mount-home "
     s += "--container-mounts {} ".format(",".join(evaluation_mounts_list))
     s += "--output {} ".format(remote_task_subdir / "logs" / "client-%A.out")
-    s += "bash -c '"
-    s += get_eval_factory_command(cfg, task, task_definition)
+    s += "bash -c '\n"
+    s += eval_factory_command
     s += "'\n\n"
     # terminate the server after all evaluation clients finish

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/src/nemo_evaluator_launcher/package_info.py RENAMED Viewed

@@ -16,7 +16,7 @@
 # Below is the _next_ version that will be published, not the currently published one.
 MAJOR = 0
 MINOR = 1
-PATCH = 17
+PATCH = 18
 PRE_RELEASE = ""
 # Use the following formatting: (major, minor, patch, pre-release)

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/src/nemo_evaluator_launcher.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nemo-evaluator-launcher
-Version: 0.1.17
+Version: 0.1.18
 Summary: Launcher for the evaluations provided by NeMo Evaluator containers with different runtime backends
 Author: NVIDIA
 Author-email: nemo-toolkit@nvidia.com

{nemo_evaluator_launcher-0.1.17 → nemo_evaluator_launcher-0.1.18}/src/nemo_evaluator_launcher.egg-info/SOURCES.txt RENAMED Viewed

@@ -14,8 +14,8 @@ src/nemo_evaluator_launcher/api/functional.py
 src/nemo_evaluator_launcher/api/types.py
 src/nemo_evaluator_launcher/api/utils.py
 src/nemo_evaluator_launcher/cli/__init__.py
-src/nemo_evaluator_launcher/cli/debug.py
 src/nemo_evaluator_launcher/cli/export.py
+src/nemo_evaluator_launcher/cli/info.py
 src/nemo_evaluator_launcher/cli/kill.py
 src/nemo_evaluator_launcher/cli/ls_runs.py
 src/nemo_evaluator_launcher/cli/ls_tasks.py
@@ -28,6 +28,7 @@ src/nemo_evaluator_launcher/common/execdb.py
 src/nemo_evaluator_launcher/common/helpers.py
 src/nemo_evaluator_launcher/common/logging_utils.py
 src/nemo_evaluator_launcher/common/mapping.py
+src/nemo_evaluator_launcher/common/printing_utils.py
 src/nemo_evaluator_launcher/configs/__init__.py
 src/nemo_evaluator_launcher/configs/default.yaml
 src/nemo_evaluator_launcher/configs/deployment/generic.yaml