PyPI - nemo-evaluator-launcher - Versions diffs - 0.1.28__py3-none-any.whl - Mend

nemo-evaluator-launcher 0.1.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nemo-evaluator-launcher might be problematic. Click here for more details.

Files changed (60) hide show

nemo_evaluator_launcher/__init__.py +79 -0
nemo_evaluator_launcher/api/__init__.py +24 -0
nemo_evaluator_launcher/api/functional.py +698 -0
nemo_evaluator_launcher/api/types.py +98 -0
nemo_evaluator_launcher/api/utils.py +19 -0
nemo_evaluator_launcher/cli/__init__.py +15 -0
nemo_evaluator_launcher/cli/export.py +267 -0
nemo_evaluator_launcher/cli/info.py +512 -0
nemo_evaluator_launcher/cli/kill.py +41 -0
nemo_evaluator_launcher/cli/ls_runs.py +134 -0
nemo_evaluator_launcher/cli/ls_tasks.py +136 -0
nemo_evaluator_launcher/cli/main.py +226 -0
nemo_evaluator_launcher/cli/run.py +200 -0
nemo_evaluator_launcher/cli/status.py +164 -0
nemo_evaluator_launcher/cli/version.py +55 -0
nemo_evaluator_launcher/common/__init__.py +16 -0
nemo_evaluator_launcher/common/execdb.py +283 -0
nemo_evaluator_launcher/common/helpers.py +366 -0
nemo_evaluator_launcher/common/logging_utils.py +357 -0
nemo_evaluator_launcher/common/mapping.py +295 -0
nemo_evaluator_launcher/common/printing_utils.py +93 -0
nemo_evaluator_launcher/configs/__init__.py +15 -0
nemo_evaluator_launcher/configs/default.yaml +28 -0
nemo_evaluator_launcher/configs/deployment/generic.yaml +33 -0
nemo_evaluator_launcher/configs/deployment/nim.yaml +32 -0
nemo_evaluator_launcher/configs/deployment/none.yaml +16 -0
nemo_evaluator_launcher/configs/deployment/sglang.yaml +38 -0
nemo_evaluator_launcher/configs/deployment/trtllm.yaml +24 -0
nemo_evaluator_launcher/configs/deployment/vllm.yaml +42 -0
nemo_evaluator_launcher/configs/execution/lepton/default.yaml +92 -0
nemo_evaluator_launcher/configs/execution/local.yaml +19 -0
nemo_evaluator_launcher/configs/execution/slurm/default.yaml +34 -0
nemo_evaluator_launcher/executors/__init__.py +22 -0
nemo_evaluator_launcher/executors/base.py +120 -0
nemo_evaluator_launcher/executors/lepton/__init__.py +16 -0
nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +609 -0
nemo_evaluator_launcher/executors/lepton/executor.py +1004 -0
nemo_evaluator_launcher/executors/lepton/job_helpers.py +398 -0
nemo_evaluator_launcher/executors/local/__init__.py +15 -0
nemo_evaluator_launcher/executors/local/executor.py +605 -0
nemo_evaluator_launcher/executors/local/run.template.sh +103 -0
nemo_evaluator_launcher/executors/registry.py +38 -0
nemo_evaluator_launcher/executors/slurm/__init__.py +15 -0
nemo_evaluator_launcher/executors/slurm/executor.py +1147 -0
nemo_evaluator_launcher/exporters/__init__.py +36 -0
nemo_evaluator_launcher/exporters/base.py +121 -0
nemo_evaluator_launcher/exporters/gsheets.py +409 -0
nemo_evaluator_launcher/exporters/local.py +502 -0
nemo_evaluator_launcher/exporters/mlflow.py +619 -0
nemo_evaluator_launcher/exporters/registry.py +40 -0
nemo_evaluator_launcher/exporters/utils.py +624 -0
nemo_evaluator_launcher/exporters/wandb.py +490 -0
nemo_evaluator_launcher/package_info.py +38 -0
nemo_evaluator_launcher/resources/mapping.toml +380 -0
nemo_evaluator_launcher-0.1.28.dist-info/METADATA +494 -0
nemo_evaluator_launcher-0.1.28.dist-info/RECORD +60 -0
nemo_evaluator_launcher-0.1.28.dist-info/WHEEL +5 -0
nemo_evaluator_launcher-0.1.28.dist-info/entry_points.txt +3 -0
nemo_evaluator_launcher-0.1.28.dist-info/licenses/LICENSE +451 -0
nemo_evaluator_launcher-0.1.28.dist-info/top_level.txt +1 -0

nemo_evaluator_launcher/cli/ls_tasks.py ADDED Viewed

@@ -0,0 +1,136 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from collections import defaultdict
+from dataclasses import dataclass
+from simple_parsing import field
+@dataclass
+class Cmd:
+    """List command configuration."""
+    json: bool = field(
+        default=False,
+        action="store_true",
+        help="Print output as JSON instead of table format",
+    )
+    def execute(self) -> None:
+        # Import heavy dependencies only when needed
+        import json
+        from nemo_evaluator_launcher.api.functional import get_tasks_list
+        # TODO(dfridman): modify `get_tasks_list` to return a list of dicts in the first place
+        data = get_tasks_list()
+        headers = ["task", "endpoint_type", "harness", "container"]
+        supported_benchmarks = []
+        for task_data in data:
+            assert len(task_data) == len(headers)
+            supported_benchmarks.append(dict(zip(headers, task_data)))
+        if self.json:
+            print(json.dumps({"tasks": supported_benchmarks}, indent=2))
+        else:
+            self._print_table(supported_benchmarks)
+    def _print_table(self, tasks: list[dict]) -> None:
+        """Print tasks grouped by harness and container in table format."""
+        if not tasks:
+            print("No tasks found.")
+            return
+        # Group tasks by harness and container
+        grouped = defaultdict(lambda: defaultdict(list))
+        for task in tasks:
+            harness = task["harness"]
+            container = task["container"]
+            grouped[harness][container].append(task)
+        # Print grouped tables
+        for i, (harness, containers) in enumerate(grouped.items()):
+            if i > 0:
+                print()  # Extra spacing between harnesses
+            for j, (container, container_tasks) in enumerate(containers.items()):
+                if j > 0:
+                    print()  # Spacing between containers
+                # Prepare task table first to get column widths
+                task_headers = ["task", "endpoint_type"]
+                rows = []
+                for task in container_tasks:
+                    rows.append([task["task"], task["endpoint_type"]])
+                # Sort tasks alphabetically for better readability
+                rows.sort(key=lambda x: x[0])
+                # Calculate column widths with some padding
+                widths = [
+                    max(len(task_headers[i]), max(len(str(row[i])) for row in rows)) + 2
+                    for i in range(len(task_headers))
+                ]
+                # Calculate minimum table width based on task columns
+                min_table_width = sum(widths) + len(widths) + 1
+                # Calculate required width for header content
+                harness_line = f"harness: {harness}"
+                container_line = f"container: {container}"
+                header_content_width = (
+                    max(len(harness_line), len(container_line)) + 4
+                )  # +4 for "| " and " |"
+                # Use the larger of the two widths
+                table_width = max(min_table_width, header_content_width)
+                # Print combined header with harness and container info
+                print("=" * table_width)
+                print(f"{harness_line}")
+                print(f"{container_line}")
+                # Adjust column widths to fill the full table width
+                available_width = table_width
+                # Give more space to the first column (task names can be long)
+                adjusted_widths = [
+                    max(
+                        widths[0], available_width * 2 // 3
+                    ),  # 2/3 of available width for task
+                    0,  # Will be calculated as remainder
+                ]
+                adjusted_widths[1] = (
+                    available_width - adjusted_widths[0]
+                )  # Remainder for endpoint_type
+                # Print task table header separator
+                print(" " * table_width)
+                header_row = f"{task_headers[0]:<{adjusted_widths[0]}}{task_headers[1]:<{adjusted_widths[1]}}"
+                print(header_row)
+                print("-" * table_width)
+                # Print task rows
+                for row in rows:
+                    data_row = f"{str(row[0]):<{adjusted_widths[0]}}{str(row[1]):<{adjusted_widths[1]}}"
+                    print(data_row)
+                print("-" * table_width)
+                # Show task count
+                task_count = len(rows)
+                print(f"  {task_count} task{'s' if task_count != 1 else ''} available")
+                print("=" * table_width)
+                print()

nemo_evaluator_launcher/cli/main.py ADDED Viewed

@@ -0,0 +1,226 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Main CLI module using simple-parsing with subcommands."""
+import os
+from simple_parsing import ArgumentParser
+import nemo_evaluator_launcher.cli.export as export
+import nemo_evaluator_launcher.cli.info as info
+import nemo_evaluator_launcher.cli.kill as kill
+import nemo_evaluator_launcher.cli.ls_runs as ls_runs
+import nemo_evaluator_launcher.cli.ls_tasks as ls_tasks
+import nemo_evaluator_launcher.cli.run as run
+import nemo_evaluator_launcher.cli.status as status
+import nemo_evaluator_launcher.cli.version as version
+from nemo_evaluator_launcher.common.logging_utils import logger
+# Help text shared by the top-level `--version` flag and the `version` subcommand.
+VERSION_HELP = "Show version information"
+def is_verbose_enabled(args) -> bool:
+    """Check if verbose flag is enabled in any subcommand."""
+    # Check global verbose flag
+    if hasattr(args, "verbose") and args.verbose:
+        return True
+    # Check subcommand verbose flags
+    subcommands = [
+        "run",
+        "status",
+        "info",
+        "kill",
+        "tasks_alias",
+        "tasks",
+        "runs",
+        "export",
+    ]
+    for subcmd in subcommands:
+        if hasattr(args, subcmd) and hasattr(getattr(args, subcmd), "verbose"):
+            if getattr(getattr(args, subcmd), "verbose"):
+                return True
+    return False
+def create_parser() -> ArgumentParser:
+    """Create and configure the CLI argument parser with subcommands."""
+    parser = ArgumentParser()
+    # Add --version flag at the top level
+    parser.add_argument("--version", action="store_true", help=VERSION_HELP)
+    # Add --verbose/-v flag for debug logging
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
+    )
+    subparsers = parser.add_subparsers(dest="command", required=False)
+    # Version subcommand
+    version_parser = subparsers.add_parser(
+        "version",
+        help=VERSION_HELP,
+        description=VERSION_HELP,
+    )
+    version_parser.add_arguments(version.Cmd, dest="version")
+    # Run subcommand
+    run_parser = subparsers.add_parser(
+        "run", help="Run evaluation", description="Run evaluation"
+    )
+    run_parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
+    )
+    run_parser.add_arguments(run.Cmd, dest="run")
+    # Status subcommand
+    status_parser = subparsers.add_parser(
+        "status", help="Check job status", description="Check job status"
+    )
+    status_parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
+    )
+    status_parser.add_arguments(status.Cmd, dest="status")
+    # Kill subcommand
+    kill_parser = subparsers.add_parser(
+        "kill",
+        help="Kill a job or invocation",
+        description="Kill a job (e.g., aefc4819.0) or entire invocation (e.g., aefc4819) by its ID",
+    )
+    kill_parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
+    )
+    kill_parser.add_arguments(kill.Cmd, dest="kill")
+    # Ls subcommand (with nested subcommands)
+    ls_parser = subparsers.add_parser(
+        "ls", help="List resources", description="List tasks or runs"
+    )
+    ls_parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
+    )
+    # Add arguments from `ls tasks` so that they work with `ls` as default alias
+    ls_parser.add_arguments(ls_tasks.Cmd, dest="tasks_alias")
+    ls_sub = ls_parser.add_subparsers(dest="ls_command", required=False)
+    # ls tasks (default)
+    ls_tasks_parser = ls_sub.add_parser(
+        "tasks", help="List available tasks", description="List available tasks"
+    )
+    ls_tasks_parser.add_arguments(ls_tasks.Cmd, dest="tasks")
+    # ls runs (invocations summary)
+    ls_runs_parser = ls_sub.add_parser(
+        "runs",
+        help="List invocations (runs)",
+        description="Show a concise table of invocations from the exec DB",
+    )
+    ls_runs_parser.add_arguments(ls_runs.Cmd, dest="runs")
+    # Export subcommand
+    export_parser = subparsers.add_parser(
+        "export",
+        help="Export evaluation results",
+        description="Export evaluation results takes a List of invocation ids and a list of destinations(local, gitlab, wandb)",
+    )
+    export_parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
+    )
+    export_parser.add_arguments(export.ExportCmd, dest="export")
+    # Info subcommand
+    info_parser = subparsers.add_parser(
+        "info",
+        help="Display evaluation job information",
+        description="Info functionalities for nemo-evaluator-launcher",
+    )
+    info_parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Enable verbose logging"
+    )
+    info_parser.add_arguments(info.InfoCmd, dest="info")
+    return parser
+def main() -> None:
+    """Main CLI entry point with subcommands."""
+    parser = create_parser()
+    args = parser.parse_args()
+    # Handle --verbose flag
+    if is_verbose_enabled(args):
+        os.environ["LOG_LEVEL"] = "DEBUG"
+    # Handle --version flag
+    if hasattr(args, "version") and args.version:
+        version_cmd = version.Cmd()
+        version_cmd.execute()
+        return
+    # Handle case where no command is provided but --version wasn't used
+    if not hasattr(args, "command") or args.command is None:
+        parser.print_help()
+        return
+    logger.debug("Parsed arguments", args=args)
+    if args.command == "version":
+        args.version.execute()
+    elif args.command == "run":
+        args.run.execute()
+    elif args.command == "status":
+        args.status.execute()
+    elif args.command == "kill":
+        args.kill.execute()
+    elif args.command == "ls":
+        # Dispatch nested ls subcommands
+        if args.ls_command is None or args.ls_command == "tasks":
+            # Default to tasks when no subcommand specified
+            if hasattr(args, "tasks_alias"):
+                args.tasks_alias.execute()
+            else:
+                args.tasks.execute()
+        elif args.ls_command == "runs":
+            args.runs.execute()
+    elif args.command == "export":
+        args.export.execute()
+    elif args.command == "info":
+        args.info.execute()
+# Run the CLI when this module is executed as a script.
+if __name__ == "__main__":
+    main()

nemo_evaluator_launcher/cli/run.py ADDED Viewed

@@ -0,0 +1,200 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import pathlib
+import time
+from dataclasses import dataclass
+from simple_parsing import field
+from nemo_evaluator_launcher.common.logging_utils import logger
+from nemo_evaluator_launcher.common.printing_utils import (
+    bold,
+    cyan,
+    green,
+    magenta,
+    red,
+)
+@dataclass
+class Cmd:
+    """Run command parameters"""
+    config_name: str = field(
+        default="default",
+        alias=["-c", "--config-name"],
+        metadata={
+            "help": "Config name to use. Consult `nemo_evaluator_launcher.configs`"
+        },
+    )
+    config_dir: str | None = field(
+        default=None,
+        alias=["-d", "--config-dir"],
+        metadata={
+            "help": "Path to user config directory. If provided, searches here first, then falls back to internal configs."
+        },
+    )
+    run_config_file: str | None = field(
+        default=None,
+        alias=["-f", "--run-config-file"],
+        metadata={
+            "help": "Path to a run config file to load directly (bypasses Hydra config loading)."
+        },
+    )
+    override: list[str] = field(
+        default_factory=list,
+        action="append",
+        nargs="?",
+        alias=["-o"],
+        metadata={
+            "help": "Hydra override in the form some.param.path=value (pass multiple `-o` for multiple overrides).",
+        },
+    )
+    dry_run: bool = field(
+        default=False,
+        alias=["-n", "--dry-run"],
+        metadata={"help": "Do not run the evaluation, just print the config."},
+    )
+    config_output: str | None = field(
+        default=None,
+        alias=["--config-output"],
+        metadata={
+            "help": "Directory to save the complete run config. Defaults to ~/.nemo-evaluator/run_configs/"
+        },
+    )
+    def execute(self) -> None:
+        # Import heavy dependencies only when needed
+        import yaml
+        from omegaconf import OmegaConf
+        from nemo_evaluator_launcher.api.functional import RunConfig, run_eval
+        # Load configuration either from Hydra or from a run config file
+        if self.run_config_file:
+            # Validate that run config file is not used with other config options
+            if self.config_name != "default":
+                raise ValueError("Cannot use --run-config-file with --config-name")
+            if self.config_dir is not None:
+                raise ValueError("Cannot use --run-config-file with --config-dir")
+            if self.override:
+                raise ValueError("Cannot use --run-config-file with --override")
+            # Load from run config file
+            with open(self.run_config_file, "r") as f:
+                config_dict = yaml.safe_load(f)
+            # Create RunConfig from the loaded data
+            config = OmegaConf.create(config_dict)
+        else:
+            # Load the complete Hydra configuration
+            config = RunConfig.from_hydra(
+                config_name=self.config_name,
+                hydra_overrides=self.override,
+                config_dir=self.config_dir,
+            )
+        try:
+            invocation_id = run_eval(config, self.dry_run)
+        except Exception as e:
+            print(red(f"✗ Job submission failed, see logs | Error: {e}"))
+            logger.error("Job submission failed", error=e)
+            raise
+        # Save the complete configuration
+        if not self.dry_run and invocation_id is not None:
+            # Determine config output directory
+            if self.config_output:
+                # Use custom directory specified by --config-output
+                config_dir = pathlib.Path(self.config_output)
+            else:
+                # Default to original location: ~/.nemo-evaluator/run_configs
+                home_dir = pathlib.Path.home()
+                config_dir = home_dir / ".nemo-evaluator" / "run_configs"
+            # Ensure the directory exists
+            config_dir.mkdir(parents=True, exist_ok=True)
+            # Convert DictConfig to dict and save as YAML
+            config_dict = OmegaConf.to_container(config, resolve=True)
+            config_yaml = yaml.dump(
+                config_dict, default_flow_style=False, sort_keys=False, indent=2
+            )
+            # Create config filename with invocation ID
+            config_filename = f"{invocation_id}_config.yml"
+            config_path = config_dir / config_filename
+            # Save the complete Hydra configuration
+            with open(config_path, "w") as f:
+                f.write("# Complete configuration from nemo-evaluator-launcher\n")
+                f.write(
+                    f"# Generated at: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n"
+                )
+                f.write(f"# Invocation ID: {invocation_id}\n")
+                f.write("#\n")
+                f.write("# This is the complete raw configuration\n")
+                f.write("#\n")
+                f.write("# To rerun this exact configuration:\n")
+                f.write(
+                    f"# nemo-evaluator-launcher run --run-config-file {config_path}\n"
+                )
+                f.write("#\n")
+                f.write(config_yaml)
+            print(bold(cyan("Complete run config saved to: ")) + f"\n  {config_path}\n")
+            logger.info("Saved complete config", path=config_path)
+        # Print general success message with invocation ID and helpful commands
+        if invocation_id is not None and not self.dry_run:
+            print(
+                bold(cyan("To check status: "))
+                + f"nemo-evaluator-launcher status {invocation_id}"
+            )
+            print(
+                bold(cyan("To kill all jobs: "))
+                + f"nemo-evaluator-launcher kill {invocation_id}"
+            )
+            # Show actual job IDs and task names
+            print(bold(cyan("To kill individual jobs:")))
+            # Access tasks - will work after normalization in run_eval
+            tasks = (
+                config.evaluation.tasks
+                if hasattr(config.evaluation, "tasks")
+                else config.evaluation
+            )
+            for idx, task in enumerate(tasks):
+                job_id = f"{invocation_id}.{idx}"
+                print(f"  nemo-evaluator-launcher kill {job_id}  # {task.name}")
+            print(
+                magenta(
+                    "(all commands accept shortened IDs as long as there are no conflicts)"
+                )
+            )
+            print(
+                bold(cyan("To print all jobs: ")) + "nemo-evaluator-launcher ls runs"
+                "\n  (--since 1d or --since 6h for time span, see --help)"
+            )
+            print(
+                green(
+                    bold(
+                        f"✓ Job submission successful | Invocation ID: {invocation_id}"
+                    )
+                )
+            )