nemo-evaluator-launcher 0.1.44__tar.gz → 0.1.64__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/PKG-INFO +2 -1
  2. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/pyproject.toml +1 -0
  3. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/api/functional.py +54 -4
  4. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/api/types.py +21 -14
  5. nemo_evaluator_launcher-0.1.64/src/nemo_evaluator_launcher/cli/ls_task.py +280 -0
  6. nemo_evaluator_launcher-0.1.64/src/nemo_evaluator_launcher/cli/ls_tasks.py +289 -0
  7. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/cli/main.py +17 -2
  8. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/cli/run.py +43 -52
  9. nemo_evaluator_launcher-0.1.64/src/nemo_evaluator_launcher/common/container_metadata/__init__.py +61 -0
  10. nemo_evaluator_launcher-0.1.64/src/nemo_evaluator_launcher/common/container_metadata/intermediate_repr.py +530 -0
  11. nemo_evaluator_launcher-0.1.64/src/nemo_evaluator_launcher/common/container_metadata/loading.py +1126 -0
  12. nemo_evaluator_launcher-0.1.64/src/nemo_evaluator_launcher/common/container_metadata/registries.py +824 -0
  13. nemo_evaluator_launcher-0.1.64/src/nemo_evaluator_launcher/common/container_metadata/utils.py +63 -0
  14. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/common/helpers.py +44 -28
  15. nemo_evaluator_launcher-0.1.64/src/nemo_evaluator_launcher/common/mapping.py +284 -0
  16. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/common/printing_utils.py +18 -12
  17. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/configs/deployment/nim.yaml +3 -1
  18. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/executors/lepton/executor.py +26 -8
  19. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/executors/local/executor.py +6 -2
  20. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/executors/slurm/executor.py +267 -19
  21. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/package_info.py +1 -1
  22. nemo_evaluator_launcher-0.1.64/src/nemo_evaluator_launcher/resources/all_tasks_irs.yaml +17016 -0
  23. nemo_evaluator_launcher-0.1.64/src/nemo_evaluator_launcher/resources/mapping.toml +93 -0
  24. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher.egg-info/PKG-INFO +2 -1
  25. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher.egg-info/SOURCES.txt +7 -0
  26. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher.egg-info/requires.txt +1 -0
  27. nemo_evaluator_launcher-0.1.44/src/nemo_evaluator_launcher/cli/ls_tasks.py +0 -136
  28. nemo_evaluator_launcher-0.1.44/src/nemo_evaluator_launcher/common/mapping.py +0 -295
  29. nemo_evaluator_launcher-0.1.44/src/nemo_evaluator_launcher/resources/mapping.toml +0 -385
  30. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/LICENSE +0 -0
  31. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/README.md +0 -0
  32. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/setup.cfg +0 -0
  33. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/__init__.py +0 -0
  34. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/api/__init__.py +0 -0
  35. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/api/utils.py +0 -0
  36. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/cli/__init__.py +0 -0
  37. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/cli/export.py +0 -0
  38. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/cli/info.py +0 -0
  39. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/cli/kill.py +0 -0
  40. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/cli/logs.py +0 -0
  41. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/cli/ls_runs.py +0 -0
  42. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/cli/status.py +0 -0
  43. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/cli/version.py +0 -0
  44. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/common/__init__.py +0 -0
  45. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/common/execdb.py +0 -0
  46. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/common/logging_utils.py +0 -0
  47. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/configs/__init__.py +0 -0
  48. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/configs/default.yaml +0 -0
  49. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/configs/deployment/generic.yaml +0 -0
  50. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/configs/deployment/none.yaml +0 -0
  51. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/configs/deployment/sglang.yaml +0 -0
  52. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/configs/deployment/trtllm.yaml +0 -0
  53. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -0
  54. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/configs/execution/lepton/default.yaml +0 -0
  55. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/configs/execution/local.yaml +0 -0
  56. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/configs/execution/slurm/default.yaml +0 -0
  57. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/executors/__init__.py +0 -0
  58. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/executors/base.py +0 -0
  59. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/executors/lepton/__init__.py +0 -0
  60. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +0 -0
  61. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/executors/lepton/job_helpers.py +0 -0
  62. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/executors/local/__init__.py +0 -0
  63. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/executors/local/run.template.sh +0 -0
  64. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/executors/registry.py +0 -0
  65. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/executors/slurm/__init__.py +0 -0
  66. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +0 -0
  67. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/exporters/__init__.py +0 -0
  68. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/exporters/base.py +0 -0
  69. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/exporters/gsheets.py +0 -0
  70. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/exporters/local.py +0 -0
  71. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/exporters/mlflow.py +0 -0
  72. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/exporters/registry.py +0 -0
  73. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/exporters/utils.py +0 -0
  74. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher/exporters/wandb.py +0 -0
  75. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher.egg-info/dependency_links.txt +0 -0
  76. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher.egg-info/entry_points.txt +0 -0
  77. {nemo_evaluator_launcher-0.1.44 → nemo_evaluator_launcher-0.1.64}/src/nemo_evaluator_launcher.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nemo-evaluator-launcher
3
- Version: 0.1.44
3
+ Version: 0.1.64
4
4
  Summary: Launcher for the evaluations provided by NeMo Evaluator containers with different runtime backends
5
5
  Author: NVIDIA
6
6
  Author-email: nemo-toolkit@nvidia.com
@@ -467,6 +467,7 @@ License-File: LICENSE
467
467
  Requires-Dist: hydra-core<2.0.0,>=1.3.2
468
468
  Requires-Dist: jinja2<4.0.0,>=3.1.6
469
469
  Requires-Dist: leptonai>=0.25.0
470
+ Requires-Dist: nemo-evaluator
470
471
  Requires-Dist: pyyaml>=6.0.0
471
472
  Requires-Dist: requests>=2.32.4
472
473
  Requires-Dist: simple-parsing<0.2.0,>=0.1.7
@@ -4,6 +4,7 @@ dependencies = [
4
4
  "hydra-core>=1.3.2,<2.0.0",
5
5
  "jinja2>=3.1.6,<4.0.0",
6
6
  "leptonai>=0.25.0",
7
+ "nemo-evaluator",
7
8
  "pyyaml>=6.0.0",
8
9
  "requests>=2.32.4",
9
10
  "simple-parsing>=0.1.7,<0.2.0",
@@ -18,6 +18,7 @@
18
18
  This module provides the main functional entry points for running evaluations, querying job status, and listing available tasks. These functions are intended to be used by CLI commands and external integrations.
19
19
  """
20
20
 
21
+ import copy
21
22
  from pathlib import Path
22
23
  from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
23
24
 
@@ -35,7 +36,7 @@ def get_tasks_list() -> list[list[Any]]:
35
36
  """Get a list of available tasks from the mapping.
36
37
 
37
38
  Returns:
38
- list[list[Any]]: Each sublist contains task name, endpoint type, harness, and container.
39
+ list[list[Any]]: Each sublist contains task name, endpoint type, harness, container, arch, description, and type.
39
40
  """
40
41
  mapping = load_tasks_mapping()
41
42
  data = [
@@ -44,6 +45,9 @@ def get_tasks_list() -> list[list[Any]]:
44
45
  task_data.get("endpoint_type"),
45
46
  task_data.get("harness"),
46
47
  task_data.get("container"),
48
+ task_data.get("arch", ""),
49
+ task_data.get("description", ""),
50
+ task_data.get("type", ""),
47
51
  ]
48
52
  for task_data in mapping.values()
49
53
  ]
@@ -75,12 +79,54 @@ def _validate_no_missing_values(cfg: Any, path: str = "") -> None:
75
79
  _validate_no_missing_values(value, current_path)
76
80
 
77
81
 
78
- def run_eval(cfg: RunConfig, dry_run: bool = False) -> Optional[str]:
82
+ def filter_tasks(cfg: RunConfig, task_names: list[str]) -> RunConfig:
83
+ """Filter evaluation tasks to only include specified task names.
84
+
85
+ Args:
86
+ cfg: The configuration object for the evaluation run.
87
+ task_names: List of task names to include (e.g., ["ifeval", "gsm8k"]).
88
+
89
+ Returns:
90
+ RunConfig: A new configuration with filtered tasks (input is not mutated).
91
+
92
+ Raises:
93
+ ValueError: If any requested task is not found in config or no tasks defined.
94
+ """
95
+ if not task_names:
96
+ return cfg
97
+
98
+ if not hasattr(cfg.evaluation, "tasks") or not cfg.evaluation.tasks:
99
+ raise ValueError("No tasks defined in config. Cannot filter tasks.")
100
+
101
+ requested_tasks = set(task_names)
102
+ original_tasks = cfg.evaluation.tasks
103
+ filtered_tasks = [task for task in original_tasks if task.name in requested_tasks]
104
+
105
+ # Fail if ANY requested tasks are not found
106
+ found_names = {task.name for task in filtered_tasks}
107
+ not_found = requested_tasks - found_names
108
+ if not_found:
109
+ available = [task.name for task in original_tasks]
110
+ raise ValueError(
111
+ f"Requested task(s) not found in config: {sorted(not_found)}. "
112
+ f"Available tasks: {available}"
113
+ )
114
+
115
+ # Create a deep copy to preserve input immutability
116
+ result = copy.deepcopy(cfg)
117
+ result.evaluation.tasks = filtered_tasks
118
+ return result
119
+
120
+
121
+ def run_eval(
122
+ cfg: RunConfig, dry_run: bool = False, tasks: Optional[list[str]] = None
123
+ ) -> Optional[str]:
79
124
  """Run evaluation with specified config and overrides.
80
125
 
81
126
  Args:
82
127
  cfg: The configuration object for the evaluation run.
83
128
  dry_run: If True, do not run the evaluation, just prepare scripts and save them.
129
+ tasks: Optional list of task names to run. If provided, only these tasks will be executed.
84
130
 
85
131
  Returns:
86
132
  Optional[str]: The invocation ID for the evaluation run.
@@ -89,6 +135,10 @@ def run_eval(cfg: RunConfig, dry_run: bool = False) -> Optional[str]:
89
135
  ValueError: If configuration validation fails or MISSING values are found.
90
136
  RuntimeError: If the executor fails to start the evaluation.
91
137
  """
138
+ # Filter tasks if specified
139
+ if tasks:
140
+ cfg = filter_tasks(cfg, tasks)
141
+
92
142
  # Validate that no MISSING values exist in the configuration
93
143
  _validate_no_missing_values(cfg)
94
144
 
@@ -285,7 +335,7 @@ def stream_logs(
285
335
 
286
336
  # Collect all jobs from all IDs, grouped by executor
287
337
  executor_to_jobs: Dict[str, Dict[str, JobData]] = {}
288
- executor_to_invocations: Dict[str, List[str]] = {}
338
+ executor_to_invocations: Dict[str, list[str]] = {}
289
339
 
290
340
  # TODO(agronskiy): refactor the `.`-checking job in all the functions.
291
341
  for id_or_prefix in ids_or_prefixes:
@@ -517,7 +567,7 @@ def kill_job_or_invocation(id: str) -> list[dict[str, Any]]:
517
567
 
518
568
 
519
569
  def export_results(
520
- invocation_ids: Union[str, List[str]],
570
+ invocation_ids: Union[str, list[str]],
521
571
  dest: str = "local",
522
572
  config: dict[Any, Any] | None = None,
523
573
  ) -> dict:
@@ -18,7 +18,7 @@
18
18
  This module defines data structures and helpers for configuration and type safety in the API layer.
19
19
  """
20
20
 
21
- import os
21
+ import pathlib
22
22
  import warnings
23
23
  from dataclasses import dataclass
24
24
  from typing import cast
@@ -42,33 +42,40 @@ from nemo_evaluator_launcher.common.logging_utils import logger
42
42
  class RunConfig(DictConfig):
43
43
  @staticmethod
44
44
  def from_hydra(
45
- config_name: str = "default",
46
- config_dir: str | None = None,
47
- hydra_overrides: list[str] = [],
48
- dict_overrides: dict = {},
45
+ config: str | None = None,
46
+ hydra_overrides: list[str] | None = None,
47
+ dict_overrides: dict | None = None,
49
48
  ) -> "RunConfig":
50
49
  """Load configuration from Hydra and merge with dictionary overrides.
51
50
 
52
51
  Args:
53
- config_name: Name of the Hydra configuration to load.
52
+ config: Optional full path to a config file (e.g. /path/to/my_config.yaml).
53
+ If omitted, loads the internal default config from
54
+ `nemo_evaluator_launcher.configs`.
54
55
  hydra_overrides: List of Hydra command-line style overrides.
55
56
  dict_overrides: Dictionary of configuration overrides to merge.
56
- config_dir: Optional path to user config directory. If provided, Hydra will
57
- search in this directory first, then fall back to internal configs.
58
57
 
59
58
  Returns:
60
59
  RunConfig: Merged configuration object.
61
60
  """
62
- overrides = hydra_overrides.copy()
61
+ overrides = list(hydra_overrides or [])
62
+ dict_overrides = dict_overrides or {}
63
+
64
+ resolved_config_path: str | None = None
65
+ config_name = "default"
66
+
63
67
  # Check if a GlobalHydra instance is already initialized and clear it
64
68
  if GlobalHydra.instance().is_initialized():
65
69
  GlobalHydra.instance().clear()
66
70
 
67
- if config_dir:
68
- # Convert relative path to absolute path if needed
69
- if not os.path.isabs(config_dir):
70
- config_dir = os.path.abspath(config_dir)
71
+ if config:
72
+ config_path = pathlib.Path(config).expanduser()
73
+ if not config_path.is_absolute():
74
+ config_path = (pathlib.Path.cwd() / config_path).resolve()
75
+ resolved_config_path = str(config_path)
71
76
 
77
+ config_dir = str(config_path.parent)
78
+ config_name = str(config_path.stem)
72
79
  hydra.initialize_config_dir(
73
80
  config_dir=config_dir,
74
81
  version_base=None,
@@ -90,7 +97,7 @@ class RunConfig(DictConfig):
90
97
  logger.debug(
91
98
  "Loaded run config from hydra",
92
99
  config_name=config_name,
93
- config_dir=config_dir,
100
+ config=resolved_config_path,
94
101
  overrides=hydra_overrides,
95
102
  dict_overrides=dict_overrides,
96
103
  result=cfg,
@@ -0,0 +1,280 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ """CLI command for listing task details."""
17
+
18
+ import json
19
+ from dataclasses import dataclass
20
+
21
+ import yaml
22
+ from simple_parsing import field
23
+
24
+ from nemo_evaluator_launcher.common.container_metadata import (
25
+ TaskIntermediateRepresentation,
26
+ load_tasks_from_tasks_file,
27
+ )
28
+ from nemo_evaluator_launcher.common.logging_utils import logger
29
+ from nemo_evaluator_launcher.common.mapping import load_tasks_mapping
30
+ from nemo_evaluator_launcher.common.printing_utils import (
31
+ bold,
32
+ cyan,
33
+ magenta,
34
+ yellow,
35
+ )
36
+
37
+
38
+ @dataclass
39
+ class Cmd:
40
+ """List task command configuration."""
41
+
42
+ task_identifier: str = field(
43
+ default="",
44
+ positional=True,
45
+ help="Task identifier in format '[harness.]task_name'. If empty, shows all tasks.",
46
+ )
47
+ json: bool = field(
48
+ default=False,
49
+ action="store_true",
50
+ help="Print output as JSON instead of formatted text",
51
+ )
52
+ tasks_file: str = field(
53
+ default="",
54
+ help="Path to all_tasks_irs.yaml file (default: auto-detect)",
55
+ )
56
+ from_container: str = field(
57
+ default="",
58
+ help="Load tasks from container image (e.g., nvcr.io/nvidia/eval-factory/simple-evals:25.10). "
59
+ "If provided, extracts framework.yml from container and loads tasks on-the-fly instead of using all_tasks_irs.yaml",
60
+ )
61
+
62
+ def execute(self) -> None:
63
+ """Execute the ls task command."""
64
+ import pathlib
65
+
66
+ # Initialize tasks_path to None - it will be set when loading from file
67
+ tasks_path = None
68
+
69
+ # If --from is provided, load tasks from container
70
+ if self.from_container:
71
+ from nemo_evaluator_launcher.common.container_metadata import (
72
+ load_tasks_from_container,
73
+ )
74
+
75
+ try:
76
+ tasks = load_tasks_from_container(self.from_container)
77
+ except ValueError as e:
78
+ print(f"Error: {e}")
79
+ return
80
+ except Exception as e:
81
+ logger.error(
82
+ "Failed to load tasks from container",
83
+ container=self.from_container,
84
+ error=str(e),
85
+ exc_info=True,
86
+ )
87
+ return
88
+
89
+ if not tasks:
90
+ logger.error(
91
+ "No tasks found in container",
92
+ container=self.from_container,
93
+ )
94
+ return
95
+
96
+ logger.debug(
97
+ "Loaded tasks from container",
98
+ container=self.from_container,
99
+ num_tasks=len(tasks),
100
+ containers=set(task.container for task in tasks),
101
+ )
102
+ mapping_verified = True # Tasks from container are always verified
103
+ else:
104
+ # Default behavior: load from all_tasks_irs.yaml
105
+ if self.tasks_file:
106
+ tasks_path = pathlib.Path(self.tasks_file)
107
+ if not tasks_path.exists():
108
+ logger.error("Tasks file not found", path=str(tasks_path))
109
+ return
110
+
111
+ # Load tasks
112
+ try:
113
+ tasks, mapping_verified = load_tasks_from_tasks_file(tasks_path)
114
+ except Exception as e:
115
+ print(f"Error loading tasks: {e}")
116
+ import traceback
117
+
118
+ traceback.print_exc()
119
+ logger.error("Failed to load tasks", error=str(e), exc_info=True)
120
+ return
121
+
122
+ # Display warning if mapping is not verified
123
+ if not mapping_verified:
124
+ print(
125
+ yellow(
126
+ "⚠ Warning: Tasks are from unverified mapping (mapping.toml checksum mismatch)"
127
+ )
128
+ )
129
+ print(
130
+ yellow(
131
+ " Consider regenerating all_tasks_irs.yaml if mapping.toml has changed"
132
+ )
133
+ )
134
+ print()
135
+
136
+ # Override containers from mapping.toml (which has the latest containers)
137
+ # This ensures ls task shows the same containers as ls tasks
138
+ # Only do this when NOT using --from (when loading from all_tasks_irs.yaml)
139
+ try:
140
+ mapping = load_tasks_mapping()
141
+ # Create a lookup: (normalized_harness, normalized_task_name) -> container
142
+ # Use case-insensitive keys for matching
143
+ container_lookup = {}
144
+ for (harness, task_name), task_data in mapping.items():
145
+ container = task_data.get("container")
146
+ if container:
147
+ # Normalize harness name for lookup (frameworks.yaml uses hyphens)
148
+ normalized_harness = harness.replace("_", "-").lower()
149
+ normalized_task = task_name.lower()
150
+ container_lookup[(normalized_harness, normalized_task)] = (
151
+ container
152
+ )
153
+
154
+ # Update task containers from mapping.toml
155
+ for task in tasks:
156
+ # Defensive checks: ensure task has required attributes
157
+ if not hasattr(task, "harness") or not task.harness:
158
+ logger.warning(
159
+ "Task missing harness attribute, skipping container override",
160
+ task_name=getattr(task, "name", "unknown"),
161
+ )
162
+ continue
163
+ if not hasattr(task, "name") or not task.name:
164
+ logger.warning(
165
+ "Task missing name attribute, skipping container override",
166
+ harness=getattr(task, "harness", "unknown"),
167
+ )
168
+ continue
169
+
170
+ # Normalize both harness and task name for case-insensitive lookup
171
+ normalized_harness = task.harness.lower()
172
+ normalized_task = task.name.lower()
173
+ lookup_key = (normalized_harness, normalized_task)
174
+ if lookup_key in container_lookup:
175
+ task.container = container_lookup[lookup_key]
176
+ except Exception as e:
177
+ logger.debug(
178
+ "Failed to override containers from mapping.toml",
179
+ error=str(e),
180
+ )
181
+ # Continue with containers from all_tasks_irs.yaml if mapping load fails
182
+
183
+ if not tasks:
184
+ print("No tasks found.")
185
+ if tasks_path:
186
+ print(f" Tasks file: {tasks_path}")
187
+ else:
188
+ print(
189
+ " Note: Make sure all_tasks_irs.yaml exists and contains valid task definitions."
190
+ )
191
+ return
192
+
193
+ # Parse task identifier
194
+ harness_filter = None
195
+ task_filter = None
196
+ if self.task_identifier:
197
+ if "." in self.task_identifier:
198
+ parts = self.task_identifier.split(".", 1)
199
+ harness_filter = parts[0]
200
+ task_filter = parts[1]
201
+ else:
202
+ task_filter = self.task_identifier
203
+
204
+ # Filter tasks
205
+ filtered_tasks = []
206
+ for task in tasks:
207
+ if harness_filter and task.harness.lower() != harness_filter.lower():
208
+ continue
209
+ if task_filter and task.name.lower() != task_filter.lower():
210
+ continue
211
+ filtered_tasks.append(task)
212
+
213
+ if not filtered_tasks:
214
+ print(f"No tasks found matching: {self.task_identifier}")
215
+ if self.task_identifier:
216
+ # Show available tasks for debugging
217
+ print("\nAvailable tasks (showing first 10):")
218
+ for i, task in enumerate(tasks[:10]):
219
+ print(f" - {task.harness}.{task.name}")
220
+ if len(tasks) > 10:
221
+ print(f" ... and {len(tasks) - 10} more")
222
+ return
223
+
224
+ # Display tasks
225
+ if self.json:
226
+ self._print_json(filtered_tasks)
227
+ else:
228
+ self._print_formatted(filtered_tasks, mapping_verified)
229
+
230
+ def _print_json(self, tasks: list[TaskIntermediateRepresentation]) -> None:
231
+ """Print tasks as JSON."""
232
+ tasks_dict = [task.to_dict() for task in tasks]
233
+ print(json.dumps({"tasks": tasks_dict}, indent=2))
234
+
235
+ def _print_formatted(
236
+ self, tasks: list[TaskIntermediateRepresentation], mapping_verified: bool = True
237
+ ) -> None:
238
+ """Print tasks in formatted text with colorized output."""
239
+ for i, task in enumerate(tasks):
240
+ if i > 0:
241
+ print() # Spacing between tasks
242
+ print(bold("=" * 80))
243
+
244
+ # Task name - bold and magenta key, cyan value (matching logging utils)
245
+ print(f"{bold(magenta('Task:'))} {bold(cyan(str(task.name)))}")
246
+
247
+ # Description - magenta key, cyan value
248
+ if task.description:
249
+ print(f"{magenta('Description:')} {cyan(str(task.description))}")
250
+
251
+ # Harness - magenta key, cyan value
252
+ print(f"{magenta('Harness:')} {cyan(str(task.harness))}")
253
+
254
+ # Container - magenta key, cyan value
255
+ print(f"{magenta('Container:')} {cyan(str(task.container))}")
256
+
257
+ # Container Digest - magenta key, cyan value
258
+ if task.container_digest:
259
+ print(
260
+ f"{magenta('Container Digest:')} {cyan(str(task.container_digest))}"
261
+ )
262
+
263
+ # Print defaults as YAML
264
+ if task.defaults:
265
+ print(f"\n{bold(magenta('Defaults:'))}")
266
+ defaults_yaml = yaml.dump(
267
+ task.defaults, default_flow_style=False, sort_keys=False
268
+ )
269
+ # Indent defaults - use cyan for YAML content (FDF values)
270
+ for line in defaults_yaml.split("\n"):
271
+ if line.strip():
272
+ print(f" {cyan(line)}")
273
+ else:
274
+ print()
275
+
276
+ print(bold("-" * 80))
277
+
278
+ # Total count - bold
279
+ task_word = "task" if len(tasks) == 1 else "tasks"
280
+ print(f"\n{bold(f'Total: {len(tasks)} {task_word}')}")