nemo-evaluator-launcher 0.1.19__py3-none-any.whl → 0.1.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. nemo_evaluator_launcher/api/functional.py +159 -5
  2. nemo_evaluator_launcher/cli/logs.py +102 -0
  3. nemo_evaluator_launcher/cli/ls_task.py +280 -0
  4. nemo_evaluator_launcher/cli/ls_tasks.py +208 -55
  5. nemo_evaluator_launcher/cli/main.py +29 -2
  6. nemo_evaluator_launcher/cli/run.py +114 -16
  7. nemo_evaluator_launcher/cli/version.py +26 -23
  8. nemo_evaluator_launcher/common/container_metadata/__init__.py +61 -0
  9. nemo_evaluator_launcher/common/container_metadata/intermediate_repr.py +530 -0
  10. nemo_evaluator_launcher/common/container_metadata/loading.py +1126 -0
  11. nemo_evaluator_launcher/common/container_metadata/registries.py +824 -0
  12. nemo_evaluator_launcher/common/container_metadata/utils.py +63 -0
  13. nemo_evaluator_launcher/common/helpers.py +200 -51
  14. nemo_evaluator_launcher/common/logging_utils.py +16 -5
  15. nemo_evaluator_launcher/common/mapping.py +341 -155
  16. nemo_evaluator_launcher/common/printing_utils.py +25 -12
  17. nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
  18. nemo_evaluator_launcher/configs/deployment/trtllm.yaml +2 -3
  19. nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -1
  20. nemo_evaluator_launcher/configs/execution/slurm/default.yaml +14 -0
  21. nemo_evaluator_launcher/executors/base.py +31 -1
  22. nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +36 -1
  23. nemo_evaluator_launcher/executors/lepton/executor.py +107 -9
  24. nemo_evaluator_launcher/executors/local/executor.py +383 -24
  25. nemo_evaluator_launcher/executors/local/run.template.sh +54 -2
  26. nemo_evaluator_launcher/executors/slurm/executor.py +559 -64
  27. nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
  28. nemo_evaluator_launcher/exporters/utils.py +32 -46
  29. nemo_evaluator_launcher/package_info.py +1 -1
  30. nemo_evaluator_launcher/resources/all_tasks_irs.yaml +17016 -0
  31. nemo_evaluator_launcher/resources/mapping.toml +64 -315
  32. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/METADATA +4 -3
  33. nemo_evaluator_launcher-0.1.56.dist-info/RECORD +69 -0
  34. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/entry_points.txt +1 -0
  35. nemo_evaluator_launcher-0.1.19.dist-info/RECORD +0 -60
  36. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/WHEEL +0 -0
  37. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/licenses/LICENSE +0 -0
  38. {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,13 @@ from dataclasses import dataclass
18
18
 
19
19
  from simple_parsing import field
20
20
 
21
+ from nemo_evaluator_launcher.common.printing_utils import (
22
+ bold,
23
+ cyan,
24
+ grey,
25
+ magenta,
26
+ )
27
+
21
28
 
22
29
  @dataclass
23
30
  class Cmd:
@@ -28,20 +35,101 @@ class Cmd:
28
35
  action="store_true",
29
36
  help="Print output as JSON instead of table format",
30
37
  )
38
+ from_container: str = field(
39
+ default="",
40
+ help="Load tasks from container image (e.g., nvcr.io/nvidia/eval-factory/simple-evals:25.10). "
41
+ "If provided, extracts framework.yml from container and lists tasks on-the-fly instead of using mapping.toml",
42
+ )
31
43
 
32
44
  def execute(self) -> None:
33
45
  # Import heavy dependencies only when needed
34
46
  import json
35
47
 
36
- from nemo_evaluator_launcher.api.functional import get_tasks_list
48
+ if self.from_container:
49
+ # Load tasks from container
50
+ from nemo_evaluator_launcher.common.container_metadata import (
51
+ load_tasks_from_container,
52
+ )
53
+
54
+ try:
55
+ tasks = load_tasks_from_container(self.from_container)
56
+ except ValueError as e:
57
+ from nemo_evaluator_launcher.common.logging_utils import logger
58
+
59
+ logger.error(
60
+ "Failed to load tasks from container",
61
+ container=self.from_container,
62
+ error=str(e),
63
+ )
64
+ return
65
+ except Exception as e:
66
+ from nemo_evaluator_launcher.common.logging_utils import logger
67
+
68
+ logger.error(
69
+ "Failed to load tasks from container",
70
+ container=self.from_container,
71
+ error=str(e),
72
+ exc_info=True,
73
+ )
74
+ return
75
+
76
+ if not tasks:
77
+ from nemo_evaluator_launcher.common.logging_utils import logger
78
+
79
+ logger.error(
80
+ "No tasks found in container",
81
+ container=self.from_container,
82
+ )
83
+ return
37
84
 
38
- # TODO(dfridman): modify `get_tasks_list` to return a list of dicts in the first place
39
- data = get_tasks_list()
40
- headers = ["task", "endpoint_type", "harness", "container"]
85
+ # Convert TaskIntermediateRepresentation to format expected by get_tasks_list()
86
+ # Build data structure matching get_tasks_list() output format
87
+ data = []
88
+ for task in tasks:
89
+ # Extract endpoint types from defaults
90
+ endpoint_types = (
91
+ task.defaults.get("target", {})
92
+ .get("api_endpoint", {})
93
+ .get("type", "chat")
94
+ )
95
+ if isinstance(endpoint_types, str):
96
+ endpoint_types = [endpoint_types]
97
+
98
+ data.append(
99
+ [
100
+ task.name, # task
101
+ ",".join(endpoint_types)
102
+ if isinstance(endpoint_types, list)
103
+ else endpoint_types, # endpoint_type
104
+ task.harness, # harness
105
+ task.container, # container
106
+ getattr(task, "container_arch", "") or "", # arch
107
+ task.description, # description
108
+ ]
109
+ )
110
+ else:
111
+ # Default behavior: load from mapping.toml via get_tasks_list()
112
+ from nemo_evaluator_launcher.api.functional import get_tasks_list
113
+
114
+ # TODO(dfridman): modify `get_tasks_list` to return a list of dicts in the first place
115
+ data = get_tasks_list()
116
+
117
+ headers = [
118
+ "task",
119
+ "endpoint_type",
120
+ "harness",
121
+ "container",
122
+ "arch",
123
+ "description",
124
+ ]
41
125
  supported_benchmarks = []
42
126
  for task_data in data:
43
- assert len(task_data) == len(headers)
44
- supported_benchmarks.append(dict(zip(headers, task_data)))
127
+ if len(task_data) < len(headers):
128
+ raise ValueError(
129
+ f"Invalid task row shape: expected at least {len(headers)} columns, got {len(task_data)}"
130
+ )
131
+ # Backwards/forwards compat: allow extra columns and ignore them.
132
+ supported_benchmarks.append(dict(zip(headers, task_data[: len(headers)])))
45
133
 
46
134
  if self.json:
47
135
  print(json.dumps({"tasks": supported_benchmarks}, indent=2))
@@ -49,11 +137,55 @@ class Cmd:
49
137
  self._print_table(supported_benchmarks)
50
138
 
51
139
  def _print_table(self, tasks: list[dict]) -> None:
52
- """Print tasks grouped by harness and container in table format."""
140
+ """Print tasks grouped by harness and container in table format with colorized output."""
53
141
  if not tasks:
54
142
  print("No tasks found.")
55
143
  return
56
144
 
145
+ def _truncate(s: str, max_len: int) -> str:
146
+ s = s or ""
147
+ if max_len <= 0:
148
+ return ""
149
+ if len(s) <= max_len:
150
+ return s
151
+ if max_len <= 3:
152
+ return s[:max_len]
153
+ return s[: max_len - 3] + "..."
154
+
155
+ def _infer_arch(container: str, container_tasks: list[dict]) -> str:
156
+ # Prefer explicit arch from task IRs.
157
+ for t in container_tasks:
158
+ a = (t.get("arch") or "").strip()
159
+ if a:
160
+ return a
161
+
162
+ # Heuristic fallback: look for common suffixes in tag.
163
+ c = (container or "").lower()
164
+ if "arm64" in c or "aarch64" in c:
165
+ return "arm"
166
+ if "amd64" in c or "x86_64" in c:
167
+ return "amd"
168
+ return "unknown"
169
+
170
+ def _infer_registry(container: str) -> str:
171
+ try:
172
+ from nemo_evaluator_launcher.common.container_metadata.utils import (
173
+ parse_container_image,
174
+ )
175
+
176
+ registry_type, _registry_url, _repo, _ref = parse_container_image(
177
+ container
178
+ )
179
+ return str(registry_type)
180
+ except Exception:
181
+ # Best-effort fallback for unknown formats.
182
+ c = (container or "").lower()
183
+ if "nvcr.io/" in c or c.startswith("nvcr.io"):
184
+ return "nvcr"
185
+ if "gitlab" in c:
186
+ return "gitlab"
187
+ return ""
188
+
57
189
  # Group tasks by harness and container
58
190
  grouped = defaultdict(lambda: defaultdict(list))
59
191
  for task in tasks:
@@ -70,67 +202,88 @@ class Cmd:
70
202
  if j > 0:
71
203
  print() # Spacing between containers
72
204
 
73
- # Prepare task table first to get column widths
74
- task_headers = ["task", "endpoint_type"]
75
205
  rows = []
76
206
  for task in container_tasks:
77
- rows.append([task["task"], task["endpoint_type"]])
78
-
79
- # Sort tasks alphabetically for better readability
80
- rows.sort(key=lambda x: x[0])
81
-
82
- # Calculate column widths with some padding
83
- widths = [
84
- max(len(task_headers[i]), max(len(str(row[i])) for row in rows)) + 2
85
- for i in range(len(task_headers))
86
- ]
87
-
88
- # Calculate minimum table width based on task columns
89
- min_table_width = sum(widths) + len(widths) + 1
207
+ rows.append(
208
+ {
209
+ "task": str(task.get("task", "")),
210
+ "endpoint": str(task.get("endpoint_type", "")),
211
+ "description": str(task.get("description", "")),
212
+ }
213
+ )
214
+ rows.sort(key=lambda r: r["task"].lower())
90
215
 
91
216
  # Calculate required width for header content
92
217
  harness_line = f"harness: {harness}"
93
218
  container_line = f"container: {container}"
219
+ arch_line = f"arch: {_infer_arch(container, container_tasks)}"
220
+ registry_line = f"registry: {_infer_registry(container)}"
94
221
  header_content_width = (
95
- max(len(harness_line), len(container_line)) + 4
222
+ max(
223
+ len(harness_line),
224
+ len(container_line),
225
+ len(arch_line),
226
+ len(registry_line),
227
+ )
228
+ + 4
96
229
  ) # +4 for "| " and " |"
97
230
 
98
- # Use the larger of the two widths
99
- table_width = max(min_table_width, header_content_width)
231
+ # Limit separator width to prevent overflow on small terminals
232
+ # Use terminal width if available, otherwise cap at 120 characters
233
+ import shutil
100
234
 
101
- # Print combined header with harness and container info
102
- print("=" * table_width)
103
- print(f"{harness_line}")
104
- print(f"{container_line}")
235
+ try:
236
+ terminal_width = shutil.get_terminal_size().columns
237
+ separator_width = min(terminal_width - 2, 160) # -2 safety margin
238
+ except Exception:
239
+ # Fallback if terminal size can't be determined
240
+ separator_width = 120
105
241
 
106
- # Adjust column widths to fill the full table width
107
- available_width = table_width
108
- # Give more space to the first column (task names can be long)
109
- adjusted_widths = [
110
- max(
111
- widths[0], available_width * 2 // 3
112
- ), # 2/3 of available width for task
113
- 0, # Will be calculated as remainder
114
- ]
115
- adjusted_widths[1] = (
116
- available_width - adjusted_widths[0]
117
- ) # Remainder for endpoint_type
242
+ separator_width = max(separator_width, min(header_content_width, 160))
243
+
244
+ # Table columns (keep compact and stable).
245
+ col_task = 36
246
+ col_endpoint = 14
247
+ sep = " "
248
+ fixed = col_task + col_endpoint + len(sep) * 2
249
+ col_desc = max(20, separator_width - fixed)
250
+
251
+ # Print combined header with harness and container info - colorized
252
+ # Keys: magenta, Values: cyan (matching logging utils)
253
+ print(bold("=" * separator_width))
254
+ print(f"{magenta('harness:')} {cyan(str(harness))}")
255
+ print(f"{magenta('container:')} {cyan(str(container))}")
256
+ arch = _infer_arch(container, container_tasks)
257
+ registry = _infer_registry(container)
258
+ print(f"{magenta('arch:')} {cyan(str(arch))}")
259
+ if registry:
260
+ print(f"{magenta('registry:')} {cyan(str(registry))}")
118
261
 
119
262
  # Print task table header separator
120
- print(" " * table_width)
121
- header_row = f"{task_headers[0]:<{adjusted_widths[0]}}{task_headers[1]:<{adjusted_widths[1]}}"
122
- print(header_row)
123
- print("-" * table_width)
124
-
125
- # Print task rows
126
- for row in rows:
127
- data_row = f"{str(row[0]):<{adjusted_widths[0]}}{str(row[1]):<{adjusted_widths[1]}}"
128
- print(data_row)
129
-
130
- print("-" * table_width)
131
- # Show task count
263
+ print()
264
+ print(
265
+ bold(
266
+ f"{'task':<{col_task}}{sep}"
267
+ f"{'endpoint':<{col_endpoint}}{sep}"
268
+ f"{'description':<{col_desc}}"
269
+ )
270
+ )
271
+ print(bold("-" * separator_width))
272
+
273
+ # Print task rows - use grey for task descriptions
274
+ for r in rows:
275
+ line = (
276
+ f"{_truncate(r['task'], col_task):<{col_task}}{sep}"
277
+ f"{_truncate(r['endpoint'], col_endpoint):<{col_endpoint}}{sep}"
278
+ f"{_truncate(r['description'], col_desc):<{col_desc}}"
279
+ )
280
+ print(grey(line))
281
+
282
+ print(bold("-" * separator_width))
283
+ # Show task count - grey for count text
132
284
  task_count = len(rows)
133
- print(f" {task_count} task{'s' if task_count != 1 else ''} available")
134
- print("=" * table_width)
285
+ task_word = "task" if task_count == 1 else "tasks"
286
+ print(f" {grey(f'{task_count} {task_word} available')}")
287
+ print(bold("=" * separator_width))
135
288
 
136
289
  print()
@@ -22,7 +22,9 @@ from simple_parsing import ArgumentParser
22
22
  import nemo_evaluator_launcher.cli.export as export
23
23
  import nemo_evaluator_launcher.cli.info as info
24
24
  import nemo_evaluator_launcher.cli.kill as kill
25
+ import nemo_evaluator_launcher.cli.logs as logs
25
26
  import nemo_evaluator_launcher.cli.ls_runs as ls_runs
27
+ import nemo_evaluator_launcher.cli.ls_task as ls_task
26
28
  import nemo_evaluator_launcher.cli.ls_tasks as ls_tasks
27
29
  import nemo_evaluator_launcher.cli.run as run
28
30
  import nemo_evaluator_launcher.cli.status as status
@@ -42,11 +44,13 @@ def is_verbose_enabled(args) -> bool:
42
44
  subcommands = [
43
45
  "run",
44
46
  "status",
47
+ "logs",
45
48
  "info",
46
49
  "kill",
47
50
  "tasks_alias",
48
51
  "tasks",
49
52
  "runs",
53
+ "task",
50
54
  "export",
51
55
  ]
52
56
  for subcmd in subcommands:
@@ -106,6 +110,14 @@ def create_parser() -> ArgumentParser:
106
110
  )
107
111
  status_parser.add_arguments(status.Cmd, dest="status")
108
112
 
113
+ # Logs subcommand
114
+ logs_parser = subparsers.add_parser(
115
+ "logs",
116
+ help="Stream logs from evaluation jobs",
117
+ description="Stream logs from evaluation jobs by invocation ID or job ID",
118
+ )
119
+ logs_parser.add_arguments(logs.Cmd, dest="logs")
120
+
109
121
  # Kill subcommand
110
122
  kill_parser = subparsers.add_parser(
111
123
  "kill",
@@ -149,6 +161,14 @@ def create_parser() -> ArgumentParser:
149
161
  )
150
162
  ls_runs_parser.add_arguments(ls_runs.Cmd, dest="runs")
151
163
 
164
+ # ls task (task details)
165
+ ls_task_parser = ls_sub.add_parser(
166
+ "task",
167
+ help="Show task details",
168
+ description="Show detailed information about a specific task",
169
+ )
170
+ ls_task_parser.add_arguments(ls_task.Cmd, dest="task")
171
+
152
172
  # Export subcommand
153
173
  export_parser = subparsers.add_parser(
154
174
  "export",
@@ -204,16 +224,23 @@ def main() -> None:
204
224
  args.run.execute()
205
225
  elif args.command == "status":
206
226
  args.status.execute()
227
+ elif args.command == "logs":
228
+ args.logs.execute()
207
229
  elif args.command == "kill":
208
230
  args.kill.execute()
209
231
  elif args.command == "ls":
210
232
  # Dispatch nested ls subcommands
211
- if args.ls_command is None or args.ls_command == "tasks":
212
- # Default to tasks when no subcommand specified
233
+ if args.ls_command == "tasks":
234
+ # When explicitly "ls tasks", use args.tasks (has correct from_container)
235
+ args.tasks.execute()
236
+ elif args.ls_command is None:
237
+ # When just "ls" (no subcommand), use args.tasks_alias
213
238
  if hasattr(args, "tasks_alias"):
214
239
  args.tasks_alias.execute()
215
240
  else:
216
241
  args.tasks.execute()
242
+ elif args.ls_command == "task":
243
+ args.task.execute()
217
244
  elif args.ls_command == "runs":
218
245
  args.runs.execute()
219
246
  elif args.command == "export":
@@ -16,6 +16,7 @@
16
16
  import pathlib
17
17
  import time
18
18
  from dataclasses import dataclass
19
+ from typing import Literal
19
20
 
20
21
  from simple_parsing import field
21
22
 
@@ -26,6 +27,7 @@ from nemo_evaluator_launcher.common.printing_utils import (
26
27
  green,
27
28
  magenta,
28
29
  red,
30
+ yellow,
29
31
  )
30
32
 
31
33
 
@@ -33,6 +35,13 @@ from nemo_evaluator_launcher.common.printing_utils import (
33
35
  class Cmd:
34
36
  """Run command parameters"""
35
37
 
38
+ config: str | None = field(
39
+ default=None,
40
+ alias=["--config"],
41
+ metadata={
42
+ "help": "Full path to config file. Uses Hydra by default (--config-mode=hydra). Use --config-mode=raw to load directly (bypasses Hydra)."
43
+ },
44
+ )
36
45
  config_name: str = field(
37
46
  default="default",
38
47
  alias=["-c", "--config-name"],
@@ -47,11 +56,11 @@ class Cmd:
47
56
  "help": "Path to user config directory. If provided, searches here first, then falls back to internal configs."
48
57
  },
49
58
  )
50
- run_config_file: str | None = field(
51
- default=None,
52
- alias=["-f", "--run-config-file"],
59
+ config_mode: Literal["hydra", "raw"] = field(
60
+ default="hydra",
61
+ alias=["--config-mode"],
53
62
  metadata={
54
- "help": "Path to a run config file to load directly (bypasses Hydra config loading)."
63
+ "help": "Config loading mode: 'hydra' (default) uses Hydra config system, 'raw' loads config file directly bypassing Hydra."
55
64
  },
56
65
  )
57
66
  override: list[str] = field(
@@ -68,6 +77,15 @@ class Cmd:
68
77
  alias=["-n", "--dry-run"],
69
78
  metadata={"help": "Do not run the evaluation, just print the config."},
70
79
  )
80
+ tasks: list[str] = field(
81
+ default_factory=list,
82
+ action="append",
83
+ nargs="?",
84
+ alias=["-t"],
85
+ metadata={
86
+ "help": "Run only specific tasks from the config. Example: -t ifeval -t gsm8k"
87
+ },
88
+ )
71
89
  config_output: str | None = field(
72
90
  default=None,
73
91
  alias=["--config-output"],
@@ -76,35 +94,97 @@ class Cmd:
76
94
  },
77
95
  )
78
96
 
97
+ def _parse_requested_tasks(self) -> list[str]:
98
+ """Parse -t arguments into a list of task names.
99
+
100
+ Handles None values that can be appended when using nargs="?" with action="append".
101
+ """
102
+ requested_tasks = []
103
+ for task_arg in self.tasks:
104
+ # Skip None or empty values (can happen with nargs="?")
105
+ if not task_arg:
106
+ continue
107
+ task_name = task_arg.strip()
108
+ if task_name and task_name not in requested_tasks:
109
+ requested_tasks.append(task_name)
110
+ return requested_tasks
111
+
79
112
  def execute(self) -> None:
80
113
  # Import heavy dependencies only when needed
81
114
  import yaml
82
115
  from omegaconf import OmegaConf
83
116
 
84
- from nemo_evaluator_launcher.api.functional import RunConfig, run_eval
117
+ from nemo_evaluator_launcher.api.functional import (
118
+ RunConfig,
119
+ filter_tasks,
120
+ run_eval,
121
+ )
122
+
123
+ # Validate config_mode value
124
+ if self.config_mode not in ["hydra", "raw"]:
125
+ raise ValueError(
126
+ f"Invalid --config-mode value: {self.config_mode}. Must be 'hydra' or 'raw'."
127
+ )
85
128
 
86
- # Load configuration either from Hydra or from a run config file
87
- if self.run_config_file:
88
- # Validate that run config file is not used with other config options
129
+ # Validate that raw mode requires --config
130
+ if self.config_mode == "raw" and self.config is None:
131
+ raise ValueError(
132
+ "--config-mode=raw requires --config to be specified. Raw mode loads config files directly."
133
+ )
134
+
135
+ # Parse requested tasks if -t is specified
136
+ requested_tasks = self._parse_requested_tasks() if self.tasks else None
137
+
138
+ # Load configuration either from Hydra or directly from a config file
139
+ if self.config_mode == "raw" and self.config:
140
+ # Validate that raw config loading is not used with other config options
89
141
  if self.config_name != "default":
90
- raise ValueError("Cannot use --run-config-file with --config-name")
142
+ raise ValueError(
143
+ "Cannot use --config-mode=raw with --config-name. Raw mode only works with --config."
144
+ )
91
145
  if self.config_dir is not None:
92
- raise ValueError("Cannot use --run-config-file with --config-dir")
146
+ raise ValueError(
147
+ "Cannot use --config-mode=raw with --config-dir. Raw mode only works with --config."
148
+ )
93
149
  if self.override:
94
- raise ValueError("Cannot use --run-config-file with --override")
150
+ raise ValueError(
151
+ "Cannot use --config-mode=raw with --override. Raw mode only works with --config."
152
+ )
95
153
 
96
- # Load from run config file
97
- with open(self.run_config_file, "r") as f:
154
+ # Load from config file directly (bypass Hydra)
155
+ with open(self.config, "r") as f:
98
156
  config_dict = yaml.safe_load(f)
99
157
 
100
158
  # Create RunConfig from the loaded data
101
159
  config = OmegaConf.create(config_dict)
102
160
  else:
161
+ # Handle --config parameter: split path into config_dir and config_name for Hydra
162
+ if self.config:
163
+ if self.config_name != "default":
164
+ raise ValueError("Cannot use --config with --config-name")
165
+ if self.config_dir is not None:
166
+ raise ValueError("Cannot use --config with --config-dir")
167
+ config_path = pathlib.Path(self.config)
168
+ config_dir = str(config_path.parent)
169
+ config_name = str(config_path.stem)
170
+ else:
171
+ config_dir = self.config_dir
172
+ config_name = self.config_name
173
+
103
174
  # Load the complete Hydra configuration
104
175
  config = RunConfig.from_hydra(
105
- config_name=self.config_name,
176
+ config_dir=config_dir,
177
+ config_name=config_name,
106
178
  hydra_overrides=self.override,
107
- config_dir=self.config_dir,
179
+ )
180
+
181
+ # Apply task filtering if -t is specified
182
+ if requested_tasks:
183
+ config = filter_tasks(config, requested_tasks)
184
+ logger.info(
185
+ "Running filtered tasks",
186
+ count=len(config.evaluation.tasks),
187
+ tasks=[t.name for t in config.evaluation.tasks],
108
188
  )
109
189
 
110
190
  try:
@@ -150,7 +230,7 @@ class Cmd:
150
230
  f.write("#\n")
151
231
  f.write("# To rerun this exact configuration:\n")
152
232
  f.write(
153
- f"# nemo-evaluator-launcher run --run-config-file {config_path}\n"
233
+ f"# nemo-evaluator-launcher run --config {config_path} --config-mode=raw\n"
154
234
  )
155
235
  f.write("#\n")
156
236
  f.write(config_yaml)
@@ -164,6 +244,10 @@ class Cmd:
164
244
  bold(cyan("To check status: "))
165
245
  + f"nemo-evaluator-launcher status {invocation_id}"
166
246
  )
247
+ print(
248
+ bold(cyan("To view job info: "))
249
+ + f"nemo-evaluator-launcher info {invocation_id}"
250
+ )
167
251
  print(
168
252
  bold(cyan("To kill all jobs: "))
169
253
  + f"nemo-evaluator-launcher kill {invocation_id}"
@@ -198,3 +282,17 @@ class Cmd:
198
282
  )
199
283
  )
200
284
  )
285
+
286
+ # Warn if both config_dir and config_name are provided (and config_name is not default)
287
+ if (
288
+ self.config is None
289
+ and self.config_dir is not None
290
+ and self.config_name != "default"
291
+ ):
292
+ joint_path = pathlib.Path(self.config_dir) / f"{self.config_name}.yaml"
293
+ print(
294
+ yellow(
295
+ f"Warning: Using --config-dir and --config-name together is deprecated. "
296
+ f"Please use --config {joint_path} instead."
297
+ )
298
+ )
@@ -19,6 +19,29 @@ import importlib
19
19
  from dataclasses import dataclass
20
20
 
21
21
  from nemo_evaluator_launcher import __package_name__, __version__
22
+ from nemo_evaluator_launcher.common.logging_utils import logger
23
+
24
+
25
+ def get_versions() -> dict:
26
+ internal_module_name = "nemo_evaluator_launcher_internal"
27
+ res = {__package_name__: __version__}
28
+ # Check for internal package
29
+ try:
30
+ internal_module = importlib.import_module(internal_module_name)
31
+ # Try to get version from internal package
32
+ internal_version = getattr(internal_module, "__version__", None)
33
+ if internal_version:
34
+ res[internal_module_name] = internal_version
35
+ else:
36
+ res[internal_module_name] = "available (version unknown)"
37
+ except ImportError:
38
+ # Internal package not available - this is expected in many cases
39
+ pass
40
+ except Exception as e:
41
+ logger.error(f"nemo_evaluator_launcher_internal: error loading ({e})")
42
+ raise
43
+
44
+ return res
22
45
 
23
46
 
24
47
  @dataclass
@@ -27,26 +50,6 @@ class Cmd:
27
50
 
28
51
  def execute(self) -> None:
29
52
  """Execute the version command."""
30
- print(f"{__package_name__}: {__version__}")
31
-
32
- # Check for internal package
33
- try:
34
- internal_module = importlib.import_module(
35
- "nemo_evaluator_launcher_internal"
36
- )
37
- # Try to get version from internal package
38
- try:
39
- internal_version = getattr(internal_module, "__version__", None)
40
- if internal_version:
41
- print(f"nemo-evaluator-launcher-internal: {internal_version}")
42
- else:
43
- print(
44
- "nemo-evaluator-launcher-internal: available (version unknown)"
45
- )
46
- except Exception:
47
- print("nemo-evaluator-launcher-internal: available (version unknown)")
48
- except ImportError:
49
- # Internal package not available - this is expected in many cases
50
- pass
51
- except Exception as e:
52
- print(f"nemo-evaluator-launcher-internal: error loading ({e})")
53
+ res = get_versions()
54
+ for package, version in res.items():
55
+ print(f"{package}: {version}")