nemo-evaluator-launcher 0.1.12__tar.gz → 0.1.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/PKG-INFO +1 -1
  2. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/api/functional.py +28 -2
  3. nemo_evaluator_launcher-0.1.14/src/nemo_evaluator_launcher/cli/export.py +267 -0
  4. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/cli/run.py +22 -3
  5. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/cli/status.py +3 -1
  6. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +24 -4
  7. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/executors/lepton/executor.py +3 -5
  8. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/executors/local/executor.py +26 -5
  9. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/executors/slurm/executor.py +90 -26
  10. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/exporters/base.py +9 -0
  11. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/exporters/gsheets.py +27 -9
  12. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/exporters/local.py +5 -0
  13. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/exporters/mlflow.py +105 -32
  14. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/exporters/utils.py +22 -105
  15. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/exporters/wandb.py +117 -38
  16. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/package_info.py +1 -1
  17. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher.egg-info/PKG-INFO +1 -1
  18. nemo_evaluator_launcher-0.1.12/src/nemo_evaluator_launcher/cli/export.py +0 -149
  19. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/LICENSE +0 -0
  20. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/README.md +0 -0
  21. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/pyproject.toml +0 -0
  22. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/setup.cfg +0 -0
  23. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/__init__.py +0 -0
  24. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/api/__init__.py +0 -0
  25. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/api/types.py +0 -0
  26. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/api/utils.py +0 -0
  27. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/cli/__init__.py +0 -0
  28. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/cli/kill.py +0 -0
  29. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/cli/ls_runs.py +0 -0
  30. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/cli/ls_tasks.py +0 -0
  31. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/cli/main.py +0 -0
  32. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/cli/version.py +0 -0
  33. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/common/__init__.py +0 -0
  34. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/common/execdb.py +0 -0
  35. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/common/helpers.py +0 -0
  36. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/common/logging_utils.py +0 -0
  37. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/common/mapping.py +0 -0
  38. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/configs/__init__.py +0 -0
  39. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/configs/default.yaml +0 -0
  40. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/configs/deployment/generic.yaml +0 -0
  41. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/configs/deployment/nim.yaml +0 -0
  42. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/configs/deployment/none.yaml +0 -0
  43. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/configs/deployment/sglang.yaml +0 -0
  44. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/configs/deployment/trtllm.yaml +0 -0
  45. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -0
  46. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/configs/execution/lepton/default.yaml +0 -0
  47. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/configs/execution/local.yaml +0 -0
  48. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/configs/execution/slurm/default.yaml +0 -0
  49. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/executors/__init__.py +0 -0
  50. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/executors/base.py +0 -0
  51. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/executors/lepton/__init__.py +0 -0
  52. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/executors/lepton/job_helpers.py +0 -0
  53. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/executors/local/__init__.py +0 -0
  54. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/executors/local/run.template.sh +0 -0
  55. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/executors/registry.py +0 -0
  56. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/executors/slurm/__init__.py +0 -0
  57. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/exporters/__init__.py +0 -0
  58. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/exporters/registry.py +0 -0
  59. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher/resources/mapping.toml +0 -0
  60. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher.egg-info/SOURCES.txt +0 -0
  61. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher.egg-info/dependency_links.txt +0 -0
  62. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher.egg-info/entry_points.txt +0 -0
  63. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher.egg-info/requires.txt +0 -0
  64. {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.14}/src/nemo_evaluator_launcher.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nemo-evaluator-launcher
3
- Version: 0.1.12
3
+ Version: 0.1.14
4
4
  Summary: Launcher for the evaluations provided by NeMo Evaluator containers with different runtime backends
5
5
  Author: NVIDIA
6
6
  Author-email: nemo-toolkit@nvidia.com
@@ -456,6 +456,7 @@ def export_results(
456
456
  yaml.safe_load(ypath_export.read_text(encoding="utf-8"))
457
457
  or {}
458
458
  )
459
+ # execution.auto_export contains auto-export destinations
459
460
  exec_cfg = cfg_yaml.get("execution") or {}
460
461
  auto_exp = (exp_yaml.get("execution") or {}).get(
461
462
  "auto_export"
@@ -463,15 +464,39 @@ def export_results(
463
464
  if auto_exp is not None:
464
465
  exec_cfg["auto_export"] = auto_exp
465
466
  cfg_yaml["execution"] = exec_cfg
467
+
468
+ # top-level export block contains exporter config
469
+ if "export" in exp_yaml:
470
+ cfg_yaml["export"] = exp_yaml["export"]
471
+
472
+ # Merge evaluation.tasks from export_config (Slurm writes it there)
473
+ if "evaluation" in exp_yaml and exp_yaml["evaluation"]:
474
+ eval_cfg = cfg_yaml.get("evaluation") or {}
475
+ eval_cfg.update(exp_yaml["evaluation"])
476
+ cfg_yaml["evaluation"] = eval_cfg
477
+
466
478
  # metadata
479
+ executor_name = (cfg_yaml.get("execution") or {}).get(
480
+ "type", "local"
481
+ )
482
+
467
483
  md_job_data = JobData(
468
484
  invocation_id=single_id.split(".")[0],
469
485
  job_id=single_id,
470
486
  timestamp=0.0,
471
- executor="local", #
472
- data={"output_dir": str(Path.cwd().parent)},
487
+ executor=executor_name,
488
+ data={
489
+ "output_dir": str(Path.cwd().parent),
490
+ "storage_type": "remote_local",
491
+ },
473
492
  config=cfg_yaml,
474
493
  )
494
+ # DEBUG: print what we loaded
495
+ print(f"DEBUG: cfg_yaml keys: {list(cfg_yaml.keys())}")
496
+ if "evaluation" in cfg_yaml:
497
+ print(
498
+ f"DEBUG: evaluation.tasks: {cfg_yaml.get('evaluation', {}).get('tasks')}"
499
+ )
475
500
  except Exception:
476
501
  md_job_data = None
477
502
  # fallback to execDB only
@@ -492,6 +517,7 @@ def export_results(
492
517
  "success": job_result.success,
493
518
  "message": job_result.message,
494
519
  "metadata": job_result.metadata or {},
520
+ "dest": getattr(job_result, "dest", None),
495
521
  }
496
522
  },
497
523
  "metadata": job_result.metadata or {},
@@ -0,0 +1,267 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ """Export evaluation results to specified target."""
17
+
18
+ from dataclasses import dataclass
19
+ from typing import Any, List, Optional
20
+
21
+ from simple_parsing import field
22
+
23
+
24
+ @dataclass
25
+ class ExportCmd:
26
+ """Export evaluation results."""
27
+
28
+ # Short usage examples will show up in -h as the class docstring:
29
+ # Examples:
30
+ # nemo-evaluator-launcher export 8abcd123 --dest local --format json --out .
31
+ # nemo-evaluator-launcher export 8abcd123.0 9ef01234 --dest local --format csv --out results/ -fname processed_results.csv
32
+ # nemo-evaluator-launcher export 8abcd123 --dest jet
33
+
34
+ invocation_ids: List[str] = field(
35
+ positional=True,
36
+ help="IDs to export (space-separated). Accepts invocation IDs (xxxxxxxx) and job IDs (xxxxxxxx.n); mixture of both allowed.",
37
+ )
38
+ dest: str = field(
39
+ default="local",
40
+ alias=["--dest"],
41
+ choices=["local", "wandb", "mlflow", "gsheets", "jet"],
42
+ help="Export destination.",
43
+ )
44
+ # overrides for exporter config; use -o similar to run command
45
+ override: List[str] = field(
46
+ default_factory=list,
47
+ action="append",
48
+ nargs="?",
49
+ alias=["-o", "--override"],
50
+ help="Hydra-style overrides for exporter config. Use `export.<dest>.key=value` (e.g., -o export.wandb.entity=org-name).",
51
+ )
52
+ output_dir: Optional[str] = field(
53
+ default=".",
54
+ alias=["--output-dir", "-out"],
55
+ help="Output directory (default: current directory).",
56
+ )
57
+ output_filename: Optional[str] = field(
58
+ default=None,
59
+ alias=["--output-filename", "-fname"],
60
+ help="Summary filename (default: processed_results.json/csv based on --format).",
61
+ )
62
+ format: Optional[str] = field(
63
+ default=None,
64
+ alias=["--format"],
65
+ choices=["json", "csv"],
66
+ help="Summary format for --dest local. Omit to only copy artifacts.",
67
+ )
68
+ copy_logs: bool = field(
69
+ default=False,
70
+ alias=["--copy-logs"],
71
+ help="Include logs when copying locally (default: False).",
72
+ )
73
+ log_metrics: List[str] = field(
74
+ default_factory=list,
75
+ alias=["--log-metrics"],
76
+ help="Filter metrics by name (repeatable). Examples: score, f1, mmlu_score_micro.",
77
+ )
78
+ only_required: Optional[bool] = field(
79
+ default=None,
80
+ alias=["--only-required"],
81
+ help="Copy only required+optional artifacts (default: True). Set to False to copy all available artifacts.",
82
+ )
83
+
84
+ def execute(self) -> None:
85
+ """Execute export."""
86
+ # Import heavy dependencies only when needed
87
+ from omegaconf import OmegaConf
88
+
89
+ from nemo_evaluator_launcher.api.functional import export_results
90
+
91
+ # Validation: ensure IDs are provided
92
+ if not self.invocation_ids:
93
+ print("Error: No IDs provided. Specify one or more invocation or job IDs.")
94
+ print(
95
+ "Usage: nemo-evaluator-launcher export <id> [<id>...] --dest <destination>"
96
+ )
97
+ return
98
+
99
+ config: dict[str, Any] = {
100
+ "copy_logs": self.copy_logs,
101
+ }
102
+
103
+ # Output handling
104
+ if self.output_dir:
105
+ config["output_dir"] = self.output_dir
106
+ if self.output_filename:
107
+ config["output_filename"] = self.output_filename
108
+
109
+ # Format and filters
110
+ if self.format:
111
+ config["format"] = self.format
112
+ if self.log_metrics:
113
+ config["log_metrics"] = self.log_metrics
114
+
115
+ # Add only_required if explicitly passed via CLI
116
+ if self.only_required is not None:
117
+ config["only_required"] = self.only_required
118
+
119
+ # Parse and validate overrides
120
+ if self.override:
121
+ # Flatten possible list-of-lists from parser
122
+ flat_overrides: list[str] = []
123
+ for item in self.override:
124
+ if isinstance(item, list):
125
+ flat_overrides.extend(str(x) for x in item)
126
+ else:
127
+ flat_overrides.append(str(item))
128
+
129
+ try:
130
+ self._validate_overrides(flat_overrides, self.dest)
131
+ except ValueError as e:
132
+ print(f"Error: {e}")
133
+ return
134
+
135
+ # Expand env vars in override vals ($VAR / ${VAR})
136
+ import os
137
+
138
+ from omegaconf import OmegaConf
139
+
140
+ expanded_overrides: list[str] = []
141
+ for ov in flat_overrides:
142
+ if "=" in ov:
143
+ k, v = ov.split("=", 1)
144
+ expanded_overrides.append(f"{k}={os.path.expandvars(v)}")
145
+ else:
146
+ expanded_overrides.append(os.path.expandvars(ov))
147
+
148
+ dot_cfg = OmegaConf.from_dotlist(expanded_overrides)
149
+ as_dict = OmegaConf.to_container(dot_cfg, resolve=True) or {}
150
+ if isinstance(as_dict, dict) and "export" in as_dict:
151
+ export_map = as_dict.get("export") or {}
152
+ if isinstance(export_map, dict) and self.dest in export_map:
153
+ config.update(export_map[self.dest] or {})
154
+ else:
155
+ config.update(as_dict)
156
+ else:
157
+ config.update(as_dict)
158
+
159
+ if self.format and self.dest != "local":
160
+ print(
161
+ "Note: --format is only used by --dest local. It will be ignored for other destinations."
162
+ )
163
+
164
+ if "only_required" in config and self.only_required is True:
165
+ config.pop("only_required", None)
166
+
167
+ print(
168
+ f"Exporting {len(self.invocation_ids)} {'invocations' if len(self.invocation_ids) > 1 else 'invocation'} to {self.dest}..."
169
+ )
170
+
171
+ result = export_results(self.invocation_ids, self.dest, config)
172
+
173
+ if not result.get("success", False):
174
+ err = result.get("error", "Unknown error")
175
+ print(f"\nExport failed: {err}")
176
+ # Provide actionable guidance for common configuration issues
177
+ if self.dest == "mlflow":
178
+ if "tracking_uri" in str(err).lower():
179
+ print("\nMLflow requires 'tracking_uri' to be configured.")
180
+ print(
181
+ "Set it via: -o export.mlflow.tracking_uri=http://mlflow-server:5000"
182
+ )
183
+ elif "not installed" in str(err).lower():
184
+ print("\nMLflow package not installed.")
185
+ print("Install via: pip install nemo-evaluator-launcher[mlflow]")
186
+ elif self.dest == "wandb":
187
+ if "entity" in str(err).lower() or "project" in str(err).lower():
188
+ print("\nW&B requires 'entity' and 'project' to be configured.")
189
+ print(
190
+ "Set via: -o export.wandb.entity=my-org -o export.wandb.project=my-proj"
191
+ )
192
+ elif "not installed" in str(err).lower():
193
+ print("\nW&B package not installed.")
194
+ print("Install via: pip install nemo-evaluator-launcher[wandb]")
195
+ elif self.dest == "gsheets":
196
+ if "not installed" in str(err).lower():
197
+ print("\nGoogle Sheets package not installed.")
198
+ print("Install via: pip install nemo-evaluator-launcher[gsheets]")
199
+ return
200
+
201
+ # Success path
202
+ if len(self.invocation_ids) == 1:
203
+ # Single invocation
204
+ invocation_id = self.invocation_ids[0]
205
+ print(f"Export completed for {invocation_id}")
206
+
207
+ for job_id, job_result in result["jobs"].items():
208
+ if job_result.get("success"):
209
+ print(f" {job_id}: {job_result.get('message', '')}")
210
+ metadata = job_result.get("metadata", {})
211
+ if metadata.get("run_url"):
212
+ print(f" URL: {metadata['run_url']}")
213
+ if metadata.get("summary_path"):
214
+ print(f" Summary: {metadata['summary_path']}")
215
+ path_hint = job_result.get("dest") or metadata.get("output_dir")
216
+ if self.dest == "local" and path_hint:
217
+ print(f" Path: {path_hint}")
218
+ else:
219
+ print(f" {job_id} failed: {job_result.get('message', '')}")
220
+ else:
221
+ # Multiple invocations
222
+ metadata = result.get("metadata", {})
223
+ print(
224
+ f"Export completed: {metadata.get('successful_invocations', 0)}/{metadata.get('total_invocations', 0)} successful"
225
+ )
226
+
227
+ # Show summary path if available
228
+ if metadata.get("summary_path"):
229
+ print(f"Summary: {metadata['summary_path']}")
230
+ # Show per-invocation status
231
+ for invocation_id, inv_result in result["invocations"].items():
232
+ if inv_result.get("success"):
233
+ job_count = len(inv_result.get("jobs", {}))
234
+ print(f" {invocation_id}: {job_count} jobs")
235
+ else:
236
+ print(
237
+ f" {invocation_id}: failed, {inv_result.get('error', 'Unknown error')}"
238
+ )
239
+
240
+ def _validate_overrides(self, overrides: List[str], dest: str) -> None:
241
+ """Validate override list for destination consistency.
242
+
243
+ Raises:
244
+ ValueError: If overrides specify wrong destination or have other issues.
245
+ """
246
+ if not overrides:
247
+ return # nothing to validate
248
+
249
+ # Check each override for destination mismatch
250
+ for override_str in overrides:
251
+ if override_str.startswith(
252
+ "export."
253
+ ): # check if override starts with export.
254
+ # Extract destination from override path
255
+ try:
256
+ key_part = override_str.split("=")[0] # Get left side before =
257
+ parts = key_part.split(".")
258
+ if len(parts) >= 2:
259
+ override_dest = parts[1]
260
+ if override_dest != dest:
261
+ raise ValueError(
262
+ f"Override destination mismatch: override specifies 'export.{override_dest}' but --dest is '{dest}'. "
263
+ f"Either change --dest to '{override_dest}' or use 'export.{dest}' in overrides."
264
+ )
265
+ except (IndexError, AttributeError):
266
+ # malformed override -> OmegaConf handles this
267
+ pass
@@ -98,7 +98,17 @@ class Cmd:
98
98
  config_dir=self.config_dir,
99
99
  )
100
100
 
101
- invocation_id = run_eval(config, self.dry_run)
101
+ try:
102
+ invocation_id = run_eval(config, self.dry_run)
103
+ except Exception as e:
104
+ print(f"\033[31m✗ Job submission failed | Error: {e}\033[0m")
105
+ raise
106
+
107
+ # Print general success message with invocation ID
108
+ if invocation_id is not None and not self.dry_run:
109
+ print(
110
+ f"\033[32m✓ Job submission successful | Invocation ID: {invocation_id}\033[0m"
111
+ )
102
112
 
103
113
  # Save the complete configuration
104
114
  if not self.dry_run and invocation_id is not None:
@@ -146,6 +156,15 @@ class Cmd:
146
156
  if invocation_id is not None:
147
157
  print(f"to check status: nemo-evaluator-launcher status {invocation_id}")
148
158
  print(f"to kill all jobs: nemo-evaluator-launcher kill {invocation_id}")
149
- print(
150
- f"to kill individual jobs: nemo-evaluator-launcher kill <job_id> (e.g., {invocation_id}.0)"
159
+
160
+ # Show actual job IDs and task names
161
+ print("to kill individual jobs:")
162
+ # Access tasks - will work after normalization in run_eval
163
+ tasks = (
164
+ config.evaluation.tasks
165
+ if hasattr(config.evaluation, "tasks")
166
+ else config.evaluation
151
167
  )
168
+ for idx, task in enumerate(tasks):
169
+ job_id = f"{invocation_id}.{idx}"
170
+ print(f" nemo-evaluator-launcher kill {job_id} # {task.name}")
@@ -102,6 +102,8 @@ class Cmd:
102
102
  status = job.get("status", "")
103
103
  formatted_status = self._format_status_with_indicators(status)
104
104
 
105
+ # Extract task name
106
+
105
107
  rows.append(
106
108
  [
107
109
  job.get("job_id", ""),
@@ -144,7 +146,7 @@ class Cmd:
144
146
  ExecutionState.SUCCESS.value: "\033[32m✓ SUCCESS\033[0m", # Green Unicode checkmark
145
147
  ExecutionState.FAILED.value: "\033[31m✗ FAILED\033[0m", # Red Unicode X
146
148
  ExecutionState.RUNNING.value: "\033[33m▶ RUNNING\033[0m", # Yellow Unicode play button
147
- ExecutionState.PENDING.value: "\033[36m PENDING\033[0m", # Cyan Unicode hourglass
149
+ ExecutionState.PENDING.value: "\033[36m PENDING\033[0m", # Cyan Unicode hourglass (U+29D7)
148
150
  ExecutionState.KILLED.value: "\033[35m✗ KILLED\033[0m", # Magenta Unicode X
149
151
  # Additional states for error handling
150
152
  "not_found": "\033[90m? NOT FOUND\033[0m", # Gray question mark
@@ -428,14 +428,34 @@ def create_lepton_endpoint(cfg: DictConfig, endpoint_name: str) -> bool:
428
428
  print(f"✅ Successfully created Lepton endpoint: {endpoint_name}")
429
429
  return True
430
430
  else:
431
- print(f"❌ Failed to create Lepton endpoint: {result.stderr}")
431
+ error_msg = result.stderr.strip() if result.stderr else ""
432
+ output_msg = result.stdout.strip() if result.stdout else ""
433
+ print(
434
+ f"✗ Failed to create Lepton endpoint | Endpoint: {endpoint_name} | Return code: {result.returncode}"
435
+ )
436
+ if error_msg:
437
+ print(f" stderr: {error_msg}")
438
+ if output_msg:
439
+ print(f" stdout: {output_msg}")
432
440
  return False
433
441
 
434
- except subprocess.TimeoutExpired:
435
- print(f"❌ Timeout creating Lepton endpoint: {endpoint_name}")
442
+ except subprocess.TimeoutExpired as e:
443
+ print(
444
+ f"✗ Timeout creating Lepton endpoint | Endpoint: {endpoint_name} | Timeout: 300s"
445
+ )
446
+ if hasattr(e, "stderr") and e.stderr:
447
+ print(f" stderr: {e.stderr}")
448
+ if hasattr(e, "stdout") and e.stdout:
449
+ print(f" stdout: {e.stdout}")
436
450
  return False
437
451
  except subprocess.CalledProcessError as e:
438
- print(f"❌ Error creating Lepton endpoint: {e}")
452
+ print(
453
+ f"✗ Error creating Lepton endpoint | Endpoint: {endpoint_name} | Error: {e}"
454
+ )
455
+ if hasattr(e, "stderr") and e.stderr:
456
+ print(f" stderr: {e.stderr}")
457
+ if hasattr(e, "stdout") and e.stdout:
458
+ print(f" stdout: {e.stdout}")
439
459
  return False
440
460
  finally:
441
461
  # Clean up temporary file
@@ -482,7 +482,8 @@ class LeptonExecutor(BaseExecutor):
482
482
 
483
483
  if not job_success:
484
484
  raise RuntimeError(
485
- f"Failed to submit Lepton job for task: {task.name}. Error: {error_msg}"
485
+ f"Failed to submit Lepton job | Task: {task.name} | Job ID: {job_id} | "
486
+ f"Lepton job name: {lepton_job_name} | Error: {error_msg}"
486
487
  )
487
488
 
488
489
  # Store job metadata in database (with task-specific endpoint info)
@@ -504,8 +505,6 @@ class LeptonExecutor(BaseExecutor):
504
505
  )
505
506
  )
506
507
 
507
- print(f"✅ Task {task.name}: Submitted evaluation job {job_id}")
508
-
509
508
  # Jobs submitted successfully - return immediately (non-blocking)
510
509
  print(
511
510
  f"\n✅ Successfully submitted {len(lepton_job_names)} evaluation jobs to Lepton"
@@ -536,9 +535,8 @@ class LeptonExecutor(BaseExecutor):
536
535
 
537
536
  return invocation_id
538
537
 
539
- except Exception as e:
538
+ except Exception:
540
539
  # Clean up any created endpoints on failure
541
- print(f"❌ Error during evaluation: {e}")
542
540
  if cfg.deployment.type != "none" and "endpoint_names" in locals():
543
541
  for endpoint_name in endpoint_names:
544
542
  if endpoint_name:
@@ -23,6 +23,7 @@ import os
23
23
  import pathlib
24
24
  import platform
25
25
  import shlex
26
+ import shutil
26
27
  import subprocess
27
28
  import time
28
29
  from typing import List, Optional
@@ -76,6 +77,13 @@ class LocalExecutor(BaseExecutor):
76
77
  f"type {cfg.deployment.type} is not implemented -- add deployment support"
77
78
  )
78
79
 
80
+ # Check if docker is available (skip in dry_run mode)
81
+ if not dry_run and shutil.which("docker") is None:
82
+ raise RuntimeError(
83
+ "Docker is not installed or not in PATH. "
84
+ "Please install Docker to run local evaluations."
85
+ )
86
+
79
87
  # Generate invocation ID for this evaluation run
80
88
  invocation_id = generate_invocation_id()
81
89
 
@@ -233,35 +241,48 @@ class LocalExecutor(BaseExecutor):
233
241
  # To ensure subprocess continues after python exits:
234
242
  # - on Unix-like systems, to fully detach the subprocess
235
243
  # so it does not die when Python exits, pass start_new_session=True;
236
- # - on Widnows use creationflags=subprocess.CREATE_NEW_PROCESS_GROUP flag.
244
+ # - on Windows use creationflags=subprocess.CREATE_NEW_PROCESS_GROUP flag.
237
245
  os_name = platform.system()
246
+ processes = []
247
+
238
248
  if is_execution_mode_sequential:
239
249
  if os_name == "Windows":
240
- subprocess.Popen(
250
+ proc = subprocess.Popen(
241
251
  shlex.split("bash run_all.sequential.sh"),
242
252
  cwd=output_dir,
243
253
  creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
244
254
  )
245
255
  else:
246
- subprocess.Popen(
256
+ proc = subprocess.Popen(
247
257
  shlex.split("bash run_all.sequential.sh"),
248
258
  cwd=output_dir,
249
259
  start_new_session=True,
250
260
  )
261
+ processes.append(("run_all.sequential.sh", proc, output_dir))
251
262
  else:
252
263
  for task in cfg.evaluation.tasks:
253
264
  if os_name == "Windows":
254
- subprocess.Popen(
265
+ proc = subprocess.Popen(
255
266
  shlex.split("bash run.sh"),
256
267
  cwd=output_dir / task.name,
257
268
  creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
258
269
  )
259
270
  else:
260
- subprocess.Popen(
271
+ proc = subprocess.Popen(
261
272
  shlex.split("bash run.sh"),
262
273
  cwd=output_dir / task.name,
263
274
  start_new_session=True,
264
275
  )
276
+ processes.append((task.name, proc, output_dir / task.name))
277
+
278
+ # Wait briefly and check if bash scripts exited immediately (which means error)
279
+ time.sleep(0.3)
280
+
281
+ for name, proc, work_dir in processes:
282
+ exit_code = proc.poll()
283
+ if exit_code is not None and exit_code != 0:
284
+ error_msg = f"Script for {name} exited with code {exit_code}"
285
+ raise RuntimeError(f"Job startup failed | {error_msg}")
265
286
 
266
287
  print("\nCommands for real-time monitoring:")
267
288
  for job_id, evaluation_task in zip(job_ids, evaluation_tasks):