nemo-evaluator-launcher 0.1.12__tar.gz → 0.1.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nemo-evaluator-launcher might be problematic. Click here for more details.
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/PKG-INFO +1 -1
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/api/functional.py +28 -2
- nemo_evaluator_launcher-0.1.13/src/nemo_evaluator_launcher/cli/export.py +267 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/cli/run.py +22 -3
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/cli/status.py +3 -1
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +24 -4
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/lepton/executor.py +3 -5
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/local/executor.py +26 -5
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/slurm/executor.py +90 -26
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/exporters/base.py +9 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/exporters/gsheets.py +27 -9
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/exporters/local.py +5 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/exporters/mlflow.py +105 -32
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/exporters/utils.py +22 -105
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/exporters/wandb.py +117 -38
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/package_info.py +1 -1
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher.egg-info/PKG-INFO +1 -1
- nemo_evaluator_launcher-0.1.12/src/nemo_evaluator_launcher/cli/export.py +0 -149
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/LICENSE +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/README.md +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/pyproject.toml +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/setup.cfg +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/api/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/api/types.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/api/utils.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/cli/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/cli/kill.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/cli/ls_runs.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/cli/ls_tasks.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/cli/main.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/cli/version.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/common/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/common/execdb.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/common/helpers.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/common/logging_utils.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/common/mapping.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/configs/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/configs/default.yaml +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/configs/deployment/generic.yaml +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/configs/deployment/nim.yaml +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/configs/deployment/none.yaml +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/configs/deployment/sglang.yaml +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/configs/deployment/trtllm.yaml +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/configs/execution/lepton/default.yaml +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/configs/execution/local.yaml +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/configs/execution/slurm/default.yaml +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/base.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/lepton/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/lepton/job_helpers.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/local/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/local/run.template.sh +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/registry.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/executors/slurm/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/exporters/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/exporters/registry.py +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher/resources/mapping.toml +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher.egg-info/SOURCES.txt +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher.egg-info/dependency_links.txt +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher.egg-info/entry_points.txt +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher.egg-info/requires.txt +0 -0
- {nemo_evaluator_launcher-0.1.12 → nemo_evaluator_launcher-0.1.13}/src/nemo_evaluator_launcher.egg-info/top_level.txt +0 -0
|
@@ -456,6 +456,7 @@ def export_results(
|
|
|
456
456
|
yaml.safe_load(ypath_export.read_text(encoding="utf-8"))
|
|
457
457
|
or {}
|
|
458
458
|
)
|
|
459
|
+
# execution.auto_export contains auto-export destinations
|
|
459
460
|
exec_cfg = cfg_yaml.get("execution") or {}
|
|
460
461
|
auto_exp = (exp_yaml.get("execution") or {}).get(
|
|
461
462
|
"auto_export"
|
|
@@ -463,15 +464,39 @@ def export_results(
|
|
|
463
464
|
if auto_exp is not None:
|
|
464
465
|
exec_cfg["auto_export"] = auto_exp
|
|
465
466
|
cfg_yaml["execution"] = exec_cfg
|
|
467
|
+
|
|
468
|
+
# top-level export block contains exporter config
|
|
469
|
+
if "export" in exp_yaml:
|
|
470
|
+
cfg_yaml["export"] = exp_yaml["export"]
|
|
471
|
+
|
|
472
|
+
# Merge evaluation.tasks from export_config (Slurm writes it there)
|
|
473
|
+
if "evaluation" in exp_yaml and exp_yaml["evaluation"]:
|
|
474
|
+
eval_cfg = cfg_yaml.get("evaluation") or {}
|
|
475
|
+
eval_cfg.update(exp_yaml["evaluation"])
|
|
476
|
+
cfg_yaml["evaluation"] = eval_cfg
|
|
477
|
+
|
|
466
478
|
# metadata
|
|
479
|
+
executor_name = (cfg_yaml.get("execution") or {}).get(
|
|
480
|
+
"type", "local"
|
|
481
|
+
)
|
|
482
|
+
|
|
467
483
|
md_job_data = JobData(
|
|
468
484
|
invocation_id=single_id.split(".")[0],
|
|
469
485
|
job_id=single_id,
|
|
470
486
|
timestamp=0.0,
|
|
471
|
-
executor=
|
|
472
|
-
data={
|
|
487
|
+
executor=executor_name,
|
|
488
|
+
data={
|
|
489
|
+
"output_dir": str(Path.cwd().parent),
|
|
490
|
+
"storage_type": "remote_local",
|
|
491
|
+
},
|
|
473
492
|
config=cfg_yaml,
|
|
474
493
|
)
|
|
494
|
+
# DEBUG: print what we loaded
|
|
495
|
+
print(f"DEBUG: cfg_yaml keys: {list(cfg_yaml.keys())}")
|
|
496
|
+
if "evaluation" in cfg_yaml:
|
|
497
|
+
print(
|
|
498
|
+
f"DEBUG: evaluation.tasks: {cfg_yaml.get('evaluation', {}).get('tasks')}"
|
|
499
|
+
)
|
|
475
500
|
except Exception:
|
|
476
501
|
md_job_data = None
|
|
477
502
|
# fallback to execDB only
|
|
@@ -492,6 +517,7 @@ def export_results(
|
|
|
492
517
|
"success": job_result.success,
|
|
493
518
|
"message": job_result.message,
|
|
494
519
|
"metadata": job_result.metadata or {},
|
|
520
|
+
"dest": getattr(job_result, "dest", None),
|
|
495
521
|
}
|
|
496
522
|
},
|
|
497
523
|
"metadata": job_result.metadata or {},
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
#
|
|
16
|
+
"""Export evaluation results to specified target."""
|
|
17
|
+
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from typing import Any, List, Optional
|
|
20
|
+
|
|
21
|
+
from simple_parsing import field
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class ExportCmd:
    """Export evaluation results."""

    # NOTE: the class docstring above is kept short on purpose; the original
    # author notes that it is what shows up in `-h`.
    # Examples:
    # nemo-evaluator-launcher export 8abcd123 --dest local --format json --output-dir .
    # nemo-evaluator-launcher export 8abcd123.0 9ef01234 --dest local --format csv --output-dir results/ -fname processed_results.csv
    # nemo-evaluator-launcher export 8abcd123 --dest jet

    # Positional list of invocation IDs and/or job IDs to export.
    invocation_ids: List[str] = field(
        positional=True,
        help="IDs to export (space-separated). Accepts invocation IDs (xxxxxxxx) and job IDs (xxxxxxxx.n); mixture of both allowed.",
    )
    # Name of the exporter the results are sent to.
    dest: str = field(
        default="local",
        alias=["--dest"],
        choices=["local", "wandb", "mlflow", "gsheets", "jet"],
        help="Export destination.",
    )
    # overrides for exporter config; use -o similar to run command
    # NOTE: nargs="?" combined with action="append" means a bare `-o` (no
    # value) appends None to this list; execute() filters those out.
    override: List[str] = field(
        default_factory=list,
        action="append",
        nargs="?",
        alias=["-o", "--override"],
        help="Hydra-style overrides for exporter config. Use `export.<dest>.key=value` (e.g., -o export.wandb.entity=org-name).",
    )
    # Directory passed to the exporter as "output_dir".
    output_dir: Optional[str] = field(
        default=".",
        alias=["--output-dir", "-out"],
        help="Output directory (default: current directory).",
    )
    # Optional summary filename passed to the exporter as "output_filename".
    output_filename: Optional[str] = field(
        default=None,
        alias=["--output-filename", "-fname"],
        help="Summary filename (default: processed_results.json/csv based on --format).",
    )
    # Summary format; only meaningful for --dest local (see execute()).
    format: Optional[str] = field(
        default=None,
        alias=["--format"],
        choices=["json", "csv"],
        help="Summary format for --dest local. Omit to only copy artifacts.",
    )
    # Whether logs are included in the export config ("copy_logs").
    copy_logs: bool = field(
        default=False,
        alias=["--copy-logs"],
        help="Include logs when copying locally (default: False).",
    )
    # Metric-name filters forwarded to the exporter as "log_metrics".
    log_metrics: List[str] = field(
        default_factory=list,
        alias=["--log-metrics"],
        help="Filter metrics by name (repeatable). Examples: score, f1, mmlu_score_micro.",
    )
    # Tri-state: None means "not passed on the command line".
    only_required: Optional[bool] = field(
        default=None,
        alias=["--only-required"],
        help="Copy only required+optional artifacts (default: True). Set to False to copy all available artifacts.",
    )

    def execute(self) -> None:
        """Build the exporter config from CLI arguments and run the export.

        Prints progress, per-job / per-invocation results and, on failure,
        destination-specific configuration hints. Returns ``None`` in every
        case; validation problems and export failures are reported on stdout
        rather than raised.
        """
        # Import heavy dependencies only when needed
        import os

        from omegaconf import OmegaConf

        from nemo_evaluator_launcher.api.functional import export_results

        # Validation: ensure IDs are provided
        if not self.invocation_ids:
            print("Error: No IDs provided. Specify one or more invocation or job IDs.")
            print(
                "Usage: nemo-evaluator-launcher export <id> [<id>...] --dest <destination>"
            )
            return

        config: dict[str, Any] = {"copy_logs": self.copy_logs}

        # Only forward options that were actually set so the exporter's own
        # defaults apply otherwise.
        if self.output_dir:
            config["output_dir"] = self.output_dir
        if self.output_filename:
            config["output_filename"] = self.output_filename
        if self.format:
            config["format"] = self.format
        if self.log_metrics:
            config["log_metrics"] = self.log_metrics
        if self.only_required is not None:
            config["only_required"] = self.only_required

        # Parse and validate overrides
        if self.override:
            # Flatten possible list-of-lists from the parser and drop the
            # None entries produced by a bare `-o` (nargs="?"), which would
            # otherwise be stringified to "None" and injected as a config key.
            flat_overrides: list[str] = []
            for item in self.override:
                if item is None:
                    continue
                if isinstance(item, list):
                    flat_overrides.extend(str(x) for x in item if x is not None)
                else:
                    flat_overrides.append(str(item))

            try:
                self._validate_overrides(flat_overrides, self.dest)
            except ValueError as e:
                print(f"Error: {e}")
                return

            # Expand env vars in override vals ($VAR / ${VAR}); for key=value
            # overrides only the value side is expanded.
            expanded_overrides: list[str] = []
            for ov in flat_overrides:
                if "=" in ov:
                    k, v = ov.split("=", 1)
                    expanded_overrides.append(f"{k}={os.path.expandvars(v)}")
                else:
                    expanded_overrides.append(os.path.expandvars(ov))

            dot_cfg = OmegaConf.from_dotlist(expanded_overrides)
            as_dict = OmegaConf.to_container(dot_cfg, resolve=True) or {}

            # When overrides are scoped as export.<dest>.*, merge only the
            # sub-tree for the selected destination; otherwise merge as-is.
            export_map = as_dict.get("export") if isinstance(as_dict, dict) else None
            if isinstance(export_map, dict) and self.dest in export_map:
                config.update(export_map[self.dest] or {})
            else:
                config.update(as_dict)

        if self.format and self.dest != "local":
            print(
                "Note: --format is only used by --dest local. It will be ignored for other destinations."
            )

        # An explicit --only-required True is dropped from the config again
        # (the help text documents True as the default).
        if "only_required" in config and self.only_required is True:
            config.pop("only_required", None)

        id_count = len(self.invocation_ids)
        noun = "invocations" if id_count > 1 else "invocation"
        print(f"Exporting {id_count} {noun} to {self.dest}...")

        result = export_results(self.invocation_ids, self.dest, config)

        if not result.get("success", False):
            err = result.get("error", "Unknown error")
            print(f"\nExport failed: {err}")
            self._print_failure_hints(err)
            return

        self._print_success(result)

    def _print_failure_hints(self, err: Any) -> None:
        """Print actionable guidance for common configuration issues."""
        err_text = str(err).lower()
        not_installed = "not installed" in err_text

        if self.dest == "mlflow":
            if "tracking_uri" in err_text:
                print("\nMLflow requires 'tracking_uri' to be configured.")
                print(
                    "Set it via: -o export.mlflow.tracking_uri=http://mlflow-server:5000"
                )
            elif not_installed:
                print("\nMLflow package not installed.")
                print("Install via: pip install nemo-evaluator-launcher[mlflow]")
        elif self.dest == "wandb":
            if "entity" in err_text or "project" in err_text:
                print("\nW&B requires 'entity' and 'project' to be configured.")
                print(
                    "Set via: -o export.wandb.entity=my-org -o export.wandb.project=my-proj"
                )
            elif not_installed:
                print("\nW&B package not installed.")
                print("Install via: pip install nemo-evaluator-launcher[wandb]")
        elif self.dest == "gsheets":
            if not_installed:
                print("\nGoogle Sheets package not installed.")
                print("Install via: pip install nemo-evaluator-launcher[gsheets]")

    def _print_success(self, result: dict) -> None:
        """Print the per-job (single ID) or per-invocation (many IDs) summary."""
        if len(self.invocation_ids) == 1:
            # Single invocation
            invocation_id = self.invocation_ids[0]
            print(f"Export completed for {invocation_id}")

            # Tolerate a result payload without a "jobs" mapping.
            for job_id, job_result in (result.get("jobs") or {}).items():
                if not job_result.get("success"):
                    print(f" {job_id} failed: {job_result.get('message', '')}")
                    continue

                print(f" {job_id}: {job_result.get('message', '')}")
                metadata = job_result.get("metadata", {})
                if metadata.get("run_url"):
                    print(f" URL: {metadata['run_url']}")
                if metadata.get("summary_path"):
                    print(f" Summary: {metadata['summary_path']}")
                path_hint = job_result.get("dest") or metadata.get("output_dir")
                if self.dest == "local" and path_hint:
                    print(f" Path: {path_hint}")
            return

        # Multiple invocations
        metadata = result.get("metadata", {})
        print(
            f"Export completed: {metadata.get('successful_invocations', 0)}/{metadata.get('total_invocations', 0)} successful"
        )

        # Show summary path if available
        if metadata.get("summary_path"):
            print(f"Summary: {metadata['summary_path']}")

        # Show per-invocation status; tolerate a missing "invocations" mapping.
        for invocation_id, inv_result in (result.get("invocations") or {}).items():
            if inv_result.get("success"):
                job_count = len(inv_result.get("jobs", {}))
                print(f" {invocation_id}: {job_count} jobs")
            else:
                print(
                    f" {invocation_id}: failed, {inv_result.get('error', 'Unknown error')}"
                )

    def _validate_overrides(self, overrides: List[str], dest: str) -> None:
        """Validate override list for destination consistency.

        Only overrides whose key starts with ``export.`` are checked; the
        second dotted component of the key must equal ``dest``. Anything else
        is left for OmegaConf to parse.

        Raises:
            ValueError: If an ``export.<name>...`` override names a
                destination other than ``dest``.
        """
        for override_str in overrides:
            if not override_str.startswith("export."):
                continue

            # Left side before "="; the "export." prefix guarantees that the
            # dotted split has at least two components.
            key_part = override_str.partition("=")[0]
            override_dest = key_part.split(".")[1]
            if override_dest != dest:
                raise ValueError(
                    f"Override destination mismatch: override specifies 'export.{override_dest}' but --dest is '{dest}'. "
                    f"Either change --dest to '{override_dest}' or use 'export.{dest}' in overrides."
                )
|
|
@@ -98,7 +98,17 @@ class Cmd:
|
|
|
98
98
|
config_dir=self.config_dir,
|
|
99
99
|
)
|
|
100
100
|
|
|
101
|
-
|
|
101
|
+
try:
|
|
102
|
+
invocation_id = run_eval(config, self.dry_run)
|
|
103
|
+
except Exception as e:
|
|
104
|
+
print(f"\033[31m✗ Job submission failed | Error: {e}\033[0m")
|
|
105
|
+
raise
|
|
106
|
+
|
|
107
|
+
# Print general success message with invocation ID
|
|
108
|
+
if invocation_id is not None and not self.dry_run:
|
|
109
|
+
print(
|
|
110
|
+
f"\033[32m✓ Job submission successful | Invocation ID: {invocation_id}\033[0m"
|
|
111
|
+
)
|
|
102
112
|
|
|
103
113
|
# Save the complete configuration
|
|
104
114
|
if not self.dry_run and invocation_id is not None:
|
|
@@ -146,6 +156,15 @@ class Cmd:
|
|
|
146
156
|
if invocation_id is not None:
|
|
147
157
|
print(f"to check status: nemo-evaluator-launcher status {invocation_id}")
|
|
148
158
|
print(f"to kill all jobs: nemo-evaluator-launcher kill {invocation_id}")
|
|
149
|
-
|
|
150
|
-
|
|
159
|
+
|
|
160
|
+
# Show actual job IDs and task names
|
|
161
|
+
print("to kill individual jobs:")
|
|
162
|
+
# Access tasks - will work after normalization in run_eval
|
|
163
|
+
tasks = (
|
|
164
|
+
config.evaluation.tasks
|
|
165
|
+
if hasattr(config.evaluation, "tasks")
|
|
166
|
+
else config.evaluation
|
|
151
167
|
)
|
|
168
|
+
for idx, task in enumerate(tasks):
|
|
169
|
+
job_id = f"{invocation_id}.{idx}"
|
|
170
|
+
print(f" nemo-evaluator-launcher kill {job_id} # {task.name}")
|
|
@@ -102,6 +102,8 @@ class Cmd:
|
|
|
102
102
|
status = job.get("status", "")
|
|
103
103
|
formatted_status = self._format_status_with_indicators(status)
|
|
104
104
|
|
|
105
|
+
# Extract task name
|
|
106
|
+
|
|
105
107
|
rows.append(
|
|
106
108
|
[
|
|
107
109
|
job.get("job_id", ""),
|
|
@@ -144,7 +146,7 @@ class Cmd:
|
|
|
144
146
|
ExecutionState.SUCCESS.value: "\033[32m✓ SUCCESS\033[0m", # Green Unicode checkmark
|
|
145
147
|
ExecutionState.FAILED.value: "\033[31m✗ FAILED\033[0m", # Red Unicode X
|
|
146
148
|
ExecutionState.RUNNING.value: "\033[33m▶ RUNNING\033[0m", # Yellow Unicode play button
|
|
147
|
-
ExecutionState.PENDING.value: "\033[36m
|
|
149
|
+
ExecutionState.PENDING.value: "\033[36m⧗ PENDING\033[0m", # Cyan Unicode hourglass (U+29D7)
|
|
148
150
|
ExecutionState.KILLED.value: "\033[35m✗ KILLED\033[0m", # Magenta Unicode X
|
|
149
151
|
# Additional states for error handling
|
|
150
152
|
"not_found": "\033[90m? NOT FOUND\033[0m", # Gray question mark
|
|
@@ -428,14 +428,34 @@ def create_lepton_endpoint(cfg: DictConfig, endpoint_name: str) -> bool:
|
|
|
428
428
|
print(f"✅ Successfully created Lepton endpoint: {endpoint_name}")
|
|
429
429
|
return True
|
|
430
430
|
else:
|
|
431
|
-
|
|
431
|
+
error_msg = result.stderr.strip() if result.stderr else ""
|
|
432
|
+
output_msg = result.stdout.strip() if result.stdout else ""
|
|
433
|
+
print(
|
|
434
|
+
f"✗ Failed to create Lepton endpoint | Endpoint: {endpoint_name} | Return code: {result.returncode}"
|
|
435
|
+
)
|
|
436
|
+
if error_msg:
|
|
437
|
+
print(f" stderr: {error_msg}")
|
|
438
|
+
if output_msg:
|
|
439
|
+
print(f" stdout: {output_msg}")
|
|
432
440
|
return False
|
|
433
441
|
|
|
434
|
-
except subprocess.TimeoutExpired:
|
|
435
|
-
print(
|
|
442
|
+
except subprocess.TimeoutExpired as e:
|
|
443
|
+
print(
|
|
444
|
+
f"✗ Timeout creating Lepton endpoint | Endpoint: {endpoint_name} | Timeout: 300s"
|
|
445
|
+
)
|
|
446
|
+
if hasattr(e, "stderr") and e.stderr:
|
|
447
|
+
print(f" stderr: {e.stderr}")
|
|
448
|
+
if hasattr(e, "stdout") and e.stdout:
|
|
449
|
+
print(f" stdout: {e.stdout}")
|
|
436
450
|
return False
|
|
437
451
|
except subprocess.CalledProcessError as e:
|
|
438
|
-
print(
|
|
452
|
+
print(
|
|
453
|
+
f"✗ Error creating Lepton endpoint | Endpoint: {endpoint_name} | Error: {e}"
|
|
454
|
+
)
|
|
455
|
+
if hasattr(e, "stderr") and e.stderr:
|
|
456
|
+
print(f" stderr: {e.stderr}")
|
|
457
|
+
if hasattr(e, "stdout") and e.stdout:
|
|
458
|
+
print(f" stdout: {e.stdout}")
|
|
439
459
|
return False
|
|
440
460
|
finally:
|
|
441
461
|
# Clean up temporary file
|
|
@@ -482,7 +482,8 @@ class LeptonExecutor(BaseExecutor):
|
|
|
482
482
|
|
|
483
483
|
if not job_success:
|
|
484
484
|
raise RuntimeError(
|
|
485
|
-
f"Failed to submit Lepton job
|
|
485
|
+
f"Failed to submit Lepton job | Task: {task.name} | Job ID: {job_id} | "
|
|
486
|
+
f"Lepton job name: {lepton_job_name} | Error: {error_msg}"
|
|
486
487
|
)
|
|
487
488
|
|
|
488
489
|
# Store job metadata in database (with task-specific endpoint info)
|
|
@@ -504,8 +505,6 @@ class LeptonExecutor(BaseExecutor):
|
|
|
504
505
|
)
|
|
505
506
|
)
|
|
506
507
|
|
|
507
|
-
print(f"✅ Task {task.name}: Submitted evaluation job {job_id}")
|
|
508
|
-
|
|
509
508
|
# Jobs submitted successfully - return immediately (non-blocking)
|
|
510
509
|
print(
|
|
511
510
|
f"\n✅ Successfully submitted {len(lepton_job_names)} evaluation jobs to Lepton"
|
|
@@ -536,9 +535,8 @@ class LeptonExecutor(BaseExecutor):
|
|
|
536
535
|
|
|
537
536
|
return invocation_id
|
|
538
537
|
|
|
539
|
-
except Exception
|
|
538
|
+
except Exception:
|
|
540
539
|
# Clean up any created endpoints on failure
|
|
541
|
-
print(f"❌ Error during evaluation: {e}")
|
|
542
540
|
if cfg.deployment.type != "none" and "endpoint_names" in locals():
|
|
543
541
|
for endpoint_name in endpoint_names:
|
|
544
542
|
if endpoint_name:
|
|
@@ -23,6 +23,7 @@ import os
|
|
|
23
23
|
import pathlib
|
|
24
24
|
import platform
|
|
25
25
|
import shlex
|
|
26
|
+
import shutil
|
|
26
27
|
import subprocess
|
|
27
28
|
import time
|
|
28
29
|
from typing import List, Optional
|
|
@@ -76,6 +77,13 @@ class LocalExecutor(BaseExecutor):
|
|
|
76
77
|
f"type {cfg.deployment.type} is not implemented -- add deployment support"
|
|
77
78
|
)
|
|
78
79
|
|
|
80
|
+
# Check if docker is available (skip in dry_run mode)
|
|
81
|
+
if not dry_run and shutil.which("docker") is None:
|
|
82
|
+
raise RuntimeError(
|
|
83
|
+
"Docker is not installed or not in PATH. "
|
|
84
|
+
"Please install Docker to run local evaluations."
|
|
85
|
+
)
|
|
86
|
+
|
|
79
87
|
# Generate invocation ID for this evaluation run
|
|
80
88
|
invocation_id = generate_invocation_id()
|
|
81
89
|
|
|
@@ -233,35 +241,48 @@ class LocalExecutor(BaseExecutor):
|
|
|
233
241
|
# To ensure subprocess continues after python exits:
|
|
234
242
|
# - on Unix-like systems, to fully detach the subprocess
|
|
235
243
|
# so it does not die when Python exits, pass start_new_session=True;
|
|
236
|
-
# - on
|
|
244
|
+
# - on Windows use creationflags=subprocess.CREATE_NEW_PROCESS_GROUP flag.
|
|
237
245
|
os_name = platform.system()
|
|
246
|
+
processes = []
|
|
247
|
+
|
|
238
248
|
if is_execution_mode_sequential:
|
|
239
249
|
if os_name == "Windows":
|
|
240
|
-
subprocess.Popen(
|
|
250
|
+
proc = subprocess.Popen(
|
|
241
251
|
shlex.split("bash run_all.sequential.sh"),
|
|
242
252
|
cwd=output_dir,
|
|
243
253
|
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
|
|
244
254
|
)
|
|
245
255
|
else:
|
|
246
|
-
subprocess.Popen(
|
|
256
|
+
proc = subprocess.Popen(
|
|
247
257
|
shlex.split("bash run_all.sequential.sh"),
|
|
248
258
|
cwd=output_dir,
|
|
249
259
|
start_new_session=True,
|
|
250
260
|
)
|
|
261
|
+
processes.append(("run_all.sequential.sh", proc, output_dir))
|
|
251
262
|
else:
|
|
252
263
|
for task in cfg.evaluation.tasks:
|
|
253
264
|
if os_name == "Windows":
|
|
254
|
-
subprocess.Popen(
|
|
265
|
+
proc = subprocess.Popen(
|
|
255
266
|
shlex.split("bash run.sh"),
|
|
256
267
|
cwd=output_dir / task.name,
|
|
257
268
|
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
|
|
258
269
|
)
|
|
259
270
|
else:
|
|
260
|
-
subprocess.Popen(
|
|
271
|
+
proc = subprocess.Popen(
|
|
261
272
|
shlex.split("bash run.sh"),
|
|
262
273
|
cwd=output_dir / task.name,
|
|
263
274
|
start_new_session=True,
|
|
264
275
|
)
|
|
276
|
+
processes.append((task.name, proc, output_dir / task.name))
|
|
277
|
+
|
|
278
|
+
# Wait briefly and check if bash scripts exited immediately (which means error)
|
|
279
|
+
time.sleep(0.3)
|
|
280
|
+
|
|
281
|
+
for name, proc, work_dir in processes:
|
|
282
|
+
exit_code = proc.poll()
|
|
283
|
+
if exit_code is not None and exit_code != 0:
|
|
284
|
+
error_msg = f"Script for {name} exited with code {exit_code}"
|
|
285
|
+
raise RuntimeError(f"Job startup failed | {error_msg}")
|
|
265
286
|
|
|
266
287
|
print("\nCommands for real-time monitoring:")
|
|
267
288
|
for job_id, evaluation_task in zip(job_ids, evaluation_tasks):
|