nemo-evaluator-launcher 0.1.8__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nemo-evaluator-launcher might be problematic. Click here for more details.
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/PKG-INFO +1 -1
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/api/functional.py +22 -18
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/cli/main.py +60 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/cli/run.py +20 -6
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/cli/status.py +42 -3
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/common/execdb.py +121 -27
- nemo_evaluator_launcher-0.1.9/src/nemo_evaluator_launcher/configs/deployment/generic.yaml +33 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/executors/lepton/executor.py +1 -1
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/executors/local/executor.py +22 -22
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/executors/slurm/executor.py +2 -2
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/package_info.py +1 -1
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher.egg-info/PKG-INFO +1 -1
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher.egg-info/SOURCES.txt +1 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/LICENSE +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/README.md +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/pyproject.toml +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/setup.cfg +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/api/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/api/types.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/api/utils.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/cli/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/cli/export.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/cli/kill.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/cli/ls_runs.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/cli/ls_tasks.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/cli/version.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/common/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/common/helpers.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/common/logging_utils.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/common/mapping.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/configs/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/configs/default.yaml +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/configs/deployment/nim.yaml +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/configs/deployment/none.yaml +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/configs/deployment/sglang.yaml +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/configs/execution/lepton/default.yaml +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/configs/execution/local.yaml +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/configs/execution/slurm/default.yaml +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/executors/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/executors/base.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/executors/lepton/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/executors/lepton/job_helpers.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/executors/local/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/executors/local/run.template.sh +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/executors/registry.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/executors/slurm/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/exporters/__init__.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/exporters/base.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/exporters/gsheets.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/exporters/local.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/exporters/mlflow.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/exporters/registry.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/exporters/utils.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/exporters/wandb.py +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher/resources/mapping.toml +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher.egg-info/dependency_links.txt +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher.egg-info/entry_points.txt +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher.egg-info/requires.txt +0 -0
- {nemo_evaluator_launcher-0.1.8 → nemo_evaluator_launcher-0.1.9}/src/nemo_evaluator_launcher.egg-info/top_level.txt +0 -0
|
@@ -99,11 +99,13 @@ def run_eval(cfg: RunConfig, dry_run: bool = False) -> Optional[str]:
|
|
|
99
99
|
return get_executor(cfg.execution.type).execute_eval(cfg, dry_run)
|
|
100
100
|
|
|
101
101
|
|
|
102
|
-
def get_status(
|
|
102
|
+
def get_status(ids_or_prefixes: list[str]) -> list[dict[str, Any]]:
|
|
103
103
|
"""Get status of jobs by their IDs or invocation IDs.
|
|
104
104
|
|
|
105
105
|
Args:
|
|
106
|
-
job_ids: List of job IDs or invocation IDs to check status for.
|
|
106
|
+
ids_or_prefixes: List of job IDs or invocation IDs to check status for. Short ones are allowed;
|
|
107
|
+
we would try to match the full ones from prefixes if no collisions are
|
|
108
|
+
present.
|
|
107
109
|
|
|
108
110
|
Returns:
|
|
109
111
|
list[dict[str, Any]]: List of status dictionaries for each job or invocation.
|
|
@@ -114,14 +116,14 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
|
|
|
114
116
|
db = ExecutionDB()
|
|
115
117
|
results: List[dict[str, Any]] = []
|
|
116
118
|
|
|
117
|
-
for
|
|
118
|
-
# If id looks like an invocation_id (
|
|
119
|
-
if
|
|
120
|
-
jobs = db.get_jobs(
|
|
119
|
+
for id_or_prefix in ids_or_prefixes:
|
|
120
|
+
# If id looks like an invocation_id (no dot), get all jobs for it
|
|
121
|
+
if "." not in id_or_prefix:
|
|
122
|
+
jobs = db.get_jobs(id_or_prefix)
|
|
121
123
|
if not jobs:
|
|
122
124
|
results.append(
|
|
123
125
|
{
|
|
124
|
-
"invocation":
|
|
126
|
+
"invocation": id_or_prefix,
|
|
125
127
|
"job_id": None,
|
|
126
128
|
"status": "not_found",
|
|
127
129
|
"data": {},
|
|
@@ -136,7 +138,7 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
|
|
|
136
138
|
except ValueError as e:
|
|
137
139
|
results.append(
|
|
138
140
|
{
|
|
139
|
-
"invocation":
|
|
141
|
+
"invocation": id_or_prefix,
|
|
140
142
|
"job_id": None,
|
|
141
143
|
"status": "error",
|
|
142
144
|
"data": {"error": str(e)},
|
|
@@ -146,7 +148,7 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
|
|
|
146
148
|
|
|
147
149
|
# Get status from the executor for all jobs in the invocation
|
|
148
150
|
try:
|
|
149
|
-
status_list = executor_cls.get_status(
|
|
151
|
+
status_list = executor_cls.get_status(id_or_prefix)
|
|
150
152
|
|
|
151
153
|
# Create a result for each job in the invocation
|
|
152
154
|
for job_id_in_invocation, job_data in jobs.items():
|
|
@@ -161,7 +163,7 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
|
|
|
161
163
|
|
|
162
164
|
results.append(
|
|
163
165
|
{
|
|
164
|
-
"invocation":
|
|
166
|
+
"invocation": job_data.invocation_id,
|
|
165
167
|
"job_id": job_id_in_invocation,
|
|
166
168
|
"status": (
|
|
167
169
|
job_status if job_status is not None else "unknown"
|
|
@@ -176,7 +178,7 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
|
|
|
176
178
|
except Exception as e:
|
|
177
179
|
results.append(
|
|
178
180
|
{
|
|
179
|
-
"invocation":
|
|
181
|
+
"invocation": id_or_prefix,
|
|
180
182
|
"job_id": None,
|
|
181
183
|
"status": "error",
|
|
182
184
|
"data": {"error": str(e)},
|
|
@@ -184,13 +186,13 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
|
|
|
184
186
|
)
|
|
185
187
|
else:
|
|
186
188
|
# Otherwise, treat as job_id
|
|
187
|
-
single_job_data: Optional[JobData] = db.get_job(
|
|
189
|
+
single_job_data: Optional[JobData] = db.get_job(id_or_prefix)
|
|
188
190
|
|
|
189
191
|
if single_job_data is None:
|
|
190
192
|
results.append(
|
|
191
193
|
{
|
|
192
194
|
"invocation": None,
|
|
193
|
-
"job_id":
|
|
195
|
+
"job_id": id_or_prefix,
|
|
194
196
|
"status": "not_found",
|
|
195
197
|
"data": {},
|
|
196
198
|
}
|
|
@@ -204,7 +206,7 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
|
|
|
204
206
|
results.append(
|
|
205
207
|
{
|
|
206
208
|
"invocation": None,
|
|
207
|
-
"job_id":
|
|
209
|
+
"job_id": id_or_prefix,
|
|
208
210
|
"status": "error",
|
|
209
211
|
"data": {"error": str(e)},
|
|
210
212
|
}
|
|
@@ -213,13 +215,13 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
|
|
|
213
215
|
|
|
214
216
|
# Get status from the executor
|
|
215
217
|
try:
|
|
216
|
-
status_list = executor_cls.get_status(
|
|
218
|
+
status_list = executor_cls.get_status(id_or_prefix)
|
|
217
219
|
|
|
218
220
|
if not status_list:
|
|
219
221
|
results.append(
|
|
220
222
|
{
|
|
221
223
|
"invocation": single_job_data.invocation_id,
|
|
222
|
-
"job_id": job_id,
|
|
224
|
+
"job_id": single_job_data.job_id,
|
|
223
225
|
"status": "unknown",
|
|
224
226
|
"data": single_job_data.data,
|
|
225
227
|
}
|
|
@@ -229,7 +231,7 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
|
|
|
229
231
|
results.append(
|
|
230
232
|
{
|
|
231
233
|
"invocation": single_job_data.invocation_id,
|
|
232
|
-
"job_id": job_id,
|
|
234
|
+
"job_id": single_job_data.job_id,
|
|
233
235
|
"status": (
|
|
234
236
|
status_list[0].state.value if status_list else "unknown"
|
|
235
237
|
),
|
|
@@ -246,7 +248,9 @@ def get_status(job_ids: list[str]) -> list[dict[str, Any]]:
|
|
|
246
248
|
"invocation": (
|
|
247
249
|
single_job_data.invocation_id if single_job_data else None
|
|
248
250
|
),
|
|
249
|
-
"job_id":
|
|
251
|
+
"job_id": (
|
|
252
|
+
single_job_data.job_id if single_job_data else id_or_prefix
|
|
253
|
+
),
|
|
250
254
|
"status": "error",
|
|
251
255
|
"data": {"error": str(e)},
|
|
252
256
|
}
|
|
@@ -15,6 +15,8 @@
|
|
|
15
15
|
#
|
|
16
16
|
"""Main CLI module using simple-parsing with subcommands."""
|
|
17
17
|
|
|
18
|
+
import os
|
|
19
|
+
|
|
18
20
|
from simple_parsing import ArgumentParser
|
|
19
21
|
|
|
20
22
|
import nemo_evaluator_launcher.cli.export as export
|
|
@@ -29,6 +31,22 @@ from nemo_evaluator_launcher.common.logging_utils import logger
|
|
|
29
31
|
VERSION_HELP = "Show version information"
|
|
30
32
|
|
|
31
33
|
|
|
34
|
+
def is_verbose_enabled(args) -> bool:
|
|
35
|
+
"""Check if verbose flag is enabled in any subcommand."""
|
|
36
|
+
# Check global verbose flag
|
|
37
|
+
if hasattr(args, "verbose") and args.verbose:
|
|
38
|
+
return True
|
|
39
|
+
|
|
40
|
+
# Check subcommand verbose flags
|
|
41
|
+
subcommands = ["run", "status", "kill", "tasks_alias", "tasks", "runs", "export"]
|
|
42
|
+
for subcmd in subcommands:
|
|
43
|
+
if hasattr(args, subcmd) and hasattr(getattr(args, subcmd), "verbose"):
|
|
44
|
+
if getattr(getattr(args, subcmd), "verbose"):
|
|
45
|
+
return True
|
|
46
|
+
|
|
47
|
+
return False
|
|
48
|
+
|
|
49
|
+
|
|
32
50
|
def create_parser() -> ArgumentParser:
|
|
33
51
|
"""Create and configure the CLI argument parser with subcommands."""
|
|
34
52
|
parser = ArgumentParser()
|
|
@@ -36,6 +54,14 @@ def create_parser() -> ArgumentParser:
|
|
|
36
54
|
# Add --version flag at the top level
|
|
37
55
|
parser.add_argument("--version", action="store_true", help=VERSION_HELP)
|
|
38
56
|
|
|
57
|
+
# Add --verbose/-v flag for debug logging
|
|
58
|
+
parser.add_argument(
|
|
59
|
+
"-v",
|
|
60
|
+
"--verbose",
|
|
61
|
+
action="store_true",
|
|
62
|
+
help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
|
|
63
|
+
)
|
|
64
|
+
|
|
39
65
|
subparsers = parser.add_subparsers(dest="command", required=False)
|
|
40
66
|
|
|
41
67
|
# Version subcommand
|
|
@@ -50,12 +76,24 @@ def create_parser() -> ArgumentParser:
|
|
|
50
76
|
run_parser = subparsers.add_parser(
|
|
51
77
|
"run", help="Run evaluation", description="Run evaluation"
|
|
52
78
|
)
|
|
79
|
+
run_parser.add_argument(
|
|
80
|
+
"-v",
|
|
81
|
+
"--verbose",
|
|
82
|
+
action="store_true",
|
|
83
|
+
help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
|
|
84
|
+
)
|
|
53
85
|
run_parser.add_arguments(run.Cmd, dest="run")
|
|
54
86
|
|
|
55
87
|
# Status subcommand
|
|
56
88
|
status_parser = subparsers.add_parser(
|
|
57
89
|
"status", help="Check job status", description="Check job status"
|
|
58
90
|
)
|
|
91
|
+
status_parser.add_argument(
|
|
92
|
+
"-v",
|
|
93
|
+
"--verbose",
|
|
94
|
+
action="store_true",
|
|
95
|
+
help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
|
|
96
|
+
)
|
|
59
97
|
status_parser.add_arguments(status.Cmd, dest="status")
|
|
60
98
|
|
|
61
99
|
# Kill subcommand
|
|
@@ -64,12 +102,24 @@ def create_parser() -> ArgumentParser:
|
|
|
64
102
|
help="Kill a job or invocation",
|
|
65
103
|
description="Kill a job (e.g., aefc4819.0) or entire invocation (e.g., aefc4819) by its ID",
|
|
66
104
|
)
|
|
105
|
+
kill_parser.add_argument(
|
|
106
|
+
"-v",
|
|
107
|
+
"--verbose",
|
|
108
|
+
action="store_true",
|
|
109
|
+
help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
|
|
110
|
+
)
|
|
67
111
|
kill_parser.add_arguments(kill.Cmd, dest="kill")
|
|
68
112
|
|
|
69
113
|
# Ls subcommand (with nested subcommands)
|
|
70
114
|
ls_parser = subparsers.add_parser(
|
|
71
115
|
"ls", help="List resources", description="List tasks or runs"
|
|
72
116
|
)
|
|
117
|
+
ls_parser.add_argument(
|
|
118
|
+
"-v",
|
|
119
|
+
"--verbose",
|
|
120
|
+
action="store_true",
|
|
121
|
+
help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
|
|
122
|
+
)
|
|
73
123
|
# Add arguments from `ls tasks` so that they work with `ls` as default alias
|
|
74
124
|
ls_parser.add_arguments(ls_tasks.Cmd, dest="tasks_alias")
|
|
75
125
|
|
|
@@ -95,6 +145,12 @@ def create_parser() -> ArgumentParser:
|
|
|
95
145
|
help="Export evaluation results",
|
|
96
146
|
description="Export evaluation results takes a List of invocation ids and a list of destinations(local, gitlab, wandb)",
|
|
97
147
|
)
|
|
148
|
+
export_parser.add_argument(
|
|
149
|
+
"-v",
|
|
150
|
+
"--verbose",
|
|
151
|
+
action="store_true",
|
|
152
|
+
help="Enable verbose logging (sets LOG_LEVEL=DEBUG)",
|
|
153
|
+
)
|
|
98
154
|
export_parser.add_arguments(export.ExportCmd, dest="export")
|
|
99
155
|
|
|
100
156
|
return parser
|
|
@@ -105,6 +161,10 @@ def main() -> None:
|
|
|
105
161
|
parser = create_parser()
|
|
106
162
|
args = parser.parse_args()
|
|
107
163
|
|
|
164
|
+
# Handle --verbose flag
|
|
165
|
+
if is_verbose_enabled(args):
|
|
166
|
+
os.environ["LOG_LEVEL"] = "DEBUG"
|
|
167
|
+
|
|
108
168
|
# Handle --version flag
|
|
109
169
|
if hasattr(args, "version") and args.version:
|
|
110
170
|
version_cmd = version.Cmd()
|
|
@@ -59,6 +59,13 @@ class Cmd:
|
|
|
59
59
|
alias=["-n", "--dry-run"],
|
|
60
60
|
metadata={"help": "Do not run the evaluation, just print the config."},
|
|
61
61
|
)
|
|
62
|
+
config_output: str | None = field(
|
|
63
|
+
default=None,
|
|
64
|
+
alias=["--config-output"],
|
|
65
|
+
metadata={
|
|
66
|
+
"help": "Directory to save the complete run config. Defaults to ~/.nemo-evaluator/run_configs/"
|
|
67
|
+
},
|
|
68
|
+
)
|
|
62
69
|
|
|
63
70
|
def execute(self) -> None:
|
|
64
71
|
# Import heavy dependencies only when needed
|
|
@@ -93,12 +100,19 @@ class Cmd:
|
|
|
93
100
|
|
|
94
101
|
invocation_id = run_eval(config, self.dry_run)
|
|
95
102
|
|
|
96
|
-
# Save the complete configuration
|
|
103
|
+
# Save the complete configuration
|
|
97
104
|
if not self.dry_run and invocation_id is not None:
|
|
98
|
-
#
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
105
|
+
# Determine config output directory
|
|
106
|
+
if self.config_output:
|
|
107
|
+
# Use custom directory specified by --config-output
|
|
108
|
+
config_dir = pathlib.Path(self.config_output)
|
|
109
|
+
else:
|
|
110
|
+
# Default to original location: ~/.nemo-evaluator/run_configs
|
|
111
|
+
home_dir = pathlib.Path.home()
|
|
112
|
+
config_dir = home_dir / ".nemo-evaluator" / "run_configs"
|
|
113
|
+
|
|
114
|
+
# Ensure the directory exists
|
|
115
|
+
config_dir.mkdir(parents=True, exist_ok=True)
|
|
102
116
|
|
|
103
117
|
# Convert DictConfig to dict and save as YAML
|
|
104
118
|
config_dict = OmegaConf.to_container(config, resolve=True)
|
|
@@ -108,7 +122,7 @@ class Cmd:
|
|
|
108
122
|
|
|
109
123
|
# Create config filename with invocation ID
|
|
110
124
|
config_filename = f"{invocation_id}_config.yml"
|
|
111
|
-
config_path =
|
|
125
|
+
config_path = config_dir / config_filename
|
|
112
126
|
|
|
113
127
|
# Save the complete Hydra configuration
|
|
114
128
|
with open(config_path, "w") as f:
|
|
@@ -17,6 +17,8 @@ from dataclasses import dataclass
|
|
|
17
17
|
|
|
18
18
|
from simple_parsing import field
|
|
19
19
|
|
|
20
|
+
from nemo_evaluator_launcher.executors.base import ExecutionState
|
|
21
|
+
|
|
20
22
|
|
|
21
23
|
@dataclass
|
|
22
24
|
class Cmd:
|
|
@@ -96,10 +98,14 @@ class Cmd:
|
|
|
96
98
|
else:
|
|
97
99
|
location = ""
|
|
98
100
|
|
|
101
|
+
# Format status with visual indicators and colors
|
|
102
|
+
status = job.get("status", "")
|
|
103
|
+
formatted_status = self._format_status_with_indicators(status)
|
|
104
|
+
|
|
99
105
|
rows.append(
|
|
100
106
|
[
|
|
101
107
|
job.get("job_id", ""),
|
|
102
|
-
|
|
108
|
+
formatted_status,
|
|
103
109
|
# job.get("progress", ""), temporarily disabled as this is a WIP feature
|
|
104
110
|
executor_info,
|
|
105
111
|
location,
|
|
@@ -108,7 +114,10 @@ class Cmd:
|
|
|
108
114
|
|
|
109
115
|
# Calculate column widths and print
|
|
110
116
|
widths = [
|
|
111
|
-
max(
|
|
117
|
+
max(
|
|
118
|
+
len(str(headers[i])),
|
|
119
|
+
max(len(self._strip_ansi_codes(str(row[i]))) for row in rows),
|
|
120
|
+
)
|
|
112
121
|
for i in range(len(headers))
|
|
113
122
|
]
|
|
114
123
|
|
|
@@ -119,4 +128,34 @@ class Cmd:
|
|
|
119
128
|
print("-" * len(header_row))
|
|
120
129
|
|
|
121
130
|
for row in rows:
|
|
122
|
-
|
|
131
|
+
# Adjust padding for ANSI color codes
|
|
132
|
+
formatted_row = []
|
|
133
|
+
for i in range(len(row)):
|
|
134
|
+
content = str(row[i])
|
|
135
|
+
visible_length = len(self._strip_ansi_codes(content))
|
|
136
|
+
padding = widths[i] - visible_length
|
|
137
|
+
formatted_row.append(content + " " * padding)
|
|
138
|
+
print(" | ".join(formatted_row))
|
|
139
|
+
|
|
140
|
+
def _format_status_with_indicators(self, status: str) -> str:
|
|
141
|
+
"""Format status with Unicode visual indicators and ANSI colors."""
|
|
142
|
+
# Status mapping based on ExecutionState enum
|
|
143
|
+
status_formats = {
|
|
144
|
+
ExecutionState.SUCCESS.value: "\033[32m✓ SUCCESS\033[0m", # Green Unicode checkmark
|
|
145
|
+
ExecutionState.FAILED.value: "\033[31m✗ FAILED\033[0m", # Red Unicode X
|
|
146
|
+
ExecutionState.RUNNING.value: "\033[33m▶ RUNNING\033[0m", # Yellow Unicode play button
|
|
147
|
+
ExecutionState.PENDING.value: "\033[36m⏳ PENDING\033[0m", # Cyan Unicode hourglass
|
|
148
|
+
ExecutionState.KILLED.value: "\033[35m✗ KILLED\033[0m", # Magenta Unicode X
|
|
149
|
+
# Additional states for error handling
|
|
150
|
+
"not_found": "\033[90m? NOT FOUND\033[0m", # Gray question mark
|
|
151
|
+
"error": "\033[31m✗ ERROR\033[0m", # Red Unicode X
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
return status_formats.get(status.lower(), f"\033[90m? {status.upper()}\033[0m")
|
|
155
|
+
|
|
156
|
+
def _strip_ansi_codes(self, text: str) -> str:
|
|
157
|
+
"""Remove ANSI color codes from text for length calculation."""
|
|
158
|
+
import re
|
|
159
|
+
|
|
160
|
+
ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
|
|
161
|
+
return ansi_escape.sub("", text)
|
|
@@ -29,15 +29,15 @@ EXEC_DB_FILE = EXEC_DB_DIR / "exec.v1.jsonl"
|
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
def generate_invocation_id() -> str:
|
|
32
|
-
"""Generate a unique invocation ID as an
|
|
33
|
-
return secrets.token_hex(
|
|
32
|
+
"""Generate a unique invocation ID as a 16-digit hex string."""
|
|
33
|
+
return secrets.token_hex(8)
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
def generate_job_id(invocation_id: str, index: int) -> str:
|
|
37
37
|
"""Generate a job ID as <invocation_id>.<n>.
|
|
38
38
|
|
|
39
39
|
Args:
|
|
40
|
-
invocation_id: The invocation group ID (
|
|
40
|
+
invocation_id: The invocation group ID (16-digit hex).
|
|
41
41
|
index: The job index (0-based integer).
|
|
42
42
|
Returns:
|
|
43
43
|
The job ID string.
|
|
@@ -50,7 +50,7 @@ class JobData:
|
|
|
50
50
|
"""Data structure for job execution information.
|
|
51
51
|
|
|
52
52
|
Attributes:
|
|
53
|
-
invocation_id:
|
|
53
|
+
invocation_id: 16-digit hex string.
|
|
54
54
|
job_id: <invocation_id>.<n> string.
|
|
55
55
|
timestamp: Unix timestamp when the job was created.
|
|
56
56
|
executor: Name of the executor that handled this job.
|
|
@@ -148,41 +148,135 @@ class ExecutionDB:
|
|
|
148
148
|
)
|
|
149
149
|
raise
|
|
150
150
|
|
|
151
|
+
def _resolve_invocation_id(self, short_id: str) -> Optional[str]:
|
|
152
|
+
"""Resolve a short invocation ID to the full one.
|
|
153
|
+
|
|
154
|
+
Args:
|
|
155
|
+
short_id: Partial or full invocation ID.
|
|
156
|
+
|
|
157
|
+
Returns:
|
|
158
|
+
Full invocation ID if found uniquely, None if not found.
|
|
159
|
+
|
|
160
|
+
Raises:
|
|
161
|
+
ValueError: If the short_id matches multiple invocation IDs.
|
|
162
|
+
"""
|
|
163
|
+
if not short_id:
|
|
164
|
+
return None
|
|
165
|
+
|
|
166
|
+
short_id = short_id.lower()
|
|
167
|
+
|
|
168
|
+
# NOTE(agronskiy): this is a non-optimized implementation that assumes a small number
|
|
169
|
+
# of jobs in ExecDB(), a typical scenario. Speeding up would involve building a
|
|
170
|
+
# prefix tree when loading invocations/jobs.
|
|
171
|
+
matches = [
|
|
172
|
+
inv_id
|
|
173
|
+
for inv_id in self._invocations.keys()
|
|
174
|
+
if inv_id.lower().startswith(short_id)
|
|
175
|
+
]
|
|
176
|
+
|
|
177
|
+
if len(matches) == 1:
|
|
178
|
+
return matches[0]
|
|
179
|
+
elif len(matches) > 1:
|
|
180
|
+
raise ValueError(f"Ambiguous invocation ID '{short_id}': matches {matches}")
|
|
181
|
+
else:
|
|
182
|
+
return None
|
|
183
|
+
|
|
184
|
+
def _resolve_job_id(self, short_job_id: str) -> Optional[str]:
|
|
185
|
+
"""Resolve a short job ID to the full one.
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
short_job_id: Partial or full job ID.
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
Full job ID if found uniquely, None if not found.
|
|
192
|
+
|
|
193
|
+
Raises:
|
|
194
|
+
ValueError: If the short_job_id matches multiple job IDs.
|
|
195
|
+
"""
|
|
196
|
+
if not short_job_id:
|
|
197
|
+
return None
|
|
198
|
+
|
|
199
|
+
# Normalize to lowercase for case-insensitive matching
|
|
200
|
+
short_job_id = short_job_id.lower()
|
|
201
|
+
|
|
202
|
+
if "." in short_job_id:
|
|
203
|
+
parts = short_job_id.split(".", 1)
|
|
204
|
+
short_inv_id, job_index = parts[0], parts[1]
|
|
205
|
+
|
|
206
|
+
# Resolve the invocation part
|
|
207
|
+
full_inv_id = self._resolve_invocation_id(short_inv_id)
|
|
208
|
+
if full_inv_id:
|
|
209
|
+
candidate_job_id = f"{full_inv_id}.{job_index}"
|
|
210
|
+
if candidate_job_id in self._jobs:
|
|
211
|
+
return candidate_job_id
|
|
212
|
+
|
|
213
|
+
# NOTE(agronskiy): unfortunately, due to legacy, there exist use cases where
|
|
214
|
+
# job_id is the same format as invocation_id
|
|
215
|
+
candidate_job_id = self._resolve_invocation_id(short_job_id)
|
|
216
|
+
if candidate_job_id and candidate_job_id in self._jobs:
|
|
217
|
+
return candidate_job_id
|
|
218
|
+
|
|
219
|
+
return None
|
|
220
|
+
|
|
151
221
|
def get_job(self, job_id: str) -> Optional[JobData]:
|
|
152
|
-
|
|
222
|
+
"""Get job by full or partial job ID.
|
|
153
223
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
return {
|
|
157
|
-
job_id: self._jobs[job_id] for job_id in job_ids if job_id in self._jobs
|
|
158
|
-
}
|
|
224
|
+
Args:
|
|
225
|
+
job_id: Full or partial job ID.
|
|
159
226
|
|
|
160
|
-
|
|
161
|
-
|
|
227
|
+
Returns:
|
|
228
|
+
JobData if found, None otherwise.
|
|
162
229
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
230
|
+
Raises:
|
|
231
|
+
ValueError: If the job_id matches multiple jobs.
|
|
232
|
+
"""
|
|
233
|
+
resolved_id = self._resolve_job_id(job_id)
|
|
234
|
+
if resolved_id:
|
|
235
|
+
return self._jobs.get(resolved_id)
|
|
166
236
|
|
|
237
|
+
return None
|
|
167
238
|
|
|
168
|
-
def
|
|
169
|
-
|
|
170
|
-
db.write_job(job)
|
|
239
|
+
def get_jobs(self, invocation_id: str) -> Dict[str, JobData]:
|
|
240
|
+
"""Get all jobs for a full or partial invocation ID.
|
|
171
241
|
|
|
242
|
+
Args:
|
|
243
|
+
invocation_id: Full or partial invocation ID.
|
|
172
244
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
return db.get_job(job_id)
|
|
245
|
+
Returns:
|
|
246
|
+
Dictionary mapping job_id to JobData for all jobs in the invocation.
|
|
176
247
|
|
|
248
|
+
Raises:
|
|
249
|
+
ValueError: If the invocation_id matches multiple invocations.
|
|
250
|
+
"""
|
|
251
|
+
resolved_inv_id = self._resolve_invocation_id(invocation_id)
|
|
252
|
+
if not resolved_inv_id:
|
|
253
|
+
return {}
|
|
177
254
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
255
|
+
job_ids = self._invocations.get(resolved_inv_id, [])
|
|
256
|
+
return {
|
|
257
|
+
job_id: self._jobs[job_id] for job_id in job_ids if job_id in self._jobs
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
def get_invocation_jobs(self, invocation_id: str) -> List[str]:
|
|
261
|
+
"""Get job IDs for a full or partial invocation ID.
|
|
262
|
+
|
|
263
|
+
Args:
|
|
264
|
+
invocation_id: Full or partial invocation ID.
|
|
181
265
|
|
|
266
|
+
Returns:
|
|
267
|
+
List of job IDs for the invocation.
|
|
182
268
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
269
|
+
Raises:
|
|
270
|
+
ValueError: If the invocation_id matches multiple invocations.
|
|
271
|
+
"""
|
|
272
|
+
resolved_inv_id = self._resolve_invocation_id(invocation_id)
|
|
273
|
+
if not resolved_inv_id:
|
|
274
|
+
return []
|
|
275
|
+
return self._invocations.get(resolved_inv_id, [])
|
|
276
|
+
|
|
277
|
+
def get_all_jobs(self) -> Dict[str, JobData]:
|
|
278
|
+
"""Return a copy of all jobs in the execution DB."""
|
|
279
|
+
return dict(self._jobs)
|
|
186
280
|
|
|
187
281
|
|
|
188
282
|
# Ensure all the paths
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
#
|
|
16
|
+
# Generic server deployment configuration template
|
|
17
|
+
#
|
|
18
|
+
type: generic
|
|
19
|
+
image: ??? # Docker image to use for deployment
|
|
20
|
+
command: ??? # Command to run the server
|
|
21
|
+
|
|
22
|
+
# Server configuration
|
|
23
|
+
port: 8000
|
|
24
|
+
served_model_name: ??? # Name of the served model (used in command templates and evaluation)
|
|
25
|
+
extra_args: "" # Additional command line arguments
|
|
26
|
+
env_vars: {} # Environment variables as {name: value} dict
|
|
27
|
+
checkpoint_path: null # Path to model checkpoint
|
|
28
|
+
|
|
29
|
+
# API endpoints (customize based on your server)
|
|
30
|
+
endpoints:
|
|
31
|
+
chat: /v1/chat/completions
|
|
32
|
+
completions: /v1/completions
|
|
33
|
+
health: /health
|
|
@@ -559,7 +559,7 @@ class LeptonExecutor(BaseExecutor):
|
|
|
559
559
|
db = ExecutionDB()
|
|
560
560
|
|
|
561
561
|
# If id looks like an invocation_id (no dot), get all jobs for it
|
|
562
|
-
if
|
|
562
|
+
if "." not in id:
|
|
563
563
|
return _get_statuses_for_invocation_id(id=id, db=db)
|
|
564
564
|
# Otherwise, treat as job_id
|
|
565
565
|
job_data = db.get_job(id)
|
|
@@ -185,26 +185,6 @@ class LocalExecutor(BaseExecutor):
|
|
|
185
185
|
run_all_sequentially_sh_content
|
|
186
186
|
)
|
|
187
187
|
|
|
188
|
-
# Save launched jobs metadata
|
|
189
|
-
db = ExecutionDB()
|
|
190
|
-
for job_id, task, evaluation_task in zip(
|
|
191
|
-
job_ids, cfg.evaluation.tasks, evaluation_tasks
|
|
192
|
-
):
|
|
193
|
-
db.write_job(
|
|
194
|
-
job=JobData(
|
|
195
|
-
invocation_id=invocation_id,
|
|
196
|
-
job_id=job_id,
|
|
197
|
-
timestamp=time.time(),
|
|
198
|
-
executor="local",
|
|
199
|
-
data={
|
|
200
|
-
"output_dir": str(evaluation_task["output_dir"]),
|
|
201
|
-
"container": evaluation_task["container_name"],
|
|
202
|
-
"eval_image": evaluation_task["eval_image"],
|
|
203
|
-
},
|
|
204
|
-
config=OmegaConf.to_object(cfg),
|
|
205
|
-
)
|
|
206
|
-
)
|
|
207
|
-
|
|
208
188
|
if dry_run:
|
|
209
189
|
print("\n\n=============================================\n\n")
|
|
210
190
|
print(f"DRY RUN: Scripts prepared and saved to {output_dir}")
|
|
@@ -225,6 +205,26 @@ class LocalExecutor(BaseExecutor):
|
|
|
225
205
|
print("\nTo execute, run without --dry-run")
|
|
226
206
|
return invocation_id
|
|
227
207
|
|
|
208
|
+
# Save launched jobs metadata
|
|
209
|
+
db = ExecutionDB()
|
|
210
|
+
for job_id, task, evaluation_task in zip(
|
|
211
|
+
job_ids, cfg.evaluation.tasks, evaluation_tasks
|
|
212
|
+
):
|
|
213
|
+
db.write_job(
|
|
214
|
+
job=JobData(
|
|
215
|
+
invocation_id=invocation_id,
|
|
216
|
+
job_id=job_id,
|
|
217
|
+
timestamp=time.time(),
|
|
218
|
+
executor="local",
|
|
219
|
+
data={
|
|
220
|
+
"output_dir": str(evaluation_task["output_dir"]),
|
|
221
|
+
"container": evaluation_task["container_name"],
|
|
222
|
+
"eval_image": evaluation_task["eval_image"],
|
|
223
|
+
},
|
|
224
|
+
config=OmegaConf.to_object(cfg),
|
|
225
|
+
)
|
|
226
|
+
)
|
|
227
|
+
|
|
228
228
|
# Launch bash scripts with Popen for non-blocking execution.
|
|
229
229
|
# To ensure subprocess continues after python exits:
|
|
230
230
|
# - on Unix-like systems, to fully detach the subprocess
|
|
@@ -281,8 +281,8 @@ class LocalExecutor(BaseExecutor):
|
|
|
281
281
|
"""
|
|
282
282
|
db = ExecutionDB()
|
|
283
283
|
|
|
284
|
-
# If id looks like an invocation_id (
|
|
285
|
-
if
|
|
284
|
+
# If id looks like an invocation_id (no dot), get all jobs for it
|
|
285
|
+
if "." not in id:
|
|
286
286
|
jobs = db.get_jobs(id)
|
|
287
287
|
statuses: List[ExecutionStatus] = []
|
|
288
288
|
for job_id, _ in jobs.items():
|
|
@@ -204,8 +204,8 @@ class SlurmExecutor(BaseExecutor):
|
|
|
204
204
|
"""
|
|
205
205
|
db = ExecutionDB()
|
|
206
206
|
|
|
207
|
-
# If id looks like an invocation_id (
|
|
208
|
-
if
|
|
207
|
+
# If id looks like an invocation_id (no dot), get all jobs for it
|
|
208
|
+
if "." not in id:
|
|
209
209
|
jobs = db.get_jobs(id)
|
|
210
210
|
if not jobs:
|
|
211
211
|
return []
|
|
@@ -29,6 +29,7 @@ src/nemo_evaluator_launcher/common/logging_utils.py
|
|
|
29
29
|
src/nemo_evaluator_launcher/common/mapping.py
|
|
30
30
|
src/nemo_evaluator_launcher/configs/__init__.py
|
|
31
31
|
src/nemo_evaluator_launcher/configs/default.yaml
|
|
32
|
+
src/nemo_evaluator_launcher/configs/deployment/generic.yaml
|
|
32
33
|
src/nemo_evaluator_launcher/configs/deployment/nim.yaml
|
|
33
34
|
src/nemo_evaluator_launcher/configs/deployment/none.yaml
|
|
34
35
|
src/nemo_evaluator_launcher/configs/deployment/sglang.yaml
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|