nemo-evaluator-launcher 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nemo_evaluator_launcher/api/functional.py +28 -2
- nemo_evaluator_launcher/cli/export.py +128 -10
- nemo_evaluator_launcher/cli/run.py +22 -3
- nemo_evaluator_launcher/cli/status.py +3 -1
- nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +24 -4
- nemo_evaluator_launcher/executors/lepton/executor.py +3 -5
- nemo_evaluator_launcher/executors/local/executor.py +26 -5
- nemo_evaluator_launcher/executors/slurm/executor.py +90 -26
- nemo_evaluator_launcher/exporters/base.py +9 -0
- nemo_evaluator_launcher/exporters/gsheets.py +27 -9
- nemo_evaluator_launcher/exporters/local.py +5 -0
- nemo_evaluator_launcher/exporters/mlflow.py +105 -32
- nemo_evaluator_launcher/exporters/utils.py +22 -105
- nemo_evaluator_launcher/exporters/wandb.py +117 -38
- nemo_evaluator_launcher/package_info.py +1 -1
- {nemo_evaluator_launcher-0.1.12.dist-info → nemo_evaluator_launcher-0.1.14.dist-info}/METADATA +1 -1
- {nemo_evaluator_launcher-0.1.12.dist-info → nemo_evaluator_launcher-0.1.14.dist-info}/RECORD +21 -21
- {nemo_evaluator_launcher-0.1.12.dist-info → nemo_evaluator_launcher-0.1.14.dist-info}/WHEEL +0 -0
- {nemo_evaluator_launcher-0.1.12.dist-info → nemo_evaluator_launcher-0.1.14.dist-info}/entry_points.txt +0 -0
- {nemo_evaluator_launcher-0.1.12.dist-info → nemo_evaluator_launcher-0.1.14.dist-info}/licenses/LICENSE +0 -0
- {nemo_evaluator_launcher-0.1.12.dist-info → nemo_evaluator_launcher-0.1.14.dist-info}/top_level.txt +0 -0
|
@@ -19,7 +19,7 @@ import os
|
|
|
19
19
|
import shutil
|
|
20
20
|
import tempfile
|
|
21
21
|
from pathlib import Path
|
|
22
|
-
from typing import Any, Dict, List
|
|
22
|
+
from typing import Any, Dict, List, Optional
|
|
23
23
|
|
|
24
24
|
import yaml
|
|
25
25
|
|
|
@@ -38,6 +38,7 @@ from nemo_evaluator_launcher.exporters.registry import register_exporter
|
|
|
38
38
|
from nemo_evaluator_launcher.exporters.utils import (
|
|
39
39
|
extract_accuracy_metrics,
|
|
40
40
|
extract_exporter_config,
|
|
41
|
+
get_artifact_root,
|
|
41
42
|
get_available_artifacts,
|
|
42
43
|
get_benchmark_info,
|
|
43
44
|
get_task_name,
|
|
@@ -163,29 +164,92 @@ class WandBExporter(BaseExporter):
|
|
|
163
164
|
return {"success": False, "error": f"W&B export failed: {str(e)}"}
|
|
164
165
|
|
|
165
166
|
def _log_artifacts(
|
|
166
|
-
self,
|
|
167
|
+
self,
|
|
168
|
+
job_data: JobData,
|
|
169
|
+
wandb_config: Dict[str, Any],
|
|
170
|
+
artifact,
|
|
171
|
+
register_staging_dir=None,
|
|
167
172
|
) -> List[str]:
|
|
168
|
-
"""Log evaluation artifacts to WandB using LocalExporter for
|
|
173
|
+
"""Log evaluation artifacts to WandB using LocalExporter for staging."""
|
|
169
174
|
if not wandb_config.get("log_artifacts", True):
|
|
170
175
|
return []
|
|
171
176
|
try:
|
|
172
177
|
temp_dir = tempfile.mkdtemp(prefix="wandb_artifacts_")
|
|
173
|
-
|
|
178
|
+
if callable(register_staging_dir):
|
|
179
|
+
register_staging_dir(temp_dir)
|
|
180
|
+
local_exporter = LocalExporter(
|
|
181
|
+
{
|
|
182
|
+
"output_dir": temp_dir,
|
|
183
|
+
"copy_logs": wandb_config.get(
|
|
184
|
+
"log_logs", wandb_config.get("copy_logs", False)
|
|
185
|
+
),
|
|
186
|
+
"only_required": wandb_config.get("only_required", True),
|
|
187
|
+
"format": wandb_config.get("format"),
|
|
188
|
+
"log_metrics": wandb_config.get("log_metrics", []),
|
|
189
|
+
"output_filename": wandb_config.get("output_filename"),
|
|
190
|
+
}
|
|
191
|
+
)
|
|
174
192
|
local_result = local_exporter.export_job(job_data)
|
|
175
193
|
|
|
176
194
|
if not local_result.success:
|
|
177
195
|
logger.error(f"Failed to download artifacts: {local_result.message}")
|
|
178
196
|
return []
|
|
179
197
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
198
|
+
base_dir = Path(local_result.dest)
|
|
199
|
+
artifacts_dir = base_dir / "artifacts"
|
|
200
|
+
logs_dir = base_dir / "logs"
|
|
201
|
+
logged_names: list[str] = []
|
|
202
|
+
|
|
203
|
+
artifact_root = get_artifact_root(job_data) # "<harness>.<benchmark>"
|
|
204
|
+
|
|
205
|
+
# Add config file only when artifacts logging is enabled
|
|
206
|
+
if wandb_config.get("log_artifacts", True):
|
|
207
|
+
cfg_added = False
|
|
208
|
+
for fname in ("config.yml", "run_config.yml"):
|
|
209
|
+
p = artifacts_dir / fname
|
|
210
|
+
if p.exists():
|
|
211
|
+
artifact.add_file(str(p), name=f"{artifact_root}/{fname}")
|
|
212
|
+
logged_names.append(fname)
|
|
213
|
+
cfg_added = True
|
|
214
|
+
break
|
|
215
|
+
if not cfg_added:
|
|
216
|
+
with tempfile.NamedTemporaryFile(
|
|
217
|
+
"w", suffix=".yaml", delete=False
|
|
218
|
+
) as tmp_cfg:
|
|
219
|
+
yaml.dump(
|
|
220
|
+
job_data.config or {},
|
|
221
|
+
tmp_cfg,
|
|
222
|
+
default_flow_style=False,
|
|
223
|
+
sort_keys=False,
|
|
224
|
+
)
|
|
225
|
+
cfg_path = tmp_cfg.name
|
|
226
|
+
artifact.add_file(cfg_path, name=f"{artifact_root}/config.yaml")
|
|
227
|
+
os.unlink(cfg_path)
|
|
228
|
+
logged_names.append("config.yaml")
|
|
229
|
+
|
|
230
|
+
files_to_upload: list[Path] = []
|
|
231
|
+
if wandb_config.get("only_required", True):
|
|
232
|
+
for fname in get_available_artifacts(artifacts_dir):
|
|
233
|
+
p = artifacts_dir / fname
|
|
234
|
+
if p.exists():
|
|
235
|
+
files_to_upload.append(p)
|
|
236
|
+
else:
|
|
237
|
+
for p in artifacts_dir.iterdir():
|
|
238
|
+
if p.is_file():
|
|
239
|
+
files_to_upload.append(p)
|
|
240
|
+
|
|
241
|
+
for fpath in files_to_upload:
|
|
242
|
+
rel = fpath.relative_to(artifacts_dir).as_posix()
|
|
243
|
+
artifact.add_file(str(fpath), name=f"{artifact_root}/artifacts/{rel}")
|
|
244
|
+
logged_names.append(rel)
|
|
245
|
+
|
|
246
|
+
if wandb_config.get("log_logs", False) and logs_dir.exists():
|
|
247
|
+
for p in logs_dir.rglob("*"):
|
|
248
|
+
if p.is_file():
|
|
249
|
+
rel = p.relative_to(logs_dir).as_posix()
|
|
250
|
+
artifact.add_file(str(p), name=f"{artifact_root}/logs/{rel}")
|
|
251
|
+
logged_names.append(f"logs/{rel}")
|
|
252
|
+
|
|
189
253
|
return logged_names
|
|
190
254
|
except Exception as e:
|
|
191
255
|
logger.error(f"Error logging artifacts: {e}")
|
|
@@ -193,7 +257,7 @@ class WandBExporter(BaseExporter):
|
|
|
193
257
|
|
|
194
258
|
def _check_existing_run(
|
|
195
259
|
self, identifier: str, job_data: JobData, config: Dict[str, Any]
|
|
196
|
-
) -> tuple[bool, str]:
|
|
260
|
+
) -> tuple[bool, Optional[str]]:
|
|
197
261
|
"""Check if run exists based on webhook metadata then name patterns."""
|
|
198
262
|
try:
|
|
199
263
|
import wandb
|
|
@@ -204,7 +268,7 @@ class WandBExporter(BaseExporter):
|
|
|
204
268
|
if not (entity and project):
|
|
205
269
|
return False, None
|
|
206
270
|
|
|
207
|
-
#
|
|
271
|
+
# Check webhook metadata for run_id first
|
|
208
272
|
webhook_meta = job_data.data.get("webhook_metadata", {})
|
|
209
273
|
if (
|
|
210
274
|
webhook_meta.get("webhook_source") == "wandb"
|
|
@@ -306,6 +370,13 @@ class WandBExporter(BaseExporter):
|
|
|
306
370
|
# Initialize
|
|
307
371
|
run = wandb.init(**{k: v for k, v in run_args.items() if v is not None})
|
|
308
372
|
|
|
373
|
+
# Track staging dirs for this run
|
|
374
|
+
staging_dirs: List[str] = []
|
|
375
|
+
|
|
376
|
+
def register_staging_dir(path: str) -> None:
|
|
377
|
+
if path and os.path.isdir(path):
|
|
378
|
+
staging_dirs.append(path)
|
|
379
|
+
|
|
309
380
|
# In multi_task, aggregate lists after init (no overwrite)
|
|
310
381
|
if log_mode == "multi_task":
|
|
311
382
|
try:
|
|
@@ -339,34 +410,42 @@ class WandBExporter(BaseExporter):
|
|
|
339
410
|
"harness": harness,
|
|
340
411
|
},
|
|
341
412
|
)
|
|
342
|
-
with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as tmp_cfg:
|
|
343
|
-
yaml.dump(job_data.config or {}, tmp_cfg, default_flow_style=False)
|
|
344
|
-
cfg_path = tmp_cfg.name
|
|
345
|
-
artifact.add_file(cfg_path, name="config.yaml")
|
|
346
|
-
os.unlink(cfg_path)
|
|
347
413
|
|
|
348
|
-
logged_artifacts = self._log_artifacts(
|
|
349
|
-
|
|
414
|
+
logged_artifacts = self._log_artifacts(
|
|
415
|
+
job_data, config, artifact, register_staging_dir=register_staging_dir
|
|
416
|
+
)
|
|
350
417
|
|
|
351
|
-
# charts for each logged metric
|
|
352
418
|
try:
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
419
|
+
run.log_artifact(artifact)
|
|
420
|
+
# charts for each logged metric
|
|
421
|
+
try:
|
|
422
|
+
for k in metrics.keys():
|
|
423
|
+
run.define_metric(k, summary="last")
|
|
424
|
+
except Exception:
|
|
425
|
+
pass
|
|
357
426
|
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
427
|
+
# Log metrics with per-task step
|
|
428
|
+
try:
|
|
429
|
+
step_idx = int(job_data.job_id.split(".")[-1])
|
|
430
|
+
except Exception:
|
|
431
|
+
step_idx = 0
|
|
432
|
+
run.log(metrics, step=step_idx)
|
|
364
433
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
434
|
+
# metrics summary
|
|
435
|
+
try:
|
|
436
|
+
run.summary.update(metrics)
|
|
437
|
+
except Exception:
|
|
438
|
+
pass
|
|
439
|
+
finally:
|
|
440
|
+
for d in staging_dirs:
|
|
441
|
+
try:
|
|
442
|
+
shutil.rmtree(d, ignore_errors=True)
|
|
443
|
+
except Exception:
|
|
444
|
+
pass
|
|
445
|
+
try:
|
|
446
|
+
run.finish()
|
|
447
|
+
except Exception:
|
|
448
|
+
pass
|
|
370
449
|
|
|
371
450
|
return {
|
|
372
451
|
"run_id": run.id,
|
{nemo_evaluator_launcher-0.1.12.dist-info → nemo_evaluator_launcher-0.1.14.dist-info}/RECORD
RENAMED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
nemo_evaluator_launcher/__init__.py,sha256=2F703fttLaIyMHoVD54rptHMXt4AWnplHDrwWJ3e3PM,1930
|
|
2
|
-
nemo_evaluator_launcher/package_info.py,sha256=
|
|
2
|
+
nemo_evaluator_launcher/package_info.py,sha256=mb9aX6B6nFEfIArIwK-S6yhj2ORaUTmNybkpiR6ltDw,1586
|
|
3
3
|
nemo_evaluator_launcher/api/__init__.py,sha256=U9q_MJK2vRsFaymanhyy0nD1SNAZQZC8oY45RXPX7ac,1024
|
|
4
|
-
nemo_evaluator_launcher/api/functional.py,sha256=
|
|
4
|
+
nemo_evaluator_launcher/api/functional.py,sha256=NyALslqIaFVqZM2eZlEKwp_8huOffd-yzmOLR519cBQ,28448
|
|
5
5
|
nemo_evaluator_launcher/api/types.py,sha256=RXr_QoKdhejj1T9-HybSjd4KTxJmSv0bE0uLUFtF7Zc,3269
|
|
6
6
|
nemo_evaluator_launcher/api/utils.py,sha256=q5HArRj7PKgBfeH3bOX8q1U97yMyQQp72yRRA5JP9PE,818
|
|
7
7
|
nemo_evaluator_launcher/cli/__init__.py,sha256=lNC_skFLYTOt-arnY3ZQnZMWzHlrtD2wAoHvDcHddwM,673
|
|
8
|
-
nemo_evaluator_launcher/cli/export.py,sha256=
|
|
8
|
+
nemo_evaluator_launcher/cli/export.py,sha256=GRXxusKDq_1qjMKN6MKOIjZ8x4u5ERgXwHSAGrvsGCY,11211
|
|
9
9
|
nemo_evaluator_launcher/cli/kill.py,sha256=C-4PWmMu8mIITo92o5AHxtq_s-8Cckbp7wAlG0I_ylw,1323
|
|
10
10
|
nemo_evaluator_launcher/cli/ls_runs.py,sha256=5QGUofeKK6tNtQnUF5wJ5YBZ5TL7RRueKlPkvA1wclY,3873
|
|
11
11
|
nemo_evaluator_launcher/cli/ls_tasks.py,sha256=Pd2lBQOQBNHBWrjk4tZg0SQ9Ul9F2Ak-zOyh-G9x-DY,5293
|
|
12
12
|
nemo_evaluator_launcher/cli/main.py,sha256=Wu_AB_6LJL2tvbl-9jw6dPYQ_uvgYivlyM2E9soHvMc,6649
|
|
13
|
-
nemo_evaluator_launcher/cli/run.py,sha256=
|
|
14
|
-
nemo_evaluator_launcher/cli/status.py,sha256=
|
|
13
|
+
nemo_evaluator_launcher/cli/run.py,sha256=h5DZM3agAV7FSOLt3evPgt5kx9dX0i1cPaeqfxSP-P4,6608
|
|
14
|
+
nemo_evaluator_launcher/cli/status.py,sha256=3kv-VHMHlD2qHwgKzgmTZ5x_D1c9-rW86hBOghHk35M,6108
|
|
15
15
|
nemo_evaluator_launcher/cli/version.py,sha256=puMwIvkmfD3HESjftdTSP6T3Nc8J4cbz8uXWHJcTemY,2030
|
|
16
16
|
nemo_evaluator_launcher/common/__init__.py,sha256=6-xb4KpG8-lZbWBI42c_Gax-Sq0kMSW8UG0Vn8dOBlo,744
|
|
17
17
|
nemo_evaluator_launcher/common/execdb.py,sha256=WPzg5Iu2ojvFpBuYahSt3voP_iEUpoO8NgqMLUBwFxA,9767
|
|
@@ -33,26 +33,26 @@ nemo_evaluator_launcher/executors/__init__.py,sha256=mSU1op5r7R_vqOCLDP84z6utfFg
|
|
|
33
33
|
nemo_evaluator_launcher/executors/base.py,sha256=c_n8LasxAa_3_GbaAy-SkIK9A6HIl-T5Cp-kWZ8V2l4,2939
|
|
34
34
|
nemo_evaluator_launcher/executors/registry.py,sha256=8QXSrsJyHeNi8iSttJ8KWQLXmZve1vxnnCNw_CkeopI,1409
|
|
35
35
|
nemo_evaluator_launcher/executors/lepton/__init__.py,sha256=F_7yuBaYQ6WWTcptADdkL3AIZ_jXJQHGgKag-Hm7BbQ,698
|
|
36
|
-
nemo_evaluator_launcher/executors/lepton/deployment_helpers.py,sha256=
|
|
37
|
-
nemo_evaluator_launcher/executors/lepton/executor.py,sha256=
|
|
36
|
+
nemo_evaluator_launcher/executors/lepton/deployment_helpers.py,sha256=AAIlHHn-WifevNosug0DlSDLN6NtjkclEu5LHyu1xq8,21799
|
|
37
|
+
nemo_evaluator_launcher/executors/lepton/executor.py,sha256=HsdHhQ4C_pSG7jircGkFXlR8poAzUTttTzd25CzrDXg,37221
|
|
38
38
|
nemo_evaluator_launcher/executors/lepton/job_helpers.py,sha256=6baTxcygfP1oFgAJ7I9EL4xRlcJDWqbqzZoE1CRrwSk,13528
|
|
39
39
|
nemo_evaluator_launcher/executors/local/__init__.py,sha256=lNC_skFLYTOt-arnY3ZQnZMWzHlrtD2wAoHvDcHddwM,673
|
|
40
|
-
nemo_evaluator_launcher/executors/local/executor.py,sha256=
|
|
40
|
+
nemo_evaluator_launcher/executors/local/executor.py,sha256=5DasUn_2r3X3ICNHM4-WHcFHR0vnN1jpv267cfdwV9U,18870
|
|
41
41
|
nemo_evaluator_launcher/executors/local/run.template.sh,sha256=oD7QvDqEvd9NpS_5SZhYqIhCjVdcXI-GD98g5j-Ek4I,3344
|
|
42
42
|
nemo_evaluator_launcher/executors/slurm/__init__.py,sha256=lNC_skFLYTOt-arnY3ZQnZMWzHlrtD2wAoHvDcHddwM,673
|
|
43
|
-
nemo_evaluator_launcher/executors/slurm/executor.py,sha256=
|
|
43
|
+
nemo_evaluator_launcher/executors/slurm/executor.py,sha256=LdHG4S9gB1pZS92nBkh9U-d2r_PF7j7HDC_fP857rlQ,38363
|
|
44
44
|
nemo_evaluator_launcher/exporters/__init__.py,sha256=mBXG9FG48FeYrs8sF0zA2mgo1eqBmRgoml7zjJrqDso,1323
|
|
45
|
-
nemo_evaluator_launcher/exporters/base.py,sha256=
|
|
46
|
-
nemo_evaluator_launcher/exporters/gsheets.py,sha256=
|
|
47
|
-
nemo_evaluator_launcher/exporters/local.py,sha256=
|
|
48
|
-
nemo_evaluator_launcher/exporters/mlflow.py,sha256=
|
|
45
|
+
nemo_evaluator_launcher/exporters/base.py,sha256=0BEqS-Zjez-KsrGE9yfo8S5w2uwMW3btBZve3SiiUp0,4307
|
|
46
|
+
nemo_evaluator_launcher/exporters/gsheets.py,sha256=hBOL3vaomCW2fPMDEOQWkZkFCgF4jCoS4U5ZlsNVENs,15911
|
|
47
|
+
nemo_evaluator_launcher/exporters/local.py,sha256=oaHMyFaxihtHJUSmMdp9nlrVe8KfZ8IIq0jqDGbgS6s,19487
|
|
48
|
+
nemo_evaluator_launcher/exporters/mlflow.py,sha256=1yCgB7--lLD8an2IdVjZVfZ190oeYqx-T6bUFG5H0sI,20192
|
|
49
49
|
nemo_evaluator_launcher/exporters/registry.py,sha256=XsPTv_SBAFjcErO6BJ3OHqs3EvXQpLeyKRJuK9Ql4_M,1299
|
|
50
|
-
nemo_evaluator_launcher/exporters/utils.py,sha256=
|
|
51
|
-
nemo_evaluator_launcher/exporters/wandb.py,sha256=
|
|
50
|
+
nemo_evaluator_launcher/exporters/utils.py,sha256=5AXO5C6s93lavA2EHHfGGx6o9u_nUQ5WZtXEWxG6Hg0,20999
|
|
51
|
+
nemo_evaluator_launcher/exporters/wandb.py,sha256=FhhrVxtwE0CGiSMXSgAwF6dFD5jojA4qGFEEyKkIrX4,16531
|
|
52
52
|
nemo_evaluator_launcher/resources/mapping.toml,sha256=uOg4Y-gDXXskbbba2vuwJ5FLJ3W0kSZz7Fap_nJnFQc,11322
|
|
53
|
-
nemo_evaluator_launcher-0.1.
|
|
54
|
-
nemo_evaluator_launcher-0.1.
|
|
55
|
-
nemo_evaluator_launcher-0.1.
|
|
56
|
-
nemo_evaluator_launcher-0.1.
|
|
57
|
-
nemo_evaluator_launcher-0.1.
|
|
58
|
-
nemo_evaluator_launcher-0.1.
|
|
53
|
+
nemo_evaluator_launcher-0.1.14.dist-info/licenses/LICENSE,sha256=DyGb0fqHPZAsd_uXHA0DGcOCqsvrNsImuLC0Ts4s1zI,23413
|
|
54
|
+
nemo_evaluator_launcher-0.1.14.dist-info/METADATA,sha256=Q3OhzGfr5E4fGGvNfyLJgijXvjGFwFZrhrCQG7f8xtU,28725
|
|
55
|
+
nemo_evaluator_launcher-0.1.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
56
|
+
nemo_evaluator_launcher-0.1.14.dist-info/entry_points.txt,sha256=64z1T5GKSB9PW1fCENQuor6X6eqH1rcfg0NQGfKrEy8,130
|
|
57
|
+
nemo_evaluator_launcher-0.1.14.dist-info/top_level.txt,sha256=5PvawNm9TXKqPRjZita1xPOtFiMOipcoRf50FI1iY3s,24
|
|
58
|
+
nemo_evaluator_launcher-0.1.14.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nemo_evaluator_launcher-0.1.12.dist-info → nemo_evaluator_launcher-0.1.14.dist-info}/top_level.txt
RENAMED
|
File without changes
|