nemo-evaluator-launcher 0.1.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nemo-evaluator-launcher might be problematic. Click here for more details.

Files changed (57) hide show
  1. nemo_evaluator_launcher/__init__.py +65 -0
  2. nemo_evaluator_launcher/api/__init__.py +24 -0
  3. nemo_evaluator_launcher/api/functional.py +641 -0
  4. nemo_evaluator_launcher/api/types.py +89 -0
  5. nemo_evaluator_launcher/api/utils.py +19 -0
  6. nemo_evaluator_launcher/cli/__init__.py +15 -0
  7. nemo_evaluator_launcher/cli/export.py +148 -0
  8. nemo_evaluator_launcher/cli/info.py +117 -0
  9. nemo_evaluator_launcher/cli/kill.py +39 -0
  10. nemo_evaluator_launcher/cli/ls_runs.py +113 -0
  11. nemo_evaluator_launcher/cli/ls_tasks.py +34 -0
  12. nemo_evaluator_launcher/cli/main.py +136 -0
  13. nemo_evaluator_launcher/cli/run.py +135 -0
  14. nemo_evaluator_launcher/cli/status.py +118 -0
  15. nemo_evaluator_launcher/cli/version.py +52 -0
  16. nemo_evaluator_launcher/common/__init__.py +16 -0
  17. nemo_evaluator_launcher/common/execdb.py +189 -0
  18. nemo_evaluator_launcher/common/helpers.py +157 -0
  19. nemo_evaluator_launcher/common/logging_utils.py +349 -0
  20. nemo_evaluator_launcher/common/mapping.py +310 -0
  21. nemo_evaluator_launcher/configs/__init__.py +15 -0
  22. nemo_evaluator_launcher/configs/default.yaml +28 -0
  23. nemo_evaluator_launcher/configs/deployment/nim.yaml +32 -0
  24. nemo_evaluator_launcher/configs/deployment/none.yaml +16 -0
  25. nemo_evaluator_launcher/configs/deployment/sglang.yaml +38 -0
  26. nemo_evaluator_launcher/configs/deployment/vllm.yaml +41 -0
  27. nemo_evaluator_launcher/configs/execution/lepton/default.yaml +92 -0
  28. nemo_evaluator_launcher/configs/execution/local.yaml +17 -0
  29. nemo_evaluator_launcher/configs/execution/slurm/default.yaml +33 -0
  30. nemo_evaluator_launcher/executors/__init__.py +22 -0
  31. nemo_evaluator_launcher/executors/base.py +97 -0
  32. nemo_evaluator_launcher/executors/lepton/__init__.py +16 -0
  33. nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +589 -0
  34. nemo_evaluator_launcher/executors/lepton/executor.py +905 -0
  35. nemo_evaluator_launcher/executors/lepton/job_helpers.py +394 -0
  36. nemo_evaluator_launcher/executors/local/__init__.py +15 -0
  37. nemo_evaluator_launcher/executors/local/executor.py +491 -0
  38. nemo_evaluator_launcher/executors/local/run.template.sh +88 -0
  39. nemo_evaluator_launcher/executors/registry.py +38 -0
  40. nemo_evaluator_launcher/executors/slurm/__init__.py +15 -0
  41. nemo_evaluator_launcher/executors/slurm/executor.py +982 -0
  42. nemo_evaluator_launcher/exporters/__init__.py +36 -0
  43. nemo_evaluator_launcher/exporters/base.py +112 -0
  44. nemo_evaluator_launcher/exporters/gsheets.py +391 -0
  45. nemo_evaluator_launcher/exporters/local.py +488 -0
  46. nemo_evaluator_launcher/exporters/mlflow.py +448 -0
  47. nemo_evaluator_launcher/exporters/registry.py +40 -0
  48. nemo_evaluator_launcher/exporters/utils.py +669 -0
  49. nemo_evaluator_launcher/exporters/wandb.py +376 -0
  50. nemo_evaluator_launcher/package_info.py +35 -0
  51. nemo_evaluator_launcher/resources/mapping.toml +344 -0
  52. nemo_evaluator_launcher-0.1.0rc2.dist-info/METADATA +35 -0
  53. nemo_evaluator_launcher-0.1.0rc2.dist-info/RECORD +57 -0
  54. nemo_evaluator_launcher-0.1.0rc2.dist-info/WHEEL +5 -0
  55. nemo_evaluator_launcher-0.1.0rc2.dist-info/entry_points.txt +3 -0
  56. nemo_evaluator_launcher-0.1.0rc2.dist-info/licenses/LICENSE +451 -0
  57. nemo_evaluator_launcher-0.1.0rc2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,488 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ """Export evaluation artifacts to local filesystem."""
17
+
18
+ import csv
19
+ import json
20
+ import shutil
21
+ from datetime import datetime
22
+ from pathlib import Path
23
+ from typing import Any, Dict, List
24
+
25
+ from nemo_evaluator_launcher.common.execdb import JobData
26
+ from nemo_evaluator_launcher.common.logging_utils import logger
27
+ from nemo_evaluator_launcher.exporters.base import BaseExporter, ExportResult
28
+ from nemo_evaluator_launcher.exporters.registry import register_exporter
29
+ from nemo_evaluator_launcher.exporters.utils import (
30
+ download_gitlab_artifacts,
31
+ extract_accuracy_metrics,
32
+ extract_exporter_config,
33
+ get_benchmark_info,
34
+ get_container_from_mapping,
35
+ get_model_name,
36
+ get_relevant_artifacts,
37
+ get_task_name,
38
+ ssh_cleanup_masters,
39
+ ssh_download_artifacts,
40
+ ssh_setup_masters,
41
+ validate_artifacts,
42
+ )
43
+
44
+
45
@register_exporter("local")
class LocalExporter(BaseExporter):
    """Export all artifacts to local/remote filesystem with optional JSON/CSV summaries.

    Artifacts for each job are staged under
    ``<output_dir>/<invocation_id>/<job_id>/`` and, when a summary format is
    configured, an invocation-level ``processed_results.json``/``.csv`` is
    upserted next to the staged jobs.

    Config keys:
        output_dir (str): Output directory for exported results
            (default: "./nemo-evaluator-launcher-results").
        copy_logs (bool): Whether to copy logs with artifacts (default: False).
        only_required (bool): Copy only required+optional artifacts (default: True).
        format (str or None): Summary format, one of None, "json", or "csv"
            (default: None; no summary, only original artifacts).
        log_metrics (list[str]): Filters for metric names; includes full metric
            name or substring pattern.
        output_filename (str): Overrides default processed_results.json/csv filename.
    """
57
+
58
+ def supports_executor(self, executor_type: str) -> bool:
59
+ return True # Local export compatible with all executors
60
+
61
    def export_job(self, job_data: JobData) -> ExportResult:
        """Export job artifacts to local directory.

        Stages the job's artifacts into
        ``<output_dir>/<invocation_id>/<job_id>/``, validates them, writes
        ``job_metadata.json``, and optionally updates the invocation-level
        JSON/CSV summary (cfg["format"]).

        Args:
            job_data: Execution-DB record of the job to export.

        Returns:
            ExportResult with success flag, destination path, human-readable
            message, and metadata (validation info or file counts).
        """
        # Merge auto-export + CLI config
        cfg = extract_exporter_config(job_data, "local", self.config)

        output_dir = Path(cfg.get("output_dir", "./nemo-evaluator-launcher-results"))
        job_export_dir = output_dir / job_data.invocation_id / job_data.job_id
        job_export_dir.mkdir(parents=True, exist_ok=True)

        try:
            paths = self.get_job_paths(job_data)
            exported_files: List[str] = []

            # Stage artifacts per storage type
            if paths["storage_type"] == "local_filesystem":
                exported_files = self._copy_local_artifacts(paths, job_export_dir, cfg)
            elif paths["storage_type"] == "remote_ssh":
                # Last arg is the SSH control path; None means no shared master
                # (export_invocation/export_multiple_invocations set masters up
                # before calling this).
                exported_files = ssh_download_artifacts(
                    paths, job_export_dir, cfg, None
                )
            elif paths["storage_type"] == "gitlab_ci_local":
                # Gitlab CI runner already staged artifacts on the local FS.
                exported_files = self._copy_local_artifacts(paths, job_export_dir, cfg)
            elif paths["storage_type"] == "gitlab_remote":
                # Remote GitLab download is not wired up yet; fail loudly.
                raise NotImplementedError("Unsupported storage type")
                # exported_files = self._download_gitlab_remote_artifacts(
                #     paths, job_export_dir
                # )
            else:
                raise ValueError(
                    f"Cannot export from storage type: {paths['storage_type']}"
                )

            # Validate artifacts (required/optional presence) in the staged copy
            artifacts_dir = job_export_dir / "artifacts"
            validation = validate_artifacts(artifacts_dir)

            # Save metadata — written even when validation fails below, so the
            # export directory is always self-describing.
            self._save_job_metadata(job_data, job_export_dir)
            exported_files.append(str(job_export_dir / "job_metadata.json"))

            if not validation["can_export"]:
                return ExportResult(
                    success=False,
                    dest=str(job_export_dir),
                    message=validation["message"],
                    metadata=validation,
                )

            if validation["missing_optional"]:
                logger.info(
                    f"Exporting without optional artifacts: {', '.join(validation['missing_optional'])}",
                    job_id=job_data.job_id,
                )

            # Optional summary (JSON/CSV) at invocation level
            msg = f"Exported {len(exported_files)} files. {validation['message']}"
            meta: Dict[str, Any] = {"files_count": len(exported_files)}
            fmt = cfg.get("format")
            if fmt in ["json", "csv"]:
                try:
                    summary_path = self._write_summary(job_data, cfg)
                    meta["summary_path"] = str(summary_path)
                    msg += f". Summary: {summary_path.name}"
                except Exception as e:
                    # Summary is best-effort; artifact export already succeeded.
                    logger.warning(f"Failed to create {fmt} summary: {e}")
                    msg += " (summary failed)"

            return ExportResult(
                success=True, dest=str(job_export_dir), message=msg, metadata=meta
            )

        except Exception as e:
            # Top-level boundary: convert any failure into a failed ExportResult
            # so callers iterating many jobs keep going.
            logger.error(f"Failed to export job {job_data.job_id}: {e}")
            return ExportResult(
                success=False,
                dest=str(job_export_dir),
                message=f"Export failed: {str(e)}",
                metadata={},
            )
140
+
141
+ def export_invocation(self, invocation_id: str) -> Dict[str, Any]:
142
+ """Export all jobs in an invocation (with connection reuse)."""
143
+ jobs = self.db.get_jobs(invocation_id)
144
+ if not jobs:
145
+ return {
146
+ "success": False,
147
+ "error": f"No jobs found for invocation {invocation_id}",
148
+ }
149
+
150
+ control_paths = ssh_setup_masters(jobs)
151
+ try:
152
+ results = {}
153
+ for job_id, job_data in jobs.items():
154
+ result = self.export_job(job_data)
155
+ results[job_id] = result.__dict__
156
+ return {"success": True, "invocation_id": invocation_id, "jobs": results}
157
+ finally:
158
+ ssh_cleanup_masters(control_paths)
159
+
160
+ def export_multiple_invocations(self, invocation_ids: List[str]) -> Dict[str, Any]:
161
+ db_jobs: Dict[str, JobData] = {}
162
+ results: Dict[str, Any] = {}
163
+ for inv in invocation_ids:
164
+ jobs = self.db.get_jobs(inv)
165
+ if jobs:
166
+ db_jobs.update(jobs)
167
+ results[inv] = {"success": True, "job_count": len(jobs)}
168
+ else:
169
+ results[inv] = {"success": False, "error": f"No jobs found for {inv}"}
170
+ if not db_jobs:
171
+ return {
172
+ "success": False,
173
+ "error": "No jobs to export",
174
+ "invocations": results,
175
+ }
176
+
177
+ # Reuse SSH masters across all jobs/hosts and stage artifacts locally
178
+ cp = ssh_setup_masters(db_jobs)
179
+ try:
180
+ first = next(iter(db_jobs.values()))
181
+ cfg = extract_exporter_config(first, "local", self.config)
182
+ fmt = cfg.get("format")
183
+ output_dir = Path(
184
+ cfg.get("output_dir", "./nemo-evaluator-launcher-results")
185
+ )
186
+ filename = cfg.get("output_filename", f"processed_results.{fmt}")
187
+ out_path = output_dir / filename # consolidated file at output_dir
188
+
189
+ # Stage artifacts for all jobs into <output_dir>/<inv>/<job>/
190
+ for jd in db_jobs.values():
191
+ try:
192
+ self.export_job(jd)
193
+ except Exception:
194
+ pass # keep going; remaining jobs may still contribute
195
+
196
+ # Build consolidated summary from staged artifacts
197
+ all_metrics, jobs_list = {}, []
198
+ for jd in db_jobs.values():
199
+ artifacts_dir = output_dir / jd.invocation_id / jd.job_id / "artifacts"
200
+ metrics = extract_accuracy_metrics(
201
+ jd,
202
+ lambda _: {
203
+ "artifacts_dir": artifacts_dir,
204
+ "storage_type": "local_filesystem",
205
+ },
206
+ cfg.get("log_metrics", []),
207
+ )
208
+ all_metrics[jd.job_id] = metrics
209
+ jobs_list.append(jd)
210
+
211
+ if fmt == "json":
212
+ if out_path.exists():
213
+ data = json.loads(out_path.read_text(encoding="utf-8"))
214
+ else:
215
+ data = {
216
+ "export_timestamp": datetime.now().isoformat(),
217
+ "benchmarks": {},
218
+ }
219
+ for jd in jobs_list:
220
+ bench, model, entry = self._build_entry(
221
+ jd, all_metrics.get(jd.job_id, {}), cfg
222
+ )
223
+ m = (
224
+ data.setdefault("benchmarks", {})
225
+ .setdefault(bench, {})
226
+ .setdefault("models", {})
227
+ )
228
+ lst = m.setdefault(model, [])
229
+ idx = next(
230
+ (
231
+ i
232
+ for i, e in enumerate(lst)
233
+ if e.get("invocation_id") == jd.invocation_id
234
+ and e.get("job_id") == jd.job_id
235
+ ),
236
+ None,
237
+ )
238
+ if idx is None:
239
+ lst.append(entry)
240
+ else:
241
+ lst[idx] = entry
242
+ data["export_timestamp"] = datetime.now().isoformat()
243
+ out_path.parent.mkdir(parents=True, exist_ok=True)
244
+ out_path.write_text(
245
+ json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8"
246
+ )
247
+ elif fmt == "csv":
248
+ for jd in jobs_list:
249
+ self._csv_upsert(out_path, jd, all_metrics.get(jd.job_id, {}), cfg)
250
+
251
+ return {
252
+ "success": True,
253
+ "invocations": results,
254
+ "metadata": {
255
+ "total_invocations": len(invocation_ids),
256
+ "total_jobs": len(db_jobs),
257
+ "summary_path": str(out_path.resolve()),
258
+ },
259
+ }
260
+ finally:
261
+ ssh_cleanup_masters(cp)
262
+
263
+ # Artifact staging helpers
264
+ def _copy_local_artifacts(
265
+ self, paths: Dict[str, Any], export_dir: Path, cfg: Dict[str, Any]
266
+ ) -> List[str]:
267
+ exported_files: List[str] = []
268
+ copy_logs = bool(cfg.get("copy_logs", False))
269
+ only_required = bool(cfg.get("only_required", True))
270
+
271
+ # artifacts/
272
+ if paths["artifacts_dir"].exists():
273
+ if only_required:
274
+ names = [
275
+ a
276
+ for a in get_relevant_artifacts()
277
+ if (paths["artifacts_dir"] / a).exists()
278
+ ]
279
+ (export_dir / "artifacts").mkdir(parents=True, exist_ok=True)
280
+ for name in names:
281
+ src = paths["artifacts_dir"] / name
282
+ dst = export_dir / "artifacts" / name
283
+ shutil.copy2(src, dst)
284
+ exported_files.append(str(dst))
285
+ else:
286
+ # Copy everything under artifacts/ recursively
287
+ shutil.copytree(
288
+ paths["artifacts_dir"], export_dir / "artifacts", dirs_exist_ok=True
289
+ )
290
+ exported_files.extend(
291
+ [
292
+ str(f)
293
+ for f in (export_dir / "artifacts").rglob("*")
294
+ if f.is_file()
295
+ ]
296
+ )
297
+
298
+ # logs/
299
+ # If only_required is False → always copy logs; otherwise respect copy_logs
300
+ if (not only_required or copy_logs) and paths["logs_dir"].exists():
301
+ shutil.copytree(paths["logs_dir"], export_dir / "logs", dirs_exist_ok=True)
302
+ exported_files.extend(
303
+ [str(f) for f in (export_dir / "logs").rglob("*") if f.is_file()]
304
+ )
305
+
306
+ return exported_files
307
+
308
+ def _download_gitlab_remote_artifacts(
309
+ self, paths: Dict[str, Any], export_dir: Path
310
+ ) -> List[str]:
311
+ artifacts = download_gitlab_artifacts(paths, export_dir, extract_specific=True)
312
+ return [str(p) for p in artifacts.values()]
313
+
314
+ def _save_job_metadata(self, job_data: JobData, export_dir: Path):
315
+ metadata = {
316
+ "invocation_id": job_data.invocation_id,
317
+ "job_id": job_data.job_id,
318
+ "executor": job_data.executor,
319
+ "timestamp": job_data.timestamp,
320
+ }
321
+ (export_dir / "job_metadata.json").write_text(
322
+ json.dumps(metadata, indent=2, default=str)
323
+ )
324
+
325
+ # Summary JSON/CSV helpers
326
+ def _write_summary(self, job_data: JobData, cfg: Dict[str, Any]) -> Path:
327
+ """Read per-job artifacts, extract metrics, and update invocation-level summary."""
328
+ output_dir = Path(cfg.get("output_dir", "./nemo-evaluator-launcher-results"))
329
+ artifacts_dir = (
330
+ output_dir / job_data.invocation_id / job_data.job_id / "artifacts"
331
+ )
332
+ fmt = cfg.get("format")
333
+ filename = cfg.get("output_filename", f"processed_results.{fmt}")
334
+ out_path = output_dir / job_data.invocation_id / filename
335
+
336
+ # Extract metrics
337
+ metrics = extract_accuracy_metrics(
338
+ job_data,
339
+ lambda jd: {
340
+ "artifacts_dir": artifacts_dir,
341
+ "storage_type": "local_filesystem",
342
+ },
343
+ cfg.get("log_metrics", []),
344
+ )
345
+
346
+ if fmt == "json":
347
+ self._json_upsert(out_path, job_data, metrics, cfg)
348
+ elif fmt == "csv":
349
+ self._csv_upsert(out_path, job_data, metrics, cfg)
350
+ return out_path.resolve()
351
+
352
+ def _build_entry(
353
+ self, job_data: JobData, metrics: Dict[str, float], cfg: Dict[str, Any]
354
+ ) -> tuple[str, str, dict]:
355
+ bench = get_benchmark_info(job_data)
356
+ benchmark_name = bench["benchmark"]
357
+ model_name = get_model_name(job_data, cfg)
358
+ entry = {
359
+ "invocation_id": job_data.invocation_id,
360
+ "job_id": job_data.job_id,
361
+ "harness": bench.get("harness", "unknown"),
362
+ "container": get_container_from_mapping(job_data),
363
+ "scores": metrics,
364
+ "timestamp": datetime.now().isoformat(),
365
+ "executor": job_data.executor,
366
+ }
367
+ return benchmark_name, model_name, entry
368
+
369
+ def _json_upsert(
370
+ self,
371
+ out_path: Path,
372
+ job_data: JobData,
373
+ metrics: Dict[str, float],
374
+ cfg: Dict[str, Any],
375
+ ) -> None:
376
+ if out_path.exists():
377
+ data = json.loads(out_path.read_text(encoding="utf-8"))
378
+ else:
379
+ data = {"export_timestamp": datetime.now().isoformat(), "benchmarks": {}}
380
+
381
+ benchmark_name, model_name, entry = self._build_entry(job_data, metrics, cfg)
382
+ bench = data.setdefault("benchmarks", {}).setdefault(benchmark_name, {})
383
+ models = bench.setdefault("models", {})
384
+
385
+ # Switch to list semantics
386
+ lst = models.setdefault(model_name, [])
387
+ # Upsert by unique combination
388
+ idx = next(
389
+ (
390
+ i
391
+ for i, e in enumerate(lst)
392
+ if e.get("invocation_id") == job_data.invocation_id
393
+ and e.get("job_id") == job_data.job_id
394
+ ),
395
+ None,
396
+ )
397
+ if idx is None:
398
+ lst.append(entry) # append
399
+ else:
400
+ lst[idx] = entry # override
401
+
402
+ data["export_timestamp"] = datetime.now().isoformat()
403
+ out_path.parent.mkdir(parents=True, exist_ok=True)
404
+ out_path.write_text(
405
+ json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8"
406
+ )
407
+
408
    def _csv_upsert(
        self,
        out_path: Path,
        job_data: JobData,
        metrics: Dict[str, float],
        cfg: Dict[str, Any],
    ) -> None:
        """Insert or replace this job's row in the CSV summary at out_path.

        The CSV has fixed base columns followed by one column per metric
        (metric names stripped of their "<benchmark>_" prefix). Headers are
        extended in place when new metrics appear; rows are keyed by the
        (Invocation ID, Job ID) pair.
        """
        base_cols = [
            "Model Name",
            "Harness",
            "Task Name",
            "Executor",
            "Container",
            "Invocation ID",
            "Job ID",
        ]
        rows, headers = [], []
        # Load any existing summary so this job's row can be upserted into it.
        if out_path.exists():
            with out_path.open("r", newline="", encoding="utf-8") as f:
                r = csv.reader(f)
                headers = next(r, [])
                rows = list(r)
        else:
            headers = base_cols.copy()

        # Build metric names by stripping <benchmark>_ prefix from keys
        # (only benchmark and model_name are used here; entry is unused).
        benchmark, model_name, entry = self._build_entry(job_data, metrics, cfg)

        # clean headers using bare benchmark
        task_prefix = benchmark  # no harness prefix
        clean_metrics = []
        for full_key in metrics.keys():
            if full_key.startswith(f"{task_prefix}_"):
                clean_metrics.append(full_key[len(task_prefix) + 1 :])
            else:
                # Metric not namespaced by benchmark; keep the key as-is.
                clean_metrics.append(full_key)

        # Extend headers if new metrics appear; pad existing rows to match.
        metric_cols_existing = [h for h in headers if h not in base_cols]
        new_metric_cols = [
            m for m in sorted(set(clean_metrics)) if m not in metric_cols_existing
        ]
        if new_metric_cols:
            headers = headers + new_metric_cols
            for row in rows:
                row.extend([""] * len(new_metric_cols))

        # Build row for this job (upsert keyed by invocation_id + job_id)
        bench = get_benchmark_info(job_data)
        task_name = get_task_name(job_data)
        row = [
            model_name,
            bench.get("harness", "unknown"),
            task_name,
            job_data.executor,
            get_container_from_mapping(job_data),
            job_data.invocation_id,
            job_data.job_id,
        ]
        # Fill metric columns from <benchmark>_<...>; coerce to float when
        # possible, otherwise keep the raw value (or "" when missing).
        for col in headers[len(base_cols) :]:
            full_key = f"{task_prefix}_{col}"
            val = metrics.get(full_key, "")
            try:
                row.append("" if val == "" else float(val))
            except Exception:
                row.append(val)

        # Upsert row — positions 5 and 6 are Invocation ID / Job ID in base_cols;
        # rows shorter than the base columns cannot be keyed and are skipped.
        idx_by_key = {(r[5], r[6]): i for i, r in enumerate(rows) if len(r) >= 7}
        key = (job_data.invocation_id, job_data.job_id)
        if key in idx_by_key:
            rows[idx_by_key[key]] = row
        else:
            rows.append(row)

        # Rewrite the whole file: header first, then all rows.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        with out_path.open("w", newline="", encoding="utf-8") as f:
            w = csv.writer(f)
            w.writerow(headers)
            w.writerows(rows)