nemo-evaluator-launcher 0.1.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of nemo-evaluator-launcher might be problematic.

Files changed (60):
  1. nemo_evaluator_launcher/__init__.py +79 -0
  2. nemo_evaluator_launcher/api/__init__.py +24 -0
  3. nemo_evaluator_launcher/api/functional.py +698 -0
  4. nemo_evaluator_launcher/api/types.py +98 -0
  5. nemo_evaluator_launcher/api/utils.py +19 -0
  6. nemo_evaluator_launcher/cli/__init__.py +15 -0
  7. nemo_evaluator_launcher/cli/export.py +267 -0
  8. nemo_evaluator_launcher/cli/info.py +512 -0
  9. nemo_evaluator_launcher/cli/kill.py +41 -0
  10. nemo_evaluator_launcher/cli/ls_runs.py +134 -0
  11. nemo_evaluator_launcher/cli/ls_tasks.py +136 -0
  12. nemo_evaluator_launcher/cli/main.py +226 -0
  13. nemo_evaluator_launcher/cli/run.py +200 -0
  14. nemo_evaluator_launcher/cli/status.py +164 -0
  15. nemo_evaluator_launcher/cli/version.py +55 -0
  16. nemo_evaluator_launcher/common/__init__.py +16 -0
  17. nemo_evaluator_launcher/common/execdb.py +283 -0
  18. nemo_evaluator_launcher/common/helpers.py +366 -0
  19. nemo_evaluator_launcher/common/logging_utils.py +357 -0
  20. nemo_evaluator_launcher/common/mapping.py +295 -0
  21. nemo_evaluator_launcher/common/printing_utils.py +93 -0
  22. nemo_evaluator_launcher/configs/__init__.py +15 -0
  23. nemo_evaluator_launcher/configs/default.yaml +28 -0
  24. nemo_evaluator_launcher/configs/deployment/generic.yaml +33 -0
  25. nemo_evaluator_launcher/configs/deployment/nim.yaml +32 -0
  26. nemo_evaluator_launcher/configs/deployment/none.yaml +16 -0
  27. nemo_evaluator_launcher/configs/deployment/sglang.yaml +38 -0
  28. nemo_evaluator_launcher/configs/deployment/trtllm.yaml +24 -0
  29. nemo_evaluator_launcher/configs/deployment/vllm.yaml +42 -0
  30. nemo_evaluator_launcher/configs/execution/lepton/default.yaml +92 -0
  31. nemo_evaluator_launcher/configs/execution/local.yaml +19 -0
  32. nemo_evaluator_launcher/configs/execution/slurm/default.yaml +34 -0
  33. nemo_evaluator_launcher/executors/__init__.py +22 -0
  34. nemo_evaluator_launcher/executors/base.py +120 -0
  35. nemo_evaluator_launcher/executors/lepton/__init__.py +16 -0
  36. nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +609 -0
  37. nemo_evaluator_launcher/executors/lepton/executor.py +1004 -0
  38. nemo_evaluator_launcher/executors/lepton/job_helpers.py +398 -0
  39. nemo_evaluator_launcher/executors/local/__init__.py +15 -0
  40. nemo_evaluator_launcher/executors/local/executor.py +605 -0
  41. nemo_evaluator_launcher/executors/local/run.template.sh +103 -0
  42. nemo_evaluator_launcher/executors/registry.py +38 -0
  43. nemo_evaluator_launcher/executors/slurm/__init__.py +15 -0
  44. nemo_evaluator_launcher/executors/slurm/executor.py +1147 -0
  45. nemo_evaluator_launcher/exporters/__init__.py +36 -0
  46. nemo_evaluator_launcher/exporters/base.py +121 -0
  47. nemo_evaluator_launcher/exporters/gsheets.py +409 -0
  48. nemo_evaluator_launcher/exporters/local.py +502 -0
  49. nemo_evaluator_launcher/exporters/mlflow.py +619 -0
  50. nemo_evaluator_launcher/exporters/registry.py +40 -0
  51. nemo_evaluator_launcher/exporters/utils.py +624 -0
  52. nemo_evaluator_launcher/exporters/wandb.py +490 -0
  53. nemo_evaluator_launcher/package_info.py +38 -0
  54. nemo_evaluator_launcher/resources/mapping.toml +380 -0
  55. nemo_evaluator_launcher-0.1.28.dist-info/METADATA +494 -0
  56. nemo_evaluator_launcher-0.1.28.dist-info/RECORD +60 -0
  57. nemo_evaluator_launcher-0.1.28.dist-info/WHEEL +5 -0
  58. nemo_evaluator_launcher-0.1.28.dist-info/entry_points.txt +3 -0
  59. nemo_evaluator_launcher-0.1.28.dist-info/licenses/LICENSE +451 -0
  60. nemo_evaluator_launcher-0.1.28.dist-info/top_level.txt +1 -0
nemo_evaluator_launcher/exporters/mlflow.py
@@ -0,0 +1,619 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ """Evaluation results exporter for MLflow tracking."""
+
+ import os
+ import tempfile
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional
+
+ try:
+     import mlflow
+
+     MLFLOW_AVAILABLE = True
+ except ImportError:
+     MLFLOW_AVAILABLE = False
+
+ from nemo_evaluator_launcher.common.execdb import JobData
+ from nemo_evaluator_launcher.common.logging_utils import logger
+ from nemo_evaluator_launcher.exporters.base import BaseExporter, ExportResult
+ from nemo_evaluator_launcher.exporters.local import LocalExporter
+ from nemo_evaluator_launcher.exporters.registry import register_exporter
+ from nemo_evaluator_launcher.exporters.utils import (
+     extract_accuracy_metrics,
+     extract_exporter_config,
+     get_artifact_root,
+     get_available_artifacts,
+     get_benchmark_info,
+     get_task_name,
+     mlflow_sanitize,
+ )
+
+
+ @register_exporter("mlflow")
+ class MLflowExporter(BaseExporter):
+     """Export accuracy metrics to MLflow tracking server."""
+
+     def supports_executor(self, executor_type: str) -> bool:
+         return True
+
+     def is_available(self) -> bool:
+         return MLFLOW_AVAILABLE
+
+     def _get_existing_run_info(
+         self, job_data: JobData, config: Dict[str, Any]
+     ) -> tuple[bool, Optional[str]]:
+         """Check if an MLflow run already exists for this invocation/job."""
+         try:
+             import mlflow
+
+             tracking_uri = config.get("tracking_uri")
+             if not tracking_uri:
+                 return False, None
+
+             mlflow.set_tracking_uri(tracking_uri)
+             experiment_name = config.get("experiment_name", "nemo-evaluator-launcher")
+
+             try:
+                 experiment = mlflow.get_experiment_by_name(experiment_name)
+                 if not experiment:
+                     return False, None
+
+                 # Search for runs with a matching invocation_id tag
+                 runs = mlflow.search_runs(
+                     experiment_ids=[experiment.experiment_id],
+                     filter_string=f"tags.invocation_id = '{job_data.invocation_id}'",
+                 )
+
+                 if not runs.empty:
+                     existing_run = runs.iloc[0]
+                     return True, existing_run.run_id
+
+             except Exception:
+                 pass
+
+             return False, None
+         except ImportError:
+             return False, None
+
+     def export_job(self, job_data: JobData) -> ExportResult:
+         """Export job to MLflow."""
+         if not self.is_available():
+             return ExportResult(
+                 success=False, dest="mlflow", message="mlflow package not installed"
+             )
+
+         try:
+             # Extract config using common utility
+             mlflow_config = extract_exporter_config(job_data, "mlflow", self.config)
+
+             # resolve tracking_uri with fallbacks
+             tracking_uri = mlflow_config.get("tracking_uri")
+             if not tracking_uri:
+                 tracking_uri = os.getenv("MLFLOW_TRACKING_URI")
+             # allow env var name
+             if tracking_uri and "://" not in tracking_uri:
+                 tracking_uri = os.getenv(tracking_uri, tracking_uri)
+
+             if not tracking_uri:
+                 return ExportResult(
+                     success=False,
+                     dest="mlflow",
+                     message="tracking_uri is required (set export.mlflow.tracking_uri or MLFLOW_TRACKING_URI)",
+                 )
+
+             # Stage artifacts locally if remote_ssh (e.g., Slurm), so we can extract metrics
+             staged_base_dir = None
+             try:
+                 paths = self.get_job_paths(job_data)
+                 if paths.get("storage_type") == "remote_ssh":
+                     tmp_stage = Path(tempfile.mkdtemp(prefix="mlflow_stage_"))
+                     LocalExporter(
+                         {
+                             "output_dir": str(tmp_stage),
+                             "copy_logs": mlflow_config.get(
+                                 "log_logs", False
+                             ),  # log_logs -> copy_logs
+                             "only_required": mlflow_config.get("only_required", True),
+                         }
+                     ).export_job(job_data)
+                     staged_base_dir = (
+                         tmp_stage / job_data.invocation_id / job_data.job_id
+                     )
+             except Exception as e:
+                 logger.warning(f"Failed staging artifacts for {job_data.job_id}: {e}")
+
+             # Extract metrics (prefer staged if available)
+             log_metrics = mlflow_config.get("log_metrics", [])
+             if staged_base_dir and (staged_base_dir / "artifacts").exists():
+                 accuracy_metrics = extract_accuracy_metrics(
+                     job_data,
+                     lambda _: {
+                         "artifacts_dir": staged_base_dir / "artifacts",
+                         "storage_type": "local_filesystem",
+                     },
+                     log_metrics,
+                 )
+             else:
+                 accuracy_metrics = extract_accuracy_metrics(
+                     job_data, self.get_job_paths, log_metrics
+                 )
+
+             if not accuracy_metrics:
+                 return ExportResult(
+                     success=False, dest="mlflow", message="No accuracy metrics found"
+                 )
+
+             # Set up MLflow
+             tracking_uri = tracking_uri.rstrip("/")
+             mlflow.set_tracking_uri(tracking_uri)
+
+             # Set experiment
+             experiment_name = mlflow_config.get(
+                 "experiment_name", "nemo-evaluator-launcher"
+             )
+             mlflow.set_experiment(experiment_name)
+
+             # Prepare parameters
+             all_params = {
+                 "invocation_id": job_data.invocation_id,
+                 "executor": job_data.executor,
+                 "timestamp": str(job_data.timestamp),
+             }
+
+             # Add extra metadata if provided
+             if mlflow_config.get("extra_metadata"):
+                 all_params.update(mlflow_config["extra_metadata"])
+
+             # Add webhook info if available
+             if mlflow_config.get("triggered_by_webhook"):
+                 all_params.update(
+                     {
+                         "webhook_triggered": "true",
+                         "webhook_source": mlflow_config.get("webhook_source"),
+                         "source_artifact": mlflow_config.get("source_artifact"),
+                         "config_source": mlflow_config.get("config_source"),
+                     }
+                 )
+
+             # Sanitize params
+             safe_params = {
+                 mlflow_sanitize(k, "param_key"): mlflow_sanitize(v, "param_value")
+                 for k, v in (all_params or {}).items()
+                 if v is not None
+             }
+
+             # Prepare tags
+             tags = {}
+             if mlflow_config.get("tags"):
+                 tags.update({k: v for k, v in mlflow_config["tags"].items() if v})
+
+             bench_info = get_benchmark_info(job_data)
+             benchmark = bench_info.get("benchmark", get_task_name(job_data))
+             harness = bench_info.get("harness", "unknown")
+
+             # Tag the run with invocation_id and task metadata
+             exec_type = (job_data.config or {}).get("execution", {}).get(
+                 "type"
+             ) or job_data.executor
+             tags.update(
+                 {
+                     "invocation_id": job_data.invocation_id,
+                     "job_id": job_data.job_id,
+                     "task_name": benchmark,
+                     "benchmark": benchmark,
+                     "harness": harness,
+                     "executor": exec_type,
+                 }
+             )
+
+             # Sanitize tags
+             safe_tags = {
+                 mlflow_sanitize(k, "tag_key"): mlflow_sanitize(v, "tag_value")
+                 for k, v in (tags or {}).items()
+                 if v is not None
+             }
+
+             # skip run if it already exists
+             exists, existing_run_id = self._get_existing_run_info(
+                 job_data, mlflow_config
+             )
+             if exists and mlflow_config.get("skip_existing"):
+                 return ExportResult(
+                     success=True,
+                     dest="mlflow",
+                     message=f"Run already exists: {existing_run_id}, skipped",
+                 )
+
+             # run
+             with mlflow.start_run() as run:
+                 # Set tags
+                 if safe_tags:
+                     mlflow.set_tags(safe_tags)
+
+                 # Set run name
+                 run_name = (
+                     mlflow_config.get("run_name")
+                     or f"eval-{job_data.invocation_id}-{benchmark}"
+                 )
+                 mlflow.set_tag("mlflow.runName", mlflow_sanitize(run_name, "tag_value"))
+
+                 # Set description only if provided
+                 description = mlflow_config.get("description")
+                 if description:
+                     mlflow.set_tag(
+                         "mlflow.note.content", mlflow_sanitize(description, "tag_value")
+                     )
+
+                 # Log parameters
+                 mlflow.log_params(safe_params)
+
+                 # Sanitize metric keys before logging
+                 safe_metrics = {
+                     mlflow_sanitize(k, "metric"): v
+                     for k, v in (accuracy_metrics or {}).items()
+                 }
+                 mlflow.log_metrics(safe_metrics)
+
+                 # Log artifacts
+                 artifacts_logged = self._log_artifacts(
+                     job_data, mlflow_config, staged_base_dir
+                 )
+
+                 # Build run URL
+                 run_url = None
+                 if tracking_uri.startswith(("http://", "https://")):
+                     run_url = f"{tracking_uri}/#/experiments/{run.info.experiment_id}/runs/{run.info.run_id}"
+
+                 return ExportResult(
+                     success=True,
+                     dest="mlflow",
+                     message=f"Logged {len(accuracy_metrics)} metrics to MLflow",
+                     metadata={
+                         "run_id": run.info.run_id,
+                         "experiment_id": run.info.experiment_id,
+                         "tracking_uri": tracking_uri,
+                         "run_url": run_url,
+                         "invocation_id": job_data.invocation_id,
+                         "metrics_logged": len(accuracy_metrics),
+                         "params_logged": len(safe_params),
+                         "artifacts_logged": len(artifacts_logged),
+                     },
+                 )
+
+         except Exception as e:
+             logger.error(f"MLflow export failed: {e}")
+             return ExportResult(
+                 success=False, dest="mlflow", message=f"Failed: {str(e)}"
+             )
+
+     def _log_artifacts(
+         self,
+         job_data: JobData,
+         mlflow_config: Dict[str, Any],
+         pre_staged_dir: Optional[Path] = None,
+     ) -> List[str]:
+         """Log evaluation artifacts to MLflow using LocalExporter for transfer."""
+
+         # Check if artifacts should be logged (default: True)
+         if not mlflow_config.get("log_artifacts", True):
+             return []
+
+         try:
+             should_cleanup = False
+             # Use pre-staged dir if available; otherwise stage now
+             if pre_staged_dir and pre_staged_dir.exists():
+                 base_dir = pre_staged_dir
+             else:
+                 temp_dir = tempfile.mkdtemp(prefix="mlflow_artifacts_")
+                 local_exporter = LocalExporter(
+                     {
+                         "output_dir": str(temp_dir),
+                         "copy_logs": mlflow_config.get(
+                             "log_logs", mlflow_config.get("copy_logs", False)
+                         ),
+                         "only_required": mlflow_config.get("only_required", True),
+                         "format": mlflow_config.get("format", None),
+                         "log_metrics": mlflow_config.get("log_metrics", []),
+                         "output_filename": mlflow_config.get("output_filename", None),
+                     }
+                 )
+                 local_result = local_exporter.export_job(job_data)
+                 if not local_result.success:
+                     logger.error(
+                         f"Failed to download artifacts: {local_result.message}"
+                     )
+                     return []
+                 base_dir = Path(local_result.dest)
+                 should_cleanup = True
+
+             artifacts_dir = base_dir / "artifacts"
+             logs_dir = base_dir / "logs"
+             logged_names: list[str] = []
+             artifact_path = get_artifact_root(job_data)  # "<harness>.<benchmark>"
+
+             # Log config at root level (or synthesize)
+             cfg_logged = False
+             for fname in ("config.yml", "run_config.yml"):
+                 p = artifacts_dir / fname
+                 if p.exists():
+                     mlflow.log_artifact(str(p))
+                     cfg_logged = True
+                     break
+             if not cfg_logged:
+                 with tempfile.TemporaryDirectory() as tmpdir:
+                     from yaml import dump as ydump
+
+                     cfg_file = Path(tmpdir) / "config.yaml"
+                     cfg_file.write_text(
+                         ydump(
+                             job_data.config or {},
+                             default_flow_style=False,
+                             sort_keys=False,
+                         )
+                     )
+                     mlflow.log_artifact(str(cfg_file))
+
+             # Choose files to upload
+             files_to_upload: list[Path] = []
+             if mlflow_config.get("only_required", True):
+                 for fname in get_available_artifacts(artifacts_dir):
+                     p = artifacts_dir / fname
+                     if p.exists():
+                         files_to_upload.append(p)
+             else:
+                 for p in artifacts_dir.iterdir():  # top-level files only
+                     if p.is_file():
+                         files_to_upload.append(p)
+
+             # Upload artifacts (with DEBUG log per file)
+             for fpath in files_to_upload:
+                 rel = fpath.relative_to(artifacts_dir).as_posix()
+                 parent = os.path.dirname(rel)
+                 mlflow.log_artifact(
+                     str(fpath),
+                     artifact_path=f"{artifact_path}/artifacts/{parent}".rstrip("/"),
+                 )
+                 logged_names.append(rel)
+                 logger.debug(f"mlflow upload artifact: {rel}")
+
+             # Optionally upload logs under "<harness.task>/logs"
+             if mlflow_config.get("log_logs", False) and logs_dir.exists():
+                 for p in logs_dir.iterdir():
+                     if p.is_file():
+                         rel = p.name
+                         mlflow.log_artifact(
+                             str(p), artifact_path=f"{artifact_path}/logs"
+                         )
+                         logged_names.append(f"logs/{rel}")
+                         logger.debug(f"mlflow upload log: {rel}")
+
+             logger.info(
+                 f"MLflow upload summary: files={len(logged_names)}, only_required={mlflow_config.get('only_required', True)}, log_logs={mlflow_config.get('log_logs', False)}"
+             )
+             if should_cleanup:
+                 import shutil
+
+                 shutil.rmtree(base_dir, ignore_errors=True)
+
+             return logged_names
+         except Exception as e:
+             logger.error(f"Error logging artifacts: {e}")
+             return []
+
+     def export_invocation(self, invocation_id: str) -> Dict[str, Any]:
+         """Export all jobs in invocation as one MLflow run."""
+         if not self.is_available():
+             return {"success": False, "error": "mlflow package not installed"}
+
+         jobs = self.db.get_jobs(invocation_id)
+         if not jobs:
+             return {
+                 "success": False,
+                 "error": f"No jobs found for invocation {invocation_id}",
+             }
+
+         try:
+             # Get first job for config access
+             first_job = list(jobs.values())[0]
+
+             # Extract config using common utility
+             mlflow_config = extract_exporter_config(first_job, "mlflow", self.config)
+
+             # resolve tracking_uri with fallbacks
+             tracking_uri = mlflow_config.get("tracking_uri") or os.getenv(
+                 "MLFLOW_TRACKING_URI"
+             )
+             if tracking_uri and "://" not in tracking_uri:
+                 tracking_uri = os.getenv(tracking_uri, tracking_uri)
+             if not tracking_uri:
+                 return {
+                     "success": False,
+                     "error": "tracking_uri is required (set export.mlflow.tracking_uri or MLFLOW_TRACKING_URI)",
+                 }
+
+             # Collect metrics from ALL jobs
+             all_metrics = {}
+             staged_map: dict[str, Path] = {}
+             for job_id, job_data in jobs.items():
+                 try:
+                     paths = self.get_job_paths(job_data)
+                     if paths.get("storage_type") == "remote_ssh":
+                         tmp_stage = Path(tempfile.mkdtemp(prefix="mlflow_inv_stage_"))
+                         LocalExporter(
+                             {
+                                 "output_dir": str(tmp_stage),
+                                 "copy_logs": mlflow_config.get("log_logs", False),
+                                 "only_required": mlflow_config.get(
+                                     "only_required", True
+                                 ),
+                             }
+                         ).export_job(job_data)
+                         staged_map[job_id] = (
+                             tmp_stage / job_data.invocation_id / job_data.job_id
+                         )
+                 except Exception as e:
+                     logger.warning(f"Staging failed for {job_id}: {e}")
+
+             for job_id, job_data in jobs.items():
+                 log_metrics = mlflow_config.get("log_metrics", [])
+                 if job_id in staged_map and (staged_map[job_id] / "artifacts").exists():
+                     job_metrics = extract_accuracy_metrics(
+                         job_data,
+                         lambda _: {
+                             "artifacts_dir": staged_map[job_id] / "artifacts",
+                             "storage_type": "local_filesystem",
+                         },
+                         log_metrics,
+                     )
+                 else:
+                     job_metrics = extract_accuracy_metrics(
+                         job_data, self.get_job_paths, log_metrics
+                     )
+                 all_metrics.update(job_metrics)
+
+             if not all_metrics:
+                 return {
+                     "success": False,
+                     "error": "No accuracy metrics found in any job",
+                 }
+
+             # Set up MLflow
+             tracking_uri = tracking_uri.rstrip("/")
+             mlflow.set_tracking_uri(tracking_uri)
+
+             experiment_name = mlflow_config.get(
+                 "experiment_name", "nemo-evaluator-launcher"
+             )
+             mlflow.set_experiment(experiment_name)
+
+             # Prepare parameters for invocation
+             inv_exec_type = (first_job.config or {}).get("execution", {}).get(
+                 "type"
+             ) or first_job.executor
+             all_params = {
+                 "invocation_id": invocation_id,
+                 "executor": inv_exec_type,
+                 "timestamp": str(first_job.timestamp),
+                 "jobs_count": str(len(jobs)),
+             }
+
+             # Add webhook info if available
+             if mlflow_config.get("triggered_by_webhook"):
+                 all_params.update(
+                     {
+                         "webhook_triggered": "true",
+                         "webhook_source": mlflow_config.get("webhook_source"),
+                         "source_artifact": mlflow_config.get("source_artifact"),
+                         "config_source": mlflow_config.get("config_source"),
+                     }
+                 )
+
+             if mlflow_config.get("extra_metadata"):
+                 all_params.update(mlflow_config["extra_metadata"])
+
+             # Prepare tags
+             tags = {"invocation_id": invocation_id}
+             if mlflow_config.get("tags"):
+                 tags.update({k: v for k, v in mlflow_config["tags"].items() if v})
+
+             # Truncate
+             safe_params = {
+                 str(k)[:250]: str(v)[:250] for k, v in all_params.items() if v
+             }
+             safe_tags = {str(k)[:250]: str(v)[:5000] for k, v in tags.items() if v}
+
+             # Check for existing run
+             exists, existing_run_id = self._get_existing_run_info(
+                 first_job, mlflow_config
+             )
+             if exists and mlflow_config.get("skip_existing"):
+                 return {
+                     "success": True,
+                     "invocation_id": invocation_id,
+                     "jobs": {
+                         job_id: {
+                             "success": True,
+                             "message": f"Run already exists: {existing_run_id}, skipped",
+                         }
+                         for job_id in jobs.keys()
+                     },
+                     "metadata": {"run_id": existing_run_id, "skipped": True},
+                 }
+
+             # Create MLflow run with ALL metrics
+             with mlflow.start_run() as run:
+                 # Set tags
+                 if safe_tags:
+                     mlflow.set_tags(safe_tags)
+
+                 # Set run name
+                 run_name = mlflow_config.get("run_name") or f"eval-{invocation_id}"
+                 mlflow.set_tag("mlflow.runName", mlflow_sanitize(run_name, "tag_value"))
+
+                 # Set description
+                 description = mlflow_config.get("description")
+                 if description:
+                     mlflow.set_tag(
+                         "mlflow.note.content", mlflow_sanitize(description, "tag_value")
+                     )
+
+                 # Log parameters
+                 mlflow.log_params(safe_params)
+
+                 # Sanitize metric keys
+                 safe_all_metrics = {
+                     mlflow_sanitize(k, "metric"): v
+                     for k, v in (all_metrics or {}).items()
+                 }
+                 mlflow.log_metrics(safe_all_metrics)
+
+                 # Log artifacts from all jobs
+                 total_artifacts = 0
+                 for job_id, job_data in jobs.items():
+                     artifacts_logged = self._log_artifacts(
+                         job_data, mlflow_config, staged_map.get(job_id)
+                     )
+                     total_artifacts += len(artifacts_logged)
+
+                 # Build run URL
+                 run_url = None
+                 if tracking_uri.startswith(("http://", "https://")):
+                     run_url = f"{tracking_uri}/#/experiments/{run.info.experiment_id}/runs/{run.info.run_id}"
+
+                 return {
+                     "success": True,
+                     "invocation_id": invocation_id,
+                     "jobs": {
+                         job_id: {
+                             "success": True,
+                             "message": "Contributed to invocation run",
+                         }
+                         for job_id in jobs.keys()
+                     },
+                     "metadata": {
+                         "run_id": run.info.run_id,
+                         "experiment_id": run.info.experiment_id,
+                         "tracking_uri": tracking_uri,
+                         "run_url": run_url,
+                         "metrics_logged": len(all_metrics),
+                         "params_logged": len(safe_params),
+                         "artifacts_logged": total_artifacts,
+                     },
+                 }
+         except Exception as e:
+             logger.error(f"MLflow export failed for invocation {invocation_id}: {e}")
+             return {"success": False, "error": f"MLflow export failed: {str(e)}"}
nemo_evaluator_launcher/exporters/registry.py
@@ -0,0 +1,40 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ from typing import Callable, Dict, Type
+
+ from nemo_evaluator_launcher.exporters.base import BaseExporter
+
+ # Maps exporter names to exporter classes (not instances).
+ _EXPORTER_REGISTRY: Dict[str, Type[BaseExporter]] = {}
+
+
+ def register_exporter(name: str) -> Callable:
+     def wrapper(cls):
+         _EXPORTER_REGISTRY[name] = cls
+         return cls
+
+     return wrapper
+
+
+ def get_exporter(name: str) -> Type[BaseExporter]:
+     if name not in _EXPORTER_REGISTRY:
+         raise ValueError(
+             f"Unknown exporter: {name}. Available: {list(_EXPORTER_REGISTRY.keys())}"
+         )
+     return _EXPORTER_REGISTRY[name]
+
+
+ def available_exporters() -> list[str]:
+     return list(_EXPORTER_REGISTRY.keys())
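Note that the registry stores class objects, so `get_exporter` returns a class to instantiate rather than an instance. A small sketch of registering a custom exporter against this API, mirroring the `@register_exporter("mlflow")` usage in mlflow.py; `CustomExporter` and the "custom" name are hypothetical:

    from nemo_evaluator_launcher.common.execdb import JobData
    from nemo_evaluator_launcher.exporters.base import BaseExporter, ExportResult
    from nemo_evaluator_launcher.exporters.registry import (
        available_exporters,
        get_exporter,
        register_exporter,
    )


    @register_exporter("custom")  # hypothetical exporter name
    class CustomExporter(BaseExporter):
        def supports_executor(self, executor_type: str) -> bool:
            return True

        def export_job(self, job_data: JobData) -> ExportResult:
            # No-op example; a real exporter would push metrics somewhere.
            return ExportResult(success=True, dest="custom", message="no-op")


    assert "custom" in available_exporters()
    assert get_exporter("custom") is CustomExporter  # returns the class itself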