nemo-evaluator-launcher 0.1.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of nemo-evaluator-launcher has been flagged as potentially problematic.

Files changed (57)
  1. nemo_evaluator_launcher/__init__.py +65 -0
  2. nemo_evaluator_launcher/api/__init__.py +24 -0
  3. nemo_evaluator_launcher/api/functional.py +641 -0
  4. nemo_evaluator_launcher/api/types.py +89 -0
  5. nemo_evaluator_launcher/api/utils.py +19 -0
  6. nemo_evaluator_launcher/cli/__init__.py +15 -0
  7. nemo_evaluator_launcher/cli/export.py +148 -0
  8. nemo_evaluator_launcher/cli/info.py +117 -0
  9. nemo_evaluator_launcher/cli/kill.py +39 -0
  10. nemo_evaluator_launcher/cli/ls_runs.py +113 -0
  11. nemo_evaluator_launcher/cli/ls_tasks.py +34 -0
  12. nemo_evaluator_launcher/cli/main.py +136 -0
  13. nemo_evaluator_launcher/cli/run.py +135 -0
  14. nemo_evaluator_launcher/cli/status.py +118 -0
  15. nemo_evaluator_launcher/cli/version.py +52 -0
  16. nemo_evaluator_launcher/common/__init__.py +16 -0
  17. nemo_evaluator_launcher/common/execdb.py +189 -0
  18. nemo_evaluator_launcher/common/helpers.py +157 -0
  19. nemo_evaluator_launcher/common/logging_utils.py +349 -0
  20. nemo_evaluator_launcher/common/mapping.py +310 -0
  21. nemo_evaluator_launcher/configs/__init__.py +15 -0
  22. nemo_evaluator_launcher/configs/default.yaml +28 -0
  23. nemo_evaluator_launcher/configs/deployment/nim.yaml +32 -0
  24. nemo_evaluator_launcher/configs/deployment/none.yaml +16 -0
  25. nemo_evaluator_launcher/configs/deployment/sglang.yaml +38 -0
  26. nemo_evaluator_launcher/configs/deployment/vllm.yaml +41 -0
  27. nemo_evaluator_launcher/configs/execution/lepton/default.yaml +92 -0
  28. nemo_evaluator_launcher/configs/execution/local.yaml +17 -0
  29. nemo_evaluator_launcher/configs/execution/slurm/default.yaml +33 -0
  30. nemo_evaluator_launcher/executors/__init__.py +22 -0
  31. nemo_evaluator_launcher/executors/base.py +97 -0
  32. nemo_evaluator_launcher/executors/lepton/__init__.py +16 -0
  33. nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +589 -0
  34. nemo_evaluator_launcher/executors/lepton/executor.py +905 -0
  35. nemo_evaluator_launcher/executors/lepton/job_helpers.py +394 -0
  36. nemo_evaluator_launcher/executors/local/__init__.py +15 -0
  37. nemo_evaluator_launcher/executors/local/executor.py +491 -0
  38. nemo_evaluator_launcher/executors/local/run.template.sh +88 -0
  39. nemo_evaluator_launcher/executors/registry.py +38 -0
  40. nemo_evaluator_launcher/executors/slurm/__init__.py +15 -0
  41. nemo_evaluator_launcher/executors/slurm/executor.py +982 -0
  42. nemo_evaluator_launcher/exporters/__init__.py +36 -0
  43. nemo_evaluator_launcher/exporters/base.py +112 -0
  44. nemo_evaluator_launcher/exporters/gsheets.py +391 -0
  45. nemo_evaluator_launcher/exporters/local.py +488 -0
  46. nemo_evaluator_launcher/exporters/mlflow.py +448 -0
  47. nemo_evaluator_launcher/exporters/registry.py +40 -0
  48. nemo_evaluator_launcher/exporters/utils.py +669 -0
  49. nemo_evaluator_launcher/exporters/wandb.py +376 -0
  50. nemo_evaluator_launcher/package_info.py +35 -0
  51. nemo_evaluator_launcher/resources/mapping.toml +344 -0
  52. nemo_evaluator_launcher-0.1.0rc2.dist-info/METADATA +35 -0
  53. nemo_evaluator_launcher-0.1.0rc2.dist-info/RECORD +57 -0
  54. nemo_evaluator_launcher-0.1.0rc2.dist-info/WHEEL +5 -0
  55. nemo_evaluator_launcher-0.1.0rc2.dist-info/entry_points.txt +3 -0
  56. nemo_evaluator_launcher-0.1.0rc2.dist-info/licenses/LICENSE +451 -0
  57. nemo_evaluator_launcher-0.1.0rc2.dist-info/top_level.txt +1 -0
nemo_evaluator_launcher/exporters/mlflow.py
@@ -0,0 +1,448 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ """Evaluation results exporter for MLflow tracking."""
+
+ import shutil
+ import tempfile
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional
+
+ import yaml
+
+ try:
+     import mlflow
+
+     MLFLOW_AVAILABLE = True
+ except ImportError:
+     MLFLOW_AVAILABLE = False
+
+ from nemo_evaluator_launcher.common.execdb import JobData
+ from nemo_evaluator_launcher.common.logging_utils import logger
+ from nemo_evaluator_launcher.exporters.base import BaseExporter, ExportResult
+ from nemo_evaluator_launcher.exporters.local import LocalExporter
+ from nemo_evaluator_launcher.exporters.registry import register_exporter
+ from nemo_evaluator_launcher.exporters.utils import (
+     extract_accuracy_metrics,
+     extract_exporter_config,
+     get_available_artifacts,
+     get_benchmark_info,
+     get_task_name,
+ )
+
+
+ @register_exporter("mlflow")
+ class MLflowExporter(BaseExporter):
+     """Export accuracy metrics to an MLflow tracking server."""
+
+     def supports_executor(self, executor_type: str) -> bool:
+         return True
+
+     def is_available(self) -> bool:
+         return MLFLOW_AVAILABLE
+
+     def _get_existing_run_info(
+         self, job_data: JobData, config: Dict[str, Any]
+     ) -> tuple[bool, Optional[str]]:
+         """Check whether an MLflow run already exists for this invocation/job."""
+         try:
+             import mlflow
+
+             tracking_uri = config.get("tracking_uri")
+             if not tracking_uri:
+                 return False, None
+
+             mlflow.set_tracking_uri(tracking_uri)
+             experiment_name = config.get("experiment_name", "nemo-evaluator-launcher")
+
+             try:
+                 experiment = mlflow.get_experiment_by_name(experiment_name)
+                 if not experiment:
+                     return False, None
+
+                 # Search for runs with a matching invocation_id tag
+                 runs = mlflow.search_runs(
+                     experiment_ids=[experiment.experiment_id],
+                     filter_string=f"tags.invocation_id = '{job_data.invocation_id}'",
+                 )
+
+                 if not runs.empty:
+                     existing_run = runs.iloc[0]
+                     return True, existing_run.run_id
+
+             except Exception:
+                 pass
+
+             return False, None
+         except ImportError:
+             return False, None
+
+     def export_job(self, job_data: JobData) -> ExportResult:
+         """Export a single job to MLflow."""
+         if not self.is_available():
+             return ExportResult(
+                 success=False, dest="mlflow", message="mlflow package not installed"
+             )
+
+         try:
+             # Extract config using common utility
+             mlflow_config = extract_exporter_config(job_data, "mlflow", self.config)
+
+             # Extract metrics
+             log_metrics = mlflow_config.get("log_metrics", [])
+             accuracy_metrics = extract_accuracy_metrics(
+                 job_data, self.get_job_paths, log_metrics
+             )
+
+             if not accuracy_metrics:
+                 return ExportResult(
+                     success=False, dest="mlflow", message="No accuracy metrics found"
+                 )
+
+             # Set up MLflow
+             tracking_uri = mlflow_config.get("tracking_uri")
+             if not tracking_uri:
+                 return ExportResult(
+                     success=False, dest="mlflow", message="tracking_uri is required"
+                 )
+
+             tracking_uri = tracking_uri.rstrip("/")
+             mlflow.set_tracking_uri(tracking_uri)
+
+             # Set experiment
+             experiment_name = mlflow_config.get(
+                 "experiment_name", "nemo-evaluator-launcher"
+             )
+             mlflow.set_experiment(experiment_name)
+
+             # Prepare parameters
+             all_params = {
+                 "invocation_id": job_data.invocation_id,
+                 "executor": job_data.executor,
+                 "timestamp": str(job_data.timestamp),
+             }
+
+             # Add extra metadata if provided
+             if mlflow_config.get("extra_metadata"):
+                 all_params.update(mlflow_config["extra_metadata"])
+
+             # Add webhook info if available
+             if mlflow_config.get("triggered_by_webhook"):
+                 all_params.update(
+                     {
+                         "webhook_triggered": "true",
+                         "webhook_source": mlflow_config.get("webhook_source"),
+                         "source_artifact": mlflow_config.get("source_artifact"),
+                         "config_source": mlflow_config.get("config_source"),
+                     }
+                 )
+
+             # Truncate params
+             safe_params = {
+                 str(k)[:250]: str(v)[:250] for k, v in all_params.items() if v
+             }
+             # Prepare tags
+             tags = {}
+             if mlflow_config.get("tags"):
+                 tags.update({k: v for k, v in mlflow_config["tags"].items() if v})
+
+             bench_info = get_benchmark_info(job_data)
+             benchmark = bench_info.get("benchmark", get_task_name(job_data))
+             harness = bench_info.get("harness", "unknown")
+
+             # Tag the run with invocation_id and task metadata (task_name is benchmark-only)
+             tags.update(
+                 {
+                     "invocation_id": job_data.invocation_id,
+                     "job_id": job_data.job_id,
+                     "task_name": benchmark,
+                     "benchmark": benchmark,
+                     "harness": harness,
+                     "executor": job_data.executor,
+                 }
+             )
+             # Truncate tags
+             safe_tags = {str(k)[:250]: str(v)[:5000] for k, v in tags.items() if v}
+
+             # Skip the run if it already exists
+             exists, existing_run_id = self._get_existing_run_info(
+                 job_data, mlflow_config
+             )
+             if exists and mlflow_config.get("skip_existing"):
+                 return ExportResult(
+                     success=True,
+                     dest="mlflow",
+                     message=f"Run already exists: {existing_run_id}, skipped",
+                 )
+
+             # Create the run
+             with mlflow.start_run() as run:
+                 # Set tags
+                 if safe_tags:
+                     mlflow.set_tags(safe_tags)
+
+                 # Set run name
+                 run_name = (
+                     mlflow_config.get("run_name")
+                     or f"eval-{job_data.invocation_id}-{benchmark}"
+                 )
+                 mlflow.set_tag("mlflow.runName", run_name)
+
+                 # Set description only if provided
+                 description = mlflow_config.get("description")
+                 if description:
+                     mlflow.set_tag("mlflow.note.content", str(description)[:5000])
+
+                 # Log parameters
+                 mlflow.log_params(safe_params)
+
+                 # Log metrics
+                 mlflow.log_metrics(accuracy_metrics)
+
+                 # Log artifacts
+                 artifacts_logged = self._log_artifacts(job_data, mlflow_config)
+
+                 # Build run URL
+                 run_url = None
+                 if tracking_uri.startswith(("http://", "https://")):
+                     run_url = f"{tracking_uri}/#/experiments/{run.info.experiment_id}/runs/{run.info.run_id}"
+
+                 return ExportResult(
+                     success=True,
+                     dest="mlflow",
+                     message=f"Logged {len(accuracy_metrics)} metrics to MLflow",
+                     metadata={
+                         "run_id": run.info.run_id,
+                         "experiment_id": run.info.experiment_id,
+                         "tracking_uri": tracking_uri,
+                         "run_url": run_url,
+                         "invocation_id": job_data.invocation_id,
+                         "metrics_logged": len(accuracy_metrics),
+                         "params_logged": len(safe_params),
+                         "artifacts_logged": len(artifacts_logged),
+                     },
+                 )
+
+         except Exception as e:
+             logger.error(f"MLflow export failed: {e}")
+             return ExportResult(
+                 success=False, dest="mlflow", message=f"Failed: {str(e)}"
+             )
+
+     def _log_artifacts(
+         self, job_data: JobData, mlflow_config: Dict[str, Any]
+     ) -> List[str]:
+         """Log evaluation artifacts to MLflow using LocalExporter for transfer."""
+
+         # Check if artifacts should be logged (default: True)
+         if not mlflow_config.get("log_artifacts", True):
+             return []
+
+         try:
+             # Use LocalExporter to get files locally first
+             temp_dir = tempfile.mkdtemp(prefix="mlflow_artifacts_")
+             local_exporter = LocalExporter({"output_dir": temp_dir})
+             local_result = local_exporter.export_job(job_data)
+
+             if not local_result.success:
+                 logger.error(f"Failed to download artifacts: {local_result.message}")
+                 return []
+
+             artifacts_dir = Path(local_result.dest) / "artifacts"
+             logged_names = []
+
+             task_name = get_task_name(job_data)
+             artifact_path = task_name
+
+             # Log config at root level
+             with tempfile.TemporaryDirectory() as tmpdir:
+                 cfg_file = Path(tmpdir) / "config.yaml"
+                 with cfg_file.open("w") as f:
+                     yaml.dump(
+                         job_data.config or {},
+                         f,
+                         default_flow_style=False,
+                         sort_keys=False,
+                     )
+                 mlflow.log_artifact(str(cfg_file))
+
+             # Then log results files
+             for fname in get_available_artifacts(artifacts_dir):
+                 file_path = artifacts_dir / fname
+                 if file_path.exists():
+                     mlflow.log_artifact(str(file_path), artifact_path=artifact_path)
+                     logged_names.append(fname)
+
+             # Clean up the temp staging directory
+             shutil.rmtree(temp_dir)
+             return logged_names
+
+         except Exception as e:
+             logger.error(f"Error logging artifacts: {e}")
+             return []
+
+     def export_invocation(self, invocation_id: str) -> Dict[str, Any]:
+         """Export all jobs in an invocation as one MLflow run."""
+         if not self.is_available():
+             return {"success": False, "error": "mlflow package not installed"}
+
+         jobs = self.db.get_jobs(invocation_id)
+         if not jobs:
+             return {
+                 "success": False,
+                 "error": f"No jobs found for invocation {invocation_id}",
+             }
+
+         try:
+             # Get first job for config access
+             first_job = list(jobs.values())[0]
+
+             # Extract config using common utility
+             mlflow_config = extract_exporter_config(first_job, "mlflow", self.config)
+
+             # Collect metrics from ALL jobs
+             all_metrics = {}
+             for job_id, job_data in jobs.items():
+                 log_metrics = mlflow_config.get("log_metrics", [])
+                 job_metrics = extract_accuracy_metrics(
+                     job_data, self.get_job_paths, log_metrics
+                 )
+                 all_metrics.update(job_metrics)
+
+             if not all_metrics:
+                 return {
+                     "success": False,
+                     "error": "No accuracy metrics found in any job",
+                 }
+
+             # Set up MLflow
+             tracking_uri = mlflow_config.get("tracking_uri")
+             if not tracking_uri:
+                 return {"success": False, "error": "tracking_uri is required"}
+
+             tracking_uri = tracking_uri.rstrip("/")
+             mlflow.set_tracking_uri(tracking_uri)
+
+             experiment_name = mlflow_config.get(
+                 "experiment_name", "nemo-evaluator-launcher"
+             )
+             mlflow.set_experiment(experiment_name)
+
+             # Prepare parameters for invocation
+             all_params = {
+                 "invocation_id": invocation_id,
+                 "executor": first_job.executor,
+                 "timestamp": str(first_job.timestamp),
+                 "jobs_count": str(len(jobs)),
+             }
+
+             # Add webhook info if available
+             if mlflow_config.get("triggered_by_webhook"):
+                 all_params.update(
+                     {
+                         "webhook_triggered": "true",
+                         "webhook_source": mlflow_config.get("webhook_source"),
+                         "source_artifact": mlflow_config.get("source_artifact"),
+                         "config_source": mlflow_config.get("config_source"),
+                     }
+                 )
+
+             if mlflow_config.get("extra_metadata"):
+                 all_params.update(mlflow_config["extra_metadata"])
+
+             # Prepare tags
+             tags = {"invocation_id": invocation_id}
+             if mlflow_config.get("tags"):
+                 tags.update({k: v for k, v in mlflow_config["tags"].items() if v})
+
+             # Truncate
+             safe_params = {
+                 str(k)[:250]: str(v)[:250] for k, v in all_params.items() if v
+             }
+             safe_tags = {str(k)[:250]: str(v)[:5000] for k, v in tags.items() if v}
+
+             # Check for existing run
+             exists, existing_run_id = self._get_existing_run_info(
+                 first_job, mlflow_config
+             )
+             if exists and mlflow_config.get("skip_existing"):
+                 return {
+                     "success": True,
+                     "invocation_id": invocation_id,
+                     "jobs": {
+                         job_id: {
+                             "success": True,
+                             "message": f"Run already exists: {existing_run_id}, skipped",
+                         }
+                         for job_id in jobs.keys()
+                     },
+                     "metadata": {"run_id": existing_run_id, "skipped": True},
+                 }
+
+             # Create MLflow run with ALL metrics
+             with mlflow.start_run() as run:
+                 # Set tags
+                 if safe_tags:
+                     mlflow.set_tags(safe_tags)
+
+                 # Set run name
+                 run_name = mlflow_config.get("run_name") or f"eval-{invocation_id}"
+                 mlflow.set_tag("mlflow.runName", run_name)
+
+                 # Set description
+                 description = mlflow_config.get("description")
+                 if description:
+                     mlflow.set_tag("mlflow.note.content", str(description)[:5000])
+
+                 # Log parameters
+                 mlflow.log_params(safe_params)
+
+                 # Log ALL metrics
+                 mlflow.log_metrics(all_metrics)
+
+                 # Log artifacts from all jobs
+                 total_artifacts = 0
+                 for job_data in jobs.values():
+                     artifacts_logged = self._log_artifacts(job_data, mlflow_config)
+                     total_artifacts += len(artifacts_logged)
+
+                 # Build run URL
+                 run_url = None
+                 if tracking_uri.startswith(("http://", "https://")):
+                     run_url = f"{tracking_uri}/#/experiments/{run.info.experiment_id}/runs/{run.info.run_id}"
+
+                 return {
+                     "success": True,
+                     "invocation_id": invocation_id,
+                     "jobs": {
+                         job_id: {
+                             "success": True,
+                             "message": "Contributed to invocation run",
+                         }
+                         for job_id in jobs.keys()
+                     },
+                     "metadata": {
+                         "run_id": run.info.run_id,
+                         "experiment_id": run.info.experiment_id,
+                         "tracking_uri": tracking_uri,
+                         "run_url": run_url,
+                         "metrics_logged": len(all_metrics),
+                         "params_logged": len(safe_params),
+                         "artifacts_logged": total_artifacts,
+                     },
+                 }
+         except Exception as e:
+             logger.error(f"MLflow export failed for invocation {invocation_id}: {e}")
+             return {"success": False, "error": f"MLflow export failed: {str(e)}"}
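
For orientation, here is a minimal usage sketch of the exporter above. The config keys shown (tracking_uri, experiment_name, log_artifacts, skip_existing) are the ones export_job actually reads; the constructor call with a plain config dict mirrors how this file instantiates LocalExporter, and job_data is assumed to be a JobData record fetched elsewhere, so treat this as a sketch under those assumptions rather than documented API.

    # Sketch only: assumes BaseExporter accepts a config dict, as the
    # LocalExporter({"output_dir": ...}) call in _log_artifacts suggests.
    from nemo_evaluator_launcher.exporters.mlflow import MLflowExporter

    exporter = MLflowExporter(
        {
            "tracking_uri": "http://localhost:5000",  # required; export fails without it
            "experiment_name": "nemo-evaluator-launcher",  # the code's default
            "log_artifacts": True,  # default True; set False to skip artifact upload
            "skip_existing": True,  # skip if a run is already tagged with this invocation_id
        }
    )
    result = exporter.export_job(job_data)  # job_data: a JobData from the exec DB
    print(result.success, result.message)
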
nemo_evaluator_launcher/exporters/registry.py
@@ -0,0 +1,40 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ from typing import Callable, Dict
+
+ from nemo_evaluator_launcher.exporters.base import BaseExporter
+
+ _EXPORTER_REGISTRY: Dict[str, type[BaseExporter]] = {}
+
+
+ def register_exporter(name: str) -> Callable:
+     def wrapper(cls):
+         _EXPORTER_REGISTRY[name] = cls
+         return cls
+
+     return wrapper
+
+
+ def get_exporter(name: str) -> type[BaseExporter]:
+     if name not in _EXPORTER_REGISTRY:
+         raise ValueError(
+             f"Unknown exporter: {name}. Available: {list(_EXPORTER_REGISTRY.keys())}"
+         )
+     return _EXPORTER_REGISTRY[name]
+
+
+ def available_exporters() -> list[str]:
+     return list(_EXPORTER_REGISTRY.keys())
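
The registry above is a plain name-to-class decorator pattern: @register_exporter(name) stores the decorated class (not an instance) under name, get_exporter returns that class or raises ValueError for unknown names, and callers instantiate it themselves. A short self-contained sketch, with DummyExporter as a hypothetical stand-in for a real BaseExporter subclass:

    from nemo_evaluator_launcher.exporters.registry import (
        available_exporters,
        get_exporter,
        register_exporter,
    )

    @register_exporter("dummy")  # hypothetical name, for illustration only
    class DummyExporter:
        def __init__(self, config):
            self.config = config

    print(available_exporters())  # lists "dummy" plus any exporters already imported
    exporter_cls = get_exporter("dummy")  # returns the class itself
    exporter = exporter_cls({})
    # get_exporter("missing") raises ValueError naming the available exporters
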