nemo-evaluator-launcher 0.1.0rc6__py3-none-any.whl → 0.1.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nemo_evaluator_launcher/__init__.py +15 -1
- nemo_evaluator_launcher/api/functional.py +188 -27
- nemo_evaluator_launcher/api/types.py +9 -0
- nemo_evaluator_launcher/cli/export.py +131 -12
- nemo_evaluator_launcher/cli/info.py +477 -82
- nemo_evaluator_launcher/cli/kill.py +5 -3
- nemo_evaluator_launcher/cli/logs.py +102 -0
- nemo_evaluator_launcher/cli/ls_runs.py +31 -10
- nemo_evaluator_launcher/cli/ls_tasks.py +105 -3
- nemo_evaluator_launcher/cli/main.py +101 -5
- nemo_evaluator_launcher/cli/run.py +153 -30
- nemo_evaluator_launcher/cli/status.py +49 -5
- nemo_evaluator_launcher/cli/version.py +26 -23
- nemo_evaluator_launcher/common/execdb.py +121 -27
- nemo_evaluator_launcher/common/helpers.py +213 -33
- nemo_evaluator_launcher/common/logging_utils.py +16 -5
- nemo_evaluator_launcher/common/printing_utils.py +100 -0
- nemo_evaluator_launcher/configs/deployment/generic.yaml +33 -0
- nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
- nemo_evaluator_launcher/configs/deployment/trtllm.yaml +23 -0
- nemo_evaluator_launcher/configs/deployment/vllm.yaml +2 -2
- nemo_evaluator_launcher/configs/execution/local.yaml +2 -0
- nemo_evaluator_launcher/configs/execution/slurm/default.yaml +19 -4
- nemo_evaluator_launcher/executors/base.py +54 -1
- nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +60 -5
- nemo_evaluator_launcher/executors/lepton/executor.py +240 -101
- nemo_evaluator_launcher/executors/lepton/job_helpers.py +15 -11
- nemo_evaluator_launcher/executors/local/executor.py +492 -56
- nemo_evaluator_launcher/executors/local/run.template.sh +76 -9
- nemo_evaluator_launcher/executors/slurm/executor.py +571 -98
- nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
- nemo_evaluator_launcher/exporters/base.py +9 -0
- nemo_evaluator_launcher/exporters/gsheets.py +27 -9
- nemo_evaluator_launcher/exporters/local.py +30 -16
- nemo_evaluator_launcher/exporters/mlflow.py +245 -74
- nemo_evaluator_launcher/exporters/utils.py +139 -184
- nemo_evaluator_launcher/exporters/wandb.py +157 -43
- nemo_evaluator_launcher/package_info.py +6 -3
- nemo_evaluator_launcher/resources/mapping.toml +56 -15
- nemo_evaluator_launcher-0.1.41.dist-info/METADATA +494 -0
- nemo_evaluator_launcher-0.1.41.dist-info/RECORD +62 -0
- {nemo_evaluator_launcher-0.1.0rc6.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/entry_points.txt +1 -0
- nemo_evaluator_launcher-0.1.0rc6.dist-info/METADATA +0 -35
- nemo_evaluator_launcher-0.1.0rc6.dist-info/RECORD +0 -57
- {nemo_evaluator_launcher-0.1.0rc6.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/WHEEL +0 -0
- {nemo_evaluator_launcher-0.1.0rc6.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/licenses/LICENSE +0 -0
- {nemo_evaluator_launcher-0.1.0rc6.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/top_level.txt +0 -0
The excerpt below shows the diff of the shared exporter utilities module:

```diff
@@ -16,6 +16,7 @@
 """Shared utilities for metrics and configuration handling."""
 
 import json
+import re
 import subprocess
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Tuple
```

```diff
@@ -148,15 +149,12 @@ def extract_exporter_config(
     """Extract and merge exporter configuration from multiple sources."""
     config = {}
 
-    #
+    # root-level `export.<exporter-name>`
     if job_data.config:
-
-
-
-
-
-        # No conversion needed
-        config.update(yaml_config)
+        export_block = (job_data.config or {}).get("export", {})
+        yaml_config = (export_block or {}).get(exporter_name, {})
+        if yaml_config:
+            config.update(yaml_config)
 
     # From webhook metadata (if triggered by webhook)
     if "webhook_metadata" in job_data.data:
```

```diff
@@ -167,8 +165,6 @@ def extract_exporter_config(
             "source_artifact": f"{webhook_data.get('artifact_name', 'unknown')}:{webhook_data.get('artifact_version', 'unknown')}",
             "config_source": webhook_data.get("config_file", "unknown"),
         }
-
-        # For W&B specifically, extract run info if available
         if exporter_name == "wandb" and webhook_data.get("webhook_source") == "wandb":
             wandb_specific = {
                 "entity": webhook_data.get("entity"),
```

```diff
@@ -176,10 +172,9 @@ def extract_exporter_config(
                 "run_id": webhook_data.get("run_id"),
             }
             webhook_config.update({k: v for k, v in wandb_specific.items() if v})
-
         config.update(webhook_config)
 
-    #
+    # allows CLI overrides
     if constructor_config:
         config.update(constructor_config)
 
```

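The rewritten `extract_exporter_config` now layers three sources in a fixed order: the root-level `export.<exporter-name>` block from the run config, webhook metadata (when the export was triggered by a webhook), and finally constructor/CLI overrides. A minimal sketch of that precedence, using hypothetical dict contents rather than a real run config:

```python
# Illustration of the merge order only; the exporter name and values are hypothetical.
run_config = {
    "export": {
        "wandb": {"entity": "my-team", "project": "evals"},  # root-level export.<exporter-name>
    }
}

config: dict = {}
config.update(run_config.get("export", {}).get("wandb", {}))  # 1) YAML export block
config.update({"run_id": "abc123"})                           # 2) webhook metadata, if any
config.update({"project": "evals-debug"})                     # 3) constructor/CLI overrides win last

print(config)  # {'entity': 'my-team', 'project': 'evals-debug', 'run_id': 'abc123'}
```
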
```diff
@@ -269,6 +264,14 @@ def get_container_from_mapping(job_data: JobData) -> str:
     return None
 
 
+def get_artifact_root(job_data: JobData) -> str:
+    """Get artifact root from job data."""
+    bench = get_benchmark_info(job_data)
+    h = bench.get("harness", "unknown")
+    b = bench.get("benchmark", get_task_name(job_data))
+    return f"{h}.{b}"
+
+
 # =============================================================================
 # GITLAB DOWNLOAD
 # =============================================================================
```

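The new `get_artifact_root` helper builds a `<harness>.<benchmark>` prefix from the benchmark info. For example (the benchmark-info values below are made up, not taken from the mapping file):

```python
# Hypothetical benchmark info; get_benchmark_info() would normally supply this dict.
bench = {"harness": "lm-evaluation-harness", "benchmark": "mmlu"}

artifact_root = f"{bench.get('harness', 'unknown')}.{bench.get('benchmark', 'unknown')}"
print(artifact_root)  # lm-evaluation-harness.mmlu
```
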
```diff
@@ -288,91 +291,6 @@ def download_gitlab_artifacts(
         Dictionary mapping artifact names to local file paths
     """
     raise NotImplementedError("Downloading from gitlab is not implemented")
-    # TODO: rework this logic
-    # pipeline_id = paths["pipeline_id"]
-    # project_id = paths["project_id"]
-    # gitlab_token = os.getenv("GITLAB_TOKEN")
-    #
-    # if not gitlab_token:
-    #     raise RuntimeError(
-    #         "GITLAB_TOKEN environment variable required for GitLab remote downloads"
-    #     )
-    #
-    # # GitLab API endpoint for artifacts
-    # base_url = "TODO: replace"
-    # artifacts_url = "TODO: replace"
-    #
-    # headers = {"Private-Token": gitlab_token}
-    # downloaded_artifacts = {}
-    #
-    # try:
-    #     # Get pipeline jobs
-    #     response = requests.get(artifacts_url, headers=headers, timeout=30)
-    #     response.raise_for_status()
-    #     jobs = response.json()
-    #
-    #     for job in jobs:
-    #         if job.get("artifacts_file"):
-    #             job_id = job["id"]
-    #             job_name = job.get("name", f"job_{job_id}")
-    #             artifacts_download_url = (
-    #                 f"{base_url}/api/v4/projects/{project_id}/jobs/{job_id}/artifacts"
-    #             )
-    #
-    #             logger.info(f"Downloading artifacts from job: {job_name}")
-    #
-    #             # Download job artifacts
-    #             response = requests.get(
-    #                 artifacts_download_url, headers=headers, timeout=300
-    #             )
-    #             response.raise_for_status()
-    #
-    #             if extract_specific:
-    #                 # Extract specific files from ZIP
-    #                 with tempfile.NamedTemporaryFile(
-    #                     suffix=".zip", delete=False
-    #                 ) as temp_zip:
-    #                     temp_zip.write(response.content)
-    #                     temp_zip_path = temp_zip.name
-    #
-    #                 try:
-    #                     with zipfile.ZipFile(temp_zip_path, "r") as zip_ref:
-    #                         # Create artifacts directory
-    #                         artifacts_dir = export_dir / "artifacts"
-    #                         artifacts_dir.mkdir(parents=True, exist_ok=True)
-    #
-    #                         # Extract to be logged artifacts
-    #                         for member in zip_ref.namelist():
-    #                             filename = Path(member).name
-    #                             if filename in get_relevant_artifacts():
-    #                                 # Extract the file
-    #                                 source = zip_ref.open(member)
-    #                                 target_path = artifacts_dir / filename
-    #                                 with open(target_path, "wb") as f:
-    #                                     f.write(source.read())
-    #                                 source.close()
-    #
-    #                                 downloaded_artifacts[filename] = target_path
-    #                                 logger.info(f"Extracted: {filename}")
-    #                 finally:
-    #                     os.unlink(temp_zip_path)
-    #             else:
-    #                 # Save as ZIP files (original behavior)
-    #                 artifacts_zip = export_dir / f"job_{job_id}_artifacts.zip"
-    #                 with open(artifacts_zip, "wb") as f:
-    #                     f.write(response.content)
-    #
-    #                 downloaded_artifacts[f"job_{job_id}_artifacts.zip"] = artifacts_zip
-    #                 logger.info(f"Downloaded: {artifacts_zip.name}")
-    #
-    # except requests.RequestException as e:
-    #     logger.error(f"GitLab API request failed: {e}")
-    #     raise RuntimeError(f"GitLab API request failed: {e}")
-    # except Exception as e:
-    #     logger.error(f"GitLab remote download failed: {e}")
-    #     raise RuntimeError(f"GitLab remote download failed: {e}")
-    #
-    # return downloaded_artifacts
 
 
 # =============================================================================
```

```diff
@@ -389,21 +307,28 @@ def ssh_setup_masters(jobs: Dict[str, JobData]) -> Dict[Tuple[str, str], str]:
     remote_pairs: set[tuple[str, str]] = set()
     for jd in jobs.values():
         try:
-
-
-
+            # Preferred: explicit 'paths' from job data
+            p = (jd.data or {}).get("paths") or {}
+            if (
+                p.get("storage_type") == "remote_ssh"
+                and p.get("username")
+                and p.get("hostname")
+            ):
+                remote_pairs.add((p["username"], p["hostname"]))
+                continue
+            # Fallback: common slurm fields (works with BaseExporter.get_job_paths)
+            d = jd.data or {}
+            if jd.executor == "slurm" and d.get("username") and d.get("hostname"):
+                remote_pairs.add((d["username"], d["hostname"]))
         except Exception:
             pass
 
     if not remote_pairs:
-        return {}
+        return {}
 
-    # Ensure connections directory exists (like execDB does)
     CONNECTIONS_DIR.mkdir(parents=True, exist_ok=True)
-
     control_paths: Dict[Tuple[str, str], str] = {}
     for username, hostname in remote_pairs:
-        # Simple socket name
        socket_path = CONNECTIONS_DIR / f"{username}_{hostname}.sock"
        try:
            cmd = [
```

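`ssh_setup_masters` now derives `(username, hostname)` pairs from either the explicit `paths` block or plain Slurm job fields, and keeps one control socket per pair under `CONNECTIONS_DIR`. The `cmd` list built right after this hunk is not shown; as a rough sketch, opening a reusable OpenSSH control master for such a socket typically looks like the following (standard OpenSSH options, not the launcher's exact command):

```python
import subprocess
from pathlib import Path


def open_control_master(username: str, hostname: str, connections_dir: Path) -> str:
    """Sketch only: open a persistent SSH control master socket for later reuse."""
    connections_dir.mkdir(parents=True, exist_ok=True)
    socket_path = connections_dir / f"{username}_{hostname}.sock"
    cmd = [
        "ssh",
        "-o", "ControlMaster=auto",
        "-o", f"ControlPath={socket_path}",
        "-o", "ControlPersist=10m",
        "-N",  # no remote command
        "-f",  # go to background after authentication
        f"{username}@{hostname}",
    ]
    subprocess.run(cmd, check=False, capture_output=True)
    return str(socket_path)
```

Subsequent `scp`/`ssh` calls can then pass `-o ControlPath=<socket>` to reuse the authenticated connection instead of re-handshaking per file.
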
```diff
@@ -454,9 +379,10 @@ def ssh_download_artifacts(
     config: Dict[str, Any] | None = None,
     control_paths: Dict[Tuple[str, str], str] | None = None,
 ) -> List[str]:
-    """Download artifacts via SSH with optional connection reuse."""
+    """Download artifacts/logs via SSH with optional connection reuse."""
     exported_files: List[str] = []
     copy_logs = bool((config or {}).get("copy_logs", False))
+    copy_artifacts = bool((config or {}).get("copy_artifacts", True))
     only_required = bool((config or {}).get("only_required", True))
 
     control_path = None
```

```diff
@@ -473,44 +399,49 @@ def ssh_download_artifacts(
                 str(local_path),
             ]
         )
-
-        return result.returncode == 0
+        return subprocess.run(cmd, capture_output=True).returncode == 0
 
     export_dir.mkdir(parents=True, exist_ok=True)
-    (export_dir / "artifacts").mkdir(parents=True, exist_ok=True)
-
-    available_local = (
-        get_available_artifacts(paths.get("artifacts_dir", Path()))
-        if not only_required
-        else None
-    )
-    artifact_names = (
-        [a for a in get_relevant_artifacts()]
-        if only_required
-        else (available_local or [])
-    )
-
-    for artifact in artifact_names:
-        remote_file = f"{paths['remote_path']}/artifacts/{artifact}"
-        local_file = export_dir / "artifacts" / artifact
-        if scp_file(remote_file, local_file):
-            exported_files.append(str(local_file))
 
+    # Artifacts
+    if copy_artifacts:
+        art_dir = export_dir / "artifacts"
+        art_dir.mkdir(parents=True, exist_ok=True)
+
+        if only_required:
+            for artifact in get_relevant_artifacts():
+                remote_file = f"{paths['remote_path']}/artifacts/{artifact}"
+                local_file = art_dir / artifact
+                local_file.parent.mkdir(parents=True, exist_ok=True)
+                if scp_file(remote_file, local_file):
+                    exported_files.append(str(local_file))
+        else:
+            # Copy known files individually to avoid subfolders and satisfy tests
+            for artifact in get_available_artifacts(paths.get("artifacts_dir", Path())):
+                remote_file = f"{paths['remote_path']}/artifacts/{artifact}"
+                local_file = art_dir / artifact
+                if scp_file(remote_file, local_file):
+                    exported_files.append(str(local_file))
+
+    # Logs (top-level only)
     if copy_logs:
-        remote_logs = f"{paths['remote_path']}/logs"
         local_logs = export_dir / "logs"
+        remote_logs = f"{paths['remote_path']}/logs"
         cmd = (
             ["scp", "-r"]
             + ssh_opts
             + [
-                f"{paths['username']}@{paths['hostname']}:{remote_logs}",
+                f"{paths['username']}@{paths['hostname']}:{remote_logs}/.",
                 str(local_logs),
             ]
         )
         if subprocess.run(cmd, capture_output=True).returncode == 0:
-
-
-
+            for p in local_logs.iterdir():
+                if p.is_dir():
+                    import shutil
+
+                    shutil.rmtree(p, ignore_errors=True)
+            exported_files.extend([str(f) for f in local_logs.glob("*") if f.is_file()])
 
     return exported_files
 
```

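Three config flags now control what `ssh_download_artifacts` pulls from the remote run directory. A hypothetical exporter config illustrating them (the keys match the code above; the values are just examples):

```python
# Flags read by ssh_download_artifacts(); defaults are noted in the comments.
exporter_config = {
    "copy_artifacts": True,  # default True: fetch files from <remote_path>/artifacts
    "only_required": True,   # default True: limit artifacts to get_relevant_artifacts()
    "copy_logs": False,      # default False: also scp <remote_path>/logs (top-level files only)
}
```
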
```diff
@@ -522,16 +453,16 @@ def ssh_download_artifacts(
 
 def _get_artifacts_dir(paths: Dict[str, Any]) -> Path:
     """Get artifacts directory from paths."""
-
-
-
-
-    elif paths["storage_type"] == "remote_ssh":
-        return None
-    else:
-        logger.error(f"Unsupported storage type: {paths['storage_type']}")
+    storage_type = paths.get("storage_type")
+
+    # For SSH-based remote access, artifacts aren't available locally yet
+    if storage_type == "remote_ssh":
         return None
 
+    # For all local access (local_filesystem, remote_local, gitlab_ci_local)
+    # return the artifacts_dir from paths
+    return paths.get("artifacts_dir")
+
 
 def _extract_metrics_from_results(results: dict) -> Dict[str, float]:
     """Extract metrics from a 'results' dict (with optional 'groups'/'tasks')."""
```

```diff
@@ -540,15 +471,12 @@ def _extract_metrics_from_results(results: dict) -> Dict[str, float]:
         section_data = results.get(section)
         if isinstance(section_data, dict):
             for task_name, task_data in section_data.items():
-
-
-
-
-
-
-                    source=task_metrics,
-                    context=f" while extracting results for task '{task_name}'",
-                )
+                task_metrics = _extract_task_metrics(task_name, task_data)
+                _safe_update_metrics(
+                    target=metrics,
+                    source=task_metrics,
+                    context=f" while extracting results for task '{task_name}'",
+                )
     return metrics
 
 
```

```diff
@@ -587,54 +515,43 @@ def _extract_from_json_files(artifacts_dir: Path) -> Dict[str, float]:
     return metrics
 
 
-def _extract_task_metrics(task_name: str,
+def _extract_task_metrics(task_name: str, task_data: dict) -> Dict[str, float]:
     """Extract metrics from a task's metrics data."""
     extracted = {}
-    score_patterns = [
-        "acc",
-        "accuracy",
-        "score",
-        "exact_match",
-        "f1",
-        "em",
-        "pass@1",
-        "pass@k",
-    ]
 
-
-
-
-
+    metrics_data = task_data.get("metrics", {})
+    if "groups" in task_data:
+        for group_name, group_data in task_data["groups"].items():
+            group_extracted = _extract_task_metrics(
+                f"{task_name}_{group_name}", group_data
+            )
+            _safe_update_metrics(
+                target=extracted,
+                source=group_extracted,
+                context=f" in task '{task_name}'",
+            )
 
+    for metric_name, metric_data in metrics_data.items():
         try:
-
-            if
-
-
-            if isinstance(score_data, dict) and "value" in score_data:
-                key = f"{task_name}_{metric_name}_{score_type}"
-                _safe_set_metric(
-                    container=extracted,
-                    key=key,
-                    new_value=score_data["value"],
-                    context=f" in task '{task_name}'",
-                )
-            elif "value" in metric_data:
+            for score_type, score_data in metric_data["scores"].items():
+                if score_type != metric_name:
+                    key = f"{task_name}_{metric_name}_{score_type}"
+                else:
                     key = f"{task_name}_{metric_name}"
-                _safe_set_metric(
-                    container=extracted,
-                    key=key,
-                    new_value=metric_data["value"],
-                    context=f" in task '{task_name}'",
-                )
-            elif isinstance(metric_data, (int, float)):
-                key = f"{task_name}_{metric_name}"
                 _safe_set_metric(
                     container=extracted,
                     key=key,
-                    new_value=
+                    new_value=score_data["value"],
                     context=f" in task '{task_name}'",
                 )
+                for stat_name, stat_value in metric_data.get("stats", {}).items():
+                    stats_key = f"{key}_{stat_name}"
+                    _safe_set_metric(
+                        container=extracted,
+                        key=stats_key,
+                        new_value=stat_value,
+                        context=f" in task '{task_name}'",
+                    )
         except (ValueError, TypeError) as e:
             logger.warning(
                 f"Failed to extract metric {metric_name} for task {task_name}: {e}"
```

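The rewritten `_extract_task_metrics` stops pattern-matching on metric names and instead walks an explicit structure: per-metric `scores` (each holding a `value`), optional per-metric `stats`, and nested `groups` that recurse with a `<task>_<group>` prefix. A hypothetical `task_data` payload of that shape, for illustration only:

```python
# Hypothetical task_data matching the traversal above (all values are made up).
task_data = {
    "metrics": {
        "exact_match": {
            "scores": {
                "exact_match": {"value": 0.71},  # score_type == metric_name -> "<task>_exact_match"
                "strict": {"value": 0.65},       # otherwise -> "<task>_exact_match_strict"
            },
            "stats": {"stderr": 0.01},           # appended per score key as "<key>_stderr"
        },
    },
    "groups": {  # nested groups recurse as "<task>_<group>"
        "stem": {"metrics": {"acc": {"scores": {"acc": {"value": 0.60}}}}},
    },
}
```
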
```diff
@@ -667,3 +584,41 @@ def _safe_update_metrics(
     """Update target from source safely, raising on collisions with detailed values."""
     for k, v in source.items():
         _safe_set_metric(target, k, v, context)
+
+
+# =============================================================================
+# MLFLOW FUNCTIONS
+# =============================================================================
+
+# MLflow constants
+_MLFLOW_KEY_MAX = 250
+_MLFLOW_PARAM_VAL_MAX = 250
+_MLFLOW_TAG_VAL_MAX = 5000
+
+_INVALID_KEY_CHARS = re.compile(r"[^/\w.\- ]")
+_MULTI_UNDERSCORE = re.compile(r"_+")
+
+
+def mlflow_sanitize(s: Any, kind: str = "key") -> str:
+    """
+    Sanitize strings for MLflow logging.
+
+    kind:
+      - "key", "metric", "tag_key", "param_key": apply key rules
+      - "tag_value": apply tag value rules
+      - "param_value": apply param value rules
+    """
+    s = "" if s is None else str(s)
+
+    if kind in ("key", "metric", "tag_key", "param_key"):
+        # common replacements
+        s = s.replace("pass@", "pass_at_")
+        # drop disallowed chars, collapse underscores, trim
+        s = _INVALID_KEY_CHARS.sub("_", s)
+        s = _MULTI_UNDERSCORE.sub("_", s).strip()
+        return s[:_MLFLOW_KEY_MAX] or "key"
+
+    # values: normalize whitespace, enforce length
+    s = s.replace("\n", " ").replace("\r", " ").strip()
+    max_len = _MLFLOW_TAG_VAL_MAX if kind == "tag_value" else _MLFLOW_PARAM_VAL_MAX
+    return s[:max_len]
```

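Assuming the hunks above belong to `nemo_evaluator_launcher/exporters/utils.py` (listed in the RECORD above), the new sanitizer can be exercised as follows; the expected outputs follow directly from the `pass@` replacement and the regexes defined in the diff:

```python
from nemo_evaluator_launcher.exporters.utils import mlflow_sanitize  # assumed import path

print(mlflow_sanitize("mmlu pass@1", kind="key"))           # mmlu pass_at_1
print(mlflow_sanitize("a//b::c", kind="key"))               # a//b_c  (':' dropped, underscores collapsed)
print(mlflow_sanitize("line1\nline2", kind="param_value"))  # line1 line2
```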