nemo-evaluator-launcher 0.1.14__py3-none-any.whl → 0.1.15__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in the public registry.

Potentially problematic release: this version of nemo-evaluator-launcher might be problematic.

@@ -16,13 +16,10 @@
 """Evaluation results exporter for MLflow tracking."""

 import os
-import shutil
 import tempfile
 from pathlib import Path
 from typing import Any, Dict, List

-import yaml
-
 try:
     import mlflow

@@ -42,6 +39,7 @@ from nemo_evaluator_launcher.exporters.utils import (
     get_available_artifacts,
     get_benchmark_info,
     get_task_name,
+    mlflow_sanitize,
 )


@@ -117,11 +115,42 @@ class MLflowExporter(BaseExporter):
                 message="tracking_uri is required (set export.mlflow.tracking_uri or MLFLOW_TRACKING_URI)",
             )

-        # Extract metrics
+        # Stage artifacts locally if remote_ssh (e.g., Slurm), so we can extract metrics
+        staged_base_dir = None
+        try:
+            paths = self.get_job_paths(job_data)
+            if paths.get("storage_type") == "remote_ssh":
+                tmp_stage = Path(tempfile.mkdtemp(prefix="mlflow_stage_"))
+                LocalExporter(
+                    {
+                        "output_dir": str(tmp_stage),
+                        "copy_logs": mlflow_config.get(
+                            "log_logs", False
+                        ),  # log_logs -> copy_logs
+                        "only_required": mlflow_config.get("only_required", True),
+                    }
+                ).export_job(job_data)
+                staged_base_dir = (
+                    tmp_stage / job_data.invocation_id / job_data.job_id
+                )
+        except Exception as e:
+            logger.warning(f"Failed staging artifacts for {job_data.job_id}: {e}")
+
+        # Extract metrics (prefer staged if available)
         log_metrics = mlflow_config.get("log_metrics", [])
-        accuracy_metrics = extract_accuracy_metrics(
-            job_data, self.get_job_paths, log_metrics
-        )
+        if staged_base_dir and (staged_base_dir / "artifacts").exists():
+            accuracy_metrics = extract_accuracy_metrics(
+                job_data,
+                lambda _: {
+                    "artifacts_dir": staged_base_dir / "artifacts",
+                    "storage_type": "local_filesystem",
+                },
+                log_metrics,
+            )
+        else:
+            accuracy_metrics = extract_accuracy_metrics(
+                job_data, self.get_job_paths, log_metrics
+            )

         if not accuracy_metrics:
             return ExportResult(
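The staging block above hands extract_accuracy_metrics a path-resolver callable in place of self.get_job_paths. A minimal sketch of that override in isolation (the path and the commented call are illustrative, not part of the package sources):

    # Sketch: redirect metric extraction to the locally staged copy of a remote_ssh job.
    from pathlib import Path

    staged_base_dir = Path("/tmp/mlflow_stage_xyz/INVOCATION/JOB")  # hypothetical staged copy
    get_paths = lambda _job: {
        "artifacts_dir": staged_base_dir / "artifacts",  # read results from the local copy
        "storage_type": "local_filesystem",              # instead of the remote_ssh location
    }
    # accuracy_metrics = extract_accuracy_metrics(job_data, get_paths, log_metrics)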
@@ -160,10 +189,13 @@
             }
         )

-        # Truncate params
+        # Sanitize params
         safe_params = {
-            str(k)[:250]: str(v)[:250] for k, v in all_params.items() if v
+            mlflow_sanitize(k, "param_key"): mlflow_sanitize(v, "param_value")
+            for k, v in (all_params or {}).items()
+            if v is not None
         }
+
         # Prepare tags
         tags = {}
         if mlflow_config.get("tags"):
@@ -173,7 +205,10 @@
         benchmark = bench_info.get("benchmark", get_task_name(job_data))
         harness = bench_info.get("harness", "unknown")

-        # Tag the run with invocation_id and task metadata (task_name is benchmark-only)
+        # Tag the run with invocation_id and task metadata
+        exec_type = (job_data.config or {}).get("execution", {}).get(
+            "type"
+        ) or job_data.executor
         tags.update(
             {
                 "invocation_id": job_data.invocation_id,
@@ -181,11 +216,16 @@
                 "task_name": benchmark,
                 "benchmark": benchmark,
                 "harness": harness,
-                "executor": job_data.executor,
+                "executor": exec_type,
             }
         )
-        # Truncate tags
-        safe_tags = {str(k)[:250]: str(v)[:5000] for k, v in tags.items() if v}
+
+        # Sanitize tags
+        safe_tags = {
+            mlflow_sanitize(k, "tag_key"): mlflow_sanitize(v, "tag_value")
+            for k, v in (tags or {}).items()
+            if v is not None
+        }

         # skip run if it already exists
         exists, existing_run_id = self._get_existing_run_info(
@@ -204,26 +244,34 @@
             if safe_tags:
                 mlflow.set_tags(safe_tags)

-            # Set run name)
+            # Set run name
             run_name = (
                 mlflow_config.get("run_name")
                 or f"eval-{job_data.invocation_id}-{benchmark}"
             )
-            mlflow.set_tag("mlflow.runName", run_name)
+            mlflow.set_tag("mlflow.runName", mlflow_sanitize(run_name, "tag_value"))

             # Set description only if provided
             description = mlflow_config.get("description")
             if description:
-                mlflow.set_tag("mlflow.note.content", str(description)[:5000])
+                mlflow.set_tag(
+                    "mlflow.note.content", mlflow_sanitize(description, "tag_value")
+                )

             # Log parameters
             mlflow.log_params(safe_params)

-            # Log metrics
-            mlflow.log_metrics(accuracy_metrics)
+            # Sanitize metric keys before logging
+            safe_metrics = {
+                mlflow_sanitize(k, "metric"): v
+                for k, v in (accuracy_metrics or {}).items()
+            }
+            mlflow.log_metrics(safe_metrics)

             # Log artifacts
-            artifacts_logged = self._log_artifacts(job_data, mlflow_config)
+            artifacts_logged = self._log_artifacts(
+                job_data, mlflow_config, staged_base_dir
+            )

             # Build run URL
             run_url = None
@@ -253,7 +301,10 @@
             )

     def _log_artifacts(
-        self, job_data: JobData, mlflow_config: Dict[str, Any]
+        self,
+        job_data: JobData,
+        mlflow_config: Dict[str, Any],
+        pre_staged_dir: Path = None,
     ) -> List[str]:
         """Log evaluation artifacts to MLflow using LocalExporter for transfer."""

@@ -262,34 +313,39 @@
             return []

         try:
-            # Use LocalExporter to get files locally first
-            temp_dir = tempfile.mkdtemp(prefix="mlflow_artifacts_")
-            local_exporter = LocalExporter(
-                {
-                    "output_dir": temp_dir,
-                    "copy_logs": mlflow_config.get(
-                        "log_logs", mlflow_config.get("copy_logs", False)
-                    ),
-                    "only_required": mlflow_config.get("only_required", True),
-                    "format": mlflow_config.get("format", None),
-                    "log_metrics": mlflow_config.get("log_metrics", []),
-                    "output_filename": mlflow_config.get("output_filename", None),
-                }
-            )
-            local_result = local_exporter.export_job(job_data)
-
-            if not local_result.success:
-                logger.error(f"Failed to download artifacts: {local_result.message}")
-                return []
+            should_cleanup = False
+            # Use pre-staged dir if available; otherwise stage now
+            if pre_staged_dir and pre_staged_dir.exists():
+                base_dir = pre_staged_dir
+            else:
+                temp_dir = tempfile.mkdtemp(prefix="mlflow_artifacts_")
+                local_exporter = LocalExporter(
+                    {
+                        "output_dir": str(temp_dir),
+                        "copy_logs": mlflow_config.get(
+                            "log_logs", mlflow_config.get("copy_logs", False)
+                        ),
+                        "only_required": mlflow_config.get("only_required", True),
+                        "format": mlflow_config.get("format", None),
+                        "log_metrics": mlflow_config.get("log_metrics", []),
+                        "output_filename": mlflow_config.get("output_filename", None),
+                    }
+                )
+                local_result = local_exporter.export_job(job_data)
+                if not local_result.success:
+                    logger.error(
+                        f"Failed to download artifacts: {local_result.message}"
+                    )
+                    return []
+                base_dir = Path(local_result.dest)
+                should_cleanup = True

-            base_dir = Path(local_result.dest)
             artifacts_dir = base_dir / "artifacts"
             logs_dir = base_dir / "logs"
             logged_names: list[str] = []
-
             artifact_path = get_artifact_root(job_data)  # "<harness>.<benchmark>"

-            # Log config at root level
+            # Log config at root level (or synthesize)
             cfg_logged = False
             for fname in ("config.yml", "run_config.yml"):
                 p = artifacts_dir / fname
@@ -299,16 +355,19 @@
                         break
             if not cfg_logged:
                 with tempfile.TemporaryDirectory() as tmpdir:
+                    from yaml import dump as ydump
+
                     cfg_file = Path(tmpdir) / "config.yaml"
-                    with cfg_file.open("w") as f:
-                        yaml.dump(
+                    cfg_file.write_text(
+                        ydump(
                             job_data.config or {},
-                            f,
                             default_flow_style=False,
                             sort_keys=False,
                         )
+                    )
                     mlflow.log_artifact(str(cfg_file))

+            # Choose files to upload
             files_to_upload: list[Path] = []
             if mlflow_config.get("only_required", True):
                 for fname in get_available_artifacts(artifacts_dir):
@@ -316,10 +375,11 @@
                     if p.exists():
                         files_to_upload.append(p)
             else:
-                for p in artifacts_dir.iterdir():
+                for p in artifacts_dir.iterdir():  # top-level files only
                     if p.is_file():
                         files_to_upload.append(p)

+            # Upload artifacts (with DEBUG per-file)
             for fpath in files_to_upload:
                 rel = fpath.relative_to(artifacts_dir).as_posix()
                 parent = os.path.dirname(rel)
@@ -328,32 +388,28 @@
                     artifact_path=f"{artifact_path}/artifacts/{parent}".rstrip("/"),
                 )
                 logged_names.append(rel)
+                logger.debug(f"mlflow upload artifact: {rel}")

             # Optionally upload logs under "<harness.task>/logs"
             if mlflow_config.get("log_logs", False) and logs_dir.exists():
-                for p in logs_dir.rglob("*"):
+                for p in logs_dir.iterdir():
                     if p.is_file():
+                        rel = p.name
                         mlflow.log_artifact(
-                            str(p),
-                            artifact_path=f"{artifact_path}/logs",
+                            str(p), artifact_path=f"{artifact_path}/logs"
                         )
-                        logged_names.append(f"logs/{p.name}")
+                        logged_names.append(f"logs/{rel}")
+                        logger.debug(f"mlflow upload log: {rel}")

-            # Debug summary of what we uploaded
             logger.info(
                 f"MLflow upload summary: files={len(logged_names)}, only_required={mlflow_config.get('only_required', True)}, log_logs={mlflow_config.get('log_logs', False)}"
             )
-            if logger.isEnabledFor(10):  # DEBUG
-                try:
-                    preview = "\n - " + "\n - ".join(sorted(logged_names)[:50])
-                    logger.debug(f"Uploaded files preview (first 50):{preview}")
-                except Exception:
-                    pass
+            if should_cleanup:
+                import shutil

-            # cleanup temp
-            shutil.rmtree(temp_dir)
-            return logged_names
+                shutil.rmtree(base_dir, ignore_errors=True)

+            return logged_names
         except Exception as e:
             logger.error(f"Error logging artifacts: {e}")
             return []
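For orientation, a sketch of where _log_artifacts places files inside the MLflow run's artifact store; the harness and benchmark names are placeholders, and the actual prefix comes from get_artifact_root:

    # Illustrative layout of a run's artifacts (hypothetical names):
    #   config.yaml                            <- job config, logged at the artifact root
    #   <harness>.<benchmark>/artifacts/<file> <- selected result files
    #   <harness>.<benchmark>/logs/<file>      <- top-level log files, only when log_logs is enabled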
@@ -391,11 +447,42 @@

         # Collect metrics from ALL jobs
         all_metrics = {}
+        staged_map: dict[str, Path] = {}
+        for job_id, job_data in jobs.items():
+            try:
+                paths = self.get_job_paths(job_data)
+                if paths.get("storage_type") == "remote_ssh":
+                    tmp_stage = Path(tempfile.mkdtemp(prefix="mlflow_inv_stage_"))
+                    LocalExporter(
+                        {
+                            "output_dir": str(tmp_stage),
+                            "copy_logs": mlflow_config.get("log_logs", False),
+                            "only_required": mlflow_config.get(
+                                "only_required", True
+                            ),
+                        }
+                    ).export_job(job_data)
+                    staged_map[job_id] = (
+                        tmp_stage / job_data.invocation_id / job_data.job_id
+                    )
+            except Exception as e:
+                logger.warning(f"Staging failed for {job_id}: {e}")
+
         for job_id, job_data in jobs.items():
             log_metrics = mlflow_config.get("log_metrics", [])
-            job_metrics = extract_accuracy_metrics(
-                job_data, self.get_job_paths, log_metrics
-            )
+            if job_id in staged_map and (staged_map[job_id] / "artifacts").exists():
+                job_metrics = extract_accuracy_metrics(
+                    job_data,
+                    lambda _: {
+                        "artifacts_dir": staged_map[job_id] / "artifacts",
+                        "storage_type": "local_filesystem",
+                    },
+                    log_metrics,
+                )
+            else:
+                job_metrics = extract_accuracy_metrics(
+                    job_data, self.get_job_paths, log_metrics
+                )
             all_metrics.update(job_metrics)

         if not all_metrics:
@@ -414,9 +501,12 @@
         mlflow.set_experiment(experiment_name)

         # Prepare parameters for invocation
+        inv_exec_type = (first_job.config or {}).get("execution", {}).get(
+            "type"
+        ) or first_job.executor
         all_params = {
             "invocation_id": invocation_id,
-            "executor": first_job.executor,
+            "executor": inv_exec_type,
             "timestamp": str(first_job.timestamp),
             "jobs_count": str(len(jobs)),
         }
@@ -472,23 +562,31 @@

             # Set run name
             run_name = mlflow_config.get("run_name") or f"eval-{invocation_id}"
-            mlflow.set_tag("mlflow.runName", run_name)
+            mlflow.set_tag("mlflow.runName", mlflow_sanitize(run_name, "tag_value"))

             # Set description
             description = mlflow_config.get("description")
             if description:
-                mlflow.set_tag("mlflow.note.content", str(description)[:5000])
+                mlflow.set_tag(
+                    "mlflow.note.content", mlflow_sanitize(description, "tag_value")
+                )

             # Log parameters
             mlflow.log_params(safe_params)

-            # Log ALL metrics
-            mlflow.log_metrics(all_metrics)
+            # Sanitize metric keys
+            safe_all_metrics = {
+                mlflow_sanitize(k, "metric"): v
+                for k, v in (all_metrics or {}).items()
+            }
+            mlflow.log_metrics(safe_all_metrics)

             # Log artifacts from all jobs
             total_artifacts = 0
-            for job_data in jobs.values():
-                artifacts_logged = self._log_artifacts(job_data, mlflow_config)
+            for job_id, job_data in jobs.items():
+                artifacts_logged = self._log_artifacts(
+                    job_data, mlflow_config, staged_map.get(job_id)
+                )
                 total_artifacts += len(artifacts_logged)

             # Build run URL
@@ -16,6 +16,7 @@
 """Shared utilities for metrics and configuration handling."""

 import json
+import re
 import subprocess
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Tuple
@@ -306,21 +307,28 @@ def ssh_setup_masters(jobs: Dict[str, JobData]) -> Dict[Tuple[str, str], str]:
     remote_pairs: set[tuple[str, str]] = set()
     for jd in jobs.values():
         try:
-            paths = jd.data.get("paths") or {}
-            if paths.get("storage_type") == "remote_ssh":
-                remote_pairs.add((paths["username"], paths["hostname"]))
+            # Preferred: explicit 'paths' from job data
+            p = (jd.data or {}).get("paths") or {}
+            if (
+                p.get("storage_type") == "remote_ssh"
+                and p.get("username")
+                and p.get("hostname")
+            ):
+                remote_pairs.add((p["username"], p["hostname"]))
+                continue
+            # Fallback: common slurm fields (works with BaseExporter.get_job_paths)
+            d = jd.data or {}
+            if jd.executor == "slurm" and d.get("username") and d.get("hostname"):
+                remote_pairs.add((d["username"], d["hostname"]))
         except Exception:
             pass

     if not remote_pairs:
-        return {}  # no remote jobs
+        return {}

-    # Ensure connections directory exists (like execDB does)
     CONNECTIONS_DIR.mkdir(parents=True, exist_ok=True)
-
     control_paths: Dict[Tuple[str, str], str] = {}
     for username, hostname in remote_pairs:
-        # Simple socket name
         socket_path = CONNECTIONS_DIR / f"{username}_{hostname}.sock"
         try:
             cmd = [
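The control sockets prepared here enable OpenSSH connection multiplexing for the later scp calls. The package's actual cmd list is truncated by the diff context above; the following is only an assumed sketch of what such a master connection typically looks like, with hypothetical values:

    # Sketch (assumption, not the package's command): open a reusable SSH master connection.
    import subprocess

    username, hostname = "user", "slurm-login.example.com"        # hypothetical values
    socket_path = f"/tmp/connections/{username}_{hostname}.sock"   # stand-in for CONNECTIONS_DIR
    cmd = [
        "ssh",
        "-o", "ControlMaster=auto",          # start (or reuse) a master connection
        "-o", f"ControlPath={socket_path}",  # socket that later ssh/scp calls can reuse
        "-o", "ControlPersist=600",          # keep the master alive in the background
        "-N", "-f",                          # no remote command; detach once connected
        f"{username}@{hostname}",
    ]
    subprocess.run(cmd, capture_output=True)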
@@ -371,9 +379,10 @@ def ssh_download_artifacts(
     config: Dict[str, Any] | None = None,
     control_paths: Dict[Tuple[str, str], str] | None = None,
 ) -> List[str]:
-    """Download artifacts via SSH with optional connection reuse."""
+    """Download artifacts/logs via SSH with optional connection reuse."""
     exported_files: List[str] = []
     copy_logs = bool((config or {}).get("copy_logs", False))
+    copy_artifacts = bool((config or {}).get("copy_artifacts", True))
     only_required = bool((config or {}).get("only_required", True))

     control_path = None
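A sketch of the config keys this function now reads; the dict shape is assumed for illustration, only the keys and defaults come from the diff:

    # Assumed config shape passed down to ssh_download_artifacts.
    config = {
        "copy_artifacts": True,   # new in this release: set False to skip the artifacts/ download
        "copy_logs": False,       # when True, fetch top-level files from the remote logs/ directory
        "only_required": True,    # True: only get_relevant_artifacts(); False: files from get_available_artifacts()
    }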
@@ -390,44 +399,49 @@ def ssh_download_artifacts(
                 str(local_path),
             ]
         )
-        result = subprocess.run(cmd, capture_output=True)
-        return result.returncode == 0
+        return subprocess.run(cmd, capture_output=True).returncode == 0

     export_dir.mkdir(parents=True, exist_ok=True)
-    (export_dir / "artifacts").mkdir(parents=True, exist_ok=True)
-
-    available_local = (
-        get_available_artifacts(paths.get("artifacts_dir", Path()))
-        if not only_required
-        else None
-    )
-    artifact_names = (
-        [a for a in get_relevant_artifacts()]
-        if only_required
-        else (available_local or [])
-    )
-
-    for artifact in artifact_names:
-        remote_file = f"{paths['remote_path']}/artifacts/{artifact}"
-        local_file = export_dir / "artifacts" / artifact
-        if scp_file(remote_file, local_file):
-            exported_files.append(str(local_file))

+    # Artifacts
+    if copy_artifacts:
+        art_dir = export_dir / "artifacts"
+        art_dir.mkdir(parents=True, exist_ok=True)
+
+        if only_required:
+            for artifact in get_relevant_artifacts():
+                remote_file = f"{paths['remote_path']}/artifacts/{artifact}"
+                local_file = art_dir / artifact
+                local_file.parent.mkdir(parents=True, exist_ok=True)
+                if scp_file(remote_file, local_file):
+                    exported_files.append(str(local_file))
+        else:
+            # Copy known files individually to avoid subfolders and satisfy tests
+            for artifact in get_available_artifacts(paths.get("artifacts_dir", Path())):
+                remote_file = f"{paths['remote_path']}/artifacts/{artifact}"
+                local_file = art_dir / artifact
+                if scp_file(remote_file, local_file):
+                    exported_files.append(str(local_file))
+
+    # Logs (top-level only)
     if copy_logs:
-        remote_logs = f"{paths['remote_path']}/logs"
         local_logs = export_dir / "logs"
+        remote_logs = f"{paths['remote_path']}/logs"
         cmd = (
             ["scp", "-r"]
             + ssh_opts
             + [
-                f"{paths['username']}@{paths['hostname']}:{remote_logs}",
+                f"{paths['username']}@{paths['hostname']}:{remote_logs}/.",
                 str(local_logs),
             ]
         )
         if subprocess.run(cmd, capture_output=True).returncode == 0:
-            exported_files.extend(
-                [str(f) for f in local_logs.rglob("*") if f.is_file()]
-            )
+            for p in local_logs.iterdir():
+                if p.is_dir():
+                    import shutil
+
+                    shutil.rmtree(p, ignore_errors=True)
+            exported_files.extend([str(f) for f in local_logs.glob("*") if f.is_file()])

     return exported_files

@@ -584,3 +598,41 @@ def _safe_update_metrics(
     """Update target from source safely, raising on collisions with detailed values."""
     for k, v in source.items():
         _safe_set_metric(target, k, v, context)
+
+
+# =============================================================================
+# MLFLOW FUNCTIONS
+# =============================================================================
+
+# MLflow constants
+_MLFLOW_KEY_MAX = 250
+_MLFLOW_PARAM_VAL_MAX = 250
+_MLFLOW_TAG_VAL_MAX = 5000
+
+_INVALID_KEY_CHARS = re.compile(r"[^/\w.\- ]")
+_MULTI_UNDERSCORE = re.compile(r"_+")
+
+
+def mlflow_sanitize(s: Any, kind: str = "key") -> str:
+    """
+    Sanitize strings for MLflow logging.
+
+    kind:
+      - "key", "metric", "tag_key", "param_key": apply key rules
+      - "tag_value": apply tag value rules
+      - "param_value": apply param value rules
+    """
+    s = "" if s is None else str(s)
+
+    if kind in ("key", "metric", "tag_key", "param_key"):
+        # common replacements
+        s = s.replace("pass@", "pass_at_")
+        # drop disallowed chars, collapse underscores, trim
+        s = _INVALID_KEY_CHARS.sub("_", s)
+        s = _MULTI_UNDERSCORE.sub("_", s).strip()
+        return s[:_MLFLOW_KEY_MAX] or "key"
+
+    # values: normalize whitespace, enforce length
+    s = s.replace("\n", " ").replace("\r", " ").strip()
+    max_len = _MLFLOW_TAG_VAL_MAX if kind == "tag_value" else _MLFLOW_PARAM_VAL_MAX
+    return s[:max_len]
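A quick sketch of how the new helper behaves, derived from the rules above; the expected outputs are inferred from the code, not taken from package tests:

    from nemo_evaluator_launcher.exporters.utils import mlflow_sanitize

    mlflow_sanitize("pass@1", "metric")            # -> "pass_at_1"
    mlflow_sanitize("acc::high school", "key")     # -> "acc_high school" (disallowed chars become "_", runs collapsed)
    mlflow_sanitize("line1\nline2", "tag_value")   # -> "line1 line2" (newlines normalized, capped at 5000 chars)
    mlflow_sanitize(None, "param_value")           # -> "" (param values are capped at 250 chars)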
@@ -68,10 +68,41 @@ class WandBExporter(BaseExporter):
             "log_mode", "per_task"
         )  # Default per_task for immediate export

-        # Get metrics
-        metrics = extract_accuracy_metrics(
-            job_data, self.get_job_paths, wandb_config.get("log_metrics", [])
-        )
+        # Stage artifacts locally if remote_ssh (e.g., Slurm), so we can extract metrics
+        staged_base_dir = None
+        try:
+            paths = self.get_job_paths(job_data)
+            if paths.get("storage_type") == "remote_ssh":
+                tmp_stage = Path(tempfile.mkdtemp(prefix="wandb_stage_"))
+                LocalExporter(
+                    {
+                        "output_dir": str(tmp_stage),
+                        "copy_logs": wandb_config.get("log_logs", False),
+                        "only_required": wandb_config.get("only_required", True),
+                    }
+                ).export_job(job_data)
+                staged_base_dir = (
+                    tmp_stage / job_data.invocation_id / job_data.job_id
+                )
+        except Exception as e:
+            logger.warning(f"W&B: staging failed for {job_data.job_id}: {e}")
+
+        # Metrics (prefer staged if available)
+        log_metrics = wandb_config.get("log_metrics", [])
+        if staged_base_dir and (staged_base_dir / "artifacts").exists():
+            metrics = extract_accuracy_metrics(
+                job_data,
+                lambda _: {
+                    "artifacts_dir": staged_base_dir / "artifacts",
+                    "storage_type": "local_filesystem",
+                },
+                log_metrics,
+            )
+        else:
+            metrics = extract_accuracy_metrics(
+                job_data, self.get_job_paths, log_metrics
+            )
+
         if not metrics:
             return ExportResult(
                 success=False, dest="wandb", message="No metrics found"
@@ -345,10 +376,14 @@ class WandBExporter(BaseExporter):
         run_args["resume"] = "allow"

         # Config metadata
+        exec_type = (job_data.config or {}).get("execution", {}).get(
+            "type"
+        ) or job_data.executor
         run_config = {
             "invocation_id": job_data.invocation_id,
-            "executor": job_data.executor,
+            "executor": exec_type,
         }
+
         if log_mode == "per_task":
             run_config["job_id"] = job_data.job_id
             run_config["harness"] = harness
@@ -16,7 +16,7 @@
 # Below is the _next_ version that will be published, not the currently published one.
 MAJOR = 0
 MINOR = 1
-PATCH = 14
+PATCH = 15
 PRE_RELEASE = ""

 # Use the following formatting: (major, minor, patch, pre-release)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nemo-evaluator-launcher
-Version: 0.1.14
+Version: 0.1.15
 Summary: Launcher for the evaluations provided by NeMo Evaluator containers with different runtime backends
 Author: NVIDIA
 Author-email: nemo-toolkit@nvidia.com