nemo-evaluator-launcher 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -70,6 +70,15 @@ class BaseExporter(ABC):
 
     def get_job_paths(self, job_data: JobData) -> Dict[str, Any]:
         """Get result paths based on executor type from job metadata."""
+        # Special case: remote executor artifacts accessed locally (remote auto-export)
+        if job_data.data.get("storage_type") == "remote_local":
+            output_dir = Path(job_data.data["output_dir"])
+            return {
+                "artifacts_dir": output_dir / "artifacts",
+                "logs_dir": output_dir / "logs",
+                "storage_type": "remote_local",
+            }
+
         if job_data.executor == "local":
             output_dir = Path(job_data.data["output_dir"])
             return {
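Note: the new "remote_local" branch above lets exporters read the artifacts of a remote-executor job straight from the local filesystem (the remote auto-export case). A minimal sketch of the same path resolution, with a hypothetical metadata dict standing in for `job_data.data`:

from pathlib import Path

# Hypothetical job metadata, shaped like the job_data.data used above.
data = {"storage_type": "remote_local", "output_dir": "/results/run-001"}

def resolve_paths(data: dict) -> dict:
    # Mirrors the new "remote_local" branch: artifacts are read locally
    # even though the job ran on a remote executor.
    if data.get("storage_type") == "remote_local":
        output_dir = Path(data["output_dir"])
        return {
            "artifacts_dir": output_dir / "artifacts",
            "logs_dir": output_dir / "logs",
            "storage_type": "remote_local",
        }
    raise ValueError("unhandled storage type")

print(resolve_paths(data)["artifacts_dir"])  # /results/run-001/artifacts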
@@ -15,6 +15,7 @@
 #
 """Google Sheets evaluation results exporter."""
 
+import os
 import shutil
 import tempfile
 from pathlib import Path
@@ -89,28 +90,38 @@ class GSheetsExporter(BaseExporter):
             }
 
         try:
+            # Load exporter config from the first job (supports job-embedded config and CLI overrides)
+            first_job = next(iter(jobs.values()))
+            gsheets_config = extract_exporter_config(first_job, "gsheets", self.config)
+
             # Connect to Google Sheets
-            service_account_file = self.config.get("service_account_file")
-            spreadsheet_name = self.config.get(
+            service_account_file = gsheets_config.get("service_account_file")
+            spreadsheet_name = gsheets_config.get(
                 "spreadsheet_name", "NeMo Evaluator Launcher Results"
             )
 
             if service_account_file:
-                gc = gspread.service_account(filename=service_account_file)
+                gc = gspread.service_account(
+                    filename=os.path.expanduser(service_account_file)
+                )
             else:
                 gc = gspread.service_account()
 
             # Get or create spreadsheet
+            spreadsheet_id = gsheets_config.get("spreadsheet_id")
             try:
-                sh = gc.open(spreadsheet_name)
+                if spreadsheet_id:
+                    sh = gc.open_by_key(spreadsheet_id)
+                else:
+                    sh = gc.open(spreadsheet_name)
                 logger.info(f"Opened existing spreadsheet: {spreadsheet_name}")
             except gspread.SpreadsheetNotFound:
+                if spreadsheet_id:
+                    raise  # Can't create with explicit ID
                 sh = gc.create(spreadsheet_name)
                 logger.info(f"Created new spreadsheet: {spreadsheet_name}")
-                sh.share("", perm_type="anyone", role="reader")
 
             worksheet = sh.sheet1
-
             # Extract metrics from ALL jobs first to determine headers
             all_job_metrics = {}
             results = {}
@@ -226,16 +237,23 @@ class GSheetsExporter(BaseExporter):
             )
 
             if service_account_file:
-                gc = gspread.service_account(filename=service_account_file)
+                gc = gspread.service_account(
+                    filename=os.path.expanduser(service_account_file)
+                )
             else:
                 gc = gspread.service_account()
 
             # Get or create spreadsheet
+            spreadsheet_id = gsheets_config.get("spreadsheet_id")
             try:
-                sh = gc.open(spreadsheet_name)
+                if spreadsheet_id:
+                    sh = gc.open_by_key(spreadsheet_id)
+                else:
+                    sh = gc.open(spreadsheet_name)
             except gspread.SpreadsheetNotFound:
+                if spreadsheet_id:
+                    raise  # Can't create with explicit ID
                 sh = gc.create(spreadsheet_name)
-                sh.share("", perm_type="anyone", role="reader")
 
             worksheet = sh.sheet1
 
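Note: with the changes above, the gsheets exporter reads its settings through `extract_exporter_config`, expands `~` in `service_account_file`, and can target an existing sheet by `spreadsheet_id` (in which case it never creates or publicly shares one). A small sketch of the open-by-id vs. open-by-name choice, assuming gspread is installed; the file path and spreadsheet ID are placeholders:

import os
import gspread

# Config keys mirror the gsheets exporter config used above; values are placeholders.
config = {
    "service_account_file": "~/.config/gspread/service_account.json",
    "spreadsheet_id": "1AbCdEfGhIjKlMnOpQrStUvWxYz0123456789",
    "spreadsheet_name": "NeMo Evaluator Launcher Results",
}

gc = gspread.service_account(filename=os.path.expanduser(config["service_account_file"]))
if config.get("spreadsheet_id"):
    sh = gc.open_by_key(config["spreadsheet_id"])  # open an existing sheet by key
else:
    sh = gc.open(config["spreadsheet_name"])       # fall back to lookup by title
print(sh.sheet1.title)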
@@ -74,6 +74,9 @@ class LocalExporter(BaseExporter):
         # Stage artifacts per storage type
         if paths["storage_type"] == "local_filesystem":
             exported_files = self._copy_local_artifacts(paths, job_export_dir, cfg)
+        elif paths["storage_type"] == "remote_local":
+            # Same as local_filesystem (we're on the remote machine, accessing locally)
+            exported_files = self._copy_local_artifacts(paths, job_export_dir, cfg)
         elif paths["storage_type"] == "remote_ssh":
             exported_files = ssh_download_artifacts(
                 paths, job_export_dir, cfg, None
@@ -125,6 +128,8 @@ class LocalExporter(BaseExporter):
                 logger.warning(f"Failed to create {fmt} summary: {e}")
                 msg += " (summary failed)"
 
+        meta["output_dir"] = str(job_export_dir.resolve())
+
         return ExportResult(
             success=True, dest=str(job_export_dir), message=msg, metadata=meta
         )
@@ -15,6 +15,7 @@
 #
 """Evaluation results exporter for MLflow tracking."""
 
+import os
 import shutil
 import tempfile
 from pathlib import Path
@@ -37,6 +38,7 @@ from nemo_evaluator_launcher.exporters.registry import register_exporter
 from nemo_evaluator_launcher.exporters.utils import (
     extract_accuracy_metrics,
     extract_exporter_config,
+    get_artifact_root,
     get_available_artifacts,
     get_benchmark_info,
     get_task_name,
@@ -100,6 +102,21 @@ class MLflowExporter(BaseExporter):
         # Extract config using common utility
         mlflow_config = extract_exporter_config(job_data, "mlflow", self.config)
 
+        # resolve tracking_uri with fallbacks
+        tracking_uri = mlflow_config.get("tracking_uri")
+        if not tracking_uri:
+            tracking_uri = os.getenv("MLFLOW_TRACKING_URI")
+        # allow env var name
+        if tracking_uri and "://" not in tracking_uri:
+            tracking_uri = os.getenv(tracking_uri, tracking_uri)
+
+        if not tracking_uri:
+            return ExportResult(
+                success=False,
+                dest="mlflow",
+                message="tracking_uri is required (set export.mlflow.tracking_uri or MLFLOW_TRACKING_URI)",
+            )
+
         # Extract metrics
         log_metrics = mlflow_config.get("log_metrics", [])
         accuracy_metrics = extract_accuracy_metrics(
@@ -112,12 +129,6 @@ class MLflowExporter(BaseExporter):
         )
 
         # Set up MLflow
-        tracking_uri = mlflow_config.get("tracking_uri")
-        if not tracking_uri:
-            return ExportResult(
-                success=False, dest="mlflow", message="tracking_uri is required"
-            )
-
         tracking_uri = tracking_uri.rstrip("/")
         mlflow.set_tracking_uri(tracking_uri)
 
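Note: the two hunks above move `tracking_uri` resolution ahead of metric extraction and accept either a literal URI, the `MLFLOW_TRACKING_URI` environment variable, or the name of another environment variable that holds the URI. A standalone sketch of that resolution order (the helper name is ours):

import os

def resolve_tracking_uri(configured: str | None) -> str | None:
    # 1) explicit config value, 2) MLFLOW_TRACKING_URI, 3) if the value has no
    #    scheme, treat it as the name of an environment variable to dereference.
    uri = configured or os.getenv("MLFLOW_TRACKING_URI")
    if uri and "://" not in uri:
        uri = os.getenv(uri, uri)
    return uri

os.environ["MY_MLFLOW"] = "http://mlflow.internal:5000"
print(resolve_tracking_uri("MY_MLFLOW"))  # http://mlflow.internal:5000
print(resolve_tracking_uri(None))         # MLFLOW_TRACKING_URI if set, else None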
@@ -253,37 +264,91 @@ class MLflowExporter(BaseExporter):
         try:
             # Use LocalExporter to get files locally first
             temp_dir = tempfile.mkdtemp(prefix="mlflow_artifacts_")
-            local_exporter = LocalExporter({"output_dir": temp_dir})
+            local_exporter = LocalExporter(
+                {
+                    "output_dir": temp_dir,
+                    "copy_logs": mlflow_config.get(
+                        "log_logs", mlflow_config.get("copy_logs", False)
+                    ),
+                    "only_required": mlflow_config.get("only_required", True),
+                    "format": mlflow_config.get("format", None),
+                    "log_metrics": mlflow_config.get("log_metrics", []),
+                    "output_filename": mlflow_config.get("output_filename", None),
+                }
+            )
             local_result = local_exporter.export_job(job_data)
 
             if not local_result.success:
                 logger.error(f"Failed to download artifacts: {local_result.message}")
                 return []
 
-            artifacts_dir = Path(local_result.dest) / "artifacts"
-            logged_names = []
+            base_dir = Path(local_result.dest)
+            artifacts_dir = base_dir / "artifacts"
+            logs_dir = base_dir / "logs"
+            logged_names: list[str] = []
 
-            task_name = get_task_name(job_data)
-            artifact_path = task_name
+            artifact_path = get_artifact_root(job_data)  # "<harness>.<benchmark>"
 
             # Log config at root level
-            with tempfile.TemporaryDirectory() as tmpdir:
-                cfg_file = Path(tmpdir) / "config.yaml"
-                with cfg_file.open("w") as f:
-                    yaml.dump(
-                        job_data.config or {},
-                        f,
-                        default_flow_style=False,
-                        sort_keys=False,
-                    )
-                mlflow.log_artifact(str(cfg_file))
-
-            # Then log results files
-            for fname in get_available_artifacts(artifacts_dir):
-                file_path = artifacts_dir / fname
-                if file_path.exists():
-                    mlflow.log_artifact(str(file_path), artifact_path=artifact_path)
-                    logged_names.append(fname)
+            cfg_logged = False
+            for fname in ("config.yml", "run_config.yml"):
+                p = artifacts_dir / fname
+                if p.exists():
+                    mlflow.log_artifact(str(p))
+                    cfg_logged = True
+                    break
+            if not cfg_logged:
+                with tempfile.TemporaryDirectory() as tmpdir:
+                    cfg_file = Path(tmpdir) / "config.yaml"
+                    with cfg_file.open("w") as f:
+                        yaml.dump(
+                            job_data.config or {},
+                            f,
+                            default_flow_style=False,
+                            sort_keys=False,
+                        )
+                    mlflow.log_artifact(str(cfg_file))
+
+            files_to_upload: list[Path] = []
+            if mlflow_config.get("only_required", True):
+                for fname in get_available_artifacts(artifacts_dir):
+                    p = artifacts_dir / fname
+                    if p.exists():
+                        files_to_upload.append(p)
+            else:
+                for p in artifacts_dir.iterdir():
+                    if p.is_file():
+                        files_to_upload.append(p)
+
+            for fpath in files_to_upload:
+                rel = fpath.relative_to(artifacts_dir).as_posix()
+                parent = os.path.dirname(rel)
+                mlflow.log_artifact(
+                    str(fpath),
+                    artifact_path=f"{artifact_path}/artifacts/{parent}".rstrip("/"),
+                )
+                logged_names.append(rel)
+
+            # Optionally upload logs under "<harness.task>/logs"
+            if mlflow_config.get("log_logs", False) and logs_dir.exists():
+                for p in logs_dir.rglob("*"):
+                    if p.is_file():
+                        mlflow.log_artifact(
+                            str(p),
+                            artifact_path=f"{artifact_path}/logs",
+                        )
+                        logged_names.append(f"logs/{p.name}")
+
+            # Debug summary of what we uploaded
+            logger.info(
+                f"MLflow upload summary: files={len(logged_names)}, only_required={mlflow_config.get('only_required', True)}, log_logs={mlflow_config.get('log_logs', False)}"
+            )
+            if logger.isEnabledFor(10):  # DEBUG
+                try:
+                    preview = "\n - " + "\n - ".join(sorted(logged_names)[:50])
+                    logger.debug(f"Uploaded files preview (first 50):{preview}")
+                except Exception:
+                    pass
 
             # cleanup temp
             shutil.rmtree(temp_dir)
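Note: the rewritten upload loop above places each result file under `<harness>.<benchmark>/artifacts/<relative parent>` and, when `log_logs` is set, log files under `<harness>.<benchmark>/logs`. A short sketch of how the destination path is derived for one file; the paths and the root value are examples:

import os
from pathlib import Path

artifact_root = "harness.benchmark"  # example output of get_artifact_root(...)
artifacts_dir = Path("/tmp/mlflow_artifacts_x/artifacts")
fpath = artifacts_dir / "subdir" / "results.json"

rel = fpath.relative_to(artifacts_dir).as_posix()         # "subdir/results.json"
parent = os.path.dirname(rel)                             # "subdir"
dest = f"{artifact_root}/artifacts/{parent}".rstrip("/")  # "harness.benchmark/artifacts/subdir"
print(dest)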
@@ -312,6 +377,18 @@ class MLflowExporter(BaseExporter):
         # Extract config using common utility
         mlflow_config = extract_exporter_config(first_job, "mlflow", self.config)
 
+        # resolve tracking_uri with fallbacks
+        tracking_uri = mlflow_config.get("tracking_uri") or os.getenv(
+            "MLFLOW_TRACKING_URI"
+        )
+        if tracking_uri and "://" not in tracking_uri:
+            tracking_uri = os.getenv(tracking_uri, tracking_uri)
+        if not tracking_uri:
+            return {
+                "success": False,
+                "error": "tracking_uri is required (set export.mlflow.tracking_uri or MLFLOW_TRACKING_URI)",
+            }
+
         # Collect metrics from ALL jobs
         all_metrics = {}
         for job_id, job_data in jobs.items():
@@ -328,10 +405,6 @@ class MLflowExporter(BaseExporter):
         }
 
         # Set up MLflow
-        tracking_uri = mlflow_config.get("tracking_uri")
-        if not tracking_uri:
-            return {"success": False, "error": "tracking_uri is required"}
-
         tracking_uri = tracking_uri.rstrip("/")
         mlflow.set_tracking_uri(tracking_uri)
 
@@ -148,15 +148,12 @@ def extract_exporter_config(
     """Extract and merge exporter configuration from multiple sources."""
     config = {}
 
-    # Get config from dedicated field
+    # root-level `export.<exporter-name>`
     if job_data.config:
-        execution_config = job_data.config.get("execution", {})
-        auto_export_config = execution_config.get("auto_export", {})
-        exporter_configs = auto_export_config.get("configs", {})
-        yaml_config = exporter_configs.get(exporter_name, {})
-
-        # No conversion needed
-        config.update(yaml_config)
+        export_block = (job_data.config or {}).get("export", {})
+        yaml_config = (export_block or {}).get(exporter_name, {})
+        if yaml_config:
+            config.update(yaml_config)
 
     # From webhook metadata (if triggered by webhook)
     if "webhook_metadata" in job_data.data:
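Note: exporter settings are now read from a top-level `export.<exporter-name>` block of the job's run config instead of `execution.auto_export.configs.<exporter-name>`; webhook metadata and constructor (CLI) overrides are still merged on top in that order. A sketch of the new lookup against a hypothetical run config:

# Hypothetical run config (as a dict), showing the new location the lookup reads.
run_config = {
    "export": {
        "mlflow": {"tracking_uri": "http://mlflow.internal:5000", "only_required": True},
        "gsheets": {"spreadsheet_name": "NeMo Evaluator Launcher Results"},
    },
}

exporter_name = "mlflow"
yaml_config = (run_config.get("export") or {}).get(exporter_name, {})
print(yaml_config)  # {'tracking_uri': 'http://mlflow.internal:5000', 'only_required': True}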
@@ -167,8 +164,6 @@ def extract_exporter_config(
             "source_artifact": f"{webhook_data.get('artifact_name', 'unknown')}:{webhook_data.get('artifact_version', 'unknown')}",
             "config_source": webhook_data.get("config_file", "unknown"),
         }
-
-        # For W&B specifically, extract run info if available
         if exporter_name == "wandb" and webhook_data.get("webhook_source") == "wandb":
             wandb_specific = {
                 "entity": webhook_data.get("entity"),
@@ -176,10 +171,9 @@ def extract_exporter_config(
                 "run_id": webhook_data.get("run_id"),
             }
             webhook_config.update({k: v for k, v in wandb_specific.items() if v})
-
         config.update(webhook_config)
 
-    # Constructor config: allows CLI overrides
+    # allows CLI overrides
     if constructor_config:
         config.update(constructor_config)
 
@@ -269,6 +263,14 @@ def get_container_from_mapping(job_data: JobData) -> str:
     return None
 
 
+def get_artifact_root(job_data: JobData) -> str:
+    """Get artifact root from job data."""
+    bench = get_benchmark_info(job_data)
+    h = bench.get("harness", "unknown")
+    b = bench.get("benchmark", get_task_name(job_data))
+    return f"{h}.{b}"
+
+
 # =============================================================================
 # GITLAB DOWNLOAD
 # =============================================================================
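Note: `get_artifact_root` above builds the MLflow artifact folder name as `<harness>.<benchmark>`, falling back to "unknown" and the task name when benchmark info is incomplete. A small sketch of that fallback, with hypothetical benchmark-info values:

def artifact_root(bench: dict, task_name: str) -> str:
    # Mirrors get_artifact_root: "<harness>.<benchmark>", with fallbacks.
    h = bench.get("harness", "unknown")
    b = bench.get("benchmark", task_name)
    return f"{h}.{b}"

print(artifact_root({"harness": "lm-evaluation-harness", "benchmark": "mmlu"}, "mmlu"))  # lm-evaluation-harness.mmlu
print(artifact_root({}, "gsm8k"))  # unknown.gsm8k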
@@ -288,91 +290,6 @@ def download_gitlab_artifacts(
     Dictionary mapping artifact names to local file paths
     """
     raise NotImplementedError("Downloading from gitlab is not implemented")
-    # TODO: rework this logic
-    # pipeline_id = paths["pipeline_id"]
-    # project_id = paths["project_id"]
-    # gitlab_token = os.getenv("GITLAB_TOKEN")
-    #
-    # if not gitlab_token:
-    #     raise RuntimeError(
-    #         "GITLAB_TOKEN environment variable required for GitLab remote downloads"
-    #     )
-    #
-    # # GitLab API endpoint for artifacts
-    # base_url = "TODO: replace"
-    # artifacts_url = "TODO: replace"
-    #
-    # headers = {"Private-Token": gitlab_token}
-    # downloaded_artifacts = {}
-    #
-    # try:
-    #     # Get pipeline jobs
-    #     response = requests.get(artifacts_url, headers=headers, timeout=30)
-    #     response.raise_for_status()
-    #     jobs = response.json()
-    #
-    #     for job in jobs:
-    #         if job.get("artifacts_file"):
-    #             job_id = job["id"]
-    #             job_name = job.get("name", f"job_{job_id}")
-    #             artifacts_download_url = (
-    #                 f"{base_url}/api/v4/projects/{project_id}/jobs/{job_id}/artifacts"
-    #             )
-    #
-    #             logger.info(f"Downloading artifacts from job: {job_name}")
-    #
-    #             # Download job artifacts
-    #             response = requests.get(
-    #                 artifacts_download_url, headers=headers, timeout=300
-    #             )
-    #             response.raise_for_status()
-    #
-    #             if extract_specific:
-    #                 # Extract specific files from ZIP
-    #                 with tempfile.NamedTemporaryFile(
-    #                     suffix=".zip", delete=False
-    #                 ) as temp_zip:
-    #                     temp_zip.write(response.content)
-    #                     temp_zip_path = temp_zip.name
-    #
-    #                 try:
-    #                     with zipfile.ZipFile(temp_zip_path, "r") as zip_ref:
-    #                         # Create artifacts directory
-    #                         artifacts_dir = export_dir / "artifacts"
-    #                         artifacts_dir.mkdir(parents=True, exist_ok=True)
-    #
-    #                         # Extract to be logged artifacts
-    #                         for member in zip_ref.namelist():
-    #                             filename = Path(member).name
-    #                             if filename in get_relevant_artifacts():
-    #                                 # Extract the file
-    #                                 source = zip_ref.open(member)
-    #                                 target_path = artifacts_dir / filename
-    #                                 with open(target_path, "wb") as f:
-    #                                     f.write(source.read())
-    #                                 source.close()
-    #
-    #                                 downloaded_artifacts[filename] = target_path
-    #                                 logger.info(f"Extracted: {filename}")
-    #                 finally:
-    #                     os.unlink(temp_zip_path)
-    #             else:
-    #                 # Save as ZIP files (original behavior)
-    #                 artifacts_zip = export_dir / f"job_{job_id}_artifacts.zip"
-    #                 with open(artifacts_zip, "wb") as f:
-    #                     f.write(response.content)
-    #
-    #                 downloaded_artifacts[f"job_{job_id}_artifacts.zip"] = artifacts_zip
-    #                 logger.info(f"Downloaded: {artifacts_zip.name}")
-    #
-    # except requests.RequestException as e:
-    #     logger.error(f"GitLab API request failed: {e}")
-    #     raise RuntimeError(f"GitLab API request failed: {e}")
-    # except Exception as e:
-    #     logger.error(f"GitLab remote download failed: {e}")
-    #     raise RuntimeError(f"GitLab remote download failed: {e}")
-    #
-    # return downloaded_artifacts
 
 
@@ -522,16 +439,16 @@ def ssh_download_artifacts(
 
 def _get_artifacts_dir(paths: Dict[str, Any]) -> Path:
     """Get artifacts directory from paths."""
-    if paths["storage_type"] == "local_filesystem":
-        return paths["artifacts_dir"]
-    elif paths["storage_type"] == "gitlab_ci_local":
-        return paths["artifacts_dir"]
-    elif paths["storage_type"] == "remote_ssh":
-        return None
-    else:
-        logger.error(f"Unsupported storage type: {paths['storage_type']}")
+    storage_type = paths.get("storage_type")
+
+    # For SSH-based remote access, artifacts aren't available locally yet
+    if storage_type == "remote_ssh":
         return None
 
+    # For all local access (local_filesystem, remote_local, gitlab_ci_local)
+    # return the artifacts_dir from paths
+    return paths.get("artifacts_dir")
+
 
 def _extract_metrics_from_results(results: dict) -> Dict[str, float]:
     """Extract metrics from a 'results' dict (with optional 'groups'/'tasks')."""