nemo-evaluator-launcher 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -456,6 +456,7 @@ def export_results(
456
456
  yaml.safe_load(ypath_export.read_text(encoding="utf-8"))
457
457
  or {}
458
458
  )
459
+ # execution.auto_export contains auto-export destinations
459
460
  exec_cfg = cfg_yaml.get("execution") or {}
460
461
  auto_exp = (exp_yaml.get("execution") or {}).get(
461
462
  "auto_export"
@@ -463,15 +464,39 @@ def export_results(
463
464
  if auto_exp is not None:
464
465
  exec_cfg["auto_export"] = auto_exp
465
466
  cfg_yaml["execution"] = exec_cfg
467
+
468
+ # top-level export block contains exporter config
469
+ if "export" in exp_yaml:
470
+ cfg_yaml["export"] = exp_yaml["export"]
471
+
472
+ # Merge evaluation.tasks from export_config (Slurm writes it there)
473
+ if "evaluation" in exp_yaml and exp_yaml["evaluation"]:
474
+ eval_cfg = cfg_yaml.get("evaluation") or {}
475
+ eval_cfg.update(exp_yaml["evaluation"])
476
+ cfg_yaml["evaluation"] = eval_cfg
477
+
466
478
  # metadata
479
+ executor_name = (cfg_yaml.get("execution") or {}).get(
480
+ "type", "local"
481
+ )
482
+
467
483
  md_job_data = JobData(
468
484
  invocation_id=single_id.split(".")[0],
469
485
  job_id=single_id,
470
486
  timestamp=0.0,
471
- executor="local", #
472
- data={"output_dir": str(Path.cwd().parent)},
487
+ executor=executor_name,
488
+ data={
489
+ "output_dir": str(Path.cwd().parent),
490
+ "storage_type": "remote_local",
491
+ },
473
492
  config=cfg_yaml,
474
493
  )
494
+ # DEBUG: print what we loaded
495
+ print(f"DEBUG: cfg_yaml keys: {list(cfg_yaml.keys())}")
496
+ if "evaluation" in cfg_yaml:
497
+ print(
498
+ f"DEBUG: evaluation.tasks: {cfg_yaml.get('evaluation', {}).get('tasks')}"
499
+ )
475
500
  except Exception:
476
501
  md_job_data = None
477
502
  # fallback to execDB only
@@ -492,6 +517,7 @@ def export_results(
492
517
  "success": job_result.success,
493
518
  "message": job_result.message,
494
519
  "metadata": job_result.metadata or {},
520
+ "dest": getattr(job_result, "dest", None),
495
521
  }
496
522
  },
497
523
  "metadata": job_result.metadata or {},
@@ -27,8 +27,8 @@ class ExportCmd:
27
27
 
28
28
  # Short usage examples will show up in -h as the class docstring:
29
29
  # Examples:
30
- # nemo-evaluator-launcher export 8abcd123 --dest local --format json -o .
31
- # nemo-evaluator-launcher export 8abcd123.0 9ef01234 --dest local --format csv -o results/ -fname processed_results.csv
30
+ # nemo-evaluator-launcher export 8abcd123 --dest local --format json --out .
31
+ # nemo-evaluator-launcher export 8abcd123.0 9ef01234 --dest local --format csv --out results/ -fname processed_results.csv
32
32
  # nemo-evaluator-launcher export 8abcd123 --dest jet
33
33
 
34
34
  invocation_ids: List[str] = field(
@@ -41,9 +41,17 @@ class ExportCmd:
41
41
  choices=["local", "wandb", "mlflow", "gsheets", "jet"],
42
42
  help="Export destination.",
43
43
  )
44
+ # overrides for exporter config; use -o similar to run command
45
+ override: List[str] = field(
46
+ default_factory=list,
47
+ action="append",
48
+ nargs="?",
49
+ alias=["-o", "--override"],
50
+ help="Hydra-style overrides for exporter config. Use `export.<dest>.key=value` (e.g., -o export.wandb.entity=org-name).",
51
+ )
44
52
  output_dir: Optional[str] = field(
45
53
  default=".",
46
- alias=["--output-dir", "-o"],
54
+ alias=["--output-dir", "-out"],
47
55
  help="Output directory (default: current directory).",
48
56
  )
49
57
  output_filename: Optional[str] = field(
@@ -67,8 +75,8 @@ class ExportCmd:
67
75
  alias=["--log-metrics"],
68
76
  help="Filter metrics by name (repeatable). Examples: score, f1, mmlu_score_micro.",
69
77
  )
70
- only_required: bool = field(
71
- default=True,
78
+ only_required: Optional[bool] = field(
79
+ default=None,
72
80
  alias=["--only-required"],
73
81
  help="Copy only required+optional artifacts (default: True). Set to False to copy all available artifacts.",
74
82
  )
@@ -76,11 +84,20 @@ class ExportCmd:
76
84
  def execute(self) -> None:
77
85
  """Execute export."""
78
86
  # Import heavy dependencies only when needed
87
+ from omegaconf import OmegaConf
88
+
79
89
  from nemo_evaluator_launcher.api.functional import export_results
80
90
 
91
+ # Validation: ensure IDs are provided
92
+ if not self.invocation_ids:
93
+ print("Error: No IDs provided. Specify one or more invocation or job IDs.")
94
+ print(
95
+ "Usage: nemo-evaluator-launcher export <id> [<id>...] --dest <destination>"
96
+ )
97
+ return
98
+
81
99
  config: dict[str, Any] = {
82
100
  "copy_logs": self.copy_logs,
83
- "only_required": self.only_required,
84
101
  }
85
102
 
86
103
  # Output handling
@@ -95,20 +112,90 @@ class ExportCmd:
95
112
  if self.log_metrics:
96
113
  config["log_metrics"] = self.log_metrics
97
114
 
115
+ # Add only_required if explicitly passed via CLI
116
+ if self.only_required is not None:
117
+ config["only_required"] = self.only_required
118
+
119
+ # Parse and validate overrides
120
+ if self.override:
121
+ # Flatten possible list-of-lists from parser
122
+ flat_overrides: list[str] = []
123
+ for item in self.override:
124
+ if isinstance(item, list):
125
+ flat_overrides.extend(str(x) for x in item)
126
+ else:
127
+ flat_overrides.append(str(item))
128
+
129
+ try:
130
+ self._validate_overrides(flat_overrides, self.dest)
131
+ except ValueError as e:
132
+ print(f"Error: {e}")
133
+ return
134
+
135
+ # Expand env vars in override vals ($VAR / ${VAR})
136
+ import os
137
+
138
+ from omegaconf import OmegaConf
139
+
140
+ expanded_overrides: list[str] = []
141
+ for ov in flat_overrides:
142
+ if "=" in ov:
143
+ k, v = ov.split("=", 1)
144
+ expanded_overrides.append(f"{k}={os.path.expandvars(v)}")
145
+ else:
146
+ expanded_overrides.append(os.path.expandvars(ov))
147
+
148
+ dot_cfg = OmegaConf.from_dotlist(expanded_overrides)
149
+ as_dict = OmegaConf.to_container(dot_cfg, resolve=True) or {}
150
+ if isinstance(as_dict, dict) and "export" in as_dict:
151
+ export_map = as_dict.get("export") or {}
152
+ if isinstance(export_map, dict) and self.dest in export_map:
153
+ config.update(export_map[self.dest] or {})
154
+ else:
155
+ config.update(as_dict)
156
+ else:
157
+ config.update(as_dict)
158
+
98
159
  if self.format and self.dest != "local":
99
160
  print(
100
161
  "Note: --format is only used by --dest local. It will be ignored for other destinations."
101
162
  )
102
163
 
103
- # Execute
164
+ if "only_required" in config and self.only_required is True:
165
+ config.pop("only_required", None)
166
+
104
167
  print(
105
168
  f"Exporting {len(self.invocation_ids)} {'invocations' if len(self.invocation_ids) > 1 else 'invocation'} to {self.dest}..."
106
169
  )
107
170
 
108
171
  result = export_results(self.invocation_ids, self.dest, config)
109
172
 
110
- if not result["success"]:
111
- print(f"Export failed: {result.get('error', 'Unknown error')}")
173
+ if not result.get("success", False):
174
+ err = result.get("error", "Unknown error")
175
+ print(f"\nExport failed: {err}")
176
+ # Provide actionable guidance for common configuration issues
177
+ if self.dest == "mlflow":
178
+ if "tracking_uri" in str(err).lower():
179
+ print("\nMLflow requires 'tracking_uri' to be configured.")
180
+ print(
181
+ "Set it via: -o export.mlflow.tracking_uri=http://mlflow-server:5000"
182
+ )
183
+ elif "not installed" in str(err).lower():
184
+ print("\nMLflow package not installed.")
185
+ print("Install via: pip install nemo-evaluator-launcher[mlflow]")
186
+ elif self.dest == "wandb":
187
+ if "entity" in str(err).lower() or "project" in str(err).lower():
188
+ print("\nW&B requires 'entity' and 'project' to be configured.")
189
+ print(
190
+ "Set via: -o export.wandb.entity=my-org -o export.wandb.project=my-proj"
191
+ )
192
+ elif "not installed" in str(err).lower():
193
+ print("\nW&B package not installed.")
194
+ print("Install via: pip install nemo-evaluator-launcher[wandb]")
195
+ elif self.dest == "gsheets":
196
+ if "not installed" in str(err).lower():
197
+ print("\nGoogle Sheets package not installed.")
198
+ print("Install via: pip install nemo-evaluator-launcher[gsheets]")
112
199
  return
113
200
 
114
201
  # Success path
@@ -125,6 +212,9 @@ class ExportCmd:
125
212
  print(f" URL: {metadata['run_url']}")
126
213
  if metadata.get("summary_path"):
127
214
  print(f" Summary: {metadata['summary_path']}")
215
+ path_hint = job_result.get("dest") or metadata.get("output_dir")
216
+ if self.dest == "local" and path_hint:
217
+ print(f" Path: {path_hint}")
128
218
  else:
129
219
  print(f" {job_id} failed: {job_result.get('message', '')}")
130
220
  else:
@@ -137,7 +227,6 @@ class ExportCmd:
137
227
  # Show summary path if available
138
228
  if metadata.get("summary_path"):
139
229
  print(f"Summary: {metadata['summary_path']}")
140
-
141
230
  # Show per-invocation status
142
231
  for invocation_id, inv_result in result["invocations"].items():
143
232
  if inv_result.get("success"):
@@ -147,3 +236,32 @@ class ExportCmd:
147
236
  print(
148
237
  f" {invocation_id}: failed, {inv_result.get('error', 'Unknown error')}"
149
238
  )
239
+
240
+ def _validate_overrides(self, overrides: List[str], dest: str) -> None:
241
+ """Validate override list for destination consistency.
242
+
243
+ Raises:
244
+ ValueError: If overrides specify wrong destination or have other issues.
245
+ """
246
+ if not overrides:
247
+ return # nothing to validate
248
+
249
+ # Check each override for destination mismatch
250
+ for override_str in overrides:
251
+ if override_str.startswith(
252
+ "export."
253
+ ): # check if override starts with export.
254
+ # Extract destination from override path
255
+ try:
256
+ key_part = override_str.split("=")[0] # Get left side before =
257
+ parts = key_part.split(".")
258
+ if len(parts) >= 2:
259
+ override_dest = parts[1]
260
+ if override_dest != dest:
261
+ raise ValueError(
262
+ f"Override destination mismatch: override specifies 'export.{override_dest}' but --dest is '{dest}'. "
263
+ f"Either change --dest to '{override_dest}' or use 'export.{dest}' in overrides."
264
+ )
265
+ except (IndexError, AttributeError):
266
+ # malformed override -> OmegaConf handles this
267
+ pass
@@ -98,7 +98,17 @@ class Cmd:
98
98
  config_dir=self.config_dir,
99
99
  )
100
100
 
101
- invocation_id = run_eval(config, self.dry_run)
101
+ try:
102
+ invocation_id = run_eval(config, self.dry_run)
103
+ except Exception as e:
104
+ print(f"\033[31m✗ Job submission failed | Error: {e}\033[0m")
105
+ raise
106
+
107
+ # Print general success message with invocation ID
108
+ if invocation_id is not None and not self.dry_run:
109
+ print(
110
+ f"\033[32m✓ Job submission successful | Invocation ID: {invocation_id}\033[0m"
111
+ )
102
112
 
103
113
  # Save the complete configuration
104
114
  if not self.dry_run and invocation_id is not None:
@@ -146,6 +156,15 @@ class Cmd:
146
156
  if invocation_id is not None:
147
157
  print(f"to check status: nemo-evaluator-launcher status {invocation_id}")
148
158
  print(f"to kill all jobs: nemo-evaluator-launcher kill {invocation_id}")
149
- print(
150
- f"to kill individual jobs: nemo-evaluator-launcher kill <job_id> (e.g., {invocation_id}.0)"
159
+
160
+ # Show actual job IDs and task names
161
+ print("to kill individual jobs:")
162
+ # Access tasks - will work after normalization in run_eval
163
+ tasks = (
164
+ config.evaluation.tasks
165
+ if hasattr(config.evaluation, "tasks")
166
+ else config.evaluation
151
167
  )
168
+ for idx, task in enumerate(tasks):
169
+ job_id = f"{invocation_id}.{idx}"
170
+ print(f" nemo-evaluator-launcher kill {job_id} # {task.name}")
@@ -102,6 +102,8 @@ class Cmd:
102
102
  status = job.get("status", "")
103
103
  formatted_status = self._format_status_with_indicators(status)
104
104
 
105
+ # Extract task name
106
+
105
107
  rows.append(
106
108
  [
107
109
  job.get("job_id", ""),
@@ -144,7 +146,7 @@ class Cmd:
144
146
  ExecutionState.SUCCESS.value: "\033[32m✓ SUCCESS\033[0m", # Green Unicode checkmark
145
147
  ExecutionState.FAILED.value: "\033[31m✗ FAILED\033[0m", # Red Unicode X
146
148
  ExecutionState.RUNNING.value: "\033[33m▶ RUNNING\033[0m", # Yellow Unicode play button
147
- ExecutionState.PENDING.value: "\033[36m PENDING\033[0m", # Cyan Unicode hourglass
149
+ ExecutionState.PENDING.value: "\033[36m PENDING\033[0m", # Cyan Unicode hourglass (U+29D7)
148
150
  ExecutionState.KILLED.value: "\033[35m✗ KILLED\033[0m", # Magenta Unicode X
149
151
  # Additional states for error handling
150
152
  "not_found": "\033[90m? NOT FOUND\033[0m", # Gray question mark
@@ -428,14 +428,34 @@ def create_lepton_endpoint(cfg: DictConfig, endpoint_name: str) -> bool:
428
428
  print(f"✅ Successfully created Lepton endpoint: {endpoint_name}")
429
429
  return True
430
430
  else:
431
- print(f"❌ Failed to create Lepton endpoint: {result.stderr}")
431
+ error_msg = result.stderr.strip() if result.stderr else ""
432
+ output_msg = result.stdout.strip() if result.stdout else ""
433
+ print(
434
+ f"✗ Failed to create Lepton endpoint | Endpoint: {endpoint_name} | Return code: {result.returncode}"
435
+ )
436
+ if error_msg:
437
+ print(f" stderr: {error_msg}")
438
+ if output_msg:
439
+ print(f" stdout: {output_msg}")
432
440
  return False
433
441
 
434
- except subprocess.TimeoutExpired:
435
- print(f"❌ Timeout creating Lepton endpoint: {endpoint_name}")
442
+ except subprocess.TimeoutExpired as e:
443
+ print(
444
+ f"✗ Timeout creating Lepton endpoint | Endpoint: {endpoint_name} | Timeout: 300s"
445
+ )
446
+ if hasattr(e, "stderr") and e.stderr:
447
+ print(f" stderr: {e.stderr}")
448
+ if hasattr(e, "stdout") and e.stdout:
449
+ print(f" stdout: {e.stdout}")
436
450
  return False
437
451
  except subprocess.CalledProcessError as e:
438
- print(f"❌ Error creating Lepton endpoint: {e}")
452
+ print(
453
+ f"✗ Error creating Lepton endpoint | Endpoint: {endpoint_name} | Error: {e}"
454
+ )
455
+ if hasattr(e, "stderr") and e.stderr:
456
+ print(f" stderr: {e.stderr}")
457
+ if hasattr(e, "stdout") and e.stdout:
458
+ print(f" stdout: {e.stdout}")
439
459
  return False
440
460
  finally:
441
461
  # Clean up temporary file
@@ -482,7 +482,8 @@ class LeptonExecutor(BaseExecutor):
482
482
 
483
483
  if not job_success:
484
484
  raise RuntimeError(
485
- f"Failed to submit Lepton job for task: {task.name}. Error: {error_msg}"
485
+ f"Failed to submit Lepton job | Task: {task.name} | Job ID: {job_id} | "
486
+ f"Lepton job name: {lepton_job_name} | Error: {error_msg}"
486
487
  )
487
488
 
488
489
  # Store job metadata in database (with task-specific endpoint info)
@@ -504,8 +505,6 @@ class LeptonExecutor(BaseExecutor):
504
505
  )
505
506
  )
506
507
 
507
- print(f"✅ Task {task.name}: Submitted evaluation job {job_id}")
508
-
509
508
  # Jobs submitted successfully - return immediately (non-blocking)
510
509
  print(
511
510
  f"\n✅ Successfully submitted {len(lepton_job_names)} evaluation jobs to Lepton"
@@ -536,9 +535,8 @@ class LeptonExecutor(BaseExecutor):
536
535
 
537
536
  return invocation_id
538
537
 
539
- except Exception as e:
538
+ except Exception:
540
539
  # Clean up any created endpoints on failure
541
- print(f"❌ Error during evaluation: {e}")
542
540
  if cfg.deployment.type != "none" and "endpoint_names" in locals():
543
541
  for endpoint_name in endpoint_names:
544
542
  if endpoint_name:
@@ -23,6 +23,7 @@ import os
23
23
  import pathlib
24
24
  import platform
25
25
  import shlex
26
+ import shutil
26
27
  import subprocess
27
28
  import time
28
29
  from typing import List, Optional
@@ -76,6 +77,13 @@ class LocalExecutor(BaseExecutor):
76
77
  f"type {cfg.deployment.type} is not implemented -- add deployment support"
77
78
  )
78
79
 
80
+ # Check if docker is available (skip in dry_run mode)
81
+ if not dry_run and shutil.which("docker") is None:
82
+ raise RuntimeError(
83
+ "Docker is not installed or not in PATH. "
84
+ "Please install Docker to run local evaluations."
85
+ )
86
+
79
87
  # Generate invocation ID for this evaluation run
80
88
  invocation_id = generate_invocation_id()
81
89
 
@@ -233,35 +241,48 @@ class LocalExecutor(BaseExecutor):
233
241
  # To ensure subprocess continues after python exits:
234
242
  # - on Unix-like systems, to fully detach the subprocess
235
243
  # so it does not die when Python exits, pass start_new_session=True;
236
- # - on Widnows use creationflags=subprocess.CREATE_NEW_PROCESS_GROUP flag.
244
+ # - on Windows use creationflags=subprocess.CREATE_NEW_PROCESS_GROUP flag.
237
245
  os_name = platform.system()
246
+ processes = []
247
+
238
248
  if is_execution_mode_sequential:
239
249
  if os_name == "Windows":
240
- subprocess.Popen(
250
+ proc = subprocess.Popen(
241
251
  shlex.split("bash run_all.sequential.sh"),
242
252
  cwd=output_dir,
243
253
  creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
244
254
  )
245
255
  else:
246
- subprocess.Popen(
256
+ proc = subprocess.Popen(
247
257
  shlex.split("bash run_all.sequential.sh"),
248
258
  cwd=output_dir,
249
259
  start_new_session=True,
250
260
  )
261
+ processes.append(("run_all.sequential.sh", proc, output_dir))
251
262
  else:
252
263
  for task in cfg.evaluation.tasks:
253
264
  if os_name == "Windows":
254
- subprocess.Popen(
265
+ proc = subprocess.Popen(
255
266
  shlex.split("bash run.sh"),
256
267
  cwd=output_dir / task.name,
257
268
  creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
258
269
  )
259
270
  else:
260
- subprocess.Popen(
271
+ proc = subprocess.Popen(
261
272
  shlex.split("bash run.sh"),
262
273
  cwd=output_dir / task.name,
263
274
  start_new_session=True,
264
275
  )
276
+ processes.append((task.name, proc, output_dir / task.name))
277
+
278
+ # Wait briefly and check if bash scripts exited immediately (which means error)
279
+ time.sleep(0.3)
280
+
281
+ for name, proc, work_dir in processes:
282
+ exit_code = proc.poll()
283
+ if exit_code is not None and exit_code != 0:
284
+ error_msg = f"Script for {name} exited with code {exit_code}"
285
+ raise RuntimeError(f"Job startup failed | {error_msg}")
265
286
 
266
287
  print("\nCommands for real-time monitoring:")
267
288
  for job_id, evaluation_task in zip(job_ids, evaluation_tasks):
@@ -174,10 +174,11 @@ class SlurmExecutor(BaseExecutor):
174
174
  for idx, (slurm_job_id, remote_runsub_path) in enumerate(
175
175
  zip(slurm_job_ids, remote_runsub_paths)
176
176
  ):
177
+ job_id = generate_job_id(invocation_id, idx)
177
178
  db.write_job(
178
179
  job=JobData(
179
180
  invocation_id=invocation_id,
180
- job_id=generate_job_id(invocation_id, idx),
181
+ job_id=job_id,
181
182
  timestamp=time.time(),
182
183
  executor="slurm",
183
184
  data={
@@ -204,7 +205,7 @@ class SlurmExecutor(BaseExecutor):
204
205
  """
205
206
  db = ExecutionDB()
206
207
 
207
- # If id looks like an invocation_id (no dot), get all jobs for it
208
+ # If id looks like an invocation_id
208
209
  if "." not in id:
209
210
  jobs = db.get_jobs(id)
210
211
  if not jobs:
@@ -605,20 +606,27 @@ def _create_slurm_sbatch_script(
605
606
  s += "kill $SERVER_PID # terminate the server to finish gracefully\n\n"
606
607
 
607
608
  # auto-export
608
- if cfg.execution.get("auto_export", {}).get("destinations", []):
609
- s += _generate_auto_export_section(cfg, job_id)
609
+ ae_cfg = cfg.execution.get("auto_export")
610
+ destinations: list = []
611
+ if isinstance(ae_cfg, list):
612
+ destinations = list(ae_cfg)
613
+ elif isinstance(ae_cfg, dict) or isinstance(ae_cfg, DictConfig):
614
+ destinations = list(ae_cfg.get("destinations", []) or [])
615
+
616
+ if destinations:
617
+ export_env = dict(cfg.execution.get("env_vars", {}).get("export", {}) or {})
618
+ s += _generate_auto_export_section(cfg, job_id, destinations, export_env)
610
619
 
611
620
  return s
612
621
 
613
622
 
614
623
  def _generate_auto_export_section(
615
624
  cfg: DictConfig,
616
- job_id: str, # Complete job_id string
625
+ job_id: str,
626
+ destinations: list,
627
+ export_env: dict,
617
628
  ) -> str:
618
629
  """Generate simple auto-export section for sbatch script."""
619
- auto_export_config = cfg.execution.get("auto_export", {})
620
- destinations = auto_export_config.get("destinations", [])
621
-
622
630
  if not destinations:
623
631
  return ""
624
632
 
@@ -626,18 +634,65 @@ def _generate_auto_export_section(
626
634
  s += "EVAL_EXIT_CODE=$?\n"
627
635
  s += "if [ $EVAL_EXIT_CODE -eq 0 ]; then\n"
628
636
  s += " echo 'Evaluation completed successfully. Starting auto-export...'\n"
629
- s += " set +e\n" # per exporter failure allowed
637
+ s += " set +e\n"
630
638
  s += " set +x\n"
639
+ s += " set +u\n"
631
640
  s += ' cd "$TASK_DIR/artifacts"\n'
632
- auto_export_cfg = OmegaConf.to_container(
633
- cfg.execution.get("auto_export", {}), resolve=True
641
+
642
+ # Work with DictConfig; convert only for YAML at the end
643
+ exec_type = (
644
+ cfg.execution.type
645
+ if hasattr(cfg.execution, "type")
646
+ else cfg.execution.get("type", "slurm")
634
647
  )
635
- yaml_str = yaml.safe_dump(
636
- {"execution": {"auto_export": auto_export_cfg}}, sort_keys=False
648
+ eval_tasks = (
649
+ list(cfg.evaluation.tasks)
650
+ if hasattr(cfg, "evaluation") and hasattr(cfg.evaluation, "tasks")
651
+ else list((cfg.get("evaluation", {}) or {}).get("tasks", []) or [])
637
652
  )
653
+ export_block = cfg.get("export", {}) or {}
654
+
655
+ payload = {
656
+ "execution": {
657
+ "auto_export": {
658
+ "destinations": list(destinations),
659
+ **({"env_vars": dict(export_env)} if export_env else {}),
660
+ },
661
+ "type": exec_type,
662
+ },
663
+ "evaluation": {"tasks": eval_tasks},
664
+ }
665
+ if export_block:
666
+ # Convert just this block to plain for YAML
667
+ payload["export"] = (
668
+ OmegaConf.to_object(export_block)
669
+ if OmegaConf.is_config(export_block)
670
+ else dict(export_block)
671
+ )
672
+
673
+ # Final YAML (single conversion at the end)
674
+ payload_clean = OmegaConf.to_container(OmegaConf.create(payload), resolve=True)
675
+ yaml_str = yaml.safe_dump(payload_clean, sort_keys=False)
638
676
  s += " cat > export_config.yml << 'EOF'\n"
639
677
  s += yaml_str
640
678
  s += "EOF\n"
679
+
680
+ # write launcher config as config.yml for exporters (no core command)
681
+ submitted_yaml = yaml.safe_dump(
682
+ OmegaConf.to_container(cfg, resolve=True), sort_keys=False
683
+ )
684
+ s += " cat > config.yml << 'EOF'\n"
685
+ s += submitted_yaml
686
+ s += "EOF\n"
687
+
688
+ # Export host-only env vars before running auto-export
689
+ for k, v in (export_env or {}).items():
690
+ if isinstance(v, str) and re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", v):
691
+ s += f' export {k}="${{{v}}}"\n'
692
+ else:
693
+ esc = str(v).replace('"', '\\"')
694
+ s += f' export {k}="{esc}"\n'
695
+
641
696
  for dest in destinations:
642
697
  s += f" echo 'Exporting to {dest}...'\n"
643
698
  s += f" nemo-evaluator-launcher export {job_id} --dest {dest} || echo 'Export to {dest} failed'\n"
@@ -656,7 +711,9 @@ def _open_master_connection(
656
711
  socket: str,
657
712
  ) -> str | None:
658
713
  ssh_command = f"ssh -MNf -S {socket} {username}@{hostname}"
659
- completed_process = subprocess.run(args=shlex.split(ssh_command))
714
+ completed_process = subprocess.run(
715
+ args=shlex.split(ssh_command), capture_output=True
716
+ )
660
717
  if completed_process.returncode == 0:
661
718
  return socket
662
719
  return None
@@ -694,12 +751,17 @@ def _make_remote_execution_output_dir(
694
751
  ssh_command.append(f"{username}@{hostname}")
695
752
  ssh_command.append(mkdir_command)
696
753
  ssh_command = " ".join(ssh_command)
697
- completed_process = subprocess.run(args=shlex.split(ssh_command))
754
+ completed_process = subprocess.run(
755
+ args=shlex.split(ssh_command), capture_output=True
756
+ )
698
757
  if completed_process.returncode != 0:
758
+ error_msg = (
759
+ completed_process.stderr.decode("utf-8")
760
+ if completed_process.stderr
761
+ else "Unknown error"
762
+ )
699
763
  raise RuntimeError(
700
- "failed to make a remote execution output dir\n{}".format(
701
- completed_process.stderr.decode("utf-8")
702
- )
764
+ "failed to make a remote execution output dir\n{}".format(error_msg)
703
765
  )
704
766
 
705
767
 
@@ -725,13 +787,16 @@ def _rsync_upload_rundirs(
725
787
  remote_destination_str = f"{username}@{hostname}:{remote_target}"
726
788
  local_sources_str = " ".join(map(str, local_sources))
727
789
  rsync_upload_command = f"rsync -qcaz {local_sources_str} {remote_destination_str}"
728
- completed_process = subprocess.run(args=shlex.split(rsync_upload_command))
790
+ completed_process = subprocess.run(
791
+ args=shlex.split(rsync_upload_command), capture_output=True
792
+ )
729
793
  if completed_process.returncode != 0:
730
- raise RuntimeError(
731
- "failed to upload local sources\n{}".format(
732
- completed_process.stderr.decode("utf-8")
733
- )
794
+ error_msg = (
795
+ completed_process.stderr.decode("utf-8")
796
+ if completed_process.stderr
797
+ else "Unknown error"
734
798
  )
799
+ raise RuntimeError("failed to upload local sources\n{}".format(error_msg))
735
800
 
736
801
 
737
802
  def _sbatch_remote_runsubs(
@@ -757,10 +822,9 @@ def _sbatch_remote_runsubs(
757
822
  args=shlex.split(ssh_command), capture_output=True
758
823
  )
759
824
  if completed_process.returncode != 0:
825
+ error_msg = completed_process.stderr.decode("utf-8")
760
826
  raise RuntimeError(
761
- "failed to submit sbatch scripts for execution\n{}".format(
762
- completed_process.stderr.decode("utf-8")
763
- )
827
+ "failed to submit sbatch scripts for execution\n{}".format(error_msg)
764
828
  )
765
829
 
766
830
  sbatch_output = completed_process.stdout.decode("utf-8")