euler-train 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
1
+ """runlog — lightweight file-based experiment logging."""
2
+ from __future__ import annotations
3
+
4
+ from .run import Run
5
+ from .architecture import export_architecture
6
+
7
+ __all__ = ["init", "Run", "export_architecture"]
8
+ __version__ = "0.1.0"
9
+
10
+
11
def init(
    dir: str | None = None,
    config=None,
    meta: dict | None = None,
    output_formats: dict[str, str] | None = None,
    run_id: str | None = None,
    datasets: dict | None = None,
    run_name: str | None = None,
    evaluations: dict[str, dict] | None = None,
    mode: str | None = None,
) -> Run:
    """Create a new run — or resume an existing one — and return the handle.

    Parameters
    ----------
    dir:
        Project / output directory; each call creates a unique run under
        ``{dir}/runs/{timestamp_id}/``. When *None* (the default), the
        directory resolves to ``$ET_HOME/<project>`` (if ``$ET_HOME`` is
        set), else ``~/euler_train/<project>``, where ``<project>`` is
        the git repository name, or the current working directory name
        when not inside a git repo.
    config:
        Hyperparameters — a *dict*, a path to a JSON / YAML file, an
        ``argparse.Namespace``, or a dataclass instance.
    meta:
        Extra user-defined fields merged into ``meta.json``
        (e.g. ``{"description": "baseline", "tags": ["v2"]}``).
    output_formats:
        Override auto-inferred save formats. Keys can be an output type
        (``"depth"``), a slot / aux name (``"transmission"``), or a
        dotted combination (``"depth.pred"``). Values are ``"png"``,
        ``"npy"``, or ``"npz"``.
    run_id:
        If given, resume an existing run instead of creating a new one;
        ``{dir}/runs/{run_id}/`` must already exist. Its ``config.json``
        is loaded automatically unless *config* is explicitly provided.
    datasets:
        Optional mapping of split name to
        ``euler_loading.MultiModalDataset`` instance (e.g.
        ``{"train": train_ds, "val": val_ds}``). Each split is logged
        into ``meta.json`` under ``datasets[split]`` with per-modality
        records: ``path`` plus inferred metadata (``used_as``, ``slot``,
        ``modality_type``; hierarchical modalities also include
        ``hierarchy_scope`` and ``applies_to``). A dataset's
        ``describe_for_runlog()`` contract is used directly when
        implemented; otherwise inference prefers ``ds-crawler`` config
        properties, then falls back to naming-based heuristics.
    run_name:
        Optional human-readable name for the run. Stored in ``meta.json``.
    evaluations:
        Optional mapping of evaluation key to evaluation entry
        (``datasets``, ``name``, ``status``, ``checkpoint``,
        ``metadata``). Typically used when resuming a run (via *run_id*)
        for evaluation. See also :meth:`Run.add_evaluation`.
    mode:
        Optional label for the current process context (``"train"``,
        ``"val"``, or ``"eval"``). When provided, lifecycle fields and
        crash details are mirrored into ``meta.json`` under
        ``modes[mode]``.
    """
    # Thin convenience wrapper — every argument is forwarded verbatim.
    return Run(
        dir=dir,
        config=config,
        meta=meta,
        output_formats=output_formats,
        run_id=run_id,
        datasets=datasets,
        run_name=run_name,
        evaluations=evaluations,
        mode=mode,
    )
@@ -0,0 +1,125 @@
1
+ """Export a PyTorch model to a lightweight ONNX graph for Netron visualization."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ import tempfile
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ log = logging.getLogger("euler_train")
10
+
11
+ _MISSING_DEPS_MSG = (
12
+ "Architecture export requires optional dependencies: onnx, onnxruntime, onnxsim. "
13
+ "Install them with: pip install euler-train[architecture]"
14
+ )
15
+
16
+
17
def export_architecture(
    model: Any,
    dummy_input: Any,
    output_path: str | Path = "architecture.onnx",
) -> Path:
    """Export a PyTorch model to a simplified, weightless ONNX graph.

    The resulting file is optimized for visual inspection in Netron:
    redundant nodes are removed, operator fusions are applied, and
    weight tensors are stripped so only the graph topology remains.

    Parameters
    ----------
    model:
        A PyTorch ``nn.Module``. Temporarily set to eval mode for
        export; the original training/eval state is restored afterward.
    dummy_input:
        Example input tensor(s) matching the model's forward signature.
    output_path:
        Where to write the final ``.onnx`` file.

    Returns
    -------
    Path
        The written output path.

    Raises
    ------
    ImportError
        When the optional dependencies (onnx, onnxruntime, onnxsim)
        are missing.
    """
    import torch

    try:
        import onnx
        import onnxruntime as ort
        from onnxsim import simplify
    except ImportError as exc:
        # Chain the original error so the missing package name stays visible.
        raise ImportError(_MISSING_DEPS_MSG) from exc

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Remember the training flag so it is restored even if export fails.
    was_training = model.training
    model.eval()

    try:
        return _do_export(model, dummy_input, output_path, onnx, ort, simplify, torch)
    finally:
        if was_training:
            model.train()
63
+
64
+
65
def _do_export(model, dummy_input, output_path, onnx, ort, simplify, torch):
    """Pipeline: export → onnxsim simplify → ORT optimize → strip weights."""
    with tempfile.TemporaryDirectory() as workdir:
        exported = Path(workdir) / "raw.onnx"
        optimized = Path(workdir) / "ort.onnx"

        # Weights must be present at first: the optimizer passes below
        # need them to fold constants and fuse blocks.
        _export_onnx(model, dummy_input, exported, torch=torch)

        # Pass 1 — onnxsim removes redundant glue nodes.
        log.info("Simplifying ONNX graph with onnxsim...")
        slim, ok = simplify(onnx.load(str(exported)))
        if not ok:
            log.warning("onnxsim validation failed, continuing anyway.")
        onnx.save(slim, str(exported))

        # Pass 2 — ONNX Runtime fuses standard blocks (Conv+BN+ReLU, etc.).
        # Constructing the session with `optimized_model_filepath` set
        # writes the optimized graph to disk as a side effect.
        log.info("Applying ONNX Runtime graph optimizations...")
        opts = ort.SessionOptions()
        opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        opts.optimized_model_filepath = str(optimized)
        ort.InferenceSession(str(exported), opts)

        # Pass 3 — drop every initializer so only the topology remains.
        log.info("Stripping weights...")
        final = onnx.load(str(optimized))
        while final.graph.initializer:
            final.graph.initializer.pop()

        onnx.save(final, str(output_path))

        log.info("Architecture exported to %s", output_path)
        return output_path
100
+
101
+
102
def _export_onnx(model: Any, dummy_input: Any, path: Path, *, torch: Any) -> None:
    """Export using dynamo (PyTorch >= 2.1) or legacy torch.onnx.export."""
    # The dynamo-based exporter produces a cleaner functional graph, but is
    # not supported by every model/PyTorch build — fall back on any failure.
    dynamo = getattr(torch.onnx, "dynamo_export", None)
    if dynamo is not None:
        try:
            log.info("Exporting ONNX via torch.onnx.dynamo_export (PyTorch 2.x)...")
            dynamo(model, dummy_input).save(str(path))
            return
        except Exception as exc:
            log.warning(
                "dynamo_export failed (%s), falling back to legacy export.", exc
            )

    # Legacy path: PyTorch 1.x / 2.0, or the dynamo fallback above.
    log.info("Exporting ONNX via torch.onnx.export (legacy)...")
    torch.onnx.export(
        model,
        dummy_input,
        str(path),
        export_params=True,
        opset_version=14,
        do_constant_folding=True,
    )
@@ -0,0 +1,137 @@
1
+ """Collect runtime environment metadata for run_environment.json."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ import platform
6
+ import subprocess
7
+ import sys
8
+
9
+
10
def get_run_environment() -> dict:
    """Return a dict matching the run_environments schema."""
    env = {
        "name": _hostname(),
        "python_version": sys.version.split()[0],
        "cuda_version": _cuda_version(),
        "gpu_type": _gpu_type(),
        "gpu_count": _gpu_count(),
        "packages_snapshot": _packages_snapshot(),
    }
    # Container fields are not detected here; callers may fill them in.
    env.update(docker_image=None, docker_digest=None, metadata=None)
    return env
23
+
24
+
25
+ def _hostname() -> str | None:
26
+ try:
27
+ return platform.node() or None
28
+ except Exception:
29
+ return None
30
+
31
+
32
def _cuda_version() -> str | None:
    """Detect the CUDA version: torch build info, then nvcc, then env var."""
    # 1. The CUDA version torch was compiled against (None for CPU builds).
    try:
        import torch
        built_against = torch.version.cuda
        if built_against:
            return built_against
    except Exception:
        pass

    # 2. Parse `nvcc --version`, whose banner contains a line like
    #    "Cuda compilation tools, release 12.1, V12.1.66".
    try:
        proc = subprocess.run(
            ["nvcc", "--version"],
            capture_output=True, text=True, timeout=10,
        )
        if proc.returncode == 0:
            for line in proc.stdout.splitlines():
                if "release" in line.lower():
                    after = line.split("release")[-1].strip()
                    return after.split(",")[0].strip()
    except Exception:
        pass

    # 3. Last resort: the CUDA_VERSION environment variable (may be unset).
    return os.environ.get("CUDA_VERSION")
58
+
59
+
60
def _gpu_type() -> str | None:
    """Name of GPU 0 via pynvml, falling back to nvidia-smi; None if neither works."""
    # 1. NVML bindings, when installed.
    try:
        import pynvml
        pynvml.nvmlInit()
        raw = pynvml.nvmlDeviceGetName(pynvml.nvmlDeviceGetHandleByIndex(0))
        # Older pynvml returns bytes; normalise to str.
        return raw.decode() if isinstance(raw, bytes) else raw
    except Exception:
        pass

    # 2. Shell out to nvidia-smi and take the first reported GPU.
    try:
        proc = subprocess.run(
            ["nvidia-smi", "--query-gpu=gpu_name", "--format=csv,noheader"],
            capture_output=True, text=True, timeout=10,
        )
        if proc.returncode == 0:
            name = proc.stdout.strip().splitlines()[0].strip()
            if name:
                return name
    except Exception:
        pass

    return None
86
+
87
+
88
+ def _gpu_count() -> int | None:
89
+ # 1. torch.cuda.device_count()
90
+ try:
91
+ import torch
92
+ count = torch.cuda.device_count()
93
+ if count > 0:
94
+ return count
95
+ except Exception:
96
+ pass
97
+
98
+ # 2. pynvml
99
+ try:
100
+ import pynvml
101
+ pynvml.nvmlInit()
102
+ return pynvml.nvmlDeviceGetCount()
103
+ except Exception:
104
+ pass
105
+
106
+ # 3. CUDA_VISIBLE_DEVICES
107
+ visible = os.environ.get("CUDA_VISIBLE_DEVICES")
108
+ if visible is not None:
109
+ devices = [d.strip() for d in visible.split(",") if d.strip()]
110
+ if devices:
111
+ return len(devices)
112
+
113
+ return None
114
+
115
+
116
def _packages_snapshot() -> dict[str, str] | None:
    """Snapshot installed packages, preferring `uv pip freeze` over `pip freeze`."""
    for command in (["uv", "pip", "freeze"], ["pip", "freeze"]):
        try:
            proc = subprocess.run(
                command, capture_output=True, text=True, timeout=30,
            )
            if proc.returncode == 0 and proc.stdout.strip():
                return _parse_freeze(proc.stdout)
        except Exception:
            # Missing binary, timeout, etc. — try the next command.
            continue
    return None
128
+
129
+
130
+ def _parse_freeze(output: str) -> dict[str, str]:
131
+ packages = {}
132
+ for line in output.strip().splitlines():
133
+ line = line.strip()
134
+ if "==" in line:
135
+ name, _, version = line.partition("==")
136
+ packages[name.strip()] = version.strip()
137
+ return packages
@@ -0,0 +1,43 @@
1
+ """Collect git repository metadata for code_ref.json."""
2
+ from __future__ import annotations
3
+
4
+ import subprocess
5
+
6
+
7
def get_code_ref() -> dict:
    """Return a dict matching the code_refs schema (all fields best-effort).

    Every field is None when git is unavailable or the cwd is not a repo.
    """
    ref: dict = {
        "repo_url": _git("config", "--get", "remote.origin.url"),
        "branch": _git("rev-parse", "--abbrev-ref", "HEAD"),
        "commit_sha": _git("rev-parse", "HEAD"),
    }
    ref["is_dirty"] = _is_dirty()
    ref["dirty_diff"] = _dirty_diff()
    ref["commit_message"] = _git("log", "-1", "--format=%B")
    ref["committed_at"] = _git("log", "-1", "--format=%aI")
    return ref
18
+
19
+
20
+ def _git(*args: str) -> str | None:
21
+ try:
22
+ result = subprocess.run(
23
+ ["git", *args],
24
+ capture_output=True,
25
+ text=True,
26
+ timeout=10,
27
+ )
28
+ if result.returncode != 0:
29
+ return None
30
+ return result.stdout.strip() or None
31
+ except Exception:
32
+ return None
33
+
34
+
35
def _is_dirty() -> bool:
    """True when ``git status --porcelain`` produced any output.

    _git maps both "no output" (clean tree) and "command failed" (not a
    repo / git missing) to None, so this also reports False outside a
    repository.
    """
    return _git("status", "--porcelain") is not None
38
+
39
+
40
+ def _dirty_diff() -> str | None:
41
+ if not _is_dirty():
42
+ return None
43
+ return _git("diff", "HEAD")
euler_train/outputs.py ADDED
@@ -0,0 +1,194 @@
1
+ """Save prediction / ground-truth / auxiliary outputs to disk."""
2
+ from __future__ import annotations
3
+
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ import numpy as np
8
+
9
+
10
+ # ---------------------------------------------------------------------------
11
+ # Public entry point
12
+ # ---------------------------------------------------------------------------
13
+
14
def save_output_tree(
    type_dir: Path,
    slots: dict[str, Any],
    format_overrides: dict[str, str],
    output_type: str,
) -> None:
    """Persist all slots (pred, gt, input, aux/…) for one *output_type*.

    *slots* example::

        {
            "pred": array,
            "gt": array,
            "aux": {"transmission": array, "attention": array},
        }

    None-valued slots (and None aux entries) are skipped.
    """
    for slot_name, payload in slots.items():
        if payload is None:
            continue
        # The "aux" slot is a nested mapping: one sub-directory per aux name.
        if slot_name == "aux" and isinstance(payload, dict):
            for aux_name, aux_payload in payload.items():
                if aux_payload is not None:
                    _save_slot(
                        type_dir / "aux" / aux_name, aux_payload,
                        output_type, aux_name, format_overrides,
                    )
        else:
            _save_slot(
                type_dir / slot_name, payload,
                output_type, slot_name, format_overrides,
            )
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Internal helpers
50
+ # ---------------------------------------------------------------------------
51
+
52
def _save_slot(
    slot_dir: Path,
    data: Any,
    output_type: str,
    leaf_name: str,
    format_overrides: dict[str, str],
) -> None:
    """Write every item of *data* into *slot_dir* as zero-padded indexed files."""
    slot_dir.mkdir(parents=True, exist_ok=True)
    for index, item in enumerate(_unpack(data)):
        fmt = _resolve_format(item, output_type, leaf_name, format_overrides)
        _save_item(slot_dir / f"{index:04d}.{fmt}", item, fmt)
64
+
65
+
66
+ # ---- normalisation -------------------------------------------------------
67
+
68
def _unpack(data: Any) -> list:
    """Normalise *data* into a flat list of saveable items."""
    if isinstance(data, (list, tuple)):
        return [_prepare(entry) for entry in data]

    item = _prepare(data)
    # A 4-D numpy array is treated as a batch: one item per leading index.
    if isinstance(item, np.ndarray) and item.ndim == 4:
        return list(item)
    return [item]
77
+
78
+
79
+ def _prepare(data: Any) -> Any:
80
+ """Convert torch tensors → numpy; pass PIL images through unchanged."""
81
+ # PIL Image — return as-is
82
+ try:
83
+ from PIL import Image as _PIL
84
+ if isinstance(data, _PIL.Image):
85
+ return data
86
+ except ImportError:
87
+ pass
88
+
89
+ # torch Tensor → numpy, channels-first → channels-last
90
+ if hasattr(data, "detach"):
91
+ arr: np.ndarray = data.detach().cpu().numpy()
92
+ # (C, H, W) → (H, W, C) when C looks like a channel dim
93
+ if (
94
+ arr.ndim == 3
95
+ and arr.shape[0] in (1, 3, 4)
96
+ and min(arr.shape[1:]) > 4
97
+ ):
98
+ arr = np.transpose(arr, (1, 2, 0))
99
+ # (B, C, H, W) → (B, H, W, C)
100
+ elif (
101
+ arr.ndim == 4
102
+ and arr.shape[1] in (1, 3, 4)
103
+ and min(arr.shape[2:]) > 4
104
+ ):
105
+ arr = np.transpose(arr, (0, 2, 3, 1))
106
+ return arr
107
+
108
+ return np.asarray(data)
109
+
110
+
111
+ # ---- format inference ----------------------------------------------------
112
+
113
+ def _is_image_like(arr: np.ndarray) -> bool:
114
+ """Heuristic: does this array look like it should be saved as a PNG?"""
115
+ if arr.ndim == 2 and arr.dtype == np.uint8:
116
+ return True # grayscale uint8
117
+ if arr.ndim == 3 and arr.shape[2] in (1, 3, 4):
118
+ return True # HxWx{1,3,4}
119
+ return False
120
+
121
+
122
+ def _resolve_format(
123
+ item: Any,
124
+ output_type: str,
125
+ leaf_name: str,
126
+ overrides: dict[str, str],
127
+ ) -> str:
128
+ """Pick save format: check overrides (most-specific first), then infer."""
129
+ # "rgb.pred" > "rgb" > "pred"
130
+ specific = f"{output_type}.{leaf_name}"
131
+ if specific in overrides:
132
+ return overrides[specific]
133
+ if output_type in overrides:
134
+ return overrides[output_type]
135
+ if leaf_name in overrides:
136
+ return overrides[leaf_name]
137
+
138
+ # PIL Image
139
+ try:
140
+ from PIL import Image as _PIL
141
+ if isinstance(item, _PIL.Image):
142
+ return "png"
143
+ except ImportError:
144
+ pass
145
+
146
+ if isinstance(item, np.ndarray) and _is_image_like(item):
147
+ return "png"
148
+ return "npy"
149
+
150
+
151
+ # ---- writers -------------------------------------------------------------
152
+
153
+ def _save_item(path: Path, item: Any, fmt: str) -> None:
154
+ if fmt == "png":
155
+ _save_png(path, item)
156
+ elif fmt == "npy":
157
+ np.save(str(path), item if isinstance(item, np.ndarray) else np.asarray(item))
158
+ elif fmt == "npz":
159
+ np.savez_compressed(
160
+ str(path),
161
+ data=item if isinstance(item, np.ndarray) else np.asarray(item),
162
+ )
163
+ else:
164
+ raise ValueError(f"Unsupported format: {fmt!r}")
165
+
166
+
167
def _save_png(path: Path, item: Any) -> None:
    """Write *item* (PIL image or array) as a PNG.

    Float arrays are assumed to lie in [0, 1]: values are clipped and
    scaled to uint8. Other non-uint8 dtypes are cast directly (values
    outside 0..255 will wrap).
    """
    from PIL import Image

    # Already a PIL image — write it untouched.
    if isinstance(item, Image.Image):
        item.save(str(path))
        return

    arr: np.ndarray = item

    # Normalise dtype to uint8.
    if np.issubdtype(arr.dtype, np.floating):
        arr = (np.clip(arr, 0.0, 1.0) * 255).astype(np.uint8)
    elif arr.dtype != np.uint8:
        arr = arr.astype(np.uint8)

    if arr.ndim == 2:
        Image.fromarray(arr, mode="L").save(str(path))
        return
    if arr.ndim != 3:
        raise ValueError(f"Cannot save {arr.ndim}D array as PNG")

    channels = arr.shape[2]
    mode = {1: "L", 3: "RGB", 4: "RGBA"}.get(channels)
    if mode is None:
        raise ValueError(f"Cannot save array with {channels} channels as PNG")
    # HxWx1 must be squeezed to HxW for mode "L".
    plane = arr[:, :, 0] if channels == 1 else arr
    Image.fromarray(plane, mode=mode).save(str(path))