PyPI - simcortexpp - Versions diffs - 0.1.0__py3-none-any.whl - Mend

simcortexpp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

simcortexpp/__init__.py +0 -0
simcortexpp/cli/__init__.py +0 -0
simcortexpp/cli/main.py +81 -0
simcortexpp/configs/__init__.py +0 -0
simcortexpp/configs/deform/__init__.py +0 -0
simcortexpp/configs/deform/eval.yaml +34 -0
simcortexpp/configs/deform/inference.yaml +60 -0
simcortexpp/configs/deform/train.yaml +98 -0
simcortexpp/configs/initsurf/__init__.py +0 -0
simcortexpp/configs/initsurf/generate.yaml +50 -0
simcortexpp/configs/seg/__init__.py +0 -0
simcortexpp/configs/seg/eval.yaml +31 -0
simcortexpp/configs/seg/inference.yaml +35 -0
simcortexpp/configs/seg/train.yaml +42 -0
simcortexpp/deform/__init__.py +0 -0
simcortexpp/deform/data/__init__.py +0 -0
simcortexpp/deform/data/dataloader.py +268 -0
simcortexpp/deform/eval.py +347 -0
simcortexpp/deform/inference.py +244 -0
simcortexpp/deform/models/__init__.py +0 -0
simcortexpp/deform/models/surfdeform.py +356 -0
simcortexpp/deform/train.py +1173 -0
simcortexpp/deform/utils/__init__.py +0 -0
simcortexpp/deform/utils/coords.py +90 -0
simcortexpp/initsurf/__init__.py +0 -0
simcortexpp/initsurf/generate.py +354 -0
simcortexpp/initsurf/paths.py +19 -0
simcortexpp/preproc/__init__.py +0 -0
simcortexpp/preproc/fs_to_mni.py +696 -0
simcortexpp/seg/__init__.py +0 -0
simcortexpp/seg/data/__init__.py +0 -0
simcortexpp/seg/data/dataloader.py +328 -0
simcortexpp/seg/eval.py +248 -0
simcortexpp/seg/inference.py +291 -0
simcortexpp/seg/models/__init__.py +0 -0
simcortexpp/seg/models/unet.py +63 -0
simcortexpp/seg/train.py +432 -0
simcortexpp/utils/__init__.py +0 -0
simcortexpp/utils/tca.py +298 -0
simcortexpp-0.1.0.dist-info/METADATA +334 -0
simcortexpp-0.1.0.dist-info/RECORD +44 -0
simcortexpp-0.1.0.dist-info/WHEEL +5 -0
simcortexpp-0.1.0.dist-info/entry_points.txt +2 -0
simcortexpp-0.1.0.dist-info/top_level.txt +1 -0

simcortexpp/deform/inference.py ADDED Viewed

@@ -0,0 +1,244 @@
+from __future__ import annotations
+import os
+import time
+import json
+import logging
+from typing import Dict, List, Tuple
+import hydra
+import numpy as np
+import pandas as pd
+import torch
+from torch.utils.data import DataLoader
+from torch.nn.utils.rnn import pad_sequence
+from omegaconf import DictConfig, OmegaConf
+from tqdm import tqdm
+import trimesh
+from simcortexpp.deform.data.dataloader import CSRDeformDataset, collate_csr_deform
+from simcortexpp.deform.utils.coords import voxel_to_world
+from simcortexpp.deform.models.surfdeform import SurfDeform
+# Module-level logger; its level and format are configured in main() via logging.basicConfig.
+log = logging.getLogger(__name__)
+# Maps a surface name to the (hemisphere label, surface type) pair that
+# out_surface_path() places in the "hemi-<...>" entity and the file suffix.
+_SURF_MAP = {
+    "lh_pial": ("L", "pial"),
+    "lh_white": ("L", "white"),
+    "rh_pial": ("R", "pial"),
+    "rh_white": ("R", "white"),
+}
+def _ses(session_label: str) -> str:
+    return f"ses-{session_label}"
+def ensure_derivative_description(out_root: str, name: str = "scpp-deform"):
+    p = os.path.join(out_root, "dataset_description.json")
+    if os.path.isfile(p):
+        return
+    os.makedirs(out_root, exist_ok=True)
+    desc = {
+        "Name": name,
+        "BIDSVersion": "1.8.0",
+        "DatasetType": "derivative",
+        "GeneratedBy": [{"Name": "SimCortexPP", "Description": "Surface deformation stage"}],
+    }
+    with open(p, "w") as f:
+        json.dump(desc, f, indent=2)
+def load_checkpoint(model: torch.nn.Module, ckpt_path: str, strict: bool = True):
+    if not os.path.isfile(ckpt_path):
+        raise FileNotFoundError(f"Checkpoint not found: {ckpt_path}")
+    sd = torch.load(ckpt_path, map_location="cpu")
+    if isinstance(sd, dict) and ("state_dict" in sd or "model" in sd):
+        sd = sd.get("state_dict", sd.get("model", sd))
+    # strip DDP prefix if any
+    sd = { (k[len("module."):] if k.startswith("module.") else k): v for k, v in sd.items() }
+    target = model.module if hasattr(model, "module") else model
+    target.load_state_dict(sd, strict=strict)
+    log.info("Loaded checkpoint: %s (strict=%s)", ckpt_path, strict)
+def build_unified_init(batch: Dict, device: torch.device, surface_names: List[str]):
+    B = len(batch["subject"])
+    unified_list = []
+    per_counts = []
+    faces_per_subj = []
+    affines = batch["affine"].to(device)     # [B,4,4]
+    shifts  = batch["shift_ijk"].to(device)  # [B,3]
+    for i in range(B):
+        verts_cat = []
+        counts_i = []
+        faces_i = []
+        for s in surface_names:
+            v = batch["init_verts_vox"][i][s].to(device)      # [Ni,3] voxel in cropped/padded space
+            f = batch["init_faces"][i][s].to(device).long()   # [Fi,3]
+            verts_cat.append(v)
+            counts_i.append(int(v.shape[0]))
+            faces_i.append(f.detach().cpu().numpy().astype(np.int64))
+        merged = torch.cat(verts_cat, dim=0)
+        unified_list.append(merged)
+        per_counts.append(counts_i)
+        faces_per_subj.append(faces_i)
+    lengths = torch.tensor([v.shape[0] for v in unified_list], device=device, dtype=torch.long)
+    padded = pad_sequence(unified_list, batch_first=True).to(device)  # [B,Nmax,3]
+    return padded, lengths, per_counts, faces_per_subj, affines, shifts
+def out_surface_path(out_root: str, subj: str, session_label: str, space: str, surf_name: str) -> str:
+    ses = _ses(session_label)
+    hemi, surf = _SURF_MAP[surf_name]
+    return os.path.join(
+        out_root, subj, ses, "surfaces",
+        f"{subj}_{ses}_space-{space}_desc-deform_hemi-{hemi}_{surf}.surf.ply"
+    )
+@hydra.main(version_base=None, config_path="pkg://simcortexpp.configs.deform", config_name="inference")
+def main(cfg: DictConfig):
+    """Run surface-deformation inference for one split and export the predicted meshes.
+    Subjects are read from the split CSV (columns used: "split", "dataset", "subject"),
+    grouped by dataset, passed through SurfDeform, and each predicted surface is written
+    as a PLY file at the path given by out_surface_path(). Existing files are skipped
+    unless cfg.inference.overwrite is true; note that the forward pass still runs for
+    such subjects because the check happens at export time.
+    Raises:
+        RuntimeError: If no row of the split file matches cfg.dataset.split_name.
+        KeyError: If a dataset key is missing from cfg.dataset.roots or
+            cfg.dataset.initsurf_roots.
+    """
+    # unknown log-level names fall back to INFO
+    level = getattr(logging, str(getattr(cfg.inference, "log_level", "INFO")).upper(), logging.INFO)
+    logging.basicConfig(level=level, format="%(asctime)s | %(levelname)s | %(name)s | %(message)s")
+    # optional user YAML is merged on top of the packaged config
+    if cfg.user_config:
+        cfg = OmegaConf.merge(cfg, OmegaConf.load(cfg.user_config))
+    surface_names = list(cfg.dataset.surface_name)
+    # add_prob_grad forced if c_in==3
+    add_prob_grad = bool(getattr(cfg.dataset, "add_prob_grad", False))
+    if int(cfg.model.c_in) == 3:
+        add_prob_grad = True
+    # use the requested device, except fall back to CPU when CUDA is requested but unavailable
+    device_str = str(getattr(cfg.inference, "device", "cuda:0"))
+    device = torch.device(device_str if (("cuda" not in device_str) or torch.cuda.is_available()) else "cpu")
+    split_file = str(cfg.dataset.split_file)
+    split_name = str(cfg.dataset.split_name)
+    session_label = str(getattr(cfg.dataset, "session_label", "01"))
+    space = str(getattr(cfg.dataset, "space", "MNI152"))
+    # keep only the rows that belong to the requested split
+    df = pd.read_csv(split_file)
+    df = df[df["split"] == split_name]
+    if len(df) == 0:
+        raise RuntimeError(f"No subjects found for split_name='{split_name}' in {split_file}")
+    # model
+    model = SurfDeform(
+        C_hid=cfg.model.c_hid,
+        C_in=int(cfg.model.c_in),
+        inshape=list(cfg.model.inshape),
+        sigma=float(cfg.model.sigma),
+        device=device,
+        geom_ratio=float(getattr(cfg.model, "geom_ratio", 0.5)),
+        geom_depth=int(getattr(cfg.model, "geom_depth", 6)),
+        gn_groups=int(getattr(cfg.model, "gn_groups", 8)),
+        gate_init=float(getattr(cfg.model, "gate_init", -3.0)),
+    ).to(device)
+    load_checkpoint(model, str(cfg.model.ckpt_path), strict=bool(getattr(cfg.model, "strict_load", True)))
+    model.eval()
+    overwrite = bool(getattr(cfg.inference, "overwrite", False))
+    bs = int(getattr(cfg.inference, "batch_size", 1))
+    nw = int(getattr(cfg.inference, "num_workers", 2))
+    # per-dataset inference
+    times = []  # one entry per subject: forward time of its batch divided by the batch size
+    for ds_key, ds_df in df.groupby("dataset"):
+        if ds_key not in cfg.dataset.roots or ds_key not in cfg.dataset.initsurf_roots:
+            raise KeyError(f"Missing dataset key in config roots: {ds_key}")
+        preproc_root = str(cfg.dataset.roots[ds_key])
+        initsurf_root = str(cfg.dataset.initsurf_roots[ds_key])
+        # NOTE(review): out_roots is not covered by the membership check above
+        out_root = str(cfg.outputs.out_roots[ds_key])
+        ensure_derivative_description(out_root)
+        subjects = ds_df["subject"].astype(str).tolist()
+        ds = CSRDeformDataset(
+            preproc_root=preproc_root,
+            initsurf_root=initsurf_root,
+            subjects=subjects,
+            session_label=session_label,
+            space=space,
+            surface_names=surface_names,
+            inshape_dhw=list(cfg.model.inshape),
+            prob_clip_min=float(cfg.dataset.prob_clip_min),
+            prob_clip_max=float(cfg.dataset.prob_clip_max),
+            prob_gamma=float(cfg.dataset.prob_gamma),
+            add_prob_grad=add_prob_grad,
+            aug=False,
+        )
+        loader = DataLoader(
+            ds,
+            batch_size=bs,
+            shuffle=False,
+            num_workers=nw,
+            pin_memory=True,
+            collate_fn=collate_csr_deform,
+        )
+        log.info("[%s] subjects=%d | out_root=%s", ds_key, len(ds), out_root)
+        with torch.no_grad():
+            for batch in tqdm(loader, desc=f"Infer {ds_key}", leave=False):
+                vol = batch["vol"].to(device)  # [B,C,D,H,W]
+                B = vol.shape[0]
+                padded_init, lengths, per_counts, faces_per_subj, affines, shifts = build_unified_init(
+                    batch, device, surface_names
+                )
+                # synchronize around the forward pass so the timed interval covers the GPU work
+                if device.type == "cuda":
+                    torch.cuda.synchronize()
+                t0 = time.time()
+                pred_all = model(padded_init, vol, int(cfg.model.n_steps))  # [B,Nmax,3]
+                if device.type == "cuda":
+                    torch.cuda.synchronize()
+                t1 = time.time()
+                # the batch time is split evenly among the subjects of the batch
+                times.extend([(t1 - t0) / max(B, 1)] * B)
+                for i in range(B):
+                    subj = str(batch["subject"][i])
+                    A = affines[i]
+                    sh = shifts[i]  # [3]
+                    # drop padding rows, then cut the merged vertices back into per-surface chunks
+                    pred_unified = pred_all[i, : int(lengths[i].item())]
+                    splits = torch.split(pred_unified, per_counts[i], dim=0)
+                    for j, surf in enumerate(surface_names):
+                        out_path = out_surface_path(out_root, subj, session_label, space, surf)
+                        if (not overwrite) and os.path.isfile(out_path):
+                            continue
+                        os.makedirs(os.path.dirname(out_path), exist_ok=True)
+                        v_vox_cp = splits[j]       # cropped/padded voxel space
+                        v_vox_orig = v_vox_cp - sh # undo shift (back to original voxel space)
+                        v_mm = voxel_to_world(v_vox_orig, A).detach().cpu().numpy().astype(np.float32)
+                        f = faces_per_subj[i][j]
+                        # process=False: export vertices and faces exactly as given
+                        trimesh.Trimesh(vertices=v_mm, faces=f, process=False).export(out_path)
+    if times:
+        log.info("Avg inference time/subject: %.4fs", float(sum(times) / len(times)))
+    log.info("Done.")
+if __name__ == "__main__":
+    main()

simcortexpp/deform/models/__init__.py ADDED Viewed

File without changes

simcortexpp/deform/models/surfdeform.py ADDED Viewed

@@ -0,0 +1,356 @@
+from __future__ import annotations
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+# -------------------------
+# Utils blocks (small-batch friendly)
+# -------------------------
+class ConvGNAct(nn.Module):
+    """
+    Conv3D + GroupNorm + LeakyReLU (+ optional Dropout3D)
+    Works better than BatchNorm with small batch sizes.
+    """
+    def __init__(self, cin, cout, k=3, s=1, groups=8, dropout=0.0):
+        super().__init__()
+        p = k // 2  # k=3 -> p=1
+        self.conv = nn.Conv3d(cin, cout, kernel_size=k, stride=s, padding=p, bias=False)
+        g = min(groups, cout)
+        while g > 1 and (cout % g) != 0:
+            g -= 1
+        self.gn = nn.GroupNorm(g, cout)
+        self.act = nn.LeakyReLU(0.2, inplace=True)
+        self.do = nn.Dropout3d(dropout) if dropout > 0 else nn.Identity()
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.gn(x)
+        x = self.act(x)
+        x = self.do(x)
+        return x
+class ResBlock3D(nn.Module):
+    """
+    Lightweight residual block.
+    """
+    def __init__(self, c, k=3, groups=8, dropout=0.0):
+        super().__init__()
+        self.b1 = ConvGNAct(c, c, k=k, s=1, groups=groups, dropout=dropout)
+        self.b2 = ConvGNAct(c, c, k=k, s=1, groups=groups, dropout=dropout)
+    def forward(self, x):
+        return x + self.b2(self.b1(x))
+class GaussianFilter(nn.Module):
+    """
+    Smooth phi field after integration (as you used before).
+    """
+    def __init__(self, C=3, K=3, sigma=0.5):
+        super().__init__()
+        grids = torch.meshgrid([torch.linspace(-(K - 1) / 2, (K - 1) / 2, K)] * 3, indexing="ij")
+        kernel = 1.0
+        for g in grids:
+            kernel *= (1.0 / (sigma * math.sqrt(2 * math.pi))) * torch.exp(-((g / sigma) ** 2) / 2.0)
+        kernel = kernel / kernel.sum()
+        kernel = kernel[None, None].repeat(C, 1, 1, 1, 1)  # (C,1,K,K,K)
+        self.register_buffer("kernel", kernel)
+        self.K = K
+        self.C = C
+    def forward(self, x):
+        return F.conv3d(x, weight=self.kernel, padding=self.K // 2, groups=self.C)
+class GeomInject(nn.Module):
+    """
+    Stable fusion: f = m + gate * proj(g)
+    gate is learnable scalar (per-level) initialized very small => starts MRI-only, then learns to use geom.
+    """
+    def __init__(self, c_mri: int, c_geom: int, gate_init: float = -3.0):
+        super().__init__()
+        self.proj = nn.Conv3d(c_geom, c_mri, kernel_size=1, bias=True)
+        self.gate_logit = nn.Parameter(torch.tensor(gate_init, dtype=torch.float32))
+    def forward(self, m, g):
+        gate = torch.sigmoid(self.gate_logit)  # ~0.018 at -4.0
+        return m + gate * self.proj(g)
+# -------------------------
+# Dual-Encoder U-Net that outputs multi-scale SVFs
+# -------------------------
+class DualMUNetV2(nn.Module):
+    """
+    True dual encoder:
+      - MRI encoder (full)
+      - Geom/prob encoder (lighter + optional shallow depth)
+    Fusion is done at each level via GeomInject (stable, reduces overfit risk).
+    geom_depth:
+      1..6 : number of learned geom stages
+      if <6, deeper geom features are generated by pooling (no extra params).
+    Input channel 0 is treated as the MRI volume and the remaining C_in - 1 channels
+    as geom/prob channels. forward() returns four 3-channel fields (svf1..svf4), all at
+    full input resolution, predicted from decoder features at /4, /2, full and full scale.
+    NOTE(review): the pooled geom path (avg_pool3d) and the skip concatenations appear to
+    require D, H, W divisible by 8 - confirm against the configured inshape.
+    """
+    def __init__(
+        self,
+        C_in: int = 2,                    # [MRI] + geom channels
+        C_hid=(8, 16, 32, 64, 128, 128),
+        geom_ratio: float = 0.5,          # geom width ratio
+        geom_depth: int = 4,              # <=6 (learned depth for geom)
+        K: int = 3,
+        gn_groups: int = 8,
+        gate_init: float = -3.0,
+    ):
+        super().__init__()
+        assert C_in >= 2, "Need MRI + at least 1 geom/prob channel"
+        # out-of-range depths are clamped to [1, 6] rather than rejected
+        geom_depth = int(max(1, min(6, geom_depth)))
+        # MRI channels
+        Cm = list(C_hid)
+        # Geom channels (smaller)
+        # each geom stage gets geom_ratio times the MRI width, but never fewer than 4 channels
+        Cg = [max(4, int(c * geom_ratio)) for c in Cm]
+        self.geom_depth = geom_depth
+        # ---- MRI encoder (6 stages) ----
+        self.m1 = nn.Sequential(ConvGNAct(1,   Cm[0], k=K, s=1, groups=gn_groups), ResBlock3D(Cm[0], k=K, groups=gn_groups))
+        self.m2 = nn.Sequential(ConvGNAct(Cm[0], Cm[1], k=K, s=1, groups=gn_groups), ResBlock3D(Cm[1], k=K, groups=gn_groups))
+        self.m3 = nn.Sequential(ConvGNAct(Cm[1], Cm[2], k=K, s=2, groups=gn_groups), ResBlock3D(Cm[2], k=K, groups=gn_groups))  # /2
+        self.m4 = nn.Sequential(ConvGNAct(Cm[2], Cm[3], k=K, s=2, groups=gn_groups), ResBlock3D(Cm[3], k=K, groups=gn_groups))  # /4
+        self.m5 = nn.Sequential(ConvGNAct(Cm[3], Cm[4], k=K, s=2, groups=gn_groups), ResBlock3D(Cm[4], k=K, groups=gn_groups))  # /8
+        self.m6 = nn.Sequential(ConvGNAct(Cm[4], Cm[5], k=K, s=1, groups=gn_groups), ResBlock3D(Cm[5], k=K, groups=gn_groups))
+        # ---- Geom encoder (up to geom_depth stages learned) ----
+        # stage 1 is always learned
+        self.g1 = nn.Sequential(
+            ConvGNAct(C_in - 1, Cg[0], k=K, s=1, groups=gn_groups),
+            ResBlock3D(Cg[0], k=K, groups=gn_groups),
+        )
+        # build optional learned stages
+        # stages beyond geom_depth are set to None and replaced in forward() by pooling/identity
+        self.g2 = nn.Sequential(
+            ConvGNAct(Cg[0], Cg[1], k=K, s=1, groups=gn_groups),
+            ResBlock3D(Cg[1], k=K, groups=gn_groups),
+        ) if geom_depth >= 2 else None
+        self.g3 = nn.Sequential(
+            ConvGNAct(Cg[1], Cg[2], k=K, s=2, groups=gn_groups),
+            ResBlock3D(Cg[2], k=K, groups=gn_groups),
+        ) if geom_depth >= 3 else None
+        self.g4 = nn.Sequential(
+            ConvGNAct(Cg[2], Cg[3], k=K, s=2, groups=gn_groups),
+            ResBlock3D(Cg[3], k=K, groups=gn_groups),
+        ) if geom_depth >= 4 else None
+        self.g5 = nn.Sequential(
+            ConvGNAct(Cg[3], Cg[4], k=K, s=2, groups=gn_groups),
+            ResBlock3D(Cg[4], k=K, groups=gn_groups),
+        ) if geom_depth >= 5 else None
+        self.g6 = nn.Sequential(
+            ConvGNAct(Cg[4], Cg[5], k=K, s=1, groups=gn_groups),
+            ResBlock3D(Cg[5], k=K, groups=gn_groups),
+        ) if geom_depth >= 6 else None
+        # ---- Fusion injectors (per level) ----
+        # The geom input width of injector n is the width of the deepest learned geom
+        # stage at or above level n, i.e. Cg[min(n, geom_depth) - 1]; pooled/identity
+        # stages keep the channel count of the last learned stage.
+        self.f1 = GeomInject(Cm[0], Cg[0], gate_init=gate_init)
+        self.f2 = GeomInject(Cm[1], Cg[1] if geom_depth >= 2 else Cg[0], gate_init=gate_init)
+        self.f3 = GeomInject(Cm[2], Cg[2] if geom_depth >= 3 else (Cg[1] if geom_depth >= 2 else Cg[0]), gate_init=gate_init)
+        self.f4 = GeomInject(Cm[3], Cg[3] if geom_depth >= 4 else (Cg[2] if geom_depth >= 3 else (Cg[1] if geom_depth >= 2 else Cg[0])), gate_init=gate_init)
+        self.f5 = GeomInject(Cm[4], Cg[4] if geom_depth >= 5 else (Cg[3] if geom_depth >= 4 else (Cg[2] if geom_depth >= 3 else (Cg[1] if geom_depth >= 2 else Cg[0]))), gate_init=gate_init)
+        self.f6 = GeomInject(Cm[5], Cg[5] if geom_depth >= 6 else (Cg[4] if geom_depth >= 5 else (Cg[3] if geom_depth >= 4 else (Cg[2] if geom_depth >= 3 else (Cg[1] if geom_depth >= 2 else Cg[0])))), gate_init=gate_init)
+        # ---- Decoder (uses fused skips) ----
+        self.up = nn.Upsample(scale_factor=2, mode="trilinear", align_corners=True)
+        self.d5 = nn.Sequential(ConvGNAct(Cm[5] + Cm[4], Cm[4], k=K, s=1, groups=gn_groups), ResBlock3D(Cm[4], k=K, groups=gn_groups))
+        self.d4 = nn.Sequential(ConvGNAct(Cm[4] + Cm[3], Cm[3], k=K, s=1, groups=gn_groups), ResBlock3D(Cm[3], k=K, groups=gn_groups))
+        self.d3 = nn.Sequential(ConvGNAct(Cm[3] + Cm[2], Cm[2], k=K, s=1, groups=gn_groups), ResBlock3D(Cm[2], k=K, groups=gn_groups))
+        self.d2 = nn.Sequential(ConvGNAct(Cm[2] + Cm[1], Cm[1], k=K, s=1, groups=gn_groups), ResBlock3D(Cm[1], k=K, groups=gn_groups))
+        self.d1 = nn.Sequential(ConvGNAct(Cm[1] + Cm[0], Cm[0], k=K, s=1, groups=gn_groups), ResBlock3D(Cm[0], k=K, groups=gn_groups))
+        # ---- Multi-scale flow heads ----
+        self.flow1 = nn.Conv3d(Cm[3], 3, K, 1, padding=K // 2)
+        self.flow2 = nn.Conv3d(Cm[2], 3, K, 1, padding=K // 2)
+        self.flow3 = nn.Conv3d(Cm[1], 3, K, 1, padding=K // 2)
+        self.flow4 = nn.Conv3d(Cm[0], 3, K, 1, padding=K // 2)
+        # near-zero weights and zero bias: the heads initially output fields close to zero
+        for layer in [self.flow1, self.flow2, self.flow3, self.flow4]:
+            nn.init.normal_(layer.weight, 0, 1e-5)
+            nn.init.constant_(layer.bias, 0.0)
+    def _geom_pool(self, g, times: int):
+        # downsample by factor 2^times using avg pooling (no parameters)
+        for _ in range(times):
+            g = F.avg_pool3d(g, kernel_size=2, stride=2)
+        return g
+    def forward(self, x):
+        # x: (B,C_in,D,H,W); returns the tuple (svf1, svf2, svf3, svf4)
+        mri = x[:, 0:1]     # (B,1,D,H,W)
+        geom = x[:, 1:]     # (B,C_in-1,D,H,W)
+        # ----- MRI encoder -----
+        m1 = self.m1(mri)      # full
+        m2 = self.m2(m1)       # full
+        m3 = self.m3(m2)       # /2
+        m4 = self.m4(m3)       # /4
+        m5 = self.m5(m4)       # /8
+        m6 = self.m6(m5)       # /8
+        # ----- Geom encoder (learned up to geom_depth) -----
+        # unlearned stages mirror the MRI resolution: identity where the MRI stage has
+        # stride 1 (levels 2 and 6), 2x average pooling where it has stride 2 (levels 3-5)
+        g1 = self.g1(geom)     # full
+        if self.geom_depth >= 2:
+            g2 = self.g2(g1)   # full
+        else:
+            g2 = g1
+        if self.geom_depth >= 3:
+            g3 = self.g3(g2)   # /2
+        else:
+            # if not learned, make it by pooling
+            g3 = self._geom_pool(g2, times=1)
+        if self.geom_depth >= 4:
+            g4 = self.g4(g3)   # /4
+        else:
+            g4 = self._geom_pool(g3, times=1)
+        if self.geom_depth >= 5:
+            g5 = self.g5(g4)   # /8
+        else:
+            g5 = self._geom_pool(g4, times=1)
+        if self.geom_depth >= 6:
+            g6 = self.g6(g5)   # /8
+        else:
+            g6 = g5
+        # ----- Fusion (stable inject) -----
+        f1 = self.f1(m1, g1)
+        f2 = self.f2(m2, g2)
+        f3 = self.f3(m3, g3)
+        f4 = self.f4(m4, g4)
+        f5 = self.f5(m5, g5)
+        f6 = self.f6(m6, g6)
+        # ----- Decoder -----
+        # flow heads at /4 and /2 are upsampled to full resolution before being returned
+        x = torch.cat([f6, f5], dim=1)  # /8
+        x = self.d5(x)
+        x = self.up(x)                  # /4
+        x = torch.cat([x, f4], dim=1)   # /4
+        x = self.d4(x)
+        svf1 = self.up(self.up(self.flow1(x)))  # /4 -> full
+        x = self.up(x)                  # /2
+        x = torch.cat([x, f3], dim=1)   # /2
+        x = self.d3(x)
+        svf2 = self.up(self.flow2(x))   # /2 -> full
+        x = self.up(x)                  # full
+        x = torch.cat([x, f2], dim=1)   # full
+        x = self.d2(x)
+        svf3 = self.flow3(x)            # full
+        x = torch.cat([x, f1], dim=1)   # full
+        x = self.d1(x)
+        svf4 = self.flow4(x)            # full
+        return svf1, svf2, svf3, svf4
+# -------------------------
+# SurfDeform (same logic, but uses DualMUNetV2)
+# -------------------------
+class SurfDeform(nn.Module):
+    """
+    Deforms surface vertices with velocity fields predicted from an image volume.
+    DualMUNetV2 predicts four fields; each is integrated by scaling and squaring into a
+    displacement field, sampled at the current vertex positions and added to them, one
+    field after the other. Vertex coordinates are voxel indices (ijk) of a volume whose
+    spatial shape must equal ``inshape``.
+    """
+    def __init__(
+        self,
+        C_in=2,
+        C_hid=(8, 16, 32, 64, 128, 128),
+        inshape=(184, 224, 184),
+        sigma=1.0,
+        device="cpu",
+        # dual encoder controls
+        geom_ratio=0.5,
+        geom_depth=4,
+        gn_groups=8,
+        gate_init =-3.0,
+    ):
+        super().__init__()
+        self.inshape = tuple(inshape)
+        self.munet = DualMUNetV2(
+            C_in=C_in,
+            C_hid=C_hid,
+            geom_ratio=geom_ratio,
+            geom_depth=geom_depth,
+            gn_groups=gn_groups,
+            gate_init=gate_init,
+        ).to(device)
+        D, H, W = self.inshape
+        # fixed buffers (no reassignment in forward)
+        # scale: volume size per axis, used to normalise coordinates for grid_sample
+        self.register_buffer("scale", torch.tensor([D, H, W], dtype=torch.float32))
+        # grid: identity voxel coordinates, channel c holding the index along axis c
+        grid = torch.stack(
+            torch.meshgrid(
+                torch.arange(D), torch.arange(H), torch.arange(W), indexing="ij"
+            )
+        )[None].float()  # (1,3,D,H,W)
+        self.register_buffer("grid", grid)
+        self.gaussian = GaussianFilter(C=3, K=3, sigma=sigma)
+    def forward(self, vert: torch.Tensor, vol: torch.Tensor, n_steps: int):
+        """
+        vert: (B,V,3) voxel ijk
+        vol : (B,C_in,D,H,W) (MRI + prob/geom)
+        n_steps: number of scaling-and-squaring steps used to integrate each field
+        Returns the deformed vertices, same shape as vert.
+        Raises ValueError if the spatial shape of vol differs from self.inshape.
+        """
+        D, H, W = vol.shape[2:]
+        if (D, H, W) != self.inshape:
+            raise ValueError(f"Input vol shape {(D,H,W)} != inshape {self.inshape}. Fix padding/inshape.")
+        svfs = self.munet(vol)
+        for idx, svf in enumerate(svfs):
+            phi = self.integrate(svf, n_steps=n_steps)
+            # only the first two fields (predicted at /4 and /2 resolution) are smoothed
+            if idx < 2:
+                phi = self.gaussian(phi)
+            coord = vert[:, :, None, None].clone()       # (B,V,1,1,3) ijk
+            deform = self.interpolate(coord, phi)        # (B,3,V,1,1)
+            deform = deform[..., 0, 0].permute(0, 2, 1)  # (B,V,3)
+            # later fields are sampled at the already-moved vertex positions
+            vert = vert + deform
+        return vert
+    def integrate(self, svf, n_steps=7):
+        # scaling and squaring
+        # start from svf / 2^n_steps, then compose the flow with itself n_steps times
+        flow = svf / (2 ** n_steps)
+        for _ in range(n_steps):
+            flow = flow + self.transform(flow, flow)
+        return flow
+    def transform(self, src, flow):
+        # sample src at the identity grid displaced by flow
+        coord = self.grid.to(flow.device) + flow
+        coord = coord.permute(0, 2, 3, 4, 1)  # (B,D,H,W,3)
+        return self.interpolate(coord, src)
+    def interpolate(self, coord, src):
+        # coord holds ijk voxel coordinates in its last dimension; src is sampled there
+        # align_corners=True => normalize by (size-1)
+        scale = self.scale.to(coord.device)
+        # clone so the in-place normalisation below does not modify the caller's tensor
+        coord = coord.clone()
+        coord[..., 0] = 2.0 * coord[..., 0] / (scale[0] - 1.0) - 1.0  # D
+        coord[..., 1] = 2.0 * coord[..., 1] / (scale[1] - 1.0) - 1.0  # H
+        coord[..., 2] = 2.0 * coord[..., 2] / (scale[2] - 1.0) - 1.0  # W
+        # grid_sample expects (x,y,z)=(W,H,D) => flip ijk -> kji
+        coord = coord.flip(-1)
+        # for 5-D inputs grid_sample's "bilinear" mode interpolates trilinearly;
+        # padding_mode="border" clamps samples that fall outside the volume
+        return F.grid_sample(
+            src, coord.to(src.device),
+            mode="bilinear",
+            padding_mode="border",
+            align_corners=True
+        )