foscat 2025.11.1__py3-none-any.whl → 2026.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
foscat/FoCUS.py CHANGED
@@ -5,8 +5,11 @@ import healpy as hp
5
5
  import numpy as np
6
6
  import foscat.HealSpline as HS
7
7
  from scipy.interpolate import griddata
8
+ from foscat.SphereDownGeo import SphereDownGeo
9
+ from foscat.SphereUpGeo import SphereUpGeo
10
+ import torch
8
11
 
9
- TMPFILE_VERSION = "V10_0"
12
+ TMPFILE_VERSION = "V12_0"
10
13
 
11
14
 
12
15
  class FoCUS:
@@ -36,7 +39,7 @@ class FoCUS:
36
39
  mpi_rank=0
37
40
  ):
38
41
 
39
- self.__version__ = "2025.11.1"
42
+ self.__version__ = "2026.01.1"
40
43
  # P00 coeff for normalization for scat_cov
41
44
  self.TMPFILE_VERSION = TMPFILE_VERSION
42
45
  self.P1_dic = None
@@ -57,7 +60,8 @@ class FoCUS:
57
60
  self.kernelR_conv = {}
58
61
  self.kernelI_conv = {}
59
62
  self.padding_conv = {}
60
-
63
+ self.down = {}
64
+ self.up = {}
61
65
  if not self.silent:
62
66
  print("================================================")
63
67
  print(" START FOSCAT CONFIGURATION")
@@ -648,6 +652,7 @@ class FoCUS:
648
652
  return rim
649
653
 
650
654
  # --------------------------------------------------------
655
+
651
656
  def ud_grade_2(self, im, axis=0, cell_ids=None, nside=None,max_poll=False):
652
657
 
653
658
  if self.use_2D:
@@ -721,6 +726,22 @@ class FoCUS:
721
726
 
722
727
  else:
723
728
  shape = list(im.shape)
729
+ if nside is None:
730
+ l_nside=int(np.sqrt(shape[-1]//12))
731
+ else:
732
+ l_nside=nside
733
+
734
+ nbatch=1
735
+ for k in range(len(shape)-1):
736
+ nbatch*=shape[k]
737
+ if l_nside not in self.down:
738
+ print('initialise down', l_nside)
739
+ self.down[l_nside] = SphereDownGeo(nside_in=l_nside, dtype=self.all_bk_type,mode="smooth", in_cell_ids=cell_ids)
740
+
741
+ res,out_cell=self.down[l_nside](self.backend.bk_reshape(im,[nbatch,1,shape[-1]]))
742
+
743
+ return self.backend.bk_reshape(res,shape[:-1]+[out_cell.shape[0]]),out_cell
744
+ '''
724
745
  if self.use_median:
725
746
  if cell_ids is not None:
726
747
  sim, new_cell_ids = self.backend.binned_mean(im, cell_ids,reduce='median')
@@ -747,6 +768,7 @@ class FoCUS:
747
768
  return self.backend.bk_reduce_mean(
748
769
  self.backend.bk_reshape(im, shape[0:-1]+[shape[-1]//4,4]), axis=-1
749
770
  ),None
771
+ '''
750
772
 
751
773
  # --------------------------------------------------------
752
774
  def up_grade(self, im, nout,
@@ -836,6 +858,7 @@ class FoCUS:
836
858
  else:
837
859
  lout = nside
838
860
 
861
+ '''
839
862
  if (lout,nout) not in self.pix_interp_val or force_init_index:
840
863
  if not self.silent:
841
864
  print("compute lout nout", lout, nout)
@@ -926,12 +949,32 @@ class FoCUS:
926
949
 
927
950
  del w
928
951
  del p
929
-
930
- if lout == nout:
931
- imout = im
932
- else:
933
- # work only on the last column
934
-
952
+ '''
953
+ shape=list(im.shape)
954
+ nbatch=1
955
+ for k in range(len(shape)-1):
956
+ nbatch*=shape[k]
957
+
958
+ im=self.backend.bk_reshape(im,[nbatch,1,shape[-1]])
959
+
960
+ while lout<nout:
961
+ if lout not in self.up:
962
+ if o_cell_ids is None:
963
+ l_o_cell_ids=torch.tensor(np.arange(12*(lout**2),dtype='int'),device=im.device)
964
+ else:
965
+ l_o_cell_ids=o_cell_ids
966
+ self.up[lout] = SphereUpGeo(nside_out=lout,
967
+ dtype=self.all_bk_type,
968
+ cell_ids_out=l_o_cell_ids,
969
+ up_norm="col_l1")
970
+ im, fine_ids = self.up[lout](self.backend.bk_cast(im))
971
+ lout*=2
972
+ if lout<nout and o_cell_ids is not None:
973
+ o_cell_ids=torch.repeat(fine_ids,4)*4+ \
974
+ torch.tile(torch.tensor([0,1,2,3],device=fine_ids.device,dtype=fine_ids.dtype),fine_ids.shape[0])
975
+
976
+ return self.backend.bk_reshape(im,shape[:-1]+[im.shape[-1]])
977
+ '''
935
978
  ndata = 1
936
979
  for k in range(len(ishape)-1):
937
980
  ndata = ndata * ishape[k]
@@ -960,6 +1003,7 @@ class FoCUS:
960
1003
  return self.backend.bk_reshape(
961
1004
  imout, ishape[0:-1]+[imout.shape[-1]]
962
1005
  )
1006
+ '''
963
1007
  return imout
964
1008
 
965
1009
  # --------------------------------------------------------
@@ -1354,7 +1398,9 @@ class FoCUS:
1354
1398
  else:
1355
1399
  l_cell_ids=cell_ids
1356
1400
 
1357
- nvalid=self.KERNELSZ**2
1401
+ nvalid=4*self.KERNELSZ**2
1402
+ if nvalid>12*nside**2:
1403
+ nvalid=12*nside**2
1358
1404
  idxEB=hconvol.idx_nn[:,0:nvalid]
1359
1405
  tmpEB=np.zeros([self.NORIENT,4,l_cell_ids.shape[0],nvalid],dtype='complex')
1360
1406
  tmpS=np.zeros([4,l_cell_ids.shape[0],nvalid],dtype='float')
@@ -1500,7 +1546,7 @@ class FoCUS:
1500
1546
 
1501
1547
  else:
1502
1548
  if l_kernel == 5:
1503
- pw = 0.5
1549
+ pw = 0.75
1504
1550
  pw2 = 0.5
1505
1551
  threshold = 2e-5
1506
1552
 
@@ -0,0 +1,380 @@
1
+
2
+ import torch
3
+ import torch.nn as nn
4
+ import numpy as np
5
+ import healpy as hp
6
+
7
+
8
+ class SphereDownGeo(nn.Module):
9
+ """
10
+ Geometric HEALPix downsampling operator (NESTED indexing).
11
+
12
+ This module reduces resolution by a factor 2:
13
+ nside_out = nside_in // 2
14
+
15
+ Input conventions
16
+ -----------------
17
+ - If in_cell_ids is None:
18
+ x is expected to be full-sphere: [B, C, N_in]
19
+ output is [B, C, K_out] with K_out = len(cell_ids_out) (or N_out if None).
20
+ - If in_cell_ids is provided (fine pixels at nside_in, NESTED):
21
+ x can be either:
22
+ * compact: [B, C, K_in] where K_in = len(in_cell_ids), aligned with in_cell_ids order
23
+ * full-sphere: [B, C, N_in] (also supported)
24
+ output is [B, C, K_out] where cell_ids_out is derived as unique(in_cell_ids // 4),
25
+ unless you explicitly pass cell_ids_out (then it will be intersected with the derived set).
26
+
27
+ Modes
28
+ -----
29
+ - mode="smooth": linear downsampling y = M @ x (M sparse)
30
+ - mode="maxpool": non-linear max over available children (fast)
31
+ """
32
+
33
+ def __init__(
34
+ self,
35
+ nside_in: int,
36
+ mode: str = "smooth",
37
+ radius_deg: float | None = None,
38
+ sigma_deg: float | None = None,
39
+ weight_norm: str = "l1",
40
+ cell_ids_out: np.ndarray | list[int] | None = None,
41
+ in_cell_ids: np.ndarray | list[int] | torch.Tensor | None = None,
42
+ use_csr=True,
43
+ device=None,
44
+ dtype: torch.dtype = torch.float32,
45
+ ):
46
+ super().__init__()
47
+
48
+ if device is None:
49
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
50
+ self.device = device
51
+ self.dtype = dtype
52
+
53
+ self.nside_in = int(nside_in)
54
+ assert (self.nside_in & (self.nside_in - 1)) == 0, "nside_in must be a power of 2."
55
+ self.nside_out = self.nside_in // 2
56
+ assert self.nside_out >= 1, "nside_out must be >= 1."
57
+
58
+ self.N_in = 12 * self.nside_in * self.nside_in
59
+ self.N_out = 12 * self.nside_out * self.nside_out
60
+
61
+ self.mode = str(mode).lower()
62
+ assert self.mode in ("smooth", "maxpool"), "mode must be 'smooth' or 'maxpool'."
63
+
64
+ self.weight_norm = str(weight_norm).lower()
65
+ assert self.weight_norm in ("l1", "l2"), "weight_norm must be 'l1' or 'l2'."
66
+
67
+ # ---- Handle reduced-domain inputs (fine pixels) ----
68
+ self.in_cell_ids = self._validate_in_cell_ids(in_cell_ids)
69
+ self.has_in_subset = self.in_cell_ids is not None
70
+ if self.has_in_subset:
71
+ # derive parents
72
+ derived_out = np.unique(self.in_cell_ids // 4).astype(np.int64)
73
+ if cell_ids_out is None:
74
+ self.cell_ids_out = derived_out
75
+ else:
76
+ req_out = self._validate_cell_ids_out(cell_ids_out)
77
+ # keep only those compatible with derived_out (otherwise they'd be all-zero)
78
+ self.cell_ids_out = np.intersect1d(req_out, derived_out, assume_unique=False)
79
+ if self.cell_ids_out.size == 0:
80
+ raise ValueError(
81
+ "After intersecting cell_ids_out with unique(in_cell_ids//4), "
82
+ "no coarse pixel remains. Check your inputs."
83
+ )
84
+ else:
85
+ self.cell_ids_out = self._validate_cell_ids_out(cell_ids_out)
86
+
87
+ self.K_out = int(self.cell_ids_out.size)
88
+
89
+ # Column basis for smooth matrix:
90
+ # - full sphere: columns are 0..N_in-1
91
+ # - subset: columns are 0..K_in-1 aligned to self.in_cell_ids
92
+ self.K_in = int(self.in_cell_ids.size) if self.has_in_subset else self.N_in
93
+
94
+ if self.mode == "smooth":
95
+ if radius_deg is None:
96
+ # default: include roughly the 4 children footprint
97
+ # (healpy pixel size ~ sqrt(4pi/N), coarse pixel is 4x area)
98
+ radius_deg = 2.0 * hp.nside2resol(self.nside_out, arcmin=True) / 60.0
99
+ if sigma_deg is None:
100
+ sigma_deg = max(radius_deg / 2.0, 1e-6)
101
+
102
+ self.radius_deg = float(radius_deg)
103
+ self.sigma_deg = float(sigma_deg)
104
+ self.radius_rad = self.radius_deg * np.pi / 180.0
105
+ self.sigma_rad = self.sigma_deg * np.pi / 180.0
106
+
107
+ M = self._build_down_matrix() # shape (K_out, K_in or N_in)
108
+
109
+ self.M = M.coalesce()
110
+
111
+ if use_csr:
112
+ self.M = self.M.to_sparse_csr().to(self.device)
113
+
114
+ self.M_size = M.size()
115
+
116
+ else:
117
+ # Precompute children indices for maxpool
118
+ # For subset mode, store mapping from each parent to indices in compact vector,
119
+ # with -1 for missing children.
120
+ children = np.stack(
121
+ [4 * self.cell_ids_out + i for i in range(4)],
122
+ axis=1,
123
+ ).astype(np.int64) # [K_out, 4] in fine pixel ids (full indexing)
124
+
125
+ if self.has_in_subset:
126
+ # map each child pixel id to position in in_cell_ids (compact index)
127
+ pos = self._positions_in_sorted(self.in_cell_ids, children.reshape(-1))
128
+ children_compact = pos.reshape(self.K_out, 4).astype(np.int64) # -1 if missing
129
+ self.register_buffer(
130
+ "children_compact",
131
+ torch.tensor(children_compact, dtype=torch.long, device=self.device),
132
+ )
133
+ else:
134
+ self.register_buffer(
135
+ "children_full",
136
+ torch.tensor(children, dtype=torch.long, device=self.device),
137
+ )
138
+
139
+ # expose ids as torch buffers for convenience
140
+ self.register_buffer(
141
+ "cell_ids_out_t",
142
+ torch.tensor(self.cell_ids_out.astype(np.int64), dtype=torch.long, device=self.device),
143
+ )
144
+ if self.has_in_subset:
145
+ self.register_buffer(
146
+ "in_cell_ids_t",
147
+ torch.tensor(self.in_cell_ids.astype(np.int64), dtype=torch.long, device=self.device),
148
+ )
149
+
150
+ # ---------------- validation helpers ----------------
151
+ def _validate_cell_ids_out(self, cell_ids_out):
152
+ """Return a 1D np.int64 array of coarse cell ids (nside_out)."""
153
+ if cell_ids_out is None:
154
+ return np.arange(self.N_out, dtype=np.int64)
155
+
156
+ arr = np.asarray(cell_ids_out, dtype=np.int64).reshape(-1)
157
+ if arr.size == 0:
158
+ raise ValueError("cell_ids_out is empty: provide at least one coarse pixel id.")
159
+ arr = np.unique(arr)
160
+ if arr.min() < 0 or arr.max() >= self.N_out:
161
+ raise ValueError(f"cell_ids_out must be in [0, {self.N_out-1}] for nside_out={self.nside_out}.")
162
+ return arr
163
+
164
+ def _validate_in_cell_ids(self, in_cell_ids):
165
+ """Return a 1D np.int64 array of fine cell ids (nside_in) or None."""
166
+ if in_cell_ids is None:
167
+ return None
168
+ if torch.is_tensor(in_cell_ids):
169
+ arr = in_cell_ids.detach().cpu().numpy()
170
+ else:
171
+ arr = np.asarray(in_cell_ids)
172
+ arr = np.asarray(arr, dtype=np.int64).reshape(-1)
173
+ if arr.size == 0:
174
+ raise ValueError("in_cell_ids is empty: provide at least one fine pixel id or None.")
175
+ arr = np.unique(arr)
176
+ if arr.min() < 0 or arr.max() >= self.N_in:
177
+ raise ValueError(f"in_cell_ids must be in [0, {self.N_in-1}] for nside_in={self.nside_in}.")
178
+ return arr
179
+
180
+ @staticmethod
181
+ def _positions_in_sorted(sorted_ids: np.ndarray, query_ids: np.ndarray) -> np.ndarray:
182
+ """
183
+ For each query_id, return its index in sorted_ids if present, else -1.
184
+ sorted_ids must be sorted ascending unique.
185
+ """
186
+ q = np.asarray(query_ids, dtype=np.int64)
187
+ idx = np.searchsorted(sorted_ids, q)
188
+ ok = (idx >= 0) & (idx < sorted_ids.size) & (sorted_ids[idx] == q)
189
+ out = np.full(q.shape, -1, dtype=np.int64)
190
+ out[ok] = idx[ok]
191
+ return out
192
+
193
+ # ---------------- weights and matrix build ----------------
194
+ def _normalize_weights(self, w: np.ndarray) -> np.ndarray:
195
+ w = np.asarray(w, dtype=np.float64)
196
+ if w.size == 0:
197
+ return w
198
+ w = np.maximum(w, 0.0)
199
+
200
+ if self.weight_norm == "l1":
201
+ s = w.sum()
202
+ if s <= 0.0:
203
+ return np.ones_like(w) / max(w.size, 1)
204
+ return w / s
205
+
206
+ # l2
207
+ s2 = (w * w).sum()
208
+ if s2 <= 0.0:
209
+ return np.ones_like(w) / max(np.sqrt(w.size), 1.0)
210
+ return w / np.sqrt(s2)
211
+
212
+ def _build_down_matrix(self) -> torch.Tensor:
213
+ """Construct sparse matrix M (K_out, K_in or N_in) for the selected coarse pixels."""
214
+ nside_in = self.nside_in
215
+ nside_out = self.nside_out
216
+
217
+ radius_rad = self.radius_rad
218
+ sigma_rad = self.sigma_rad
219
+
220
+ rows: list[int] = []
221
+ cols: list[int] = []
222
+ vals: list[float] = []
223
+
224
+ # For subset columns, we use self.in_cell_ids as the basis
225
+ subset_cols = self.has_in_subset
226
+ in_ids = self.in_cell_ids # np.ndarray or None
227
+
228
+ for r, p_out in enumerate(self.cell_ids_out.tolist()):
229
+ theta0, phi0 = hp.pix2ang(nside_out, int(p_out), nest=True)
230
+ vec0 = hp.ang2vec(theta0, phi0)
231
+
232
+ neigh = hp.query_disc(nside_in, vec0, radius_rad, inclusive=True, nest=True)
233
+ neigh = np.asarray(neigh, dtype=np.int64)
234
+
235
+ if subset_cols:
236
+ # keep only valid fine pixels
237
+ # neigh is not sorted; intersect1d expects sorted
238
+ neigh_sorted = np.sort(neigh)
239
+ keep = np.intersect1d(neigh_sorted, in_ids, assume_unique=False)
240
+ neigh = keep
241
+
242
+ # Fallback: if radius query returns nothing in subset mode, at least try the 4 children
243
+ if neigh.size == 0:
244
+ children = (4 * int(p_out) + np.arange(4, dtype=np.int64))
245
+ if subset_cols:
246
+ pos = self._positions_in_sorted(in_ids, children)
247
+ ok = pos >= 0
248
+ if np.any(ok):
249
+ neigh = children[ok]
250
+ else:
251
+ # nothing to connect -> row stays zero
252
+ continue
253
+ else:
254
+ neigh = children
255
+
256
+ theta, phi = hp.pix2ang(nside_in, neigh, nest=True)
257
+ vec = hp.ang2vec(theta, phi)
258
+
259
+ # angular distance via dot product
260
+ dots = np.clip(np.dot(vec, vec0), -1.0, 1.0)
261
+ ang = np.arccos(dots)
262
+ w = np.exp(- 2*(ang / sigma_rad) ** 2)
263
+
264
+ w = self._normalize_weights(w)
265
+
266
+ if subset_cols:
267
+ pos = self._positions_in_sorted(in_ids, neigh)
268
+ # all should be present due to filtering, but guard anyway
269
+ ok = pos >= 0
270
+ neigh_pos = pos[ok]
271
+ w = w[ok]
272
+ if neigh_pos.size == 0:
273
+ continue
274
+ for c, v in zip(neigh_pos.tolist(), w.tolist()):
275
+ rows.append(r)
276
+ cols.append(int(c))
277
+ vals.append(float(v))
278
+ else:
279
+ for c, v in zip(neigh.tolist(), w.tolist()):
280
+ rows.append(r)
281
+ cols.append(int(c))
282
+ vals.append(float(v))
283
+
284
+ if len(rows) == 0:
285
+ # build an all-zero sparse tensor
286
+ indices = torch.zeros((2, 0), dtype=torch.long, device=self.device)
287
+ vals_t = torch.zeros((0,), dtype=self.dtype, device=self.device)
288
+ return torch.sparse_coo_tensor(
289
+ indices, vals_t, size=(self.K_out, self.K_in), device=self.device, dtype=self.dtype
290
+ ).coalesce()
291
+
292
+ rows_t = torch.tensor(rows, dtype=torch.long, device=self.device)
293
+ cols_t = torch.tensor(cols, dtype=torch.long, device=self.device)
294
+ vals_t = torch.tensor(vals, dtype=self.dtype, device=self.device)
295
+
296
+ indices = torch.stack([rows_t, cols_t], dim=0)
297
+ M = torch.sparse_coo_tensor(
298
+ indices,
299
+ vals_t,
300
+ size=(self.K_out, self.K_in),
301
+ device=self.device,
302
+ dtype=self.dtype,
303
+ ).coalesce()
304
+ return M
305
+
306
+ # ---------------- forward ----------------
307
+ def forward(self, x: torch.Tensor):
308
+ """
309
+ Parameters
310
+ ----------
311
+ x : torch.Tensor
312
+ If has_in_subset:
313
+ - [B,C,K_in] (compact, aligned with in_cell_ids) OR [B,C,N_in] (full sphere)
314
+ Else:
315
+ - [B,C,N_in] (full sphere)
316
+
317
+ Returns
318
+ -------
319
+ y : torch.Tensor
320
+ [B,C,K_out]
321
+ cell_ids_out : torch.Tensor
322
+ [K_out] coarse pixel ids (nside_out), aligned with y last dimension.
323
+ """
324
+ if x.dim() != 3:
325
+ raise ValueError("x must be [B, C, N]")
326
+
327
+ B, C, N = x.shape
328
+ if self.has_in_subset:
329
+ if N not in (self.K_in, self.N_in):
330
+ raise ValueError(
331
+ f"x last dim must be K_in={self.K_in} (compact) or N_in={self.N_in} (full), got {N}"
332
+ )
333
+ else:
334
+ if N != self.N_in:
335
+ raise ValueError(f"x last dim must be N_in={self.N_in}, got {N}")
336
+
337
+ if self.mode == "smooth":
338
+
339
+ # If x is full-sphere but M is subset-based, gather compact inputs
340
+ if self.has_in_subset and N == self.N_in:
341
+ x_use = x.index_select(dim=2, index=self.in_cell_ids_t.to(x.device))
342
+ else:
343
+ x_use = x
344
+
345
+ # sparse mm expects 2D: (K_out, K_in) @ (K_in, B*C)
346
+ x2 = x_use.reshape(B * C, -1).transpose(0, 1).contiguous()
347
+ y2 = torch.sparse.mm(self.M, x2)
348
+ y = y2.transpose(0, 1).reshape(B, C, self.K_out).contiguous()
349
+ return y, self.cell_ids_out_t.to(x.device)
350
+
351
+ # maxpool
352
+ if self.has_in_subset and N == self.N_in:
353
+ x_use = x.index_select(dim=2, index=self.in_cell_ids_t.to(x.device))
354
+ else:
355
+ x_use = x
356
+
357
+ if self.has_in_subset:
358
+ # children_compact: [K_out, 4] indices in 0..K_in-1 or -1
359
+ ch = self.children_compact.to(x.device) # [K_out,4]
360
+ # gather with masking
361
+ # We build y by iterating 4 children with max
362
+ y = None
363
+ for j in range(4):
364
+ idx = ch[:, j] # [K_out]
365
+ mask = idx >= 0
366
+ # start with very negative so missing children don't win
367
+ tmp = torch.full((B, C, self.K_out), -torch.inf, device=x.device, dtype=x.dtype)
368
+ if mask.any():
369
+ tmp[:, :, mask] = x_use.index_select(dim=2, index=idx[mask]).reshape(B, C, -1)
370
+ y = tmp if y is None else torch.maximum(y, tmp)
371
+ # If a parent had no valid children at all, it is -inf -> set to 0
372
+ y = torch.where(torch.isfinite(y), y, torch.zeros_like(y))
373
+ return y, self.cell_ids_out_t.to(x.device)
374
+
375
+ else:
376
+ ch = self.children_full.to(x.device) # [K_out,4] full indices
377
+ # gather children and max
378
+ xch = x_use.index_select(dim=2, index=ch.reshape(-1)).reshape(B, C, self.K_out, 4)
379
+ y = xch.max(dim=3).values
380
+ return y, self.cell_ids_out_t.to(x.device)
foscat/SphereUpGeo.py ADDED
@@ -0,0 +1,175 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import numpy as np
4
+
5
+ from foscat.SphereDownGeo import SphereDownGeo
6
+
7
+
8
class SphereUpGeo(nn.Module):
    """Geometric HEALPix upsampling operator using the transpose of SphereDownGeo.

    `cell_ids_out` (coarse pixels at nside_out, NESTED) is mandatory.
    Forward expects x of shape [B, C, K_out] aligned with that order.
    Output is a full fine-grid map [B, C, N_in] at nside_in = 2*nside_out.

    Normalization (diagonal corrections):
      - up_norm='adjoint': x_up = M^T x
      - up_norm='col_l1': x_up = (M^T x) / col_sum, col_sum[i] = sum_k M[k,i]
      - up_norm='diag_l2': x_up = (M^T x) / col_l2, col_l2[i] = sum_k M[k,i]^2
    """

    def __init__(
        self,
        nside_out: int,
        cell_ids_out,
        radius_deg: float | None = None,
        sigma_deg: float | None = None,
        weight_norm: str = "l1",
        up_norm: str = "col_l1",
        eps: float = 1e-12,
        device=None,
        dtype=torch.float32,
    ):
        """Build the sparse up-sampling operator M^T from a full SphereDownGeo.

        Parameters
        ----------
        nside_out : int
            Coarse resolution (power of 2); fine resolution is 2*nside_out.
        cell_ids_out : 1D list / np.ndarray / torch.Tensor
            Coarse NESTED pixel ids; order defines the alignment of forward input.
        radius_deg, sigma_deg, weight_norm :
            Passed through to the underlying SphereDownGeo (smooth mode).
        up_norm : str
            One of 'adjoint', 'col_l1', 'diag_l2' (see class docstring).
        eps : float
            Lower clamp on the normalization denominators.
        device, dtype :
            Torch placement; defaults to CUDA when available, float32.
        """
        super().__init__()

        if cell_ids_out is None:
            raise ValueError("cell_ids_out is mandatory (1D list/np/tensor of coarse HEALPix ids at nside_out).")

        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.device = device
        self.dtype = dtype

        self.nside_out = int(nside_out)
        assert (self.nside_out & (self.nside_out - 1)) == 0, "nside_out must be a power of 2."
        self.nside_in = self.nside_out * 2

        # Full-sphere pixel counts (HEALPix: 12*nside^2).
        self.N_out = 12 * self.nside_out * self.nside_out
        self.N_in = 12 * self.nside_in * self.nside_in

        up_norm = str(up_norm).lower().strip()
        if up_norm not in ("adjoint", "col_l1", "diag_l2"):
            raise ValueError("up_norm must be 'adjoint', 'col_l1', or 'diag_l2'.")
        self.up_norm = up_norm
        self.eps = float(eps)

        # Coarse ids in user-provided order (must be unique for alignment)
        if isinstance(cell_ids_out, torch.Tensor):
            cell_ids_out_np = cell_ids_out.detach().cpu().numpy().astype(np.int64)
        else:
            cell_ids_out_np = np.asarray(cell_ids_out, dtype=np.int64)

        if cell_ids_out_np.ndim != 1:
            raise ValueError("cell_ids_out must be 1D")
        if cell_ids_out_np.size == 0:
            raise ValueError("cell_ids_out must be non-empty")
        if cell_ids_out_np.min() < 0 or cell_ids_out_np.max() >= self.N_out:
            raise ValueError("cell_ids_out contains out-of-bounds ids for this nside_out")
        if np.unique(cell_ids_out_np).size != cell_ids_out_np.size:
            raise ValueError("cell_ids_out must not contain duplicates (order matters for alignment).")

        self.cell_ids_out_np = cell_ids_out_np
        self.K_out = int(cell_ids_out_np.size)
        self.register_buffer("cell_ids_out_t", torch.as_tensor(cell_ids_out_np, dtype=torch.long, device=self.device))

        # Build the FULL down operator at fine resolution (nside_in -> nside_out)
        # use_csr=False so we can read indices()/values() off the COO tensor below.
        tmp_down = SphereDownGeo(
            nside_in=self.nside_in,
            mode="smooth",
            radius_deg=radius_deg,
            sigma_deg=sigma_deg,
            weight_norm=weight_norm,
            device=self.device,
            dtype=self.dtype,
            use_csr=False,
        )

        M_down_full = torch.sparse_coo_tensor(
            tmp_down.M.indices(),
            tmp_down.M.values(),
            size=(tmp_down.N_out, tmp_down.N_in),
            device=self.device,
            dtype=self.dtype,
        ).coalesce()

        # Extract ONLY the requested coarse rows, in the provided order.
        # We do this on CPU with numpy for simplicity and speed at init.
        idx = M_down_full.indices().cpu().numpy()
        vals = M_down_full.values().cpu().numpy()
        rows = idx[0]
        cols = idx[1]

        # Map original row id -> new row position [0..K_out-1]
        row_map = {int(r): i for i, r in enumerate(cell_ids_out_np.tolist())}
        mask = np.fromiter((r in row_map for r in rows), dtype=bool, count=rows.size)

        rows_sel = rows[mask]
        cols_sel = cols[mask]
        vals_sel = vals[mask]

        new_rows = np.fromiter((row_map[int(r)] for r in rows_sel), dtype=np.int64, count=rows_sel.size)

        # Row-subset of the down operator: [K_out, N_in], rows in user order.
        M_down_sub = torch.sparse_coo_tensor(
            torch.as_tensor(np.stack([new_rows, cols_sel], axis=0), dtype=torch.long),
            torch.as_tensor(vals_sel, dtype=self.dtype),
            size=(self.K_out, self.N_in),
            device=self.device,
            dtype=self.dtype,
        ).coalesce()

        # Store M^T (sparse) so forward is just sparse.mm
        M_up = self._transpose_sparse(M_down_sub)  # [N_in, K_out]
        self.register_buffer("M_indices", M_up.indices())
        self.register_buffer("M_values", M_up.values())
        self.M_size = M_up.size()

        # Diagonal normalizers (length N_in), based on the selected coarse rows only
        idx_sub = M_down_sub.indices()
        vals_sub = M_down_sub.values()
        fine_cols = idx_sub[1]

        col_sum = torch.zeros(self.N_in, device=self.device, dtype=self.dtype)
        col_l2 = torch.zeros(self.N_in, device=self.device, dtype=self.dtype)
        col_sum.scatter_add_(0, fine_cols, vals_sub)
        col_l2.scatter_add_(0, fine_cols, vals_sub * vals_sub)

        self.register_buffer("col_sum", col_sum)
        self.register_buffer("col_l2", col_l2)

        # Fine ids (full sphere)
        self.register_buffer("cell_ids_in_t", torch.arange(self.N_in, dtype=torch.long, device=self.device))

        # Materialize M^T once as CSR for fast repeated sparse.mm in forward().
        # NOTE(review): plain attribute (not a buffer) — it presumably must be
        # rebuilt rather than loaded from a state_dict; confirm intended.
        self.M_T = torch.sparse_coo_tensor(
            self.M_indices.to(device=self.device),
            self.M_values.to(device=self.device, dtype=self.dtype),
            size=self.M_size,
            device=self.device,
            dtype=self.dtype,
        ).coalesce().to_sparse_csr().to(self.device)

    @staticmethod
    def _transpose_sparse(M: torch.Tensor) -> torch.Tensor:
        """Return the transpose of a 2D sparse COO tensor by swapping its index rows."""
        M = M.coalesce()
        idx = M.indices()
        vals = M.values()
        R, C = M.size()
        idx_T = torch.stack([idx[1], idx[0]], dim=0)
        return torch.sparse_coo_tensor(idx_T, vals, size=(C, R), device=M.device, dtype=M.dtype).coalesce()

    def forward(self, x: torch.Tensor):
        """x: [B, C, K_out] -> x_up: [B, C, N_in].

        Returns (x_up, cell_ids_in): the upsampled full fine-grid map and the
        fine pixel ids [0..N_in-1] aligned with its last dimension.
        """
        B, C, K_out = x.shape
        assert K_out == self.K_out, f"Expected K_out={self.K_out}, got {K_out}"

        # Flatten batch/channel so the product is a single 2D sparse.mm.
        x_bc = x.reshape(B * C, K_out)
        x_up_bc_T = torch.sparse.mm(self.M_T, x_bc.T)  # [N_in, B*C]
        x_up = x_up_bc_T.T.reshape(B, C, self.N_in)  # [B, C, N_in]

        # Per-fine-pixel normalization; eps clamp avoids divide-by-zero on
        # fine pixels untouched by the selected coarse rows.
        if self.up_norm == "col_l1":
            denom = self.col_sum.to(device=x.device, dtype=x.dtype).clamp_min(self.eps)
            x_up = x_up / denom.view(1, 1, -1)
        elif self.up_norm == "diag_l2":
            denom = self.col_l2.to(device=x.device, dtype=x.dtype).clamp_min(self.eps)
            x_up = x_up / denom.view(1, 1, -1)

        return x_up, self.cell_ids_in_t.to(device=x.device)