PyPI - sawnergy - Versions diffs - 1.0.3__py3-none-any.whl → 1.0.9__py3-none-any.whl - Mend

sawnergy 1.0.3__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

sawnergy/__init__.py +3 -1
sawnergy/embedding/SGNS_pml.py +324 -51
sawnergy/embedding/SGNS_torch.py +282 -39
sawnergy/embedding/__init__.py +26 -1
sawnergy/embedding/embedder.py +426 -203
sawnergy/embedding/visualizer.py +251 -0
sawnergy/logging_util.py +1 -1
sawnergy/rin/rin_builder.py +4 -4
sawnergy/visual/visualizer.py +6 -6
sawnergy/visual/visualizer_util.py +3 -0
sawnergy/walks/walker.py +43 -22
{sawnergy-1.0.3.dist-info → sawnergy-1.0.9.dist-info}/METADATA +91 -57
sawnergy-1.0.9.dist-info/RECORD +23 -0
sawnergy-1.0.3.dist-info/RECORD +0 -22
{sawnergy-1.0.3.dist-info → sawnergy-1.0.9.dist-info}/WHEEL +0 -0
{sawnergy-1.0.3.dist-info → sawnergy-1.0.9.dist-info}/licenses/LICENSE +0 -0
{sawnergy-1.0.3.dist-info → sawnergy-1.0.9.dist-info}/licenses/NOTICE +0 -0
{sawnergy-1.0.3.dist-info → sawnergy-1.0.9.dist-info}/top_level.txt +0 -0

sawnergy/embedding/visualizer.py ADDED Viewed

@@ -0,0 +1,251 @@
+from __future__ import annotations
+# third party
+import numpy as np
+import matplotlib as mpl
+# built-in
+from pathlib import Path
+from typing import Sequence
+import logging
+# local
+from ..visual import visualizer_util
+from .. import sawnergy_util
+# *----------------------------------------------------*
+#                        GLOBALS
+# *----------------------------------------------------*
+_logger = logging.getLogger(__name__)
+# *----------------------------------------------------*
+#                        HELPERS
+# *----------------------------------------------------*
+def _safe_svd_pca(X: np.ndarray, k: int, *, row_l2: bool = False) -> tuple[np.ndarray, np.ndarray]:
+    """Compute k principal directions via SVD and project onto them.
+    The data are mean-centered per feature before the decomposition. When the
+    SVD yields fewer than ``k`` directions (fewer than ``k`` rows in ``X``),
+    the missing directions are zero-filled so both outputs always carry
+    exactly ``k`` components.
+    Args:
+        X: Data matrix of shape (N, D).
+        k: Number of components to keep; must be 2 or 3.
+        row_l2: If True, L2-normalize each centered row before the SVD.
+    Returns:
+        ``(proj, comps)`` where ``proj`` has shape (N, k) and ``comps`` has
+        shape (k, D).
+    Raises:
+        ValueError: If ``X`` is not 2D, ``k`` is not 2 or 3, or ``D < k``.
+    """
+    if X.ndim != 2:
+        raise ValueError(f"PCA expects 2D array (N, D); got {X.shape}")
+    n_rows, D = X.shape
+    if k not in (2, 3):
+        raise ValueError(f"PCA dimensionality must be 2 or 3; got {k}")
+    if D < k:
+        raise ValueError(f"Requested k={k} exceeds feature dim D={D}")
+    if n_rows == 0:
+        # Nothing to decompose; avoid mean-of-empty warnings and an SVD on an empty matrix.
+        return np.zeros((0, k), dtype=float), np.zeros((k, D), dtype=float)
+    Xc = X - X.mean(axis=0, keepdims=True)
+    if row_l2:
+        norms = np.linalg.norm(Xc, axis=1, keepdims=True)
+        # Clip to avoid division by zero for rows that coincide with the mean.
+        Xc = Xc / np.clip(norms, 1e-9, None)
+    _, _, Vt = np.linalg.svd(Xc, full_matrices=False)
+    # With full_matrices=False, Vt has min(N, D) rows, which can be < k when
+    # only one or two rows are given. Zero-fill the missing directions so the
+    # projection keeps k columns instead of silently shrinking.
+    comps = np.zeros((k, D), dtype=Vt.dtype)
+    available = min(k, Vt.shape[0])
+    comps[:available] = Vt[:available]
+    proj = Xc @ comps.T
+    return proj, comps
+def _set_equal_axes_3d(ax, xyz: np.ndarray, *, padding: float = 0.05) -> None:
+    """Give a 3D axes a cubic view box that encloses all points in ``xyz``.
+    Every axis gets the same span (the largest per-axis extent of the data),
+    centered on that axis' midpoint and widened on both sides by ``padding``
+    times the span (or by ``padding`` itself when the span is zero).
+    Does nothing when ``xyz`` is empty.
+    """
+    if xyz.size == 0:
+        return
+    lows = [float(xyz[:, axis].min()) for axis in range(3)]
+    highs = [float(xyz[:, axis].max()) for axis in range(3)]
+    extents = [hi - lo for lo, hi in zip(lows, highs)]
+    span = max(extents[0], extents[1], extents[2])
+    margin = padding * (span if span > 0 else 1.0)
+    mid_x, mid_y, mid_z = [(lo + hi) / 2.0 for lo, hi in zip(lows, highs)]
+    ax.set_xlim(mid_x - span / 2 - margin, mid_x + span / 2 + margin)
+    ax.set_ylim(mid_y - span / 2 - margin, mid_y + span / 2 + margin)
+    ax.set_zlim(mid_z - span / 2 - margin, mid_z + span / 2 + margin)
+    # Best-effort: a failing set_box_aspect must not break the plot.
+    try:
+        ax.set_box_aspect([1, 1, 1])
+    except Exception:
+        pass
+# *----------------------------------------------------*
+#                        CLASS
+# *----------------------------------------------------*
+class Visualizer:
+    """3D PCA visualizer for per-frame embeddings.
+    Loads an embeddings array of shape (T, N, D) from an ArrayStorage archive
+    and renders one frame at a time as a 3D scatter of the PCA-projected node
+    vectors. Frame and node indices are 1-based in the public API and 0-based
+    internally.
+    """
+    # True until the first instance has been constructed; gates the one-time
+    # matplotlib warm start in __init__.
+    no_instances: bool = True
+    def __init__(
+        self,
+        EMB_path: str | Path,
+        figsize: tuple[int, int] = (9, 7),
+        default_node_color: str = visualizer_util.GRAY,
+        depthshade: bool = False,
+        antialiased: bool = False,
+        init_elev: float = 35,
+        init_azim: float = 45,
+        *,
+        show: bool = False,
+        normalize_rows: bool = False,
+    ) -> None:
+        """Load the embeddings archive and prepare an (initially empty) figure.
+        Args:
+            EMB_path: Path to the embeddings archive; the array to read is named
+                by the archive attribute ``frame_embeddings_name``.
+            figsize: Figure size passed to ``pyplot.figure``.
+            default_node_color: Color used for nodes that get no explicit color.
+            depthshade: Forwarded to the 3D scatter.
+            antialiased: Forwarded to the 3D scatter.
+            init_elev: Elevation of the camera, applied on every ``build_frame``.
+            init_azim: Azimuth of the camera, applied on every ``build_frame``.
+            show: Passed to ``visualizer_util.ensure_backend`` before pyplot is imported.
+            normalize_rows: If True, rows are L2-normalized before the PCA.
+        Raises:
+            ValueError: If the stored embeddings are not a 3D array.
+        """
+        # Backend & pyplot (pyplot is imported only after the backend is settled)
+        visualizer_util.ensure_backend(show)
+        import matplotlib.pyplot as plt
+        self._plt = plt
+        if Visualizer.no_instances:
+            try:
+                visualizer_util.warm_start_matplotlib()
+            finally:
+                # Never retry the warm start, even if it failed.
+                Visualizer.no_instances = False
+        # Load embeddings archive
+        EMB_path = Path(EMB_path)
+        with sawnergy_util.ArrayStorage(EMB_path, mode="r") as storage:
+            name = storage.get_attr("frame_embeddings_name")
+            E = storage.read(name, slice(None))
+        # Convert first so the shape check works for any array-like returned by storage.
+        E = np.asarray(E)
+        if E.ndim != 3:
+            raise ValueError(f"Expected embeddings of shape (T,N,D); got {E.shape}")
+        self.E = E
+        self.T, self.N, self.D = map(int, self.E.shape)
+        _logger.info("Loaded embeddings: T=%d, N=%d, D=%d", self.T, self.N, self.D)
+        # Coloring normalizer (parity with RIN Visualizer)
+        self._residue_norm = mpl.colors.Normalize(0, max(1, self.N - 1))
+        # Figure / axes / artists
+        # NOTE(review): the RIN Visualizer also requests num="SAWNERGY"; pyplot
+        # hands back an existing figure with the same label, so both visualizers
+        # may end up sharing one figure - confirm this is intended.
+        self._fig = self._plt.figure(figsize=figsize, num="SAWNERGY")
+        self._ax = None
+        self._scatter = None
+        self._labels = []
+        self._marker_size = 30.0
+        self._init_elev = init_elev
+        self._init_azim = init_azim
+        self.default_node_color = default_node_color
+        self._antialiased = bool(antialiased)
+        self._depthshade = bool(depthshade)
+        self._normalize_rows = bool(normalize_rows)
+    # ------------------------------ PRIVATE ------------------------------ #
+    def _ensure_axes(self) -> None:
+        """Create the 3D axes and the empty scatter artist on first use."""
+        if self._ax is not None and self._scatter is not None:
+            return
+        self._fig.clf()
+        self._ax = self._fig.add_subplot(111, projection="3d")
+        self._ax.view_init(self._init_elev, self._init_azim)
+        self._scatter = self._ax.scatter(
+            [], [], [],
+            s=self._marker_size,
+            depthshade=self._depthshade,
+            edgecolors="none",
+            antialiased=self._antialiased,
+        )
+        try:
+            self._ax.set_axis_off()
+        except Exception:
+            # Cosmetic only; keep rendering with visible axes.
+            _logger.debug("set_axis_off() failed; leaving axes visible", exc_info=True)
+    def _clear_labels(self) -> None:
+        """Remove node-label artists left over from a previous frame."""
+        for txt in getattr(self, "_labels", []):
+            try:
+                txt.remove()
+            except Exception:
+                _logger.debug("Could not remove a stale node label", exc_info=True)
+        self._labels = []
+    def _project3(self, X: np.ndarray) -> np.ndarray:
+        """Return a 3D PCA projection of embeddings (always 3 coordinates).
+        Three components are used when D >= 3, otherwise two. Whenever the
+        projection yields fewer than 3 columns (D == 2, or too few rows for
+        three directions), the remaining coordinate(s) are set to 0 so that
+        the returned array still has shape (N, 3). For D < 2 the ValueError
+        raised by ``_safe_svd_pca`` propagates.
+        """
+        k = 3 if X.shape[1] >= 3 else 2
+        P, _ = _safe_svd_pca(X, k, row_l2=self._normalize_rows)
+        missing = 3 - P.shape[1]
+        if missing > 0:
+            P = np.c_[P, np.zeros((P.shape[0], missing), dtype=P.dtype)]
+        return P
+    def _select_nodes(self, displayed_nodes: Sequence[int] | str | None) -> np.ndarray:
+        """Translate the 1-based ``displayed_nodes`` selection into 0-based indices.
+        Raises:
+            TypeError: If the selection is neither None, "ALL", nor integer-typed.
+            ValueError: If the selection is empty.
+            IndexError: If any index falls outside [1, N].
+        """
+        # Compare against "ALL" only for real strings: `ndarray == "ALL"` is
+        # evaluated elementwise and has no unambiguous truth value.
+        if displayed_nodes is None or (isinstance(displayed_nodes, str) and displayed_nodes == "ALL"):
+            return np.arange(self.N, dtype=np.int64)
+        idx = np.asarray(displayed_nodes)
+        if idx.dtype.kind not in "iu":
+            raise TypeError("displayed_nodes must be None, 'ALL', or an integer sequence.")
+        if idx.size == 0:
+            raise ValueError("displayed_nodes must contain at least one node index.")
+        if idx.min() < 1 or idx.max() > self.N:
+            raise IndexError(f"displayed_nodes out of range [1,{self.N}]")
+        return idx.astype(np.int64) - 1
+    def _apply_colors(self, node_colors, idx: np.ndarray) -> np.ndarray:
+        """Resolve ``node_colors`` into one color per selected node.
+        RIN Visualizer semantics:
+        - str: name of a colormap, sampled by 0-based node index.
+        - None: every node gets ``default_node_color``.
+        - (N, 3) or (N, 4) array: explicit per-node colors.
+        - anything else: handed to ``visualizer_util.map_groups_to_colors`` as groups.
+        """
+        if isinstance(node_colors, str):
+            node_cmap = self._plt.get_cmap(node_colors)
+            return node_cmap(self._residue_norm(idx))
+        if node_colors is None:
+            full = visualizer_util.map_groups_to_colors(
+                N=self.N, groups=None, default_color=self.default_node_color, one_based=True
+            )
+            return np.asarray(full)[idx]
+        try:
+            arr = np.asarray(node_colors)
+        except (ValueError, TypeError):
+            # Recent NumPy refuses ragged nested sequences; such input cannot be
+            # an (N, 3|4) color table, so treat it as a group specification.
+            arr = None
+        if arr is not None and arr.ndim == 2 and arr.shape[0] == self.N and arr.shape[1] in (3, 4):
+            return arr[idx]
+        full = visualizer_util.map_groups_to_colors(
+            N=self.N, groups=node_colors, default_color=self.default_node_color, one_based=True
+        )
+        return np.asarray(full)[idx]
+    # ------------------------------ PUBLIC ------------------------------- #
+    def build_frame(
+        self,
+        frame_id: int,
+        *,
+        node_colors: str | np.ndarray | None = "rainbow",
+        displayed_nodes: Sequence[int] | str | None = "ALL",
+        show_node_labels: bool = False,
+        show: bool = False
+    ) -> None:
+        """Render a single frame as a PCA **3D** scatter (matches RIN Visualizer API).
+        Args:
+            frame_id: 1-based frame index.
+            node_colors: Colormap name, explicit per-node colors, a group
+                specification, or None for the default color.
+            displayed_nodes: 1-based node indices to draw, or "ALL"/None for every node.
+            show_node_labels: If True, annotate each point with its 1-based node id.
+            show: If True, open a blocking pyplot window after drawing.
+        Raises:
+            IndexError: If ``frame_id`` or a displayed node is out of range.
+        """
+        frame0 = int(frame_id) - 1
+        if not (0 <= frame0 < self.T):
+            raise IndexError(f"frame_id out of range [1,{self.T}]")
+        self._ensure_axes()
+        idx = self._select_nodes(displayed_nodes)
+        X = self.E[frame0, idx, :]   # (n, D)
+        P = self._project3(X)        # (n, 3)
+        colors = self._apply_colors(node_colors, idx)
+        x, y, z = P[:, 0], P[:, 1], P[:, 2]
+        self._scatter._offsets3d = (x, y, z)
+        self._scatter.set_facecolors(colors)
+        _set_equal_axes_3d(self._ax, P, padding=0.05)
+        self._ax.view_init(self._init_elev, self._init_azim)
+        # Always drop labels from the previous frame; otherwise they would
+        # linger when this call is made with show_node_labels=False.
+        self._clear_labels()
+        if show_node_labels:
+            for p, nid in zip(P, idx + 1):
+                self._labels.append(self._ax.text(p[0], p[1], p[2], str(int(nid)), fontsize=8))
+        try:
+            self._fig.tight_layout()
+        except Exception:
+            _logger.debug("tight_layout() failed; falling back to subplots_adjust()", exc_info=True)
+            try:
+                self._fig.subplots_adjust()
+            except Exception:
+                _logger.debug("subplots_adjust() failed; keeping current layout", exc_info=True)
+        try:
+            self._fig.canvas.draw_idle()
+        except Exception:
+            _logger.debug("canvas.draw_idle() failed", exc_info=True)
+        if show:
+            try:
+                self._plt.show(block=True)
+            except TypeError:
+                # Fallback for a show() that does not accept the `block` keyword.
+                self._plt.show()
+    # convenience
+    def savefig(self, path: str | Path, *, dpi: int = 150) -> None:
+        """Save the current figure to ``path`` at the given resolution."""
+        self._fig.savefig(path, dpi=dpi)
+    def close(self) -> None:
+        """Close the figure; failures are logged at debug level and otherwise ignored."""
+        try:
+            self._plt.close(self._fig)
+        except Exception:
+            _logger.debug("Closing the figure failed", exc_info=True)
+__all__ = ["Visualizer"]

sawnergy/logging_util.py CHANGED Viewed

@@ -6,7 +6,7 @@ from datetime import datetime
 def configure_logging(
     logs_dir: Path | str,
-    file_level: int = logging.DEBUG,
+    file_level: int = logging.WARNING,
     console_level: int = logging.WARNING
 ) -> None:
     """

sawnergy/rin/rin_builder.py CHANGED Viewed

@@ -669,7 +669,7 @@ class RINBuilder:
         molecule_of_interest: int,
         frame_range: tuple[int, int] | None = None,
         frame_batch_size: int = -1,
-        prune_low_energies_frac: float = 0.3,
+        prune_low_energies_frac: float = 0.85,
         output_path: str | Path | None = None,
         keep_prenormalized_energies: bool = True,
         *,
@@ -690,9 +690,9 @@ class RINBuilder:
         2. For each frame batch:
-            a) Run cpptraj `pairwise` on atoms → EMAP + VMAP → sum (atomic matrix).
+            a) Run cpptraj 'pairwise' on atoms → EMAP + VMAP → sum (atomic matrix).
-            b) Project atomic → residue with ``R = Pᵀ @ A @ P``.
+            b) Project atomic → residue with 'R = Pᵀ @ A @ P'.
             c) Post-process residue matrix:
                 split into (attractive, repulsive) channels,
@@ -700,7 +700,7 @@ class RINBuilder:
                 remove self-interactions,
                 symmetrize.
-            d. Optionally store **pre-normalized energies** (attractive or repulsive or both, depending on `include_<kind>`).
+            d. Optionally store **pre-normalized energies** (attractive or repulsive or both, depending on 'include_<kind>').
             e. Row-wise L1 normalize (directed transition probabilities) and store.

sawnergy/visual/visualizer.py CHANGED Viewed

@@ -107,7 +107,7 @@ class Visualizer:
         visualizer_util.ensure_backend(show)
         import matplotlib.pyplot as plt
         self._plt = plt
-    # ---------- WARM UP MPL ------------ #
+        # ---------- WARM UP MPL ------------ #
         _logger.debug("Visualizer.__init__ start | RIN_path=%s, figsize=%s, node_size=%s, edge_width=%s, depthshade=%s, antialiased=%s, init_view=(%s,%s)",
                       RIN_path, figsize, node_size, edge_width, depthshade, antialiased, init_elev, init_azim)
         if Visualizer.no_instances:
@@ -116,7 +116,7 @@ class Visualizer:
         else:
             _logger.debug("Skipping warm-start (no_instances=False).")
-    # ---------- LOAD THE DATA ---------- #
+        # ---------- LOAD THE DATA ---------- #
         with sawnergy_util.ArrayStorage(RIN_path, mode="r") as storage:
             com_name = storage.get_attr("com_name")
             attr_energies_name = storage.get_attr("attractive_energies_name")
@@ -135,7 +135,7 @@ class Visualizer:
         self.N = np.size(self.COM_coords[0], axis=0)
         _logger.debug("Computed N=%d", self.N)
-    # - SET UP THE CANVAS AND THE AXES - #
+        # - SET UP THE CANVAS AND THE AXES - #
         self._fig = plt.figure(figsize=figsize, num="SAWNERGY")
         self._ax  = self._fig.add_subplot(111, projection="3d")
         self._fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
@@ -145,14 +145,14 @@ class Visualizer:
         self._ax.set_axis_off()
         _logger.debug("Figure and 3D axes initialized.")
-    # ------ SET UP PLOT ELEMENTS ------ #
+        # ------ SET UP PLOT ELEMENTS ------ #
         self._scatter: PathCollection  = self._ax.scatter([], [], [], s=node_size, depthshade=depthshade, edgecolors="none")
         self._attr: Line3DCollection   = Line3DCollection(np.empty((0,2,3)), linewidths=edge_width, antialiased=antialiased)
         self._repuls: Line3DCollection = Line3DCollection(np.empty((0,2,3)), linewidths=edge_width, antialiased=antialiased)
         self._ax.add_collection3d(self._attr); self._ax.add_collection3d(self._repuls) # set pointers to the attractive and repulsive collections
         _logger.debug("Artists created | scatter(empty), attr_lines(empty), repuls_lines(empty).")
-    # ---------- HELPER FIELDS --------- #
+        # ---------- HELPER FIELDS --------- #
         # NOTE: 'under the hood' everything is 0-base indexed,
         # BUT, from the API point of view, the indexing is 1-base,
         # because amino acid residues are 1-base indexed.
@@ -160,7 +160,7 @@ class Visualizer:
         self.default_node_color = default_node_color
         _logger.debug("Helper fields set | residue_norm=[0,%d], default_node_color=%s", self.N-1, self.default_node_color)
-    # DISALLOW MPL WARM-UP IN THE FUTURE
+        # DISALLOW MPL WARM-UP IN THE FUTURE
         Visualizer.no_instances = False
         _logger.debug("Visualizer.no_instances set to False.")

sawnergy/visual/visualizer_util.py CHANGED Viewed

@@ -319,6 +319,9 @@ def build_line_segments(
     kept = edge_weights >= thresh
     rows, cols = rows[kept], cols[kept]
+    nz = weights[rows, cols] > 0.0
+    rows, cols = rows[nz], cols[nz]
     if rows.size == 0:
         _logger.debug("build_line_segments: no edges kept after threshold; returning empties.")
         return (np.empty((0, 2, 3), dtype=float),

sawnergy/walks/walker.py CHANGED Viewed

@@ -63,7 +63,7 @@ class Walker:
         # Load numpy arrays from read-only storage
         with sawnergy_util.ArrayStorage(RIN_path, mode="r") as storage:
-            attr_name   = storage.get_attr("attractive_transitions_name")
+            attr_name = storage.get_attr("attractive_transitions_name")
             repuls_name = storage.get_attr("repulsive_transitions_name")
             attr_matrices  : np.ndarray | None = (
                 storage.read(attr_name, slice(None)) if attr_name is not None else None
@@ -133,6 +133,9 @@ class Walker:
             ) if repuls_matrices is not None else None
         )
+        self._attr_owner_pid   = os.getpid() if self.attr_matrices  is not None else None
+        self._repuls_owner_pid = os.getpid() if self.repuls_matrices is not None else None
         _logger.debug(
             "SharedNDArray created | attr name=%r; repuls name=%r",
             getattr(self.attr_matrices, "name", None),
@@ -159,38 +162,56 @@ class Walker:
     # explicit resource cleanup
     def close(self) -> None:
-        """Close shared-memory handles and (in main process) unlink segments.
+        """Release shared-memory resources used by this Walker.
+        This method:
+        - Closes local handles to the shared-memory backed arrays
+        (`self.attr_matrices`, `self.repuls_matrices`) in **the current process**.
+        - If the current process is the **creator** of a segment (its PID matches
+        `_attr_owner_pid` / `_repuls_owner_pid`), it also **unlinks** that segment
+        so the OS can reclaim it once all handles are closed.
+        Behavior & guarantees
+        ---------------------
+        - **Idempotent:** safe to call multiple times; subsequent calls are no-ops.
+        - **Multi-process aware:** non-creator processes only close their handles;
+        creators close **and** unlink. This prevents `resource_tracker` “leaked
+        shared_memory” warnings when using `ProcessPoolExecutor`/spawn.
+        - **Best-effort unlink:** `FileNotFoundError` during unlink (already unlinked
+        elsewhere) is swallowed.
+        - Invoked automatically by the context manager (`__exit__`) and destructor
+        (`__del__`), but it's fine to call explicitly.
+        After calling `close()`, any operation that relies on the shared arrays may
+        fail; treat the instance as finalized.
-        Idempotent: if cleanup already occurred, returns immediately. Always
-        closes local handles in the current process. If the caller is the main
-        process (per ``sawnergy_util.is_main_process()``), also attempts to
-        unlink the underlying shared-memory segments (best-effort; suppresses
-        ``FileNotFoundError`` if already unlinked elsewhere).
+        Returns:
+            None
         """
         if self._memory_cleaned_up:
             _logger.debug("close(): already cleaned up; returning")
             return
-        _logger.debug("Closing Walker resources (is_main=%s)", sawnergy_util.is_main_process())
+        _logger.debug("Closing Walker resources (pid=%s)", os.getpid())
         try:
             if self.attr_matrices is not None:
                 self.attr_matrices.close()
             if self.repuls_matrices is not None:
                 self.repuls_matrices.close()
             _logger.debug("SharedNDArray handles closed")
-            if sawnergy_util.is_main_process():
-                _logger.debug("Attempting to unlink shared memory segments (main process)")
-                try:
-                    if self.attr_matrices is not None:
-                        self.attr_matrices.unlink()
-                except FileNotFoundError:
-                    _logger.warning("attr SharedMemory already unlinked")
-                try:
-                    if self.repuls_matrices is not None:
-                        self.repuls_matrices.unlink()
-                except FileNotFoundError:
-                    _logger.warning("repuls SharedMemory already unlinked")
-            else:
-                _logger.debug("Not main process; skipping unlink")
+            # Unlink in whichever process actually CREATED the segment(s)
+            try:
+                if self.attr_matrices is not None and getattr(self, "_attr_owner_pid", None) == os.getpid():
+                    self.attr_matrices.unlink()
+            except FileNotFoundError:
+                _logger.debug("attr SharedMemory already unlinked elsewhere")
+            try:
+                if self.repuls_matrices is not None and getattr(self, "_repuls_owner_pid", None) == os.getpid():
+                    self.repuls_matrices.unlink()
+            except FileNotFoundError:
+                _logger.debug("repuls SharedMemory already unlinked elsewhere")
         finally:
             self._memory_cleaned_up = True
             _logger.debug("Cleanup complete")

sawnergy 1.0.3__py3-none-any.whl → 1.0.9__py3-none-any.whl

sawnergy 1.0.3__py3-none-any.whl → 1.0.9__py3-none-any.whl