PyPI - sawnergy - Versions diffs - 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl - Mend

sawnergy-1.0.6-py3-none-any.whl → sawnergy-1.0.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sawnergy might be problematic. Click here for more details.

Files changed (16)

sawnergy/embedding/SGNS_pml.py +219 -23
sawnergy/embedding/SGNS_torch.py +213 -26
sawnergy/embedding/__init__.py +24 -0
sawnergy/embedding/embedder.py +341 -211
sawnergy/embedding/visualizer.py +251 -0
sawnergy/logging_util.py +1 -1
sawnergy/rin/rin_builder.py +1 -1
sawnergy/visual/visualizer.py +6 -6
sawnergy/visual/visualizer_util.py +3 -0
{sawnergy-1.0.6.dist-info → sawnergy-1.0.8.dist-info}/METADATA +79 -56
sawnergy-1.0.8.dist-info/RECORD +23 -0
sawnergy-1.0.6.dist-info/RECORD +0 -22
{sawnergy-1.0.6.dist-info → sawnergy-1.0.8.dist-info}/WHEEL +0 -0
{sawnergy-1.0.6.dist-info → sawnergy-1.0.8.dist-info}/licenses/LICENSE +0 -0
{sawnergy-1.0.6.dist-info → sawnergy-1.0.8.dist-info}/licenses/NOTICE +0 -0
{sawnergy-1.0.6.dist-info → sawnergy-1.0.8.dist-info}/top_level.txt +0 -0

sawnergy/embedding/visualizer.py ADDED Viewed

@@ -0,0 +1,251 @@
+from __future__ import annotations
+# third party
+import numpy as np
+import matplotlib as mpl
+# built-in
+from pathlib import Path
+from typing import Sequence
+import logging
+# local
+from ..visual import visualizer_util
+from .. import sawnergy_util
+# *----------------------------------------------------*
+#                        GLOBALS
+# *----------------------------------------------------*
+_logger = logging.getLogger(__name__)
+# *----------------------------------------------------*
+#                        HELPERS
+# *----------------------------------------------------*
+def _safe_svd_pca(X: np.ndarray, k: int, *, row_l2: bool = False) -> tuple[np.ndarray, np.ndarray]:
+    """Compute k principal directions via SVD and project onto them."""
+    if X.ndim != 2:
+        raise ValueError(f"PCA expects 2D array (N, D); got {X.shape}")
+    _, D = X.shape
+    if k not in (2, 3):
+        raise ValueError(f"PCA dimensionality must be 2 or 3; got {k}")
+    if D < k:
+        raise ValueError(f"Requested k={k} exceeds feature dim D={D}")
+    Xc = X - X.mean(axis=0, keepdims=True)
+    if row_l2:
+        norms = np.linalg.norm(Xc, axis=1, keepdims=True)
+        Xc = Xc / np.clip(norms, 1e-9, None)
+    _, _, Vt = np.linalg.svd(Xc, full_matrices=False)
+    comps = Vt[:k].copy()
+    proj = Xc @ comps.T
+    return proj, comps
+def _set_equal_axes_3d(ax, xyz: np.ndarray, *, padding: float = 0.05) -> None:
+    if xyz.size == 0:
+        return
+    x, y, z = xyz[:, 0], xyz[:, 1], xyz[:, 2]
+    xmin, xmax = float(x.min()), float(x.max())
+    ymin, ymax = float(y.min()), float(y.max())
+    zmin, zmax = float(z.min()), float(z.max())
+    xr = xmax - xmin
+    yr = ymax - ymin
+    zr = zmax - zmin
+    r = max(xr, yr, zr)
+    pad = padding * (r if r > 0 else 1.0)
+    cx, cy, cz = (xmin + xmax) / 2.0, (ymin + ymax) / 2.0, (zmin + zmax) / 2.0
+    ax.set_xlim(cx - r / 2 - pad, cx + r / 2 + pad)
+    ax.set_ylim(cy - r / 2 - pad, cy + r / 2 + pad)
+    ax.set_zlim(cz - r / 2 - pad, cz + r / 2 + pad)
+    try:
+        ax.set_box_aspect([1, 1, 1])
+    except Exception:
+        pass
+# *----------------------------------------------------*
+#                        CLASS
+# *----------------------------------------------------*
+class Visualizer:
+    """3D PCA visualizer for per-frame embeddings"""
+    no_instances: bool = True
+    def __init__(
+        self,
+        EMB_path: str | Path,
+        figsize: tuple[int, int] = (9, 7),
+        default_node_color: str = visualizer_util.GRAY,
+        depthshade: bool = False,
+        antialiased: bool = False,
+        init_elev: float = 35,
+        init_azim: float = 45,
+        *,
+        show: bool = False,
+        normalize_rows: bool = False,
+    ) -> None:
+        # Backend & pyplot
+        visualizer_util.ensure_backend(show)
+        import matplotlib.pyplot as plt
+        self._plt = plt
+        if Visualizer.no_instances:
+            try:
+                visualizer_util.warm_start_matplotlib()
+            finally:
+                Visualizer.no_instances = False
+        # Load embeddings archive
+        EMB_path = Path(EMB_path)
+        with sawnergy_util.ArrayStorage(EMB_path, mode="r") as storage:
+            name = storage.get_attr("frame_embeddings_name")
+            E = storage.read(name, slice(None))
+        if E.ndim != 3:
+            raise ValueError(f"Expected embeddings of shape (T,N,D); got {E.shape}")
+        self.E = np.asarray(E)
+        self.T, self.N, self.D = map(int, self.E.shape)
+        _logger.info("Loaded embeddings: T=%d, N=%d, D=%d", self.T, self.N, self.D)
+        # Coloring normalizer (parity with RIN Visualizer)
+        self._residue_norm = mpl.colors.Normalize(0, max(1, self.N - 1))
+        # Figure / axes / artists
+        self._fig = self._plt.figure(figsize=figsize, num="SAWNERGY")
+        self._ax = None
+        self._scatter = None
+        self._marker_size = 30.0
+        self._init_elev = init_elev
+        self._init_azim = init_azim
+        self.default_node_color = default_node_color
+        self._antialiased = bool(antialiased)
+        self._depthshade = bool(depthshade)
+        self._normalize_rows = bool(normalize_rows)
+    # ------------------------------ PRIVATE ------------------------------ #
+    def _ensure_axes(self) -> None:
+        if self._ax is not None and self._scatter is not None:
+            return
+        self._fig.clf()
+        self._ax = self._fig.add_subplot(111, projection="3d")
+        self._ax.view_init(self._init_elev, self._init_azim)
+        self._scatter = self._ax.scatter(
+            [], [], [],
+            s=self._marker_size,
+            depthshade=self._depthshade,
+            edgecolors="none",
+            antialiased=self._antialiased,
+        )
+        try:
+            self._ax.set_axis_off()
+        except Exception:
+            pass
+    def _project3(self, X: np.ndarray) -> np.ndarray:
+        """Return a 3D PCA projection of embeddings (always 3 coordinates).
+        If the embedding dimensionality D < 3, the remaining coordinate(s) are set to 0
+        so that the returned array still has shape (N, 3).
+        """
+        k = 3 if X.shape[1] >= 3 else 2
+        P, _ = _safe_svd_pca(X, k, row_l2=self._normalize_rows)
+        if k == 2:
+            P = np.c_[P, np.zeros((P.shape[0], 1), dtype=P.dtype)]
+        return P
+    def _select_nodes(self, displayed_nodes: Sequence[int] | str | None) -> np.ndarray:
+        if displayed_nodes is None or displayed_nodes == "ALL":
+            return np.arange(self.N, dtype=np.int64)
+        idx = np.asarray(displayed_nodes)
+        if idx.dtype.kind not in "iu":
+            raise TypeError("displayed_nodes must be None, 'ALL', or an integer sequence.")
+        if idx.min() < 1 or idx.max() > self.N:
+            raise IndexError(f"displayed_nodes out of range [1,{self.N}]")
+        return idx.astype(np.int64) - 1
+    def _apply_colors(self, node_colors, idx: np.ndarray) -> np.ndarray:
+        # RIN Visualizer semantics:
+        if isinstance(node_colors, str):
+            node_cmap = self._plt.get_cmap(node_colors)
+            return node_cmap(self._residue_norm(idx))
+        if node_colors is None:
+            full = visualizer_util.map_groups_to_colors(
+                N=self.N, groups=None, default_color=self.default_node_color, one_based=True
+            )
+            return np.asarray(full)[idx]
+        arr = np.asarray(node_colors)
+        if arr.ndim == 2 and arr.shape[0] == self.N and arr.shape[1] in (3, 4):
+            return arr[idx]
+        full = visualizer_util.map_groups_to_colors(
+            N=self.N, groups=node_colors, default_color=self.default_node_color, one_based=True
+        )
+        return np.asarray(full)[idx]
+    # ------------------------------ PUBLIC ------------------------------- #
+    def build_frame(
+        self,
+        frame_id: int,
+        *,
+        node_colors: str | np.ndarray | None = "rainbow",
+        displayed_nodes: Sequence[int] | str | None = "ALL",
+        show_node_labels: bool = False,
+        show: bool = False
+    ) -> None:
+        """Render a single frame as a PCA **3D** scatter (matches RIN Visualizer API)."""
+        frame0 = int(frame_id) - 1
+        if not (0 <= frame0 < self.T):
+            raise IndexError(f"frame_id out of range [1,{self.T}]")
+        self._ensure_axes()
+        idx = self._select_nodes(displayed_nodes)
+        X = self.E[frame0, idx, :]   # (n, D)
+        P = self._project3(X)        # (n, 3)
+        colors = self._apply_colors(node_colors, idx)
+        x, y, z = P[:, 0], P[:, 1], P[:, 2]
+        self._scatter._offsets3d = (x, y, z)
+        self._scatter.set_facecolors(colors)
+        _set_equal_axes_3d(self._ax, P, padding=0.05)
+        self._ax.view_init(self._init_elev, self._init_azim)
+        if show_node_labels:
+            for txt in getattr(self, "_labels", []):
+                try:
+                    txt.remove()
+                except Exception:
+                    pass
+            self._labels = []
+            for p, nid in zip(P, idx + 1):
+                self._labels.append(self._ax.text(p[0], p[1], p[2], str(int(nid)), fontsize=8))
+        try:
+            self._fig.tight_layout()
+        except Exception:
+            try:
+                self._fig.subplots_adjust()
+            except Exception:
+                pass
+        try:
+            self._fig.canvas.draw_idle()
+        except Exception:
+            pass
+        if show:
+            try:
+                self._plt.show(block=True)
+            except TypeError:
+                self._plt.show()
+    # convenience
+    def savefig(self, path: str | Path, *, dpi: int = 150) -> None:
+        self._fig.savefig(path, dpi=dpi)
+    def close(self) -> None:
+        try:
+            self._plt.close(self._fig)
+        except Exception:
+            pass
+__all__ = ["Visualizer"]

sawnergy/logging_util.py CHANGED Viewed

@@ -6,7 +6,7 @@ from datetime import datetime
 def configure_logging(
     logs_dir: Path | str,
-    file_level: int = logging.DEBUG,
+    file_level: int = logging.WARNING,
     console_level: int = logging.WARNING
 ) -> None:
     """

sawnergy/rin/rin_builder.py CHANGED Viewed

@@ -669,7 +669,7 @@ class RINBuilder:
         molecule_of_interest: int,
         frame_range: tuple[int, int] | None = None,
         frame_batch_size: int = -1,
-        prune_low_energies_frac: float = 0.3,
+        prune_low_energies_frac: float = 0.85,
         output_path: str | Path | None = None,
         keep_prenormalized_energies: bool = True,
         *,

sawnergy/visual/visualizer.py CHANGED Viewed

@@ -107,7 +107,7 @@ class Visualizer:
         visualizer_util.ensure_backend(show)
         import matplotlib.pyplot as plt
         self._plt = plt
-    # ---------- WARM UP MPL ------------ #
+        # ---------- WARM UP MPL ------------ #
         _logger.debug("Visualizer.__init__ start | RIN_path=%s, figsize=%s, node_size=%s, edge_width=%s, depthshade=%s, antialiased=%s, init_view=(%s,%s)",
                       RIN_path, figsize, node_size, edge_width, depthshade, antialiased, init_elev, init_azim)
         if Visualizer.no_instances:
@@ -116,7 +116,7 @@ class Visualizer:
         else:
             _logger.debug("Skipping warm-start (no_instances=False).")
-    # ---------- LOAD THE DATA ---------- #
+        # ---------- LOAD THE DATA ---------- #
         with sawnergy_util.ArrayStorage(RIN_path, mode="r") as storage:
             com_name = storage.get_attr("com_name")
             attr_energies_name = storage.get_attr("attractive_energies_name")
@@ -135,7 +135,7 @@ class Visualizer:
         self.N = np.size(self.COM_coords[0], axis=0)
         _logger.debug("Computed N=%d", self.N)
-    # - SET UP THE CANVAS AND THE AXES - #
+        # - SET UP THE CANVAS AND THE AXES - #
         self._fig = plt.figure(figsize=figsize, num="SAWNERGY")
         self._ax  = self._fig.add_subplot(111, projection="3d")
         self._fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
@@ -145,14 +145,14 @@ class Visualizer:
         self._ax.set_axis_off()
         _logger.debug("Figure and 3D axes initialized.")
-    # ------ SET UP PLOT ELEMENTS ------ #
+        # ------ SET UP PLOT ELEMENTS ------ #
         self._scatter: PathCollection  = self._ax.scatter([], [], [], s=node_size, depthshade=depthshade, edgecolors="none")
         self._attr: Line3DCollection   = Line3DCollection(np.empty((0,2,3)), linewidths=edge_width, antialiased=antialiased)
         self._repuls: Line3DCollection = Line3DCollection(np.empty((0,2,3)), linewidths=edge_width, antialiased=antialiased)
         self._ax.add_collection3d(self._attr); self._ax.add_collection3d(self._repuls) # set pointers to the attractive and repulsive collections
         _logger.debug("Artists created | scatter(empty), attr_lines(empty), repuls_lines(empty).")
-    # ---------- HELPER FIELDS --------- #
+        # ---------- HELPER FIELDS --------- #
         # NOTE: 'under the hood' everything is 0-base indexed,
         # BUT, from the API point of view, the indexing is 1-base,
         # because amino acid residues are 1-base indexed.
@@ -160,7 +160,7 @@ class Visualizer:
         self.default_node_color = default_node_color
         _logger.debug("Helper fields set | residue_norm=[0,%d], default_node_color=%s", self.N-1, self.default_node_color)
-    # DISALLOW MPL WARM-UP IN THE FUTURE
+        # DISALLOW MPL WARM-UP IN THE FUTURE
         Visualizer.no_instances = False
         _logger.debug("Visualizer.no_instances set to False.")

sawnergy/visual/visualizer_util.py CHANGED Viewed

@@ -319,6 +319,9 @@ def build_line_segments(
     kept = edge_weights >= thresh
     rows, cols = rows[kept], cols[kept]
+    nz = weights[rows, cols] > 0.0
+    rows, cols = rows[nz], cols[nz]
     if rows.size == 0:
         _logger.debug("build_line_segments: no edges kept after threshold; returning empties.")
         return (np.empty((0, 2, 3), dtype=float),

{sawnergy-1.0.6.dist-info → sawnergy-1.0.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sawnergy
-Version: 1.0.6
+Version: 1.0.8
 Summary: Toolkit for transforming molecular dynamics (MD) trajectories into rich graph representations
 Home-page: https://github.com/Yehor-Mishchyriak/SAWNERGY
 Author: Yehor Mishchyriak
@@ -39,19 +39,57 @@ Dynamic: summary
 ![Python](https://img.shields.io/badge/python-3.11%2B-blue)
 A toolkit for transforming molecular dynamics (MD) trajectories into rich graph representations, sampling
-random and self-avoiding walks, learning node embeddings, and visualising residue interaction networks (RINs). SAWNERGY
+random and self-avoiding walks, learning node embeddings, and visualizing residue interaction networks (RINs). SAWNERGY
 keeps the full workflow — from `cpptraj` output to skip-gram embeddings (node2vec approach) — inside Python, backed by efficient Zarr-based archives and optional GPU acceleration.
 ---
+## Installation
+   ```bash
+   pip install sawnergy
+   ```
+> **Optional:** For GPU training, install PyTorch separately (e.g., `pip install torch`).
+> **Note:** RIN building requires `cpptraj` (AmberTools). Ensure it is discoverable via `$PATH` or the `CPPTRAJ`
+> environment variable. Probably the easiest solution: install AmberTools via Conda, activate the environment, and SAWNERGY will find the cpptraj executable on its own, so just run your code and don't worry about it.
+---
+# UPDATES:
+## v1.0.8 — What’s new:
+- **Temporary deprecation of `SGNS_Torch`**
+  - `sawnergy.embedding.SGNS_Torch` currently produces noisy embeddings in practice. The issue likely stems from **weight initialization**, although the root cause has not yet been conclusively determined.
+  - **Action:** The class and its `__init__` docstring now carry a deprecation notice. Constructing the class emits a **`DeprecationWarning`** and logs a **warning**.
+  - **Use instead:** Prefer **`SG_Torch`** (plain Skip-Gram with full softmax) or the PureML backends **`SGNS_PureML`** / **`SG_PureML`**.
+  - **Compatibility:** No breaking API changes; imports remain stable. PureML backends are unaffected.
+- **Embedding visualizer update**
+  - Now you can L2 normalize your embeddings before display.
+- **Small improvements in the embedding module**
+  - Improved API with a lot of good defaults in place to ease usage out of the box.
+  - Small internal model tweaks.
+## v1.0.7 — What’s new:
+- **Added plain Skip-Gram model**
+  - Now, the user can choose if they want to apply the negative sampling technique (two binary classifiers) or train a single classifier over the vocabulary (full softmax). For more detail, see: [node2vec](https://arxiv.org/pdf/1607.00653), [word2vec](https://arxiv.org/pdf/1301.3781), and [negative_sampling](https://arxiv.org/pdf/1402.3722).
+- **Set a harsher default for low interaction energies pruning during RIN construction**
+  - Now we zero out 85% of the lowest interaction energies as opposed to the past 30% default, leading to more meaningful embeddings.
+- **BUG FIX: Visualizer**
+  - Previously, the visualizer would silently draw edges of 0 magnitude, meaning they were actually being drawn but were invisible due to full transparency and 0 width. As a result, the displayed image/animation would be very laggy. Now, this was fixed, and given the higher pruning default, the displayed interaction networks are clean and smooth under rotations, dragging, etc.
+- **New Embedding Visualizer (3D)**
+  - New lightweight viewer for per-frame embeddings that projects embeddings with PCA to a **3D** scatter. Supports the same node coloring semantics, optional node labels, and the same antialiasing/depthshade controls. Works in headless setups using the same backend guard and uses a blocking `show=True` for scripts.
+---
 ## Why SAWNERGY?
 - **Bridge simulations and graph ML**: Convert raw MD trajectories into residue interaction networks ready for graph
   algorithms and downstream machine learning tasks.
-- **Deterministic, shareable artefacts**: Every stage produces compressed Zarr archives that contain both data and metadata so runs can be reproduced, shared, or inspected later.
-- **High-performance data handling**: Heavy arrays live in shared memory during walk sampling to allow parallel processing without serealization overhead; archives are written in chunked, compressed form for fast read/write.
-- **Flexible embedding backends**: Train skip-gram with negative sampling (SGNS) models using either PureML or PyTorch.
-- **Visualization out of the box**: Plot and animate residue networks without leaving Python, using the data produced by RINBuilder
+- **Deterministic, shareable artifacts**: Every stage produces compressed Zarr archives that contain both data and metadata so runs can be reproduced, shared, or inspected later.
+- **High-performance data handling**: Heavy arrays live in shared memory during walk sampling to allow parallel processing without serialization overhead; archives are written in chunked, compressed form for fast read/write.
+- **Flexible objectives & backends**: Train Skip-Gram with **negative sampling** (`objective="sgns"`) or **plain Skip-Gram** (`objective="sg"`), using either **PureML** (default) or **PyTorch**.
+- **Visualization out of the box**: Plot and animate residue networks without leaving Python, using the data produced by RINBuilder.
 ---
@@ -91,9 +129,9 @@ node indexing, and RNG seeds stay consistent across the toolchain.
 * Wraps the AmberTools `cpptraj` executable to:
   - compute per-frame electrostatic (EMAP) and van der Waals (VMAP) energy matrices at the atomic level,
   - project atom–atom interactions to residue–residue interactions using compositional masks,
-  - prune, symmetrise, remove self-interactions, and L1-normalise the matrices,
-  - compute per-residue centres of mass (COM) over the same frames.
-* Outputs a compressed Zarr archive with transition matrices, optional prenormalised energies, COM snapshots, and rich
+  - prune, symmetrize, remove self-interactions, and L1-normalize the matrices,
+  - compute per-residue centers of mass (COM) over the same frames.
+* Outputs a compressed Zarr archive with transition matrices, optional pre-normalized energies, COM snapshots, and rich
   metadata (frame range, pruning quantile, molecule ID, etc.).
 * Supports parallel `cpptraj` execution, batch processing, and keeps temporary stores tidy via
   `ArrayStorage.compress_and_cleanup`.
@@ -103,7 +141,7 @@ node indexing, and RNG seeds stay consistent across the toolchain.
 * Opens RIN archives, resolves dataset names from attributes, and renders nodes plus attractive/repulsive edge bundles
   in 3D using Matplotlib.
 * Allows both static frame visualization and trajectory animation.
-* Handles backend selection (`Agg` fallback in headless environments) and offers convenient colour palettes via
+* Handles backend selection (`Agg` fallback in headless environments) and offers convenient color palettes via
   `visualizer_util`.
 ### `sawnergy.walks.Walker`
@@ -116,13 +154,10 @@ node indexing, and RNG seeds stay consistent across the toolchain.
 ### `sawnergy.embedding.Embedder`
-* Consumes walk archives, generates skip-gram pairs, and normalises them to 0-based indices.
-* Provides a unified interface to SGNS implementations:
-  - **PureML backend** (`SGNS_PureML`): works with the `pureml` ecosystem, optimistic for CPU training.
-  - **PyTorch backend** (`SGNS_Torch`): uses `torch.nn.Embedding` plays nicely with GPUs.
-* Both `SGNS_PureML` and `SGNS_Torch` accept training hyperparameters such as batch_size, LR, optimizer and LR_scheduler, etc.
-* Exposes `embed_frame` (single frame) and `embed_all` (all frames, deterministic seeding per frame) which return the
-  learned input embedding matrices and write them to disk when requested.
+* Consumes walk archives, generates skip-gram pairs, and normalizes them to 0-based indices.
+* Selects skip-gram (SG / SGNS) backends dynamically via `model_base="pureml"|"torch"` with per-backend overrides supplied through `model_kwargs`.
+* Handles deterministic per-frame seeding and returns the requested embedding `kind` (`"in"`, `"out"`, or `"avg"`) from `embed_frame` and `embed_all`.
+* Persists per-frame matrices with rich provenance (walk metadata, objective, hyperparameters, RNG seeds) when `embed_all` targets an output archive.
 ### Supporting Utilities
@@ -140,23 +175,13 @@ node indexing, and RNG seeds stay consistent across the toolchain.
 |---|---|---|
 | **RIN** | `ATTRACTIVE_transitions` → **(T, N, N)**, float32  •  `REPULSIVE_transitions` → **(T, N, N)**, float32 (optional)  •  `ATTRACTIVE_energies` → **(T, N, N)**, float32 (optional)  •  `REPULSIVE_energies` → **(T, N, N)**, float32 (optional)  •  `COM` → **(T, N, 3)**, float32 | `time_created` (ISO) • `com_name` = `"COM"` • `molecule_of_interest` (int) • `frame_range` = `(start, end)` inclusive • `frame_batch_size` (int) • `prune_low_energies_frac` (float in [0,1]) • `attractive_transitions_name` / `repulsive_transitions_name` (dataset names or `None`) • `attractive_energies_name` / `repulsive_energies_name` (dataset names or `None`) |
 | **Walks** | `ATTRACTIVE_RWs` → **(T, N·num_RWs, L+1)**, int32 (optional)  •  `REPULSIVE_RWs` → **(T, N·num_RWs, L+1)**, int32 (optional)  •  `ATTRACTIVE_SAWs` → **(T, N·num_SAWs, L+1)**, int32 (optional)  •  `REPULSIVE_SAWs` → **(T, N·num_SAWs, L+1)**, int32 (optional)  <br/>_Note:_ node IDs are **1-based**.| `time_created` (ISO) • `seed` (int) • `rng_scheme` = `"SeedSequence.spawn_per_batch_v1"` • `num_workers` (int) • `in_parallel` (bool) • `batch_size_nodes` (int) • `num_RWs` / `num_SAWs` (ints) • `node_count` (N) • `time_stamp_count` (T) • `walk_length` (L) • `walks_per_node` (int) • `attractive_RWs_name` / `repulsive_RWs_name` / `attractive_SAWs_name` / `repulsive_SAWs_name` (dataset names or `None`) • `walks_layout` = `"time_leading_3d"` |
-| **Embeddings** | `FRAME_EMBEDDINGS` → **(frames_written, vocab_size, D)**, typically float32 | `time_created` (ISO) • `seed` (int) • `rng_scheme` = `"SeedSequence.spawn_per_frame_v1"` • `source_walks_path` (str) • `model_base` = `"torch"` or `"pureml"` • `rin_type` = `"attr"` or `"repuls"` • `using_mode` = `"RW"|"SAW"|"merged"` • `window_size` (int) • `alpha` (float; noise exponent) • `dimensionality` = D • `num_negative_samples` (int) • `num_epochs` (int) • `batch_size` (int) • `shuffle_data` (bool) • `frames_written` (int) • `vocab_size` (int) • `frame_count` (int) • `embedding_dtype` (str) • `frame_embeddings_name` = `"FRAME_EMBEDDINGS"` • `arrays_per_chunk` (int) • `compression_level` (int) |
+| **Embeddings** | `FRAME_EMBEDDINGS` → **(T, N, D)**, float32 | `created_at` (ISO) • `frame_embeddings_name` = `"FRAME_EMBEDDINGS"` • `time_stamp_count` = T • `node_count` = N • `embedding_dim` = D • `model_base` = `"torch"` or `"pureml"` • `embedding_kind` = `"in"|"out"|"avg"` • `objective` = `"sgns"` or `"sg"` • `negative_sampling` (bool) • `num_negative_samples` (int) • `num_epochs` (int) • `batch_size` (int) • `window_size` (int) • `alpha` (float) • `lr_step_per_batch` (bool) • `shuffle_data` (bool) • `device_hint` (str) • `model_kwargs_repr` (repr string) • `RIN_type` = `"attr"` or `"repuls"` • `using` = `"RW"|"SAW"|"merged"` • `source_WALKS_path` (str) • `walk_length` (int) • `num_RWs` / `num_SAWs` (ints) • `attractive_*_name` / `repulsive_*_name` (dataset names or `None`) • `master_seed` (int) • `per_frame_seeds` (list[int]) • `arrays_per_chunk` (int) • `compression_level` (int) |
 **Notes**
-- In **RIN**, `T` equals the number of frame **batches** written (i.e., `frame_range` swept in steps of `frame_batch_size`). `ATTRACTIVE/REPULSIVE_energies` are **pre-normalised** absolute energies (written only when `keep_prenormalized_energies=True`), whereas `ATTRACTIVE/REPULSIVE_transitions` are the **row-wise L1-normalised** versions used for sampling.
+- In **RIN**, `T` equals the number of frame **batches** written (i.e., `frame_range` swept in steps of `frame_batch_size`). `ATTRACTIVE/REPULSIVE_energies` are **pre-normalized** absolute energies (written only when `keep_prenormalized_energies=True`), whereas `ATTRACTIVE/REPULSIVE_transitions` are the **row-wise L1-normalized** versions used for sampling.
 - All archives are Zarr v3 groups. ArrayStorage also maintains per-block metadata in root attrs: `array_chunk_size_in_block`, `array_shape_in_block`, and `array_dtype_in_block` (dicts keyed by dataset name). You’ll see these in every archive.
----
-## Installation
-   ```bash
-   pip install sawnergy
-   ```
-> **Note:** RIN building requires `cpptraj` (AmberTools). Ensure it is discoverable via `$PATH` or the `CPPTRAJ`
-> environment variable.
+- In **Embeddings**, `alpha` and `num_negative_samples` apply to **SGNS** only and are ignored for `objective="sg"`.
 ---
@@ -181,10 +206,10 @@ rin_builder.build_rin(
     molecule_of_interest=1,
     frame_range=(1, 100),
     frame_batch_size=10,
-    prune_low_energies_frac=0.3,
+    prune_low_energies_frac=0.85,
     output_path=rin_path,
     include_attractive=True,
-    include_repulsive=False,
+    include_repulsive=False
 )
 # 2. Sample walks from the RIN
@@ -192,52 +217,43 @@ walker = Walker(rin_path, seed=123)
 walks_path = Path("./WALKS_demo.zip")
 walker.sample_walks(
     walk_length=16,
-    walks_per_node=32,
+    walks_per_node=100,
     saw_frac=0.25,
     include_attractive=True,
     include_repulsive=False,
     time_aware=False,
     output_path=walks_path,
-    in_parallel=False,
+    in_parallel=False
 )
 walker.close()
 # 3. Train embeddings per frame (PyTorch backend)
 import torch
-embedder = Embedder(walks_path, base="torch", seed=999)
+embedder = Embedder(walks_path, seed=999)
 embeddings_path = embedder.embed_all(
     RIN_type="attr",
     using="merged",
+    num_epochs=10,
+    negative_sampling=False,
     window_size=4,
-    num_negative_samples=5,
-    num_epochs=5,
-    batch_size=1024,
-    dimensionality=128,
-    shuffle_data=True,
-    output_path="./EMBEDDINGS_demo.zip",
-    sgns_kwargs={
-        "optim": torch.optim.Adam,
-        "optim_kwargs": {"lr": 1e-3},
-        "lr_sched": torch.optim.lr_scheduler.LambdaLR,
-        "lr_sched_kwargs": {"lr_lambda": lambda _: 1.0},
-        "device": "cuda" if torch.cuda.is_available() else "cpu",
-    },
+    device="cuda" if torch.cuda.is_available() else "cpu",
+    model_base="torch",
+    output_path="./EMBEDDINGS_demo.zip"
 )
 print("Embeddings written to", embeddings_path)
 ```
-> For the PureML backend, supply the relevant optimiser and scheduler via `sgns_kwargs`
-> (for example `optim=pureml.optimizers.Adam`, `lr_sched=pureml.optimizers.CosineAnnealingLR`).
+> For the PureML backend, set `model_base="pureml"` and pass the optimizer / scheduler classes inside `model_kwargs`.
 ---
-## Visualisation
+## Visualization
 ```python
 from sawnergy.visual import Visualizer
-v = sawnergy.visual.Visualizer("./RIN_demo.zip")
+v = Visualizer("./RIN_demo.zip")
 v.build_frame(1,
     node_colors="rainbow",
     displayed_nodes="ALL",
@@ -250,14 +266,20 @@ v.build_frame(1,
 `Visualizer` lazily loads datasets and works even in headless environments (falls back to the `Agg` backend).
+```python
+from sawnergy.embedding import Visualizer
+viz = Visualizer("./EMBEDDINGS_demo.zip", normalize_rows=True)
+viz.build_frame(1, show=True)
+```
 ---
 ## Advanced Notes
 - **Time-aware walks**: Set `time_aware=True`, provide `stickiness` and `on_no_options` when calling `Walker.sample_walks`.
 - **Shared memory lifecycle**: Call `Walker.close()` (or use a context manager) to release shared-memory segments.
-- **PureML vs PyTorch**: Choose the backend via `Embedder(..., base="pureml"|"torch")` and provide backend-specific
-  constructor kwargs through `sgns_kwargs` (optimizer, scheduler, device).
+- **PureML vs PyTorch**: Select the backend at call time with `model_base="pureml"|"torch"` (defaults to `"pureml"`) and pass optimizer / scheduler overrides through `model_kwargs`.
 - **ArrayStorage utilities**: Use `ArrayStorage` directly to peek into archives, append arrays, or manage metadata.
 ---
@@ -268,8 +290,9 @@ v.build_frame(1,
 ├── sawnergy/
 │   ├── rin/           # RINBuilder and cpptraj integration helpers
 │   ├── walks/         # Walker class and shared-memory utilities
-│   ├── embedding/     # Embedder + SGNS backends (PureML / PyTorch)
+│   ├── embedding/     # Embedder + SG/SGNS backends (PureML / PyTorch)
 │   ├── visual/        # Visualizer and palette utilities
+│   │
 │   ├── logging_util.py
 │   └── sawnergy_util.py
 │
@@ -278,7 +301,7 @@ v.build_frame(1,
 ---
-## Acknowledgements
+## Acknowledgments
 SAWNERGY builds on the AmberTools `cpptraj` ecosystem, NumPy, Matplotlib, Zarr, and PyTorch (for GPU acceleration if necessary; PureML is available by default).
 Big thanks to the upstream communities whose work makes this toolkit possible.

sawnergy-1.0.8.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,23 @@
+sawnergy/__init__.py,sha256=Dq1U38ah6nPRFEDKN41mYphcTynKfnItca6QkYkpSbs,248
+sawnergy/logging_util.py,sha256=mfYw8IsYtOfCXayjkd4g9jHuupluxRNbqyFegRkiAhQ,1476
+sawnergy/sawnergy_util.py,sha256=Htx9wr0S8TXt5aHT2mtEdYf1TCo_BC1IUwNNuZdIR-4,49432
+sawnergy/embedding/SGNS_pml.py,sha256=-S7K7qwbDGUO_KW4gnA3dGyxuezN1ZK-WikPm7krEvs,14291
+sawnergy/embedding/SGNS_torch.py,sha256=NgVQnMtRSYY0IsPhB3XV7K1-uVSah0P77a8ID8zZ7Qw,13940
+sawnergy/embedding/__init__.py,sha256=T1YXb7S5Zyy_kIqlarDSX3imd_FGFH6nDuvLQ3hMKsE,1764
+sawnergy/embedding/embedder.py,sha256=02pcf3ies3Nuo19sCoJdMAYg7BFUHj4-wf4AZ5R6PAE,32492
+sawnergy/embedding/visualizer.py,sha256=x0BiSG9_nk9AUQm9RsZ2syKeCiaxX1gTlC85aYycMXY,8830
+sawnergy/rin/__init__.py,sha256=z19hLfEIp3bwzY-eCHQBQf0NRTCJzVz_FLIpVV5q0W4,162
+sawnergy/rin/rin_builder.py,sha256=d1cC4KKY9zzNlqhxHWTFM-QyXRXubd2zlCrSM-dV5pc,44624
+sawnergy/rin/rin_util.py,sha256=5TKywA5qfm76Gl4Cyz7oBPasmE5chclR7UM4hawwQOg,14939
+sawnergy/visual/__init__.py,sha256=p_ByFtfrP19b5_qiJlkAnYesZN3M1LjIo421LUgVVbw,502
+sawnergy/visual/visualizer.py,sha256=GVD_rFavDXFz9-h28eFf5nPBujUvRncn_zYoHcFHZ3Q,33155
+sawnergy/visual/visualizer_util.py,sha256=7y3kWjHxDQMoG0dmimceHKTC5veVChoyvW7d0qXH23k,15100
+sawnergy/walks/__init__.py,sha256=Z_Kaffhn3oUX13z9jbY0V5Ncdwj9Cnr--n9D-s7gh5k,250
+sawnergy/walks/walker.py,sha256=scvfZFrSL4AwpmspD0Jb0uhnrVIRRwE_hPCE3bG6zpg,37729
+sawnergy/walks/walker_util.py,sha256=ETdyPNIDwDQCA8Z5t38keBhYBJ56_ksT_0NhOCY-tHE,15361
+sawnergy-1.0.8.dist-info/licenses/LICENSE,sha256=cElK4bCsDhyAEON3H05s35bQZvxBcXBiCOrOdiUhDCY,11346
+sawnergy-1.0.8.dist-info/licenses/NOTICE,sha256=eVTbuSasZrmMJVtKoWOzsKyu4ZNm7Ks7dzI3Tx5tEHc,109
+sawnergy-1.0.8.dist-info/METADATA,sha256=_0u1smFM5oMqaO0xuc4ZX094B6F2swQqUrolOkpikVM,16084
+sawnergy-1.0.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sawnergy-1.0.8.dist-info/top_level.txt,sha256=-67FQD6FD9Gjt74WTmO9hNYA3MLB4HaSxci0sEKC5Lo,9
+sawnergy-1.0.8.dist-info/RECORD,,

sawnergy-1.0.6.dist-info/RECORD DELETED Viewed

@@ -1,22 +0,0 @@
-sawnergy/__init__.py,sha256=Dq1U38ah6nPRFEDKN41mYphcTynKfnItca6QkYkpSbs,248
-sawnergy/logging_util.py,sha256=tnhToHchnWaORHU73dxzBuL1e_C-AXFdPExDZTEI6tE,1474
-sawnergy/sawnergy_util.py,sha256=Htx9wr0S8TXt5aHT2mtEdYf1TCo_BC1IUwNNuZdIR-4,49432
-sawnergy/embedding/SGNS_pml.py,sha256=xF_0DksJTUH5DxchTwkg-Ol975lwH1O259Wa0ZSbmDA,6298
-sawnergy/embedding/SGNS_torch.py,sha256=3Pa_mk5mzsl27M87q4tNmitOouxDdG5ZzxpdaOSyGt8,6411
-sawnergy/embedding/__init__.py,sha256=sxUh2RcZyPs8aCdvec8x843Bm3DBaYQNrBF8VyvLQ-k,965
-sawnergy/embedding/embedder.py,sha256=0DRkEfjWqnKCHdr0AxN3wjqclezMOOw6THZE7GlxihE,26266
-sawnergy/rin/__init__.py,sha256=z19hLfEIp3bwzY-eCHQBQf0NRTCJzVz_FLIpVV5q0W4,162
-sawnergy/rin/rin_builder.py,sha256=z5hCvW-jHnnv7ZgHlQlruRAMKa-TnKFdvkMcoHBhX78,44623
-sawnergy/rin/rin_util.py,sha256=5TKywA5qfm76Gl4Cyz7oBPasmE5chclR7UM4hawwQOg,14939
-sawnergy/visual/__init__.py,sha256=p_ByFtfrP19b5_qiJlkAnYesZN3M1LjIo421LUgVVbw,502
-sawnergy/visual/visualizer.py,sha256=qqggoLRNi6t0awXEt-Hy2ut9S0Y8_uKznyozlGLR1Q8,33131
-sawnergy/visual/visualizer_util.py,sha256=C9W22CJmfJuTV5_uYsEnG8YChR4nH7OHKbNz26hAyB0,15028
-sawnergy/walks/__init__.py,sha256=Z_Kaffhn3oUX13z9jbY0V5Ncdwj9Cnr--n9D-s7gh5k,250
-sawnergy/walks/walker.py,sha256=scvfZFrSL4AwpmspD0Jb0uhnrVIRRwE_hPCE3bG6zpg,37729
-sawnergy/walks/walker_util.py,sha256=ETdyPNIDwDQCA8Z5t38keBhYBJ56_ksT_0NhOCY-tHE,15361
-sawnergy-1.0.6.dist-info/licenses/LICENSE,sha256=cElK4bCsDhyAEON3H05s35bQZvxBcXBiCOrOdiUhDCY,11346
-sawnergy-1.0.6.dist-info/licenses/NOTICE,sha256=eVTbuSasZrmMJVtKoWOzsKyu4ZNm7Ks7dzI3Tx5tEHc,109
-sawnergy-1.0.6.dist-info/METADATA,sha256=9_ocluBr8baUZfTcZdBkdNx_AIu3VOtKADEyMuTc3CY,13367
-sawnergy-1.0.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-sawnergy-1.0.6.dist-info/top_level.txt,sha256=-67FQD6FD9Gjt74WTmO9hNYA3MLB4HaSxci0sEKC5Lo,9
-sawnergy-1.0.6.dist-info/RECORD,,

{sawnergy-1.0.6.dist-info → sawnergy-1.0.8.dist-info}/WHEEL RENAMED Viewed

File without changes

{sawnergy-1.0.6.dist-info → sawnergy-1.0.8.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{sawnergy-1.0.6.dist-info → sawnergy-1.0.8.dist-info}/licenses/NOTICE RENAMED Viewed

File without changes

{sawnergy-1.0.6.dist-info → sawnergy-1.0.8.dist-info}/top_level.txt RENAMED Viewed

File without changes

sawnergy 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl

Potentially problematic release.

sawnergy 1.0.6-py3-none-any.whl → 1.0.8-py3-none-any.whl