PyPI - adaptivepy-sampling - Versions diffs - 0.1.0__py3-none-any.whl - Mend

adaptivepy-sampling 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

adaptivepy/__init__.py +7 -0
adaptivepy/api.py +229 -0
adaptivepy/cli/__init__.py +5 -0
adaptivepy/cli/run.py +68 -0
adaptivepy/clustering/__init__.py +103 -0
adaptivepy/clustering/base.py +73 -0
adaptivepy/clustering/regular_space.py +135 -0
adaptivepy/clustering/sklearn_kmeans.py +93 -0
adaptivepy/clustering/sklearn_minibatch.py +94 -0
adaptivepy/config/__init__.py +17 -0
adaptivepy/config/schema.py +196 -0
adaptivepy/io/__init__.py +27 -0
adaptivepy/io/loader.py +267 -0
adaptivepy/io/trajectory.py +151 -0
adaptivepy/models.py +83 -0
adaptivepy/output/__init__.py +23 -0
adaptivepy/output/pdb_writer.py +59 -0
adaptivepy/output/writer.py +229 -0
adaptivepy/policies/__init__.py +21 -0
adaptivepy/policies/base.py +105 -0
adaptivepy/policies/least_counts.py +43 -0
adaptivepy/policies/random.py +53 -0
adaptivepy/selection/__init__.py +5 -0
adaptivepy/selection/frame_selector.py +132 -0
adaptivepy/stats/__init__.py +15 -0
adaptivepy/stats/cluster_stats.py +118 -0
adaptivepy/utils/__init__.py +6 -0
adaptivepy/utils/io_utils.py +49 -0
adaptivepy/utils/logging.py +55 -0
adaptivepy_sampling-0.1.0.dist-info/METADATA +52 -0
adaptivepy_sampling-0.1.0.dist-info/RECORD +34 -0
adaptivepy_sampling-0.1.0.dist-info/WHEEL +5 -0
adaptivepy_sampling-0.1.0.dist-info/entry_points.txt +2 -0
adaptivepy_sampling-0.1.0.dist-info/top_level.txt +1 -0

adaptivepy/policies/least_counts.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""Least-counts adaptive sampling policy."""
+from __future__ import annotations
+from typing import List
+from adaptivepy.policies.base import Policy, register_policy
+from adaptivepy.stats.cluster_stats import ClusterStats, sort_clusters_by_population
+@register_policy
+class LeastCountsPolicy(Policy):
+    """Select clusters with the smallest populations.
+    Clusters are sorted by ascending population and the first ``n_seeds``
+    cluster IDs are returned (one seed per cluster).
+    """
+    name = "least_counts"
+    def select_clusters(
+        self,
+        cluster_stats: ClusterStats,
+        n_seeds: int,
+    ) -> List[int]:
+        """Select the least-populated clusters.
+        Parameters
+        ----------
+        cluster_stats : dict
+            Per-cluster statistics.
+        n_seeds : int
+            Number of clusters to select.
+        Returns
+        -------
+        list of int
+            Cluster IDs with smallest populations.
+        """
+        sorted_clusters = sort_clusters_by_population(
+            cluster_stats, ascending=True
+        )
+        return sorted_clusters[:n_seeds]

adaptivepy/policies/random.py ADDED Viewed

@@ -0,0 +1,53 @@
+"""Random cluster selection policy."""
+from __future__ import annotations
+from typing import List, Optional
+import numpy as np
+from adaptivepy.policies.base import Policy, register_policy
+from adaptivepy.stats.cluster_stats import ClusterStats
+@register_policy
+class RandomPolicy(Policy):
+    """Uniformly sample cluster IDs at random.
+    Parameters
+    ----------
+    random_state : int or None
+        Seed for the random number generator.
+    """
+    name = "random"
+    def __init__(self, random_state: Optional[int] = None) -> None:
+        self.random_state = random_state
+        self._rng = np.random.default_rng(random_state)
+    def select_clusters(
+        self,
+        cluster_stats: ClusterStats,
+        n_seeds: int,
+    ) -> List[int]:
+        """Randomly sample ``n_seeds`` distinct cluster IDs.
+        Parameters
+        ----------
+        cluster_stats : dict
+            Per-cluster statistics.
+        n_seeds : int
+            Number of clusters to sample.
+        Returns
+        -------
+        list of int
+            Randomly selected cluster IDs.
+        """
+        cluster_ids = list(cluster_stats.keys())
+        n_select = min(n_seeds, len(cluster_ids))
+        if n_select == 0:
+            return []
+        chosen = self._rng.choice(cluster_ids, size=n_select, replace=False)
+        return [int(c) for c in chosen]

adaptivepy/selection/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Seed selection utilities."""
+from adaptivepy.selection.frame_selector import select_seeds
+__all__ = ["select_seeds"]

adaptivepy/selection/frame_selector.py ADDED Viewed

@@ -0,0 +1,132 @@
+"""Frame-level seed selection within chosen clusters."""
+from __future__ import annotations
+from typing import List, Optional
+import numpy as np
+from adaptivepy.models import FrameRecord, SeedResult
+from adaptivepy.stats.cluster_stats import ClusterStats
+def _nearest_center_frame(
+    frames: List[FrameRecord],
+    center: np.ndarray,
+) -> FrameRecord:
+    """Return the frame closest to a cluster centroid in feature space.
+    Parameters
+    ----------
+    frames : list of FrameRecord
+        Frames belonging to one cluster.
+    center : np.ndarray
+        Cluster center, shape ``(n_features,)``.
+    Returns
+    -------
+    FrameRecord
+        Frame with minimum Euclidean distance to ``center``.
+    """
+    features = np.stack([f.features for f in frames], axis=0)
+    dists = np.linalg.norm(features - center, axis=1)
+    return frames[int(np.argmin(dists))]
+def _random_frame(
+    frames: List[FrameRecord],
+    rng: np.random.Generator,
+) -> FrameRecord:
+    """Return a uniformly random frame from a cluster.
+    Parameters
+    ----------
+    frames : list of FrameRecord
+        Frames belonging to one cluster.
+    rng : np.random.Generator
+        Random number generator.
+    Returns
+    -------
+    FrameRecord
+        Randomly selected frame.
+    """
+    index = int(rng.integers(0, len(frames)))
+    return frames[index]
+def select_seeds(
+    policy_name: str,
+    selected_clusters: List[int],
+    cluster_stats: ClusterStats,
+    cluster_centers: Optional[np.ndarray],
+    method: str = "nearest_center",
+    random_state: Optional[int] = None,
+) -> List[SeedResult]:
+    """Select one seed frame from each chosen cluster.
+    Parameters
+    ----------
+    policy_name : str
+        Name of the policy that selected the clusters.
+    selected_clusters : list of int
+        Cluster IDs chosen by the policy.
+    cluster_stats : dict
+        Per-cluster frame lists and populations.
+    cluster_centers : np.ndarray or None
+        Cluster centroids, shape ``(n_clusters, n_features)``. Required for
+        ``nearest_center`` selection when centers are defined per label index.
+    method : str
+        Selection method: ``nearest_center`` or ``random_frame``.
+    random_state : int or None
+        Random seed for ``random_frame`` selection.
+    Returns
+    -------
+    list of SeedResult
+        Selected seed frames with metadata.
+    Raises
+    ------
+    ValueError
+        If ``method`` is unknown or centers are missing when required.
+    """
+    if method not in {"nearest_center", "random_frame"}:
+        raise ValueError(
+            f"Unknown seed selection method '{method}'. "
+            "Use 'nearest_center' or 'random_frame'."
+        )
+    rng = np.random.default_rng(random_state)
+    seeds: List[SeedResult] = []
+    for seed_id, cluster_id in enumerate(selected_clusters):
+        entry = cluster_stats.get(cluster_id)
+        if entry is None or not entry["frames"]:
+            continue
+        frames = entry["frames"]
+        if method == "random_frame":
+            chosen = _random_frame(frames, rng)
+        else:
+            if cluster_centers is None:
+                center = np.mean(np.stack([f.features for f in frames]), axis=0)
+            elif cluster_id < len(cluster_centers):
+                center = cluster_centers[cluster_id]
+            else:
+                center = np.mean(np.stack([f.features for f in frames]), axis=0)
+            chosen = _nearest_center_frame(frames, center)
+        seeds.append(
+            SeedResult(
+                seed_id=seed_id,
+                policy=policy_name,
+                traj_id=chosen.traj_id,
+                frame_id=chosen.frame_id,
+                cluster_id=cluster_id,
+                global_index=chosen.global_index or 0,
+            )
+        )
+    return seeds

adaptivepy/stats/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""Cluster statistics for AdaptivePy."""
+from adaptivepy.stats.cluster_stats import (
+    assign_clusters,
+    cluster_stats_to_rows,
+    compute_cluster_stats,
+    sort_clusters_by_population,
+)
+__all__ = [
+    "assign_clusters",
+    "cluster_stats_to_rows",
+    "compute_cluster_stats",
+    "sort_clusters_by_population",
+]

adaptivepy/stats/cluster_stats.py ADDED Viewed

@@ -0,0 +1,118 @@
+"""Cluster population statistics and frame assignments."""
+from __future__ import annotations
+from typing import Dict, List, TypedDict
+import numpy as np
+from adaptivepy.models import Dataset, FrameRecord
+class ClusterStatEntry(TypedDict):
+    """Statistics for a single cluster."""
+    # Number of frames assigned to the cluster.
+    population: int
+    # Frame records assigned to the cluster.
+    frames: List[FrameRecord]
+# Mapping from integer cluster ID to that cluster's statistics entry.
+ClusterStats = Dict[int, ClusterStatEntry]
+def assign_clusters(dataset: Dataset, labels: np.ndarray) -> None:
+    """Attach cluster labels to frame records in a dataset.
+    Parameters
+    ----------
+    dataset : Dataset
+        Dataset whose frames will be updated in place.
+    labels : np.ndarray
+        Cluster label per frame, shape ``(n_frames,)``.
+    Raises
+    ------
+    ValueError
+        If label count does not match the number of frames.
+    """
+    if len(labels) != len(dataset.frames):
+        raise ValueError(
+            f"Expected {len(dataset.frames)} labels, got {len(labels)}."
+        )
+    for record, cluster_id in zip(dataset.frames, labels):
+        record.cluster_id = int(cluster_id)
+def compute_cluster_stats(dataset: Dataset) -> ClusterStats:
+    """Compute per-cluster populations and frame lists.
+    Parameters
+    ----------
+    dataset : Dataset
+        Dataset with cluster assignments on each frame record.
+    Returns
+    -------
+    dict
+        Mapping from ``cluster_id`` to population and frame list.
+    Raises
+    ------
+    ValueError
+        If any frame lacks a cluster assignment.
+    """
+    stats: ClusterStats = {}
+    for record in dataset.frames:
+        if record.cluster_id is None:
+            raise ValueError("All frames must have cluster assignments.")
+        cluster_id = record.cluster_id
+        if cluster_id not in stats:
+            stats[cluster_id] = {"population": 0, "frames": []}
+        stats[cluster_id]["population"] += 1
+        stats[cluster_id]["frames"].append(record)
+    return stats
+def sort_clusters_by_population(
+    cluster_stats: ClusterStats,
+    ascending: bool = True,
+) -> List[int]:
+    """Return cluster IDs sorted by population.
+    Parameters
+    ----------
+    cluster_stats : dict
+        Per-cluster statistics from :func:`compute_cluster_stats`.
+    ascending : bool
+        If ``True``, smallest populations first.
+    Returns
+    -------
+    list of int
+        Sorted cluster IDs.
+    """
+    return sorted(
+        cluster_stats.keys(),
+        key=lambda cid: cluster_stats[cid]["population"],
+        reverse=not ascending,
+    )
+def cluster_stats_to_rows(cluster_stats: ClusterStats) -> List[Dict[str, int]]:
+    """Convert cluster statistics to flat rows for CSV export.
+    Parameters
+    ----------
+    cluster_stats : dict
+        Per-cluster statistics.
+    Returns
+    -------
+    list of dict
+        Rows with keys ``cluster_id`` and ``population``.
+    """
+    return [
+        {"cluster_id": cluster_id, "population": entry["population"]}
+        for cluster_id, entry in sorted(cluster_stats.items())
+    ]

adaptivepy/utils/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Utility helpers for AdaptivePy."""
+from adaptivepy.utils.io_utils import copy_file, ensure_dir
+from adaptivepy.utils.logging import setup_logger
+__all__ = ["copy_file", "ensure_dir", "setup_logger"]

adaptivepy/utils/io_utils.py ADDED Viewed

@@ -0,0 +1,49 @@
+"""Shared I/O utilities."""
+from __future__ import annotations
+import shutil
+from pathlib import Path
+from typing import Union
+PathLike = Union[str, Path]
+def ensure_dir(path: PathLike) -> Path:
+    """Create a directory and all parent directories if they do not exist.
+    Parameters
+    ----------
+    path : str or Path
+        Directory path to create.
+    Returns
+    -------
+    Path
+        Resolved path to the created directory.
+    """
+    resolved = Path(path).resolve()
+    resolved.mkdir(parents=True, exist_ok=True)
+    return resolved
+def copy_file(src: PathLike, dst: PathLike) -> Path:
+    """Copy a file to a destination path, creating parent directories as needed.
+    Parameters
+    ----------
+    src : str or Path
+        Source file path.
+    dst : str or Path
+        Destination file path.
+    Returns
+    -------
+    Path
+        Resolved destination path.
+    """
+    src_path = Path(src)
+    dst_path = Path(dst)
+    ensure_dir(dst_path.parent)
+    shutil.copy2(src_path, dst_path)
+    return dst_path.resolve()

adaptivepy/utils/logging.py ADDED Viewed

@@ -0,0 +1,55 @@
+"""Logging utilities for AdaptivePy runs."""
+from __future__ import annotations
+import logging
+import sys
+from pathlib import Path
+from typing import Optional
+def setup_logger(
+    name: str = "adaptivepy",
+    log_file: Optional[Path] = None,
+    level: int = logging.INFO,
+) -> logging.Logger:
+    """Configure and return a logger with console and optional file handlers.
+    Parameters
+    ----------
+    name : str
+        Logger name.
+    log_file : Path or None
+        If provided, log messages are also written to this file.
+    level : int
+        Logging level for both handlers.
+    Returns
+    -------
+    logging.Logger
+        Configured logger instance.
+    """
+    logger = logging.getLogger(name)
+    logger.setLevel(level)
+    logger.handlers.clear()
+    logger.propagate = False
+    formatter = logging.Formatter(
+        "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setLevel(level)
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+    if log_file is not None:
+        log_file = Path(log_file)
+        log_file.parent.mkdir(parents=True, exist_ok=True)
+        file_handler = logging.FileHandler(log_file, encoding="utf-8")
+        file_handler.setLevel(level)
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+    return logger

adaptivepy_sampling-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,52 @@
+Metadata-Version: 2.4
+Name: adaptivepy-sampling
+Version: 0.1.0
+Summary: Adaptive sampling on MD trajectories via clustering and policy-driven seed selection
+Author: AdaptivePy Contributors
+License: MIT
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+Requires-Dist: numpy>=1.20
+Requires-Dist: scikit-learn>=1.0
+Requires-Dist: pyyaml>=6.0
+Requires-Dist: click>=8.0
+Requires-Dist: joblib>=1.0
+Requires-Dist: mdtraj>=1.9
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+# AdaptivePy
+Adaptive sampling on molecular dynamics trajectories using clustering-based state space partitioning and policy-driven seed selection.
+## Installation
+```bash
+pip install -e .
+```
+## Quick start
+1. Prepare feature files (`features/traj_0.npy`, ...) with shape `(n_frames, n_features)`.
+2. Optionally add matching coordinate trajectories (`trajectories/traj_0.xtc`, ...) and a topology file.
+3. Edit `examples/config.yaml` and run:
+```bash
+adaptivepy run examples/config.yaml
+```
+## CLI
+```bash
+adaptivepy run config.yaml
+adaptivepy validate config.yaml
+adaptivepy list-policies
+```
+## Python API
+```python
+from adaptivepy import run_adaptive_sampling
+results = run_adaptive_sampling("config.yaml")
+```

adaptivepy_sampling-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,34 @@
+adaptivepy/__init__.py,sha256=Z9oSeaY0nBjZGv5aA1-8XJQ4oB5HDPwc9FnpSj6Xddw,198
+adaptivepy/api.py,sha256=OrnkbOmWMUusbxSMY9C-oilk1-k0Yu_7wZBsBTwzWgg,7258
+adaptivepy/models.py,sha256=2CUiOj9nvD1Nt0keFDSUFcR1b0DZaB0gywT5NWLB4Bs,2239
+adaptivepy/cli/__init__.py,sha256=IzJw4MHHsGRl5adW3RptsZi5uGXLSzkJhWzIjlc8VCM,96
+adaptivepy/cli/run.py,sha256=ItFjNxU7RRzdiqNGVBELXsE9K8GaIUOdbv7EpNfRgpE,1741
+adaptivepy/clustering/__init__.py,sha256=k1ee5Q1t0-42N2jOplA9Y0r3XdtLuIfAUwhHcUnqtRg,2841
+adaptivepy/clustering/base.py,sha256=UAxMJ6FHZZa5OY0sgrVZDSRDNKKDdsgJ89GxFOgtI88,1754
+adaptivepy/clustering/regular_space.py,sha256=FnbOycEesf3G9UGjp9UiJAtNlNcyUhaIIu_97cuuGzo,4511
+adaptivepy/clustering/sklearn_kmeans.py,sha256=7INdhM6wSMD2yzK95NiQJI2LyT8iYty71ludnVReV8E,2405
+adaptivepy/clustering/sklearn_minibatch.py,sha256=jQL4i5I9iMIW8jOUH6oPZd2DpGrr0BrjtrGCI9LG3sA,2563
+adaptivepy/config/__init__.py,sha256=JLrboCd7_fsG-GqLpCmTcv1m715lLLFQ5u6tKh1bQIk,309
+adaptivepy/config/schema.py,sha256=5SZT6XWg2X1rqEiSZt4MFBET9pifeJcbY8AhGQxb-SI,5929
+adaptivepy/io/__init__.py,sha256=nbrwleheNEBidhH8VGgdAzErIutK8mMeTLGSJw2hReg,631
+adaptivepy/io/loader.py,sha256=HZSTsVwjpbXy8YLowuKKVvZAgMU9mqmLvqsjO-I_ldE,7878
+adaptivepy/io/trajectory.py,sha256=kkyuf5KFcPHBAGxjYbDPxJ8ZFc0_jJx2mPHKYTszT9A,4208
+adaptivepy/output/__init__.py,sha256=O2F1DG8dDo9T9aI-lfQm8dlJ4izndRLOTbShR-DUJpI,540
+adaptivepy/output/pdb_writer.py,sha256=kLsExDbvCMiLKXPAQsJu8289FDcEEDgqZZhn2insEW4,1596
+adaptivepy/output/writer.py,sha256=dl3LmgfOLHIlaK1jXks-KBBNg8Dj2nXzIKEBl8q9NQY,6146
+adaptivepy/policies/__init__.py,sha256=rc8cTwHa5WG-6K9xaRBv_kBNFmsCaJ-SKlYlmyfa82I,457
+adaptivepy/policies/base.py,sha256=D8TjMlWDM-0uv0Klk-zxXTSkCWf0X8t7kDQvpibh2yA,2497
+adaptivepy/policies/least_counts.py,sha256=PKYu5j_lza_c3NC8xx8Qz6e9rMXLT4JM0hN-erMroXo,1121
+adaptivepy/policies/random.py,sha256=P0ZyTg801IBkB0qJbLX5o6aUkTkIQOUngIwnZJ6j6is,1365
+adaptivepy/selection/__init__.py,sha256=4UdLHfz5X4DNksHK7_XXr7W_pFpXNSDJOwezpgq72w0,122
+adaptivepy/selection/frame_selector.py,sha256=oKEC095iXUiTy4lieLU_xCQDdLsYnhy0AJAU6Nl9qlE,3806
+adaptivepy/stats/__init__.py,sha256=5e6zIKUUbYfxcDCfKCha39Lw4yuB8zTQXtVFEvEdtuE,328
+adaptivepy/stats/cluster_stats.py,sha256=B46tPK0IhweEQ4OgX2HMWMYXmK1KDiGudarhXoiFGFA,2989
+adaptivepy/utils/__init__.py,sha256=RzCDgCQgs4GpVCN-CzxEWgPwtB7ArzvWdE9l_sF4cCM,204
+adaptivepy/utils/io_utils.py,sha256=SXel2DmgCV_5C-xCo7nu_hdCKHllMwVgfa8-8DWycwc,1051
+adaptivepy/utils/logging.py,sha256=qtIMxtRPY79ArtTuJ5oOA0ZjvrccQHHIQ5v_71TDzLk,1455
+adaptivepy_sampling-0.1.0.dist-info/METADATA,sha256=mrBmBlzdWFonaqtZ0aYc2seEQP7LQzh3aiyomp8I_3Y,1222
+adaptivepy_sampling-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+adaptivepy_sampling-0.1.0.dist-info/entry_points.txt,sha256=DDQkzgiBjliB_Fyy0y6qs956Y8-RIzgrRVdVG_ac3b8,55
+adaptivepy_sampling-0.1.0.dist-info/top_level.txt,sha256=mObCepJKVRgCqGBCOOO9d135EvogjEfTwCa5U7X0VKU,11
+adaptivepy_sampling-0.1.0.dist-info/RECORD,,

adaptivepy_sampling-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

adaptivepy_sampling-0.1.0.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ adaptivepy = adaptivepy.cli.run:main

adaptivepy_sampling-0.1.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ adaptivepy