PyPI - crisp-ase - Versions diffs - 1.1.2__py3-none-any.whl - Mend

crisp-ase 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

CRISP/__init__.py +99 -0
CRISP/_version.py +1 -0
CRISP/cli.py +41 -0
CRISP/data_analysis/__init__.py +38 -0
CRISP/data_analysis/clustering.py +838 -0
CRISP/data_analysis/contact_coordination.py +915 -0
CRISP/data_analysis/h_bond.py +772 -0
CRISP/data_analysis/msd.py +1199 -0
CRISP/data_analysis/prdf.py +404 -0
CRISP/data_analysis/volumetric_atomic_density.py +527 -0
CRISP/py.typed +1 -0
CRISP/simulation_utility/__init__.py +31 -0
CRISP/simulation_utility/atomic_indices.py +155 -0
CRISP/simulation_utility/atomic_traj_linemap.py +278 -0
CRISP/simulation_utility/error_analysis.py +254 -0
CRISP/simulation_utility/interatomic_distances.py +200 -0
CRISP/simulation_utility/subsampling.py +241 -0
CRISP/tests/DataAnalysis/__init__.py +1 -0
CRISP/tests/DataAnalysis/test_clustering_extended.py +212 -0
CRISP/tests/DataAnalysis/test_contact_coordination.py +184 -0
CRISP/tests/DataAnalysis/test_contact_coordination_extended.py +465 -0
CRISP/tests/DataAnalysis/test_h_bond_complete.py +326 -0
CRISP/tests/DataAnalysis/test_h_bond_extended.py +322 -0
CRISP/tests/DataAnalysis/test_msd_complete.py +305 -0
CRISP/tests/DataAnalysis/test_msd_extended.py +522 -0
CRISP/tests/DataAnalysis/test_prdf.py +206 -0
CRISP/tests/DataAnalysis/test_volumetric_atomic_density.py +463 -0
CRISP/tests/SimulationUtility/__init__.py +1 -0
CRISP/tests/SimulationUtility/test_atomic_traj_linemap.py +101 -0
CRISP/tests/SimulationUtility/test_atomic_traj_linemap_extended.py +469 -0
CRISP/tests/SimulationUtility/test_error_analysis_extended.py +151 -0
CRISP/tests/SimulationUtility/test_interatomic_distances.py +223 -0
CRISP/tests/SimulationUtility/test_subsampling.py +365 -0
CRISP/tests/__init__.py +1 -0
CRISP/tests/test_CRISP.py +28 -0
CRISP/tests/test_cli.py +87 -0
CRISP/tests/test_crisp_comprehensive.py +679 -0
crisp_ase-1.1.2.dist-info/METADATA +141 -0
crisp_ase-1.1.2.dist-info/RECORD +42 -0
crisp_ase-1.1.2.dist-info/WHEEL +5 -0
crisp_ase-1.1.2.dist-info/entry_points.txt +2 -0
crisp_ase-1.1.2.dist-info/top_level.txt +1 -0

CRISP/simulation_utility/interatomic_distances.py ADDED Viewed

@@ -0,0 +1,200 @@
+"""
+CRISP/simulation_utility/interatomic_distances.py
+This module provides tools to calculate and analyze interatomic distances from
+molecular dynamics trajectories.
+"""
+import os
+import numpy as np
+import pickle
+from ase.io import read
+from ase import Atoms
+from typing import Union, Tuple, List, Dict, Any, Optional
+from joblib import Parallel, delayed
+__all__ = ['indices', 'distance_calculation', 'save_distance_matrices', 'calculate_interatomic_distances']
+def indices(atoms: Atoms, ind: Union[str, List[Union[int, str]], None]) -> np.ndarray:
+    """Extract atom indices from various input types.
+    Parameters
+    ----------
+    atoms : ase.Atoms
+        Atoms object containing atomic coordinates and elements
+    ind : str, list, or None
+        Specification for which atoms to select:
+        - "all" or None: all atoms
+        - string ending with ".npy": load indices from NumPy file
+        - list of integers: direct atom indices
+        - list of strings: chemical symbols to select
+    Returns
+    -------
+    np.ndarray
+        Array of atom indices
+    Raises
+    ------
+    ValueError
+        If the index type is not recognized
+    """
+    if ind == "all" or ind is None:
+        return np.arange(len(atoms))
+    if isinstance(ind, str) and ind.endswith(".npy"):
+        return np.load(ind, allow_pickle=True)
+    if not isinstance(ind, list):
+        ind = [ind]
+    if any(isinstance(item, int) for item in ind):
+        return np.array(ind)
+    if any(isinstance(item, str) for item in ind):
+        idx = []
+        for symbol in ind:
+            idx.append(np.where(np.array(atoms.get_chemical_symbols()) == symbol)[0])
+        return np.concatenate(idx)
+    raise ValueError("Invalid index type")
+def distance_calculation(
+    traj_path: str,
+    frame_skip: int,
+    index_type: Union[str, List[Union[int, str]]] = "all"
+) -> Tuple[List[np.ndarray], List[np.ndarray]]:
+    """Calculate distance matrices for multiple frames in a trajectory.
+    Parameters
+    ----------
+    traj_path : str
+        Path to the trajectory file in any format supported by ASE
+    frame_skip : int
+        Read every nth frame (n=frame_skip)
+    index_type : str, list, or None, optional
+        Specification for which atoms to select for sub-matrix (default: "all")
+    Returns
+    -------
+    Tuple[List[np.ndarray], List[np.ndarray]]
+        Two lists containing:
+        1. Full distance matrices for all frames
+        2. Sub-matrices for specified atoms
+    Raises
+    ------
+    ValueError
+        If no frames were found in the trajectory or if format is unsupported
+    """
+    try:
+        # Let ASE auto-detect file format based on extension
+        frames = read(traj_path, index=f"::{frame_skip}")
+        # Handle the case when a single frame is returned (not a list)
+        if not isinstance(frames, list):
+            frames = [frames]
+        if not frames:
+            raise ValueError("No frames were found in the trajectory using the given frame_skip.")
+        def process_frame(frame: Atoms) -> Tuple[np.ndarray, np.ndarray]:
+            dm = frame.get_all_distances(mic=True)
+            idx = indices(frame, index_type)
+            sub_dm = dm[np.ix_(idx, idx)]
+            return dm, sub_dm
+        results = Parallel(n_jobs=-1)(delayed(process_frame)(frame) for frame in frames)
+        full_dms, sub_dms = zip(*results)
+        return list(full_dms), list(sub_dms)
+    except ValueError as e:
+        raise e
+    except Exception as e:
+        raise ValueError(f"Error processing trajectory: {e}. Check if the format is supported by ASE.")
+def save_distance_matrices(
+    full_dms: List[np.ndarray],
+    sub_dms: List[np.ndarray],
+    index_type: Union[str, List[Union[int, str]]] = "all",
+    output_dir: str = "distance_calculations"
+) -> None:
+    """Save distance matrices to pickle file.
+    Parameters
+    ----------
+    full_dms : List[np.ndarray]
+        List of full distance matrices
+    sub_dms : List[np.ndarray]
+        List of sub-matrices for specified atoms
+    index_type : str, list, or None, optional
+        Type of index selection used (default: "all")
+    output_dir : str, optional
+        Directory to save output file (default: "distance_calculations")
+    Returns
+    -------
+    None
+        Saves results to disk
+    """
+    data = {"full_dms": full_dms}
+    if index_type not in ["all", None]:
+        data["sub_dms"] = sub_dms
+    os.makedirs(output_dir, exist_ok=True)
+    output_path = os.path.join(output_dir, "distance_matrices.pkl")
+    with open(output_path, "wb") as f:
+        pickle.dump(data, f)
+    print(f"Distance matrices saved in '{output_path}'")
+def calculate_interatomic_distances(
+    traj_path: str,
+    frame_skip: int = 10,
+    index_type: Union[str, List[Union[int, str]]] = "all",
+    output_dir: str = "distance_calculations",
+    save_results: bool = True
+) -> Dict[str, List[np.ndarray]]:
+    """
+    Calculate interatomic distances for a trajectory and optionally save results.
+    Parameters
+    ----------
+    traj_path : str
+        Path to the trajectory file
+    frame_skip : int, optional
+        Read every nth frame (default: 10)
+    index_type : str, list, or None, optional
+        Specification for which atoms to select (default: "all")
+    output_dir : str, optional
+        Directory to save output file (default: "distance_calculations")
+    save_results : bool, optional
+        Whether to save results to disk (default: True)
+    Returns
+    -------
+    Dict[str, List[np.ndarray]]
+        Dictionary containing full distance matrices and optionally sub-matrices
+    Examples
+    --------
+    >>> results = calculate_interatomic_distances("trajectory.traj")
+    >>> first_frame_distances = results["full_dms"][0]
+    >>> print(f"Distance matrix shape: {first_frame_distances.shape}")
+    """
+    print(f"Calculating interatomic distances from '{traj_path}'")
+    print(f"Using frame skip: {frame_skip}")
+    print(f"Index type: {index_type}")
+    full_dms, sub_dms = distance_calculation(traj_path, frame_skip, index_type)
+    print(f"Processed {len(full_dms)} frames")
+    print(f"Full matrix shape: {full_dms[0].shape}")
+    print(f"Sub-matrix shape: {sub_dms[0].shape}")
+    results = {"full_dms": full_dms}
+    if index_type not in ["all", None]:
+        results["sub_dms"] = sub_dms
+    if save_results:
+        save_distance_matrices(full_dms, sub_dms, index_type, output_dir)
+    return results

CRISP/simulation_utility/subsampling.py ADDED Viewed

@@ -0,0 +1,241 @@
+"""
+CRISP/simulation_utility/subsampling.py
+This module provides functionality for structure subsampling from molecular dynamics
+trajectories using Farthest Point Sampling (FPS) with SOAP descriptors.
+"""
+import numpy as np
+from ase.io import read, write
+import fpsample
+import glob
+import os
+from dscribe.descriptors import SOAP
+import matplotlib.pyplot as plt
+from joblib import Parallel, delayed
+from typing import Union, List, Optional
+__all__ = ['indices', 'compute_soap', 'create_repres', 'subsample']
+def indices(atoms, ind: Union[str, List[Union[int, str]]]) -> np.ndarray:
+    """
+    Extract atom indices from an ASE Atoms object based on the input specifier.
+    Parameters
+    ----------
+    atoms : ase.Atoms
+        ASE Atoms object containing atomic structure
+    ind : Union[str, List[Union[int, str]]]
+        Index specifier, can be:
+        - "all" or None: all atoms
+        - string ending with ".npy": load indices from NumPy file
+        - integer or list of integers: direct atom indices
+        - string or list of strings: chemical symbols to select
+    Returns
+    -------
+    np.ndarray
+        Array of selected indices
+    Raises
+    ------
+    ValueError
+        If the index type is invalid
+    """
+    # Select all atoms
+    if ind == "all" or ind is None:
+        return np.arange(len(atoms))
+    # Load from NumPy file
+    if isinstance(ind, str) and ind.endswith(".npy"):
+        return np.load(ind, allow_pickle=True)
+    # Convert single items to list
+    if not isinstance(ind, list):
+        ind = [ind]
+    # Handle integer indices directly
+    if any(isinstance(item, int) for item in ind):
+        return np.array(ind)
+    # Handle chemical symbols
+    if any(isinstance(item, str) for item in ind):
+        idx = []
+        for symbol in ind:
+            idx.append(np.where(np.array(atoms.get_chemical_symbols()) == symbol)[0])
+        return np.concatenate(idx)
+    raise ValueError("Invalid index type")
+def compute_soap(
+    structure,
+    all_spec: List[str],
+    rcut: float,
+    idx: np.ndarray
+) -> np.ndarray:
+    """Compute SOAP descriptors for a given structure.
+    Parameters
+    ----------
+    structure : ase.Atoms
+        Atomic structure for which to compute SOAP descriptors
+    all_spec : list
+        List of chemical elements to include in the descriptor
+    rcut : float
+        Cutoff radius for the SOAP descriptor in Angstroms
+    idx : numpy.ndarray
+        Indices of atoms to use as centers for SOAP calculation
+    Returns
+    -------
+    numpy.ndarray
+        Average SOAP descriptor vector for the structure
+    """
+    periodic_cell = structure.cell.volume > 0
+    soap = SOAP(
+        species=all_spec,
+        periodic=periodic_cell,
+        r_cut=rcut,
+        n_max=8,
+        l_max=6,
+        sigma=0.5,
+        sparse=False
+    )
+    soap_ind = soap.create(structure, centers=idx)
+    return np.mean(soap_ind, axis=0)
+def create_repres(
+    traj_path: List,
+    rcut: float = 6,
+    ind: Union[str, List[Union[int, str]]] = "all",
+    n_jobs: int = -1
+) -> np.ndarray:
+    """Create SOAP representation vectors for a trajectory.
+    Parameters
+    ----------
+    traj_path : list
+        List of ase.Atoms objects representing a trajectory
+    rcut : float, optional
+        Cutoff radius for the SOAP descriptor in Angstroms (default: 6)
+    ind : str, list, or None, optional
+        Specification for which atoms to use as SOAP centers (default: "all")
+    n_jobs : int, optional
+        Number of parallel jobs to run; -1 uses all available cores (default: -1)
+    Returns
+    -------
+    numpy.ndarray
+        Array of SOAP descriptors for each frame in the trajectory
+    """
+    all_spec = traj_path[0].get_chemical_symbols()
+    idx = indices(traj_path[0], ind=ind)
+    repres = Parallel(n_jobs=n_jobs)(
+        delayed(compute_soap)(structure, all_spec, rcut, idx) for structure in traj_path
+    )
+    return np.array(repres)
+def subsample(
+    traj_path: str,
+    n_samples: int = 50,
+    index_type: Union[str, List[Union[int, str]]] = "all",
+    rcut: float = 6.0,
+    file_format: Optional[str] = None,
+    plot_subsample: bool = False,
+    frame_skip: int = 1,
+    output_dir: str = "subsampled_structures"
+) -> None:
+    """Subsample a trajectory using Farthest Point Sampling with SOAP descriptors.
+    Parameters
+    ----------
+    traj_path : str
+        Path pattern to trajectory file(s); supports globbing
+    n_samples : int, optional
+        Number of frames to select (default: 50)
+    index_type : str, list, or None, optional
+        Specification for which atoms to use for SOAP calculation (default: "all")
+    rcut : float, optional
+        Cutoff radius for SOAP in Angstroms (default: 6.0)
+    file_format : str, optional
+        File format for ASE I/O (default: None, auto-detect)
+    plot_subsample : bool, optional
+        Whether to generate a plot of FPS distances (default: False)
+    frame_skip : int, optional
+        Read every nth frame from the trajectory (default: 1)
+    output_dir : str, optional
+        Directory to save the subsampled structures (default: "subsampled_structures")
+    Returns
+    -------
+    list
+        List of selected ase.Atoms frames
+    Notes
+    -----
+    The selected frames and plots are saved in the specified output directory
+    """
+    traj_files = glob.glob(traj_path)
+    # Check if any matching files were found
+    if not traj_files:
+        raise ValueError(f"No files found matching pattern: {traj_path}")
+    trajec = []
+    for file in traj_files:
+        if file_format is not None:
+            trajec += read(file, index=f'::{frame_skip}', format=file_format)
+        else:
+            trajec += read(file, index=f'::{frame_skip}')
+    if not isinstance(trajec, list):
+        trajec = [trajec]
+    repres = create_repres(trajec, ind=index_type, rcut=rcut)
+    # Ensure we don't request more samples than available frames
+    n_samples = min(n_samples, len(trajec))
+    perm = fpsample.fps_sampling(repres, n_samples, start_idx=0)
+    fps_frames = []
+    for str_idx, frame in enumerate(perm):
+        new_frame = trajec[frame]
+        fps_frames.append(new_frame)
+    os.makedirs(output_dir, exist_ok=True)
+    if plot_subsample:
+        distance = []
+        for i in range(1, len(perm)):
+            distance.append(np.min(np.linalg.norm(repres[perm[:i]] - repres[perm[i]], axis=1)))
+        plt.figure(figsize=(8, 6))
+        plt.plot(distance, c="blue", linewidth=2)
+        plt.ylim([0, 1.1 * max(distance)])
+        plt.xlabel("Number of subsampled structures")
+        plt.ylabel("Euclidean distance")
+        plt.title("FPS Subsampling")
+        plt.savefig(os.path.join(output_dir, "subsampled_convergence.png"), dpi=300)
+        plt.show()
+        plt.close()
+        print(f"Saved convergence plot to {os.path.join(output_dir, 'subsampled_convergence.png')}")
+    # Extract the base filename without path for output file using os.path for platform independence
+    base_filename = os.path.basename(traj_files[0])
+    output_file = os.path.join(output_dir, f"subsample_{base_filename}")
+    try:
+        write(output_file, fps_frames, format=file_format)
+        print(f"Saved {len(fps_frames)} subsampled structures to {output_file}")
+    except Exception as e:
+        print(f"Error saving subsampled structures: {e}")
+    return fps_frames

CRISP/tests/DataAnalysis/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """CRISP DataAnalysis tests package."""

CRISP/tests/DataAnalysis/test_clustering_extended.py ADDED Viewed

@@ -0,0 +1,212 @@
+"""Extended tests for clustering module to improve coverage."""
+import pytest
+import numpy as np
+import os
+import tempfile
+import shutil
+from ase import Atoms
+from ase.io import write
+# Guarded import of the functions under test; the resulting flag feeds the
+# skipif markers on the test classes below.
+# NOTE(review): despite its name, ASE_AVAILABLE records whether the CRISP
+# clustering import succeeded (ase itself is imported unguarded above) — confirm intent.
+try:
+    from CRISP.data_analysis.clustering import (
+        analyze_frame,
+        analyze_trajectory,
+    )
+    ASE_AVAILABLE = True
+except ImportError:
+    ASE_AVAILABLE = False
+@pytest.mark.skipif(not ASE_AVAILABLE, reason="ASE not available")
+class TestClusteringExtended:
+    """Extended clustering tests for coverage."""
+    def test_analyze_frame_basic(self):
+        """Test frame clustering analysis."""
+        temp_dir = tempfile.mkdtemp()
+        try:
+            traj_file = os.path.join(temp_dir, 'test.traj')
+            atoms = Atoms('H2OH2O', positions=[
+                [0.0, 0.0, 0.0],
+                [0.96, 0.0, 0.0],
+                [0.24, 0.93, 0.0],
+                [2.8, 0.0, 0.0],
+                [3.76, 0.0, 0.0],
+                [3.04, 0.93, 0.0]
+            ])
+            atoms.set_cell([10, 10, 10])
+            atoms.set_pbc([True, True, True])
+            write(traj_file, atoms)
+            atom_indices = np.array([0, 1, 2, 3, 4, 5])
+            analyzer = analyze_frame(
+                traj_path=traj_file,
+                atom_indices=atom_indices,
+                threshold=2.5,
+                min_samples=2
+            )
+            assert analyzer is not None
+        finally:
+            shutil.rmtree(temp_dir)
+    @pytest.mark.parametrize("threshold", [1.5, 2.0, 2.5, 3.0])
+    def test_analyze_frame_different_cutoffs(self, threshold):
+        """Test with different distance cutoffs."""
+        temp_dir = tempfile.mkdtemp()
+        try:
+            traj_file = os.path.join(temp_dir, 'test.traj')
+            atoms = Atoms('H2OH2O', positions=[
+                [0.0, 0.0, 0.0],
+                [0.96, 0.0, 0.0],
+                [0.24, 0.93, 0.0],
+                [2.8, 0.0, 0.0],
+                [3.76, 0.0, 0.0],
+                [3.04, 0.93, 0.0]
+            ])
+            atoms.set_cell([10, 10, 10])
+            atoms.set_pbc([True, True, True])
+            write(traj_file, atoms)
+            atom_indices = np.array([0, 1, 2])
+            analyzer = analyze_frame(
+                traj_path=traj_file,
+                atom_indices=atom_indices,
+                threshold=threshold,
+                min_samples=1
+            )
+            assert analyzer is not None
+        finally:
+            shutil.rmtree(temp_dir)
+    def test_analyze_frame_calculate_distance_matrix(self):
+        """Test distance matrix calculation."""
+        temp_dir = tempfile.mkdtemp()
+        try:
+            traj_file = os.path.join(temp_dir, 'test.traj')
+            atoms = Atoms('H2O', positions=[
+                [0.0, 0.0, 0.0],
+                [0.96, 0.0, 0.0],
+                [0.24, 0.93, 0.0]
+            ])
+            atoms.set_cell([10, 10, 10])
+            atoms.set_pbc([True, True, True])
+            write(traj_file, atoms)
+            atom_indices = np.array([0, 1, 2])
+            analyzer = analyze_frame(
+                traj_path=traj_file,
+                atom_indices=atom_indices,
+                threshold=2.5,
+                min_samples=2
+            )
+            frame = analyzer.read_custom_frame()
+            assert frame is not None
+            dist_matrix, positions = analyzer.calculate_distance_matrix(frame)
+            assert dist_matrix is not None
+        finally:
+            shutil.rmtree(temp_dir)
+    def test_analyze_trajectory_basic(self):
+        """Test trajectory clustering."""
+        temp_dir = tempfile.mkdtemp()
+        try:
+            traj_file = os.path.join(temp_dir, 'test.traj')
+            atoms = Atoms('H2O', positions=[
+                [0.0, 0.0, 0.0],
+                [0.96, 0.0, 0.0],
+                [0.24, 0.93, 0.0]
+            ])
+            atoms.set_cell([10, 10, 10])
+            atoms.set_pbc([True, True, True])
+            write(traj_file, atoms)
+            atom_indices = np.array([0, 1, 2])
+            results = analyze_trajectory(
+                traj_path=traj_file,
+                indices_path=atom_indices,
+                threshold=2.5,
+                min_samples=2,
+                frame_skip=1
+            )
+            assert isinstance(results, list)
+        finally:
+            shutil.rmtree(temp_dir)
+@pytest.mark.skipif(not ASE_AVAILABLE, reason="ASE not available")
+class TestClusteringEdgeCases:
+    """Test edge cases for clustering."""
+    def test_clustering_min_atoms_validation(self):
+        """Test minimum atoms validation."""
+        temp_dir = tempfile.mkdtemp()
+        try:
+            traj_file = os.path.join(temp_dir, 'test.traj')
+            atoms = Atoms('H', positions=[[0.0, 0.0, 0.0]])
+            atoms.set_cell([10, 10, 10])
+            atoms.set_pbc([True, True, True])
+            write(traj_file, atoms)
+            atom_indices = np.array([0])
+            analyzer = analyze_frame(
+                traj_path=traj_file,
+                atom_indices=atom_indices,
+                threshold=2.5,
+                min_samples=5
+            )
+            frame = analyzer.read_custom_frame()
+            with pytest.raises(ValueError):
+                analyzer.calculate_distance_matrix(frame)
+        finally:
+            shutil.rmtree(temp_dir)
+    def test_clustering_invalid_trajectory(self):
+        """Test handling of invalid trajectory file."""
+        temp_dir = tempfile.mkdtemp()
+        try:
+            nonexistent = os.path.join(temp_dir, 'nonexistent.traj')
+            analyzer = analyze_frame(
+                traj_path=nonexistent,
+                atom_indices=np.array([0, 1, 2]),
+                threshold=2.5,
+                min_samples=2
+            )
+            frame = analyzer.read_custom_frame()
+            assert frame is None
+        finally:
+            shutil.rmtree(temp_dir)
+    def test_clustering_indices_from_file(self):
+        """Test loading indices from numpy file."""
+        temp_dir = tempfile.mkdtemp()
+        try:
+            traj_file = os.path.join(temp_dir, 'test.traj')
+            indices_file = os.path.join(temp_dir, 'indices.npy')
+            atoms = Atoms('H2O', positions=[
+                [0.0, 0.0, 0.0],
+                [0.96, 0.0, 0.0],
+                [0.24, 0.93, 0.0]
+            ])
+            atoms.set_cell([10, 10, 10])
+            atoms.set_pbc([True, True, True])
+            write(traj_file, atoms)
+            indices = np.array([0, 1, 2])
+            np.save(indices_file, indices)
+            analyzer = analyze_frame(
+                traj_path=traj_file,
+                atom_indices=indices_file,
+                threshold=2.5,
+                min_samples=2
+            )
+            assert analyzer is not None
+        finally:
+            shutil.rmtree(temp_dir)
+# Allow running this test module directly as a script; delegates to pytest in verbose mode.
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])