PyPI - chemparseplot - Versions diffs - 0.0.3__tar.gz → 1.0.1__tar.gz - Mend

chemparseplot 0.0.3.tar.gz → 1.0.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

{chemparseplot-0.0.3 → chemparseplot-1.0.1}/.gitignore RENAMED Viewed

@@ -1,3 +1,4 @@
+.pixi
 apidocs/*
 ### Generated by gibo (https://github.com/simonwhitaker/gibo)
 ### https://raw.github.com/github/gitignore/4488915eec0b3a45b5c63ead28f286819c0917de/Python.gitignore

{chemparseplot-0.0.3 → chemparseplot-1.0.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chemparseplot
-Version: 0.0.3
+Version: 1.0.1
 Summary: Parsers and plotting tools for computational chemistry
 Project-URL: Documentation, https://github.com/HaoZeke/chemparseplot#readme
 Project-URL: Issues, https://github.com/HaoZeke/chemparseplot/issues
@@ -12,14 +12,14 @@ Keywords: compchem,parser,plot
 Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: Implementation :: CPython
-Requires-Python: >=3.9
+Requires-Python: >=3.10
 Requires-Dist: numpy>=1.26.2
 Requires-Dist: pint>=0.22
+Requires-Dist: rgpycrumbs>=1.0.0
 Provides-Extra: doc
 Requires-Dist: mdit-py-plugins>=0.3.4; extra == 'doc'
 Requires-Dist: myst-nb>=1; extra == 'doc'
@@ -31,9 +31,14 @@ Requires-Dist: sphinx-sitemap>=2.5.1; extra == 'doc'
 Requires-Dist: sphinx-togglebutton>=0.3.2; extra == 'doc'
 Requires-Dist: sphinx>=7.2.6; extra == 'doc'
 Requires-Dist: sphinxcontrib-apidoc>=0.4; extra == 'doc'
+Provides-Extra: lint
+Requires-Dist: ruff>=0.1.6; extra == 'lint'
 Provides-Extra: plot
 Requires-Dist: cmcrameri>=1.7; extra == 'plot'
 Requires-Dist: matplotlib>=3.8.2; extra == 'plot'
+Provides-Extra: test
+Requires-Dist: pytest-cov>=4.1.0; extra == 'test'
+Requires-Dist: pytest>=7.4.3; extra == 'test'
 Description-Content-Type: text/markdown

{chemparseplot-0.0.3 → chemparseplot-1.0.1}/_version.py RENAMED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '0.0.3'
-__version_tuple__ = version_tuple = (0, 0, 3)
+__version__ = version = '1.0.1'
+__version_tuple__ = version_tuple = (1, 0, 1)
 __commit_id__ = commit_id = None

{chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/__init__.py RENAMED Viewed

@@ -2,4 +2,4 @@
 #
 # SPDX-License-Identifier: MIT
-from chemparseplot import basetypes, parse, units
+from chemparseplot import parse, units

{chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/eon/minimization.py RENAMED Viewed

@@ -8,7 +8,7 @@ def min_e_result(eresp: Path) -> dict:
     """Reads and parses the results.dat file.
     Args:
-        eresp: Path to the EON results directory.
+        eresp: Path to the eOn results directory.
     Returns:
         A dictionary containing the parsed data from results.dat, or None if the file

chemparseplot-1.0.1/chemparseplot/parse/eon/neb.py ADDED Viewed

@@ -0,0 +1,388 @@
+import logging
+from collections.abc import Callable
+from pathlib import Path
+import numpy as np
+import polars as pl
+from ase import Atoms
+from ase.io import read as ase_read
+try:
+    from rgpycrumbs._aux import _import_from_parent_env
+    from rgpycrumbs.geom.api.alignment import (
+        calculate_rmsd_from_ref,
+    )
+    ira_mod = _import_from_parent_env("ira_mod")
+except ImportError:
+    ira_mod = None
+log = logging.getLogger(__name__)
+def calculate_landscape_coords(
+    atoms_list: list[Atoms], ira_instance, ira_kmax: float
+) -> tuple[np.ndarray, np.ndarray]:
+    """
+    Calculates 2D landscape coordinates (RMSD-R, RMSD-P) for a path.
+    :param atoms_list: List of ASE Atoms objects representing the path.
+    :param ira_instance: An instantiated IRA object (or None).
+    :param ira_kmax: kmax factor for IRA.
+    :return: A tuple of (rmsd_r, rmsd_p) arrays.
+    """
+    logging.info("Calculating landscape coordinates (RMSD-R, RMSD-P)...")
+    rmsd_r = calculate_rmsd_from_ref(
+        atoms_list, ira_instance, ref_atom=atoms_list[0], ira_kmax=ira_kmax
+    )
+    rmsd_p = calculate_rmsd_from_ref(
+        atoms_list, ira_instance, ref_atom=atoms_list[-1], ira_kmax=ira_kmax
+    )
+    return rmsd_r, rmsd_p
+def _validate_data_atoms_match(z_data, atoms, dat_file_name):
+    """Checks if data points count matches structure count."""
+    if len(z_data) != len(atoms):
+        errmsg = (
+            f"Structure count ({len(atoms)}) != data point count "
+            f"({len(z_data)}) in {dat_file_name}"
+        )
+        log.error(errmsg)
+        raise ValueError(errmsg)
+def load_or_compute_data(
+    cache_file: Path | None,
+    force_recompute: bool,
+    validation_check: Callable[[pl.DataFrame], None],
+    computation_callback: Callable[[], pl.DataFrame],
+    context_name: str,
+) -> pl.DataFrame:
+    """Retrieves data from a parquet cache or triggers a computation callback."""
+    if cache_file and cache_file.exists() and not force_recompute:
+        log.info(f"Loading cached {context_name} data from {cache_file}...")
+        try:
+            df = pl.read_parquet(cache_file)
+            validation_check(df)
+            log.info(f"Loaded {df.height} rows from cache.")
+            return df
+        except Exception as e:
+            log.warning(f"Cache load failed or invalid: {e}. Recomputing...")
+    log.info(f"Computing {context_name} data...")
+    df = computation_callback()
+    if cache_file:
+        log.info(f"Saving {context_name} cache to {cache_file}...")
+        try:
+            df.write_parquet(cache_file)
+        except Exception as e:
+            log.error(f"Failed to write cache file: {e}")
+    return df
+def load_structures_and_calculate_additional_rmsd(
+    con_file: Path,
+    additional_con: list[tuple[Path, str]],
+    ira_kmax: float,
+    sp_file: Path | None = None,
+):
+    """Loads the main trajectory and calculates RMSD for any additional comparison structures."""
+    log.info(f"Reading structures from {con_file}")
+    atoms_list = ase_read(con_file, index=":")
+    log.info(f"Loaded {len(atoms_list)} structures.")
+    # --- Explicit Saddle Point Loading ---
+    sp_data = None
+    ira_instance = ira_mod.IRA() if ira_mod else None
+    if sp_file and sp_file.exists():
+        log.info(f"Loading explicit saddle point from {sp_file}")
+        sp_atoms = ase_read(sp_file)
+        sp_rmsd_r = calculate_rmsd_from_ref(
+            [sp_atoms],
+            ira_instance,
+            ref_atom=atoms_list[0],
+            ira_kmax=ira_kmax,
+        )[0]
+        sp_rmsd_p = calculate_rmsd_from_ref(
+            [sp_atoms],
+            ira_instance,
+            ref_atom=atoms_list[-1],
+            ira_kmax=ira_kmax,
+        )[0]
+        sp_data = {"atoms": sp_atoms, "r": sp_rmsd_r, "p": sp_rmsd_p}
+    # --- Additional Structures Loading ---
+    additional_atoms_data = []
+    if additional_con:
+        for add_file, add_label in additional_con:
+            # Handle empty labels
+            if not add_label or add_label.strip() == "":
+                label = add_file.stem
+            else:
+                label = add_label
+            log.info(f"Processing additional structure: {label}")
+            additional_atoms = ase_read(add_file)
+            add_rmsd_r = calculate_rmsd_from_ref(
+                [additional_atoms],
+                ira_instance,
+                ref_atom=atoms_list[0],
+                ira_kmax=ira_kmax,
+            )[0]
+            add_rmsd_p = calculate_rmsd_from_ref(
+                [additional_atoms],
+                ira_instance,
+                ref_atom=atoms_list[-1],
+                ira_kmax=ira_kmax,
+            )[0]
+            additional_atoms_data.append(
+                (additional_atoms, add_rmsd_r, add_rmsd_p, label)
+            )
+    return atoms_list, additional_atoms_data, sp_data
+def _process_single_path_step(
+    dat_file,
+    con_file,
+    y_data_column,
+    ira_instance,
+    ira_kmax,
+    step_idx,
+    ref_atoms=None,
+    prod_atoms=None,
+):
+    """Helper to process a single .dat/.con pair into a DataFrame row."""
+    path_data = np.loadtxt(dat_file, skiprows=1).T
+    z_data_step = path_data[y_data_column]
+    atoms_list_step = ase_read(con_file, index=":")
+    f_para_step = path_data[3]
+    _validate_data_atoms_match(z_data_step, atoms_list_step, dat_file.name)
+    # If ref/prod not provided, assume self-contained NEB (0=Ref, -1=Prod)
+    # If provided (augmentation mode), use them.
+    ref = ref_atoms if ref_atoms is not None else atoms_list_step[0]
+    prod = prod_atoms if prod_atoms is not None else atoms_list_step[-1]
+    rmsd_r = calculate_rmsd_from_ref(atoms_list_step, ira_instance, ref_atom=ref, ira_kmax=ira_kmax)
+    rmsd_p = calculate_rmsd_from_ref(atoms_list_step, ira_instance, ref_atom=prod, ira_kmax=ira_kmax)
+    # --- Calculate Synthetic 2D Gradients ---
+    dr = np.gradient(rmsd_r)
+    dp = np.gradient(rmsd_p)
+    norm_ds = np.sqrt(dr**2 + dp**2)
+    norm_ds[norm_ds == 0] = 1.0
+    tr = dr / norm_ds
+    tp = dp / norm_ds
+    grad_r = -f_para_step * tr
+    grad_p = -f_para_step * tp
+    return pl.DataFrame(
+        {
+            "r": rmsd_r,
+            "p": rmsd_p,
+            "grad_r": grad_r,
+            "grad_p": grad_p,
+            "z": z_data_step,
+            "step": int(step_idx),
+        }
+    )
+def aggregate_neb_landscape_data(
+    all_dat_paths: list[Path],
+    all_con_paths: list[Path],
+    y_data_column: int,
+    ira_instance,  # Can be None
+    cache_file: Path | None = None,
+    force_recompute: bool = False,
+    ira_kmax: float = 1.8,
+    # Caching augmentation
+    augment_dat: str | None = None,
+    augment_con: str | None = None,
+    ref_atoms: Atoms | None = None,
+    prod_atoms: Atoms | None = None,
+) -> pl.DataFrame:
+    """Aggregates data from multiple NEB steps for landscape visualization."""
+    # Init IRA if not passed
+    if ira_instance is None and ira_mod is not None:
+        ira_instance = ira_mod.IRA()
+    def validate_landscape_cache(df: pl.DataFrame):
+        if "p" not in df.columns:
+            raise ValueError("Cache missing 'p' column.")
+        if "grad_r" not in df.columns:
+            raise ValueError("Cache missing gradient columns (outdated).")
+    def compute_landscape_data() -> pl.DataFrame:
+        all_dfs = []
+        # --- Load Augmentation Data (Inside Cache Block) ---
+        if augment_dat and augment_con and ref_atoms and prod_atoms:
+            log.info(f"Loading augmentation data for cache: {augment_dat}")
+            df_aug = load_augmenting_neb_data(
+                augment_dat,
+                augment_con,
+                ref_atoms=ref_atoms,
+                prod_atoms=prod_atoms,
+                y_data_column=y_data_column,
+                ira_kmax=ira_kmax,
+            )
+            if not df_aug.is_empty():
+                 all_dfs.append(df_aug)
+        # Synchronization check
+        paths_dat = all_dat_paths
+        paths_con = all_con_paths
+        if len(paths_dat) != len(paths_con):
+            log.warning(f"Mismatch: {len(paths_dat)} dat vs {len(paths_con)} con.")
+            min_len = min(len(paths_dat), len(paths_con))
+            paths_dat = paths_dat[:min_len]
+            paths_con = paths_con[:min_len]
+        for step_idx, (dat_file, con_file_step) in enumerate(
+            zip(paths_dat, paths_con, strict=True)
+        ):
+            try:
+                df_step = _process_single_path_step(
+                    dat_file,
+                    con_file_step,
+                    y_data_column,
+                    ira_instance,
+                    ira_kmax,
+                    step_idx,
+                )
+                all_dfs.append(df_step)
+            except Exception as e:
+                log.warning(f"Failed to process step {step_idx} ({dat_file.name}): {e}")
+                continue
+        if not all_dfs:
+            rerr = "No data could be aggregated."
+            raise RuntimeError(rerr)
+        return pl.concat(all_dfs)
+    return load_or_compute_data(
+        cache_file=cache_file,
+        force_recompute=force_recompute,
+        validation_check=validate_landscape_cache,
+        computation_callback=compute_landscape_data,
+        context_name="Landscape",
+    )
+def load_augmenting_neb_data(
+    dat_pattern: str,
+    con_pattern: str,
+    ref_atoms: Atoms,
+    prod_atoms: Atoms,
+    y_data_column: int,
+    ira_kmax: float,
+) -> pl.DataFrame:
+    """
+    Loads external NEB paths (dat+con) to augment the landscape fit.
+    Forces projection onto the MAIN path's R/P coordinates.
+    """
+    from chemparseplot.parse.file_ import find_file_paths
+    dat_paths = find_file_paths(dat_pattern)
+    con_paths = find_file_paths(con_pattern)
+    if not dat_paths or not con_paths:
+        log.warning("Augmentation patterns did not match files.")
+        return pl.DataFrame()
+    # Sync lengths
+    min_len = min(len(dat_paths), len(con_paths))
+    dat_paths = dat_paths[:min_len]
+    con_paths = con_paths[:min_len]
+    log.info(f"Augmenting with {min_len} external paths...")
+    all_dfs = []
+    ira_instance = ira_mod.IRA() if ira_mod else None
+    for i, (d, c) in enumerate(zip(dat_paths, con_paths)):
+        try:
+            # Step -1 indicates 'background/augmented' data
+            df = _process_single_path_step(
+                d,
+                c,
+                y_data_column,
+                ira_instance,
+                ira_kmax,
+                -1,
+                ref_atoms=ref_atoms,
+                prod_atoms=prod_atoms,
+            )
+            all_dfs.append(df)
+        except Exception as e:
+            log.warning(f"Failed to load augmentation pair {d.name}: {e}")
+    return pl.concat(all_dfs) if all_dfs else pl.DataFrame()
+def compute_profile_rmsd(
+    atoms_list: list[Atoms],
+    cache_file: Path | None,
+    force_recompute: bool,
+    ira_kmax: float,
+) -> pl.DataFrame:
+    """Computes RMSD for a 1D profile."""
+    def validate_profile_cache(df: pl.DataFrame):
+        if "p" in df.columns:
+            raise ValueError("Cache contains 'p' column (looks like landscape data).")
+        if df.height != len(atoms_list):
+            raise ValueError(
+                f"Size mismatch: {df.height} vs {len(atoms_list)} structures."
+            )
+    def compute_data() -> pl.DataFrame:
+        ira_instance = ira_mod.IRA() if ira_mod else None
+        r_vals = calculate_rmsd_from_ref(
+            atoms_list, ira_instance, ref_atom=atoms_list[0], ira_kmax=ira_kmax
+        )
+        return pl.DataFrame({"r": r_vals})
+    return load_or_compute_data(
+        cache_file=cache_file,
+        force_recompute=force_recompute,
+        validation_check=validate_profile_cache,
+        computation_callback=compute_data,
+        context_name="Profile RMSD",
+    )
+def estimate_rbf_smoothing(df: pl.DataFrame) -> float:
+    """
+    Estimates a smoothing parameter for RBF interpolation.
+    Calculates the median Euclidean distance between sequential points in the path
+    and uses that value as the smoothing factor.
+    """
+    # Calculate distances between sequential images (r, p) within each step
+    df_dist = (
+        df.sort(["step", "r"])
+        .with_columns(
+            dr=pl.col("r").diff().over("step"),
+            dp=pl.col("p").diff().over("step"),
+        )
+        .with_columns(dist=(pl.col("dr") ** 2 + pl.col("dp") ** 2).sqrt())
+        .drop_nulls()
+    )
+    global_median_step = df_dist["dist"].median()
+    if global_median_step is None or global_median_step == 0:
+        return 0.0
+    return global_median_step

{chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/eon/saddle_search.py RENAMED Viewed

@@ -10,7 +10,7 @@ from rgpycrumbs.parsers.bless import BLESS_LOG
 from rgpycrumbs.parsers.common import _NUM
 from rgpycrumbs.search.helpers import tail
-from chemparseplot.basetypes import DimerOpt, MolGeom, SaddleMeasure, SpinID
+from rgpycrumbs.basetypes import DimerOpt, MolGeom, SaddleMeasure, SpinID
 class EONSaddleStatus(Enum):
@@ -72,7 +72,7 @@ def _read_results_dat(eresp: Path) -> dict:
     """Reads and parses the results.dat file.
     Args:
-        eresp: Path to the EON results directory.
+        eresp: Path to the eOn results directory.
     Returns:
         A dictionary containing the parsed data from results.dat, or None if the file
@@ -103,10 +103,10 @@ def _read_results_dat(eresp: Path) -> dict:
 def _find_log_file(eresp: Path) -> Path | None:
-    """Finds the most recent, valid log file within the EON results directory.
+    """Finds the most recent, valid log file within the eOn results directory.
     Args:
-        eresp: Path to the EON results directory.
+        eresp: Path to the eOn results directory.
     Returns:
         Path to the chosen log file, or None if no suitable log file is found.
@@ -182,7 +182,7 @@ def _extract_saddle_info(
     Args:
         log_data: A list of strings representing the lines of the log file.
-        eresp: Path to the EON results directory.
+        eresp: Path to the eOn results directory.
         is_gprd: Boolean flag indicating whether the GPRD method was used.
     Returns:
@@ -206,12 +206,14 @@ def _extract_saddle_info(
         saddle_fmax = np.abs(np.max(saddle.forces))
     elif not is_gprd:
         try:
+            # Expected header: Step, Step Size, Delta E, ||Force||
+            # ||Force|| is the 5th element (index 4)
             saddle_fmax = float(
                 (eresp / "client_spdlog.log")
                 .read_text()
                 .strip()
                 .split("\n")[-5:][0]
-                .split()[5]
+                .split()[4]
             )
         except (FileNotFoundError, IndexError):
             saddle_fmax = 0.0
@@ -243,10 +245,10 @@ def _get_methods(eresp: Path) -> DimerOpt:
 def parse_eon_saddle(eresp: Path, rloc: "SpinID") -> "SaddleMeasure":
-    """Parses EON saddle point search results from a directory.
+    """Parses eOn saddle point search results from a directory.
     Args:
-        eresp: Path to the directory containing EON results.
+        eresp: Path to the directory containing eOn results.
         rloc: A SpinID object.
     Returns:

chemparseplot-1.0.1/chemparseplot/parse/file_.py ADDED Viewed

@@ -0,0 +1,12 @@
+import logging
+import glob
+from pathlib import Path
+log = logging.getLogger(__name__)
+def find_file_paths(file_pattern: str) -> list[Path]:
+    """Finds and sorts files matching a glob pattern."""
+    log.info(f"Searching for files with pattern: '{file_pattern}'")
+    file_paths = sorted(Path(p) for p in glob.glob(file_pattern))
+    log.info(f"Found {len(file_paths)} file(s).")
+    return file_paths

{chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/orca/neb/interp.py RENAMED Viewed

@@ -14,7 +14,7 @@ import re
 import chemparseplot.parse.converter as conv
 import chemparseplot.parse.patterns as pat
-from chemparseplot.basetypes import nebiter, nebpath
+from rgpycrumbs.basetypes import nebiter, nebpath
 from chemparseplot.units import Q_
 # fmt: off

{chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/sella/saddle_search.py RENAMED Viewed

@@ -5,7 +5,7 @@ from collections import Counter
 from ase.io import read
 from rgpycrumbs.time.helpers import one_day_tdelta
-from chemparseplot.basetypes import SaddleMeasure, SpinID
+from rgpycrumbs.basetypes import SaddleMeasure, SpinID
 import ase
 import datetime

{chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/plot/geomscan.py RENAMED Viewed

@@ -1,6 +1,6 @@
 import matplotlib.pyplot as plt
-from chemparseplot.plot._aids import spline_interp
+from rgpycrumbs.interpolation import spline_interp
 from chemparseplot.plot.structs import BasePlotter

chemparseplot 0.0.3__tar.gz → 1.0.1__tar.gz

chemparseplot 0.0.3.tar.gz → 1.0.1.tar.gz