PyPI - chemparseplot - Versions diffs - 1.3.0__tar.gz → 1.4.0__tar.gz - Mend

chemparseplot 1.3.0__tar.gz → 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

{chemparseplot-1.3.0 → chemparseplot-1.4.0}/.gitignore RENAMED Viewed

@@ -171,3 +171,11 @@ cython_debug/
 /_version.py
 /.pdm-python
 *.ipynb
+# Dolt database files (added by bd init)
+.dolt/
+*.db
+# Lychee link checker cache
+.lycheecache
+.beads/

{chemparseplot-1.3.0 → chemparseplot-1.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chemparseplot
-Version: 1.3.0
+Version: 1.4.0
 Summary: Parsers and plotting tools for computational chemistry
 Project-URL: Documentation, https://chemparseplot.rgoswami.me
 Project-URL: Issues, https://github.com/HaoZeke/chemparseplot/issues

{chemparseplot-1.3.0 → chemparseplot-1.4.0}/_version.py RENAMED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '1.3.0'
-__version_tuple__ = version_tuple = (1, 3, 0)
+__version__ = version = '1.4.0'
+__version_tuple__ = version_tuple = (1, 4, 0)
 __commit_id__ = commit_id = None

chemparseplot-1.4.0/chemparseplot/parse/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+# SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
+#
+# SPDX-License-Identifier: MIT
+from chemparseplot.parse import orca, patterns
+# Lazy imports for modules with optional heavy deps (h5py, pandas)
+# Import directly: from chemparseplot.parse.chemgp_hdf5 import read_h5_table
+# Or: from chemparseplot.parse import plumed

chemparseplot-1.4.0/chemparseplot/parse/chemgp_hdf5.py ADDED Viewed

@@ -0,0 +1,174 @@
+# SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
+#
+# SPDX-License-Identifier: MIT
+"""HDF5 file I/O utilities for ChemGP data.
+This module provides functions for reading structured data from ChemGP HDF5
+output files. The HDF5 layout mirrors the Julia common_plot.jl helpers.
+HDF5 Layout
+-----------
+- ``grids/<name>``: 2D arrays with attrs x_range, y_range, x_length, y_length
+- ``table/<name>``: group of same-length 1D arrays
+- ``paths/<name>``: point sequences (x, y or rAB, rBC)
+- ``points/<name>``: point sets (x, y or pc1, pc2)
+- Root attrs: metadata scalars
+.. versionadded:: 1.7.0
+    Extracted from chemgp.plt_gp to standalone module.
+"""
+from typing import Any
+import numpy as np
+def read_h5_table(f: Any, name: str = "table") -> Any:
+    """Read a group of same-length vectors as a DataFrame.
+    Parameters
+    ----------
+    f
+        Open HDF5 file object
+    name
+        Name of the table group (default: "table")
+    Returns
+    -------
+    DataFrame
+        DataFrame with columns from the HDF5 group
+    """
+    import pandas as pd
+    g = f[name]
+    cols = {}
+    for k in g.keys():
+        arr = g[k][()]
+        if arr.dtype.kind in {"S", "O"}:
+            cols[k] = arr.astype(str).tolist()
+        else:
+            cols[k] = arr.tolist()
+    return pd.DataFrame(cols)
+def read_h5_grid(
+    f: Any, name: str
+) -> tuple[np.ndarray, np.ndarray | None, np.ndarray | None]:
+    """Read a 2D grid with optional axis ranges.
+    Parameters
+    ----------
+    f
+        Open HDF5 file object
+    name
+        Name of the grid dataset
+    Returns
+    -------
+    tuple
+        (data, x_coords, y_coords) where x_coords and y_coords may be None
+        if axis range attributes are not present
+    """
+    ds = f[f"grids/{name}"]
+    data = ds[()]
+    x_coords = None
+    y_coords = None
+    if "x_range" in ds.attrs and "x_length" in ds.attrs:
+        lo, hi = ds.attrs["x_range"]
+        n = int(ds.attrs["x_length"])
+        x_coords = np.linspace(lo, hi, n)
+    if "y_range" in ds.attrs and "y_length" in ds.attrs:
+        lo, hi = ds.attrs["y_range"]
+        n = int(ds.attrs["y_length"])
+        y_coords = np.linspace(lo, hi, n)
+    return data, x_coords, y_coords
+def read_h5_path(f: Any, name: str) -> dict[str, np.ndarray]:
+    """Read a path (ordered point sequence).
+    Parameters
+    ----------
+    f
+        Open HDF5 file object
+    name
+        Name of the path dataset
+    Returns
+    -------
+    dict
+        Dictionary mapping coordinate names to arrays
+    """
+    g = f[f"paths/{name}"]
+    return {k: g[k][()] for k in g.keys()}
+def read_h5_points(f: Any, name: str) -> dict[str, np.ndarray]:
+    """Read a point set.
+    Parameters
+    ----------
+    f
+        Open HDF5 file object
+    name
+        Name of the points dataset
+    Returns
+    -------
+    dict
+        Dictionary mapping coordinate names to arrays
+    """
+    g = f[f"points/{name}"]
+    return {k: g[k][()] for k in g.keys()}
+def read_h5_metadata(f: Any) -> dict[str, Any]:
+    """Read root-level metadata attributes.
+    Parameters
+    ----------
+    f
+        Open HDF5 file object
+    Returns
+    -------
+    dict
+        Dictionary of metadata attributes
+    """
+    return {k: f.attrs[k] for k in f.attrs.keys()}
+def validate_hdf5_structure(
+    f: Any, required_groups: list[str] | None = None
+) -> list[str]:
+    """Validate HDF5 file has expected structure.
+    Parameters
+    ----------
+    f
+        Open HDF5 file object
+    required_groups
+        List of required group names (default: ["grids", "table"])
+    Returns
+    -------
+    list[str]
+        List of missing groups (empty if all present)
+    Raises
+    ------
+    ValueError
+        If required groups are missing
+    """
+    if required_groups is None:
+        required_groups = ["grids", "table"]
+    missing = [g for g in required_groups if g not in f]
+    if missing:
+        msg = f"Invalid HDF5 structure. Missing groups: {missing}"
+        raise ValueError(msg)
+    return missing

chemparseplot-1.4.0/chemparseplot/parse/chemgp_jsonl.py ADDED Viewed

@@ -0,0 +1,305 @@
+# SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
+#
+# SPDX-License-Identifier: MIT
+"""Parsers for ChemGP JSONL output formats.
+ChemGP Rust examples produce JSONL files with method comparison data,
+GP quality grids, and RFF approximation benchmarks. This module provides
+structured parsing into typed containers for downstream plotting.
+.. versionadded:: 1.5.0
+"""
+from __future__ import annotations
+import json
+from collections import defaultdict
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+@dataclass
+class OptimizerTrace:
+    """Single optimizer trace from a comparison JSONL.
+    Attributes
+    ----------
+    method : str
+        Optimizer name (e.g. ``"gp_minimize"``, ``"neb"``, ``"otgpd"``).
+    steps : list[int]
+        Step indices.
+    oracle_calls : list[int]
+        Cumulative oracle call counts.
+    energies : list[float] | None
+        Energy at each step (minimize, dimer).
+    forces : list[float] | None
+        Force norm at each step (dimer: ``force``, NEB: ``max_force``).
+    """
+    method: str
+    steps: list[int] = field(default_factory=list)
+    oracle_calls: list[int] = field(default_factory=list)
+    energies: list[float] | None = None
+    forces: list[float] | None = None
+@dataclass
+class ComparisonData:
+    """Parsed optimizer comparison from a single JSONL file.
+    Attributes
+    ----------
+    traces : dict[str, OptimizerTrace]
+        Keyed by method name.
+    summary : dict | None
+        Summary record if present.
+    """
+    traces: dict[str, OptimizerTrace] = field(default_factory=dict)
+    summary: dict[str, Any] | None = None
+def parse_comparison_jsonl(path: str | Path) -> ComparisonData:
+    """Parse a ChemGP optimizer comparison JSONL file.
+    Handles minimize, dimer, and NEB comparison formats. Each line is a
+    JSON object with a ``method`` field (or ``summary: true``).
+    Parameters
+    ----------
+    path
+        Path to the JSONL file.
+    Returns
+    -------
+    ComparisonData
+        Parsed traces keyed by method name.
+    """
+    data = ComparisonData()
+    with open(path) as f:
+        for line in f:
+            rec = json.loads(line.strip())
+            if rec.get("summary"):
+                data.summary = rec
+                continue
+            method = rec["method"]
+            if method not in data.traces:
+                data.traces[method] = OptimizerTrace(method=method)
+            trace = data.traces[method]
+            trace.steps.append(rec.get("step", len(trace.steps)))
+            trace.oracle_calls.append(rec["oracle_calls"])
+            if "energy" in rec:
+                if trace.energies is None:
+                    trace.energies = []
+                trace.energies.append(rec["energy"])
+            force_key = "force" if "force" in rec else "max_force"
+            if force_key in rec:
+                if trace.forces is None:
+                    trace.forces = []
+                trace.forces.append(rec[force_key])
+    return data
+@dataclass
+class RFFQualityData:
+    """Parsed RFF approximation quality data.
+    Attributes
+    ----------
+    exact_energy_mae : float
+        Exact GP energy MAE vs true surface.
+    exact_gradient_mae : float
+        Exact GP gradient MAE vs true surface.
+    d_rff_values : list[int]
+        RFF feature counts tested.
+    energy_mae_vs_true : list[float]
+        RFF energy MAE vs true surface.
+    gradient_mae_vs_true : list[float]
+        RFF gradient MAE vs true surface.
+    energy_mae_vs_gp : list[float]
+        RFF energy MAE vs exact GP.
+    gradient_mae_vs_gp : list[float]
+        RFF gradient MAE vs exact GP.
+    """
+    exact_energy_mae: float = 0.0
+    exact_gradient_mae: float = 0.0
+    d_rff_values: list[int] = field(default_factory=list)
+    energy_mae_vs_true: list[float] = field(default_factory=list)
+    gradient_mae_vs_true: list[float] = field(default_factory=list)
+    energy_mae_vs_gp: list[float] = field(default_factory=list)
+    gradient_mae_vs_gp: list[float] = field(default_factory=list)
+def parse_rff_quality_jsonl(path: str | Path) -> RFFQualityData:
+    """Parse a ChemGP RFF quality JSONL file.
+    Parameters
+    ----------
+    path
+        Path to the JSONL file.
+    Returns
+    -------
+    RFFQualityData
+        Parsed exact GP and RFF metrics.
+    """
+    data = RFFQualityData()
+    with open(path) as f:
+        for line in f:
+            rec = json.loads(line.strip())
+            if rec["type"] == "exact_gp":
+                data.exact_energy_mae = rec["energy_mae"]
+                data.exact_gradient_mae = rec["gradient_mae"]
+            elif rec["type"] == "rff":
+                data.d_rff_values.append(rec["d_rff"])
+                data.energy_mae_vs_true.append(rec["energy_mae_vs_true"])
+                data.gradient_mae_vs_true.append(rec["gradient_mae_vs_true"])
+                data.energy_mae_vs_gp.append(rec["energy_mae_vs_gp"])
+                data.gradient_mae_vs_gp.append(rec["gradient_mae_vs_gp"])
+    return data
+@dataclass
+class GPQualityGrid:
+    """GP quality grid data for a single training set size.
+    Attributes
+    ----------
+    n_train : int
+        Number of training points.
+    nx : int
+        Grid x resolution.
+    ny : int
+        Grid y resolution.
+    x : list[list[float]]
+        Grid x coordinates (ny x nx).
+    y : list[list[float]]
+        Grid y coordinates (ny x nx).
+    true_e : list[list[float]]
+        True energy on grid.
+    gp_e : list[list[float]]
+        GP predicted energy on grid.
+    gp_var : list[list[float]]
+        GP variance on grid.
+    train_x : list[float]
+        Training point x coordinates.
+    train_y : list[float]
+        Training point y coordinates.
+    train_e : list[float]
+        Training point energies.
+    """
+    n_train: int = 0
+    nx: int = 0
+    ny: int = 0
+    x: list[list[float]] = field(default_factory=list)
+    y: list[list[float]] = field(default_factory=list)
+    true_e: list[list[float]] = field(default_factory=list)
+    gp_e: list[list[float]] = field(default_factory=list)
+    gp_var: list[list[float]] = field(default_factory=list)
+    train_x: list[float] = field(default_factory=list)
+    train_y: list[float] = field(default_factory=list)
+    train_e: list[float] = field(default_factory=list)
+@dataclass
+class StationaryPoint:
+    """A stationary point (minimum or saddle) on the PES."""
+    kind: str  # "minimum" or "saddle"
+    id: int
+    x: float
+    y: float
+    energy: float
+@dataclass
+class GPQualityData:
+    """Complete GP quality data from mb_gp_quality.jsonl.
+    Attributes
+    ----------
+    meta : dict
+        Grid metadata (nx, ny, x_min, x_max, y_min, y_max).
+    stationary : list[StationaryPoint]
+        Minima and saddle points.
+    grids : dict[int, GPQualityGrid]
+        Grid data keyed by n_train.
+    """
+    meta: dict[str, Any] = field(default_factory=dict)
+    stationary: list[StationaryPoint] = field(default_factory=list)
+    grids: dict[int, GPQualityGrid] = field(default_factory=dict)
+def parse_gp_quality_jsonl(path: str | Path) -> GPQualityData:
+    """Parse a ChemGP GP quality JSONL file.
+    Parameters
+    ----------
+    path
+        Path to the JSONL file (e.g. ``mb_gp_quality.jsonl``).
+    Returns
+    -------
+    GPQualityData
+        Structured grid data with metadata and stationary points.
+    """
+    data = GPQualityData()
+    train_points = defaultdict(lambda: {"x": [], "y": [], "e": []})
+    grid_records = defaultdict(list)
+    with open(path) as f:
+        for line in f:
+            rec = json.loads(line.strip())
+            t = rec["type"]
+            if t == "grid_meta":
+                data.meta = rec
+            elif t in ("minimum", "saddle"):
+                data.stationary.append(
+                    StationaryPoint(
+                        kind=t,
+                        id=rec["id"],
+                        x=rec["x"],
+                        y=rec["y"],
+                        energy=rec["energy"],
+                    )
+                )
+            elif t == "train_point":
+                n = rec["n_train"]
+                train_points[n]["x"].append(rec["x"])
+                train_points[n]["y"].append(rec["y"])
+                train_points[n]["e"].append(rec["energy"])
+            elif t == "grid":
+                grid_records[rec["n_train"]].append(rec)
+    nx = data.meta.get("nx", 0)
+    ny = data.meta.get("ny", 0)
+    for n_train, records in grid_records.items():
+        grid = GPQualityGrid(n_train=n_train, nx=nx, ny=ny)
+        # Initialize 2D arrays
+        grid.x = [[0.0] * nx for _ in range(ny)]
+        grid.y = [[0.0] * nx for _ in range(ny)]
+        grid.true_e = [[0.0] * nx for _ in range(ny)]
+        grid.gp_e = [[0.0] * nx for _ in range(ny)]
+        grid.gp_var = [[0.0] * nx for _ in range(ny)]
+        for rec in records:
+            ix, iy = rec["ix"], rec["iy"]
+            grid.x[iy][ix] = rec["x"]
+            grid.y[iy][ix] = rec["y"]
+            grid.true_e[iy][ix] = rec["true_e"]
+            grid.gp_e[iy][ix] = rec["gp_e"]
+            grid.gp_var[iy][ix] = rec["gp_var"]
+        tp = train_points.get(n_train, {"x": [], "y": [], "e": []})
+        grid.train_x = tp["x"]
+        grid.train_y = tp["y"]
+        grid.train_e = tp["e"]
+        data.grids[n_train] = grid
+    return data

{chemparseplot-1.3.0 → chemparseplot-1.4.0}/chemparseplot/parse/eon/neb.py RENAMED Viewed

@@ -282,7 +282,7 @@ def load_augmenting_neb_data(
     ```{versionadded} 0.1.0
     ```
     """
-    from chemparseplot.parse.file_ import find_file_paths  # noqa: PLC0415
+    from chemparseplot.parse.file_ import find_file_paths
     dat_paths = find_file_paths(dat_pattern)
     con_paths = find_file_paths(con_pattern)

{chemparseplot-1.3.0 → chemparseplot-1.4.0}/chemparseplot/parse/neb_utils.py RENAMED Viewed

@@ -32,9 +32,9 @@ def calculate_landscape_coords(
     :param ira_kmax: kmax factor for IRA.
     :return: A tuple of (rmsd_r, rmsd_p) arrays.
     """
-    from concurrent.futures import ThreadPoolExecutor  # noqa: PLC0415
+    from concurrent.futures import ThreadPoolExecutor
-    from rgpycrumbs.geom.api.alignment import calculate_rmsd_from_ref  # noqa: PLC0415
+    from rgpycrumbs.geom.api.alignment import calculate_rmsd_from_ref
     log.info("Calculating landscape coordinates (RMSD-R, RMSD-P)...")
     with ThreadPoolExecutor(max_workers=2) as pool:

chemparseplot-1.4.0/chemparseplot/parse/orca/neb/__init__.py ADDED Viewed

@@ -0,0 +1,24 @@
+# SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
+#
+# SPDX-License-Identifier: MIT
+"""ORCA NEB parsing utilities.
+Supports both:
+- OPI (ORCA Python Interface) for ORCA 6.1+ JSON output
+- Legacy regex parsing for older ORCA versions
+"""
+from chemparseplot.parse.orca.neb.interp import extract_interp_points
+from chemparseplot.parse.orca.neb.opi_parser import (
+    HAS_OPI,
+    parse_orca_neb,
+    parse_orca_neb_fallback,
+)
+__all__ = [
+    "HAS_OPI",
+    "extract_interp_points",
+    "parse_orca_neb",
+    "parse_orca_neb_fallback",
+]

chemparseplot 1.3.0__tar.gz → 1.4.0__tar.gz

chemparseplot 1.3.0__tar.gz → 1.4.0__tar.gz