PyPI - off - Versions diffs - 0.1.0__py3-none-any.whl - Mend

off 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

off/__init__.py +23 -0
off/atom_energies.py +151 -0
off/config/_config.py +108 -0
off/dft_distrax/__init__.py +27 -0
off/dft_distrax/dft_distrax.py +216 -0
off/flow/__init__.py +29 -0
off/flow/equiv_flows.py +99 -0
off/functionals/__init__.py +35 -0
off/functionals/core_correction.py +84 -0
off/functionals/exchange_correlation.py +174 -0
off/functionals/external.py +49 -0
off/functionals/functional.py +129 -0
off/functionals/hartree.py +62 -0
off/functionals/kinetic.py +87 -0
off/main.py +172 -0
off/ode_solver/__init__.py +32 -0
off/ode_solver/eqx_ode.py +76 -0
off/plot_binding_csv.py +63 -0
off/plot_pes_ema.py +259 -0
off/plot_pes_mpl.py +280 -0
off/promolecular/__init__.py +27 -0
off/promolecular/promolecular_dist.py +465 -0
off/quadrature.py +261 -0
off/quadrature_scan.py +188 -0
off/scan_pes.py +133 -0
off/test_fwd_rev.py +290 -0
off/train/__init__.py +44 -0
off/train/loop.py +228 -0
off/train/loss.py +149 -0
off/train/utils.py +38 -0
off/utils.py +618 -0
off-0.1.0.dist-info/METADATA +154 -0
off-0.1.0.dist-info/RECORD +37 -0
off-0.1.0.dist-info/WHEEL +5 -0
off-0.1.0.dist-info/entry_points.txt +3 -0
off-0.1.0.dist-info/licenses/LICENSE +21 -0
off-0.1.0.dist-info/top_level.txt +1 -0

off/quadrature.py ADDED Viewed

@@ -0,0 +1,261 @@
+import glob
+import json
+import re
+from pathlib import Path
+import numpy as np
+import jax
+import jax.numpy as jnp
+import jax.random as jrnd
+import equinox as eqx
+from pyscf import gto, dft
+jax.config.update("jax_enable_x64", True)
+from .flow.equiv_flows import CNF
+from .ode_solver.eqx_ode import fwd_ode, rev_ode
+from .utils import one_hot_encode, coordinates, get_solver
+from .promolecular.promolecular_dist import ProMolecularDensity, AtomDBDistribution
+from .train.loss import build_energy_functional
+AA_TO_BOHR = 1.8897259886
+# ── model / prior loading ─────────────────────────────────────────────────────
+def last_checkpoint(results_dir):
+    """(path, epoch) of the highest-epoch checkpoint in results_dir/Checkpoints/."""
+    ckpts = glob.glob(str(Path(results_dir) / "Checkpoints" / "checkpoint_*.eqx"))
+    if not ckpts:
+        raise FileNotFoundError(f"No checkpoints in {results_dir}/Checkpoints/")
+    ckpts.sort(key=lambda p: int(re.search(r'checkpoint_(\d+)\.eqx', p).group(1)))
+    last = ckpts[-1]
+    return last, int(re.search(r'checkpoint_(\d+)\.eqx', last).group(1))
+def load_model(results_dir, p):
+    """Rebuild the CNF for job_params `p` and load its last checkpoint."""
+    Ne, atoms, z, coords = coordinates(p['mol_name'], p['bond_length'])
+    model = CNF(din=3, dim=p['hidden_layer'], mu=coords,
+                one_hot=one_hot_encode(z), key=jrnd.PRNGKey(0))
+    ckpt, epoch = last_checkpoint(results_dir)
+    model = eqx.tree_deserialise_leaves(ckpt, model)
+    return model, get_solver(p['solver']), Ne, atoms, z, coords, epoch
+def build_prior(p, z, coords, Ne):
+    """Rebuild the base distribution used at training time (must match it)."""
+    if p.get('prior') == 'db_sir':
+        from atomdb import make_promolecule
+        db_prior = make_promolecule(atnums=z, coords=coords, dataset="hci")
+        return AtomDBDistribution(db_prior=db_prior, z=z, coords=coords, Ne=Ne)
+    return ProMolecularDensity(z.ravel(), coords)
+# ── grid construction (PySCF) ─────────────────────────────────────────────────
+def _grids_from_mol(mol, level):
+    """Build a PySCF Becke grid for `mol`; return (coords, weights) in Bohr."""
+    grid = dft.gen_grid.Grids(mol)
+    grid.level = level
+    grid.build()
+    return (jnp.asarray(grid.coords,  dtype=jnp.float64),
+            jnp.asarray(grid.weights, dtype=jnp.float64))
+def build_grid(atoms, coords, Ne, grid_level=3, basis="6-31G(d,p)", unit="B"):
+    """PySCF molecular quadrature grid.  `coords` are interpreted in `unit`
+    ('B'/'Bohr' or 'Angstrom'); the returned grid coords/weights are in Bohr."""
+    atom_str = "; ".join(f"{a} {c[0]:.10f} {c[1]:.10f} {c[2]:.10f}"
+                         for a, c in zip(atoms, np.asarray(coords)))
+    mol = gto.M(atom=atom_str, basis=basis, unit=unit, verbose=0, spin=int(Ne) % 2)
+    return _grids_from_mol(mol, grid_level)
+def get_grid(geom, level=3, *, units="angstrom", basis="6-31G(d,p)", spin=0):
+    """User-facing quadrature grid.
+    Parameters
+    ----------
+    geom : str
+        Geometry in PySCF's ``atom=`` format — e.g. ``"H 0 0 0; H 0 0 0.74"``
+        or a multi-line XYZ-style block.
+    level : int
+        PySCF grid level (the "grid size");
+    units : {'angstrom', 'bohr'}
+        Units the geometry is given in (PySCF's default is angstrom).
+    basis, spin :
+        Forwarded to ``pyscf.gto.M``.  The basis only sets the atom-centred
+        grid partitioning; it does not affect the flow density.
+    Returns
+    -------
+    (weights, coords) :
+        Note the order, to match the listing ``w_grid, x_grid = get_grid(...)``;
+        the internal :func:`build_grid` returns the opposite ``(coords, weights)``.
+    """
+    unit = "Bohr" if str(units).lower().startswith("b") else "Angstrom"
+    mol = gto.M(atom=geom, basis=basis, unit=unit, spin=spin, verbose=0)
+    coords, weights = _grids_from_mol(mol, level)
+    return weights, coords
+getGrid = get_grid
+def rho_on_grid(model, solver, prior, grid_coords, chunk=256):
+    """Evaluate (positions, ρ_φ, score = ∇log ρ_φ) at the grid points.
+    The grid is processed in slices of `chunk` rows.  Each slice is passed
+    through ``rev_ode``; the prior's ``log_prob`` and ``score`` are evaluated on
+    the resulting base-space points, and ``fwd_ode`` is then run on the
+    concatenation of those three pieces.
+    Returns three NumPy arrays, concatenated over all slices: the positions
+    returned by ``fwd_ode``, ``exp`` of its log-density output flattened to 1-D,
+    and its score output.
+    """
+    x_l, rho_l, sc_l = [], [], []
+    for i in range(0, grid_coords.shape[0], chunk):
+        xc = grid_coords[i:i+chunk]; n = xc.shape[0]
+        # Reverse-ODE input: the positions followed by 1 + 3 zero-filled columns
+        # (presumably the log-density and score slots — confirm against rev_ode).
+        st_r = jnp.concatenate([xc, jnp.zeros((n, 1)), jnp.zeros((n, 3))], axis=1)
+        # Only the first output of rev_ode (base-space points) is used.
+        zb, _ = rev_ode(model, st_r, solver)
+        # assumes log_prob / score return 2-D arrays with n rows so that the
+        # axis=1 concatenation below is valid — TODO confirm
+        lp0 = prior.log_prob(zb)
+        sc0 = prior.score(zb)
+        xt, lpt, sct = fwd_ode(model, jnp.concatenate([zb, lp0, sc0], axis=1), solver)
+        # Copy each slice to host NumPy before accumulating.
+        x_l.append(np.array(xt))
+        rho_l.append(np.array(jnp.exp(lpt)).ravel())
+        sc_l.append(np.array(sct))
+    return np.concatenate(x_l), np.concatenate(rho_l), np.concatenate(sc_l)
+def quadrature_energy(functional, x_np, rho_np, sc_np, grid_coords, grid_weights,
+                      mol_dict, Ne, chunk=256):
+    """Integrate every energy term on the grid.
+    The local terms use ``functional``'s component functionals; the Hartree term
+    is the grid double sum (true 1/r), not the functional's MC pairwise estimator.
+    Parameters
+    ----------
+    functional : object exposing ``kinetic``, ``exchange``, ``external``, the
+        optional ``correlation`` / ``core_correction`` (may be None) and
+        ``_integrate(values, measure)``.
+    x_np, rho_np, sc_np : per-point positions, density values and scores, as
+        produced by :func:`rho_on_grid`.
+    grid_coords, grid_weights : quadrature points and weights.
+    mol_dict : dict with 'coords' (nuclear positions) and 'z' (nuclear charges).
+    Ne : electron count.
+    chunk : number of grid points handled per slice.
+    Returns
+    -------
+    dict with keys T, V_N, V_H, E_X, E_C, E_CC, E_NN and E_total (their sum).
+    E_C / E_CC are 0.0 when the corresponding functional is None.
+    """
+    w  = np.array(grid_weights)
+    G  = rho_np.shape[0]
+    rc = rho_np[:, None]
+    # Integration measure handed to functional._integrate: weight × density.
+    measure = jnp.asarray(w * rho_np)
+    def _args(sl):
+        # Positional arguments for one slice of points; the trailing None fills
+        # the functionals' last parameter (meaning not visible here).
+        return (jnp.array(rc[sl]), jnp.array(sc_np[sl]), jnp.array(x_np[sl]),
+                Ne, mol_dict, None)
+    def local(func):                         # ∫ f(...)·ρ dr  via the shared _integrate
+        out = np.zeros(G)
+        for i in range(0, G, chunk):
+            sl = slice(i, min(i + chunk, G))
+            out[sl] = np.array(func(*_args(sl))).ravel()
+        return float(functional._integrate(jnp.asarray(out), measure))
+    T    = local(functional.kinetic)
+    E_X  = local(functional.exchange)
+    E_C  = local(functional.correlation) if functional.correlation is not None else 0.0
+    V_N  = local(functional.external)
+    E_CC = local(functional.core_correction) if functional.core_correction is not None else 0.0
+    # Hartree — grid double sum
+    # NOTE: distances are taken between the quadrature points (grid_coords),
+    # whereas the local terms above are evaluated at x_np.
+    gc = np.array(grid_coords)
+    vc = np.zeros(G)
+    for i in range(0, G, chunk):
+        xi = gc[i:i+chunk]
+        # (chunk, G) squared distances between this slice and every grid point
+        r2 = np.sum((gc[None, :, :] - xi[:, None, :]) ** 2, axis=-1)
+        # zero distances are mapped to inf so the self term contributes 1/inf = 0
+        vc[i:i+chunk] = np.dot(1. / np.sqrt(np.where(r2 == 0., np.inf, r2)), w * rho_np)
+    V_H = float(0.5 * Ne ** 2 * functional._integrate(jnp.asarray(vc), measure))
+    # Nuclear repulsion
+    # Σ_{I<J} Z_I Z_J / |R_I − R_J|; the sum is empty (0) for a single nucleus.
+    cn = np.array(mol_dict['coords']); zn = np.array(mol_dict['z']).ravel()
+    E_NN = sum(zn[I] * zn[J] / float(np.linalg.norm(cn[I] - cn[J]))
+               for I in range(len(cn)) for J in range(I + 1, len(cn)))
+    return dict(T=T, V_N=V_N, V_H=V_H, E_X=E_X, E_C=E_C, E_CC=E_CC, E_NN=E_NN,
+                E_total=T + V_N + V_H + E_X + E_C + E_CC + E_NN)
+# ── high-level entry points ───────────────────────────────────────────────────
+def grid_energy(model, prior, solver, coords, z, atoms, Ne, functional, *,
+                grid_level=3, units="bohr", basis="6-31G(d,p)", chunk=256):
+    """Build the grid, evaluate ρ_φ, and integrate all energy terms.
+    Parameters
+    ----------
+    model, prior, solver : the trained CNF, its base distribution, ODE solver.
+    coords, z, atoms, Ne : molecular geometry / charges / electron count.
+    functional           : an EnergyFunctional (e.g. from build_energy_functional).
+    grid_level           : PySCF grid level (the "grid size").
+    units                : 'bohr' or 'angstrom' — how `coords` are given; the flow
+                           works in Bohr, so 'angstrom' inputs are converted.
+    Returns the energy dict plus 'Ne_integral'.
+    """
+    coords = np.asarray(coords, dtype=float)
+    unit = "Bohr" if str(units).lower().startswith("b") else "Angstrom"
+    coords_bohr = coords if unit == "Bohr" else coords * AA_TO_BOHR
+    gc, gw = build_grid(atoms, coords, Ne, grid_level=grid_level, basis=basis, unit=unit)
+    x_np, rho_np, sc_np = rho_on_grid(model, solver, prior, gc, chunk=chunk)
+    mol_dict = {'coords': jnp.asarray(coords_bohr, dtype=jnp.float64), 'z': jnp.asarray(z)}
+    en = quadrature_energy(functional, x_np, rho_np, sc_np, gc, gw, mol_dict, Ne, chunk=chunk)
+    en['Ne_integral'] = float(np.dot(np.array(gw), Ne * rho_np))
+    return en
+def grid_energy_from_checkpoint(results_dir, *, grid_level=3, basis="6-31G(d,p)",
+                                units="bohr", chunk=256, recompute=False, cache=True):
+    """One call from a trained run directory: read job_params.json (functional +
+    geometry), load the last checkpoint, and integrate.  The geometry comes from
+    ``coordinates`` (always Bohr), so no units flag is needed here.
+    The result is cached in ``results_dir/energy_summary.json`` — pass
+    ``recompute=True`` to ignore an existing cache, or ``cache=False`` to skip
+    reading/writing it.
+    """
+    results_dir = Path(results_dir)
+    summary_path = results_dir / "energy_summary.json"
+    if cache and not recompute and summary_path.exists():
+        with open(summary_path) as f:
+            return json.load(f)
+    with open(results_dir / "job_params.json") as f:
+        p = json.load(f)
+    model, solver, Ne, atoms, z, coords, epoch = load_model(results_dir, p)
+    prior = build_prior(p, z, coords, Ne)
+    functional = build_energy_functional(
+        kinetic_name=p['kinetic'], lam=p['lam'], exchange_name=p['exchange'],
+        correlation_name=p['correlation'], hartree_name=p['hartree'],
+        external_name=p['external'], core_correction_name=p['core_correction'],
+    )
+    en = grid_energy(model, prior, solver, coords, z, atoms, Ne, functional,
+                     grid_level=grid_level, units=units, basis=basis, chunk=chunk)
+    en.update(epoch=epoch, mol_name=p['mol_name'], bond_length=p['bond_length'])
+    if cache:
+        with open(summary_path, "w") as f:
+            json.dump(en, f, indent=4)
+    return en
+def _print_energy(results_dir, en):
+    print(f"\n{results_dir}")
+    print(f"  mol={en.get('mol_name')}  R={en.get('bond_length')}  epoch={en.get('epoch')}")
+    print("  " + "-" * 36)
+    for k in ("T", "V_N", "V_H", "E_X", "E_C", "E_CC", "E_NN"):
+        print(f"  {k:8s} = {en[k]:+.6f} Ha")
+    print("  " + "-" * 36)
+    print(f"  {'E_total':8s} = {en['E_total']:+.6f} Ha")
+    print(f"  {'N_e':8s} = {en['Ne_integral']:.4f}   (∫ρ, should be Ne)")
+def main():
+    import argparse
+    ap = argparse.ArgumentParser(
+        description="Grid (quadrature) energy of a trained OFF run directory.")
+    ap.add_argument("results_dir", nargs="+",
+                    help="bl_* run dir(s) with job_params.json and Checkpoints/ "
+                         "(shell globs like Results/H2/<method>/bl_* are fine)")
+    ap.add_argument("--grid_level", type=int, default=1, help="PySCF grid level")
+    ap.add_argument("--bs", type=int, default=256, help="grid chunk size")
+    ap.add_argument("--basis", type=str, default="6-31G(d,p)",
+                    help="PySCF basis (sets grid partitioning only)")
+    ap.add_argument("--recompute", action="store_true",
+                    help="ignore cached energy_summary.json and recompute")
+    args = ap.parse_args()
+    for rd in args.results_dir:
+        en = grid_energy_from_checkpoint(
+            rd, grid_level=args.grid_level, basis=args.basis,
+            chunk=args.bs, recompute=args.recompute)
+        _print_energy(rd, en)
+if __name__ == "__main__":
+    main()

off/quadrature_scan.py ADDED Viewed

@@ -0,0 +1,188 @@
+"""
+Grid-quadrature total energy for every bond length of a molecule.
+Thin CLI around ``of_flows/quadrature.py``: it walks every method directory and
+every bl_* subdirectory under Results/{mol}/, calls
+``quadrature.grid_energy_from_checkpoint`` on each (which builds the PySCF grid,
+evaluates ρ_φ via the flow, and integrates all energy terms), and writes one CSV
+per molecule.  For molecules it also integrates the constituent single atoms
+under the same method tag and reports the binding energy.
+Directory layout assumed (same as main.py):
+    Results/{mol}/{method}/bl_X.XXXX/
+        Checkpoints/checkpoint_*.eqx
+        job_params.json
+    Results/{atom}/{method}/bl_0.0000/   (binding reference)
+Results/ is located next to this script, so it runs from anywhere.
+Usage
+-----
+python quadrature_scan.py --H2
+python quadrature_scan.py --H2 --N2
+python quadrature_scan.py --mol H2 H10
+python quadrature_scan.py --H10 --recompute
+Output (one CSV per molecule, under Results/{mol}/):
+    Results/{mol}/quadrature_{mol}.csv
+      columns: method, R_bohr, epoch, E_total, T, V_N, V_H, E_X, E_C, E_CC,
+               E_NN, Ne_int, E_atoms, dE_bind_Ha
+"""
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+import gc
+import re
+import argparse
+from pathlib import Path
+import jax
+import pandas as pd
+from quadrature import grid_energy_from_checkpoint
+_SCRIPT_DIR = Path(__file__).resolve().parent
+KNOWN_MOLS = ["H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne",
+              "H2", "N2", "O2", "F2", "HF", "CO", "LiH", "H10"]
+SINGLE_ATOMS = {"H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne"}
+# ── CLI ───────────────────────────────────────────────────────────────────────
+parser = argparse.ArgumentParser(
+    description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter,
+    allow_abbrev=False)  # so --H is not treated as a prefix of --H2 / --H10
+for _m in KNOWN_MOLS:
+    parser.add_argument(f"--{_m}", action="store_true", help=f"Scan molecule {_m}")
+parser.add_argument("--mol", type=str, nargs="+", default=[], metavar="NAME",
+                    help="Molecule name(s) to scan (alternative to the flags)")
+parser.add_argument("--results_root", type=str, default=None,
+                    help="Override Results root (default: <script_dir>/Results)")
+parser.add_argument("--bs", type=int, default=256, help="Grid chunk size")
+parser.add_argument("--grid_level", type=int, default=3, help="PySCF grid level")
+parser.add_argument("--recompute", action="store_true",
+                    help="Re-run grid integration even if energy_summary.json is cached")
+parser.add_argument("--out", type=str, default=None,
+                    help="Output CSV path (default: Results/{mol}/quadrature_{mol}.csv)")
+args = parser.parse_args()
+selected = list(args.mol) + [m for m in KNOWN_MOLS if getattr(args, m)]
+selected = list(dict.fromkeys(selected))
+if not selected:
+    parser.error("No molecule selected. Use a flag (e.g. --H2) or --mol H2 [...].")
+root = Path(args.results_root).resolve() if args.results_root else (_SCRIPT_DIR / "Results")
+print(f"Results root : {root}\n")
+def constituents(mol: str) -> dict:
+    """{element: count} from a formula, e.g. N2->{N:2}, HF->{H:1,F:1}, H10->{H:10}."""
+    out = {}
+    for el, n in re.findall(r"([A-Z][a-z]?)(\d*)", mol):
+        if el:
+            out[el] = out.get(el, 0) + (int(n) if n else 1)
+    return out
+def atom_reference(method_name: str, mol: str):
+    """Grid energy reference Σ_atoms count·E(atom) under the same method tag.
+    Returns (E_atoms, {element: E_atom}) or (None, None) if any atom is missing."""
+    total = 0.0
+    detail = {}
+    for el, n in constituents(mol).items():
+        adir = root / el / method_name / "bl_0.0000"
+        if not (adir / "job_params.json").exists():
+            print(f"  atom reference: {el} not found at {adir} — binding skipped")
+            return None, None
+        try:
+            data = grid_energy_from_checkpoint(
+                adir, grid_level=args.grid_level, chunk=args.bs, recompute=args.recompute)
+        except Exception as e:
+            print(f"  atom reference: {el} FAILED — {e}")
+            return None, None
+        detail[el] = data['E_total']
+        total += n * data['E_total']
+    return total, detail
+def scan_molecule(mol: str):
+    mol_dir = root / mol
+    if not mol_dir.is_dir():
+        print(f"[{mol}] SKIP — {mol_dir} not found\n")
+        return
+    is_atom = mol in SINGLE_ATOMS
+    rows = []
+    for method_dir in sorted(d for d in mol_dir.iterdir() if d.is_dir()):
+        bl_dirs = sorted(method_dir.glob("bl_*"),
+                         key=lambda d: float(d.name.split("_")[1]))
+        if not bl_dirs:
+            continue
+        print(f"[{mol}] method: {method_dir.name}  ({len(bl_dirs)} bond lengths)")
+        # Single-atom reference for the binding energy (same method tag).
+        E_atoms = None
+        if not is_atom:
+            E_atoms, detail = atom_reference(method_dir.name, mol)
+            if E_atoms is not None:
+                ref = "  ".join(f"{n}*E({el})={detail[el]:+.6f}"
+                                for el, n in constituents(mol).items())
+                print(f"  atom reference (grid): {ref}  ->  Σ = {E_atoms:+.6f} Ha")
+        for bl_dir in bl_dirs:
+            if not (bl_dir / "job_params.json").exists():
+                print(f"  {bl_dir.name}: missing job_params.json — skipping")
+                continue
+            try:
+                data = grid_energy_from_checkpoint(
+                    bl_dir, grid_level=args.grid_level, chunk=args.bs,
+                    recompute=args.recompute)
+            except Exception as e:
+                print(f"  {bl_dir.name}: FAILED — {e}")
+                continue
+            row = {
+                "method":  method_dir.name,
+                "R_bohr":  data['bond_length'],
+                "epoch":   data.get('epoch', '?'),
+                "E_total": data['E_total'],
+                "T":       data['T'],
+                "V_N":     data['V_N'],
+                "V_H":     data['V_H'],
+                "E_X":     data['E_X'],
+                "E_C":     data.get('E_C', 0.0),
+                "E_CC":    data.get('E_CC', 0.0),
+                "E_NN":    data['E_NN'],
+                "Ne_int":  data['Ne_integral'],
+            }
+            if E_atoms is not None:
+                row["E_atoms"]    = E_atoms
+                row["dE_bind_Ha"] = E_atoms - data['E_total']   # ΔE = ΣE(atom) - E(mol)
+            rows.append(row)
+            msg = (f"    R={data['bond_length']:.4f} Bohr  epoch={data.get('epoch','?'):>6}"
+                   f"  E_total={data['E_total']:+.6f} Ha")
+            if E_atoms is not None:
+                msg += f"  ΔE={E_atoms - data['E_total']:+.6f} Ha"
+            print(msg)
+            jax.clear_caches()
+            gc.collect()
+        print()
+    if not rows:
+        print(f"[{mol}] nothing to write (no checkpoints found)\n")
+        return
+    df = (pd.DataFrame(rows)
+            .sort_values(["method", "R_bohr"])
+            .reset_index(drop=True))
+    out_path = (Path(args.out).resolve() if args.out
+                else mol_dir / f"quadrature_{mol}.csv")
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    df.to_csv(out_path, index=False, float_format="%.8f")
+    print("=" * 96)
+    print(df.to_string(index=False))
+    print("=" * 96)
+    print(f"[{mol}] saved → {out_path}\n")
+# Script body: scan the selected molecules one after another, in selection order.
+for mol in selected:
+    scan_molecule(mol)

off/scan_pes.py ADDED Viewed

@@ -0,0 +1,133 @@
+"""
+Potential Energy Surface scan over a set of bond-length result directories.
+Thin CLI around ``of_flows/quadrature.py``: grid-integrates every bl_* directory
+under --scan_dir (via ``grid_energy_from_checkpoint``) and, optionally, an atom
+reference for the binding energy, then writes pes.csv and a plot.
+Usage
+-----
+  python scan_pes.py \
+      --scan_dir  Results/H2/<method> \
+      --atom_dir  Results/H/<method>/bl_0.0000
+Outputs (written inside --scan_dir):
+  pes.csv          — columns R_bohr, epoch, E_total, T, V_N, V_H, E_X, E_C, E_NN, Ne_int (energies in Ha); with --atom_dir also E_bind_Ha (Ha) and D_e_eV (eV)
+  pes.png / .svg   — PES curve (E_total and D_e vs R)
+"""
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+import argparse
+from pathlib import Path
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import pandas as pd
+from quadrature import grid_energy_from_checkpoint
+# ── CLI ───────────────────────────────────────────────────────────────────────
+parser = argparse.ArgumentParser()
+parser.add_argument("--scan_dir",  type=str, required=True,
+                    help="Method directory containing bl_X.XXXX subdirectories")
+parser.add_argument("--atom_dir",  type=str, default=None,
+                    help="bl_0.0000 directory for the atom (binding-energy reference)")
+parser.add_argument("--bs",        type=int, default=256,  help="Grid chunk size")
+parser.add_argument("--grid_level",type=int, default=3,    help="PySCF grid level")
+parser.add_argument("--recompute", action="store_true",
+                    help="Re-run integration even if energy_summary.json already exists")
+args = parser.parse_args()
+def analyse(results_dir):
+    return grid_energy_from_checkpoint(
+        Path(results_dir).resolve(), grid_level=args.grid_level,
+        chunk=args.bs, recompute=args.recompute)
+# ── Scan over all bl_* directories ───────────────────────────────────────────
+scan_dir = Path(args.scan_dir).resolve()
+bl_dirs  = sorted(scan_dir.glob("bl_*"),
+                  key=lambda d: float(d.name.split("_")[1]))
+if not bl_dirs:
+    raise FileNotFoundError(f"No bl_* directories found in {scan_dir}")
+print(f"\nFound {len(bl_dirs)} bond-length directories in:\n  {scan_dir}\n")
+# Optional atom reference (binding uses 2*E_atom — homonuclear diatomic)
+E_atom = None
+if args.atom_dir is not None:
+    print("atom reference:")
+    E_atom = analyse(args.atom_dir)['E_total']
+    print(f"  E(atom) = {E_atom:+.6f} Ha\n")
+# Main scan
+rows = []
+for bl_dir in bl_dirs:
+    if not (bl_dir / "job_params.json").exists():
+        print(f"  {bl_dir.name}: missing job_params.json — skipping")
+        continue
+    data = analyse(bl_dir)
+    R = data['bond_length']
+    row = {'R_bohr':  R,
+           'epoch':   data.get('epoch', '?'),
+           'E_total': data['E_total'],
+           'T':       data['T'],
+           'V_N':     data['V_N'],
+           'V_H':     data['V_H'],
+           'E_X':     data['E_X'],
+           'E_C':     data.get('E_C', 0.0),
+           'E_NN':    data['E_NN'],
+           'Ne_int':  data['Ne_integral']}
+    if E_atom is not None:
+        E_bind           = data['E_total'] - 2.0 * E_atom
+        row['E_bind_Ha'] = E_bind
+        row['D_e_eV']    = -E_bind * 27.2114
+    rows.append(row)
+    tag = f"  R={R:.4f} Bohr  epoch={row['epoch']:>6}  E={data['E_total']:+.6f} Ha"
+    if E_atom is not None:
+        tag += f"  E_bind={row['E_bind_Ha']:+.6f} Ha"
+    print(tag)
+# ── Save CSV ─────────────────────────────────────────────────────────────────
+df = pd.DataFrame(rows).sort_values('R_bohr').reset_index(drop=True)
+csv_path = scan_dir / "pes.csv"
+df.to_csv(csv_path, index=False, float_format='%.8f')
+print(f"\nPES data saved → {csv_path}")
+print(df.to_string(index=False))
+# ── Plot ──────────────────────────────────────────────────────────────────────
+# One panel (E_total vs R); a second panel (D_e vs R) is added when an atom
+# reference was given.
+fig, axes = plt.subplots(1, 2 if E_atom is not None else 1,
+                         figsize=(11 if E_atom is not None else 5, 4))
+if E_atom is None:
+    # with a single panel plt.subplots returns a bare Axes; wrap it so that
+    # axes[0] works below
+    axes = [axes]
+R_vals    = df['R_bohr'].values
+# Runs whose checkpoint epoch equals the largest epoch in the table are drawn
+# as "complete"; all others as "incomplete".
+max_epoch = df['epoch'].max()
+complete  = df['epoch'] == max_epoch   # True if run finished
+for ax, y_col, ylabel, title, color in [
+    (axes[0], 'E_total', 'Energy [Ha]',  'Potential Energy Surface', 'tab:blue'),
+    *( [(axes[1], 'D_e_eV', 'D_e [eV]', 'Dissociation Energy', 'tab:orange')]
+       if E_atom is not None else [] ),
+]:
+    y = df[y_col].values
+    ax.plot(R_vals[complete],  y[complete],  'o-', color=color, label=f'epoch={max_epoch}')
+    ax.plot(R_vals[~complete], y[~complete], '^', color=color, alpha=0.5,
+            label='incomplete', markerfacecolor='none')
+    if y_col == 'D_e_eV':
+        # zero line on the dissociation-energy panel
+        ax.axhline(0, color='k', linewidth=0.8, linestyle='--')
+    ax.set_xlabel("R [Bohr]")
+    ax.set_ylabel(ylabel)
+    ax.set_title(title)
+    ax.legend(fontsize=8)
+    ax.grid(True, alpha=0.3)
+# NOTE(review): Path.name never contains "/", so the split is a no-op and the
+# title is simply the name of scan_dir's parent directory — confirm that the
+# parent (not scan_dir itself) is the intended title.
+fig.suptitle(scan_dir.parent.name.split("/")[-1], fontsize=9)
+fig.tight_layout()
+fig.savefig(scan_dir / "pes.svg", transparent=True)
+fig.savefig(scan_dir / "pes.png", dpi=150)
+print(f"PES plot saved → {scan_dir}/pes.png")