aimnet 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aimnet/nbops.py CHANGED
@@ -1,10 +1,8 @@
- from typing import Dict, Tuple
-
  import torch
  from torch import Tensor


- def set_nb_mode(data: Dict[str, Tensor]) -> Dict[str, Tensor]:
+ def set_nb_mode(data: dict[str, Tensor]) -> dict[str, Tensor]:
      """Logic to guess and set the neighbor model."""
      if "nbmat" in data:
          if data["nbmat"].ndim == 2:
@@ -18,12 +16,12 @@ def set_nb_mode(data: Dict[str, Tensor]) -> Dict[str, Tensor]:
      return data


- def get_nb_mode(data: Dict[str, Tensor]) -> int:
+ def get_nb_mode(data: dict[str, Tensor]) -> int:
      """Get the neighbor model."""
      return int(data["_nb_mode"].item())


- def calc_masks(data: Dict[str, Tensor]) -> Dict[str, Tensor]:
+ def calc_masks(data: dict[str, Tensor]) -> dict[str, Tensor]:
      """Calculate neighbor masks"""
      nb_mode = get_nb_mode(data)
      if nb_mode == 0:
@@ -45,9 +43,20 @@ def calc_masks(data: Dict[str, Tensor]) -> Dict[str, Tensor]:
          # padding must be the last atom
          data["mask_i"] = torch.zeros(data["numbers"].shape[0], device=data["numbers"].device, dtype=torch.bool)
          data["mask_i"][-1] = True
-         for suffix in ("", "_lr"):
-             if f"nbmat{suffix}" in data:
-                 data[f"mask_ij{suffix}"] = data[f"nbmat{suffix}"] == data["numbers"].shape[0] - 1
+         # Track processed arrays by their data pointer to avoid redundant mask calculations
+         processed: dict[int, str] = {}  # data_ptr -> mask_suffix
+         for suffix in ("", "_lr", "_coulomb", "_dftd3"):
+             nbmat_key = f"nbmat{suffix}"
+             if nbmat_key in data:
+                 if not torch.jit.is_scripting():
+                     # data_ptr() not supported in TorchScript
+                     ptr = data[nbmat_key].data_ptr()
+                     if ptr in processed:
+                         # Same array - reuse existing mask
+                         data[f"mask_ij{suffix}"] = data[f"mask_ij{processed[ptr]}"]
+                         continue
+                     processed[ptr] = suffix
+                 data[f"mask_ij{suffix}"] = data[nbmat_key] == data["numbers"].shape[0] - 1
          data["_input_padded"] = torch.tensor(True)
          data["mol_sizes"] = torch.bincount(data["mol_idx"])
          # last atom is padding
@@ -56,9 +65,20 @@ def calc_masks(data: Dict[str, Tensor]) -> Dict[str, Tensor]:
          data["mask_i"] = data["numbers"] == 0
          w = torch.where(data["mask_i"])
          pad_idx = w[0] * data["numbers"].shape[1] + w[1]
-         for suffix in ("", "_lr"):
-             if f"nbmat{suffix}" in data:
-                 data[f"mask_ij{suffix}"] = torch.isin(data[f"nbmat{suffix}"], pad_idx)
+         # Track processed arrays by their data pointer to avoid redundant mask calculations
+         processed: dict[int, str] = {}  # data_ptr -> mask_suffix
+         for suffix in ("", "_lr", "_coulomb", "_dftd3"):
+             nbmat_key = f"nbmat{suffix}"
+             if nbmat_key in data:
+                 if not torch.jit.is_scripting():
+                     # data_ptr() not supported in TorchScript
+                     ptr = data[nbmat_key].data_ptr()
+                     if ptr in processed:
+                         # Same array - reuse existing mask
+                         data[f"mask_ij{suffix}"] = data[f"mask_ij{processed[ptr]}"]
+                         continue
+                     processed[ptr] = suffix
+                 data[f"mask_ij{suffix}"] = torch.isin(data[nbmat_key], pad_idx)
          data["_input_padded"] = torch.tensor(True)
          data["mol_sizes"] = (~data["mask_i"]).sum(-1)
      else:
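
The deduplication added in both padding branches of calc_masks keys masks on Tensor.data_ptr(), so neighbor-list keys that alias the same tensor share one mask instead of recomputing it. A minimal standalone sketch of the idea with made-up tensors (not package code):

    import torch

    data = {"numbers": torch.tensor([6, 1, 1, 0])}   # last entry is the padding atom
    nbmat = torch.randint(0, 4, (4, 8))
    data["nbmat"] = nbmat
    data["nbmat_lr"] = nbmat               # same storage, different key
    data["nbmat_coulomb"] = nbmat.clone()  # distinct storage

    processed: dict[int, str] = {}
    for suffix in ("", "_lr", "_coulomb"):
        t = data[f"nbmat{suffix}"]
        if t.data_ptr() in processed:
            data[f"mask_ij{suffix}"] = data[f"mask_ij{processed[t.data_ptr()]}"]
            continue
        processed[t.data_ptr()] = suffix
        data[f"mask_ij{suffix}"] = t == data["numbers"].shape[0] - 1

    assert data["mask_ij_lr"] is data["mask_ij"]     # reused, not recomputed
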
@@ -69,7 +89,7 @@ def calc_masks(data: Dict[str, Tensor]) -> Dict[str, Tensor]:

  def mask_ij_(
      x: Tensor,
-     data: Dict[str, Tensor],
+     data: dict[str, Tensor],
      mask_value: float = 0.0,
      inplace: bool = True,
      suffix: str = "",
@@ -84,7 +104,7 @@ def mask_ij_(
      return x


- def mask_i_(x: Tensor, data: Dict[str, Tensor], mask_value: float = 0.0, inplace: bool = True) -> Tensor:
+ def mask_i_(x: Tensor, data: dict[str, Tensor], mask_value: float = 0.0, inplace: bool = True) -> Tensor:
      nb_mode = get_nb_mode(data)
      if nb_mode == 0:
          if data["_input_padded"].item():
@@ -110,7 +130,47 @@ def mask_i_(x: Tensor, data: Dict[str, Tensor], mask_value: float = 0.0, inplace
      return x


- def get_ij(x: Tensor, data: Dict[str, Tensor], suffix: str = "") -> Tuple[Tensor, Tensor]:
+ def resolve_suffix(data: dict[str, Tensor], suffixes: list[str]) -> str:
+     """Try suffixes in order, return first found, raise if none exist.
+
+     This function makes fallback behavior explicit by requiring a list
+     of acceptable suffixes. Each module controls which neighbor lists
+     are acceptable for its operations.
+
+     For nb_mode=0 (no neighbor matrix), returns empty string since
+     neighbor lists are not used in that mode.
+
+     Parameters
+     ----------
+     data : dict
+         Data dictionary containing neighbor matrices.
+     suffixes : list[str]
+         List of suffixes to try in priority order (e.g., ["_dftd3", "_lr"]).
+         Empty string "" can be included for fallback to base nbmat.
+
+     Returns
+     -------
+     str
+         The first suffix that has a corresponding nbmat{suffix} in data.
+
+     Raises
+     ------
+     KeyError
+         If none of the suffixes have corresponding neighbor matrices.
+     """
+     # In nb_mode=0, there are no neighbor matrices - suffix is unused
+     nb_mode = get_nb_mode(data)
+     if nb_mode == 0:
+         return ""
+
+     for suffix in suffixes:
+         if f"nbmat{suffix}" in data:
+             return suffix
+
+     raise KeyError(f"No neighbor matrix found for any suffix in {suffixes}")
+
+
+ def get_ij(x: Tensor, data: dict[str, Tensor], suffix: str = "") -> tuple[Tensor, Tensor]:
      nb_mode = get_nb_mode(data)
      if nb_mode == 0:
          x_i = x.unsqueeze(2)
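
A usage sketch for the new resolve_suffix helper, assuming aimnet 0.1.0 is importable; the data dict is a minimal stand-in (flat neighbor mode with only a long-range list present):

    import torch
    from aimnet import nbops

    data = {"_nb_mode": torch.tensor(1), "nbmat_lr": torch.zeros(5, 16, dtype=torch.long)}
    suffix = nbops.resolve_suffix(data, ["_dftd3", "_lr", ""])  # -> "_lr": no nbmat_dftd3, so it falls through
    nbmat = data[f"nbmat{suffix}"]
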
@@ -128,7 +188,36 @@ def get_ij(x: Tensor, data: Dict[str, Tensor], suffix: str = "") -> Tuple[Tensor
      return x_i, x_j


- def mol_sum(x: Tensor, data: Dict[str, Tensor]) -> Tensor:
+ def get_i(x: Tensor, data: dict[str, Tensor]) -> Tensor:
+     """Get the i-component of pairwise expansion without computing j.
+
+     This is an optimized version of get_ij when only x_i is needed,
+     avoiding the expensive index_select operation for x_j.
+
+     Parameters
+     ----------
+     x : Tensor
+         Input tensor to expand.
+     data : dict[str, Tensor]
+         Data dictionary containing neighbor mode information.
+
+     Returns
+     -------
+     Tensor
+         The i-component with appropriate unsqueeze for the neighbor mode.
+     """
+     nb_mode = get_nb_mode(data)
+     if nb_mode == 0:
+         return x.unsqueeze(2)
+     elif nb_mode == 1:
+         return x.unsqueeze(1)
+     elif nb_mode == 2:
+         return x.unsqueeze(2)
+     else:
+         raise ValueError(f"Invalid neighbor mode: {nb_mode}")
+
+
+ def mol_sum(x: Tensor, data: dict[str, Tensor]) -> Tensor:
      nb_mode = get_nb_mode(data)
      if nb_mode in (0, 2):
          res = x.sum(dim=1)
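
Callers that only need the broadcastable i-side can now skip the neighbor gather entirely, e.g. (illustrative shapes, flat mode):

    import torch
    from aimnet import nbops

    data = {"_nb_mode": torch.tensor(1)}
    x = torch.randn(5, 16)        # per-atom features
    x_i = nbops.get_i(x, data)    # shape (5, 1, 16): just an unsqueeze, no index_select
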
@@ -140,6 +229,7 @@ def mol_sum(x: Tensor, data: Dict[str, Tensor]) -> Tensor:
          idx = data["mol_idx"]
          # assuming mol_idx is sorted, replace with max if not
          out_size = int(idx[-1].item()) + 1
+
          if x.ndim == 1:
              res = torch.zeros(out_size, device=x.device, dtype=x.dtype)
          else:
aimnet/ops.py CHANGED
@@ -1,5 +1,4 @@
  import math
- from typing import Dict, Optional, Tuple

  import torch
  from torch import Tensor
@@ -7,7 +6,7 @@ from torch import Tensor
  from aimnet import nbops


- def lazy_calc_dij_lr(data: Dict[str, Tensor]) -> Dict[str, Tensor]:
+ def lazy_calc_dij_lr(data: dict[str, Tensor]) -> dict[str, Tensor]:
      if "d_ij_lr" not in data:
          nb_mode = nbops.get_nb_mode(data)
          if nb_mode == 0:
@@ -17,23 +16,67 @@ def lazy_calc_dij_lr(data: Dict[str, Tensor]) -> Dict[str, Tensor]:
      return data


- def calc_distances(data: Dict[str, Tensor], suffix: str = "", pad_value: float = 1.0) -> Tuple[Tensor, Tensor]:
+ def lazy_calc_dij(data: dict[str, Tensor], suffix: str) -> dict[str, Tensor]:
+     """Lazily calculate distances for a given suffix.
+
+     Computes and caches d_ij{suffix} in data dict if not present.
+     For nb_mode=0 (no neighbor list), reuses d_ij.
+
+     Parameters
+     ----------
+     data : dict
+         Data dictionary.
+     suffix : str
+         Suffix for neighbor matrix (e.g., "_coulomb", "_dftd3", "_lr").
+
+     Returns
+     -------
+     dict
+         Data dictionary with d_ij{suffix} added.
+     """
+     key = f"d_ij{suffix}"
+     if key not in data:
+         nb_mode = nbops.get_nb_mode(data)
+         if nb_mode == 0:
+             data[key] = data["d_ij"]
+         else:
+             data[key] = calc_distances(data, suffix=suffix)[0]
+     return data
+
+
+ def calc_distances(data: dict[str, Tensor], suffix: str = "", pad_value: float = 1.0) -> tuple[Tensor, Tensor]:
      coord_i, coord_j = nbops.get_ij(data["coord"], data, suffix)
      if f"shifts{suffix}" in data:
          assert "cell" in data, "cell is required if shifts are provided"
          nb_mode = nbops.get_nb_mode(data)
+         cell = data["cell"]
          if nb_mode == 2:
-             shifts = torch.einsum("bnmd,bdh->bnmh", data[f"shifts{suffix}"], data["cell"])
+             # Batched format: shifts (B, N, M, 3), cell (B, 3, 3) or (3, 3)
+             if cell.ndim == 2:
+                 shifts = torch.einsum("bnmd,dh->bnmh", data[f"shifts{suffix}"], cell)
+             else:
+                 shifts = torch.einsum("bnmd,bdh->bnmh", data[f"shifts{suffix}"], cell)
+         elif nb_mode == 1:
+             # Flat format: shifts (N_total, M, 3), cell (3, 3) or (B, 3, 3)
+             if cell.ndim == 2:
+                 shifts = data[f"shifts{suffix}"] @ cell
+             else:
+                 # Batched cells - need mol_idx to select correct cell for each atom
+                 mol_idx = data["mol_idx"]
+                 atom_cell = cell[mol_idx]  # (N_total, 3, 3)
+                 # shifts: (N_total, M, 3), atom_cell: (N_total, 3, 3)
+                 shifts = torch.einsum("nmd,ndh->nmh", data[f"shifts{suffix}"], atom_cell)
          else:
-             shifts = data[f"shifts{suffix}"] @ data["cell"]
+             # nb_mode == 0: no neighbor matrix, shouldn't have shifts
+             shifts = data[f"shifts{suffix}"] @ cell
          coord_j = coord_j + shifts
      r_ij = coord_j - coord_i
+     r_ij = nbops.mask_ij_(r_ij, data, mask_value=pad_value, inplace=False, suffix=suffix)
      d_ij = torch.norm(r_ij, p=2, dim=-1)
-     d_ij = nbops.mask_ij_(d_ij, data, mask_value=pad_value, inplace=False, suffix=suffix)
      return d_ij, r_ij


- def center_coordinates(coord: Tensor, data: Dict[str, Tensor], masses: Optional[Tensor] = None) -> Tensor:
+ def center_coordinates(coord: Tensor, data: dict[str, Tensor], masses: Tensor | None = None) -> Tensor:
      if masses is not None:
          masses = masses.unsqueeze(-1)
          center = nbops.mol_sum(coord * masses, data) / nbops.mol_sum(masses, data) / data["mol_sizes"].unsqueeze(-1)
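
The flat-mode (nb_mode == 1) branch above converts fractional lattice shifts to Cartesian ones using the cell of the molecule each atom belongs to. A standalone sketch of that indexing with arbitrary shapes (not package code):

    import torch

    n_total, m = 7, 12                               # atoms across the batch, neighbor slots per atom
    mol_idx = torch.tensor([0, 0, 0, 1, 1, 1, 1])    # molecule index of each atom
    cell = torch.rand(2, 3, 3)                       # one 3x3 cell per molecule
    shifts_frac = torch.randint(-1, 2, (n_total, m, 3)).float()

    atom_cell = cell[mol_idx]                        # (n_total, 3, 3): broadcast cells onto atoms
    shifts_cart = torch.einsum("nmd,ndh->nmh", shifts_frac, atom_cell)
    print(shifts_cart.shape)                         # torch.Size([7, 12, 3])
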
@@ -61,16 +104,17 @@ def exp_expand(d_ij: Tensor, shifts: Tensor, eta: float) -> Tensor:
      return torch.exp(-eta * (d_ij.unsqueeze(-1) - shifts) ** 2)


- # pylint: disable=invalid-name
  def nse(
      Q: Tensor,
      q_u: Tensor,
      f_u: Tensor,
-     data: Dict[str, Tensor],
+     data: dict[str, Tensor],
      epsilon: float = 1.0e-6,
  ) -> Tensor:
      # Q and q_u and f_u must have last dimension size 1 or 2
-     F_u = nbops.mol_sum(f_u, data) + epsilon
+     F_u = nbops.mol_sum(f_u, data)
+     if epsilon > 0:
+         F_u = F_u + epsilon
      Q_u = nbops.mol_sum(q_u, data)
      dQ = Q - Q_u
      # for loss
@@ -92,30 +136,36 @@ def nse(
      return q


- def coulomb_matrix_dsf(d_ij: Tensor, Rc: float, alpha: float, data: Dict[str, Tensor]) -> Tensor:
+ def coulomb_matrix_dsf(d_ij: Tensor, Rc: float, alpha: float, data: dict[str, Tensor]) -> Tensor:
      _c1 = (alpha * d_ij).erfc() / d_ij
      _c2 = math.erfc(alpha * Rc) / Rc
      _c3 = _c2 / Rc
      _c4 = 2 * alpha * math.exp(-((alpha * Rc) ** 2)) / (Rc * math.pi**0.5)
      J = _c1 - _c2 + (d_ij - Rc) * (_c3 + _c4)
-     # mask for d_ij > Rc
-     mask = data["mask_ij_lr"] & (d_ij > Rc)
+     # Zero invalid pairs: padding/diagonal (mask_ij_lr) OR beyond cutoff
+     mask = data["mask_ij_lr"] | (d_ij > Rc)
      J.masked_fill_(mask, 0.0)
      return J


- def coulomb_matrix_sf(q_j: Tensor, d_ij: Tensor, Rc: float, data: Dict[str, Tensor]) -> Tensor:
+ def coulomb_matrix_sf(q_j: Tensor, d_ij: Tensor, Rc: float, data: dict[str, Tensor]) -> Tensor:
      _c1 = 1.0 / d_ij
      _c2 = 1.0 / Rc
      _c3 = _c2 / Rc
      J = _c1 - _c2 + (d_ij - Rc) * _c3
-     mask = data["mask_ij_lr"] & (d_ij > Rc)
+     # Zero invalid pairs: padding/diagonal (mask_ij_lr) OR beyond cutoff
+     mask = data["mask_ij_lr"] | (d_ij > Rc)
      J.masked_fill_(mask, 0.0)
      return J


  def get_shifts_within_cutoff(cell: Tensor, cutoff: Tensor) -> Tensor:
-     assert cell.shape == (3, 3), "Batch cell is not supported"
+     """Get all lattice shift vectors within cutoff distance.
+
+     Note: Batched cells are not supported - this function is only used by Ewald summation
+     which is a single-molecule calculation.
+     """
+     assert cell.ndim == 2 and cell.shape == (3, 3), "Batched cells not supported for Ewald summation"
      cell_inv = torch.inverse(cell).mT
      inv_distances = cell_inv.norm(p=2, dim=-1)
      num_repeats = torch.ceil(cutoff * inv_distances).to(torch.long)
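
The switch from & to | in both Coulomb matrices changes which pairs are zeroed: mask_ij_lr flags padding/self pairs that must always be dropped, so an entry now survives only if it is neither padded nor beyond Rc. A small numeric illustration:

    import torch

    d_ij = torch.tensor([1.0, 4.5, 2.0, 6.0])
    pad = torch.tensor([False, False, True, True])   # stand-in for mask_ij_lr
    Rc = 5.0

    old_mask = pad & (d_ij > Rc)   # only zeros padded pairs that are ALSO beyond Rc
    new_mask = pad | (d_ij > Rc)   # zeros every padded pair and every pair beyond Rc
    print(old_mask.tolist())       # [False, False, False, True]
    print(new_mask.tolist())       # [False, False, True, True]
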
@@ -128,10 +178,32 @@ def get_shifts_within_cutoff(cell: Tensor, cutoff: Tensor) -> Tensor:
      return shifts


- def coulomb_matrix_ewald(coord: Tensor, cell: Tensor) -> Tensor:
+ def coulomb_matrix_ewald(coord: Tensor, cell: Tensor, accuracy: float = 1e-8) -> Tensor:
+     """Compute Coulomb matrix using Ewald summation.
+
+     Parameters
+     ----------
+     coord : Tensor
+         Atomic coordinates, shape (N, 3).
+     cell : Tensor
+         Unit cell vectors, shape (3, 3).
+     accuracy : float
+         Target accuracy for the Ewald summation. Controls the real-space
+         and reciprocal-space cutoffs. Lower values give higher accuracy
+         but require more computation. Default is 1e-8.
+
+     The cutoffs are computed as:
+     - eta = (V^2 / N)^(1/6) / sqrt(2*pi)
+     - cutoff_real = sqrt(-2 * ln(accuracy)) * eta
+     - cutoff_recip = sqrt(-2 * ln(accuracy)) / eta
+
+     Returns
+     -------
+     Tensor
+         Coulomb matrix J, shape (N, N).
+     """
      # single molecule implementation. nb_mode == 1
      assert coord.ndim == 2 and cell.ndim == 2, "Only single molecule is supported"
-     accuracy = 1e-8
      N = coord.shape[0]
      volume = torch.det(cell)
      eta = ((volume**2 / N) ** (1 / 6)) / math.sqrt(2.0 * math.pi)
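
The accuracy argument feeds the cutoff formulas quoted in the docstring; a quick numeric check with illustrative values (a 10 Å cubic cell, 64 atoms):

    import math

    volume, n_atoms, accuracy = 1000.0, 64, 1e-8
    eta = ((volume**2 / n_atoms) ** (1 / 6)) / math.sqrt(2.0 * math.pi)
    scale = math.sqrt(-2.0 * math.log(accuracy))
    cutoff_real = scale * eta    # real-space cutoff grows with eta
    cutoff_recip = scale / eta   # reciprocal-space cutoff shrinks with eta
    print(round(eta, 3), round(cutoff_real, 2), round(cutoff_recip, 2))  # 1.995 12.11 3.04
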
@@ -0,0 +1,226 @@
+ #!/usr/bin/env python3
+ """Export trained model to distributable state dict format.
+
+ This script creates a self-contained .pt file from training artifacts:
+ - Raw PyTorch weights (.pt)
+ - Self-atomic energies (.sae)
+ - Model YAML configuration
+
+ The output file contains:
+ - model_yaml: Core model config (without LRCoulomb/DFTD3, with SRCoulomb if needed)
+ - cutoff: Model cutoff
+ - needs_coulomb: Whether calculator should add external Coulomb
+ - needs_dispersion: Whether calculator should add external DFTD3
+ - coulomb_mode: "sr_embedded" | "none" (describes what's in the model)
+ - coulomb_sr_rc: Coulomb short-range cutoff (optional, if coulomb_mode="sr_embedded")
+ - coulomb_sr_envelope: Envelope function ("exp" or "cosine", optional)
+ - d3_params: D3 parameters {s8, a1, a2, s6} (optional, if needs_dispersion=True)
+ - has_embedded_lr: Whether model has embedded LR modules (D3TS, SRCoulomb) needing nbmat_lr
+ - implemented_species: Parametrized atomic numbers
+ - state_dict: Model weights with SAE baked into atomic_shift (float64)
+ """
+
+ import copy
+
+ import click
+ import torch
+ import yaml
+ from torch import nn
+
+ from aimnet.config import build_module, load_yaml
+ from aimnet.models.utils import strip_lr_modules_from_yaml, validate_state_dict_keys
+
+
+ def load_sae(sae_file: str) -> dict[int, float]:
+     """Load SAE file (YAML-like format: atomic_number: energy)."""
+     sae = load_yaml(sae_file)
+     if not isinstance(sae, dict):
+         raise TypeError("SAE file must contain a dictionary.")
+     return {int(k): float(v) for k, v in sae.items()}
+
+
+ def bake_sae_into_model(model: nn.Module, sae: dict[int, float]) -> nn.Module:
+     """Add SAE values to atomic_shift.shifts.weight (converted to float64)."""
+     # Disable gradients before in-place operation
+     for p in model.parameters():
+         p.requires_grad_(False)
+     model.outputs.atomic_shift.double()  # type: ignore
+     for k, v in sae.items():
+         model.outputs.atomic_shift.shifts.weight[k] += v  # type: ignore
+     return model
+
+
+ def extract_cutoff(model: nn.Module) -> float:
+     """Extract cutoff from model's AEV module."""
+     return float(model.aev.rc_s.item())  # type: ignore
+
+
+ def get_implemented_species(sae: dict[int, float]) -> list[int]:
+     """Get list of implemented species from SAE."""
+     return sorted(sae.keys())
+
+
+ def mask_not_implemented_species(model: nn.Module, species: list[int]) -> nn.Module:
+     """Set NaN for species not in the SAE."""
+     weight = model.afv.weight  # type: ignore
+     for i in range(1, weight.shape[0]):  # type: ignore
+         if i not in species:
+             weight[i, :] = torch.nan  # type: ignore
+     return model
+
+
+ @click.command()
+ @click.argument("weights", type=click.Path(exists=True))
+ @click.argument("output", type=str)
+ @click.option("--model", "-m", type=click.Path(exists=True), required=True, help="Path to model definition YAML file")
+ @click.option("--sae", "-s", type=click.Path(exists=True), required=True, help="Path to the SAE YAML file")
+ @click.option(
+     "--needs-coulomb/--no-coulomb", default=None, help="Override Coulomb detection. Default: auto-detect from YAML"
+ )
+ @click.option(
+     "--needs-dispersion/--no-dispersion",
+     default=None,
+     help="Override dispersion detection. Default: auto-detect from YAML",
+ )
+ def export_model(
+     weights: str,
+     output: str,
+     model: str,
+     sae: str,
+     needs_coulomb: bool | None,
+     needs_dispersion: bool | None,
+ ):
+     """Export trained model to distributable state dict format.
+
+     weights: Path to the raw PyTorch weights file (.pt).
+     output: Path to the output .pt file.
+
+     Example:
+         aimnet export weights.pt model.pt --model config.yaml --sae model.sae
+     """
+     # Load model YAML
+     print(f"Loading config from {model}")
+     with open(model, encoding="utf-8") as f:
+         model_config = yaml.safe_load(f)
+
+     # Load SAE
+     print(f"Loading SAE from {sae}")
+     sae_dict = load_sae(sae)
+     implemented_species = get_implemented_species(sae_dict)
+
+     # Load source state dict
+     print(f"Loading weights from {weights}")
+     source_sd = torch.load(weights, map_location="cpu", weights_only=True)
+
+     # Strip LR modules and detect flags
+     core_config, coulomb_mode, needs_dispersion_auto, d3_params, coulomb_sr_rc, coulomb_sr_envelope, disp_ptfile = (
+         strip_lr_modules_from_yaml(model_config, source_sd)
+     )
+
+     # Serialize YAML BEFORE building module (build_module mutates the dict)
+     core_yaml_str = yaml.dump(core_config, default_flow_style=False, sort_keys=False)
+
+     # Build model from modified config
+     print("Building model...")
+     core_model = build_module(copy.deepcopy(core_config))
+     if not isinstance(core_model, nn.Module):
+         raise TypeError("Built module is not an nn.Module")
+
+     # Load weights with strict=False (modules may differ)
+     load_result = core_model.load_state_dict(source_sd, strict=False)
+
+     # Check for unexpected missing/extra keys
+     real_missing, real_unexpected = validate_state_dict_keys(load_result.missing_keys, load_result.unexpected_keys)
+     if real_missing:
+         print(f"WARNING: Unexpected missing keys: {real_missing}")
+     if real_unexpected:
+         print(f"WARNING: Unexpected extra keys in source: {real_unexpected}")
+     if not real_missing and not real_unexpected:
+         print("Loaded weights successfully")
+
+     # Load dispersion parameters from ptfile and inject into model
+     # (raw training weights don't contain disp_param0 buffer)
+     if disp_ptfile is not None:
+         disp_params = torch.load(disp_ptfile, map_location="cpu", weights_only=True)
+         for _name, module in core_model.named_modules():
+             if hasattr(module, "disp_param0"):
+                 # Resize buffer if needed (ptfile may have different shape than placeholder)
+                 if module.disp_param0.shape != disp_params.shape:
+                     module.disp_param0 = torch.zeros_like(disp_params)
+                 module.disp_param0.copy_(disp_params)
+                 print(f"Loaded disp_param0 from {disp_ptfile}")
+                 break
+
+     # Bake SAE into atomic_shift (float64)
+     print("Baking SAE into atomic_shift...")
+     core_model = bake_sae_into_model(core_model, sae_dict)
+
+     # Mask not-implemented species
+     core_model = mask_not_implemented_species(core_model, implemented_species)
+
+     # Extract cutoff
+     cutoff = extract_cutoff(core_model)
+
+     # Set model to eval mode
+     core_model.eval()
+
+     # Determine final flags (CLI overrides auto-detection)
+     auto_needs_coulomb = coulomb_mode == "sr_embedded"
+     auto_needs_dispersion = needs_dispersion_auto
+
+     final_needs_coulomb = needs_coulomb if needs_coulomb is not None else auto_needs_coulomb
+     final_needs_dispersion = needs_dispersion if needs_dispersion is not None else auto_needs_dispersion
+
+     # Warn if overriding auto-detection
+     if needs_coulomb is not None and needs_coulomb != auto_needs_coulomb:
+         print(f" Overriding needs_coulomb: {auto_needs_coulomb} -> {needs_coulomb}")
+     if needs_dispersion is not None and needs_dispersion != auto_needs_dispersion:
+         print(f" Overriding needs_dispersion: {auto_needs_dispersion} -> {needs_dispersion}")
+
+     # Detect if model has any embedded LR modules that need nbmat_lr
+     outputs = model_config.get("kwargs", {}).get("outputs", {})
+     has_embedded_lr = False
+
+     # Check for embedded D3TS (uses NN-predicted C6/alpha, must stay embedded)
+     has_d3ts = any("D3TS" in outputs.get(k, {}).get("class", "") for k in ["dftd3", "d3bj", "d3ts"])
+     if has_d3ts:
+         has_embedded_lr = True
+
+     # Check for embedded SRCoulomb (model had LRCoulomb before conversion)
+     if coulomb_mode == "sr_embedded":
+         has_embedded_lr = True
+
+     # Create new format dict
+     new_format = {
+         "format_version": 2,  # v2 = new .pt format (v1 = legacy .jpt)
+         "model_yaml": core_yaml_str,
+         "cutoff": cutoff,
+         "needs_coulomb": final_needs_coulomb,
+         "needs_dispersion": final_needs_dispersion,
+         "coulomb_mode": coulomb_mode,
+         "coulomb_sr_rc": coulomb_sr_rc if final_needs_coulomb else None,
+         "coulomb_sr_envelope": coulomb_sr_envelope if final_needs_coulomb else None,
+         "d3_params": d3_params if final_needs_dispersion else None,
+         "has_embedded_lr": has_embedded_lr,
+         "implemented_species": implemented_species,
+         "state_dict": core_model.state_dict(),
+     }
+
+     # Save
+     torch.save(new_format, output)
+     print(f"\nSaved model to {output}")
+     print(f" cutoff: {cutoff}")
+     print(f" needs_coulomb: {final_needs_coulomb}")
+     print(f" needs_dispersion: {final_needs_dispersion}")
+     print(f" coulomb_mode: {coulomb_mode}")
+     if final_needs_coulomb:
+         print(f" coulomb_sr_rc: {coulomb_sr_rc}")
+         print(f" coulomb_sr_envelope: {coulomb_sr_envelope}")
+     if final_needs_dispersion:
+         print(f" d3_params: {d3_params}")
+     print(f" has_embedded_lr: {has_embedded_lr}")
+     print(f" implemented_species: {implemented_species}")
+
+
+ if __name__ == "__main__":
+     export_model()
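
A sketch of how downstream code might consume the exported v2 file; the field names come from the module docstring above, while the path and loading flags are illustrative:

    import torch
    import yaml

    blob = torch.load("model.pt", map_location="cpu", weights_only=True)  # plain dict of tensors and primitives
    assert blob["format_version"] == 2
    config = yaml.safe_load(blob["model_yaml"])
    print(blob["cutoff"], blob["needs_coulomb"], blob["needs_dispersion"], blob["coulomb_mode"])
    print(blob["implemented_species"])
    # blob["state_dict"] carries the weights with the SAE baked into atomic_shift
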
aimnet/train/loss.py CHANGED
@@ -1,5 +1,5 @@
  from functools import partial
- from typing import Any, Dict
+ from typing import Any

  import torch
  from torch import Tensor
@@ -30,7 +30,7 @@ class MTLoss:
          Dict[str, Tensor]: total loss under key 'loss' and values for individual components.
      """

-     def __init__(self, components: Dict[str, Any]):
+     def __init__(self, components: dict[str, Any]):
          w_sum = sum(c["weight"] for c in components.values())
          self.components = {}
          for name, c in components.items():
@@ -38,7 +38,7 @@ class MTLoss:
              fn = partial(get_module(c["fn"]), **kwargs)
              self.components[name] = (fn, c["weight"] / w_sum)

-     def __call__(self, y_pred: Dict[str, Tensor], y_true: Dict[str, Tensor]) -> Dict[str, Tensor]:
+     def __call__(self, y_pred: dict[str, Tensor], y_true: dict[str, Tensor]) -> dict[str, Tensor]:
          loss = {}
          for name, (fn, w) in self.components.items():
              _l = fn(y_pred=y_pred, y_true=y_true)
@@ -48,7 +48,7 @@ class MTLoss:
          return loss


- def mse_loss_fn(y_pred: Dict[str, Tensor], y_true: Dict[str, Tensor], key_pred: str, key_true: str) -> Tensor:
+ def mse_loss_fn(y_pred: dict[str, Tensor], y_true: dict[str, Tensor], key_pred: str, key_true: str) -> Tensor:
      """General MSE loss function"""
      x = y_true[key_true]
      y = y_pred[key_pred]
@@ -56,7 +56,7 @@ def mse_loss_fn(y_pred: Dict[str, Tensor], y_true: Dict[str, Tensor], key_pred:
      return loss


- def peratom_loss_fn(y_pred: Dict[str, Tensor], y_true: Dict[str, Tensor], key_pred: str, key_true: str) -> Tensor:
+ def peratom_loss_fn(y_pred: dict[str, Tensor], y_true: dict[str, Tensor], key_pred: str, key_true: str) -> Tensor:
      """MSE loss function with per-atom normalization correction.
      Suitable when some of the values are zero both in y_pred and y_true due to padding of inputs.
      """
@@ -73,11 +73,11 @@ def peratom_loss_fn(y_pred: Dict[str, Tensor], y_true: Dict[str, Tensor], key_pr


  def energy_loss_fn(
-     y_pred: Dict[str, Tensor], y_true: Dict[str, Tensor], key_pred: str = "energy", key_true: str = "energy"
+     y_pred: dict[str, Tensor], y_true: dict[str, Tensor], key_pred: str = "energy", key_true: str = "energy"
  ) -> Tensor:
      """MSE loss normalized by the number of atoms."""
      x = y_true[key_true]
      y = y_pred[key_pred]
-     s = y_pred["_natom"].sqrt()
+     s = y_pred["_natom"] ** 0.5
      loss = ((x - y).pow(2) / s).mean() if y_pred["_natom"].numel() > 1 else torch.nn.functional.mse_loss(x, y) / s
      return loss