PyPI - bayesianflow-for-chem - Versions diffs - 1.4.0__py3-none-any.whl → 1.4.2__py3-none-any.whl - Mend

bayesianflow-for-chem 1.4.0__py3-none-any.whl → 1.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of bayesianflow-for-chem might be problematic. Click here for more details.

Files changed (9) hide show

bayesianflow_for_chem/__init__.py CHANGED Viewed

@@ -7,5 +7,5 @@ from . import data, tool, train, scorer
 from .model import ChemBFN, MLP, EnsembleChemBFN
 __all__ = ["data", "tool", "train", "scorer", "ChemBFN", "MLP", "EnsembleChemBFN"]
-__version__ = "1.4.0"
+__version__ = "1.4.2"
 __author__ = "Nianze A. Tao (Omozawa Sueno)"

bayesianflow_for_chem/model.py CHANGED Viewed

@@ -54,9 +54,19 @@ class Linear(nn.Linear):
         :return:
         :rtype: None
         """
+        from torchao.dtypes.affine_quantized_tensor import AffineQuantizedTensor
         assert r > 0, "Rank should be larger than 0."
-        self.lora_A = nn.Parameter(self.weight.new_zeros((r, self.in_features)))
-        self.lora_B = nn.Parameter(self.weight.new_zeros((self.out_features, r)))
+        if isinstance(self.weight, AffineQuantizedTensor):
+            self.lora_A = nn.Parameter(
+                torch.zeros((r, self.in_features), device=self.weight.device)
+            )
+            self.lora_B = nn.Parameter(
+                torch.zeros((self.out_features, r), device=self.weight.device)
+            )
+        else:
+            self.lora_A = nn.Parameter(self.weight.new_zeros((r, self.in_features)))
+            self.lora_B = nn.Parameter(self.weight.new_zeros((self.out_features, r)))
         self.scaling = lora_alpha / r
         self.lora_dropout = lora_dropout
         self.lora_enabled = True
@@ -169,16 +179,6 @@ class Attention(nn.Module):
         k = k.view(split).permute(2, 0, 1, 3).contiguous()
         v = v.view(split).permute(2, 0, 1, 3).contiguous()
         q, k = self._rotate(q, k, pe)  # position embedding
-        """
-        # Original code. Maybe using `nn.functional.scaled_dot_product_attention(...)` is better.
-        k_t = k.transpose(-2, -1)
-        if mask is not None:
-            alpha = softmax((q @ k_t / self.tp).masked_fill_(mask, -torch.inf), -1)
-        else:
-            alpha = softmax(q @ k_t / self.tp, -1)
-        atten_out = (alpha @ v).permute(1, 2, 0, 3).contiguous().view(shape)
-        """
         atten_out = nn.functional.scaled_dot_product_attention(
             q, k, v, mask, 0.0, False, scale=1 / self.tp
         )
@@ -430,19 +430,14 @@ class ChemBFN(nn.Module):
             c += y
         pe = self.position(n_t)
         x = self.embedding(x)
-        attn_mask: Optional[Tensor] = None
         if self.semi_autoregressive:
             attn_mask = torch.tril(
                 torch.ones((1, n_b, n_t, n_t), device=x.device), diagonal=0
             )
+        elif mask is not None:
+            attn_mask = mask.transpose(-2, -1).repeat(1, n_t, 1)[None, ...] != 0
         else:
-            if mask is not None:
-                """
-                # Original Code.
-                attn_mask = mask.transpose(-2, -1).repeat(1, x.shape[1], 1)[None, ...] == 0
-                """
-                attn_mask = mask.transpose(-2, -1).repeat(1, n_t, 1)[None, ...] != 0
+            attn_mask = None
         for layer in self.encoder_layers:
             x = layer(x, pe, c, attn_mask)
         return self.final_layer(x, c, mask is None)
@@ -1222,23 +1217,23 @@ class EnsembleChemBFN(ChemBFN):
         )
     def quantise(
-        self, quantise_method: Optional[Callable[[ChemBFN], nn.Module]] = None
+        self, quantise_method: Optional[Callable[[ChemBFN], None]] = None
     ) -> None:
         """
         Quantise the submodels. \n
         This method should be called, if necessary, before `torch.compile()`.
-        :param quantise_method: quantisation method; default is `bayesianflow_for_chem.tool.quantise_model`
+        :param quantise_method: quantisation method; default is `bayesianflow_for_chem.tool.quantise_model_`
         :type quantise_method: callable | None
         :return:
         :rtype: None
         """
         if quantise_method is None:
-            from bayesianflow_for_chem.tool import quantise_model
+            from bayesianflow_for_chem.tool import quantise_model_
-            quantise_method = quantise_model
-        for k, v in self.models.items():
-            self.models[k] = quantise_method(v)
+            quantise_method = quantise_model_
+        for _, v in self.models.items():
+            quantise_method(v)
     def jit(self, freeze: bool = False) -> None:
         """

bayesianflow_for_chem/tool.py CHANGED Viewed

@@ -13,18 +13,18 @@ import torch
 import numpy as np
 import torch.nn as nn
 from torch import cuda, Tensor, softmax
-from torch.ao import quantization
 from torch.utils.data import DataLoader
-from typing_extensions import Self
-from rdkit.Chem.rdchem import Mol, Bond
+from typing_extensions import Self, deprecated
 from rdkit.Chem import (
     rdDetermineBonds,
+    GetFormalCharge,
     MolFromXYZBlock,
     MolFromSmiles,
     MolToSmiles,
     CanonSmiles,
     AllChem,
     AddHs,
+    Mol,
 )
 from rdkit.Chem.Scaffolds.MurckoScaffold import MurckoScaffoldSmiles  # type: ignore
 from sklearn.metrics import (
@@ -385,6 +385,11 @@ def inpaint(
     ]
+@deprecated(
+    "Eager mode quantization from `torch.ao` is deprecated and will be remove in version 2.10, "
+    "so this fuction will stop working since that time. "
+    "Please use `quantise_model_` instead."
+)
 def quantise_model(model: ChemBFN) -> nn.Module:
     """
     Dynamic quantisation of the trained model to `torch.qint8` data type.
@@ -394,6 +399,7 @@ def quantise_model(model: ChemBFN) -> nn.Module:
     :return: quantised model
     :rtype: torch.nn.Module
     """
+    from torch.ao import quantization
     from torch.ao.nn.quantized import dynamic
     from torch.ao.nn.quantized.modules.utils import _quantize_weight
     from torch.ao.quantization.qconfig import default_dynamic_qconfig
@@ -526,6 +532,24 @@ def quantise_model(model: ChemBFN) -> nn.Module:
     return quantised_model
+def quantise_model_(model: ChemBFN) -> None:
+    """
+    In-place dynamic quantisation of the trained model to `int8` data type. \n
+    Due to some limitations of `torchao` module, it is slower than method previded by `torch.ao`.
+    :param model: trained ChemBFN model
+    :type model: bayesianflow_for_chem.model.ChemBFN
+    :return:
+    :rtype: None
+    """
+    from torchao.quantization.quant_api import (
+        quantize_,
+        Int8DynamicActivationInt8WeightConfig,
+    )
+    quantize_(model, Int8DynamicActivationInt8WeightConfig())
 class GeometryConverter:
     """
     Converting between different 2D/3D molecular representations.
@@ -539,40 +563,88 @@ class GeometryConverter:
             xyz_block.append(f"{atom} {r[i][0]:.10f} {r[i][1]:.10f} {r[i][2]:.10f}")
         return MolFromXYZBlock("\n".join(xyz_block))
-    @staticmethod
-    def _bond_pair_idx(bonds: Bond) -> List[List[int]]:
-        return [[i.GetBeginAtomIdx(), i.GetEndAtomIdx()] for i in bonds]
     @staticmethod
     def smiles2cartesian(
-        smiles: str, num_conformers: int = 50, random_seed: int = 42
+        smiles: str,
+        num_conformers: int = 50,
+        rdkit_ff_type: str = "MMFF",
+        refine_with_crest: bool = False,
+        spin: float = 0.0,
     ) -> Tuple[List[str], np.ndarray]:
         """
         Guess the 3D geometry from SMILES string via MMFF conformer search.
         :param smiles: a valid SMILES string
         :param num_conformers: number of initial conformers
-        :param random_seed: random seed used to generate conformers
+        :param rdkit_ff_type: force field type chosen in `'MMFF'` and `'UFF'`
+        :param refine_with_crest: find the best conformer via CREST
+        :param spin: total spin; only required when `refine_with_crest=True`
         :type smiles: str
         :type num_conformers: int
-        :type random_seed: int
+        :type rdkit_ff_type: str
+        :type refine_with_crest: bool
+        :type spin: float
         :return: atomic symbols \n
                  cartesian coordinates;  shape: (n_a, 3)
         :rtype: tuple
         """
+        assert rdkit_ff_type.lower() in ("mmff", "uff")
+        if refine_with_crest:
+            from tempfile import TemporaryDirectory
+            from subprocess import run
+            # We need both CREST and xTB installed.
+            if run("crest --version", shell=True).returncode != 0:
+                raise RuntimeError(
+                    "`CREST` is not found! Make sure it is installed and added into the PATH."
+                )
+            if run("xtb --version", shell=True).returncode != 0:
+                raise RuntimeError(
+                    "`xTB` is not found! Make sure it is installed and added into the PATH."
+                )
         mol = MolFromSmiles(smiles)
         mol = AddHs(mol)
-        AllChem.EmbedMultipleConfs(mol, numConfs=num_conformers, randomSeed=random_seed)
+        AllChem.EmbedMultipleConfs(mol, numConfs=num_conformers, params=AllChem.ETKDG())
         symbols = [atom.GetSymbol() for atom in mol.GetAtoms()]
         energies = []
         for conf_id in range(num_conformers):
-            ff = AllChem.MMFFGetMoleculeForceField(
-                mol, AllChem.MMFFGetMoleculeProperties(mol), confId=conf_id
-            )
+            if rdkit_ff_type.lower() == "mmff":
+                ff = AllChem.MMFFGetMoleculeForceField(
+                    mol, AllChem.MMFFGetMoleculeProperties(mol), confId=conf_id
+                )
+            else:  # UFF
+                ff = AllChem.UFFGetMoleculeForceField(mol, confId=conf_id)
             energy = ff.CalcEnergy()
             energies.append((conf_id, energy))
         lowest_energy_conf = min(energies, key=lambda x: x[1])
         coordinates = mol.GetConformer(id=lowest_energy_conf[0]).GetPositions()
+        if refine_with_crest:
+            xyz = f"{len(symbols)}\n\n" + "\n".join(
+                f"{s} {coordinates[i][0]:.10f} {coordinates[i][1]:.10f} {coordinates[i][2]:.10f}"
+                for i, s in enumerate(symbols)
+            )
+            chrg = GetFormalCharge(mol)
+            uhf = int(spin * 2)
+            with TemporaryDirectory(dir=Path.cwd()) as temp_dir:
+                with open(Path(temp_dir) / "mol.xyz", "w", encoding="utf-8") as f:
+                    f.write(xyz)
+                s = run(
+                    f"crest mol.xyz -gfn2 -quick -prop ohess{f' --chrg {chrg}' if chrg != 0 else ''}{f' --uhf {uhf}' if uhf != 0 else ''}",
+                    shell=True,
+                    cwd=temp_dir,
+                )
+                if s.returncode == 0:
+                    with open(Path(temp_dir) / "crest_property.xyz", "r") as f:
+                        xyz = f.readlines()
+                    xyz_data = []
+                    for i in xyz[2:]:
+                        if i == xyz[0]:
+                            break
+                        xyz_data.append(i.strip().split())
+                    xyz_data = np.array(xyz_data)
+                    symbols, coordinates = np.split(xyz_data, [1], axis=-1)
+                    symbols = symbols.flatten().tolist()
+                    coordinates = coordinates.astype(np.float64)
         return symbols, coordinates
     def cartesian2smiles(

{bayesianflow_for_chem-1.4.0.dist-info → bayesianflow_for_chem-1.4.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bayesianflow_for_chem
-Version: 1.4.0
+Version: 1.4.2
 Summary: Bayesian flow network framework for Chemistry
 Home-page: https://augus1999.github.io/bayesian-flow-network-for-chemistry/
 Author: Nianze A. Tao
@@ -23,6 +23,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: rdkit>=2023.9.6
 Requires-Dist: torch>=2.3.1
+Requires-Dist: torchao>=0.12
 Requires-Dist: numpy>=1.26.4
 Requires-Dist: loralib>=0.1.2
 Requires-Dist: lightning>=2.2.0

bayesianflow_for_chem-1.4.2.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+bayesianflow_for_chem/__init__.py,sha256=IeIasLe6wLuGbH7DIlB38ehDPqvlMBT388hf58I3J30,329
+bayesianflow_for_chem/data.py,sha256=WoOCOVmJX4WeHa2WeO4i66J2FS8rvRaYRCdlBN7ZeOM,6576
+bayesianflow_for_chem/model.py,sha256=6pxGuIM7rKyawcz2hI8dT88rv3qFsnCvlLhDj1CB9YU,50595
+bayesianflow_for_chem/scorer.py,sha256=7G1TVSwC0qONtNm6kiDZUWwvuFPzasNSjp4eJAk5TL0,4101
+bayesianflow_for_chem/tool.py,sha256=Ne_ew1P8r6KWOqUZpb-BL_q7Dm6fnSTtxhJvgV1JHHs,26264
+bayesianflow_for_chem/train.py,sha256=hGKyhGhLch-exSYPZdLXrLn3gf39Q1VLSJs2qtuikQE,9709
+bayesianflow_for_chem/vocab.txt,sha256=HgtAZmpWYk4y8PqEVC4vqut1vE75DfRKE_10s2UW0rU,790
+bayesianflow_for_chem-1.4.2.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+bayesianflow_for_chem-1.4.2.dist-info/METADATA,sha256=s6k85HFXvasxvZBJD3Rj8cFNJXehS-utcMeKC6tP8F8,5673
+bayesianflow_for_chem-1.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+bayesianflow_for_chem-1.4.2.dist-info/top_level.txt,sha256=KHsanI3BMCt8D9Qpze2ycrF6nMa3PyojgO6eS1c8kco,22
+bayesianflow_for_chem-1.4.2.dist-info/RECORD,,

bayesianflow_for_chem-1.4.0.dist-info/RECORD DELETED Viewed

@@ -1,12 +0,0 @@
-bayesianflow_for_chem/__init__.py,sha256=3sP8nM4_idOX-ksvpBJEApxPAVAPijKvQHxidTO5790,329
-bayesianflow_for_chem/data.py,sha256=WoOCOVmJX4WeHa2WeO4i66J2FS8rvRaYRCdlBN7ZeOM,6576
-bayesianflow_for_chem/model.py,sha256=fUrXKhn2U9FrVPJyb4lqACqPTePkIgI0v6_1jPs5c0Q,50784
-bayesianflow_for_chem/scorer.py,sha256=7G1TVSwC0qONtNm6kiDZUWwvuFPzasNSjp4eJAk5TL0,4101
-bayesianflow_for_chem/tool.py,sha256=NMMRHk2FJY0fyA76zCrz6tkcylCuExMUMj5hohWTnkE,23155
-bayesianflow_for_chem/train.py,sha256=hGKyhGhLch-exSYPZdLXrLn3gf39Q1VLSJs2qtuikQE,9709
-bayesianflow_for_chem/vocab.txt,sha256=HgtAZmpWYk4y8PqEVC4vqut1vE75DfRKE_10s2UW0rU,790
-bayesianflow_for_chem-1.4.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
-bayesianflow_for_chem-1.4.0.dist-info/METADATA,sha256=1Y5mLIOaPsHcyCCm2SkWz7OCniQYVJ67-cVq3cUU0Mw,5643
-bayesianflow_for_chem-1.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-bayesianflow_for_chem-1.4.0.dist-info/top_level.txt,sha256=KHsanI3BMCt8D9Qpze2ycrF6nMa3PyojgO6eS1c8kco,22
-bayesianflow_for_chem-1.4.0.dist-info/RECORD,,

{bayesianflow_for_chem-1.4.0.dist-info → bayesianflow_for_chem-1.4.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{bayesianflow_for_chem-1.4.0.dist-info → bayesianflow_for_chem-1.4.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{bayesianflow_for_chem-1.4.0.dist-info → bayesianflow_for_chem-1.4.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

bayesianflow-for-chem 1.4.0__py3-none-any.whl → 1.4.2__py3-none-any.whl

Potentially problematic release.

bayesianflow-for-chem 1.4.0__py3-none-any.whl → 1.4.2__py3-none-any.whl