bayesianflow-for-chem 2.0.5__tar.gz → 2.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bayesianflow-for-chem might be problematic.
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/PKG-INFO +1 -1
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem/__init__.py +1 -1
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem/cli.py +35 -4
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem/model.py +168 -1
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem/tool.py +91 -1
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem.egg-info/PKG-INFO +1 -1
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem.egg-info/SOURCES.txt +1 -0
- bayesianflow_for_chem-2.1.0/test/test_jit_compatibility.py +28 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/LICENSE +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/README.md +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem/data.py +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem/scorer.py +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem/spectra.py +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem/train.py +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem/vocab.txt +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem.egg-info/dependency_links.txt +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem.egg-info/entry_points.txt +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem.egg-info/requires.txt +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/bayesianflow_for_chem.egg-info/top_level.txt +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/pyproject.toml +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/setup.cfg +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/setup.py +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/test/test_merge_lora.py +0 -0
- {bayesianflow_for_chem-2.0.5 → bayesianflow_for_chem-2.1.0}/test/test_molecular_embedding.py +0 -0
bayesianflow_for_chem/cli.py

@@ -32,7 +32,7 @@ from bayesianflow_for_chem.data import (
     collate,
     CSVData,
 )
-from bayesianflow_for_chem.tool import sample, inpaint
+from bayesianflow_for_chem.tool import sample, inpaint, optimise, adjust_lora_
 
 
 """
@@ -99,9 +99,11 @@ sample_size = 1000 # the minimum number of samples you want
 sample_step = 100
 sample_method = "ODE:0.5" # ODE-solver with temperature of 0.5; another choice is "BFN"
 semi_autoregressive = false
+lora_scaling = 1.0 # LoRA scaling if applied
 guidance_objective = [-0.023, 0.09, 0.113] # if no objective is needed set it to empty array []
 guidance_objective_strength = 4.0 # unnecessary if guidance_objective = []
 guidance_scaffold = "c1ccccc1" # if no scaffold is used set it to empty string ""
+sample_template = "" # template for mol2mol task; leave it blank if scaffold is used
 unwanted_token = []
 exclude_invalid = true # to only store valid samples
 exclude_duplicate = true # to only store unique samples
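
The two new keys above ship with the default configuration template. As a rough illustration (not part of the package) of how they interact, assuming the config is plain TOML loaded with Python's tomllib and a hypothetical file name:

    # Illustrative only: how the two new [inference] keys relate to each other.
    # The real parsing lives in load_runtime_config (bayesianflow_for_chem/cli.py);
    # the file name and tomllib loading here are assumptions.
    import tomllib  # Python 3.11+

    with open("madmol_config.toml", "rb") as f:  # hypothetical config file
        config = tomllib.load(f)

    inference = config["inference"]
    lora_scaling = inference.get("lora_scaling", 1.0)   # new in 2.1.0, defaults to 1.0
    scaffold = inference.get("guidance_scaffold", "")
    template = inference.get("sample_template", "")     # new in 2.1.0, enables mol2mol
    if scaffold and template:
        # mirrors the new warning in load_runtime_config: the two modes are exclusive
        print("Warning: Inpaint task or mol2mol task?")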
@@ -130,7 +132,7 @@ def parse_cli(version: str) -> argparse.Namespace:
     """
     parser = argparse.ArgumentParser(
         description="Madmol: a CLI molecular design tool for "
-        "de novo design, R-group replacement, and sequence in-filling, "
+        "de novo design, R-group replacement, molecule optimisation, and sequence in-filling, "
         "based on generative route of ChemBFN method. "
         "Let's make some craziest molecules.",
         epilog=f"Madmol {version}, developed in Hiroshima University by chemists for chemists. "
@@ -157,7 +159,7 @@ def parse_cli(version: str) -> argparse.Namespace:
         "-D",
         "--dryrun",
         action="store_true",
-        help="dry-run to check the configurations",
+        help="dry-run to check the configurations and exit",
     )
     parser.add_argument("-V", "--version", action="version", version=version)
     return parser.parse_args()
@@ -284,6 +286,14 @@ def load_runtime_config(
             f"\033[0;33mWarning\033[0;0m in {config_file}: Directory {result_dir} to save the result does not exist."
         )
         flag_warning += 1
+    if (
+        config["inference"]["guidance_scaffold"] != ""
+        and config["inference"]["sample_template"] != ""
+    ):
+        print(
+            f"\033[0;33mWarning\033[0;0m in {config_file}: Inpaint task or mol2mol task?"
+        )
+        flag_warning += 1
     return config, flag_critical, flag_warning
 
 
@@ -520,6 +530,7 @@ def main_script(version: str) -> None:
     if "train" in runtime_config:
         bfn = model.model
         mlp = model.mlp
+    lora_scaling = runtime_config["inference"].get("lora_scaling", 1.0)
     # ####### strat inference #######
     bfn.semi_autoregressive = runtime_config["inference"]["semi_autoregressive"]
     _device = (
@@ -550,8 +561,16 @@ def main_script(version: str) -> None:
             x[:-1], (0, sequence_length - x.shape[-1] + 1), value=0
         )
         x = x[None, :].repeat(batch_size, 1)
+        # then sample template will be ignored.
+    elif runtime_config["inference"]["sample_template"]:
+        template = runtime_config["inference"]["sample_template"]
+        x = tokeniser(template)
+        x = torch.nn.functional.pad(x, (0, sequence_length - x.shape[-1]), value=0)
+        x = x[None, :].repeat(batch_size, 1)
     else:
         x = None
+    if bfn.lora_enabled:
+        adjust_lora_(bfn, lora_scaling)
     mols = []
     while len(mols) < runtime_config["inference"]["sample_size"]:
         if x is None:
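
The template branch above reduces to a simple pad-and-tile of the tokenised template. A minimal stand-alone sketch of that preparation, with a stand-in token tensor in place of the package tokeniser:

    # Stand-alone sketch of the template preparation added for mol2mol: a 1-D tensor
    # of token indices is right-padded with 0 to the model sequence length and tiled
    # over the batch. The token ids below are fake; in the package they come from the
    # tokeniser configured in cli.py.
    import torch

    def prepare_template(token_ids: torch.Tensor, sequence_length: int, batch_size: int) -> torch.Tensor:
        x = torch.nn.functional.pad(token_ids, (0, sequence_length - token_ids.shape[-1]), value=0)
        return x[None, :].repeat(batch_size, 1)  # shape: (batch_size, sequence_length)

    template_ids = torch.tensor([1, 7, 8, 9, 2])            # fake 5-token template
    x = prepare_template(template_ids, sequence_length=16, batch_size=3)
    assert x.shape == (3, 16)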
@@ -567,7 +586,7 @@ def main_script(version: str) -> None:
                 method=sample_method,
                 allowed_tokens=allowed_token,
             )
-
+        elif runtime_config["inference"]["guidance_scaffold"]:
             s = inpaint(
                 bfn,
                 x,
@@ -579,6 +598,18 @@ def main_script(version: str) -> None:
                 method=sample_method,
                 allowed_tokens=allowed_token,
             )
+        else:
+            s = optimise(
+                bfn,
+                x,
+                sample_step,
+                y,
+                guidance_strength,
+                _device,
+                vocab_keys,
+                method=sample_method,
+                allowed_tokens=allowed_token,
+            )
         if runtime_config["inference"]["exclude_invalid"]:
             s = [i for i in s if i]
         if tokeniser_name == "smiles" or tokeniser_name == "safe":
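
Taken together, the hunks above turn the generation loop in main_script into a three-way dispatch. A condensed, illustrative paraphrase (the real call sites pass many more arguments):

    # Condensed paraphrase of the resulting dispatch; the real code passes the model,
    # device, vocabulary, method, and token masks to sample/inpaint/optimise.
    from typing import Optional
    import torch

    def choose_generation_mode(x: Optional[torch.Tensor], scaffold: str, template: str) -> str:
        if x is None:          # neither guidance_scaffold nor sample_template was set
            return "de novo"   # -> bayesianflow_for_chem.tool.sample
        if scaffold:
            return "inpaint"   # -> tool.inpaint (R-group replacement / in-filling)
        assert template, "x was built from sample_template in this branch"
        return "mol2mol"       # -> tool.optimise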
bayesianflow_for_chem/model.py

@@ -676,7 +676,7 @@ class ChemBFN(nn.Module):
         token_mask: Optional[Tensor] = None,
     ) -> Tuple[Tensor, Tensor]:
         """
-        Sample from a piror distribution.
+        Sample from a uniform piror distribution.
 
         :param batch_size: batch size
         :param sequence_size: max sequence length
@@ -880,6 +880,108 @@ class ChemBFN(nn.Module):
             p = p.masked_fill_(token_mask, 0.0)
         return torch.argmax(p, -1), entropy
 
+    @torch.jit.export
+    def optimise(
+        self,
+        x: Tensor,
+        y: Optional[Tensor] = None,
+        sample_step: int = 100,
+        guidance_strength: float = 4.0,
+        token_mask: Optional[Tensor] = None,
+    ) -> Tuple[Tensor, Tensor]:
+        """
+        Optimise the template molecule (mol2mol). \n
+        This method is equivalent to sampling from a customised prior distribution.
+
+        :param x: categorical indices of template; shape: (n_b, n_t)
+        :param y: conditioning vector; shape: (n_b, 1, n_f) or (n_b, n_f)
+        :param sample_step: number of sampling steps
+        :param guidance_strength: strength of conditional generation. It is not used if y is null.
+        :param token_mask: token mask assigning unwanted token(s) with `True`;
+                           shape: (1, 1, n_vocab)
+        :type x: torch.Tensor
+        :type y: torch.Tensor | None
+        :type sample_step: int
+        :type guidance_strength: float
+        :type token_mask: torch.Tensor | None
+        :return: sampled token indices; shape: (n_b, n_t) \n
+                 entropy of the tokens; shape: (n_b)
+        :rtype: tuple
+        """
+        n_b = x.shape[0]
+        x_onehot = nn.functional.one_hot(x, self.K).float()
+        theta = nn.functional.softmax(x_onehot, -1)
+        if y is not None:
+            y = self.reshape_y(y)
+        for i in torch.linspace(1, sample_step, sample_step, device=x.device):
+            t = (i - 1).view(1, 1, 1).repeat(n_b, 1, 1) / sample_step
+            p = self.discrete_output_distribution(theta, t, y, guidance_strength)
+            if token_mask is not None:
+                p = p.masked_fill_(token_mask, 0.0)
+            alpha = self.calc_discrete_alpha(t, t + 1 / sample_step)
+            e_k = nn.functional.one_hot(torch.argmax(p, -1), self.K).float()
+            mu = alpha * (self.K * e_k - 1)
+            sigma = (alpha * self.K).sqrt()
+            theta = (mu + sigma * torch.randn_like(mu)).exp() * theta
+            theta = theta / theta.sum(-1, True)
+        t_final = torch.ones((n_b, 1, 1), device=x.device)
+        p = self.discrete_output_distribution(theta, t_final, y, guidance_strength)
+        entropy = -(p * p.log()).sum(-1).mean(-1)
+        if token_mask is not None:
+            p = p.masked_fill_(token_mask, 0.0)
+        return torch.argmax(p, -1), entropy
+
+    @torch.jit.export
+    def ode_optimise(
+        self,
+        x: Tensor,
+        y: Optional[Tensor] = None,
+        sample_step: int = 100,
+        guidance_strength: float = 4.0,
+        token_mask: Optional[Tensor] = None,
+        temperature: float = 0.5,
+    ) -> Tuple[Tensor, Tensor]:
+        """
+        ODE mol2mol.
+
+        :param x: categorical indices of template; shape: (n_b, n_t)
+        :param y: conditioning vector; shape: (n_b, 1, n_f) or (n_b, n_f)
+        :param sample_step: number of sampling steps
+        :param guidance_strength: strength of conditional generation. It is not used if y is null.
+        :param token_mask: token mask assigning unwanted token(s) with `True`;
+                           shape: (1, 1, n_vocab)
+        :param temperature: sampling temperature
+        :type x: torch.Tensor
+        :type y: torch.Tensor | None
+        :type sample_step: int
+        :type guidance_strength: float
+        :type token_mask: torch.Tensor | None
+        :type temperature: float
+        :return: sampled token indices; shape: (n_b, n_t) \n
+                 entropy of the tokens; shape: (n_b)
+        :rtype: tuple
+        """
+        n_b = x.shape[0]
+        z = nn.functional.one_hot(x, self.K).float()
+        if y is not None:
+            y = self.reshape_y(y)
+        for i in torch.linspace(1, sample_step, sample_step, device=self.beta.device):
+            t = (i - 1).view(1, 1, 1).repeat(n_b, 1, 1) / sample_step
+            theta = torch.softmax(z, -1)
+            beta = self.calc_beta(t + 1 / sample_step)
+            p = self.discrete_output_distribution(theta, t, y, guidance_strength)
+            if token_mask is not None:
+                p = p.masked_fill_(token_mask, 0.0)
+            u = torch.randn_like(z)
+            z = (self.K * p - 1) * beta + (self.K * beta * temperature).sqrt() * u
+        t_final = torch.ones((n_b, 1, 1), device=self.beta.device)
+        theta = torch.softmax(z, -1)
+        p = self.discrete_output_distribution(theta, t_final, y, guidance_strength)
+        entropy = -(p * p.log()).sum(-1).mean(-1)
+        if token_mask is not None:
+            p = p.masked_fill_(token_mask, 0.0)
+        return torch.argmax(p, -1), entropy
+
     def inference(
         self, x: Tensor, mlp: MLP, embed_fn: Optional[Callable[[Tensor], Tensor]] = None
     ) -> Tensor:
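
A minimal usage sketch of the new methods, following the ChemBFN(512) construction used in test/test_jit_compatibility.py; the random index tensor is a stand-in for a real tokenised template, and an untrained model will not produce meaningful molecules:

    # Minimal usage sketch (assumptions: ChemBFN(512) as in the new test file,
    # a random index tensor in place of a tokenised template, untrained weights).
    import torch
    from bayesianflow_for_chem import ChemBFN

    model = ChemBFN(512).eval()
    x = torch.randint(0, 8, (2, 32))  # (n_b, n_t); indices must lie inside the vocabulary
    with torch.inference_mode():
        tokens, entropy = model.optimise(x, y=None, sample_step=10)
        ode_tokens, ode_entropy = model.ode_optimise(x, sample_step=10, temperature=0.5)
    # documented return shapes: tokens (n_b, n_t), entropy (n_b,)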
@@ -1250,6 +1352,71 @@ class EnsembleChemBFN(ChemBFN):
             x, y, sample_step, guidance_strength, token_mask, temperature
         )
 
+    @torch.inference_mode()
+    def optimise(
+        self,
+        x: Tensor,
+        conditions: Union[List[Tensor], Dict[str, Tensor]],
+        sample_step: int = 100,
+        guidance_strength: float = 4.0,
+        token_mask: Optional[Tensor] = None,
+    ) -> Tuple[Tensor, Tensor]:
+        """
+        Optimise the template molecule (mol2mol). \n
+        This method is equivalent to sampling from a customised prior distribution.
+
+        :param x: categorical indices of template; shape: (n_b, n_t)
+        :param conditions: conditioning vector; shape: (n_b, n_c) * n_h
+        :param sample_step: number of sampling steps
+        :param guidance_strength: strength of conditional generation. It is not used if y is null.
+        :param token_mask: token mask assigning unwanted token(s) with `True`;
+                           shape: (1, 1, n_vocab)
+        :type x: torch.Tensor
+        :type y: torch.Tensor | None
+        :type sample_step: int
+        :type guidance_strength: float
+        :type token_mask: torch.Tensor | None
+        :return: sampled token indices; shape: (n_b, n_t) \n
+                 entropy of the tokens; shape: (n_b)
+        :rtype: tuple
+        """
+        y = self.construct_y(conditions)
+        return super().optimise(x, y, sample_step, guidance_strength, token_mask)
+
+    @torch.inference_mode()
+    def ode_optimise(
+        self,
+        x: Tensor,
+        conditions: Union[List[Tensor], Dict[str, Tensor]],
+        sample_step: int = 100,
+        guidance_strength: float = 4.0,
+        token_mask: Optional[Tensor] = None,
+        temperature: float = 0.5,
+    ) -> Tuple[Tensor, Tensor]:
+        """
+        ODE inpainting.
+
+        :param x: categorical indices of template; shape: (n_b, n_t)
+        :param conditions: conditioning vector; shape: (n_b, n_c) * n_h
+        :param sample_step: number of sampling steps
+        :param guidance_strength: strength of conditional generation. It is not used if y is null.
+        :param token_mask: token mask; shape: (1, 1, n_vocab)
+        :param temperature: sampling temperature
+        :type x: torch.Tensor
+        :type conditions: list | dict
+        :type sample_step: int
+        :type guidance_strength: float
+        :type token_mask: torch.Tensor | None
+        :type temperature: float
+        :return: sampled token indices; shape: (n_b, n_t) \n
+                 entropy of the tokens; shape: (n_b)
+        :rtype: tuple
+        """
+        y = self.construct_y(conditions)
+        return super().ode_optimise(
+            x, y, sample_step, guidance_strength, token_mask, temperature
+        )
+
     def quantise(
         self, quantise_method: Optional[Callable[[ChemBFN], None]] = None
     ) -> None:
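
The ensemble wrappers only assemble the joint conditioning vector through construct_y and defer to ChemBFN. A hedged sketch of driving them, assuming an already-constructed EnsembleChemBFN (its constructor is outside this diff):

    # Hedged sketch: how the new wrappers could be driven. EnsembleChemBFN construction
    # and the exact condition heads are not part of this diff.
    from typing import Dict
    import torch
    from bayesianflow_for_chem.model import EnsembleChemBFN

    def ensemble_mol2mol(
        ensemble: EnsembleChemBFN,
        x: torch.Tensor,
        conditions: Dict[str, torch.Tensor],
        use_ode: bool = True,
    ):
        if use_ode:
            return ensemble.ode_optimise(x, conditions, sample_step=100, temperature=0.5)
        return ensemble.optimise(x, conditions, sample_step=100)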
bayesianflow_for_chem/tool.py

@@ -219,7 +219,7 @@ def sample(
     sort: bool = False,
 ) -> List[str]:
     """
-    Sampling.
+    Sampling molecules.
 
     :param model: trained ChemBFN model
     :param batch_size: batch size
@@ -385,6 +385,96 @@ def inpaint(
     ]
 
 
+@torch.no_grad()
+def optimise(
+    model: Union[ChemBFN, EnsembleChemBFN],
+    x: Tensor,
+    sample_step: int = 100,
+    y: Optional[Union[Tensor, Dict[str, Tensor], List[Tensor]]] = None,
+    guidance_strength: float = 4.0,
+    device: Union[str, torch.device, None] = None,
+    vocab_keys: List[str] = VOCAB_KEYS,
+    separator: str = "",
+    method: str = "BFN",
+    allowed_tokens: Union[str, List[str]] = "all",
+    sort: bool = False,
+) -> List[str]:
+    """
+    Optimising template molecules (mol2mol).
+
+    :param model: trained ChemBFN model
+    :param x: categorical indices of template; shape: (n_b, n_t)
+    :param sample_step: number of sampling steps
+    :param y: conditioning vector; shape: (n_b, 1, n_f) or (n_b, n_f) \n
+              or a list/`dict` of conditions; shape: (n_b, n_c) * n_h
+
+    :param guidance_strength: strength of conditional generation. It is not used if y is null.
+    :param device: hardware accelerator
+    :param vocab_keys: a list of (ordered) vocabulary
+    :param separator: token separator; default is `""`
+    :param method: sampling method chosen from `"ODE:x"` or `"BFN"` where `x` is the value of sampling temperature; default is `"BFN"`
+    :param allowed_tokens: a list of allowed tokens; default is `"all"`
+    :param sort: whether to sort the samples according to entropy values; default is `False`
+    :type model: bayesianflow_for_chem.model.ChemBFN | bayesianflow_for_chem.model.EnsembleChemBFN
+    :type x: torch.Tensor
+    :type sample_step: int
+    :type y: torch.Tensor | list | dict | None
+    :type guidance_strength: float
+    :type device: str | torch.device | None
+    :type vocab_keys: list
+    :type separator: str
+    :type method: str
+    :type allowed_tokens: str | list
+    :type sort: bool
+    :return: a list of generated molecular strings
+    :rtype: list
+    """
+    assert method.split(":")[0].lower() in ("ode", "bfn")
+    if isinstance(model, EnsembleChemBFN):
+        assert y is not None, "conditioning is required while using an ensemble model."
+        assert isinstance(y, list) or isinstance(y, dict)
+    else:
+        assert isinstance(y, Tensor) or y is None
+    if device is None:
+        device = _find_device()
+    model.to(device).eval()
+    x = x.to(device)
+    if y is not None:
+        if isinstance(y, Tensor):
+            y = y.to(device)
+        elif isinstance(y, list):
+            y = [i.to(device) for i in y]
+        elif isinstance(y, dict):
+            y = {k: v.to(device) for k, v in y.items()}
+        else:
+            raise NotImplementedError
+    if isinstance(allowed_tokens, list):
+        token_mask = [0 if i in allowed_tokens else 1 for i in vocab_keys]
+        token_mask = torch.tensor([[token_mask]], dtype=torch.bool).to(device)
+    else:
+        token_mask = None
+    if "ode" in method.lower():
+        tp = float(method.split(":")[-1])
+        assert tp > 0, "Sampling temperature should be higher than 0."
+        tokens, entropy = model.ode_optimise(
+            x, y, sample_step, guidance_strength, token_mask, tp
+        )
+    else:
+        tokens, entropy = model.optimise(
+            x, y, sample_step, guidance_strength, token_mask
+        )
+    if sort:
+        sorted_idx = entropy.argsort(stable=True)
+        tokens = tokens[sorted_idx]
+    return [
+        separator.join([vocab_keys[i] for i in j])
+        .split("<start>" + separator)[-1]
+        .split(separator + "<end>")[0]
+        .replace("<pad>", "")
+        for j in tokens
+    ]
+
+
 def quantise_model_(model: ChemBFN) -> None:
     """
     In-place dynamic quantisation of the trained model to `int8` data type. \n
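
A rough end-to-end sketch of the new high-level helper; the tokenisation step is not shown in this diff, so a stand-in index tensor is used, and the untrained ChemBFN(512) stands in for a trained checkpoint:

    # Rough end-to-end sketch of tool.optimise. The stand-in index tensor replaces a
    # tokenised template batch, and ChemBFN(512) replaces a trained checkpoint.
    import torch
    from bayesianflow_for_chem import ChemBFN
    from bayesianflow_for_chem.tool import optimise

    model = ChemBFN(512)
    x = torch.randint(0, 8, (16, 32))     # (n_b, n_t) template token indices
    mols = optimise(
        model,
        x,
        sample_step=100,
        y=None,                            # or a conditioning tensor of shape (n_b, n_f)
        method="ODE:0.5",                  # ODE solver at temperature 0.5; "BFN" also works
        sort=True,                         # order results by entropy
    )
    print(mols[:5])                        # generated molecular strings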
bayesianflow_for_chem.egg-info/SOURCES.txt

@@ -17,5 +17,6 @@ bayesianflow_for_chem.egg-info/dependency_links.txt
 bayesianflow_for_chem.egg-info/entry_points.txt
 bayesianflow_for_chem.egg-info/requires.txt
 bayesianflow_for_chem.egg-info/top_level.txt
+test/test_jit_compatibility.py
 test/test_merge_lora.py
 test/test_molecular_embedding.py
test/test_jit_compatibility.py (new file)

@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+# Author: Nianze A. Tao (Omozawa Sueno)
+"""
+Model should be compatible with TorchScript.
+"""
+import torch
+from bayesianflow_for_chem import ChemBFN
+
+model = ChemBFN(512)
+model_method = [
+    "sample",
+    "ode_sample",
+    "inpaint",
+    "ode_inpaint",
+    "optimise",
+    "ode_optimise",
+]
+
+
+@torch.inference_mode()
+def test():
+    jit_model = torch.jit.script(model).eval()
+    assert isinstance(jit_model, torch.jit.ScriptModule)
+    for method in model_method:
+        assert hasattr(jit_model, method)
+    jit_model = torch.jit.freeze(jit_model, model_method)
+    for method in model_method:
+        assert hasattr(jit_model, method)
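
As a complementary sketch (not part of the test), a scripted and frozen model can be serialised and reloaded with standard TorchScript APIs; the file name is arbitrary:

    # Complementary sketch: a scripted, frozen model can be saved and reloaded with
    # standard TorchScript APIs. The output file name is arbitrary.
    import torch
    from bayesianflow_for_chem import ChemBFN

    scripted = torch.jit.script(ChemBFN(512)).eval()
    scripted = torch.jit.freeze(
        scripted, ["sample", "ode_sample", "inpaint", "ode_inpaint", "optimise", "ode_optimise"]
    )
    torch.jit.save(scripted, "chembfn_scripted.pt")
    reloaded = torch.jit.load("chembfn_scripted.pt")
    assert hasattr(reloaded, "optimise")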