bayesianflow-for-chem 2.0.2.tar.gz → 2.0.4.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of bayesianflow-for-chem has been flagged as potentially problematic by the registry.
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/PKG-INFO +7 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/README.md +5 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/__init__.py +4 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/cli.py +33 -18
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/model.py +13 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/tool.py +4 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/train.py +1 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem.egg-info/PKG-INFO +7 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem.egg-info/SOURCES.txt +3 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem.egg-info/requires.txt +1 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/setup.py +1 -0
- bayesianflow_for_chem-2.0.4/test/test_merge_lora.py +40 -0
- bayesianflow_for_chem-2.0.4/test/test_molecular_embedding.py +67 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/LICENSE +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/data.py +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/scorer.py +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/spectra.py +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/vocab.txt +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem.egg-info/dependency_links.txt +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem.egg-info/entry_points.txt +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem.egg-info/top_level.txt +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/pyproject.toml +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/setup.cfg +0 -0
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/PKG-INFO
RENAMED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bayesianflow_for_chem
-Version: 2.0.2
+Version: 2.0.4
 Summary: Bayesian flow network framework for Chemistry
 Home-page: https://augus1999.github.io/bayesian-flow-network-for-chemistry/
 Author: Nianze A. Tao
@@ -23,6 +23,7 @@ License-File: LICENSE
 Requires-Dist: rdkit>=2025.3.5
 Requires-Dist: torch>=2.8.0
 Requires-Dist: torchao>=0.12
+Requires-Dist: colorama>=0.4.6
 Requires-Dist: numpy>=2.3.2
 Requires-Dist: scipy>=1.16.1
 Requires-Dist: loralib>=0.1.2
@@ -49,6 +50,11 @@ Dynamic: summary
 
 This is the repository of the PyTorch implementation of ChemBFN model.
 
+### Build State
+
+[](https://pypi.org/project/bayesianflow-for-chem/)
+
+
 ## Features
 
 ChemBFN provides the state-of-the-art functionalities of
```
(The two badge lines under "### Build State" lost their image URLs in extraction; only the PyPI link target survives.)
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/README.md
RENAMED
```diff
@@ -5,6 +5,11 @@
 
 This is the repository of the PyTorch implementation of ChemBFN model.
 
+### Build State
+
+[](https://pypi.org/project/bayesianflow-for-chem/)
+
+
 ## Features
 
 ChemBFN provides the state-of-the-art functionalities of
```
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/__init__.py
RENAMED
```diff
@@ -3,6 +3,7 @@
 """
 ChemBFN package.
 """
+import colorama
 from . import data, tool, train, scorer, spectra
 from .model import ChemBFN, MLP, EnsembleChemBFN
 from .cli import main_script
@@ -17,7 +18,7 @@ __all__ = [
     "MLP",
     "EnsembleChemBFN",
 ]
-__version__ = "2.0.2"
+__version__ = "2.0.4"
 __author__ = "Nianze A. Tao (Omozawa Sueno)"
 
 
@@ -28,4 +29,6 @@ def main() -> None:
     :return:
     :rtype: None
     """
+    colorama.just_fix_windows_console()
     main_script(__version__)
+    colorama.deinit()
```
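The new `main()` wiring follows colorama's recommended lifecycle, which is why `colorama>=0.4.6` enters the dependency list: `just_fix_windows_console()` (introduced in colorama 0.4.6) makes the raw ANSI escape sequences printed by the CLI render correctly on legacy Windows consoles and is a no-op elsewhere, while `deinit()` restores the original output streams afterwards. A minimal sketch of the same pattern with a hypothetical `run()` entry point; the try/finally is our addition, not upstream code:

```python
import colorama


def run() -> None:
    """Hypothetical entry point mirroring the pattern in main() above."""
    colorama.just_fix_windows_console()  # wrap stdout/stderr on old Windows consoles
    try:
        # Raw ANSI codes, like those used in cli.py, now display correctly everywhere.
        print("\033[0;31mCritical\033[0;0m example message")
    finally:
        colorama.deinit()  # restore the original streams
```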
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/cli.py
RENAMED
```diff
@@ -130,10 +130,11 @@ def parse_cli(version: str) -> argparse.Namespace:
     """
     parser = argparse.ArgumentParser(
         description="Madmol: a CLI molecular design tool for "
-        "de novo design
+        "de novo design, R-group replacement, and sequence in-filling, "
         "based on generative route of ChemBFN method. "
         "Let's make some craziest molecules.",
-        epilog=f"Madmol {version}, developed in Hiroshima University
+        epilog=f"Madmol {version}, developed in Hiroshima University by chemists for chemists. "
+        "Visit https://augus1999.github.io/bayesian-flow-network-for-chemistry/ for more details.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
@@ -180,14 +181,16 @@ def load_model_config(
        model_config = tomllib.load(f)
    if model_config["ChemBFN"]["num_vocab"] != "match vocabulary size":
        if not isinstance(model_config["ChemBFN"]["num_vocab"], int):
-            print(
+            print(
+                f"\033[0;31mCritical\033[0;0m in {config_file}: You must specify num_vocab."
+            )
            flag_critical += 1
    if model_config["ChemBFN"]["base_model"]:
        model_file = model_config["ChemBFN"]["base_model"]
        for fn in model_file:
            if not os.path.exists(fn):
                print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: Base model file {fn} does not exist."
                )
                flag_critical += 1
    if "MLP" in model_config:
@@ -195,14 +198,14 @@ def load_model_config(
        b = model_config["MLP"]["size"][-1]
        if a != b:
            print(
-                f"
+                f"\033[0;31mCritical\033[0;0m in {config_file}: MLP hidden size {b} should match ChemBFN hidden size {a}."
            )
            flag_critical += 1
        if model_config["MLP"]["base_model"]:
            model_file = model_config["MLP"]["base_model"]
            if not os.path.exists(model_file):
                print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: Base model file {fn} does not exist."
                )
                flag_critical += 1
    return model_config, flag_critical, flag_warning
@@ -226,49 +229,61 @@ def load_runtime_config(
        config = tomllib.load(f)
    tokeniser_name = config["tokeniser"]["name"].lower()
    if not tokeniser_name in "smiles selfies safe fasta".split():
-        print(
+        print(
+            f"\033[0;31mCritical\033[0;0m in {config_file}: Unknown tokensier name: {tokeniser_name}."
+        )
        flag_critical += 1
    if tokeniser_name == "selfies":
        vocab = config["tokeniser"]["vocab"]
        if vocab.lower() == "default":
-            print(
+            print(
+                f"\033[0;31mCritical\033[0;0m in {config_file}: You should specify a vocabulary file."
+            )
            flag_critical += 1
        elif not os.path.exists(vocab):
-            print(
+            print(
+                f"\033[0;31mCritical\033[0;0m in {config_file}: Vocabulary file {vocab} does not exist."
+            )
            flag_critical += 1
    if "train" in config:
        dataset_file = config["train"]["dataset"]
        if not os.path.exists(dataset_file):
            print(
-                f"
+                f"\033[0;31mCritical\033[0;0m in {config_file}: Dataset file {dataset_file} does not exist."
            )
            flag_critical += 1
        logger_name = config["train"]["logger_name"].lower()
        if not logger_name in "csv tensorboard wandb".split():
-            print(
+            print(
+                f"\033[0;31mCritical\033[0;0m in {config_file}: Unknown logger: {logger_name}."
+            )
            flag_critical += 1
        if config["train"]["restart"]:
            ckpt_file = config["train"]["restart"]
            if not os.path.exists(ckpt_file):
                print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: Restart checkpoint file {ckpt_file} does not exist."
                )
                flag_critical += 1
    if "inference" in config:
        if not "train" in config:
            if not isinstance(config["inference"]["sequence_length"], int):
                print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: You must set an integer for sequence_length."
                )
                flag_critical += 1
        if config["inference"]["guidance_objective"]:
            if not "guidance_objective_strength" in config["inference"]:
                print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: You need to add guidance_objective_strength."
                )
                flag_critical += 1
        result_dir = Path(config["inference"]["result_file"]).parent
-
+        if not os.path.exists(result_dir):
+            print(
+                f"\033[0;33mWarning\033[0;0m in {config_file}: Directory {result_dir} to save the result does not exist."
+            )
+            flag_warning += 1
    return config, flag_critical, flag_warning
@@ -306,7 +321,7 @@ def main_script(version: str) -> None:
    if runtime_config["train"]["enable_lora"]:
        if not model_config["ChemBFN"]["base_model"]:
            print(
-                f"
+                f"\033[0;33mWarning\033[0;0m in {parser.model_config}: You should load a pretrained model first."
            )
            flag_warning += 1
    if not os.path.exists(runtime_config["train"]["checkpoint_save_path"]):
@@ -314,12 +329,12 @@ def main_script(version: str) -> None:
    else:
        if not model_config["ChemBFN"]["base_model"]:
            print(
-                f"
+                f"\033[0;33mWarning\033[0;0m in {parser.model_config}: You should load a pretrained ChemBFN model."
            )
            flag_warning += 1
        if not model_config["MLP"]["base_model"]:
            print(
-                f"
+                f"\033[0;33mWarning\033[0;0m in {parser.model_config}: You should load a pretrained MLP."
            )
            flag_warning += 1
    if "inference" in runtime_config:
```
(Removed lines ending abruptly, such as `"de novo design` and the bare `f"` lines, were truncated by the diff extraction; the original 2.0.2 text of those lines is not recoverable from this page.)
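Every reworked diagnostic shares two escape sequences: `\033[0;31m` (red) prefixes Critical messages, `\033[0;33m` (yellow) prefixes Warning messages, and `\033[0;0m` resets the colour. The upstream code inlines these codes in each f-string; below is a hedged sketch of an equivalent helper, purely for illustration and not part of the package:

```python
RED, YELLOW, RESET = "\033[0;31m", "\033[0;33m", "\033[0;0m"


def critical(config_file: str, message: str) -> str:
    # Same shape as the inlined f-strings in load_model_config/load_runtime_config.
    return f"{RED}Critical{RESET} in {config_file}: {message}"


def warning(config_file: str, message: str) -> str:
    return f"{YELLOW}Warning{RESET} in {config_file}: {message}"


print(critical("model_config.toml", "You must specify num_vocab."))
```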
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/model.py
RENAMED
```diff
@@ -1038,6 +1038,19 @@ class EnsembleChemBFN(ChemBFN):
         self.__delattr__("lora_enabled")
         self.__delattr__("lora_param")
         self.__delattr__("hparam")
+        # ------- merge LoRA parameters to reduce the latency -------
+        for _, v in self.models.items():
+            for module in v.modules():
+                if hasattr(module, "lora_A"):
+                    module.weight.data += (
+                        module.lora_B @ module.lora_A
+                    ) * module.scaling
+                    module.lora_enabled = False
+                    module.lora_A = None
+                    module.lora_B = None
+                    module.scaling = None
+                    module.lora_dropout = None
+            v.lora_enabled = False
 
     def construct_y(
         self, c: Union[List[Tensor], Dict[str, Tensor]]
```
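The merge relies on the standard LoRA identity: an adapted layer computes y = x Wᵀ + scaling · x (B A)ᵀ, so folding `scaling * (lora_B @ lora_A)` into `weight` once yields a plain linear layer with the same output and no extra matmul per forward pass; since the ensembled models run in eval mode, discarding `lora_dropout` changes nothing. A self-contained numerical check of that identity, independent of ChemBFN:

```python
import torch

torch.manual_seed(0)
W = torch.randn(64, 32)        # base weight, (out_features, in_features)
A = torch.randn(8, 32) * 0.01  # LoRA down-projection, rank r = 8
B = torch.randn(64, 8) * 0.01  # LoRA up-projection
scaling = 2.0                  # lora_alpha / r in loralib's convention
x = torch.randn(5, 32)

y_lora = x @ W.t() + scaling * (x @ A.t() @ B.t())  # unmerged forward pass
W_merged = W + scaling * (B @ A)                    # the fold performed above
y_merged = x @ W_merged.t()

assert torch.allclose(y_lora, y_merged, atol=1e-5)
```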
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/tool.py
RENAMED
```diff
@@ -9,6 +9,7 @@ import warnings
 from pathlib import Path
 from typing import List, Dict, Tuple, Union, Optional
 import torch
+import colorama
 import numpy as np
 from torch import cuda, Tensor, softmax
 from torch.utils.data import DataLoader
@@ -141,6 +142,7 @@ def split_dataset(
     assert file.endswith(".csv")
     assert len(split_ratio) == 3
     assert method in ("random", "scaffold")
+    colorama.just_fix_windows_console()
     with open(file, "r") as f:
         data = list(csv.reader(f))
     header = data[0]
@@ -198,6 +200,7 @@ def split_dataset(
     with open(file.replace(".csv", "_val.csv"), "w", newline="") as fva:
         writer = csv.writer(fva)
         writer.writerows([header] + val_set)
+    colorama.deinit()
 
 
 @torch.no_grad()
@@ -467,7 +470,7 @@ class GeometryConverter:
         spin: float = 0.0,
     ) -> Tuple[List[str], np.ndarray]:
         """
-        Guess the 3D geometry from SMILES string via
+        Guess the 3D geometry from SMILES string via conformer search.
 
         :param smiles: a valid SMILES string
         :param num_conformers: number of initial conformers
```
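For context, `split_dataset` (the function gaining the colorama calls) splits one CSV into `*_train.csv`, `*_test.csv`, and `*_val.csv` files next to the input, and the asserts visible in the hunk pin down its contract: a `.csv` path, a three-element `split_ratio`, and `method` set to `"random"` or `"scaffold"`. A hedged usage sketch; treating `split_ratio` as relative train/test/val weights is an assumption:

```python
from bayesianflow_for_chem.tool import split_dataset

# Hypothetical invocation. Parameter names come from the asserts shown above;
# the semantics of split_ratio (relative weights) are assumed, not documented here.
split_dataset("qm9.csv", split_ratio=[8, 1, 1], method="scaffold")
# Expected outputs, per the file-writing code shown: qm9_train.csv, qm9_test.csv, qm9_val.csv
```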
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem/train.py
RENAMED
```diff
@@ -134,7 +134,7 @@ class Regressor(LightningModule):
         hparam: Dict[str, Union[str, int, float, bool]] = DEFAULT_REGRESSOR_HPARAM,
     ) -> None:
         """
-        A `~lightning.LightningModule` wrapper of bayesian flow network for chemistry regression model.\n
+        A `~lightning.LightningModule` wrapper of bayesian flow network for chemistry regression or classification model.\n
         This module is used in training stage only. By calling `Regressor(...).export_model(YOUR_WORK_DIR)` after training,
         the models will be saved to `YOUR_WORK_DIR/model_ft.pt` (if LoRA is enabled then `YOUR_WORK_DIR/lora.pt`)
         and `YOUR_WORK_DIR/readout.pt`.
```
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem.egg-info/PKG-INFO
RENAMED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bayesianflow_for_chem
-Version: 2.0.2
+Version: 2.0.4
 Summary: Bayesian flow network framework for Chemistry
 Home-page: https://augus1999.github.io/bayesian-flow-network-for-chemistry/
 Author: Nianze A. Tao
@@ -23,6 +23,7 @@ License-File: LICENSE
 Requires-Dist: rdkit>=2025.3.5
 Requires-Dist: torch>=2.8.0
 Requires-Dist: torchao>=0.12
+Requires-Dist: colorama>=0.4.6
 Requires-Dist: numpy>=2.3.2
 Requires-Dist: scipy>=1.16.1
 Requires-Dist: loralib>=0.1.2
@@ -49,6 +50,11 @@ Dynamic: summary
 
 This is the repository of the PyTorch implementation of ChemBFN model.
 
+### Build State
+
+[](https://pypi.org/project/bayesianflow-for-chem/)
+
+
 ## Features
 
 ChemBFN provides the state-of-the-art functionalities of
```
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.4}/bayesianflow_for_chem.egg-info/SOURCES.txt
RENAMED
```diff
@@ -16,4 +16,6 @@ bayesianflow_for_chem.egg-info/SOURCES.txt
 bayesianflow_for_chem.egg-info/dependency_links.txt
 bayesianflow_for_chem.egg-info/entry_points.txt
 bayesianflow_for_chem.egg-info/requires.txt
-bayesianflow_for_chem.egg-info/top_level.txt
+bayesianflow_for_chem.egg-info/top_level.txt
+test/test_merge_lora.py
+test/test_molecular_embedding.py
```
bayesianflow_for_chem-2.0.4/test/test_merge_lora.py
ADDED
```diff
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+# Author: Nianze A. Tao (Omozawa Sueno)
+"""
+Model output should be almost identical before and after emerging LoRA parameters into base model.
+"""
+import torch
+from bayesianflow_for_chem import ChemBFN, MLP
+from bayesianflow_for_chem.tool import merge_lora_
+from bayesianflow_for_chem.data import VOCAB_COUNT, smiles2token, collate
+
+torch.manual_seed(8964)
+
+model = ChemBFN(VOCAB_COUNT)
+model.enable_lora(r=8)
+model.eval()
+mlp = MLP([512, 256, 3], dropout=0.7)
+mlp.eval()
+for module in model.modules():
+    if hasattr(module, "lora_B"):
+        torch.nn.init.kaiming_uniform_(module.lora_B, a=5**0.5)
+
+x = collate(
+    [{"token": smiles2token("c1ccccc1O")}, {"token": smiles2token("[NH4+]CCCCCC[O-]")}]
+)["token"]
+
+
+@torch.inference_mode()
+def test():
+    model.semi_autoregressive = False
+    y1 = model.inference(x, mlp)
+    model.semi_autoregressive = True
+    y2 = model.inference(x, mlp)
+    merge_lora_(model)
+    model.semi_autoregressive = False
+    y3 = model.inference(x, mlp)
+    model.semi_autoregressive = True
+    y4 = model.inference(x, mlp)
+    assert not model.lora_enabled
+    assert (y1 - y3).abs().mean() < 1e-6
+    assert (y2 - y4).abs().mean() < 1e-6
```
bayesianflow_for_chem-2.0.4/test/test_molecular_embedding.py
ADDED
```diff
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+# Author: Nianze A. Tao (Omozawa Sueno)
+"""
+Molecular embedding vectors should not be affected by <pad> tokens.
+"""
+from functools import partial
+import torch
+from bayesianflow_for_chem import ChemBFN, MLP
+from bayesianflow_for_chem.data import VOCAB_COUNT, smiles2token
+
+torch.manual_seed(8964)
+
+model = ChemBFN(VOCAB_COUNT)
+model.eval()
+mlp1 = MLP([512, 256, 3], dropout=0.7)
+mlp1.eval()
+mlp2 = MLP([1024, 512, 3], dropout=0.7)
+mlp2.eval()
+
+x = smiles2token("c1ccccc1O.[NH4+]CCCCCC[O-]")
+x1 = x[None, ...]
+x2 = torch.nn.functional.pad(x1, (0, 7, 0, 0))
+
+
+def embed_fn(z, sar_flag, mask, x):
+    mb0 = z[x == 2].view(z.shape[0], -1) if sar_flag else z[::, 0]
+    mb1 = (z * mask[..., None]).sum(1) / (mask != 0).float().sum(1, True)
+    return torch.cat([mb0, mb1], -1)
+
+
+@torch.inference_mode()
+def test():
+    model.semi_autoregressive = False
+    y1 = model.inference(x1, mlp1)
+    y2 = model.inference(x2, mlp1)
+    assert (y1 != y2).sum() == 0
+    model.semi_autoregressive = True
+    y1 = model.inference(x1, mlp1)
+    y2 = model.inference(x2, mlp1)
+    assert (y1 != y2).sum() == 0
+    # ------- customised embedding extraction -------
+    mask1 = torch.tensor([[0] + [0.7] * 9 + [0] + [0.3] * 16 + [0]])
+    mask2 = torch.tensor([[0] + [0.7] * 9 + [0] + [0.3] * 16 + [0] * 8])
+    model.semi_autoregressive = False
+    y1 = model.inference(
+        x1,
+        mlp2,
+        partial(embed_fn, sar_flag=model.semi_autoregressive, mask=mask1, x=x1),
+    )
+    y2 = model.inference(
+        x2,
+        mlp2,
+        partial(embed_fn, sar_flag=model.semi_autoregressive, mask=mask2, x=x2),
+    )
+    assert (y1 != y2).sum() == 0
+    model.semi_autoregressive = True
+    y1 = model.inference(
+        x1,
+        mlp2,
+        partial(embed_fn, sar_flag=model.semi_autoregressive, mask=mask1, x=x1),
+    )
+    y2 = model.inference(
+        x2,
+        mlp2,
+        partial(embed_fn, sar_flag=model.semi_autoregressive, mask=mask2, x=x2),
+    )
+    assert (y1 != y2).sum() == 0
```
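The custom `embed_fn` exercises both embedding routes: in semi-autoregressive mode it gathers the hidden state at token id 2 (assumed here to be the `<end>` token), otherwise it takes position 0, and in both cases it concatenates a mask-weighted mean pool. The pooling term is what guarantees `<pad>` invariance, since padded positions receive zero weight in both the numerator and the denominator. A tiny standalone illustration of that property:

```python
import torch

z = torch.randn(1, 4, 3)  # hypothetical hidden states: (batch, length, dim)
z_padded = torch.cat([z, torch.randn(1, 2, 3)], dim=1)  # two extra <pad> positions
mask = torch.tensor([[0.0, 1.0, 1.0, 0.0]])
mask_padded = torch.tensor([[0.0, 1.0, 1.0, 0.0, 0.0, 0.0]])


def pool(z: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
    # Zero-masked weighted mean, as in embed_fn's second term.
    return (z * mask[..., None]).sum(1) / (mask != 0).float().sum(1, True)


# Padded positions carry zero weight, so the pooled vectors match exactly.
assert torch.equal(pool(z, mask), pool(z_padded, mask_padded))
```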
The remaining files listed above with +0 -0 (LICENSE, data.py, scorer.py, spectra.py, vocab.txt, the egg-info metadata files, pyproject.toml, and setup.cfg) are unchanged between 2.0.2 and 2.0.4.