bayesianflow-for-chem 2.0.2.tar.gz → 2.0.3.tar.gz
This diff shows the content of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/PKG-INFO +7 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/README.md +5 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/__init__.py +4 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/cli.py +30 -16
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/tool.py +3 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/PKG-INFO +7 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/SOURCES.txt +2 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/requires.txt +1 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/setup.py +1 -0
- bayesianflow_for_chem-2.0.3/test/test_molecular_embedding.py +62 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/LICENSE +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/data.py +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/model.py +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/scorer.py +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/spectra.py +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/train.py +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/vocab.txt +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/dependency_links.txt +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/entry_points.txt +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/top_level.txt +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/pyproject.toml +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/setup.cfg +0 -0
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bayesianflow_for_chem
-Version: 2.0.2
+Version: 2.0.3
 Summary: Bayesian flow network framework for Chemistry
 Home-page: https://augus1999.github.io/bayesian-flow-network-for-chemistry/
 Author: Nianze A. Tao
@@ -23,6 +23,7 @@ License-File: LICENSE
 Requires-Dist: rdkit>=2025.3.5
 Requires-Dist: torch>=2.8.0
 Requires-Dist: torchao>=0.12
+Requires-Dist: colorama>=0.4.6
 Requires-Dist: numpy>=2.3.2
 Requires-Dist: scipy>=1.16.1
 Requires-Dist: loralib>=0.1.2
@@ -49,6 +50,11 @@ Dynamic: summary
 
 This is the repository of the PyTorch implementation of ChemBFN model.
 
+## Build State
+
+[](https://pypi.org/project/bayesianflow-for-chem/)
+
+
 ## Features
 
 ChemBFN provides the state-of-the-art functionalities of
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/README.md
RENAMED
@@ -5,6 +5,11 @@
 
 This is the repository of the PyTorch implementation of ChemBFN model.
 
+## Build State
+
+[](https://pypi.org/project/bayesianflow-for-chem/)
+
+
 ## Features
 
 ChemBFN provides the state-of-the-art functionalities of
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/__init__.py
RENAMED
@@ -3,6 +3,7 @@
 """
 ChemBFN package.
 """
+import colorama
 from . import data, tool, train, scorer, spectra
 from .model import ChemBFN, MLP, EnsembleChemBFN
 from .cli import main_script
@@ -17,7 +18,7 @@ __all__ = [
     "MLP",
     "EnsembleChemBFN",
 ]
-__version__ = "2.0.2"
+__version__ = "2.0.3"
 __author__ = "Nianze A. Tao (Omozawa Sueno)"
 
 
@@ -28,4 +29,6 @@ def main() -> None:
     :return:
     :rtype: None
     """
+    colorama.just_fix_windows_console()
     main_script(__version__)
+    colorama.deinit()
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/cli.py
RENAMED
@@ -180,14 +180,16 @@ def load_model_config(
         model_config = tomllib.load(f)
     if model_config["ChemBFN"]["num_vocab"] != "match vocabulary size":
         if not isinstance(model_config["ChemBFN"]["num_vocab"], int):
-            print(
+            print(
+                f"\033[0;31mCritical\033[0;0m in {config_file}: You must specify num_vocab."
+            )
             flag_critical += 1
     if model_config["ChemBFN"]["base_model"]:
         model_file = model_config["ChemBFN"]["base_model"]
         for fn in model_file:
             if not os.path.exists(fn):
                 print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: Base model file {fn} does not exist."
                 )
                 flag_critical += 1
     if "MLP" in model_config:
@@ -195,14 +197,14 @@ def load_model_config(
         b = model_config["MLP"]["size"][-1]
         if a != b:
             print(
-                f"
+                f"\033[0;31mCritical\033[0;0m in {config_file}: MLP hidden size {b} should match ChemBFN hidden size {a}."
             )
             flag_critical += 1
         if model_config["MLP"]["base_model"]:
             model_file = model_config["MLP"]["base_model"]
             if not os.path.exists(model_file):
                 print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: Base model file {fn} does not exist."
                 )
                 flag_critical += 1
     return model_config, flag_critical, flag_warning
@@ -226,49 +228,61 @@ def load_runtime_config(
         config = tomllib.load(f)
     tokeniser_name = config["tokeniser"]["name"].lower()
     if not tokeniser_name in "smiles selfies safe fasta".split():
-        print(
+        print(
+            f"\033[0;31mCritical\033[0;0m in {config_file}: Unknown tokensier name: {tokeniser_name}."
+        )
         flag_critical += 1
     if tokeniser_name == "selfies":
         vocab = config["tokeniser"]["vocab"]
         if vocab.lower() == "default":
-            print(
+            print(
+                f"\033[0;31mCritical\033[0;0m in {config_file}: You should specify a vocabulary file."
+            )
             flag_critical += 1
         elif not os.path.exists(vocab):
-            print(
+            print(
+                f"\033[0;31mCritical\033[0;0m in {config_file}: Vocabulary file {vocab} does not exist."
+            )
             flag_critical += 1
     if "train" in config:
         dataset_file = config["train"]["dataset"]
         if not os.path.exists(dataset_file):
             print(
-                f"
+                f"\033[0;31mCritical\033[0;0m in {config_file}: Dataset file {dataset_file} does not exist."
             )
             flag_critical += 1
         logger_name = config["train"]["logger_name"].lower()
         if not logger_name in "csv tensorboard wandb".split():
-            print(
+            print(
+                f"\033[0;31mCritical\033[0;0m in {config_file}: Unknown logger: {logger_name}."
+            )
             flag_critical += 1
         if config["train"]["restart"]:
             ckpt_file = config["train"]["restart"]
             if not os.path.exists(ckpt_file):
                 print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: Restart checkpoint file {ckpt_file} does not exist."
                 )
                 flag_critical += 1
     if "inference" in config:
         if not "train" in config:
             if not isinstance(config["inference"]["sequence_length"], int):
                 print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: You must set an integer for sequence_length."
                 )
                 flag_critical += 1
         if config["inference"]["guidance_objective"]:
             if not "guidance_objective_strength" in config["inference"]:
                 print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: You need to add guidance_objective_strength."
                 )
                 flag_critical += 1
         result_dir = Path(config["inference"]["result_file"]).parent
-
+        if not os.path.exists(result_dir):
+            print(
+                f"\033[0;33mWarning\033[0;0m in {config_file}: Directory {result_dir} to save the result does not exist."
+            )
+            flag_warning += 1
     return config, flag_critical, flag_warning
@@ -306,7 +320,7 @@ def main_script(version: str) -> None:
     if runtime_config["train"]["enable_lora"]:
         if not model_config["ChemBFN"]["base_model"]:
             print(
-                f"
+                f"\033[0;33mWarning\033[0;0m in {parser.model_config}: You should load a pretrained model first."
             )
             flag_warning += 1
     if not os.path.exists(runtime_config["train"]["checkpoint_save_path"]):
@@ -314,12 +328,12 @@ def main_script(version: str) -> None:
     else:
         if not model_config["ChemBFN"]["base_model"]:
             print(
-                f"
+                f"\033[0;33mWarning\033[0;0m in {parser.model_config}: You should load a pretrained ChemBFN model."
             )
             flag_warning += 1
         if not model_config["MLP"]["base_model"]:
             print(
-                f"
+                f"\033[0;33mWarning\033[0;0m in {parser.model_config}: You should load a pretrained MLP."
             )
             flag_warning += 1
     if "inference" in runtime_config:
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/tool.py
RENAMED
@@ -9,6 +9,7 @@ import warnings
 from pathlib import Path
 from typing import List, Dict, Tuple, Union, Optional
 import torch
+import colorama
 import numpy as np
 from torch import cuda, Tensor, softmax
 from torch.utils.data import DataLoader
@@ -141,6 +142,7 @@ def split_dataset(
     assert file.endswith(".csv")
     assert len(split_ratio) == 3
     assert method in ("random", "scaffold")
+    colorama.just_fix_windows_console()
     with open(file, "r") as f:
         data = list(csv.reader(f))
     header = data[0]
@@ -198,6 +200,7 @@ def split_dataset(
     with open(file.replace(".csv", "_val.csv"), "w", newline="") as fva:
         writer = csv.writer(fva)
         writer.writerows([header] + val_set)
+    colorama.deinit()
 
 
 @torch.no_grad()
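For orientation, the asserts visible above pin down `split_dataset`'s contract: a `.csv` input file, a three-element `split_ratio`, and a `method` of `"random"` or `"scaffold"`; the colorama calls now bracket the whole split so any coloured output renders on Windows too. A usage sketch, assuming only that `file` is the first argument (the ratio semantics are a guess and should be checked against the actual signature):

```python
from bayesianflow_for_chem.tool import split_dataset

# Sketch only: keyword names are taken from the asserts above; the 8:1:1
# ratio interpretation is an assumption. The last hunk shows the validation
# part being written next to the input, e.g. mydata_val.csv.
split_dataset("mydata.csv", split_ratio=[8, 1, 1], method="scaffold")
```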
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bayesianflow_for_chem
-Version: 2.0.2
+Version: 2.0.3
 Summary: Bayesian flow network framework for Chemistry
 Home-page: https://augus1999.github.io/bayesian-flow-network-for-chemistry/
 Author: Nianze A. Tao
@@ -23,6 +23,7 @@ License-File: LICENSE
 Requires-Dist: rdkit>=2025.3.5
 Requires-Dist: torch>=2.8.0
 Requires-Dist: torchao>=0.12
+Requires-Dist: colorama>=0.4.6
 Requires-Dist: numpy>=2.3.2
 Requires-Dist: scipy>=1.16.1
 Requires-Dist: loralib>=0.1.2
@@ -49,6 +50,11 @@ Dynamic: summary
 
 This is the repository of the PyTorch implementation of ChemBFN model.
 
+## Build State
+
+[](https://pypi.org/project/bayesianflow-for-chem/)
+
+
 ## Features
 
 ChemBFN provides the state-of-the-art functionalities of
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/SOURCES.txt
RENAMED
@@ -16,4 +16,5 @@ bayesianflow_for_chem.egg-info/SOURCES.txt
 bayesianflow_for_chem.egg-info/dependency_links.txt
 bayesianflow_for_chem.egg-info/entry_points.txt
 bayesianflow_for_chem.egg-info/requires.txt
-bayesianflow_for_chem.egg-info/top_level.txt
+bayesianflow_for_chem.egg-info/top_level.txt
+test/test_molecular_embedding.py
bayesianflow_for_chem-2.0.3/test/test_molecular_embedding.py
ADDED
@@ -0,0 +1,62 @@
+from functools import partial
+import torch
+from bayesianflow_for_chem import ChemBFN, MLP
+from bayesianflow_for_chem.data import VOCAB_COUNT, smiles2token
+
+torch.manual_seed(8964)
+
+model = ChemBFN(VOCAB_COUNT)
+model.eval()
+mlp1 = MLP([512, 256, 3], dropout=0.7)
+mlp1.eval()
+mlp2 = MLP([1024, 512, 3], dropout=0.7)
+mlp2.eval()
+
+x = smiles2token("c1ccccc1O.[NH4+]CCCCCC[O-]")
+x1 = x[None, ...]
+x2 = torch.nn.functional.pad(x1, (0, 7, 0, 0))
+
+
+def embed_fn(z, sar_flag, mask, x):
+    mb0 = z[x == 2].view(z.shape[0], -1) if sar_flag else z[::, 0]
+    mb1 = (z * mask[..., None]).sum(1) / (mask != 0).float().sum(1, True)
+    return torch.cat([mb0, mb1], -1)
+
+
+@torch.inference_mode()
+def test():
+    model.semi_autoregressive = False
+    y1 = model.inference(x1, mlp1)
+    y2 = model.inference(x2, mlp1)
+    assert (y1 != y2).sum() == 0
+    model.semi_autoregressive = True
+    y1 = model.inference(x1, mlp1)
+    y2 = model.inference(x2, mlp1)
+    assert (y1 != y2).sum() == 0
+    # ------- customised embedding extraction -------
+    mask1 = torch.tensor([[0] + [0.7] * 9 + [0] + [0.3] * 16 + [0]])
+    mask2 = torch.tensor([[0] + [0.7] * 9 + [0] + [0.3] * 16 + [0] * 8])
+    model.semi_autoregressive = False
+    y1 = model.inference(
+        x1,
+        mlp2,
+        partial(embed_fn, sar_flag=model.semi_autoregressive, mask=mask1, x=x1),
+    )
+    y2 = model.inference(
+        x2,
+        mlp2,
+        partial(embed_fn, sar_flag=model.semi_autoregressive, mask=mask2, x=x2),
+    )
+    assert (y1 != y2).sum() == 0
+    model.semi_autoregressive = True
+    y1 = model.inference(
+        x1,
+        mlp2,
+        partial(embed_fn, sar_flag=model.semi_autoregressive, mask=mask1, x=x1),
+    )
+    y2 = model.inference(
+        x2,
+        mlp2,
+        partial(embed_fn, sar_flag=model.semi_autoregressive, mask=mask2, x=x2),
+    )
+    assert (y1 != y2).sum() == 0