bayesianflow-for-chem 2.0.2.tar.gz → 2.0.3.tar.gz
This diff shows the content of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/PKG-INFO +7 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/README.md +5 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/__init__.py +4 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/cli.py +30 -16
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/tool.py +3 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/PKG-INFO +7 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/SOURCES.txt +2 -1
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/requires.txt +1 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/setup.py +1 -0
- bayesianflow_for_chem-2.0.3/test/test_molecular_embedding.py +62 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/LICENSE +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/data.py +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/model.py +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/scorer.py +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/spectra.py +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/train.py +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/vocab.txt +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/dependency_links.txt +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/entry_points.txt +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/top_level.txt +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/pyproject.toml +0 -0
- {bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/setup.cfg +0 -0
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bayesianflow_for_chem
-Version: 2.0.2
+Version: 2.0.3
 Summary: Bayesian flow network framework for Chemistry
 Home-page: https://augus1999.github.io/bayesian-flow-network-for-chemistry/
 Author: Nianze A. Tao
@@ -23,6 +23,7 @@ License-File: LICENSE
 Requires-Dist: rdkit>=2025.3.5
 Requires-Dist: torch>=2.8.0
 Requires-Dist: torchao>=0.12
+Requires-Dist: colorama>=0.4.6
 Requires-Dist: numpy>=2.3.2
 Requires-Dist: scipy>=1.16.1
 Requires-Dist: loralib>=0.1.2
@@ -49,6 +50,11 @@ Dynamic: summary
 
 This is the repository of the PyTorch implementation of ChemBFN model.
 
+## Build State
+
+[](https://pypi.org/project/bayesianflow-for-chem/)
+
+
 ## Features
 
 ChemBFN provides the state-of-the-art functionalities of
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/README.md
RENAMED
@@ -5,6 +5,11 @@
 
 This is the repository of the PyTorch implementation of ChemBFN model.
 
+## Build State
+
+[](https://pypi.org/project/bayesianflow-for-chem/)
+
+
 ## Features
 
 ChemBFN provides the state-of-the-art functionalities of
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/__init__.py
RENAMED
@@ -3,6 +3,7 @@
 """
 ChemBFN package.
 """
+import colorama
 from . import data, tool, train, scorer, spectra
 from .model import ChemBFN, MLP, EnsembleChemBFN
 from .cli import main_script
@@ -17,7 +18,7 @@ __all__ = [
     "MLP",
     "EnsembleChemBFN",
 ]
-__version__ = "2.0.2"
+__version__ = "2.0.3"
 __author__ = "Nianze A. Tao (Omozawa Sueno)"
 
 
@@ -28,4 +29,6 @@ def main() -> None:
     :return:
     :rtype: None
     """
+    colorama.just_fix_windows_console()
     main_script(__version__)
+    colorama.deinit()
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/cli.py
RENAMED
@@ -180,14 +180,16 @@ def load_model_config(
         model_config = tomllib.load(f)
     if model_config["ChemBFN"]["num_vocab"] != "match vocabulary size":
         if not isinstance(model_config["ChemBFN"]["num_vocab"], int):
-            print(
+            print(
+                f"\033[0;31mCritical\033[0;0m in {config_file}: You must specify num_vocab."
+            )
             flag_critical += 1
     if model_config["ChemBFN"]["base_model"]:
         model_file = model_config["ChemBFN"]["base_model"]
         for fn in model_file:
             if not os.path.exists(fn):
                 print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: Base model file {fn} does not exist."
                 )
                 flag_critical += 1
     if "MLP" in model_config:
@@ -195,14 +197,14 @@ def load_model_config(
         b = model_config["MLP"]["size"][-1]
         if a != b:
             print(
-                f"
+                f"\033[0;31mCritical\033[0;0m in {config_file}: MLP hidden size {b} should match ChemBFN hidden size {a}."
             )
             flag_critical += 1
         if model_config["MLP"]["base_model"]:
             model_file = model_config["MLP"]["base_model"]
             if not os.path.exists(model_file):
                 print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: Base model file {fn} does not exist."
                 )
                 flag_critical += 1
     return model_config, flag_critical, flag_warning
@@ -226,49 +228,61 @@ def load_runtime_config(
         config = tomllib.load(f)
     tokeniser_name = config["tokeniser"]["name"].lower()
     if not tokeniser_name in "smiles selfies safe fasta".split():
-        print(
+        print(
+            f"\033[0;31mCritical\033[0;0m in {config_file}: Unknown tokensier name: {tokeniser_name}."
+        )
         flag_critical += 1
     if tokeniser_name == "selfies":
         vocab = config["tokeniser"]["vocab"]
         if vocab.lower() == "default":
-            print(
+            print(
+                f"\033[0;31mCritical\033[0;0m in {config_file}: You should specify a vocabulary file."
+            )
             flag_critical += 1
         elif not os.path.exists(vocab):
-            print(
+            print(
+                f"\033[0;31mCritical\033[0;0m in {config_file}: Vocabulary file {vocab} does not exist."
+            )
             flag_critical += 1
     if "train" in config:
         dataset_file = config["train"]["dataset"]
         if not os.path.exists(dataset_file):
             print(
-                f"
+                f"\033[0;31mCritical\033[0;0m in {config_file}: Dataset file {dataset_file} does not exist."
             )
             flag_critical += 1
         logger_name = config["train"]["logger_name"].lower()
         if not logger_name in "csv tensorboard wandb".split():
-            print(
+            print(
+                f"\033[0;31mCritical\033[0;0m in {config_file}: Unknown logger: {logger_name}."
+            )
             flag_critical += 1
         if config["train"]["restart"]:
             ckpt_file = config["train"]["restart"]
             if not os.path.exists(ckpt_file):
                 print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: Restart checkpoint file {ckpt_file} does not exist."
                 )
                 flag_critical += 1
     if "inference" in config:
         if not "train" in config:
             if not isinstance(config["inference"]["sequence_length"], int):
                 print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: You must set an integer for sequence_length."
                 )
                 flag_critical += 1
         if config["inference"]["guidance_objective"]:
             if not "guidance_objective_strength" in config["inference"]:
                 print(
-                    f"
+                    f"\033[0;31mCritical\033[0;0m in {config_file}: You need to add guidance_objective_strength."
                 )
                 flag_critical += 1
         result_dir = Path(config["inference"]["result_file"]).parent
-
+        if not os.path.exists(result_dir):
+            print(
+                f"\033[0;33mWarning\033[0;0m in {config_file}: Directory {result_dir} to save the result does not exist."
+            )
+            flag_warning += 1
     return config, flag_critical, flag_warning
@@ -306,7 +320,7 @@ def main_script(version: str) -> None:
     if runtime_config["train"]["enable_lora"]:
         if not model_config["ChemBFN"]["base_model"]:
             print(
-                f"
+                f"\033[0;33mWarning\033[0;0m in {parser.model_config}: You should load a pretrained model first."
             )
             flag_warning += 1
     if not os.path.exists(runtime_config["train"]["checkpoint_save_path"]):
@@ -314,12 +328,12 @@ def main_script(version: str) -> None:
     else:
         if not model_config["ChemBFN"]["base_model"]:
             print(
-                f"
+                f"\033[0;33mWarning\033[0;0m in {parser.model_config}: You should load a pretrained ChemBFN model."
             )
             flag_warning += 1
         if not model_config["MLP"]["base_model"]:
             print(
-                f"
+                f"\033[0;33mWarning\033[0;0m in {parser.model_config}: You should load a pretrained MLP."
             )
             flag_warning += 1
     if "inference" in runtime_config:
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem/tool.py
RENAMED
@@ -9,6 +9,7 @@ import warnings
 from pathlib import Path
 from typing import List, Dict, Tuple, Union, Optional
 import torch
+import colorama
 import numpy as np
 from torch import cuda, Tensor, softmax
 from torch.utils.data import DataLoader
@@ -141,6 +142,7 @@ def split_dataset(
     assert file.endswith(".csv")
     assert len(split_ratio) == 3
     assert method in ("random", "scaffold")
+    colorama.just_fix_windows_console()
     with open(file, "r") as f:
         data = list(csv.reader(f))
     header = data[0]
@@ -198,6 +200,7 @@ def split_dataset(
     with open(file.replace(".csv", "_val.csv"), "w", newline="") as fva:
         writer = csv.writer(fva)
         writer.writerows([header] + val_set)
+    colorama.deinit()
 
 
 @torch.no_grad()
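For orientation, the asserts visible above pin down `split_dataset`'s contract: a `.csv` input file, a three-element `split_ratio`, and a `method` of `"random"` or `"scaffold"`; the colorama calls now bracket the whole split so any coloured output renders on Windows too. A usage sketch, assuming only that `file` is the first argument (the ratio semantics are a guess and should be checked against the actual signature):

```python
from bayesianflow_for_chem.tool import split_dataset

# Sketch only: keyword names are taken from the asserts above; the 8:1:1
# ratio interpretation is an assumption. The last hunk shows the validation
# part being written next to the input, e.g. mydata_val.csv.
split_dataset("mydata.csv", split_ratio=[8, 1, 1], method="scaffold")
```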
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bayesianflow_for_chem
-Version: 2.0.2
+Version: 2.0.3
 Summary: Bayesian flow network framework for Chemistry
 Home-page: https://augus1999.github.io/bayesian-flow-network-for-chemistry/
 Author: Nianze A. Tao
@@ -23,6 +23,7 @@ License-File: LICENSE
 Requires-Dist: rdkit>=2025.3.5
 Requires-Dist: torch>=2.8.0
 Requires-Dist: torchao>=0.12
+Requires-Dist: colorama>=0.4.6
 Requires-Dist: numpy>=2.3.2
 Requires-Dist: scipy>=1.16.1
 Requires-Dist: loralib>=0.1.2
@@ -49,6 +50,11 @@ Dynamic: summary
 
 This is the repository of the PyTorch implementation of ChemBFN model.
 
+## Build State
+
+[](https://pypi.org/project/bayesianflow-for-chem/)
+
+
 ## Features
 
 ChemBFN provides the state-of-the-art functionalities of
{bayesianflow_for_chem-2.0.2 → bayesianflow_for_chem-2.0.3}/bayesianflow_for_chem.egg-info/SOURCES.txt
RENAMED
@@ -16,4 +16,5 @@ bayesianflow_for_chem.egg-info/SOURCES.txt
 bayesianflow_for_chem.egg-info/dependency_links.txt
 bayesianflow_for_chem.egg-info/entry_points.txt
 bayesianflow_for_chem.egg-info/requires.txt
-bayesianflow_for_chem.egg-info/top_level.txt
+bayesianflow_for_chem.egg-info/top_level.txt
+test/test_molecular_embedding.py
bayesianflow_for_chem-2.0.3/test/test_molecular_embedding.py
ADDED
@@ -0,0 +1,62 @@
+from functools import partial
+import torch
+from bayesianflow_for_chem import ChemBFN, MLP
+from bayesianflow_for_chem.data import VOCAB_COUNT, smiles2token
+
+torch.manual_seed(8964)
+
+model = ChemBFN(VOCAB_COUNT)
+model.eval()
+mlp1 = MLP([512, 256, 3], dropout=0.7)
+mlp1.eval()
+mlp2 = MLP([1024, 512, 3], dropout=0.7)
+mlp2.eval()
+
+x = smiles2token("c1ccccc1O.[NH4+]CCCCCC[O-]")
+x1 = x[None, ...]
+x2 = torch.nn.functional.pad(x1, (0, 7, 0, 0))
+
+
+def embed_fn(z, sar_flag, mask, x):
+    mb0 = z[x == 2].view(z.shape[0], -1) if sar_flag else z[::, 0]
+    mb1 = (z * mask[..., None]).sum(1) / (mask != 0).float().sum(1, True)
+    return torch.cat([mb0, mb1], -1)
+
+
+@torch.inference_mode()
+def test():
+    model.semi_autoregressive = False
+    y1 = model.inference(x1, mlp1)
+    y2 = model.inference(x2, mlp1)
+    assert (y1 != y2).sum() == 0
+    model.semi_autoregressive = True
+    y1 = model.inference(x1, mlp1)
+    y2 = model.inference(x2, mlp1)
+    assert (y1 != y2).sum() == 0
+    # ------- customised embedding extraction -------
+    mask1 = torch.tensor([[0] + [0.7] * 9 + [0] + [0.3] * 16 + [0]])
+    mask2 = torch.tensor([[0] + [0.7] * 9 + [0] + [0.3] * 16 + [0] * 8])
+    model.semi_autoregressive = False
+    y1 = model.inference(
+        x1,
+        mlp2,
+        partial(embed_fn, sar_flag=model.semi_autoregressive, mask=mask1, x=x1),
+    )
+    y2 = model.inference(
+        x2,
+        mlp2,
+        partial(embed_fn, sar_flag=model.semi_autoregressive, mask=mask2, x=x2),
+    )
+    assert (y1 != y2).sum() == 0
+    model.semi_autoregressive = True
+    y1 = model.inference(
+        x1,
+        mlp2,
+        partial(embed_fn, sar_flag=model.semi_autoregressive, mask=mask1, x=x1),
+    )
+    y2 = model.inference(
+        x2,
+        mlp2,
+        partial(embed_fn, sar_flag=model.semi_autoregressive, mask=mask2, x=x2),
+    )
+    assert (y1 != y2).sum() == 0