bayesianflow-for-chem 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bayesianflow_for_chem/__init__.py +1 -1
- bayesianflow_for_chem/cli.py +22 -7
- {bayesianflow_for_chem-2.0.0.dist-info → bayesianflow_for_chem-2.0.1.dist-info}/METADATA +2 -3
- {bayesianflow_for_chem-2.0.0.dist-info → bayesianflow_for_chem-2.0.1.dist-info}/RECORD +8 -8
- {bayesianflow_for_chem-2.0.0.dist-info → bayesianflow_for_chem-2.0.1.dist-info}/WHEEL +0 -0
- {bayesianflow_for_chem-2.0.0.dist-info → bayesianflow_for_chem-2.0.1.dist-info}/entry_points.txt +0 -0
- {bayesianflow_for_chem-2.0.0.dist-info → bayesianflow_for_chem-2.0.1.dist-info}/licenses/LICENSE +0 -0
- {bayesianflow_for_chem-2.0.0.dist-info → bayesianflow_for_chem-2.0.1.dist-info}/top_level.txt +0 -0
bayesianflow_for_chem/cli.py
CHANGED
|
@@ -78,6 +78,7 @@ epoch = 100
|
|
|
78
78
|
batch_size = 512
|
|
79
79
|
semi_autoregressive = false
|
|
80
80
|
enable_lora = false
|
|
81
|
+
dynamic_padding = false # only set to true when pretraining a model
|
|
81
82
|
restart = "" # or a checkpoint file in absolute path
|
|
82
83
|
dataset = "home/user/project/dataset/qm9.csv"
|
|
83
84
|
molecule_tag = "smiles"
|
|
@@ -107,13 +108,24 @@ exclude_duplicate = true # to only store unique samples
|
|
|
107
108
|
result_file = "home/user/project/result/result.csv"
|
|
108
109
|
"""
|
|
109
110
|
|
|
111
|
+
_MESSAGE = r"""
|
|
112
|
+
madmadmadmadmadmadmadmadmadmadmadmadmadmadmad
|
|
113
|
+
__ __ __ ____ __ __ _____ __
|
|
114
|
+
( \/ ) /__\ ( _ \( \/ )( _ )( )
|
|
115
|
+
) ( /(__)\ )(_) )) ( )(_)( )(__
|
|
116
|
+
(_/\/\_)(__)(__)(____/(_/\/\_)(_____)(____)
|
|
117
|
+
Version {}
|
|
118
|
+
madmadmadmadmadmadmadmadmadmadmadmadmadmadmad
|
|
119
|
+
"""
|
|
120
|
+
|
|
110
121
|
|
|
111
122
|
def parse_cli(version: str) -> argparse.Namespace:
|
|
112
123
|
parser = argparse.ArgumentParser(
|
|
113
|
-
description="
|
|
124
|
+
description="Madmol: a CLI molecular design tool for "
|
|
114
125
|
"de novo design and R-group replacement, "
|
|
115
|
-
"based on generative route of ChemBFN method."
|
|
116
|
-
|
|
126
|
+
"based on generative route of ChemBFN method. "
|
|
127
|
+
"Let's make some craziest molecules.",
|
|
128
|
+
epilog=f"Madmol {version}, developed in Hiroshima University",
|
|
117
129
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
118
130
|
)
|
|
119
131
|
parser.add_argument(
|
|
@@ -289,6 +301,7 @@ def main_script(version: str) -> None:
|
|
|
289
301
|
return
|
|
290
302
|
if flag_critical != 0:
|
|
291
303
|
raise RuntimeError
|
|
304
|
+
print(_MESSAGE.format(version))
|
|
292
305
|
# ####### build tokeniser #######
|
|
293
306
|
tokeniser_config = runtime_config["tokeniser"]
|
|
294
307
|
tokeniser_name = tokeniser_config["name"].lower()
|
|
@@ -369,7 +382,6 @@ def main_script(version: str) -> None:
|
|
|
369
382
|
_mol = ".".join([i[j] for j in mol_idx])
|
|
370
383
|
_data_len.append(tokeniser(_mol).shape[-1])
|
|
371
384
|
lmax = max(_data_len)
|
|
372
|
-
print(f"maximum sequence length: {lmax}")
|
|
373
385
|
dataset = CSVData(dataset_file)
|
|
374
386
|
dataset.map(
|
|
375
387
|
partial(_encode, mol_tag=mol_tag, obj_tag=obj_tag, tokeniser=tokeniser)
|
|
@@ -420,12 +432,13 @@ def main_script(version: str) -> None:
|
|
|
420
432
|
)
|
|
421
433
|
# ####### build model #######
|
|
422
434
|
if runtime_config["train"]["enable_lora"]:
|
|
423
|
-
bfn.enable_lora(
|
|
435
|
+
bfn.enable_lora(bfn.hparam["channel"] // 128)
|
|
424
436
|
model = Model(bfn, mlp, scorer)
|
|
425
437
|
model.model.semi_autoregressive = runtime_config["train"]["semi_autoregressive"]
|
|
426
438
|
# ####### strat training #######
|
|
427
439
|
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
|
|
428
|
-
|
|
440
|
+
if not runtime_config["train"]["dynamic_padding"]:
|
|
441
|
+
os.environ["MAX_PADDING_LENGTH"] = f"{lmax}" # important!
|
|
429
442
|
torch.set_float32_matmul_precision("medium")
|
|
430
443
|
trainer.fit(
|
|
431
444
|
model,
|
|
@@ -442,7 +455,9 @@ def main_script(version: str) -> None:
|
|
|
442
455
|
"padding_index": 0,
|
|
443
456
|
"start_index": 1,
|
|
444
457
|
"end_index": 2,
|
|
445
|
-
"padding_strategy":
|
|
458
|
+
"padding_strategy": (
|
|
459
|
+
"dynamic" if runtime_config["train"]["dynamic_padding"] else "static"
|
|
460
|
+
),
|
|
446
461
|
"padding_length": lmax,
|
|
447
462
|
"label": obj_tag,
|
|
448
463
|
"name": runtime_config["run_name"],
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bayesianflow_for_chem
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 2.0.1
|
|
4
4
|
Summary: Bayesian flow network framework for Chemistry
|
|
5
5
|
Home-page: https://augus1999.github.io/bayesian-flow-network-for-chemistry/
|
|
6
6
|
Author: Nianze A. Tao
|
|
@@ -12,10 +12,9 @@ Classifier: Development Status :: 5 - Production/Stable
|
|
|
12
12
|
Classifier: Intended Audience :: Science/Research
|
|
13
13
|
Classifier: Natural Language :: English
|
|
14
14
|
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
17
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
18
|
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
20
19
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
20
|
Requires-Python: >=3.11
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
bayesianflow_for_chem/__init__.py,sha256=
|
|
2
|
-
bayesianflow_for_chem/cli.py,sha256=
|
|
1
|
+
bayesianflow_for_chem/__init__.py,sha256=gkNLgOEBxs_WfxSVgEJ0u5zPAlfPezvtYUuFZoXLCFE,464
|
|
2
|
+
bayesianflow_for_chem/cli.py,sha256=HPg_XbRqD3ViJ9q90X3TnnwI8RnWRFGyBI13eMBTQX8,21024
|
|
3
3
|
bayesianflow_for_chem/data.py,sha256=WoOCOVmJX4WeHa2WeO4i66J2FS8rvRaYRCdlBN7ZeOM,6576
|
|
4
4
|
bayesianflow_for_chem/model.py,sha256=qpRDEJR9ZhQOP9PSY5IVIG71w-Qg0sZnV-Oo35WvY20,50872
|
|
5
5
|
bayesianflow_for_chem/scorer.py,sha256=gQFUlkyxitch02ntqcRh1ZS8aondKLynW5U6NfTQTb4,4084
|
|
@@ -7,9 +7,9 @@ bayesianflow_for_chem/spectra.py,sha256=Ba9ib1aDvTtDYbH3b4d-lIty3ZSQMu7jwehuV2Km
|
|
|
7
7
|
bayesianflow_for_chem/tool.py,sha256=hjzeUlYrpHwCjyJR6conG8OoCfyHZdxmZyv0NePY6C4,20273
|
|
8
8
|
bayesianflow_for_chem/train.py,sha256=hGKyhGhLch-exSYPZdLXrLn3gf39Q1VLSJs2qtuikQE,9709
|
|
9
9
|
bayesianflow_for_chem/vocab.txt,sha256=HgtAZmpWYk4y8PqEVC4vqut1vE75DfRKE_10s2UW0rU,790
|
|
10
|
-
bayesianflow_for_chem-2.0.
|
|
11
|
-
bayesianflow_for_chem-2.0.
|
|
12
|
-
bayesianflow_for_chem-2.0.
|
|
13
|
-
bayesianflow_for_chem-2.0.
|
|
14
|
-
bayesianflow_for_chem-2.0.
|
|
15
|
-
bayesianflow_for_chem-2.0.
|
|
10
|
+
bayesianflow_for_chem-2.0.1.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
11
|
+
bayesianflow_for_chem-2.0.1.dist-info/METADATA,sha256=YuVMnLW0z8OLzUmR4d90CsygNvqqisp-v3SrDQxxa70,5611
|
|
12
|
+
bayesianflow_for_chem-2.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
+
bayesianflow_for_chem-2.0.1.dist-info/entry_points.txt,sha256=N63RMoJsr8rxuKxc7Fj802SL8J5AlpCoPkS8E3IFPLI,54
|
|
14
|
+
bayesianflow_for_chem-2.0.1.dist-info/top_level.txt,sha256=KHsanI3BMCt8D9Qpze2ycrF6nMa3PyojgO6eS1c8kco,22
|
|
15
|
+
bayesianflow_for_chem-2.0.1.dist-info/RECORD,,
|
|
File without changes
|
{bayesianflow_for_chem-2.0.0.dist-info → bayesianflow_for_chem-2.0.1.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{bayesianflow_for_chem-2.0.0.dist-info → bayesianflow_for_chem-2.0.1.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{bayesianflow_for_chem-2.0.0.dist-info → bayesianflow_for_chem-2.0.1.dist-info}/top_level.txt
RENAMED
|
File without changes
|