chembfn-webui 1.0.0__tar.gz → 2.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/PKG-INFO +34 -6
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/README.md +33 -6
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/chembfn_webui/bin/app.py +164 -61
- chembfn_webui-2.1.3/chembfn_webui/bin/favicon.png +0 -0
- chembfn_webui-2.1.3/chembfn_webui/cache/results.csv +1 -0
- chembfn_webui-2.1.3/chembfn_webui/lib/utilities.py +419 -0
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/chembfn_webui/lib/version.py +1 -1
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/chembfn_webui.egg-info/PKG-INFO +34 -6
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/chembfn_webui.egg-info/SOURCES.txt +4 -1
- chembfn_webui-2.1.3/chembfn_webui.egg-info/requires.txt +5 -0
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/setup.py +43 -13
- chembfn_webui-2.1.3/test/test_prompt_parsing.py +82 -0
- chembfn_webui-2.1.3/test/test_user_expression_validator.py +138 -0
- chembfn_webui-1.0.0/chembfn_webui/cache/results.csv +0 -1
- chembfn_webui-1.0.0/chembfn_webui/lib/utilities.py +0 -210
- chembfn_webui-1.0.0/chembfn_webui.egg-info/requires.txt +0 -5
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/LICENSE +0 -0
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/chembfn_webui/__init__.py +0 -0
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/chembfn_webui/cache/cache_file_here.txt +0 -0
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/chembfn_webui/model/base_model/place_base_model_here.txt +0 -0
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/chembfn_webui/model/lora/place_lora_folder_here.txt +0 -0
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/chembfn_webui/model/standalone_model/place_standalone_model_folder_here.txt +0 -0
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/chembfn_webui/model/vocab/place_vocabulary_file_here.txt +0 -0
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/chembfn_webui.egg-info/dependency_links.txt +0 -0
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/chembfn_webui.egg-info/entry_points.txt +0 -0
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/chembfn_webui.egg-info/top_level.txt +0 -0
- {chembfn_webui-1.0.0 → chembfn_webui-2.1.3}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chembfn_webui
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 2.1.3
|
|
4
4
|
Summary: WebUI for ChemBFN
|
|
5
5
|
Home-page: https://github.com/Augus1999/ChemBFN-WebUI
|
|
6
6
|
Author: Nianze A. Tao
|
|
@@ -15,15 +15,16 @@ Classifier: Programming Language :: Python :: 3
|
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
19
|
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
19
20
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
21
|
Requires-Python: >=3.11
|
|
21
22
|
Description-Content-Type: text/markdown
|
|
22
23
|
License-File: LICENSE
|
|
23
|
-
Requires-Dist: bayesianflow_for_chem>=2.
|
|
24
|
-
Requires-Dist: mol2chemfigPy3>=1.5.
|
|
25
|
-
Requires-Dist: gradio
|
|
26
|
-
Requires-Dist: torch>=2.
|
|
24
|
+
Requires-Dist: bayesianflow_for_chem>=2.4.0
|
|
25
|
+
Requires-Dist: mol2chemfigPy3>=1.5.12
|
|
26
|
+
Requires-Dist: gradio<7.0.0,>=6.0.0
|
|
27
|
+
Requires-Dist: torch>=2.9.0
|
|
27
28
|
Requires-Dist: selfies>=2.2.0
|
|
28
29
|
Dynamic: author
|
|
29
30
|
Dynamic: author-email
|
|
@@ -39,10 +40,20 @@ Dynamic: requires-dist
|
|
|
39
40
|
Dynamic: requires-python
|
|
40
41
|
Dynamic: summary
|
|
41
42
|
|
|
42
|
-
##
|
|
43
|
+
## Web-based UI visualisation tool for ChemBFN method
|
|
43
44
|
|
|
45
|
+
[](https://pypi.org/project/chembfn-webui/)
|
|
46
|
+

|
|
47
|
+

|
|
48
|
+
[](https://stand-with-ukraine.pp.ua)
|
|
44
49
|
|
|
45
50
|
|
|
51
|
+
|
|
52
|
+
> Important:
|
|
53
|
+
>
|
|
54
|
+
> For security reasons, it is not recommended to use this application as a public service.
|
|
55
|
+
> When deploying on a local host as a shared application, it is better to install this application in a container or VM, to prevent it from accessing the Internet, and to limit its permissions to read, create, and delete local files and directories.
|
|
56
|
+
|
|
46
57
|
### 1. Install
|
|
47
58
|
|
|
48
59
|
```bash
|
|
@@ -83,6 +94,22 @@ For example,
|
|
|
83
94
|
└───moses_selfies_vocab.txt
|
|
84
95
|
```
|
|
85
96
|
|
|
97
|
+
> Note:
|
|
98
|
+
>
|
|
99
|
+
> >The file `config.json` is automatically saved by the CLI tool `Madmol` provided in the `bayesianflow-for-chem` package. If you train models via the Python API, you need to manually create that file for your models by filling in the template:
|
|
100
|
+
> >```json
|
|
101
|
+
> >{
|
|
102
|
+
> > "padding_index": 0,
|
|
103
|
+
> > "start_index": 1,
|
|
104
|
+
> > "end_index": 2,
|
|
105
|
+
> > "padding_strategy": "static",
|
|
106
|
+
> > "padding_length": PADDING_LENGTH,
|
|
107
|
+
> > "label": [LABEL_NAME_I, LABEL_NAME_II, ...],
|
|
108
|
+
> > "name": JOB_NAME
|
|
109
|
+
> >}
|
|
110
|
+
> >```
|
|
111
|
+
> >The configuration file for base models can be downloaded [here](https://huggingface.co/suenoomozawa/ChemBFN/resolve/main/config.json).
|
|
112
|
+
|
|
86
113
|
If placed correctly, all these files can be seen in the "model explorer" tab.
|
|
87
114
|
|
|
88
115
|
> You can use an external folder to host the models if it follows the same structure as [`chembfn_webui/model`](https://github.com/Augus1999/ChemBFN-WebUI/tree/main/chembfn_webui/model). See the next section for the method.
|
|
@@ -131,6 +158,7 @@ Under "advanced control" tab
|
|
|
131
158
|
|
|
132
159
|
* You can control semi-autoregressive (SAR) behaviour by keying in `F` to switch SAR off, `T` to switch SAR on, or a prompt like `F,F,T,...` to control SAR individually for each model in an ensemble.
|
|
133
160
|
* You can add unwanted tokens, e.g., `[Cu],p,[Si]`.
|
|
161
|
+
* You can customise the result preprocessing function. For example, if the model outputs a reaction SMILES such as "CCI.C[O-]>>COCC", which cannot be recognised by RDKit, you can pass `lambda x: x.split(">>")[-1]` to make the program look only at the products.
|
|
134
162
|
|
|
135
163
|
### 6. Generate molecules
|
|
136
164
|
|
|
@@ -1,13 +1,23 @@
|
|
|
1
|
-
##
|
|
1
|
+
## Web-based UI visualisation tool for ChemBFN method
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/chembfn-webui/)
|
|
4
|
+

|
|
5
|
+

|
|
6
|
+
[](https://stand-with-ukraine.pp.ua)
|
|
2
7
|
|
|
3
8
|
<p align="left">
|
|
4
|
-
<img src="image/screenshot_0.jpeg" alt="screenshot 0" width="
|
|
5
|
-
<img src="image/screenshot_1.jpeg" alt="screenshot 1" width="
|
|
6
|
-
<img src="image/screenshot_2.jpeg" alt="screenshot 2" width="
|
|
7
|
-
<img src="image/screenshot_3.jpeg" alt="screenshot 3" width="
|
|
8
|
-
<img src="image/screenshot_4.jpeg" alt="screenshot 4" width="
|
|
9
|
+
<img src="image/screenshot_0.jpeg" alt="screenshot 0" width="360" height="auto">
|
|
10
|
+
<img src="image/screenshot_1.jpeg" alt="screenshot 1" width="360" height="auto">
|
|
11
|
+
<img src="image/screenshot_2.jpeg" alt="screenshot 2" width="360" height="auto">
|
|
12
|
+
<img src="image/screenshot_3.jpeg" alt="screenshot 3" width="360" height="auto">
|
|
13
|
+
<img src="image/screenshot_4.jpeg" alt="screenshot 4" width="360" height="auto">
|
|
9
14
|
</p>
|
|
10
15
|
|
|
16
|
+
> [!IMPORTANT]
|
|
17
|
+
>
|
|
18
|
+
> For security reasons, it is not recommended to use this application as a public service.
|
|
19
|
+
> When deploying on a local host as a shared application, it is better to install this application in a container or VM, to prevent it from accessing the Internet, and to limit its permissions to read, create, and delete local files and directories.
|
|
20
|
+
|
|
11
21
|
### 1. Install
|
|
12
22
|
|
|
13
23
|
```bash
|
|
@@ -48,6 +58,22 @@ For example,
|
|
|
48
58
|
└───moses_selfies_vocab.txt
|
|
49
59
|
```
|
|
50
60
|
|
|
61
|
+
> [!NOTE]
|
|
62
|
+
>
|
|
63
|
+
> >The file `config.json` is automatically saved by the CLI tool `Madmol` provided in the `bayesianflow-for-chem` package. If you train models via the Python API, you need to manually create that file for your models by filling in the template:
|
|
64
|
+
> >```json
|
|
65
|
+
> >{
|
|
66
|
+
> > "padding_index": 0,
|
|
67
|
+
> > "start_index": 1,
|
|
68
|
+
> > "end_index": 2,
|
|
69
|
+
> > "padding_strategy": "static",
|
|
70
|
+
> > "padding_length": PADDING_LENGTH,
|
|
71
|
+
> > "label": [LABEL_NAME_I, LABEL_NAME_II, ...],
|
|
72
|
+
> > "name": JOB_NAME
|
|
73
|
+
> >}
|
|
74
|
+
> >```
|
|
75
|
+
> >The configuration file for base models can be downloaded [here](https://huggingface.co/suenoomozawa/ChemBFN/resolve/main/config.json).
|
|
76
|
+
|
|
51
77
|
If placed correctly, all these files can be seen in the "model explorer" tab.
|
|
52
78
|
|
|
53
79
|
> You can use an external folder to host the models if it follows the same structure as [`chembfn_webui/model`](./chembfn_webui/model). See the next section for the method.
|
|
@@ -96,6 +122,7 @@ Under "advanced control" tab
|
|
|
96
122
|
|
|
97
123
|
* You can control semi-autoregressive (SAR) behaviour by keying in `F` to switch SAR off, `T` to switch SAR on, or a prompt like `F,F,T,...` to control SAR individually for each model in an ensemble.
|
|
98
124
|
* You can add unwanted tokens, e.g., `[Cu],p,[Si]`.
|
|
125
|
+
* You can customise the result preprocessing function. For example, if the model outputs a reaction SMILES such as "CCI.C[O-]>>COCC", which cannot be recognised by RDKit, you can pass `lambda x: x.split(">>")[-1]` to make the program look only at the products.
|
|
99
126
|
|
|
100
127
|
### 6. Generate molecules
|
|
101
128
|
|
|
@@ -6,8 +6,9 @@ Define application behaviours.
|
|
|
6
6
|
import sys
|
|
7
7
|
import argparse
|
|
8
8
|
from pathlib import Path
|
|
9
|
+
from copy import deepcopy
|
|
9
10
|
from functools import partial
|
|
10
|
-
from typing import Tuple, List, Dict, Union
|
|
11
|
+
from typing import Tuple, List, Dict, Optional, Union, Literal
|
|
11
12
|
|
|
12
13
|
sys.path.append(str(Path(__file__).parent.parent))
|
|
13
14
|
from rdkit.Chem import Draw, MolFromSmiles # type: ignore
|
|
@@ -32,18 +33,21 @@ from bayesianflow_for_chem.tool import (
|
|
|
32
33
|
quantise_model_,
|
|
33
34
|
)
|
|
34
35
|
from lib.utilities import (
|
|
36
|
+
sys_info,
|
|
35
37
|
find_model,
|
|
36
38
|
find_vocab,
|
|
37
39
|
parse_prompt,
|
|
38
40
|
parse_exclude_token,
|
|
39
41
|
parse_sar_control,
|
|
42
|
+
build_result_prep_fn,
|
|
40
43
|
)
|
|
41
44
|
from lib.version import __version__
|
|
42
45
|
|
|
43
46
|
vocabs = find_vocab()
|
|
44
47
|
models = find_model()
|
|
45
|
-
lora_selected = False # lora select flag
|
|
46
48
|
cache_dir = Path(__file__).parent.parent / "cache"
|
|
49
|
+
favicon_dir = Path(__file__).parent / "favicon.png"
|
|
50
|
+
_RESULT_COUNT = 0
|
|
47
51
|
|
|
48
52
|
HTML_STYLE = gr.InputHTMLAttributes(
|
|
49
53
|
autocapitalize="off",
|
|
@@ -74,7 +78,7 @@ def selfies2vec(sel: str, vocab_dict: Dict[str, int]) -> List[int]:
|
|
|
74
78
|
return [vocab_dict.get(i, unknown_id) for i in s]
|
|
75
79
|
|
|
76
80
|
|
|
77
|
-
def
|
|
81
|
+
def _refresh(
|
|
78
82
|
model_selected: str, vocab_selected: str, tokeniser_selected: str
|
|
79
83
|
) -> Tuple[
|
|
80
84
|
List[str], List[str], List[List[str]], List[List[str]], gr.Dropdown, gr.Dropdown
|
|
@@ -119,7 +123,7 @@ def refresh(
|
|
|
119
123
|
return a, b, c, d, e, f
|
|
120
124
|
|
|
121
125
|
|
|
122
|
-
def
|
|
126
|
+
def _select_lora(evt: gr.SelectData, prompt: str) -> str:
|
|
123
127
|
"""
|
|
124
128
|
Select LoRA model name from Dataframe object.
|
|
125
129
|
|
|
@@ -130,20 +134,16 @@ def select_lora(evt: gr.SelectData, prompt: str) -> str:
|
|
|
130
134
|
:return: new prompt string
|
|
131
135
|
:rtype: str
|
|
132
136
|
"""
|
|
133
|
-
global lora_selected
|
|
134
|
-
if lora_selected: # avoid double select
|
|
135
|
-
lora_selected = False
|
|
136
|
-
return prompt
|
|
137
137
|
selected_lora = evt.value
|
|
138
|
-
|
|
139
|
-
if evt.index[1] != 0:
|
|
138
|
+
exist_lora = parse_prompt(prompt)["lora"]
|
|
139
|
+
if evt.index[1] != 0 or selected_lora in exist_lora:
|
|
140
140
|
return prompt
|
|
141
141
|
if not prompt:
|
|
142
142
|
return f"<{selected_lora}:1>"
|
|
143
143
|
return f"{prompt};\n<{selected_lora}:1>"
|
|
144
144
|
|
|
145
145
|
|
|
146
|
-
def
|
|
146
|
+
def _token_name_change_evt(
|
|
147
147
|
token_name: str, vocab_fn: str
|
|
148
148
|
) -> Tuple[gr.Dropdown, gr.Tab, gr.Tab]:
|
|
149
149
|
"""
|
|
@@ -178,15 +178,17 @@ def run(
|
|
|
178
178
|
batch_size: int,
|
|
179
179
|
sequence_size: int,
|
|
180
180
|
guidance_strength: float,
|
|
181
|
-
method:
|
|
181
|
+
method: Literal["BFN", "ODE"],
|
|
182
182
|
temperature: float,
|
|
183
|
-
prompt: str,
|
|
184
|
-
scaffold: str,
|
|
185
|
-
template: str,
|
|
186
|
-
sar_control: str,
|
|
187
|
-
exclude_token: str,
|
|
188
|
-
quantise:
|
|
189
|
-
jited:
|
|
183
|
+
prompt: Optional[str],
|
|
184
|
+
scaffold: Optional[str],
|
|
185
|
+
template: Optional[str],
|
|
186
|
+
sar_control: Optional[str],
|
|
187
|
+
exclude_token: Optional[str],
|
|
188
|
+
quantise: Literal["on", "off"],
|
|
189
|
+
jited: Literal["on", "off"],
|
|
190
|
+
sorted_: Literal["on", "off"],
|
|
191
|
+
result_prep_fn: Optional[str],
|
|
190
192
|
) -> Tuple[Union[List, None], List[str], str, gr.TextArea, str]:
|
|
191
193
|
"""
|
|
192
194
|
Run generation or inpainting.
|
|
@@ -207,6 +209,8 @@ def run(
|
|
|
207
209
|
:param exclude_token: unwanted tokens
|
|
208
210
|
:param quantise: `"on"` or `"off"`
|
|
209
211
|
:param jited: `"on"` or `"off"`
|
|
212
|
+
:param sorted\\_: whether to sort the reulst; `"on"` or `"off"`
|
|
213
|
+
:param result_prep_fn: a string form result preprocessing function
|
|
210
214
|
:type model_name: str
|
|
211
215
|
:type token_name: str
|
|
212
216
|
:type vocab_fn: str
|
|
@@ -216,13 +220,15 @@ def run(
|
|
|
216
220
|
:type guidance_strength: float
|
|
217
221
|
:type method: str
|
|
218
222
|
:type temperature: float
|
|
219
|
-
:type prompt: str
|
|
220
|
-
:type scaffold: str
|
|
221
|
-
:type template: str
|
|
222
|
-
:type sar_control: str
|
|
223
|
-
:type exclude_token: str
|
|
223
|
+
:type prompt: str | None
|
|
224
|
+
:type scaffold: str | None
|
|
225
|
+
:type template: str | None
|
|
226
|
+
:type sar_control: str | None
|
|
227
|
+
:type exclude_token: str | None
|
|
224
228
|
:type quantise: str
|
|
225
229
|
:type jited: str
|
|
230
|
+
:type sorted\\_: str
|
|
231
|
+
:type result_prep_fn: str | None
|
|
226
232
|
:return: list of images \n
|
|
227
233
|
list of generated molecules \n
|
|
228
234
|
Chemfig code \n
|
|
@@ -238,6 +244,8 @@ def run(
|
|
|
238
244
|
lora_label_dict = dict([[i[0], i[2] != []] for i in models["lora"]])
|
|
239
245
|
standalone_lmax_dict = dict([[i[0], i[3]] for i in models["standalone"]])
|
|
240
246
|
lora_lmax_dict = dict([[i[0], i[3]] for i in models["lora"]])
|
|
247
|
+
# ------- build result preprocessing function -------
|
|
248
|
+
_result_prep_fn = build_result_prep_fn(result_prep_fn)
|
|
241
249
|
# ------- build tokeniser -------
|
|
242
250
|
if token_name == "SMILES & SAFE":
|
|
243
251
|
vocab_keys = VOCAB_KEYS
|
|
@@ -245,27 +253,31 @@ def run(
|
|
|
245
253
|
trans_fn = lambda x: [i for i in x if (MolFromSmiles(i) and i)]
|
|
246
254
|
img_fn = lambda x: [Draw.MolToImage(MolFromSmiles(i), (500, 500)) for i in x]
|
|
247
255
|
chemfig_fn = lambda x: [mol2chemfig(i, "-r", inline=True) for i in x]
|
|
248
|
-
|
|
256
|
+
elif token_name == "FASTA":
|
|
249
257
|
vocab_keys = FASTA_VOCAB_KEYS
|
|
250
258
|
tokeniser = fasta2vec
|
|
251
|
-
trans_fn = lambda x: x
|
|
259
|
+
trans_fn = lambda x: [i for i in x if i]
|
|
252
260
|
img_fn = lambda _: None # senseless to provide dumb 2D images
|
|
253
261
|
chemfig_fn = lambda _: [""] # senseless to provide very long Chemfig code
|
|
254
|
-
|
|
262
|
+
elif token_name == "SELFIES":
|
|
255
263
|
vocab_data = load_vocab(vocabs[vocab_fn])
|
|
256
264
|
vocab_keys = vocab_data["vocab_keys"]
|
|
257
265
|
vocab_dict = vocab_data["vocab_dict"]
|
|
258
266
|
tokeniser = partial(selfies2vec, vocab_dict=vocab_dict)
|
|
259
|
-
trans_fn = lambda x: x
|
|
267
|
+
trans_fn = lambda x: [i for i in x if i]
|
|
260
268
|
img_fn = lambda x: [
|
|
261
269
|
Draw.MolToImage(MolFromSmiles(decoder(i)), (500, 500)) for i in x
|
|
262
270
|
]
|
|
263
271
|
chemfig_fn = lambda x: [mol2chemfig(decoder(i), "-r", inline=True) for i in x]
|
|
272
|
+
else:
|
|
273
|
+
raise RuntimeError("Oops, maybe something wrong with Gradio.")
|
|
264
274
|
_method = "bfn" if method == "BFN" else f"ode:{temperature}"
|
|
265
275
|
# ------- build model -------
|
|
266
276
|
prompt_info = parse_prompt(prompt)
|
|
267
277
|
sar_flag = parse_sar_control(sar_control)
|
|
268
|
-
|
|
278
|
+
_info = deepcopy(prompt_info)
|
|
279
|
+
_info["semi-autoregression"] = deepcopy(sar_flag)
|
|
280
|
+
print("Prompt summary:", _info) # prompt
|
|
269
281
|
if not prompt_info["lora"]:
|
|
270
282
|
if model_name in base_model_dict:
|
|
271
283
|
lmax = sequence_size
|
|
@@ -286,11 +298,11 @@ def run(
|
|
|
286
298
|
mlp = MLP.from_checkpoint(
|
|
287
299
|
standalone_model_dict[model_name] / "mlp.pt"
|
|
288
300
|
)
|
|
289
|
-
y = torch.tensor([prompt_info["objective"]], dtype=torch.float32)
|
|
301
|
+
y = torch.tensor([prompt_info["objective"][0]], dtype=torch.float32)
|
|
290
302
|
y = mlp.forward(y)
|
|
291
303
|
else:
|
|
292
304
|
y = None
|
|
293
|
-
_message.append(f"Sequence length
|
|
305
|
+
_message.append(f"Sequence length set to {lmax} from model metadata.")
|
|
294
306
|
bfn.semi_autoregressive = sar_flag[0]
|
|
295
307
|
if quantise == "on":
|
|
296
308
|
quantise_model_(bfn)
|
|
@@ -316,20 +328,20 @@ def run(
|
|
|
316
328
|
mlp = MLP.from_checkpoint(
|
|
317
329
|
lora_model_dict[prompt_info["lora"][0]] / "mlp.pt"
|
|
318
330
|
)
|
|
319
|
-
y = torch.tensor([prompt_info["objective"]], dtype=torch.float32)
|
|
331
|
+
y = torch.tensor([prompt_info["objective"][0]], dtype=torch.float32)
|
|
320
332
|
y = mlp.forward(y)
|
|
321
333
|
else:
|
|
322
334
|
y = None
|
|
323
335
|
if prompt_info["lora_scaling"][0] != 1.0:
|
|
324
336
|
adjust_lora_(bfn, prompt_info["lora_scaling"][0])
|
|
325
|
-
_message.append(f"Sequence length
|
|
337
|
+
_message.append(f"Sequence length set to {lmax} from model metadata.")
|
|
326
338
|
bfn.semi_autoregressive = sar_flag[0]
|
|
327
339
|
if quantise == "on":
|
|
328
340
|
quantise_model_(bfn)
|
|
329
341
|
if jited == "on":
|
|
330
342
|
bfn.compile()
|
|
331
343
|
else:
|
|
332
|
-
lmax = max(
|
|
344
|
+
lmax = max(lora_lmax_dict[i] for i in prompt_info["lora"])
|
|
333
345
|
if model_name in base_model_dict:
|
|
334
346
|
base_model_dir = base_model_dict[model_name]
|
|
335
347
|
else:
|
|
@@ -344,16 +356,25 @@ def run(
|
|
|
344
356
|
if len(sar_flag) == 1:
|
|
345
357
|
sar_flag = [sar_flag[0] for _ in range(len(weights))]
|
|
346
358
|
bfn = EnsembleChemBFN(base_model_dir, lora_dir, mlps, weights)
|
|
347
|
-
y =
|
|
359
|
+
y = (
|
|
360
|
+
[torch.tensor([i], dtype=torch.float32) for i in prompt_info["objective"]]
|
|
361
|
+
if prompt_info["objective"]
|
|
362
|
+
else None
|
|
363
|
+
)
|
|
348
364
|
if quantise == "on":
|
|
349
365
|
bfn.quantise()
|
|
350
366
|
if jited == "on":
|
|
351
367
|
bfn.compile()
|
|
352
|
-
_message.append(f"Sequence length
|
|
368
|
+
_message.append(f"Sequence length set to {lmax} from model metadata.")
|
|
369
|
+
result_prep_fn_ = lambda x: [_result_prep_fn(i) for i in x]
|
|
353
370
|
# ------- inference -------
|
|
354
371
|
allowed_tokens = parse_exclude_token(exclude_token, vocab_keys)
|
|
355
372
|
if not allowed_tokens:
|
|
356
373
|
allowed_tokens = "all"
|
|
374
|
+
if scaffold is None:
|
|
375
|
+
scaffold = ""
|
|
376
|
+
if template is None:
|
|
377
|
+
template = ""
|
|
357
378
|
scaffold = scaffold.strip()
|
|
358
379
|
template = template.strip()
|
|
359
380
|
if scaffold:
|
|
@@ -369,8 +390,9 @@ def run(
|
|
|
369
390
|
vocab_keys=vocab_keys,
|
|
370
391
|
method=_method,
|
|
371
392
|
allowed_tokens=allowed_tokens,
|
|
393
|
+
sort=sorted_ == "on",
|
|
372
394
|
)
|
|
373
|
-
mols = trans_fn(mols)
|
|
395
|
+
mols = trans_fn(result_prep_fn_(mols))
|
|
374
396
|
imgs = img_fn(mols)
|
|
375
397
|
chemfigs = chemfig_fn(mols)
|
|
376
398
|
if template:
|
|
@@ -388,8 +410,9 @@ def run(
|
|
|
388
410
|
vocab_keys=vocab_keys,
|
|
389
411
|
method=_method,
|
|
390
412
|
allowed_tokens=allowed_tokens,
|
|
413
|
+
sort=sorted_ == "on",
|
|
391
414
|
)
|
|
392
|
-
mols = trans_fn(mols)
|
|
415
|
+
mols = trans_fn(result_prep_fn_(mols))
|
|
393
416
|
imgs = img_fn(mols)
|
|
394
417
|
chemfigs = chemfig_fn(mols)
|
|
395
418
|
else:
|
|
@@ -403,16 +426,19 @@ def run(
|
|
|
403
426
|
vocab_keys=vocab_keys,
|
|
404
427
|
method=_method,
|
|
405
428
|
allowed_tokens=allowed_tokens,
|
|
429
|
+
sort=sorted_ == "on",
|
|
406
430
|
)
|
|
407
|
-
mols = trans_fn(mols)
|
|
431
|
+
mols = trans_fn(result_prep_fn_(mols))
|
|
408
432
|
imgs = img_fn(mols)
|
|
409
433
|
chemfigs = chemfig_fn(mols)
|
|
410
|
-
n_mol = len(mols)
|
|
411
434
|
with open(cache_dir / "results.csv", "w", encoding="utf-8", newline="") as rf:
|
|
412
435
|
rf.write("\n".join(mols))
|
|
413
436
|
_message.append(
|
|
414
|
-
f"{n_mol}
|
|
437
|
+
f"{(n_mol := len(mols))} {'smaple' if n_mol in (0, 1) else 'samples'} "
|
|
438
|
+
"generated and saved to cache that can be downloaded."
|
|
415
439
|
)
|
|
440
|
+
global _RESULT_COUNT
|
|
441
|
+
_RESULT_COUNT = n_mol
|
|
416
442
|
return (
|
|
417
443
|
imgs,
|
|
418
444
|
mols,
|
|
@@ -422,11 +448,11 @@ def run(
|
|
|
422
448
|
)
|
|
423
449
|
|
|
424
450
|
|
|
425
|
-
with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
426
|
-
gr.Markdown("### WebUI to generate and visualise molecules for ChemBFN method.")
|
|
451
|
+
with gr.Blocks(title="ChemBFN WebUI", analytics_enabled=False) as app:
|
|
427
452
|
with gr.Row():
|
|
428
453
|
with gr.Column(scale=1):
|
|
429
454
|
btn = gr.Button("RUN", variant="primary")
|
|
455
|
+
stop = gr.Button("\u23f9", variant="stop", visible=False)
|
|
430
456
|
model_name = gr.Dropdown(
|
|
431
457
|
[i[0] for i in models["base"]] + [i[0] for i in models["standalone"]],
|
|
432
458
|
label="model",
|
|
@@ -471,14 +497,15 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
471
497
|
message = gr.TextArea(label="message", lines=2)
|
|
472
498
|
with gr.Tab(label="result viewer"):
|
|
473
499
|
with gr.Tab(label="result"):
|
|
474
|
-
btn_download = gr.File(
|
|
500
|
+
btn_download = gr.File(
|
|
501
|
+
str(cache_dir / "results.csv"), label="download", visible=False
|
|
502
|
+
)
|
|
475
503
|
result = gr.Dataframe(
|
|
476
504
|
headers=["molecule"],
|
|
477
|
-
|
|
505
|
+
column_count=(1, "fixed"),
|
|
478
506
|
label="",
|
|
479
|
-
|
|
507
|
+
interactive=False,
|
|
480
508
|
show_row_numbers=True,
|
|
481
|
-
show_copy_button=True,
|
|
482
509
|
)
|
|
483
510
|
with gr.Tab(
|
|
484
511
|
label="LATEX Chemfig", visible=token_name.value != "FASTA"
|
|
@@ -496,7 +523,7 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
496
523
|
vocab_table = gr.Dataframe(
|
|
497
524
|
list(vocabs.keys()),
|
|
498
525
|
headers=["name"],
|
|
499
|
-
|
|
526
|
+
column_count=(1, "fixed"),
|
|
500
527
|
label="",
|
|
501
528
|
interactive=False,
|
|
502
529
|
show_row_numbers=True,
|
|
@@ -505,7 +532,7 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
505
532
|
base_table = gr.Dataframe(
|
|
506
533
|
[i[0] for i in models["base"]],
|
|
507
534
|
headers=["name"],
|
|
508
|
-
|
|
535
|
+
column_count=(1, "fixed"),
|
|
509
536
|
label="",
|
|
510
537
|
interactive=False,
|
|
511
538
|
show_row_numbers=True,
|
|
@@ -514,7 +541,7 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
514
541
|
standalone_table = gr.Dataframe(
|
|
515
542
|
[[i[0], i[2]] for i in models["standalone"]],
|
|
516
543
|
headers=["name", "objective"],
|
|
517
|
-
|
|
544
|
+
column_count=(2, "fixed"),
|
|
518
545
|
label="",
|
|
519
546
|
interactive=False,
|
|
520
547
|
show_row_numbers=True,
|
|
@@ -523,7 +550,7 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
523
550
|
lora_tabel = gr.Dataframe(
|
|
524
551
|
[[i[0], i[2]] for i in models["lora"]],
|
|
525
552
|
headers=["name", "objective"],
|
|
526
|
-
|
|
553
|
+
column_count=(2, "fixed"),
|
|
527
554
|
label="",
|
|
528
555
|
interactive=False,
|
|
529
556
|
show_row_numbers=True,
|
|
@@ -540,10 +567,33 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
540
567
|
placeholder="key in unwanted tokens separated by comma.",
|
|
541
568
|
html_attributes=HTML_STYLE,
|
|
542
569
|
)
|
|
543
|
-
|
|
544
|
-
|
|
570
|
+
result_prep_fn = gr.Textbox(
|
|
571
|
+
"lambda x: x",
|
|
572
|
+
label="result preprocessing function",
|
|
573
|
+
placeholder="lambda x: x",
|
|
574
|
+
html_attributes=HTML_STYLE,
|
|
575
|
+
)
|
|
576
|
+
with gr.Row(scale=1):
|
|
577
|
+
quantise = gr.Radio(
|
|
578
|
+
["on", "off"], value="off", label="quantisation"
|
|
579
|
+
)
|
|
580
|
+
jited = gr.Radio(["on", "off"], value="off", label="JIT")
|
|
581
|
+
sorted_ = gr.Radio(
|
|
582
|
+
["on", "off"], value="off", label="sort result based on entropy"
|
|
583
|
+
)
|
|
584
|
+
gr.HTML(sys_info(), elem_classes="custom_footer", elem_id="footer")
|
|
545
585
|
# ------ user interaction events -------
|
|
546
|
-
btn.click(
|
|
586
|
+
gen = btn.click(
|
|
587
|
+
fn=lambda: (
|
|
588
|
+
gr.Button("RUN", variant="primary", visible=False),
|
|
589
|
+
gr.Button("\u23f9", variant="stop", visible=True),
|
|
590
|
+
),
|
|
591
|
+
inputs=None,
|
|
592
|
+
outputs=[btn, stop],
|
|
593
|
+
api_name="switch_to_stop_mode",
|
|
594
|
+
api_description="Switch to STOP.",
|
|
595
|
+
api_visibility="private",
|
|
596
|
+
).then(
|
|
547
597
|
fn=run,
|
|
548
598
|
inputs=[
|
|
549
599
|
model_name,
|
|
@@ -562,11 +612,37 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
562
612
|
exclude_token,
|
|
563
613
|
quantise,
|
|
564
614
|
jited,
|
|
615
|
+
sorted_,
|
|
616
|
+
result_prep_fn,
|
|
565
617
|
],
|
|
566
618
|
outputs=[img, result, chemfig, message, btn_download],
|
|
619
|
+
api_name="run",
|
|
620
|
+
api_description="Run ChemBFN model.",
|
|
621
|
+
)
|
|
622
|
+
gen.then(
|
|
623
|
+
fn=lambda: (
|
|
624
|
+
gr.Button("RUN", variant="primary", visible=True),
|
|
625
|
+
gr.Button("\u23f9", variant="stop", visible=False),
|
|
626
|
+
),
|
|
627
|
+
inputs=None,
|
|
628
|
+
outputs=[btn, stop],
|
|
629
|
+
api_name="switch_back_to_run_mode",
|
|
630
|
+
api_description="Swtch back to RUN.",
|
|
631
|
+
api_visibility="private",
|
|
632
|
+
)
|
|
633
|
+
stop.click(
|
|
634
|
+
fn=lambda: (
|
|
635
|
+
gr.Button("RUN", variant="primary", visible=True),
|
|
636
|
+
gr.Button("\u23f9", variant="stop", visible=False),
|
|
637
|
+
),
|
|
638
|
+
inputs=None,
|
|
639
|
+
outputs=[btn, stop],
|
|
640
|
+
cancels=[gen],
|
|
641
|
+
api_name="stop",
|
|
642
|
+
api_description="Stop the model.",
|
|
567
643
|
)
|
|
568
644
|
btn_refresh.click(
|
|
569
|
-
fn=
|
|
645
|
+
fn=_refresh,
|
|
570
646
|
inputs=[model_name, vocab_fn, token_name],
|
|
571
647
|
outputs=[
|
|
572
648
|
vocab_table,
|
|
@@ -576,11 +652,14 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
576
652
|
model_name,
|
|
577
653
|
vocab_fn,
|
|
578
654
|
],
|
|
655
|
+
api_name="refresh_model_list",
|
|
656
|
+
api_description="Refresh the model list.",
|
|
579
657
|
)
|
|
580
658
|
token_name.input(
|
|
581
|
-
fn=
|
|
659
|
+
fn=_token_name_change_evt,
|
|
582
660
|
inputs=[token_name, vocab_fn],
|
|
583
661
|
outputs=[vocab_fn, code, gallery],
|
|
662
|
+
api_visibility="private",
|
|
584
663
|
)
|
|
585
664
|
method.input(
|
|
586
665
|
fn=lambda x, y: gr.Slider(
|
|
@@ -593,12 +672,25 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
593
672
|
),
|
|
594
673
|
inputs=[method, temperature],
|
|
595
674
|
outputs=temperature,
|
|
675
|
+
api_name="select_sampling_method",
|
|
676
|
+
api_description="Select sampling method between 'BFN' and 'ODE'.",
|
|
677
|
+
api_visibility="private",
|
|
678
|
+
)
|
|
679
|
+
lora_tabel.select(
|
|
680
|
+
fn=_select_lora,
|
|
681
|
+
inputs=prompt,
|
|
682
|
+
outputs=prompt,
|
|
683
|
+
api_name="select_lora",
|
|
684
|
+
api_description="Select LoRA model from the model list.",
|
|
685
|
+
api_visibility="private",
|
|
596
686
|
)
|
|
597
|
-
lora_tabel.select(fn=select_lora, inputs=prompt, outputs=prompt)
|
|
598
687
|
result.change(
|
|
599
|
-
fn=lambda x: gr.File(x, label="download", visible=
|
|
688
|
+
fn=lambda x: gr.File(x, label="download", visible=_RESULT_COUNT > 0),
|
|
600
689
|
inputs=btn_download,
|
|
601
690
|
outputs=btn_download,
|
|
691
|
+
api_name="change_download_state",
|
|
692
|
+
api_description="Hide or show the file downloading item.",
|
|
693
|
+
api_visibility="private",
|
|
602
694
|
)
|
|
603
695
|
|
|
604
696
|
|
|
@@ -609,18 +701,29 @@ def main() -> None:
|
|
|
609
701
|
:return:
|
|
610
702
|
:rtype: None
|
|
611
703
|
"""
|
|
704
|
+
from rdkit import RDLogger
|
|
705
|
+
|
|
706
|
+
RDLogger.DisableLog("rdApp.*") # type: ignore
|
|
612
707
|
parser = argparse.ArgumentParser(
|
|
613
708
|
description="A web-based visualisation tool for ChemBFN method.",
|
|
614
|
-
epilog=f"ChemBFN WebUI {__version__},
|
|
709
|
+
epilog=f"ChemBFN WebUI {__version__}, "
|
|
710
|
+
"developed in Hiroshima University by chemists for chemists. "
|
|
615
711
|
"Visit https://augus1999.github.io/bayesian-flow-network-for-chemistry/ for more details.",
|
|
616
712
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
617
713
|
)
|
|
618
714
|
parser.add_argument(
|
|
619
|
-
"--public", default=False, help="open to public", action="store_true"
|
|
715
|
+
"-P", "--public", default=False, help="open to public", action="store_true"
|
|
620
716
|
)
|
|
621
717
|
parser.add_argument("-V", "--version", action="version", version=__version__)
|
|
622
718
|
args = parser.parse_args()
|
|
623
|
-
|
|
719
|
+
print(f"This is ChemBFN WebUI version {__version__}")
|
|
720
|
+
app.launch(
|
|
721
|
+
share=args.public,
|
|
722
|
+
footer_links=["api"],
|
|
723
|
+
allowed_paths=[cache_dir.absolute().__str__()],
|
|
724
|
+
favicon_path=favicon_dir.absolute().__str__(),
|
|
725
|
+
css=".custom_footer {text-align:center;bottom:0;}",
|
|
726
|
+
)
|
|
624
727
|
|
|
625
728
|
|
|
626
729
|
if __name__ == "__main__":
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
c1ccccc1
|