chembfn-webui 0.1.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of chembfn-webui might be problematic. Click here for more details.
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/PKG-INFO +9 -2
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/README.md +8 -1
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui/bin/app.py +170 -18
- chembfn_webui-0.3.0/chembfn_webui/cache/results.csv +1 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui/lib/utilities.py +77 -1
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui/lib/version.py +1 -1
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui.egg-info/PKG-INFO +9 -2
- chembfn_webui-0.1.0/chembfn_webui/cache/results.csv +0 -15
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/LICENSE +0 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui/__init__.py +0 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui/cache/cache_file_here.txt +0 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui/model/base_model/place_base_model_here.txt +0 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui/model/lora/place_lora_folder_here.txt +0 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui/model/standalone_model/place_standalone_model_folder_here.txt +0 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui/model/vocab/place_vocabulary_file_here.txt +0 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui.egg-info/SOURCES.txt +0 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui.egg-info/dependency_links.txt +0 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui.egg-info/entry_points.txt +0 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui.egg-info/requires.txt +0 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui.egg-info/top_level.txt +0 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/setup.cfg +0 -0
- {chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chembfn_webui
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: WebUI for ChemBFN
|
|
5
5
|
Home-page: https://github.com/Augus1999/ChemBFN-WebUI
|
|
6
6
|
Author: Nianze A. Tao
|
|
@@ -122,9 +122,16 @@ $ chembfn
|
|
|
122
122
|
|
|
123
123
|
Under "advanced control" tab
|
|
124
124
|
|
|
125
|
-
* You can control semi-autoregressive behaviours by key in `F` for
|
|
125
|
+
* You can control the semi-autoregressive (SAR) behaviour by keying in `F` to switch SAR off or `T` to switch it on; a prompt like `F,F,T,...` controls SAR individually for each model in an ensemble.
|
|
126
126
|
* You can add unwanted tokens, e.g., `[Cu],p,[Si]`.
|
|
127
127
|
|
|
128
128
|
### 6. Generate molecules
|
|
129
129
|
|
|
130
130
|
Click "RUN" then here you go! If an error occurred, please check your prompts and settings.
|
|
131
|
+
|
|
132
|
+
## Where to obtain the models?
|
|
133
|
+
|
|
134
|
+
* Pretrained models: [https://huggingface.co/suenoomozawa/ChemBFN](https://huggingface.co/suenoomozawa/ChemBFN)
|
|
135
|
+
* ChemBFN source code: [https://github.com/Augus1999/bayesian-flow-network-for-chemistry](https://github.com/Augus1999/bayesian-flow-network-for-chemistry)
|
|
136
|
+
* ChemBFN document: [https://augus1999.github.io/bayesian-flow-network-for-chemistry/](https://augus1999.github.io/bayesian-flow-network-for-chemistry/)
|
|
137
|
+
* ChemBFN package: [https://pypi.org/project/bayesianflow-for-chem/](https://pypi.org/project/bayesianflow-for-chem/)
|
|
@@ -87,9 +87,16 @@ $ chembfn
|
|
|
87
87
|
|
|
88
88
|
Under "advanced control" tab
|
|
89
89
|
|
|
90
|
-
* You can control semi-autoregressive behaviours by key in `F` for
|
|
90
|
+
* You can control the semi-autoregressive (SAR) behaviour by keying in `F` to switch SAR off or `T` to switch it on; a prompt like `F,F,T,...` controls SAR individually for each model in an ensemble.
|
|
91
91
|
* You can add unwanted tokens, e.g., `[Cu],p,[Si]`.
|
|
92
92
|
|
|
93
93
|
### 6. Generate molecules
|
|
94
94
|
|
|
95
95
|
Click "RUN" then here you go! If an error occurred, please check your prompts and settings.
|
|
96
|
+
|
|
97
|
+
## Where to obtain the models?
|
|
98
|
+
|
|
99
|
+
* Pretrained models: [https://huggingface.co/suenoomozawa/ChemBFN](https://huggingface.co/suenoomozawa/ChemBFN)
|
|
100
|
+
* ChemBFN source code: [https://github.com/Augus1999/bayesian-flow-network-for-chemistry](https://github.com/Augus1999/bayesian-flow-network-for-chemistry)
|
|
101
|
+
* ChemBFN document: [https://augus1999.github.io/bayesian-flow-network-for-chemistry/](https://augus1999.github.io/bayesian-flow-network-for-chemistry/)
|
|
102
|
+
* ChemBFN package: [https://pypi.org/project/bayesianflow-for-chem/](https://pypi.org/project/bayesianflow-for-chem/)
|
|
@@ -7,10 +7,10 @@ import sys
|
|
|
7
7
|
import argparse
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
from functools import partial
|
|
10
|
-
from typing import Tuple, List, Dict
|
|
10
|
+
from typing import Tuple, List, Dict, Union
|
|
11
11
|
|
|
12
12
|
sys.path.append(str(Path(__file__).parent.parent))
|
|
13
|
-
from rdkit.Chem import Draw, MolFromSmiles
|
|
13
|
+
from rdkit.Chem import Draw, MolFromSmiles
|
|
14
14
|
from mol2chemfigPy3 import mol2chemfig
|
|
15
15
|
import gradio as gr
|
|
16
16
|
import torch
|
|
@@ -24,7 +24,7 @@ from bayesianflow_for_chem.data import (
|
|
|
24
24
|
aa2vec,
|
|
25
25
|
split_selfies,
|
|
26
26
|
)
|
|
27
|
-
from bayesianflow_for_chem.tool import sample, inpaint, adjust_lora_
|
|
27
|
+
from bayesianflow_for_chem.tool import sample, inpaint, adjust_lora_, quantise_model_
|
|
28
28
|
from lib.utilities import (
|
|
29
29
|
find_model,
|
|
30
30
|
find_vocab,
|
|
@@ -39,8 +39,26 @@ models = find_model()
|
|
|
39
39
|
lora_selected = False # lora select flag
|
|
40
40
|
cache_dir = Path(__file__).parent.parent / "cache"
|
|
41
41
|
|
|
42
|
+
HTML_STYLE = gr.InputHTMLAttributes(
|
|
43
|
+
autocapitalize="off",
|
|
44
|
+
autocorrect="off",
|
|
45
|
+
spellcheck=False,
|
|
46
|
+
autocomplete="off",
|
|
47
|
+
lang="en",
|
|
48
|
+
)
|
|
49
|
+
|
|
42
50
|
|
|
43
51
|
def selfies2vec(sel: str, vocab_dict: Dict[str, int]) -> List[int]:
|
|
52
|
+
"""
|
|
53
|
+
Tokenise a SELFIES string.
|
|
54
|
+
|
|
55
|
+
:param sel: SELFIES string
|
|
56
|
+
:param vocab_dict: vocabulary dictionary
|
|
57
|
+
:type sel: str
|
|
58
|
+
:type vocab_dict: dict
|
|
59
|
+
:return: a list of token indices
|
|
60
|
+
:rtype: list
|
|
61
|
+
"""
|
|
44
62
|
s = split_selfies(sel)
|
|
45
63
|
unknown_id = None
|
|
46
64
|
for key, idx in vocab_dict.items():
|
|
@@ -55,6 +73,23 @@ def refresh(
|
|
|
55
73
|
) -> Tuple[
|
|
56
74
|
List[str], List[str], List[List[str]], List[List[str]], gr.Dropdown, gr.Dropdown
|
|
57
75
|
]:
|
|
76
|
+
"""
|
|
77
|
+
Refresh model file list.
|
|
78
|
+
|
|
79
|
+
:param model_selected: the selected model name
|
|
80
|
+
:param vocab_selected: the selected vocabulary name
|
|
81
|
+
:param tokeniser_selected: the selected tokeniser name
|
|
82
|
+
:type model_selected: str
|
|
83
|
+
:type vocab_selected: str
|
|
84
|
+
:type tokeniser_selected: str
|
|
85
|
+
:return: a list of vocabulary names \n
|
|
86
|
+
a list of base model files \n
|
|
87
|
+
a list of standalone model files \n
|
|
88
|
+
a list of LoRA model files \n
|
|
89
|
+
Gradio Dropdown item \n
|
|
90
|
+
Gradio Dropdown item \n
|
|
91
|
+
:rtype: tuple
|
|
92
|
+
"""
|
|
58
93
|
global vocabs, models
|
|
59
94
|
vocabs = find_vocab()
|
|
60
95
|
models = find_model()
|
|
@@ -66,17 +101,29 @@ def refresh(
|
|
|
66
101
|
[i[0] for i in models["base"]] + [i[0] for i in models["standalone"]],
|
|
67
102
|
value=model_selected,
|
|
68
103
|
label="model",
|
|
104
|
+
filterable=False,
|
|
69
105
|
)
|
|
70
106
|
f = gr.Dropdown(
|
|
71
107
|
list(vocabs.keys()),
|
|
72
108
|
value=vocab_selected,
|
|
73
109
|
label="vocabulary",
|
|
74
110
|
visible=tokeniser_selected == "SELFIES",
|
|
111
|
+
filterable=False,
|
|
75
112
|
)
|
|
76
113
|
return a, b, c, d, e, f
|
|
77
114
|
|
|
78
115
|
|
|
79
116
|
def select_lora(evt: gr.SelectData, prompt: str) -> str:
|
|
117
|
+
"""
|
|
118
|
+
Select LoRA model name from Dataframe object.
|
|
119
|
+
|
|
120
|
+
:param evt: `~gradio.SelectData` instance
|
|
121
|
+
:param prompt: prompt string
|
|
122
|
+
:type evt: gradio.SelectData
|
|
123
|
+
:type prompt: str
|
|
124
|
+
:return: new prompt string
|
|
125
|
+
:rtype: str
|
|
126
|
+
"""
|
|
80
127
|
global lora_selected
|
|
81
128
|
if lora_selected: # avoid double select
|
|
82
129
|
lora_selected = False
|
|
@@ -90,6 +137,33 @@ def select_lora(evt: gr.SelectData, prompt: str) -> str:
|
|
|
90
137
|
return f"{prompt};\n<{selected_lora}:1>"
|
|
91
138
|
|
|
92
139
|
|
|
140
|
+
def token_name_change_evt(
|
|
141
|
+
token_name: str, vocab_fn: str
|
|
142
|
+
) -> Tuple[gr.Dropdown, gr.Tab, gr.Tab]:
|
|
143
|
+
"""
|
|
144
|
+
Define token_name-dropdown item change event.
|
|
145
|
+
|
|
146
|
+
:param token_name: tokeniser name
|
|
147
|
+
:param vocab_fn: customised vocabulary name
|
|
148
|
+
:type token_name: str
|
|
149
|
+
:type vocab_fn: str
|
|
150
|
+
:return: Dropdown item \n
|
|
151
|
+
Tab item \n
|
|
152
|
+
Tab item \n
|
|
153
|
+
:rtype: tuple
|
|
154
|
+
"""
|
|
155
|
+
a = gr.Dropdown(
|
|
156
|
+
list(vocabs.keys()),
|
|
157
|
+
value=vocab_fn,
|
|
158
|
+
label="vocabulary",
|
|
159
|
+
visible=token_name == "SELFIES",
|
|
160
|
+
filterable=False,
|
|
161
|
+
)
|
|
162
|
+
b = gr.Tab(label="LATEX Chemfig", visible=token_name != "FASTA")
|
|
163
|
+
c = gr.Tab(label="gallery", visible=token_name != "FASTA")
|
|
164
|
+
return a, b, c
|
|
165
|
+
|
|
166
|
+
|
|
93
167
|
def run(
|
|
94
168
|
model_name: str,
|
|
95
169
|
token_name: str,
|
|
@@ -104,7 +178,49 @@ def run(
|
|
|
104
178
|
scaffold: str,
|
|
105
179
|
sar_control: str,
|
|
106
180
|
exclude_token: str,
|
|
107
|
-
|
|
181
|
+
quantise: str,
|
|
182
|
+
jited: str,
|
|
183
|
+
) -> Tuple[Union[List, None], List[str], str, str, str]:
|
|
184
|
+
"""
|
|
185
|
+
Run generation or inpainting.
|
|
186
|
+
|
|
187
|
+
:param model_name: model name
|
|
188
|
+
:param token_name: tokeniser name
|
|
189
|
+
:param vocab_fn: customised vocabulary name
|
|
190
|
+
:param step: number of sampling steps
|
|
191
|
+
:param batch_size: batch-size
|
|
192
|
+
:param sequence_size: maximum sequence length
|
|
193
|
+
:param guidance_strength: guidance strength of conditioning
|
|
194
|
+
:param method: `"BFN"` or `"ODE"`
|
|
195
|
+
:param temperature: sampling temperature while ODE-solver used
|
|
196
|
+
:param prompt: prompt string
|
|
197
|
+
:param scaffold: molecular scaffold
|
|
198
|
+
:param sar_control: semi-autoregressive behaviour flags
|
|
199
|
+
:param exclude_token: unwanted tokens
|
|
200
|
+
:param quantise: `"on"` or `"off"`
|
|
201
|
+
:param jited: `"on"` or `"off"`
|
|
202
|
+
:type model_name: str
|
|
203
|
+
:type token_name: str
|
|
204
|
+
:type vocab_fn: str
|
|
205
|
+
:type step: int
|
|
206
|
+
:type batch_size: int
|
|
207
|
+
:type sequence_size: int
|
|
208
|
+
:type guidance_strength: float
|
|
209
|
+
:type method: str
|
|
210
|
+
:type temperature: float
|
|
211
|
+
:type prompt: str
|
|
212
|
+
:type scaffold: str
|
|
213
|
+
:type sar_control: str
|
|
214
|
+
:type exclude_token: str
|
|
215
|
+
:type quantise: str
|
|
216
|
+
:type jited: str
|
|
217
|
+
:return: list of images \n
|
|
218
|
+
list of generated molecules \n
|
|
219
|
+
Chemfig code \n
|
|
220
|
+
messages \n
|
|
221
|
+
cache file path
|
|
222
|
+
:rtype: tuple
|
|
223
|
+
"""
|
|
108
224
|
_message = []
|
|
109
225
|
base_model_dict = dict(models["base"])
|
|
110
226
|
standalone_model_dict = dict([[i[0], i[1]] for i in models["standalone"]])
|
|
@@ -113,6 +229,7 @@ def run(
|
|
|
113
229
|
lora_label_dict = dict([[i[0], i[2] != []] for i in models["lora"]])
|
|
114
230
|
standalone_lmax_dict = dict([[i[0], i[3]] for i in models["standalone"]])
|
|
115
231
|
lora_lmax_dict = dict([[i[0], i[3]] for i in models["lora"]])
|
|
232
|
+
# ------- build tokeniser -------
|
|
116
233
|
if token_name == "SMILES & SAFE":
|
|
117
234
|
vocab_keys = VOCAB_KEYS
|
|
118
235
|
tokeniser = smiles2vec
|
|
@@ -123,8 +240,8 @@ def run(
|
|
|
123
240
|
vocab_keys = AA_VOCAB_KEYS
|
|
124
241
|
tokeniser = aa2vec
|
|
125
242
|
trans_fn = lambda x: x
|
|
126
|
-
img_fn = lambda
|
|
127
|
-
chemfig_fn = lambda
|
|
243
|
+
img_fn = lambda _: None # senseless to provide dumb 2D images
|
|
244
|
+
chemfig_fn = lambda _: [""] # senseless to provide very long Chemfig code
|
|
128
245
|
if token_name == "SELFIES":
|
|
129
246
|
vocab_data = load_vocab(vocabs[vocab_fn])
|
|
130
247
|
vocab_keys = vocab_data["vocab_keys"]
|
|
@@ -139,7 +256,7 @@ def run(
|
|
|
139
256
|
# ------- build model -------
|
|
140
257
|
prompt_info = parse_prompt(prompt)
|
|
141
258
|
sar_flag = parse_sar_control(sar_control)
|
|
142
|
-
print(prompt_info)
|
|
259
|
+
print("Prompt summary:", prompt_info) # show prompt info
|
|
143
260
|
if not prompt_info["lora"]:
|
|
144
261
|
if model_name in base_model_dict:
|
|
145
262
|
lmax = sequence_size
|
|
@@ -166,6 +283,10 @@ def run(
|
|
|
166
283
|
y = None
|
|
167
284
|
_message.append(f"Sequence length is set to {lmax} from model metadata.")
|
|
168
285
|
bfn.semi_autoregressive = sar_flag[0]
|
|
286
|
+
if quantise == "on":
|
|
287
|
+
quantise_model_(bfn)
|
|
288
|
+
if jited == "on":
|
|
289
|
+
bfn.compile()
|
|
169
290
|
elif len(prompt_info["lora"]) == 1:
|
|
170
291
|
lmax = lora_lmax_dict[prompt_info["lora"][0]]
|
|
171
292
|
if model_name in base_model_dict:
|
|
@@ -194,6 +315,10 @@ def run(
|
|
|
194
315
|
adjust_lora_(bfn, prompt_info["lora_scaling"][0])
|
|
195
316
|
_message.append(f"Sequence length is set to {lmax} from model metadata.")
|
|
196
317
|
bfn.semi_autoregressive = sar_flag[0]
|
|
318
|
+
if quantise == "on":
|
|
319
|
+
quantise_model_(bfn)
|
|
320
|
+
if jited == "on":
|
|
321
|
+
bfn.compile()
|
|
197
322
|
else:
|
|
198
323
|
lmax = max([lora_lmax_dict[i] for i in prompt_info["lora"]])
|
|
199
324
|
if model_name in base_model_dict:
|
|
@@ -211,6 +336,10 @@ def run(
|
|
|
211
336
|
sar_flag = [sar_flag[0] for _ in range(len(weights))]
|
|
212
337
|
bfn = EnsembleChemBFN(base_model_dir, lora_dir, mlps, weights)
|
|
213
338
|
y = [torch.tensor([i], dtype=torch.float32) for i in prompt_info["objective"]]
|
|
339
|
+
if quantise == "on":
|
|
340
|
+
bfn.quantise()
|
|
341
|
+
if jited == "on":
|
|
342
|
+
bfn.compile()
|
|
214
343
|
_message.append(f"Sequence length is set to {lmax} from model metadata.")
|
|
215
344
|
# ------- inference -------
|
|
216
345
|
allowed_tokens = parse_exclude_token(exclude_token, vocab_keys)
|
|
@@ -272,14 +401,18 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
272
401
|
model_name = gr.Dropdown(
|
|
273
402
|
[i[0] for i in models["base"]] + [i[0] for i in models["standalone"]],
|
|
274
403
|
label="model",
|
|
404
|
+
filterable=False,
|
|
275
405
|
)
|
|
276
406
|
token_name = gr.Dropdown(
|
|
277
|
-
["SMILES & SAFE", "SELFIES", "FASTA"],
|
|
407
|
+
["SMILES & SAFE", "SELFIES", "FASTA"],
|
|
408
|
+
label="tokeniser",
|
|
409
|
+
filterable=False,
|
|
278
410
|
)
|
|
279
411
|
vocab_fn = gr.Dropdown(
|
|
280
412
|
list(vocabs.keys()),
|
|
281
413
|
label="vocabulary",
|
|
282
414
|
visible=token_name.value == "SELFIES",
|
|
415
|
+
filterable=False,
|
|
283
416
|
)
|
|
284
417
|
step = gr.Slider(1, 5000, 100, step=1, precision=0, label="step")
|
|
285
418
|
batch_size = gr.Slider(1, 512, 1, step=1, precision=0, label="batch size")
|
|
@@ -289,7 +422,7 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
289
422
|
guidance_strength = gr.Slider(
|
|
290
423
|
0, 25, 4, step=0.05, label="guidance strength"
|
|
291
424
|
)
|
|
292
|
-
method = gr.Dropdown(["BFN", "ODE"], label="method")
|
|
425
|
+
method = gr.Dropdown(["BFN", "ODE"], label="method", filterable=False)
|
|
293
426
|
temperature = gr.Slider(
|
|
294
427
|
0.0,
|
|
295
428
|
2.5,
|
|
@@ -300,8 +433,10 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
300
433
|
)
|
|
301
434
|
with gr.Column(scale=2):
|
|
302
435
|
with gr.Tab(label="prompt editor"):
|
|
303
|
-
prompt = gr.TextArea(
|
|
304
|
-
|
|
436
|
+
prompt = gr.TextArea(
|
|
437
|
+
label="prompt", lines=12, html_attributes=HTML_STYLE
|
|
438
|
+
)
|
|
439
|
+
scaffold = gr.Textbox(label="scaffold", html_attributes=HTML_STYLE)
|
|
305
440
|
gr.Markdown("")
|
|
306
441
|
message = gr.TextArea(label="message")
|
|
307
442
|
with gr.Tab(label="result viewer"):
|
|
@@ -315,11 +450,15 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
315
450
|
show_row_numbers=True,
|
|
316
451
|
show_copy_button=True,
|
|
317
452
|
)
|
|
318
|
-
with gr.Tab(
|
|
453
|
+
with gr.Tab(
|
|
454
|
+
label="LATEX Chemfig", visible=token_name.value != "FASTA"
|
|
455
|
+
) as code:
|
|
319
456
|
chemfig = gr.Code(
|
|
320
457
|
label="", language="latex", show_line_numbers=True
|
|
321
458
|
)
|
|
322
|
-
with gr.Tab(
|
|
459
|
+
with gr.Tab(
|
|
460
|
+
label="gallery", visible=token_name.value != "FASTA"
|
|
461
|
+
) as gallery:
|
|
323
462
|
img = gr.Gallery(label="molecule", columns=4, height=512)
|
|
324
463
|
with gr.Tab(label="model explorer"):
|
|
325
464
|
btn_refresh = gr.Button("refresh", variant="secondary")
|
|
@@ -360,12 +499,19 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
360
499
|
show_row_numbers=True,
|
|
361
500
|
)
|
|
362
501
|
with gr.Tab(label="advanced control"):
|
|
363
|
-
sar_control = gr.Textbox(
|
|
502
|
+
sar_control = gr.Textbox(
|
|
503
|
+
"F",
|
|
504
|
+
label="semi-autoregressive behaviour",
|
|
505
|
+
html_attributes=HTML_STYLE,
|
|
506
|
+
)
|
|
364
507
|
gr.Markdown("")
|
|
365
508
|
exclude_token = gr.TextArea(
|
|
366
509
|
label="exclude tokens",
|
|
367
510
|
placeholder="key in unwanted tokens separated by comma.",
|
|
511
|
+
html_attributes=HTML_STYLE,
|
|
368
512
|
)
|
|
513
|
+
quantise = gr.Radio(["on", "off"], value="off", label="quantisation")
|
|
514
|
+
jited = gr.Radio(["on", "off"], value="off", label="JIT")
|
|
369
515
|
# ------ user interaction events -------
|
|
370
516
|
btn.click(
|
|
371
517
|
fn=run,
|
|
@@ -383,6 +529,8 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
383
529
|
scaffold,
|
|
384
530
|
sar_control,
|
|
385
531
|
exclude_token,
|
|
532
|
+
quantise,
|
|
533
|
+
jited,
|
|
386
534
|
],
|
|
387
535
|
outputs=[img, result, chemfig, message, btn_download],
|
|
388
536
|
)
|
|
@@ -399,11 +547,9 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
399
547
|
],
|
|
400
548
|
)
|
|
401
549
|
token_name.input(
|
|
402
|
-
fn=
|
|
403
|
-
list(vocabs.keys()), value=y, label="vocabulary", visible=x == "SELFIES"
|
|
404
|
-
),
|
|
550
|
+
fn=token_name_change_evt,
|
|
405
551
|
inputs=[token_name, vocab_fn],
|
|
406
|
-
outputs=vocab_fn,
|
|
552
|
+
outputs=[vocab_fn, code, gallery],
|
|
407
553
|
)
|
|
408
554
|
method.input(
|
|
409
555
|
fn=lambda x, y: gr.Slider(
|
|
@@ -426,6 +572,12 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
426
572
|
|
|
427
573
|
|
|
428
574
|
def main() -> None:
|
|
575
|
+
"""
|
|
576
|
+
Main function.
|
|
577
|
+
|
|
578
|
+
:return:
|
|
579
|
+
:rtype: None
|
|
580
|
+
"""
|
|
429
581
|
parser = argparse.ArgumentParser()
|
|
430
582
|
parser.add_argument(
|
|
431
583
|
"--public", default=False, help="open to public", action="store_true"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
CC[C@H]1CCCCN1CCN1CCOCC1
|
|
@@ -15,6 +15,12 @@ if "CHEMBFN_WEBUI_MODEL_DIR" in os.environ:
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def find_vocab() -> Dict[str, str]:
|
|
18
|
+
"""
|
|
19
|
+
Find customised vocabulary files.
|
|
20
|
+
|
|
21
|
+
:return: {file_name: file_path}
|
|
22
|
+
:rtype: dict
|
|
23
|
+
"""
|
|
18
24
|
vocab_fns = glob(str(_model_path / "vocab/*.txt"))
|
|
19
25
|
return {
|
|
20
26
|
os.path.basename(i).replace(".txt", ""): i
|
|
@@ -24,6 +30,17 @@ def find_vocab() -> Dict[str, str]:
|
|
|
24
30
|
|
|
25
31
|
|
|
26
32
|
def find_model() -> Dict[str, List[List[Union[str, int, List[str], Path]]]]:
|
|
33
|
+
"""
|
|
34
|
+
Find model files.
|
|
35
|
+
|
|
36
|
+
:return: ```
|
|
37
|
+
{
|
|
38
|
+
"base": [[name1, path1], [name2, path2], ...],
|
|
39
|
+
"standalone": [[name1, parent_path1, label1, pad_len1], ...],
|
|
40
|
+
"lora": [[name1, parent_path1, label1, pad_len1], ...]
|
|
41
|
+
}```
|
|
42
|
+
:rtype: dict
|
|
43
|
+
"""
|
|
27
44
|
models = {}
|
|
28
45
|
# find base models
|
|
29
46
|
base_fns = glob(str(_model_path / "base_model/*.pt"))
|
|
@@ -61,7 +78,21 @@ def find_model() -> Dict[str, List[List[Union[str, int, List[str], Path]]]]:
|
|
|
61
78
|
return models
|
|
62
79
|
|
|
63
80
|
|
|
64
|
-
def _get_lora_info(prompt: str) -> Tuple[str, List[float],
|
|
81
|
+
def _get_lora_info(prompt: str) -> Tuple[str, List[float], float]:
|
|
82
|
+
"""
|
|
83
|
+
Parse sub-prompt string containing LoRA info.
|
|
84
|
+
|
|
85
|
+
:param prompt: LoRA sub-prompt: \n
|
|
86
|
+
case I. `"<name:A>"` \n
|
|
87
|
+
case II. `"<name>"` \n
|
|
88
|
+
case III. `"<name:A>:[a,b,...]"` \n
|
|
89
|
+
case IV. `"<name>:[a,b,c,...]"`
|
|
90
|
+
:type prompt: str
|
|
91
|
+
:return: LoRA name \n
|
|
92
|
+
objective values \n
|
|
93
|
+
LoRA scaling
|
|
94
|
+
:rtype: tuple
|
|
95
|
+
"""
|
|
65
96
|
s = prompt.split(">")
|
|
66
97
|
s1 = s[0].replace("<", "")
|
|
67
98
|
lora_info = s1.split(":")
|
|
@@ -83,6 +114,27 @@ def _get_lora_info(prompt: str) -> Tuple[str, List[float], List[float]]:
|
|
|
83
114
|
def parse_prompt(
|
|
84
115
|
prompt: str,
|
|
85
116
|
) -> Dict[str, Union[List[str], List[float], List[List[float]]]]:
|
|
117
|
+
"""
|
|
118
|
+
Parse prompt.
|
|
119
|
+
|
|
120
|
+
:param prompt: prompt string: \n
|
|
121
|
+
case I. empty string `""` --> `{"lora": [], "objective": [], "lora_scaling": []}`\n
|
|
122
|
+
case II. one condition `"[a,b,c,...]"` --> `{"lora": [], "objective": [[a, b, c, ...]], "lora_scaling": []}`\n
|
|
123
|
+
case III. one LoRA `"<name:A>"` --> `{"lora": [name], "objective": [], "lora_scaling": [A]}`\n
|
|
124
|
+
case IV. one LoRA `"<name>"` --> `{"lora": [name], "objective": [], "lora_scaling": [1]}`\n
|
|
125
|
+
case V. one LoRA with condition `"<name:A>:[a,b,...]"` --> `{"lora": [name], "objective": [[a, b, ...]], "lora_scaling": [A]}`\n
|
|
126
|
+
case VI. one LoRA with condition `"<name>:[a,b,...]"` --> `{"lora": [name], "objective": [[a, b, ...]], "lora_scaling": [1]}`\n
|
|
127
|
+
case VII. several LoRAs with conditions `"<name1:A1>:[a1,b1,...];<name2>:[a2,b2,c2,...]"` --> `{"lora": [name1, name2], "objective": [[a1, b1, ...], [a2, b2, c2, ...]], "lora_scaling": [A1, 1]}`\n
|
|
128
|
+
case VIII. other cases --> `{"lora": [], "objective": [], "lora_scaling": []}`\n
|
|
129
|
+
:type prompt: str
|
|
130
|
+
:return: ```
|
|
131
|
+
{
|
|
132
|
+
"lora": [name1, name2, ...],
|
|
133
|
+
"objective": [obj1, obj2, ...],
|
|
134
|
+
"lora_scaling": [s1, s2, ...]
|
|
135
|
+
}```
|
|
136
|
+
:rtype: dict
|
|
137
|
+
"""
|
|
86
138
|
prompt_group = prompt.strip().replace("\n", "").split(";")
|
|
87
139
|
prompt_group = [i for i in prompt_group if i]
|
|
88
140
|
info = {"lora": [], "objective": [], "lora_scaling": []}
|
|
@@ -114,6 +166,16 @@ def parse_prompt(
|
|
|
114
166
|
|
|
115
167
|
|
|
116
168
|
def parse_exclude_token(tokens: str, vocab_keys: List[str]) -> List[str]:
|
|
169
|
+
"""
|
|
170
|
+
Parse exclude token string.
|
|
171
|
+
|
|
172
|
+
:param tokens: unwanted token string in the format `"token1,token2,..."`
|
|
173
|
+
:param vocab_keys: vocabulary elements
|
|
174
|
+
:type tokens: str
|
|
175
|
+
:type vocab_keys: list
|
|
176
|
+
:return: a list of allowed vocabulary
|
|
177
|
+
:rtype: list
|
|
178
|
+
"""
|
|
117
179
|
tokens = tokens.strip().replace("\n", "").split(",")
|
|
118
180
|
tokens = [i for i in tokens if i]
|
|
119
181
|
if not tokens:
|
|
@@ -121,7 +183,21 @@ def parse_exclude_token(tokens: str, vocab_keys: List[str]) -> List[str]:
|
|
|
121
183
|
tokens = [i for i in vocab_keys if i not in tokens]
|
|
122
184
|
return tokens
|
|
123
185
|
|
|
186
|
+
|
|
124
187
|
def parse_sar_control(sar_control: str) -> List[bool]:
|
|
188
|
+
"""
|
|
189
|
+
Parse semi-autoregression control string.
|
|
190
|
+
|
|
191
|
+
:param sar_control: semi-autoregression control string: \n
|
|
192
|
+
case I. `""` --> `[False]` \n
|
|
193
|
+
case II. `"F"` --> `[False]` \n
|
|
194
|
+
case III. `"T"` --> `[True]` \n
|
|
195
|
+
case IV. `F,T,...` --> `[False, True, ...]` \n
|
|
196
|
+
case V. other cases --> `[False, False, ...]` \n
|
|
197
|
+
:type sar_control: str
|
|
198
|
+
:return: a list of SAR flag
|
|
199
|
+
:rtype: list
|
|
200
|
+
"""
|
|
125
201
|
sar_flag = sar_control.strip().replace("\n", "").split(",")
|
|
126
202
|
sar_flag = [i for i in sar_flag if i]
|
|
127
203
|
if not sar_flag:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chembfn_webui
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: WebUI for ChemBFN
|
|
5
5
|
Home-page: https://github.com/Augus1999/ChemBFN-WebUI
|
|
6
6
|
Author: Nianze A. Tao
|
|
@@ -122,9 +122,16 @@ $ chembfn
|
|
|
122
122
|
|
|
123
123
|
Under "advanced control" tab
|
|
124
124
|
|
|
125
|
-
* You can control semi-autoregressive behaviours by key in `F` for
|
|
125
|
+
* You can control the semi-autoregressive (SAR) behaviour by keying in `F` to switch SAR off or `T` to switch it on; a prompt like `F,F,T,...` controls SAR individually for each model in an ensemble.
|
|
126
126
|
* You can add unwanted tokens, e.g., `[Cu],p,[Si]`.
|
|
127
127
|
|
|
128
128
|
### 6. Generate molecules
|
|
129
129
|
|
|
130
130
|
Click "RUN" then here you go! If an error occurred, please check your prompts and settings.
|
|
131
|
+
|
|
132
|
+
## Where to obtain the models?
|
|
133
|
+
|
|
134
|
+
* Pretrained models: [https://huggingface.co/suenoomozawa/ChemBFN](https://huggingface.co/suenoomozawa/ChemBFN)
|
|
135
|
+
* ChemBFN source code: [https://github.com/Augus1999/bayesian-flow-network-for-chemistry](https://github.com/Augus1999/bayesian-flow-network-for-chemistry)
|
|
136
|
+
* ChemBFN document: [https://augus1999.github.io/bayesian-flow-network-for-chemistry/](https://augus1999.github.io/bayesian-flow-network-for-chemistry/)
|
|
137
|
+
* ChemBFN package: [https://pypi.org/project/bayesianflow-for-chem/](https://pypi.org/project/bayesianflow-for-chem/)
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][O]
|
|
2
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][=Branch1][C][=O][N][C][C][C]
|
|
3
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][N][C][C][Branch1][=Branch2][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=C][C][=C][C][=C][Ring1][=Branch1]
|
|
4
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][O]
|
|
5
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
|
|
6
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
|
|
7
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
|
|
8
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
|
|
9
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
|
|
10
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
|
|
11
|
-
[O][=C][Branch2][Ring1][C][N][N][C][=Branch1][C][=O][C][=C][C][=C][Branch1][C][Cl][C][=C][Ring1][#Branch1][C][=C][C][=C][C][=C][Ring1][=Branch1]
|
|
12
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
|
|
13
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][=Branch1][C][=O][O]
|
|
14
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
|
|
15
|
-
[C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui/model/base_model/place_base_model_here.txt
RENAMED
|
File without changes
|
{chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui/model/lora/place_lora_folder_here.txt
RENAMED
|
File without changes
|
|
File without changes
|
{chembfn_webui-0.1.0 → chembfn_webui-0.3.0}/chembfn_webui/model/vocab/place_vocabulary_file_here.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|