chembfn-webui 1.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chembfn_webui/bin/app.py +154 -55
- chembfn_webui/bin/favicon.png +0 -0
- chembfn_webui/cache/results.csv +50 -1
- chembfn_webui/lib/utilities.py +173 -17
- chembfn_webui/lib/version.py +1 -1
- {chembfn_webui-1.0.0.dist-info → chembfn_webui-2.1.0.dist-info}/METADATA +34 -6
- chembfn_webui-2.1.0.dist-info/RECORD +17 -0
- chembfn_webui-1.0.0.dist-info/RECORD +0 -16
- {chembfn_webui-1.0.0.dist-info → chembfn_webui-2.1.0.dist-info}/WHEEL +0 -0
- {chembfn_webui-1.0.0.dist-info → chembfn_webui-2.1.0.dist-info}/entry_points.txt +0 -0
- {chembfn_webui-1.0.0.dist-info → chembfn_webui-2.1.0.dist-info}/licenses/LICENSE +0 -0
- {chembfn_webui-1.0.0.dist-info → chembfn_webui-2.1.0.dist-info}/top_level.txt +0 -0
chembfn_webui/bin/app.py
CHANGED
|
@@ -6,8 +6,9 @@ Define application behaviours.
|
|
|
6
6
|
import sys
|
|
7
7
|
import argparse
|
|
8
8
|
from pathlib import Path
|
|
9
|
+
from copy import deepcopy
|
|
9
10
|
from functools import partial
|
|
10
|
-
from typing import Tuple, List, Dict, Union
|
|
11
|
+
from typing import Tuple, List, Dict, Optional, Union, Literal
|
|
11
12
|
|
|
12
13
|
sys.path.append(str(Path(__file__).parent.parent))
|
|
13
14
|
from rdkit.Chem import Draw, MolFromSmiles # type: ignore
|
|
@@ -32,18 +33,21 @@ from bayesianflow_for_chem.tool import (
|
|
|
32
33
|
quantise_model_,
|
|
33
34
|
)
|
|
34
35
|
from lib.utilities import (
|
|
36
|
+
sys_info,
|
|
35
37
|
find_model,
|
|
36
38
|
find_vocab,
|
|
37
39
|
parse_prompt,
|
|
38
40
|
parse_exclude_token,
|
|
39
41
|
parse_sar_control,
|
|
42
|
+
build_result_prep_fn,
|
|
40
43
|
)
|
|
41
44
|
from lib.version import __version__
|
|
42
45
|
|
|
43
46
|
vocabs = find_vocab()
|
|
44
47
|
models = find_model()
|
|
45
|
-
lora_selected = False # lora select flag
|
|
46
48
|
cache_dir = Path(__file__).parent.parent / "cache"
|
|
49
|
+
favicon_dir = Path(__file__).parent / "favicon.png"
|
|
50
|
+
_result_count = 0
|
|
47
51
|
|
|
48
52
|
HTML_STYLE = gr.InputHTMLAttributes(
|
|
49
53
|
autocapitalize="off",
|
|
@@ -74,7 +78,7 @@ def selfies2vec(sel: str, vocab_dict: Dict[str, int]) -> List[int]:
|
|
|
74
78
|
return [vocab_dict.get(i, unknown_id) for i in s]
|
|
75
79
|
|
|
76
80
|
|
|
77
|
-
def
|
|
81
|
+
def _refresh(
|
|
78
82
|
model_selected: str, vocab_selected: str, tokeniser_selected: str
|
|
79
83
|
) -> Tuple[
|
|
80
84
|
List[str], List[str], List[List[str]], List[List[str]], gr.Dropdown, gr.Dropdown
|
|
@@ -119,7 +123,7 @@ def refresh(
|
|
|
119
123
|
return a, b, c, d, e, f
|
|
120
124
|
|
|
121
125
|
|
|
122
|
-
def
|
|
126
|
+
def _select_lora(evt: gr.SelectData, prompt: str) -> str:
|
|
123
127
|
"""
|
|
124
128
|
Select LoRA model name from Dataframe object.
|
|
125
129
|
|
|
@@ -130,20 +134,16 @@ def select_lora(evt: gr.SelectData, prompt: str) -> str:
|
|
|
130
134
|
:return: new prompt string
|
|
131
135
|
:rtype: str
|
|
132
136
|
"""
|
|
133
|
-
global lora_selected
|
|
134
|
-
if lora_selected: # avoid double select
|
|
135
|
-
lora_selected = False
|
|
136
|
-
return prompt
|
|
137
137
|
selected_lora = evt.value
|
|
138
|
-
|
|
139
|
-
if evt.index[1] != 0:
|
|
138
|
+
exist_lora = parse_prompt(prompt)["lora"]
|
|
139
|
+
if evt.index[1] != 0 or selected_lora in exist_lora:
|
|
140
140
|
return prompt
|
|
141
141
|
if not prompt:
|
|
142
142
|
return f"<{selected_lora}:1>"
|
|
143
143
|
return f"{prompt};\n<{selected_lora}:1>"
|
|
144
144
|
|
|
145
145
|
|
|
146
|
-
def
|
|
146
|
+
def _token_name_change_evt(
|
|
147
147
|
token_name: str, vocab_fn: str
|
|
148
148
|
) -> Tuple[gr.Dropdown, gr.Tab, gr.Tab]:
|
|
149
149
|
"""
|
|
@@ -178,15 +178,17 @@ def run(
|
|
|
178
178
|
batch_size: int,
|
|
179
179
|
sequence_size: int,
|
|
180
180
|
guidance_strength: float,
|
|
181
|
-
method:
|
|
181
|
+
method: Literal["BFN", "ODE"],
|
|
182
182
|
temperature: float,
|
|
183
|
-
prompt: str,
|
|
184
|
-
scaffold: str,
|
|
185
|
-
template: str,
|
|
186
|
-
sar_control: str,
|
|
187
|
-
exclude_token: str,
|
|
188
|
-
quantise:
|
|
189
|
-
jited:
|
|
183
|
+
prompt: Optional[str],
|
|
184
|
+
scaffold: Optional[str],
|
|
185
|
+
template: Optional[str],
|
|
186
|
+
sar_control: Optional[str],
|
|
187
|
+
exclude_token: Optional[str],
|
|
188
|
+
quantise: Literal["on", "off"],
|
|
189
|
+
jited: Literal["on", "off"],
|
|
190
|
+
sorted_: Literal["on", "off"],
|
|
191
|
+
result_prep_fn: Optional[str],
|
|
190
192
|
) -> Tuple[Union[List, None], List[str], str, gr.TextArea, str]:
|
|
191
193
|
"""
|
|
192
194
|
Run generation or inpainting.
|
|
@@ -207,6 +209,8 @@ def run(
|
|
|
207
209
|
:param exclude_token: unwanted tokens
|
|
208
210
|
:param quantise: `"on"` or `"off"`
|
|
209
211
|
:param jited: `"on"` or `"off"`
|
|
212
|
+
:param sorted\\_: whether to sort the result; `"on"` or `"off"`
|
|
213
|
+
:param result_prep_fn: result preprocessing function given in string form
|
|
210
214
|
:type model_name: str
|
|
211
215
|
:type token_name: str
|
|
212
216
|
:type vocab_fn: str
|
|
@@ -216,13 +220,15 @@ def run(
|
|
|
216
220
|
:type guidance_strength: float
|
|
217
221
|
:type method: str
|
|
218
222
|
:type temperature: float
|
|
219
|
-
:type prompt: str
|
|
220
|
-
:type scaffold: str
|
|
221
|
-
:type template: str
|
|
222
|
-
:type sar_control: str
|
|
223
|
-
:type exclude_token: str
|
|
223
|
+
:type prompt: str | None
|
|
224
|
+
:type scaffold: str | None
|
|
225
|
+
:type template: str | None
|
|
226
|
+
:type sar_control: str | None
|
|
227
|
+
:type exclude_token: str | None
|
|
224
228
|
:type quantise: str
|
|
225
229
|
:type jited: str
|
|
230
|
+
:type sorted\\_: str
|
|
231
|
+
:type result_prep_fn: str | None
|
|
226
232
|
:return: list of images \n
|
|
227
233
|
list of generated molecules \n
|
|
228
234
|
Chemfig code \n
|
|
@@ -238,6 +244,8 @@ def run(
|
|
|
238
244
|
lora_label_dict = dict([[i[0], i[2] != []] for i in models["lora"]])
|
|
239
245
|
standalone_lmax_dict = dict([[i[0], i[3]] for i in models["standalone"]])
|
|
240
246
|
lora_lmax_dict = dict([[i[0], i[3]] for i in models["lora"]])
|
|
247
|
+
# ------- build result preprocessing function -------
|
|
248
|
+
_result_prep_fn = build_result_prep_fn(result_prep_fn)
|
|
241
249
|
# ------- build tokeniser -------
|
|
242
250
|
if token_name == "SMILES & SAFE":
|
|
243
251
|
vocab_keys = VOCAB_KEYS
|
|
@@ -248,7 +256,7 @@ def run(
|
|
|
248
256
|
if token_name == "FASTA":
|
|
249
257
|
vocab_keys = FASTA_VOCAB_KEYS
|
|
250
258
|
tokeniser = fasta2vec
|
|
251
|
-
trans_fn = lambda x: x
|
|
259
|
+
trans_fn = lambda x: [i for i in x if i]
|
|
252
260
|
img_fn = lambda _: None # senseless to provide dumb 2D images
|
|
253
261
|
chemfig_fn = lambda _: [""] # senseless to provide very long Chemfig code
|
|
254
262
|
if token_name == "SELFIES":
|
|
@@ -256,7 +264,7 @@ def run(
|
|
|
256
264
|
vocab_keys = vocab_data["vocab_keys"]
|
|
257
265
|
vocab_dict = vocab_data["vocab_dict"]
|
|
258
266
|
tokeniser = partial(selfies2vec, vocab_dict=vocab_dict)
|
|
259
|
-
trans_fn = lambda x: x
|
|
267
|
+
trans_fn = lambda x: [i for i in x if i]
|
|
260
268
|
img_fn = lambda x: [
|
|
261
269
|
Draw.MolToImage(MolFromSmiles(decoder(i)), (500, 500)) for i in x
|
|
262
270
|
]
|
|
@@ -265,7 +273,9 @@ def run(
|
|
|
265
273
|
# ------- build model -------
|
|
266
274
|
prompt_info = parse_prompt(prompt)
|
|
267
275
|
sar_flag = parse_sar_control(sar_control)
|
|
268
|
-
|
|
276
|
+
_info = deepcopy(prompt_info)
|
|
277
|
+
_info["semi-autoregression"] = deepcopy(sar_flag)
|
|
278
|
+
print("Prompt summary:", _info) # prompt
|
|
269
279
|
if not prompt_info["lora"]:
|
|
270
280
|
if model_name in base_model_dict:
|
|
271
281
|
lmax = sequence_size
|
|
@@ -290,7 +300,7 @@ def run(
|
|
|
290
300
|
y = mlp.forward(y)
|
|
291
301
|
else:
|
|
292
302
|
y = None
|
|
293
|
-
_message.append(f"Sequence length
|
|
303
|
+
_message.append(f"Sequence length set to {lmax} from model metadata.")
|
|
294
304
|
bfn.semi_autoregressive = sar_flag[0]
|
|
295
305
|
if quantise == "on":
|
|
296
306
|
quantise_model_(bfn)
|
|
@@ -322,7 +332,7 @@ def run(
|
|
|
322
332
|
y = None
|
|
323
333
|
if prompt_info["lora_scaling"][0] != 1.0:
|
|
324
334
|
adjust_lora_(bfn, prompt_info["lora_scaling"][0])
|
|
325
|
-
_message.append(f"Sequence length
|
|
335
|
+
_message.append(f"Sequence length set to {lmax} from model metadata.")
|
|
326
336
|
bfn.semi_autoregressive = sar_flag[0]
|
|
327
337
|
if quantise == "on":
|
|
328
338
|
quantise_model_(bfn)
|
|
@@ -344,16 +354,25 @@ def run(
|
|
|
344
354
|
if len(sar_flag) == 1:
|
|
345
355
|
sar_flag = [sar_flag[0] for _ in range(len(weights))]
|
|
346
356
|
bfn = EnsembleChemBFN(base_model_dir, lora_dir, mlps, weights)
|
|
347
|
-
y =
|
|
357
|
+
y = (
|
|
358
|
+
[torch.tensor([i], dtype=torch.float32) for i in prompt_info["objective"]]
|
|
359
|
+
if prompt_info["objective"]
|
|
360
|
+
else None
|
|
361
|
+
)
|
|
348
362
|
if quantise == "on":
|
|
349
363
|
bfn.quantise()
|
|
350
364
|
if jited == "on":
|
|
351
365
|
bfn.compile()
|
|
352
|
-
_message.append(f"Sequence length
|
|
366
|
+
_message.append(f"Sequence length set to {lmax} from model metadata.")
|
|
367
|
+
result_prep_fn_ = lambda x: [_result_prep_fn(i) for i in x]
|
|
353
368
|
# ------- inference -------
|
|
354
369
|
allowed_tokens = parse_exclude_token(exclude_token, vocab_keys)
|
|
355
370
|
if not allowed_tokens:
|
|
356
371
|
allowed_tokens = "all"
|
|
372
|
+
if scaffold is None:
|
|
373
|
+
scaffold = ""
|
|
374
|
+
if template is None:
|
|
375
|
+
template = ""
|
|
357
376
|
scaffold = scaffold.strip()
|
|
358
377
|
template = template.strip()
|
|
359
378
|
if scaffold:
|
|
@@ -369,8 +388,9 @@ def run(
|
|
|
369
388
|
vocab_keys=vocab_keys,
|
|
370
389
|
method=_method,
|
|
371
390
|
allowed_tokens=allowed_tokens,
|
|
391
|
+
sort=sorted_ == "on",
|
|
372
392
|
)
|
|
373
|
-
mols = trans_fn(mols)
|
|
393
|
+
mols = trans_fn(result_prep_fn_(mols))
|
|
374
394
|
imgs = img_fn(mols)
|
|
375
395
|
chemfigs = chemfig_fn(mols)
|
|
376
396
|
if template:
|
|
@@ -388,8 +408,9 @@ def run(
|
|
|
388
408
|
vocab_keys=vocab_keys,
|
|
389
409
|
method=_method,
|
|
390
410
|
allowed_tokens=allowed_tokens,
|
|
411
|
+
sort=sorted_ == "on",
|
|
391
412
|
)
|
|
392
|
-
mols = trans_fn(mols)
|
|
413
|
+
mols = trans_fn(result_prep_fn_(mols))
|
|
393
414
|
imgs = img_fn(mols)
|
|
394
415
|
chemfigs = chemfig_fn(mols)
|
|
395
416
|
else:
|
|
@@ -403,16 +424,18 @@ def run(
|
|
|
403
424
|
vocab_keys=vocab_keys,
|
|
404
425
|
method=_method,
|
|
405
426
|
allowed_tokens=allowed_tokens,
|
|
427
|
+
sort=sorted_ == "on",
|
|
406
428
|
)
|
|
407
|
-
mols = trans_fn(mols)
|
|
429
|
+
mols = trans_fn(result_prep_fn_(mols))
|
|
408
430
|
imgs = img_fn(mols)
|
|
409
431
|
chemfigs = chemfig_fn(mols)
|
|
410
|
-
n_mol = len(mols)
|
|
411
432
|
with open(cache_dir / "results.csv", "w", encoding="utf-8", newline="") as rf:
|
|
412
433
|
rf.write("\n".join(mols))
|
|
413
434
|
_message.append(
|
|
414
|
-
f"{n_mol}
|
|
435
|
+
f"{(n_mol := len(mols))} {'sample' if n_mol in (0, 1) else 'samples'} generated and saved to cache that can be downloaded."
|
|
415
436
|
)
|
|
437
|
+
global _result_count
|
|
438
|
+
_result_count = n_mol
|
|
416
439
|
return (
|
|
417
440
|
imgs,
|
|
418
441
|
mols,
|
|
@@ -422,11 +445,11 @@ def run(
|
|
|
422
445
|
)
|
|
423
446
|
|
|
424
447
|
|
|
425
|
-
with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
426
|
-
gr.Markdown("### WebUI to generate and visualise molecules for ChemBFN method.")
|
|
448
|
+
with gr.Blocks(title="ChemBFN WebUI", analytics_enabled=False) as app:
|
|
427
449
|
with gr.Row():
|
|
428
450
|
with gr.Column(scale=1):
|
|
429
451
|
btn = gr.Button("RUN", variant="primary")
|
|
452
|
+
stop = gr.Button("\u23f9", variant="stop", visible=False)
|
|
430
453
|
model_name = gr.Dropdown(
|
|
431
454
|
[i[0] for i in models["base"]] + [i[0] for i in models["standalone"]],
|
|
432
455
|
label="model",
|
|
@@ -471,14 +494,15 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
471
494
|
message = gr.TextArea(label="message", lines=2)
|
|
472
495
|
with gr.Tab(label="result viewer"):
|
|
473
496
|
with gr.Tab(label="result"):
|
|
474
|
-
btn_download = gr.File(
|
|
497
|
+
btn_download = gr.File(
|
|
498
|
+
str(cache_dir / "results.csv"), label="download", visible=False
|
|
499
|
+
)
|
|
475
500
|
result = gr.Dataframe(
|
|
476
501
|
headers=["molecule"],
|
|
477
|
-
|
|
502
|
+
column_count=(1, "fixed"),
|
|
478
503
|
label="",
|
|
479
|
-
|
|
504
|
+
interactive=False,
|
|
480
505
|
show_row_numbers=True,
|
|
481
|
-
show_copy_button=True,
|
|
482
506
|
)
|
|
483
507
|
with gr.Tab(
|
|
484
508
|
label="LATEX Chemfig", visible=token_name.value != "FASTA"
|
|
@@ -496,7 +520,7 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
496
520
|
vocab_table = gr.Dataframe(
|
|
497
521
|
list(vocabs.keys()),
|
|
498
522
|
headers=["name"],
|
|
499
|
-
|
|
523
|
+
column_count=(1, "fixed"),
|
|
500
524
|
label="",
|
|
501
525
|
interactive=False,
|
|
502
526
|
show_row_numbers=True,
|
|
@@ -505,7 +529,7 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
505
529
|
base_table = gr.Dataframe(
|
|
506
530
|
[i[0] for i in models["base"]],
|
|
507
531
|
headers=["name"],
|
|
508
|
-
|
|
532
|
+
column_count=(1, "fixed"),
|
|
509
533
|
label="",
|
|
510
534
|
interactive=False,
|
|
511
535
|
show_row_numbers=True,
|
|
@@ -514,7 +538,7 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
514
538
|
standalone_table = gr.Dataframe(
|
|
515
539
|
[[i[0], i[2]] for i in models["standalone"]],
|
|
516
540
|
headers=["name", "objective"],
|
|
517
|
-
|
|
541
|
+
column_count=(2, "fixed"),
|
|
518
542
|
label="",
|
|
519
543
|
interactive=False,
|
|
520
544
|
show_row_numbers=True,
|
|
@@ -523,7 +547,7 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
523
547
|
lora_tabel = gr.Dataframe(
|
|
524
548
|
[[i[0], i[2]] for i in models["lora"]],
|
|
525
549
|
headers=["name", "objective"],
|
|
526
|
-
|
|
550
|
+
column_count=(2, "fixed"),
|
|
527
551
|
label="",
|
|
528
552
|
interactive=False,
|
|
529
553
|
show_row_numbers=True,
|
|
@@ -540,10 +564,33 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
540
564
|
placeholder="key in unwanted tokens separated by comma.",
|
|
541
565
|
html_attributes=HTML_STYLE,
|
|
542
566
|
)
|
|
543
|
-
|
|
544
|
-
|
|
567
|
+
result_prep_fn = gr.Textbox(
|
|
568
|
+
"lambda x: x",
|
|
569
|
+
label="result preprocessing function",
|
|
570
|
+
placeholder="lambda x: x",
|
|
571
|
+
html_attributes=HTML_STYLE,
|
|
572
|
+
)
|
|
573
|
+
with gr.Row(scale=1):
|
|
574
|
+
quantise = gr.Radio(
|
|
575
|
+
["on", "off"], value="off", label="quantisation"
|
|
576
|
+
)
|
|
577
|
+
jited = gr.Radio(["on", "off"], value="off", label="JIT")
|
|
578
|
+
sorted_ = gr.Radio(
|
|
579
|
+
["on", "off"], value="off", label="sort result based on entropy"
|
|
580
|
+
)
|
|
581
|
+
gr.HTML(sys_info(), elem_classes="custom_footer", elem_id="footer")
|
|
545
582
|
# ------ user interaction events -------
|
|
546
|
-
btn.click(
|
|
583
|
+
gen = btn.click(
|
|
584
|
+
fn=lambda: (
|
|
585
|
+
gr.Button("RUN", variant="primary", visible=False),
|
|
586
|
+
gr.Button("\u23f9", variant="stop", visible=True),
|
|
587
|
+
),
|
|
588
|
+
inputs=None,
|
|
589
|
+
outputs=[btn, stop],
|
|
590
|
+
api_name="switch_to_stop_mode",
|
|
591
|
+
api_description="Switch to STOP.",
|
|
592
|
+
api_visibility="private",
|
|
593
|
+
).then(
|
|
547
594
|
fn=run,
|
|
548
595
|
inputs=[
|
|
549
596
|
model_name,
|
|
@@ -562,11 +609,37 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
562
609
|
exclude_token,
|
|
563
610
|
quantise,
|
|
564
611
|
jited,
|
|
612
|
+
sorted_,
|
|
613
|
+
result_prep_fn,
|
|
565
614
|
],
|
|
566
615
|
outputs=[img, result, chemfig, message, btn_download],
|
|
616
|
+
api_name="run",
|
|
617
|
+
api_description="Run ChemBFN model.",
|
|
618
|
+
)
|
|
619
|
+
gen.then(
|
|
620
|
+
fn=lambda: (
|
|
621
|
+
gr.Button("RUN", variant="primary", visible=True),
|
|
622
|
+
gr.Button("\u23f9", variant="stop", visible=False),
|
|
623
|
+
),
|
|
624
|
+
inputs=None,
|
|
625
|
+
outputs=[btn, stop],
|
|
626
|
+
api_name="switch_back_to_run_mode",
|
|
627
|
+
api_description="Switch back to RUN.",
|
|
628
|
+
api_visibility="private",
|
|
629
|
+
)
|
|
630
|
+
stop.click(
|
|
631
|
+
fn=lambda: (
|
|
632
|
+
gr.Button("RUN", variant="primary", visible=True),
|
|
633
|
+
gr.Button("\u23f9", variant="stop", visible=False),
|
|
634
|
+
),
|
|
635
|
+
inputs=None,
|
|
636
|
+
outputs=[btn, stop],
|
|
637
|
+
cancels=[gen],
|
|
638
|
+
api_name="stop",
|
|
639
|
+
api_description="Stop the model.",
|
|
567
640
|
)
|
|
568
641
|
btn_refresh.click(
|
|
569
|
-
fn=
|
|
642
|
+
fn=_refresh,
|
|
570
643
|
inputs=[model_name, vocab_fn, token_name],
|
|
571
644
|
outputs=[
|
|
572
645
|
vocab_table,
|
|
@@ -576,11 +649,14 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
576
649
|
model_name,
|
|
577
650
|
vocab_fn,
|
|
578
651
|
],
|
|
652
|
+
api_name="refresh_model_list",
|
|
653
|
+
api_description="Refresh the model list.",
|
|
579
654
|
)
|
|
580
655
|
token_name.input(
|
|
581
|
-
fn=
|
|
656
|
+
fn=_token_name_change_evt,
|
|
582
657
|
inputs=[token_name, vocab_fn],
|
|
583
658
|
outputs=[vocab_fn, code, gallery],
|
|
659
|
+
api_visibility="private",
|
|
584
660
|
)
|
|
585
661
|
method.input(
|
|
586
662
|
fn=lambda x, y: gr.Slider(
|
|
@@ -593,12 +669,25 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
|
|
|
593
669
|
),
|
|
594
670
|
inputs=[method, temperature],
|
|
595
671
|
outputs=temperature,
|
|
672
|
+
api_name="select_sampling_method",
|
|
673
|
+
api_description="Select sampling method between 'BFN' and 'ODE'.",
|
|
674
|
+
api_visibility="private",
|
|
675
|
+
)
|
|
676
|
+
lora_tabel.select(
|
|
677
|
+
fn=_select_lora,
|
|
678
|
+
inputs=prompt,
|
|
679
|
+
outputs=prompt,
|
|
680
|
+
api_name="select_lora",
|
|
681
|
+
api_description="Select LoRA model from the model list.",
|
|
682
|
+
api_visibility="private",
|
|
596
683
|
)
|
|
597
|
-
lora_tabel.select(fn=select_lora, inputs=prompt, outputs=prompt)
|
|
598
684
|
result.change(
|
|
599
|
-
fn=lambda x: gr.File(x, label="download", visible=
|
|
685
|
+
fn=lambda x: gr.File(x, label="download", visible=_result_count > 0),
|
|
600
686
|
inputs=btn_download,
|
|
601
687
|
outputs=btn_download,
|
|
688
|
+
api_name="change_download_state",
|
|
689
|
+
api_description="Hide or show the file downloading item.",
|
|
690
|
+
api_visibility="private",
|
|
602
691
|
)
|
|
603
692
|
|
|
604
693
|
|
|
@@ -609,6 +698,9 @@ def main() -> None:
|
|
|
609
698
|
:return:
|
|
610
699
|
:rtype: None
|
|
611
700
|
"""
|
|
701
|
+
from rdkit import RDLogger
|
|
702
|
+
|
|
703
|
+
RDLogger.DisableLog("rdApp.*") # type: ignore
|
|
612
704
|
parser = argparse.ArgumentParser(
|
|
613
705
|
description="A web-based visualisation tool for ChemBFN method.",
|
|
614
706
|
epilog=f"ChemBFN WebUI {__version__}, developed in Hiroshima University by chemists for chemists. "
|
|
@@ -616,11 +708,18 @@ def main() -> None:
|
|
|
616
708
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
617
709
|
)
|
|
618
710
|
parser.add_argument(
|
|
619
|
-
"--public", default=False, help="open to public", action="store_true"
|
|
711
|
+
"-P", "--public", default=False, help="open to public", action="store_true"
|
|
620
712
|
)
|
|
621
713
|
parser.add_argument("-V", "--version", action="version", version=__version__)
|
|
622
714
|
args = parser.parse_args()
|
|
623
|
-
|
|
715
|
+
print(f"This is ChemBFN WebUI version {__version__}")
|
|
716
|
+
app.launch(
|
|
717
|
+
share=args.public,
|
|
718
|
+
footer_links=["api"],
|
|
719
|
+
allowed_paths=[cache_dir.absolute().__str__()],
|
|
720
|
+
favicon_path=favicon_dir.absolute().__str__(),
|
|
721
|
+
css=".custom_footer {text-align:center;bottom:0;}",
|
|
722
|
+
)
|
|
624
723
|
|
|
625
724
|
|
|
626
725
|
if __name__ == "__main__":
|
|
Binary file
|
chembfn_webui/cache/results.csv
CHANGED
|
@@ -1 +1,50 @@
|
|
|
1
|
-
|
|
1
|
+
[C][C][=C][C][=C][C][=C][Ring1][Branch1][C][C][=C][Branch2][Ring1][C][C][=Branch1][C][=O][N][C][C][O][C][N][Branch1][C][C][Branch1][C][C][=O][C][=Branch1][C][=O][C][Ring2][Ring1][C][=C][Ring2][Ring1][=Branch2][C]
|
|
2
|
+
[O][=C][Branch1][#C][C][N][C@@][Branch1][Ring2][C][C][O][C][C][O][C][Ring1][Branch2][N][C][=C][C][=C][C][=Branch1][C][=C][Ring1][=Branch1][C][#N]
|
|
3
|
+
[C][O][C@@H1][C][N][Branch1][S][C][=Branch1][C][=O][C][=C][C][=C][Branch1][C][C][C][=C][Ring1][#Branch1][C][Branch1][N][C][O][C][C][N][C][=Branch1][C][=O][N][C][C][Ring1][=Branch1][=O]
|
|
4
|
+
[C][C][C][C@@H1][Branch2][Ring1][Ring2][N][C][=Branch1][C][=O][C][C][=C][Branch1][Ring1][C][C][N][Branch1][C][C][N][=Ring1][Branch2][N][Branch1][C][C][C][=Branch1][C][=O][C@H1][Branch1][C][C][C][C][Branch1][C][N][=O]
|
|
5
|
+
[C][O][C][C][C][Branch2][Ring1][#C][C][=Branch1][C][=O][N][C][C@@H1][C][C@@H1][C][C][C@H1][Branch1][Ring2][C][Ring1][#Branch1][N][Ring1][=Branch1][C][=Branch1][C][=O][C][C][C][C][Ring1][Ring2][C][Ring2][Ring1][Ring2]
|
|
6
|
+
[C][C][=C][N][Branch1][C][C][N][=C][Ring1][=Branch1][C][=Branch1][C][=O][N][C][C][C][C][Ring1][Branch1][C][N][Branch1][N][C][=Branch1][C][=O][C][C][C][Branch1][C][O][C][C][C][Ring1][Ring1]
|
|
7
|
+
[C][C][Branch1][C][C][C][C@H1][Branch2][Ring1][Branch2][C][=Branch1][C][=O][N][C][C@H1][C][C][N][Branch1][=Branch2][C][C][C][=N][O][N][=Ring1][Branch1][C][Ring1][O][C][C][=N][N][Branch1][C][C][C][=Ring1][=Branch1]
|
|
8
|
+
[C][O][C][C][=Branch1][C][=O][N][C][C][C@H1][C][C][C@@H1][Branch2][Ring1][C][N][C][=Branch1][C][=O][C][=C][C][=C][C][Branch1][Ring1][O][C][=N][Ring1][Branch2][C@H1][Ring2][Ring1][C][C][C][Ring2][Ring1][=Branch1]
|
|
9
|
+
[C][O][C][=C][C][=C][Branch2][Ring1][=C][C@@H1][Branch1][O][C][Branch1][C][C][=O][N][C][C][C][C][N][Branch1][C][C][C][=Branch1][C][=O][C][Branch1][C][F][Branch1][C][F][F][C][=C][Ring2][Ring1][Branch2]
|
|
10
|
+
[C][C@H1][Branch1][#Branch1][C][N][Branch1][C][C][C][C][C][C][N][Branch1][=N][C][=Branch1][C][=O][C][C@@H1][C][C][C][O][Ring1][Branch1][C][C][Ring1][=C]
|
|
11
|
+
[C][=C][C][C][O][C][C][N][C][=Branch1][C][=O][N][C][C][C][=Branch1][C][=O][N][C][C][N][Branch1][C][C][C][C][Ring1][#Branch1]
|
|
12
|
+
[C][O][C][C][Branch2][Ring1][#C][C][=Branch1][C][=O][N][C][C@H1][Branch1][C][C][C@@H1][Branch1][#C][N][C][=Branch1][C][=O][C][C][=C][C][=C][C][=C][Ring1][=Branch1][C][Ring1][S][C][C][Ring2][Ring1][Branch1]
|
|
13
|
+
[N][C][=Branch1][C][=O][C@@H1][C][C][C@H1][Branch2][Ring1][=C][C][=Branch1][C][=O][N][C][C][N][Branch1][S][C][=Branch1][C][=O][C][Branch1][C][F][=C][C][C][C][C][Ring1][Branch1][C][C][Ring1][#C][C][Ring2][Ring1][=Branch1]
|
|
14
|
+
[C][=C][C][Branch2][Ring2][Ring1][C][=Branch1][C][=O][N][C][C][C][C][Ring1][Branch1][C][N][Branch1][P][C][=Branch1][C][=O][C][Branch1][=Branch1][C][C][C][Ring1][Ring1][C][C][Ring1][=Branch1][C][Ring1][=C][C][C][Ring2][Ring1][#Branch1]
|
|
15
|
+
[C][=C][C][N][Branch1][Branch1][C][C][O][C][C][C][C][N][Branch1][=C][C][=Branch1][C][=O][C][O][C][C][N][Branch1][C][C][C][C][C][Ring1][#C]
|
|
16
|
+
[C][C][=Branch1][C][=O][C][C][C][C][=Branch1][C][=O][N][Branch1][C][C][C][C][C][N][Branch1][C][C][C@H1][Branch1][C][C][C][=Branch1][C][=O][N][C][Branch1][C][N][=O]
|
|
17
|
+
[C][C][Branch1][C][C][Branch1][C][C][C][Branch1][C][C][Branch1][C][C][N][C][=Branch1][C][=O][C][C][C][=Branch1][C][=O][C][Branch1][C][C][Branch1][C][C][Branch1][C][C][C]
|
|
18
|
+
[C][C@@H1][C][N][Branch1][=Branch2][C][C][C][N][Branch1][C][C][C][C][C@@H1][Ring1][O][C][N][C][=Branch1][C][=O][C][=C][C][=C][Branch1][C][O][N][=C][Ring1][#Branch1]
|
|
19
|
+
[C][C][=N][O][C][Branch2][Ring1][=C][C@][Branch1][C][C][C][Branch1][C][C][Branch1][C][C][C][N][Branch1][C][C][C][C@H1][C][C][C][Branch1][C][C][N][C][Ring1][#Branch1][=O][=N][Ring2][Ring1][=Branch1]
|
|
20
|
+
[C][C][N][Branch1][#Branch1][C][C][Branch1][C][C][C][C][=Branch1][C][=O][N][C][C][Branch1][C][C][Branch1][C][C][S][Branch1][C][C][=Branch1][C][=O][=O]
|
|
21
|
+
[C][C][=C][C][Branch2][Ring1][N][C][C][=Branch1][C][=O][N][C][C][Branch1][#C][N][C][=Branch1][C][=O][C][C][C][C][=C][NH1][N][=Ring1][Branch1][C][Ring1][=C][=N][O][Ring2][Ring1][=Branch1]
|
|
22
|
+
[C][C][C@@H1][Branch2][Ring1][=N][C][=Branch1][C][=O][N][C][C][N][Branch1][=C][C][=Branch1][C][=O][C][S][Branch1][C][C][=Branch1][C][=O][=O][C][C][C][Ring1][=C][N][Branch1][C][C][C]
|
|
23
|
+
[C][C][N][Branch2][Ring1][=C][C][=Branch1][C][=O][C][=Branch1][C][=O][N][C][C][C][Branch1][C][C][Branch1][C][C][C][C][Branch1][C][C][Branch1][C][C][C][Ring1][#Branch2][C@H1][Branch1][C][C][C][=Branch1][C][=O][N][Branch1][C][C][C]
|
|
24
|
+
[C][C][C][C][Branch1][C][O][C][N][Branch1][C][C][C][C][=Branch1][C][=O][N][C][Branch1][C][C][Branch1][C][C][C]
|
|
25
|
+
[C][C][=C][C][=C][Ring1][Ring1][C][=Branch1][C][=O][N][C][C][C][C][C][=Branch1][C][=O][C][Branch1][O][C][C][=Branch1][C][=O][N][Branch1][C][C][C][C][Ring1][#C]
|
|
26
|
+
[C][C][Branch1][C][C][Branch1][C][C][N][Branch1][#C][C][C][=Branch1][C][=O][N][C][=C][C][=C][N][=C][Ring1][=Branch1][C@@H1][Branch1][Ring1][C][N][C][Ring1][P]
|
|
27
|
+
[C][N][C][C][N][Branch2][Ring1][#Branch1][C][Branch1][C][C][C][C][N][C][=Branch1][C][=O][N][C][C][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=Branch1][C][=O][C][C][Ring2][Ring1][Ring2][=O]
|
|
28
|
+
[C][N][C][=Branch1][C][=O][C][Branch2][Ring1][=Branch1][C][=Branch1][C][=O][N][C][C][N][Branch1][C][C][C][C][=Branch1][C][=O][N][Branch1][C][C][C][C][C][C][C][C][Ring2][Ring1][Ring1]
|
|
29
|
+
[C][C][=Branch1][C][=O][N][C][C][N][Branch2][Ring1][N][C][=Branch1][C][=O][C][N][Branch1][C][C][C][C][O][C][C][=C][C][Branch1][C][F][=C][C][Branch1][C][F][=C][Ring1][=Branch2][C][C][Ring2][Ring1][#Branch1]
|
|
30
|
+
[C][=C][C][C][O][C][C][=Branch1][C][=O][N][C][C][C@H1][Branch1][#C][C][N][C][=Branch1][C][=O][C][=C][C][=C][N][=C][Ring1][=Branch1][C][Ring1][#C]
|
|
31
|
+
[C][C][O][C][C][=Branch1][C][=O][N][C][C][Branch1][C][O][Branch2][Ring1][C][C][N][C][=Branch1][C][=O][C][C][C][C][Branch1][C][O][C][C][Ring1][#Branch1][C][Ring1][S]
|
|
32
|
+
[C][O][C][C@H1][Branch1][P][N][C][=Branch1][C][=O][C][C][Branch1][C][N][Branch1][C][C][C][O][C][C][C][C][Ring1][Ring1]
|
|
33
|
+
[O][=C][Branch1][O][C][O][C][=C][C][=C][C][=C][Ring1][=Branch1][N][C][C][C][=Branch1][C][=O][N][C][C][C][Branch1][=N][C][C][=Branch1][C][=O][N][C][C][C][C][Ring1][Branch1][C][C][Ring1][=C]
|
|
34
|
+
[C][C][=C][C][=C][C][Branch2][Ring1][P][C][=Branch1][C][=O][N][C][C][Branch2][Ring1][Ring1][N][Branch1][C][C][C][=Branch1][C][=O][C@@H1][C][C@H1][Ring1][Ring1][C][Branch1][C][N][=O][C][C][Ring1][#C][=C][Ring2][Ring1][#Branch1]
|
|
35
|
+
[C][C][Branch1][C][C][C][Branch2][Ring1][#Branch2][C][=Branch1][C][=O][N][C@@H1][C][N][Branch1][O][C][C][C][=C][C][=C][C][=C][Ring1][=Branch1][C][C@H1][Ring1][=N][O][C][Ring2][Ring1][Branch1]
|
|
36
|
+
[C][C][=Branch1][C][=O][N][C][C][C][=C][Branch1][C][C][=C][C][=C][Ring1][=Branch1][N][C][C][N][Branch1][=N][S][=Branch1][C][=O][=Branch1][C][=O][C][C][C][C][C][Ring1][C][=N][Ring1][P]
|
|
37
|
+
[C][#C][C][N][C][=Branch1][C][=S][N][C][=Branch1][C][=O][C][C][N][C][C][N][Branch1][=N][C][C][=C][C][=C][Branch1][C][F][C][=C][Ring1][#Branch1][C][C][Ring1][=C]
|
|
38
|
+
[C][C][C][N][Branch1][Ring2][C][C][O][C][=Branch1][C][=O][C][C][C][Branch1][Ring1][C][N][C][=Branch1][C][=O][C][C][C@H1][Branch1][C][C][C]
|
|
39
|
+
[C][C][C][C@@H1][Branch1][C][C][N][C][=Branch1][C][=O][C][N][C][C][Branch2][Ring1][=Branch1][C@H1][Branch1][C][C][N][C][=Branch1][C][=O][C][C][=C][C][=C][C][Branch1][C][C][=C][Ring1][#Branch1][C][Ring1][P]
|
|
40
|
+
[C][C][Branch1][C][C][C][Branch1][C][C][N][C][=Branch1][C][=O][C][=Branch1][C][=O][N][C][C][Branch2][Ring1][C][N][Branch1][C][C][C][=Branch1][C][=O][C][C][Branch1][C][C][Branch1][C][C][C][C][Ring1][=N]
|
|
41
|
+
[C][C@@H1][Branch2][Ring1][N][C][=Branch1][C][=O][N][C][C][C@H1][Branch1][#C][N][C][=Branch1][C][=O][C][C][Branch1][C][C][Branch1][C][C][O][C][Ring1][=N][C][C][C][O][C][C][Ring1][=Branch1]
|
|
42
|
+
[C][O][C][=C][C][=C][C][Branch2][Ring2][C][O][C][C][=Branch1][C][=O][N][C][C][Branch2][Ring1][Ring1][C][C][C][N][Ring1][Branch1][C][=Branch1][C][=O][C][N][C][=C][N][=N][Ring1][Branch1][C][Ring1][S][=C][Ring2][Ring1][#Branch2]
|
|
43
|
+
[C][C][C][C][N][=C][Branch1][C][C][C][=C][Ring1][=Branch1][C][=Branch1][C][=O][N][C][C][Branch1][C][C][O][C][C][O][Ring1][=Branch1][C][=C][C][=Ring1][=C][C][=Branch1][C][=O][N][C][C][C][C][Ring2][Ring1][#Branch1]
|
|
44
|
+
[C][=C][C][N][Branch1][Ring2][C][C][=C][C][C][N][C][=Branch1][C][=O][N][C][C][Branch1][C][C][Branch1][C][C][C][C][C][O][C][C][Ring1][=Branch1]
|
|
45
|
+
[C][C@H1][C][N][Branch1][=C][C][C][Branch1][C][C][Branch1][C][C][C][N][C][C][O][C][C][C@@H1][Branch1][=Branch1][C][=Branch1][C][=O][O][Ring2][Ring1][C]
|
|
46
|
+
[C][N][N][=N][C][=C][Ring1][Branch1][C][=Branch1][C][=O][N][C][C][C][C][C@@H1][C][N][Branch1][=Branch1][C][C][C][Ring1][Ring1][C][C][O][Ring1][=Branch2]
|
|
47
|
+
[C][C][Branch2][Ring1][C][C][=Branch1][C][=O][N][Branch1][C][C][C][=Branch1][C][=O][C][C][C][Ring1][Branch1][C][C][=C][C][=C][Branch1][#Branch2][C][=Branch1][C][=O][N][Branch1][C][C][C][S][Ring1][#Branch2]
|
|
48
|
+
[C][C@@H1][Branch2][Ring1][O][C][C][C][N][C][=Branch1][C][=O][N][C][C][Branch1][C][C][Branch1][C][C][O][C][Branch1][C][C][Branch1][C][C][C][C][N][C][C][O][C][C][Ring1][=Branch1]
|
|
49
|
+
[C][C][NH1][C][=C][C][=Ring1][Branch1][C][=Branch1][C][=O][N][C][C][C@@][Branch1][C][O][Branch1][P][C][N][C][=Branch1][C][=O][C][#C][C][Branch1][C][C][Branch1][C][C][C][C][Ring1][S]
|
|
50
|
+
[C][=C][C][Branch1][C][C][Branch1][C][C][C][=Branch1][C][=O][C][N][Branch1][#C][C][C][N][C][=Branch1][C][=O][Branch1][C][C][C][C][Branch1][C][O][C][C][C][C][Ring1][=C]
|
chembfn_webui/lib/utilities.py
CHANGED
|
@@ -4,15 +4,122 @@
|
|
|
4
4
|
Utilities.
|
|
5
5
|
"""
|
|
6
6
|
import os
|
|
7
|
+
import ast
|
|
7
8
|
import json
|
|
8
9
|
from glob import glob
|
|
9
10
|
from pathlib import Path
|
|
10
|
-
from typing import Dict, List, Tuple, Union
|
|
11
|
+
from typing import Dict, List, Tuple, Union, Optional, Callable, Any
|
|
11
12
|
|
|
12
13
|
_model_path = Path(__file__).parent.parent / "model"
|
|
13
14
|
if "CHEMBFN_WEBUI_MODEL_DIR" in os.environ:
|
|
14
15
|
_model_path = Path(os.environ["CHEMBFN_WEBUI_MODEL_DIR"])
|
|
15
16
|
|
|
17
|
+
# String methods that a user-supplied lambda may call on its argument `x`.
_ALLOWED_STRING_METHODS = {"strip", "replace", "split"}
# AST node types that may appear anywhere in a user-supplied expression;
# every other node type is rejected by `_SafeLambdaValidator.visit`.
_ALLOWED_NODES = (
    ast.arguments,
    ast.arg,
    ast.Expression,
    ast.Attribute,
    ast.Subscript,
    ast.Constant,
    ast.UnaryOp,
    ast.Lambda,
    ast.Load,
    ast.Name,
    ast.Call,
    ast.USub,
)


class _SafeLambdaValidator(ast.NodeVisitor):
    """
    Validate that an expression tree only uses a tiny whitelisted subset of Python.

    The accepted shape is a single-argument lambda `lambda x: ...` whose body
    only calls whitelisted string methods directly on `x` with literal
    arguments, optionally followed by an integer index after `split`.
    Any violation raises `ValueError`.
    """

    def visit(self, node: ast.AST) -> Any:
        """
        Reject any node whose type is not whitelisted, then dispatch as usual.

        :param node: node to validate
        :type node: ast.AST
        :return: whatever the node-specific visitor returns
        :rtype: Any
        """
        if not isinstance(node, _ALLOWED_NODES):
            raise ValueError(f"Disallowed syntax: {type(node).__name__}")
        return super().visit(node)

    def visit_Lambda(self, node: ast.Lambda) -> Any:
        # The signature must be validated as well as the body: default values
        # are arbitrary expressions that are evaluated as soon as the lambda
        # object is created, so leaving them unchecked bypasses the whitelist.
        self.visit(node.args)
        self.visit(node.body)

    def visit_Name(self, node: ast.Name) -> None:
        if node.id != "x":
            raise ValueError(f"Only variable 'x' is allowed, not '{node.id}'")

    def visit_arguments(self, node: ast.arguments) -> None:
        # Exactly one plain positional parameter named 'x' is accepted.
        if node.posonlyargs or node.kwonlyargs or node.vararg or node.kwarg:
            raise ValueError("Only one argument is accepted")
        if len(node.args) != 1:
            raise ValueError("Only one argument is accepted")
        if node.args[0].arg != "x":
            raise ValueError("Lambda argument must be named 'x'")
        if node.defaults or node.kw_defaults:
            raise ValueError("Default argument values are not allowed")

    def visit_Subscript(self, node: ast.Subscript) -> Any:
        # Only allow x.split(...)[idx]
        target = node.value
        if not (
            isinstance(target, ast.Call)
            and isinstance(target.func, ast.Attribute)
            and target.func.attr == "split"
        ):
            raise ValueError("Indexing should only be used after `split` method")
        idx = node.slice
        if not isinstance(idx, (ast.Constant, ast.UnaryOp)):
            raise ValueError("Only number index is accepted")
        self.visit(target)
        # The index must be a positive or negative integer literal.
        if isinstance(idx, ast.UnaryOp) and isinstance(idx.op, ast.USub):
            literal = idx.operand
            if not isinstance(literal, ast.Constant):
                raise ValueError("Invalid index")
        else:
            literal = idx
        if not isinstance(literal, ast.Constant) or not isinstance(literal.value, int):
            raise ValueError("Index must be an integer literal")
        self.visit(idx)

    def visit_Attribute(self, node: ast.Attribute) -> Any:
        # Only allow x.<method>
        if not isinstance(node.value, ast.Name):
            raise ValueError("No nested method calling is allowed")
        if node.value.id != "x":
            raise ValueError("Please only use 'x' as argument")
        if node.attr not in _ALLOWED_STRING_METHODS:
            raise ValueError(f"Method '{node.attr}' not allowed")
        self.generic_visit(node)

    def visit_Call(self, node: ast.Call) -> Any:
        # only allow x.<method>(...)
        if not isinstance(node.func, ast.Attribute):
            raise ValueError("Only method calls on string objects are allowed")
        self.visit(node.func)
        for arg in node.args:
            if not isinstance(arg, ast.Constant):
                raise ValueError("Only literal arguments allowed")
            self.visit(arg)
        if node.keywords:
            raise ValueError("Keyword arguments are not allowed")
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def sys_info() -> str:
    """
    Report the versions of this application and of its main dependencies.

    :return: system info in html format
    :rtype: str
    """
    # Imported lazily so that the heavy packages are only loaded on demand.
    import sys
    import torch
    import gradio as gr
    import bayesianflow_for_chem as bfn
    from .version import __version__

    py_version = ".".join(map(str, sys.version_info[:3]))
    # Use the long version string when torch exposes one.
    torch_version = getattr(torch, "__long_version__", torch.__version__)
    return f"""
version: <a href="https://github.com/Augus1999/ChemBFN-WebUI">{__version__}</a>
 • 
bayesianflow-for-chem: <a href="https://github.com/Augus1999/bayesian-flow-network-for-chemistry">{bfn.__version__}</a>
 • 
python: {py_version}
 • 
torch: {torch_version}
 • 
gradio: {gr.__version__}
"""
|
|
122
|
+
|
|
16
123
|
|
|
17
124
|
def find_vocab() -> Dict[str, str]:
|
|
18
125
|
"""
|
|
@@ -100,19 +207,27 @@ def _get_lora_info(prompt: str) -> Tuple[str, List[float], float]:
|
|
|
100
207
|
if len(lora_info) == 1:
|
|
101
208
|
lora_scaling = 1.0
|
|
102
209
|
else:
|
|
103
|
-
|
|
210
|
+
try:
|
|
211
|
+
lora_scaling = float(lora_info[1])
|
|
212
|
+
except ValueError as error:
|
|
213
|
+
print(f"{error}. Reset `lora_scaling` to 1.0.")
|
|
214
|
+
lora_scaling = 1.0
|
|
104
215
|
if len(s) == 1:
|
|
105
216
|
obj = []
|
|
106
217
|
elif ":" not in s[1]:
|
|
107
218
|
obj = []
|
|
108
219
|
else:
|
|
109
220
|
s2 = s[1].replace(":", "").replace("[", "").replace("]", "").split(",")
|
|
110
|
-
|
|
221
|
+
try:
|
|
222
|
+
obj = [float(i) for i in s2]
|
|
223
|
+
except ValueError as error:
|
|
224
|
+
print(f"{error}. Reset `obj` to empty.")
|
|
225
|
+
obj = []
|
|
111
226
|
return lora_name, obj, lora_scaling
|
|
112
227
|
|
|
113
228
|
|
|
114
229
|
def parse_prompt(
|
|
115
|
-
prompt: str,
|
|
230
|
+
prompt: Optional[str],
|
|
116
231
|
) -> Dict[str, Union[List[str], List[float], List[List[float]]]]:
|
|
117
232
|
"""
|
|
118
233
|
Parse prompt.
|
|
@@ -126,7 +241,7 @@ def parse_prompt(
|
|
|
126
241
|
case VI. one LoRA with condition `"<name>:[a,b,...]"` --> `{"lora": [name], "objective": [[a, b, ...]], "lora_scaling": [1]}`\n
|
|
127
242
|
case VII. several LoRAs with conditions `"<name1:A1>:[a1,b1,...];<name2>:[a2,b2,c2,...]"` --> `{"lora": [name1, name2], "objective": [[a1, b1, ...], [a2, b2, c2, ...]], "lora_scaling": [A1, 1]}`\n
|
|
128
243
|
case VIII. other cases --> `{"lora": [], "objective": [], "lora_scaling": []}`\n
|
|
129
|
-
:type prompt: str
|
|
244
|
+
:type prompt: str | None
|
|
130
245
|
:return: ```
|
|
131
246
|
{
|
|
132
247
|
"lora": [name1, name2, ...],
|
|
@@ -135,18 +250,26 @@ def parse_prompt(
|
|
|
135
250
|
}```
|
|
136
251
|
:rtype: dict
|
|
137
252
|
"""
|
|
253
|
+
if prompt is None:
|
|
254
|
+
prompt = ""
|
|
138
255
|
prompt_group = prompt.strip().replace("\n", "").split(";")
|
|
139
|
-
prompt_group = [i for i in prompt_group if i]
|
|
256
|
+
prompt_group = [i.strip() for i in prompt_group if i.strip()]
|
|
140
257
|
info = {"lora": [], "objective": [], "lora_scaling": []}
|
|
141
258
|
if not prompt_group:
|
|
142
259
|
pass
|
|
143
260
|
if len(prompt_group) == 1:
|
|
144
261
|
if not ("<" in prompt_group[0] and ">" in prompt_group[0]):
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
262
|
+
try:
|
|
263
|
+
obj = [
|
|
264
|
+
float(i)
|
|
265
|
+
for i in prompt_group[0]
|
|
266
|
+
.replace("[", "")
|
|
267
|
+
.replace("]", "")
|
|
268
|
+
.split(",")
|
|
269
|
+
]
|
|
270
|
+
info["objective"].append(obj)
|
|
271
|
+
except ValueError as error:
|
|
272
|
+
print(f"{error}. Reset `obj` to empty.")
|
|
150
273
|
else:
|
|
151
274
|
lora_name, obj, lora_scaling = _get_lora_info(prompt_group[0])
|
|
152
275
|
info["lora"].append(lora_name)
|
|
@@ -165,26 +288,28 @@ def parse_prompt(
|
|
|
165
288
|
return info
|
|
166
289
|
|
|
167
290
|
|
|
168
|
-
def parse_exclude_token(tokens: str, vocab_keys: List[str]) -> List[str]:
|
|
291
|
+
def parse_exclude_token(tokens: Optional[str], vocab_keys: List[str]) -> List[str]:
|
|
169
292
|
"""
|
|
170
293
|
Parse exclude token string.
|
|
171
294
|
|
|
172
295
|
:param tokens: unwanted token string in the format `"token1,token2,..."`
|
|
173
296
|
:param vocab_keys: vocabulary elements
|
|
174
|
-
:type tokens: str
|
|
297
|
+
:type tokens: str | None
|
|
175
298
|
:type vocab_keys: list
|
|
176
299
|
:return: a list of allowed vocabulary
|
|
177
300
|
:rtype: list
|
|
178
301
|
"""
|
|
302
|
+
if tokens is None:
|
|
303
|
+
tokens = ""
|
|
179
304
|
tokens = tokens.strip().replace("\n", "").split(",")
|
|
180
|
-
tokens = [i for i in tokens if i]
|
|
305
|
+
tokens = [i.strip() for i in tokens if i.strip()]
|
|
181
306
|
if not tokens:
|
|
182
307
|
return tokens
|
|
183
308
|
tokens = [i for i in vocab_keys if i not in tokens]
|
|
184
309
|
return tokens
|
|
185
310
|
|
|
186
311
|
|
|
187
|
-
def parse_sar_control(sar_control: str) -> List[bool]:
|
|
312
|
+
def parse_sar_control(sar_control: Optional[str]) -> List[bool]:
|
|
188
313
|
"""
|
|
189
314
|
Parse semi-autoregression control string.
|
|
190
315
|
|
|
@@ -194,17 +319,48 @@ def parse_sar_control(sar_control: str) -> List[bool]:
|
|
|
194
319
|
case III. `"T"` --> `[True]` \n
|
|
195
320
|
case IV. `F,T,...` --> `[False, True, ...]` \n
|
|
196
321
|
case V. other cases --> `[False, False, ...]` \n
|
|
197
|
-
:type sar_control: str
|
|
322
|
+
:type sar_control: str | None
|
|
198
323
|
:return: a list of SAR flag
|
|
199
324
|
:rtype: list
|
|
200
325
|
"""
|
|
326
|
+
if sar_control is None:
|
|
327
|
+
sar_control = ""
|
|
201
328
|
sar_flag = sar_control.strip().replace("\n", "").split(",")
|
|
202
|
-
sar_flag = [i for i in sar_flag if i]
|
|
329
|
+
sar_flag = [i.strip() for i in sar_flag if i.strip()]
|
|
203
330
|
if not sar_flag:
|
|
204
331
|
return [False]
|
|
205
332
|
sar_flag = [i.lower() == "t" for i in sar_flag]
|
|
206
333
|
return sar_flag
|
|
207
334
|
|
|
208
335
|
|
|
336
|
+
def build_result_prep_fn(fn_string: Optional[str]) -> Callable[[str], str]:
    """
    Build result preprocessing function.

    The string is expected to be a restricted lambda expression such as
    `lambda x: x.split(">>")[-1]`. It is validated by `_SafeLambdaValidator`
    before being evaluated with an empty set of builtins. If the string is
    empty, invalid, rejected by the validator, or does not evaluate to a
    callable, the identity function is returned instead.

    :param fn_string: string form result preprocessing function
    :type fn_string: str | None
    :return: the function built from `fn_string`, or the identity function as fallback
    :rtype: callable
    """

    def _identity(x: str) -> str:
        return x

    # Strip surrounding whitespace: a leading space would otherwise make the
    # parser raise an indentation error on an otherwise valid expression, and
    # whitespace-only input simply means "no preprocessing".
    source = fn_string.strip() if fn_string else ""
    if not source:
        return _identity
    try:
        tree = ast.parse(source, mode="eval")
        _SafeLambdaValidator().visit(tree)
        code = compile(tree, filename="<safe_lambda>", mode="eval")
        # NOTE(security): `eval` runs user-supplied code; safety relies entirely
        # on the validator above, the empty builtins are only a second barrier.
        fn = eval(code, {"__builtins__": {}}, {})
    except Exception as e:
        # Deliberate best effort: any failure falls back to the identity function.
        print(
            f"Invalid or unsafe expression: {e}. Returned identity as result preprocessing function."
        )
        return _identity
    if not callable(fn):
        print(
            "Warning: Expression did not produce a function. Returned identity as result preprocessing function."
        )
        return _identity
    return fn
|
|
363
|
+
|
|
364
|
+
|
|
209
365
|
if __name__ == "__main__":
|
|
210
366
|
...
|
chembfn_webui/lib/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chembfn_webui
|
|
3
|
-
Version: 1.0
|
|
3
|
+
Version: 2.1.0
|
|
4
4
|
Summary: WebUI for ChemBFN
|
|
5
5
|
Home-page: https://github.com/Augus1999/ChemBFN-WebUI
|
|
6
6
|
Author: Nianze A. Tao
|
|
@@ -15,15 +15,16 @@ Classifier: Programming Language :: Python :: 3
|
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
19
|
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
19
20
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
21
|
Requires-Python: >=3.11
|
|
21
22
|
Description-Content-Type: text/markdown
|
|
22
23
|
License-File: LICENSE
|
|
23
|
-
Requires-Dist: bayesianflow_for_chem>=2.
|
|
24
|
-
Requires-Dist: mol2chemfigPy3>=1.5.
|
|
25
|
-
Requires-Dist: gradio
|
|
26
|
-
Requires-Dist: torch>=2.
|
|
24
|
+
Requires-Dist: bayesianflow_for_chem>=2.4.0
|
|
25
|
+
Requires-Dist: mol2chemfigPy3>=1.5.12
|
|
26
|
+
Requires-Dist: gradio<7.0.0,>=6.0.0
|
|
27
|
+
Requires-Dist: torch>=2.9.0
|
|
27
28
|
Requires-Dist: selfies>=2.2.0
|
|
28
29
|
Dynamic: author
|
|
29
30
|
Dynamic: author-email
|
|
@@ -39,10 +40,20 @@ Dynamic: requires-dist
|
|
|
39
40
|
Dynamic: requires-python
|
|
40
41
|
Dynamic: summary
|
|
41
42
|
|
|
42
|
-
##
|
|
43
|
+
## Web-based UI visualisation tool for ChemBFN method
|
|
43
44
|
|
|
45
|
+
[](https://pypi.org/project/chembfn-webui/)
|
|
46
|
+

|
|
47
|
+

|
|
48
|
+
[](https://stand-with-ukraine.pp.ua)
|
|
44
49
|
|
|
45
50
|
|
|
51
|
+
|
|
52
|
+
> Important:
|
|
53
|
+
>
|
|
54
|
+
> For security reasons, it is not recommended to use this application as a public service.
|
|
55
|
+
> When deploying on a local host as a shared application, it is better to install this application in a container or VM, to prevent this application from accessing the Internet, and to limit its permissions to read, create, and delete local files and directories.
|
|
56
|
+
|
|
46
57
|
### 1. Install
|
|
47
58
|
|
|
48
59
|
```bash
|
|
@@ -83,6 +94,22 @@ For example,
|
|
|
83
94
|
└───moses_selfies_vocab.txt
|
|
84
95
|
```
|
|
85
96
|
|
|
97
|
+
> Note:
|
|
98
|
+
>
|
|
99
|
+
> >The file `config.json` is automatically saved by the CLI tool `Madmol` provided in the `bayesianflow-for-chem` package. If you train models via the Python API, you need to manually create that file for your models by filling in the template:
|
|
100
|
+
> >```json
|
|
101
|
+
> >{
|
|
102
|
+
> > "padding_index": 0,
|
|
103
|
+
> > "start_index": 1,
|
|
104
|
+
> > "end_index": 2,
|
|
105
|
+
> > "padding_strategy": "static",
|
|
106
|
+
> > "padding_length": PADDING_LENGTH,
|
|
107
|
+
> > "label": [LABEL_NAME_I, LABEL_NAME_II, ...],
|
|
108
|
+
> > "name": JOB_NAME
|
|
109
|
+
> >}
|
|
110
|
+
> >```
|
|
111
|
+
> >The configuration file for base models can be downloaded [here](https://huggingface.co/suenoomozawa/ChemBFN/resolve/main/config.json).
|
|
112
|
+
|
|
86
113
|
If placed correctly, all these files can be seen in the "model explorer" tab.
|
|
87
114
|
|
|
88
115
|
> You can use an external folder to host the models if it follows the same structure as [`chembfn_webui/model`](https://github.com/Augus1999/ChemBFN-WebUI/tree/main/chembfn_webui/model). See the next section for the method.
|
|
@@ -131,6 +158,7 @@ Under "advanced control" tab
|
|
|
131
158
|
|
|
132
159
|
* You can control semi-autoregressive behaviours by keying in `F` to switch off SAR, `T` to switch on SAR, or a prompt like `F,F,T,...` to individually control the SAR in an ensemble model.
|
|
133
160
|
* You can add unwanted tokens, e.g., `[Cu],p,[Si]`.
|
|
161
|
+
* You can customise the result preprocessing function, e.g., if the model outputs a reaction SMILES such as "CCI.C[O-]>>COCC", which cannot be recognised by RDKit, you can pass `lambda x: x.split(">>")[-1]` to make the program look only at the products.
|
|
134
162
|
|
|
135
163
|
### 6. Generate molecules
|
|
136
164
|
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
chembfn_webui/__init__.py,sha256=AXUdd_PrlfVO56losFUP7A8XrqCDPylwRbTpe_WG3Uc,87
|
|
2
|
+
chembfn_webui/bin/app.py,sha256=6fQbbix4ZT06twQPhhXsxixs5lkq5jYQq2KC7DhN-QA,25444
|
|
3
|
+
chembfn_webui/bin/favicon.png,sha256=Vm7bk-sD-_4p1LaNPEkGOxXSWKxndkNU4DUAGrhE7RQ,43455
|
|
4
|
+
chembfn_webui/cache/cache_file_here.txt,sha256=hi60T_q6Cf5WPtXuwe4CqjiWpaUqrczsmGMhKIUL--M,28
|
|
5
|
+
chembfn_webui/cache/results.csv,sha256=yX2j1DFbCNkXUQwn4t98c1nZeEuvXsoCQYfoaXDoSVc,9065
|
|
6
|
+
chembfn_webui/lib/utilities.py,sha256=_E-aDtwG7Po5ex9CWWMYmjJbCG54iLGtwlw4XGCbzAE,13154
|
|
7
|
+
chembfn_webui/lib/version.py,sha256=9rTqy3zjJidxuirrjVFGiPQYQKEPh9G43uBHvl4-32k,138
|
|
8
|
+
chembfn_webui/model/base_model/place_base_model_here.txt,sha256=oa8_ILaAlWpTXICVDi-Y46_OahV7wB6Che6gbiEIh-c,39
|
|
9
|
+
chembfn_webui/model/lora/place_lora_folder_here.txt,sha256=YYOo0Cj278DyRcgVrCLa1f2Q-cqgNeMnelaLiA3Fuic,69
|
|
10
|
+
chembfn_webui/model/standalone_model/place_standalone_model_folder_here.txt,sha256=Dp42UscfI0Zp3SnvRv5vOfWiJZnxdY7rG3jo0kf86VM,80
|
|
11
|
+
chembfn_webui/model/vocab/place_vocabulary_file_here.txt,sha256=fLOINvZP2022oE7RsmfDjgyaw2yMi7glmdu_cTwmo88,28
|
|
12
|
+
chembfn_webui-2.1.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
13
|
+
chembfn_webui-2.1.0.dist-info/METADATA,sha256=nh3zZx1r89zoMGZyAQo3hTkbtwOBo1KuGRuB6Z7KYMg,7640
|
|
14
|
+
chembfn_webui-2.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
15
|
+
chembfn_webui-2.1.0.dist-info/entry_points.txt,sha256=fp8WTPybvwpeYKrUhTi456wwZbmCMJXN1TeFGpR1SlY,55
|
|
16
|
+
chembfn_webui-2.1.0.dist-info/top_level.txt,sha256=VdWt3Z7jhbB0pQO_mkRawnU5s75SBT9BV8fGaAIJTDI,14
|
|
17
|
+
chembfn_webui-2.1.0.dist-info/RECORD,,
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
chembfn_webui/__init__.py,sha256=AXUdd_PrlfVO56losFUP7A8XrqCDPylwRbTpe_WG3Uc,87
|
|
2
|
-
chembfn_webui/bin/app.py,sha256=fqIP5O5aojwZmw2-eHBcuFm4YuQBIEuwOMTvDzW7jcA,21632
|
|
3
|
-
chembfn_webui/cache/cache_file_here.txt,sha256=hi60T_q6Cf5WPtXuwe4CqjiWpaUqrczsmGMhKIUL--M,28
|
|
4
|
-
chembfn_webui/cache/results.csv,sha256=xdSOWM1GGGJEFS1Y4sfCjA-9-66AECEo6El59_yW1hw,37
|
|
5
|
-
chembfn_webui/lib/utilities.py,sha256=ALPw-Evjd9DdsU_RQA6Zp2Gc6XnRR7Y_5fZrqG9azWo,7460
|
|
6
|
-
chembfn_webui/lib/version.py,sha256=Mbvn1j2C-hWCKICax3XLfU-P0Q3j0oPQ4GQXoDFJ3fs,138
|
|
7
|
-
chembfn_webui/model/base_model/place_base_model_here.txt,sha256=oa8_ILaAlWpTXICVDi-Y46_OahV7wB6Che6gbiEIh-c,39
|
|
8
|
-
chembfn_webui/model/lora/place_lora_folder_here.txt,sha256=YYOo0Cj278DyRcgVrCLa1f2Q-cqgNeMnelaLiA3Fuic,69
|
|
9
|
-
chembfn_webui/model/standalone_model/place_standalone_model_folder_here.txt,sha256=Dp42UscfI0Zp3SnvRv5vOfWiJZnxdY7rG3jo0kf86VM,80
|
|
10
|
-
chembfn_webui/model/vocab/place_vocabulary_file_here.txt,sha256=fLOINvZP2022oE7RsmfDjgyaw2yMi7glmdu_cTwmo88,28
|
|
11
|
-
chembfn_webui-1.0.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
12
|
-
chembfn_webui-1.0.0.dist-info/METADATA,sha256=iRpNEXoJZRG42fMjyHwsmcUJ5jV0MrGgWUMkZrqUb3s,5897
|
|
13
|
-
chembfn_webui-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
14
|
-
chembfn_webui-1.0.0.dist-info/entry_points.txt,sha256=fp8WTPybvwpeYKrUhTi456wwZbmCMJXN1TeFGpR1SlY,55
|
|
15
|
-
chembfn_webui-1.0.0.dist-info/top_level.txt,sha256=VdWt3Z7jhbB0pQO_mkRawnU5s75SBT9BV8fGaAIJTDI,14
|
|
16
|
-
chembfn_webui-1.0.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|