chembfn-webui 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chembfn-webui might be problematic. Click here for more details.

chembfn_webui/bin/app.py CHANGED
@@ -24,7 +24,7 @@ from bayesianflow_for_chem.data import (
24
24
  aa2vec,
25
25
  split_selfies,
26
26
  )
27
- from bayesianflow_for_chem.tool import sample, inpaint, adjust_lora_
27
+ from bayesianflow_for_chem.tool import sample, inpaint, adjust_lora_, quantise_model_
28
28
  from lib.utilities import (
29
29
  find_model,
30
30
  find_vocab,
@@ -41,6 +41,16 @@ cache_dir = Path(__file__).parent.parent / "cache"
41
41
 
42
42
 
43
43
  def selfies2vec(sel: str, vocab_dict: Dict[str, int]) -> List[int]:
44
+ """
45
+ Tokenise SELFIES string.
46
+
47
+ :param sel: SELFIES string
48
+ :param vocab_dict: vocabulary dictionary
49
+ :type sel: str
50
+ :type vocab_dict: dict
51
+ :return: a list of token indices
52
+ :rtype: list
53
+ """
44
54
  s = split_selfies(sel)
45
55
  unknown_id = None
46
56
  for key, idx in vocab_dict.items():
@@ -55,6 +65,23 @@ def refresh(
55
65
  ) -> Tuple[
56
66
  List[str], List[str], List[List[str]], List[List[str]], gr.Dropdown, gr.Dropdown
57
67
  ]:
68
+ """
69
+ Refresh model file list.
70
+
71
+ :param model_selected: the selected model name
72
+ :param vocab_selected: the selected vocabulary name
73
+ :param tokeniser_selected: the selected tokeniser name
74
+ :type model_selected: str
75
+ :type vocab_selected: str
76
+ :type tokeniser_selected: str
77
+ :return: a list of vocabulary names \n
78
+ a list of base model files \n
79
+ a list of standalone model files \n
80
+ a list of LoRA model files \n
81
+ Gradio Dropdown item \n
82
+ Gradio Dropdown item \n
83
+ :rtype: tuple
84
+ """
58
85
  global vocabs, models
59
86
  vocabs = find_vocab()
60
87
  models = find_model()
@@ -77,6 +104,16 @@ def refresh(
77
104
 
78
105
 
79
106
  def select_lora(evt: gr.SelectData, prompt: str) -> str:
107
+ """
108
+ Select LoRA model name from Dataframe object.
109
+
110
+ :param evt: `~gradio.SelectData` instance
111
+ :param prompt: prompt string
112
+ :type evt: gradio.SelectData
113
+ :type prompt: str
114
+ :return: new prompt string
115
+ :rtype: str
116
+ """
80
117
  global lora_selected
81
118
  if lora_selected: # avoid double select
82
119
  lora_selected = False
@@ -104,7 +141,49 @@ def run(
104
141
  scaffold: str,
105
142
  sar_control: str,
106
143
  exclude_token: str,
144
+ quantise: str,
145
+ jited: str,
107
146
  ) -> Tuple[List, List[str], str, str, str]:
147
+ """
148
+ Run generation or inpainting.
149
+
150
+ :param model_name: model name
151
+ :param token_name: tokeniser name
152
+ :param vocab_fn: customised vocabulary name
153
+ :param step: number of sampling steps
154
+ :param batch_size: batch-size
155
+ :param sequence_size: maximum sequence length
156
+ :param guidance_strength: guidance strength of conditioning
157
+ :param method: `"BFN"` or `"ODE"`
158
+ :param temperature: sampling temperature while ODE-solver used
159
+ :param prompt: prompt string
160
+ :param scaffold: molecular scaffold
161
+ :param sar_control: semi-autoregressive behaviour flags
162
+ :param exclude_token: unwanted tokens
163
+ :param quantise: `"on"` or `"off"`
164
+ :param jited: `"on"` or `"off"`
165
+ :type model_name: str
166
+ :type token_name: str
167
+ :type vocab_fn: str
168
+ :type step: int
169
+ :type batch_size: int
170
+ :type sequence_size: int
171
+ :type guidance_strength: float
172
+ :type method: str
173
+ :type temperature: float
174
+ :type prompt: str
175
+ :type scaffold: str
176
+ :type sar_control: str
177
+ :type exclude_token: str
178
+ :type quantise: str
179
+ :type jited: str
180
+ :return: list of images \n
181
+ list of generated molecules \n
182
+ Chemfig code \n
183
+ messages \n
184
+ cache file path
185
+ :rtype: tuple
186
+ """
108
187
  _message = []
109
188
  base_model_dict = dict(models["base"])
110
189
  standalone_model_dict = dict([[i[0], i[1]] for i in models["standalone"]])
@@ -113,6 +192,7 @@ def run(
113
192
  lora_label_dict = dict([[i[0], i[2] != []] for i in models["lora"]])
114
193
  standalone_lmax_dict = dict([[i[0], i[3]] for i in models["standalone"]])
115
194
  lora_lmax_dict = dict([[i[0], i[3]] for i in models["lora"]])
195
+ # ------- build tokeniser -------
116
196
  if token_name == "SMILES & SAFE":
117
197
  vocab_keys = VOCAB_KEYS
118
198
  tokeniser = smiles2vec
@@ -139,7 +219,7 @@ def run(
139
219
  # ------- build model -------
140
220
  prompt_info = parse_prompt(prompt)
141
221
  sar_flag = parse_sar_control(sar_control)
142
- print(prompt_info)
222
+ print("Prompt summary:", prompt_info) # show prompt info
143
223
  if not prompt_info["lora"]:
144
224
  if model_name in base_model_dict:
145
225
  lmax = sequence_size
@@ -166,6 +246,10 @@ def run(
166
246
  y = None
167
247
  _message.append(f"Sequence length is set to {lmax} from model metadata.")
168
248
  bfn.semi_autoregressive = sar_flag[0]
249
+ if quantise == "on":
250
+ quantise_model_(bfn)
251
+ if jited == "on":
252
+ bfn.compile()
169
253
  elif len(prompt_info["lora"]) == 1:
170
254
  lmax = lora_lmax_dict[prompt_info["lora"][0]]
171
255
  if model_name in base_model_dict:
@@ -194,6 +278,10 @@ def run(
194
278
  adjust_lora_(bfn, prompt_info["lora_scaling"][0])
195
279
  _message.append(f"Sequence length is set to {lmax} from model metadata.")
196
280
  bfn.semi_autoregressive = sar_flag[0]
281
+ if quantise == "on":
282
+ quantise_model_(bfn)
283
+ if jited == "on":
284
+ bfn.compile()
197
285
  else:
198
286
  lmax = max([lora_lmax_dict[i] for i in prompt_info["lora"]])
199
287
  if model_name in base_model_dict:
@@ -211,6 +299,10 @@ def run(
211
299
  sar_flag = [sar_flag[0] for _ in range(len(weights))]
212
300
  bfn = EnsembleChemBFN(base_model_dir, lora_dir, mlps, weights)
213
301
  y = [torch.tensor([i], dtype=torch.float32) for i in prompt_info["objective"]]
302
+ if quantise == "on":
303
+ bfn.quantise()
304
+ if jited == "on":
305
+ bfn.compile()
214
306
  _message.append(f"Sequence length is set to {lmax} from model metadata.")
215
307
  # ------- inference -------
216
308
  allowed_tokens = parse_exclude_token(exclude_token, vocab_keys)
@@ -366,6 +458,8 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
366
458
  label="exclude tokens",
367
459
  placeholder="key in unwanted tokens separated by comma.",
368
460
  )
461
+ quantise = gr.Radio(["on", "off"], value="off", label="quantisation")
462
+ jited = gr.Radio(["on", "off"], value="off", label="JIT")
369
463
  # ------ user interaction events -------
370
464
  btn.click(
371
465
  fn=run,
@@ -383,6 +477,8 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
383
477
  scaffold,
384
478
  sar_control,
385
479
  exclude_token,
480
+ quantise,
481
+ jited,
386
482
  ],
387
483
  outputs=[img, result, chemfig, message, btn_download],
388
484
  )
@@ -426,6 +522,12 @@ with gr.Blocks(title="ChemBFN WebUI") as app:
426
522
 
427
523
 
428
524
  def main() -> None:
525
+ """
526
+ Main function.
527
+
528
+ :return:
529
+ :rtype: None
530
+ """
429
531
  parser = argparse.ArgumentParser()
430
532
  parser.add_argument(
431
533
  "--public", default=False, help="open to public", action="store_true"
@@ -1,15 +1,7 @@
1
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][O]
2
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][=Branch1][C][=O][N][C][C][C]
3
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][N][C][C][Branch1][=Branch2][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=C][C][=C][C][=C][Ring1][=Branch1]
4
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][O]
5
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
6
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
7
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
8
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
9
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
10
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
11
- [O][=C][Branch2][Ring1][C][N][N][C][=Branch1][C][=O][C][=C][C][=C][Branch1][C][Cl][C][=C][Ring1][#Branch1][C][=C][C][=C][C][=C][Ring1][=Branch1]
12
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
13
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][=Branch1][C][=O][O]
14
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
15
- [C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C][C]
1
+ CC(C)(CC#N)C#N
2
+ CCC(C)(C#N)C#N
3
+ C1C(C)(C#N)C1C#N
4
+ C(C)(C)C(C#N)C#N
5
+ C(C)CC(C#N)C#N
6
+ CCC(C)(C#N)C#N
7
+ CC(C)(C#N)C#N
@@ -15,6 +15,12 @@ if "CHEMBFN_WEBUI_MODEL_DIR" in os.environ:
15
15
 
16
16
 
17
17
  def find_vocab() -> Dict[str, str]:
18
+ """
19
+ Find customised vocabulary files.
20
+
21
+ :return: {file_name: file_path}
22
+ :rtype: dict
23
+ """
18
24
  vocab_fns = glob(str(_model_path / "vocab/*.txt"))
19
25
  return {
20
26
  os.path.basename(i).replace(".txt", ""): i
@@ -24,6 +30,17 @@ def find_vocab() -> Dict[str, str]:
24
30
 
25
31
 
26
32
  def find_model() -> Dict[str, List[List[Union[str, int, List[str], Path]]]]:
33
+ """
34
+ Find model files.
35
+
36
+ :return: ```
37
+ {
38
+ "base": [[name1, path1], [name2, path2], ...],
39
+ "standalone": [[name1, parent_path1, label1, pad_len1], ...],
40
+ "lora": [[name1, parent_path1, label1, pad_len1], ...]
41
+ }```
42
+ :rtype: dict
43
+ """
27
44
  models = {}
28
45
  # find base models
29
46
  base_fns = glob(str(_model_path / "base_model/*.pt"))
@@ -61,7 +78,21 @@ def find_model() -> Dict[str, List[List[Union[str, int, List[str], Path]]]]:
61
78
  return models
62
79
 
63
80
 
64
- def _get_lora_info(prompt: str) -> Tuple[str, List[float], List[float]]:
81
+ def _get_lora_info(prompt: str) -> Tuple[str, List[float], float]:
82
+ """
83
+ Parse sub-prompt string containing LoRA info.
84
+
85
+ :param prompt: LoRA sub-prompt: \n
86
+ case I. `"<name:A>"` \n
87
+ case II. `"<name>"` \n
88
+ case III. `"<name:A>:[a,b,...]"` \n
89
+ case IV. `"<name>:[a,b,c,...]"`
90
+ :type prompt: str
91
+ :return: LoRA name \n
92
+ objective values \n
93
+ LoRA scaling
94
+ :rtype: tuple
95
+ """
65
96
  s = prompt.split(">")
66
97
  s1 = s[0].replace("<", "")
67
98
  lora_info = s1.split(":")
@@ -83,6 +114,27 @@ def _get_lora_info(prompt: str) -> Tuple[str, List[float], List[float]]:
83
114
  def parse_prompt(
84
115
  prompt: str,
85
116
  ) -> Dict[str, Union[List[str], List[float], List[List[float]]]]:
117
+ """
118
+ Parse prompt.
119
+
120
+ :param prompt: prompt string: \n
121
+ case I. empty string `""` --> `{"lora": [], "objective": [], "lora_scaling": []}`\n
122
+ case II. one condition `"[a,b,c,...]"` --> `{"lora": [], "objective": [[a, b, c, ...]], "lora_scaling": []}`\n
123
+ case III. one LoRA `"<name:A>"` --> `{"lora": [name], "objective": [], "lora_scaling": [A]}`\n
124
+ case IV. one LoRA `"<name>"` --> `{"lora": [name], "objective": [], "lora_scaling": [1]}`\n
125
+ case V. one LoRA with condition `"<name:A>:[a,b,...]"` --> `{"lora": [name], "objective": [[a, b, ...]], "lora_scaling": [A]}`\n
126
+ case VI. one LoRA with condition `"<name>:[a,b,...]"` --> `{"lora": [name], "objective": [[a, b, ...]], "lora_scaling": [1]}`\n
127
+ case VII. several LoRAs with conditions `"<name1:A1>:[a1,b1,...];<name2>:[a2,b2,c2,...]"` --> `{"lora": [name1, name2], "objective": [[a1, b1, ...], [a2, b2, c2, ...]], "lora_scaling": [A1, 1]}`\n
128
+ case VIII. other cases --> `{"lora": [], "objective": [], "lora_scaling": []}`\n
129
+ :type prompt: str
130
+ :return: ```
131
+ {
132
+ "lora": [name1, name2, ...],
133
+ "objective": [obj1, obj2, ...],
134
+ "lora_scaling": [s1, s2, ...]
135
+ }```
136
+ :rtype: dict
137
+ """
86
138
  prompt_group = prompt.strip().replace("\n", "").split(";")
87
139
  prompt_group = [i for i in prompt_group if i]
88
140
  info = {"lora": [], "objective": [], "lora_scaling": []}
@@ -114,6 +166,16 @@ def parse_prompt(
114
166
 
115
167
 
116
168
  def parse_exclude_token(tokens: str, vocab_keys: List[str]) -> List[str]:
169
+ """
170
+ Parse exclude token string.
171
+
172
+ :param tokens: unwanted token string in the format `"token1,token2,..."`
173
+ :param vocab_keys: vocabulary elements
174
+ :type tokens: str
175
+ :type vocab_keys: list
176
+ :return: a list of allowed vocabulary
177
+ :rtype: list
178
+ """
117
179
  tokens = tokens.strip().replace("\n", "").split(",")
118
180
  tokens = [i for i in tokens if i]
119
181
  if not tokens:
@@ -121,7 +183,21 @@ def parse_exclude_token(tokens: str, vocab_keys: List[str]) -> List[str]:
121
183
  tokens = [i for i in vocab_keys if i not in tokens]
122
184
  return tokens
123
185
 
186
+
124
187
  def parse_sar_control(sar_control: str) -> List[bool]:
188
+ """
189
+ Parse semi-autoregression control string.
190
+
191
+ :param sar_control: semi-autoregression control string: \n
192
+ case I. `""` --> `[False]` \n
193
+ case II. `"F"` --> `[False]` \n
194
+ case III. `"T"` --> `[True]` \n
195
+ case IV. `F,T,...` --> `[False, True, ...]` \n
196
+ case V. other cases --> `[False, False, ...]` \n
197
+ :type sar_control: str
198
+ :return: a list of SAR flag
199
+ :rtype: list
200
+ """
125
201
  sar_flag = sar_control.strip().replace("\n", "").split(",")
126
202
  sar_flag = [i for i in sar_flag if i]
127
203
  if not sar_flag:
@@ -4,5 +4,5 @@
4
4
  Version info.
5
5
  """
6
6
 
7
- __version__ = "0.1.0"
7
+ __version__ = "0.2.0"
8
8
  __author__ = "Nianze A. TAO"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chembfn_webui
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: WebUI for ChemBFN
5
5
  Home-page: https://github.com/Augus1999/ChemBFN-WebUI
6
6
  Author: Nianze A. Tao
@@ -122,9 +122,16 @@ $ chembfn
122
122
 
123
123
  Under "advanced control" tab
124
124
 
125
- * You can control semi-autoregressive behaviours by key in `F` for switch off SAR, `T` for switch on SAR, and prompt like `F,F,T,...` to individually control the SAR in an ensemble model.
125
+ * You can control semi-autoregressive behaviours by key in `F` for switching off SAR, `T` for switching on SAR, and prompt like `F,F,T,...` to individually control the SAR in an ensemble model.
126
126
  * You can add unwanted tokens, e.g., `[Cu],p,[Si]`.
127
127
 
128
128
  ### 6. Generate molecules
129
129
 
130
130
  Click "RUN" then here you go! If an error occurred, please check your prompts and settings.
131
+
132
+ ## Where to obtain the models?
133
+
134
+ * Pretrained models: [https://huggingface.co/suenoomozawa/ChemBFN](https://huggingface.co/suenoomozawa/ChemBFN)
135
+ * ChemBFN source code: [https://github.com/Augus1999/bayesian-flow-network-for-chemistry](https://github.com/Augus1999/bayesian-flow-network-for-chemistry)
136
+ * ChemBFN document: [https://augus1999.github.io/bayesian-flow-network-for-chemistry/](https://augus1999.github.io/bayesian-flow-network-for-chemistry/)
137
+ * ChemBFN package: [https://pypi.org/project/bayesianflow-for-chem/](https://pypi.org/project/bayesianflow-for-chem/)
@@ -1,16 +1,16 @@
1
1
  chembfn_webui/__init__.py,sha256=AXUdd_PrlfVO56losFUP7A8XrqCDPylwRbTpe_WG3Uc,87
2
- chembfn_webui/bin/app.py,sha256=GLXsqaZFmKu3dj35Ja-ygPUQSLK-uKgVIMxZQipXf5c,15809
2
+ chembfn_webui/bin/app.py,sha256=nD6M_e3v7aI6Iyfr3ntFXkpCC24LNeU0XaK-bT5EveA,18864
3
3
  chembfn_webui/cache/cache_file_here.txt,sha256=hi60T_q6Cf5WPtXuwe4CqjiWpaUqrczsmGMhKIUL--M,28
4
- chembfn_webui/cache/results.csv,sha256=cNmpygApXW6XLwkZfKkLRh6BwlwURkHZ17da8qUDjac,1670
5
- chembfn_webui/lib/utilities.py,sha256=bnAAhfryDpZpAMk5p0eURJ2nhgaXgTY5QWXITdL26gc,4476
6
- chembfn_webui/lib/version.py,sha256=3uax1uzsS9zcwmKGqogR9oHyvdv4l5UktCj3R9mW1p4,138
4
+ chembfn_webui/cache/results.csv,sha256=QDwo2y-HHfxbvsNY4Tp8jpLOXOwLhzapJIRaxwQ4BS0,107
5
+ chembfn_webui/lib/utilities.py,sha256=ALPw-Evjd9DdsU_RQA6Zp2Gc6XnRR7Y_5fZrqG9azWo,7460
6
+ chembfn_webui/lib/version.py,sha256=tOCr0-h9d8eZdkQ040lxB9yzvb9spVCyxqjIs-Tt5yc,138
7
7
  chembfn_webui/model/base_model/place_base_model_here.txt,sha256=oa8_ILaAlWpTXICVDi-Y46_OahV7wB6Che6gbiEIh-c,39
8
8
  chembfn_webui/model/lora/place_lora_folder_here.txt,sha256=YYOo0Cj278DyRcgVrCLa1f2Q-cqgNeMnelaLiA3Fuic,69
9
9
  chembfn_webui/model/standalone_model/place_standalone_model_folder_here.txt,sha256=Dp42UscfI0Zp3SnvRv5vOfWiJZnxdY7rG3jo0kf86VM,80
10
10
  chembfn_webui/model/vocab/place_vocabulary_file_here.txt,sha256=fLOINvZP2022oE7RsmfDjgyaw2yMi7glmdu_cTwmo88,28
11
- chembfn_webui-0.1.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
12
- chembfn_webui-0.1.0.dist-info/METADATA,sha256=r9Obs3CWZy_ZK42c46gDXMAORUWQhAv4WhL_mpdEO4o,5125
13
- chembfn_webui-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
- chembfn_webui-0.1.0.dist-info/entry_points.txt,sha256=fp8WTPybvwpeYKrUhTi456wwZbmCMJXN1TeFGpR1SlY,55
15
- chembfn_webui-0.1.0.dist-info/top_level.txt,sha256=VdWt3Z7jhbB0pQO_mkRawnU5s75SBT9BV8fGaAIJTDI,14
16
- chembfn_webui-0.1.0.dist-info/RECORD,,
11
+ chembfn_webui-0.2.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
12
+ chembfn_webui-0.2.0.dist-info/METADATA,sha256=qUKPuLkPeeq2zsGRaqVE_LEbVNtW7VLONUp9nHaLBM4,5710
13
+ chembfn_webui-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ chembfn_webui-0.2.0.dist-info/entry_points.txt,sha256=fp8WTPybvwpeYKrUhTi456wwZbmCMJXN1TeFGpR1SlY,55
15
+ chembfn_webui-0.2.0.dist-info/top_level.txt,sha256=VdWt3Z7jhbB0pQO_mkRawnU5s75SBT9BV8fGaAIJTDI,14
16
+ chembfn_webui-0.2.0.dist-info/RECORD,,