PyPI - kdock - Versions diffs - 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl - Mend

kdock 0.0.2py3-none-any.whl → 0.0.4py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

kdock/__init__.py +1 -1
kdock/_modidx.py +7 -1
kdock/af3/analyze.py +9 -9
kdock/af3/docker.py +7 -7
kdock/af3/json.py +11 -11
kdock/af3/protein_pairs.py +9 -9
kdock/boltz/__init__.py +0 -0
kdock/boltz/dock.py +107 -0
kdock/core/data.py +17 -7
kdock/core/ligand.py +29 -18
kdock/core/plot.py +6 -6
kdock/core/protein.py +9 -9
kdock/core/utils.py +8 -8
kdock/gnina/dock.py +7 -7
kdock/gnina/rescore.py +12 -12
kdock/px/core.py +6 -6
kdock/px/dock.py +6 -6
{kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/METADATA +19 -27
kdock-0.0.4.dist-info/RECORD +27 -0
{kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/WHEEL +1 -1
kdock-0.0.2.dist-info/RECORD +0 -25
{kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/entry_points.txt +0 -0
{kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/licenses/LICENSE +0 -0
{kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/top_level.txt +0 -0

kdock/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.0.2"
1	+ __version__ = "0.0.4"

kdock/_modidx.py CHANGED Viewed

@@ -42,6 +42,10 @@ d = { 'settings': { 'branch': 'main',
                                                                                              'kdock/af3/protein_pairs.py'),
                                          'kdock.af3.protein_pairs.get_protein_subjson': ( 'af3/protein_pairs.html#get_protein_subjson',
                                                                                           'kdock/af3/protein_pairs.py')},
+            'kdock.boltz.dock': { 'kdock.boltz.dock.plot_scatter_spearman': ( 'core/boltz/dock.html#plot_scatter_spearman',
+                                                                              'kdock/boltz/dock.py'),
+                                  'kdock.boltz.dock.prepare_boltz': ('core/boltz/dock.html#prepare_boltz', 'kdock/boltz/dock.py'),
+                                  'kdock.boltz.dock.run_boltz': ('core/boltz/dock.html#run_boltz', 'kdock/boltz/dock.py')},
             'kdock.core.data': { 'kdock.core.data.Collins': ('core/data.html#collins', 'kdock/core/data.py'),
                                  'kdock.core.data.Collins.get_antibiotics_2k': ( 'core/data.html#collins.get_antibiotics_2k',
                                                                                  'kdock/core/data.py'),
@@ -54,8 +58,10 @@ d = { 'settings': { 'branch': 'main',
                                  'kdock.core.data.Kras.get_mirati_g12d_raw': ( 'core/data.html#kras.get_mirati_g12d_raw',
                                                                                'kdock/core/data.py'),
                                  'kdock.core.data.Kras.get_seq': ('core/data.html#kras.get_seq', 'kdock/core/data.py'),
-                                 'kdock.core.data.fetch_csv': ('core/data.html#fetch_csv', 'kdock/core/data.py')},
+                                 'kdock.core.data.fetch_csv': ('core/data.html#fetch_csv', 'kdock/core/data.py'),
+                                 'kdock.core.data.name2smi': ('core/data.html#name2smi', 'kdock/core/data.py')},
             'kdock.core.ligand': { 'kdock.core.ligand.compress_fp': ('core/ligand.html#compress_fp', 'kdock/core/ligand.py'),
+                                   'kdock.core.ligand.contain_acrylamide': ('core/ligand.html#contain_acrylamide', 'kdock/core/ligand.py'),
                                    'kdock.core.ligand.get_fp': ('core/ligand.html#get_fp', 'kdock/core/ligand.py'),
                                    'kdock.core.ligand.get_rdkit': ('core/ligand.html#get_rdkit', 'kdock/core/ligand.py'),
                                    'kdock.core.ligand.get_rdkit_3d': ('core/ligand.html#get_rdkit_3d', 'kdock/core/ligand.py'),

kdock/af3/analyze.py CHANGED Viewed

@@ -1,15 +1,15 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/03_analyze.ipynb.
-# %% auto 0
+# %% auto #0
 __all__ = ['read_summary_json', 'get_summary_df', 'process_summary_df', 'get_top_cases', 'get_3d_report', 'get_report',
            'copy_file']
-# %% ../../nbs/af3/03_analyze.ipynb 3
+# %% ../../nbs/af3/03_analyze.ipynb #bfdd4fae-37c7-4835-afb1-524e5a731add
 import json, shutil, pandas as pd
 from pathlib import Path
 import plotly.graph_objects as go
-# %% ../../nbs/af3/03_analyze.ipynb 5
+# %% ../../nbs/af3/03_analyze.ipynb #7b982142-f161-414b-b893-56234282cfb7
 def read_summary_json(json_path):
     "Read json file to dictionary"
     json_path = Path(json_path)
@@ -29,7 +29,7 @@ def read_summary_json(json_path):
             row[key] = value
     return row
-# %% ../../nbs/af3/03_analyze.ipynb 7
+# %% ../../nbs/af3/03_analyze.ipynb #6efc38e3-f8d8-4c3c-b068-f5a37a4af057
 def get_summary_df(output_dir):
     "Pack the summary json from the output folder to the df"
@@ -37,7 +37,7 @@ def get_summary_df(output_dir):
     print(len(path_list),'summary_confidences.json files detected')
     return pd.DataFrame(list(map(read_summary_json,path_list)))
-# %% ../../nbs/af3/03_analyze.ipynb 10
+# %% ../../nbs/af3/03_analyze.ipynb #2ae7e00d-f285-4fc1-a40c-af0f64d74376
 def process_summary_df(df,generate_report=False):
     "Post process the json-converted pandas df; remove redundant columns; available for pairs"
@@ -90,7 +90,7 @@ def process_summary_df(df,generate_report=False):
     return df
-# %% ../../nbs/af3/03_analyze.ipynb 12
+# %% ../../nbs/af3/03_analyze.ipynb #e1af07e2-b86a-492d-b8d7-0fd2f8d90779
 def get_top_cases(df,n=30):
     "Get top cases from the metric"
     idxs = set()
@@ -109,7 +109,7 @@ def get_top_cases(df,n=30):
     return list(idxs)
-# %% ../../nbs/af3/03_analyze.ipynb 14
+# %% ../../nbs/af3/03_analyze.ipynb #89900087-ff2c-48e3-bbe7-3077de2d1f32
 def get_3d_report(df,index_list, x='iptm',y='ptm',z='chain_pair_pae_min_add',save_dir='af_report'):
     "Generate 3d plot html file given case index and x, y, z colname"
     annotation = df.index.where(df.index.isin(index_list),'').str.split('_').str[1]
@@ -140,7 +140,7 @@ def get_3d_report(df,index_list, x='iptm',y='ptm',z='chain_pair_pae_min_add',sav
     fig.write_html(Path(save_dir)/'3d_scatter_plot.html',full_html=True)
     print('Exported the html to 3d_scatter_plot.html')
-# %% ../../nbs/af3/03_analyze.ipynb 16
+# %% ../../nbs/af3/03_analyze.ipynb #02cab7f4-a581-43fb-a0b9-6df5ce6d1d08
 def get_report(out_dir,save_dir='af_report'):
     "Generate summary report based on summary_confidences file; return summary df and top cases"
     out = get_summary_df(out_dir)
@@ -153,7 +153,7 @@ def get_report(out_dir,save_dir='af_report'):
     return out, top_cases
-# %% ../../nbs/af3/03_analyze.ipynb 18
+# %% ../../nbs/af3/03_analyze.ipynb #11319fd1-0bee-4e8c-b224-9eed52b8cbb2
 def copy_file(idx_name, source_dir, dest_dir):
     "Copy all model cif generated by AF3 to the new dest folder"
     source_path = Path(source_dir)/f"{idx_name}/{idx_name}_model.cif"

kdock/af3/docker.py CHANGED Viewed

@@ -1,12 +1,12 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/01_docker.ipynb.
-# %% auto 0
+# %% auto #0
 __all__ = ['get_docker_command', 'docker_single_full', 'docker_multi_full', 'docker_multi_msa', 'docker_multi_infer']
-# %% ../../nbs/af3/01_docker.ipynb 3
+# %% ../../nbs/af3/01_docker.ipynb #57b6887a-a451-462e-9452-d850d3028317
 from fastcore.meta import delegates
-# %% ../../nbs/af3/01_docker.ipynb 6
+# %% ../../nbs/af3/01_docker.ipynb #10f9c68f-0374-4cb2-b10e-65bb0107844f
 def get_docker_command(
     input_dir="af_input", # Directory for input data
     output_dir="af_output", # Directory for output results
@@ -80,7 +80,7 @@ def get_docker_command(
     docker_command = "\n".join(cmd_parts)
     print(docker_command)
-# %% ../../nbs/af3/01_docker.ipynb 12
+# %% ../../nbs/af3/01_docker.ipynb #5e5c9de1-0b4b-433f-90a8-02fa755aafc3
 @delegates(get_docker_command)
 def docker_single_full(json_path,output_dir,cache_dir=False,**kwargs):
     "Single json task with full pipeline."
@@ -90,7 +90,7 @@ def docker_single_full(json_path,output_dir,cache_dir=False,**kwargs):
         cache_dir=cache_dir,
         **kwargs)
-# %% ../../nbs/af3/01_docker.ipynb 16
+# %% ../../nbs/af3/01_docker.ipynb #996f65a3-64ee-4288-aed7-3a0acba73175
 @delegates(get_docker_command)
 def docker_multi_full(input_dir,output_dir,**kwargs):
     "Folder of json as input with full pipeline."
@@ -99,7 +99,7 @@ def docker_multi_full(input_dir,output_dir,**kwargs):
         output_dir=output_dir,
         **kwargs)
-# %% ../../nbs/af3/01_docker.ipynb 19
+# %% ../../nbs/af3/01_docker.ipynb #7512a43c-28c2-41f8-b4c5-4ddf1e56f716
 @delegates(get_docker_command)
 def docker_multi_msa(input_dir,output_dir,search_only=True,**kwargs):
     "MSA search only, without structure inference; CPU only."
@@ -109,7 +109,7 @@ def docker_multi_msa(input_dir,output_dir,search_only=True,**kwargs):
         search_only=search_only,
         **kwargs)
-# %% ../../nbs/af3/01_docker.ipynb 22
+# %% ../../nbs/af3/01_docker.ipynb #403f1d8f-6055-486a-a0f9-bc4b2ca8feaa
 @delegates(get_docker_command)
 def docker_multi_infer(input_dir,output_dir,skip_search=True,**kwargs):
     "Infer only with pre-calculated MSA; GPU is needed."

kdock/af3/json.py CHANGED Viewed

@@ -1,10 +1,10 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/00_json.ipynb.
-# %% auto 0
+# %% auto #0
 __all__ = ['dump_json', 'get_protein_json', 'read_json', 'get_protein_smiles_json', 'get_protein_ccdcode_json',
            'assign_atom_names_from_graph', 'mol_to_ccd_text', 'sdf2ccd', 'get_protein_ccd_json', 'split_nfolder']
-# %% ../../nbs/af3/00_json.ipynb 2
+# %% ../../nbs/af3/00_json.ipynb #1d0467e3-9fcb-4e07-a59f-58405b640950
 import re, shutil, json, pandas as pd, numpy as np
 from pathlib import Path
@@ -14,13 +14,13 @@ from rdkit import Chem
 from Bio.PDB import PDBParser
-# %% ../../nbs/af3/00_json.ipynb 4
+# %% ../../nbs/af3/00_json.ipynb #62063f37-32aa-4a12-8e57-29ef4fff1413
 def dump_json(data, save_path):
     "Write `data` to `save_path` as JSON indented by 4 spaces"
     # mode 'w' creates the file or overwrites an existing one
     with open(save_path, mode='w') as handle:
         json.dump(obj=data, fp=handle, indent=4)
-# %% ../../nbs/af3/00_json.ipynb 5
+# %% ../../nbs/af3/00_json.ipynb #14d9fdb5-7e05-47f6-b596-9d751f9f5618
 def get_protein_json(name, # job name
                      seq, # aa sequence
                      save_path=None, # .json
@@ -48,13 +48,13 @@ def get_protein_json(name, # job name
         dump_json(json_data,save_path)
     return json_data
-# %% ../../nbs/af3/00_json.ipynb 9
+# %% ../../nbs/af3/00_json.ipynb #9bb34b3b-e90f-4112-a947-aee8a8610335
 def read_json(file_path):
     "Load the JSON file at `file_path` and return the parsed content"
     with open(file_path, mode='r') as handle:
         return json.load(handle)
-# %% ../../nbs/af3/00_json.ipynb 11
+# %% ../../nbs/af3/00_json.ipynb #b9490613-30cd-4cf9-93a7-0e53a6645932
 def get_protein_smiles_json(smi_id:str,
                             SMILES:str,
                             protein_json, # json type
@@ -88,7 +88,7 @@ def get_protein_smiles_json(smi_id:str,
         dump_json(json_data,save_path)
     return json_data
-# %% ../../nbs/af3/00_json.ipynb 18
+# %% ../../nbs/af3/00_json.ipynb #300bb051-4246-400e-bdc3-dd43e4da47d1
 def get_protein_ccdcode_json(protein_json,  # dict with protein sequence
                               ccd_code,      # str or list of str
                               job_id: str,   # job/task ID
@@ -128,7 +128,7 @@ def get_protein_ccdcode_json(protein_json,  # dict with protein sequence
     return json_data
-# %% ../../nbs/af3/00_json.ipynb 22
+# %% ../../nbs/af3/00_json.ipynb #e850527d-826d-4c7f-9a66-bf307c44b809
 # Mapping bond types to mmCIF-compatible values
 _RDKIT_BOND_TYPE_TO_MMCIF = {
     rd_chem.BondType.SINGLE: 'SING',
@@ -208,7 +208,7 @@ def mol_to_ccd_text(mol, component_id, pdbx_smiles=None, include_hydrogens=False
     return "\n".join(lines)
-# %% ../../nbs/af3/00_json.ipynb 23
+# %% ../../nbs/af3/00_json.ipynb #c099dc24-d29e-49a1-9cbb-d14e7bc2a3de
 def sdf2ccd(sdf_path,
             CCD_name='lig-1', # do not use '_'; use as less letter as possible, 'lig-any' leads to extra ligands
             ):
@@ -218,7 +218,7 @@ def sdf2ccd(sdf_path,
     mol = supplier[0]  # Get the first molecule
     return mol_to_ccd_text(mol,CCD_name)
-# %% ../../nbs/af3/00_json.ipynb 26
+# %% ../../nbs/af3/00_json.ipynb #3953bc6b-3bc0-40e5-a87b-08eb640b8fcf
 def get_protein_ccd_json(protein_json, # dict with protein sequence
                          rec_residue_num:int, # 1-indexed, for bondedAtomPairs, e.g., ["A", 145, "SG"]
                          rec_atom_id:str, # for bondedAtomPairs, e.g., ["A", 145, "SG"]
@@ -262,7 +262,7 @@ def get_protein_ccd_json(protein_json, # dict with protein sequence
     return json_data
-# %% ../../nbs/af3/00_json.ipynb 30
+# %% ../../nbs/af3/00_json.ipynb #c7f16eec-082a-4233-b3ed-b96d58ecedb4
 def split_nfolder(folder_dir,
                   n=4):
     "Move json files from a folder into subfolders (folder_0, folder_1, ..., folder_N)."

kdock/af3/protein_pairs.py CHANGED Viewed

@@ -1,10 +1,10 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/02_protein_pairs.ipynb.
-# %% auto 0
+# %% auto #0
 __all__ = ['get_colabfold_cmd', 'copy_a3m', 'a3m_to_seq', 'get_protein_subjson', 'dump_json_folder', 'get_multi_protein_json',
            'generate_pair_df']
-# %% ../../nbs/af3/02_protein_pairs.ipynb 4
+# %% ../../nbs/af3/02_protein_pairs.ipynb #92000c0e-6470-423c-9c55-9972a341282b
 import os, json, shutil, pandas as pd
 from tqdm import tqdm
 from itertools import combinations
@@ -12,12 +12,12 @@ from pathlib import Path
 from .json import *
 from .docker import *
-# %% ../../nbs/af3/02_protein_pairs.ipynb 9
+# %% ../../nbs/af3/02_protein_pairs.ipynb #3f3ae31b-287e-4810-9e10-db969c3fcc6b
 def get_colabfold_cmd(csv_path,project_name):
     "Print the colabfold_batch MSA-only command (output folder `msa_<project_name>`) to be run in a terminal"
     header = 'Run below in terminal:'
     command = f'\n colabfold_batch {csv_path} msa_{project_name} --msa-only'
     for message in (header, command):
         print(message)
-# %% ../../nbs/af3/02_protein_pairs.ipynb 13
+# %% ../../nbs/af3/02_protein_pairs.ipynb #9fb90cd0-3260-4dd1-a169-340a75633ab0
 def copy_a3m(a3m_dir: str, # Path to the source directory containing .a3m files.
              dest_dir: str, # Path to the destination directory where files will be copied
              ):
@@ -33,12 +33,12 @@ def copy_a3m(a3m_dir: str, # Path to the source directory containing .a3m files.
     print(f"Copied {len(files)} a3m files from {a3m_dir} to {dest_dir}")
-# %% ../../nbs/af3/02_protein_pairs.ipynb 17
+# %% ../../nbs/af3/02_protein_pairs.ipynb #def3928f-c895-479f-9e88-093fdf3892da
 def a3m_to_seq(file_path:Path):
     "Return the protein sequence stored in an a3m file"
     rows = file_path.read_text().splitlines()
     # the protein sequence is the third row of the file (0-based index 2)
     sequence = rows[2]
     return sequence
-# %% ../../nbs/af3/02_protein_pairs.ipynb 19
+# %% ../../nbs/af3/02_protein_pairs.ipynb #03da4db8-60b5-409e-889a-f44b2fb27705
 def get_protein_subjson(gene_name, a3m_dir=".",idx = 'A',run_template=True):
     "Get subjson (protein part) with colabfold unpairedMSA .a3m path"
     file_path = Path(a3m_dir)/f"{gene_name}.a3m"
@@ -55,13 +55,13 @@ def get_protein_subjson(gene_name, a3m_dir=".",idx = 'A',run_template=True):
     return json_data
-# %% ../../nbs/af3/02_protein_pairs.ipynb 22
+# %% ../../nbs/af3/02_protein_pairs.ipynb #76c918a2-630a-48ac-a682-501cfe4a522f
 def dump_json_folder(json_data, folder):
     "Write `json_data` to `<folder>/<name>.json`, where the file name comes from `json_data['name']`"
     out_name = f"{json_data['name']}.json"
     destination = Path(folder).joinpath(out_name)
     with destination.open('w') as handle:
         json.dump(json_data, handle, indent=4)
-# %% ../../nbs/af3/02_protein_pairs.ipynb 23
+# %% ../../nbs/af3/02_protein_pairs.ipynb #8e97b4ea-17ec-48dd-bca0-c2f11763205a
 def get_multi_protein_json(gene_list,a3m_dir,run_template=True,save_folder=None):
     'Get json of multiple proteins, with unpaired MSA path indicated (from colabfold MSA)'
     sequences = []
@@ -82,7 +82,7 @@ def get_multi_protein_json(gene_list,a3m_dir,run_template=True,save_folder=None)
         dump_json_folder(json_data,save_folder)
     return json_data
-# %% ../../nbs/af3/02_protein_pairs.ipynb 27
+# %% ../../nbs/af3/02_protein_pairs.ipynb #8cc62d64-4643-49a3-a7ed-be7231abb0b9
 def generate_pair_df(gene_list,self_pair=True):
     "Unique pair genes in a gene list"
     pairs = list(combinations(gene_list, 2))

kdock/boltz/__init__.py ADDED Viewed

File without changes

kdock/boltz/dock.py ADDED Viewed

@@ -0,0 +1,107 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/boltz/00_dock.ipynb.
+# %% auto #0
+__all__ = ['prepare_boltz', 'run_boltz', 'plot_scatter_spearman']
+# %% ../../nbs/core/boltz/00_dock.ipynb #3b60daa9
+# basics
+import subprocess
+from pathlib import Path
+# %% ../../nbs/core/boltz/00_dock.ipynb #71a6ad95
+def prepare_boltz(seq: str, # Amino acid sequence of the protein the protein
+                    smiles: str, # SMILES string of the ligand
+                    fname: str, # Output filename (should end with .yaml)
+                    ):
+    "Create a YAML file for protein-ligand affinity prediction."
+    yaml_content = f"""version: 1
+sequences:
+  - protein:
+      id: "A"
+      sequence: "{seq}"
+  - ligand:
+      id: "B"
+      smiles: "{smiles}"
+properties:
+  - affinity:
+      binder: "B"
+"""
+    with open(fname, "w") as f:
+        f.write(yaml_content)
+# %% ../../nbs/core/boltz/00_dock.ipynb #486e2576
+def run_boltz(file_list:list[Path], # list of .yaml path in Pathlib object
+                     api_key):
+    "Run Boltz-Lab predictions for a list of YAML files."
+    # config key
+    subprocess.run(
+        ["boltz-lab", "config", "--api-key", api_key.strip()],
+        check=True
+    )
+    failed = []
+    for file in file_list:
+        print(f"\nSubmitting: {str(file)}")
+        result = subprocess.run(
+            ["boltz-lab", "predict", str(file),
+             "--no-wait", # for batch run, so no need to wait the results til the next
+             "--name",file.stem, # job name appeared in boltz
+             ],
+            capture_output=True,
+            text=True,
+        )
+        if result.returncode != 0: failed.append(file.name)
+        print(result.stdout)
+    print("\n======== SUMMARY ========")
+    print(f"Total: {len(file_list)}")
+    print(f"Failed: {len(failed)}")
+    return failed
+# %% ../../nbs/core/boltz/00_dock.ipynb #45b0e6aa
+import matplotlib.pyplot as plt
+import seaborn as sns, numpy as np
+from scipy.stats import spearmanr
+# %% ../../nbs/core/boltz/00_dock.ipynb #eec74db7
+def plot_scatter_spearman(data, x, y, ax=None):
+    """
+    Plot scatter + Spearman correlation and p-value annotation.
+    """
+    if ax is None:
+        ax = plt.gca()
+    # Drop NA
+    sub = data[[x, y]].dropna()
+    x_vals = sub[x]
+    y_vals = sub[y]
+    # Compute Spearman
+    rho, p = spearmanr(x_vals, y_vals)
+    # Plot
+    sns.scatterplot(data=sub, x=x, y=y, ax=ax)
+    # Annotate
+    text = f"Spearman ρ = {rho:.3f}\np = {p:.2e}"
+    ax.text(
+        0.98, 0.98,   # x, y in axes fraction
+        text,
+        transform=ax.transAxes,
+        ha='right',          # horizontal align
+        va='top',            # vertical align
+        fontsize=11,
+        bbox=dict(boxstyle="round", facecolor="white", alpha=0.7)
+    )
+    ax.set_title(f'{x} vs {y}')
+    return rho, p

kdock/core/data.py CHANGED Viewed

@@ -1,22 +1,32 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/00_data.ipynb.
-# %% auto 0
-__all__ = ['BASE_URL', 'fetch_csv', 'Collins', 'Kras']
+# %% auto #0
+__all__ = ['BASE_URL', 'name2smi', 'fetch_csv', 'Collins', 'Kras']
-# %% ../../nbs/core/00_data.ipynb 3
+# %% ../../nbs/core/00_data.ipynb #e61ba13c
 import pandas as pd
 import requests
 from functools import lru_cache
+import pubchempy as pcp
-# %% ../../nbs/core/00_data.ipynb 7
+# %% ../../nbs/core/00_data.ipynb #b97285b8
+def name2smi(name):
+    "Given a compound name, get SMILES in PubChem database. "
+    compounds = pcp.get_compounds(name, 'name')
+    if not compounds: return None
+    # get the first rank
+    c = compounds[0]
+    return c.smiles # can also return c.cid compound ID
+# %% ../../nbs/core/00_data.ipynb #209ffb82
 BASE_URL = "https://github.com/sky1ove/kdock/raw/main/"
-# %% ../../nbs/core/00_data.ipynb 8
+# %% ../../nbs/core/00_data.ipynb #d7e2b62a
 @lru_cache()
 def fetch_csv(url):
     "Read a CSV from `url` into a DataFrame; results are cached per url, so repeated calls return the same object"
     frame = pd.read_csv(url)
     return frame
-# %% ../../nbs/core/00_data.ipynb 9
+# %% ../../nbs/core/00_data.ipynb #1f1a2239
 class Collins:
     "A class of loading compound datasets from Collins lab."
@@ -45,7 +55,7 @@ class Collins:
         """
         return fetch_csv(BASE_URL + "dataset/antibiotics_enzyme.csv")
-# %% ../../nbs/core/00_data.ipynb 19
+# %% ../../nbs/core/00_data.ipynb #16424d50
 class Kras:
     "A class of fetching various KRAS datasets."
     @staticmethod

kdock/core/ligand.py CHANGED Viewed

@@ -1,10 +1,11 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/03_ligand.ipynb.
-# %% auto 0
-__all__ = ['plot_drug', 'rdkit_conformer', 'tanimoto', 'get_rdkit', 'get_rdkit_3d', 'get_rdkit_all', 'remove_hi_corr',
-           'preprocess', 'get_rdkit_df', 'get_fp', 'compress_fp', 'tanimoto_numba', 'hash_fp', 'get_same_mol_group']
+# %% auto #0
+__all__ = ['contain_acrylamide', 'plot_drug', 'rdkit_conformer', 'tanimoto', 'get_rdkit', 'get_rdkit_3d', 'get_rdkit_all',
+           'remove_hi_corr', 'preprocess', 'get_rdkit_df', 'get_fp', 'compress_fp', 'tanimoto_numba', 'hash_fp',
+           'get_same_mol_group']
-# %% ../../nbs/core/03_ligand.ipynb 3
+# %% ../../nbs/core/03_ligand.ipynb #b02d7ca8-fcbe-4c95-a0bc-4c1a1471578c
 import pandas as pd, numpy as np
 from rdkit import Chem,RDLogger,DataStructs
 from rdkit.Chem import Draw, rdDepictor, Descriptors,Descriptors3D, AllChem
@@ -16,7 +17,17 @@ from tqdm.contrib.concurrent import process_map
 from tqdm import tqdm
 RDLogger.DisableLog('rdApp.warning')
-# %% ../../nbs/core/03_ligand.ipynb 5
+# %% ../../nbs/core/03_ligand.ipynb #e9c0f1db
+def contain_acrylamide(smiles,
+                        ACRYLAMIDE_SMARTS="C=CC(=O)N", # SMARTS pattern for acrylamide
+                        ):
+    "Check if the SMILES contain acrylamide (can form covalent bond with cysteine in protein)"
+    acrylamide_pattern = Chem.MolFromSmarts(ACRYLAMIDE_SMARTS)
+    mol = Chem.MolFromSmiles(smiles)
+    if mol is None: raise ValueError("Invalid SMILES")
+    return mol.HasSubstructMatch(acrylamide_pattern)
+# %% ../../nbs/core/03_ligand.ipynb #18c2b546
 def plot_drug(drug_dict, flip_list=None, save_path=None):
     flip_list = flip_list or []
     mols = []
@@ -55,7 +66,7 @@ def plot_drug(drug_dict, flip_list=None, save_path=None):
         # display(SVG(svg_str))
         return svg_obj
-# %% ../../nbs/core/03_ligand.ipynb 8
+# %% ../../nbs/core/03_ligand.ipynb #3136f2ba-17fc-49a8-ab9e-15107d39cb62
 def rdkit_conformer(SMILES, # SMILES string
                     output=None, # file ".sdf" to be saved
                     method='ETKDG', # Optimization method, can be 'UFF', 'MMFF' or 'ETKDGv3'
@@ -98,7 +109,7 @@ def rdkit_conformer(SMILES, # SMILES string
         w.close()
     return mol
-# %% ../../nbs/core/03_ligand.ipynb 10
+# %% ../../nbs/core/03_ligand.ipynb #6b5f7fdd-89e2-481e-9f37-f999fccf3c86
 def tanimoto(df, # df with SMILES and ID columns
              smiles_col='SMILES', # colname of SMILES
              id_col='ID', # colname of compound ID
@@ -136,7 +147,7 @@ def tanimoto(df, # df with SMILES and ID columns
     return result_df
-# %% ../../nbs/core/03_ligand.ipynb 14
+# %% ../../nbs/core/03_ligand.ipynb #9268af86-13e9-4051-8300-635c12e00186
 def get_rdkit(SMILES:str):
     """
     Extract chemical features from SMILES
@@ -145,7 +156,7 @@ def get_rdkit(SMILES:str):
     mol = Chem.MolFromSmiles(SMILES)
     return Descriptors.CalcMolDescriptors(mol)
-# %% ../../nbs/core/03_ligand.ipynb 15
+# %% ../../nbs/core/03_ligand.ipynb #e003dc50-9fac-44f1-b2dd-15e9ecd812ce
 def get_rdkit_3d(SMILES:str):
     "Extract 3d features from SMILES"
     mol = Chem.MolFromSmiles(SMILES)
@@ -154,14 +165,14 @@ def get_rdkit_3d(SMILES:str):
     AllChem.UFFOptimizeMolecule(mol)
     return Descriptors3D.CalcMolDescriptors3D(mol)
-# %% ../../nbs/core/03_ligand.ipynb 16
+# %% ../../nbs/core/03_ligand.ipynb #7512ba88-69bf-4aaa-97b5-3624ad63184c
 def get_rdkit_all(SMILES:str):
     "Merge the features from get_rdkit with the 3d features from get_rdkit_3d for one SMILES"
     # with `|`, values from the 3d features win if a key appears in both
     return get_rdkit(SMILES) | get_rdkit_3d(SMILES)
-# %% ../../nbs/core/03_ligand.ipynb 17
+# %% ../../nbs/core/03_ligand.ipynb #081e1e1c-5b9e-4297-886c-3f17e3594dfa
 def remove_hi_corr(df: pd.DataFrame,
                    thr=0.99 # threshold
                    ):
@@ -171,7 +182,7 @@ def remove_hi_corr(df: pd.DataFrame,
     to_drop = [column for column in upper.columns if any(upper[column] > thr)]
     return df.drop(to_drop, axis=1), to_drop
-# %% ../../nbs/core/03_ligand.ipynb 18
+# %% ../../nbs/core/03_ligand.ipynb #bdaca119-3060-4df6-b86f-1a2e9adf0950
 def preprocess(df: pd.DataFrame, thr=0.99):
     "Remove features with no variance, and highly correlated features based on threshold."
     col_ori = df.columns
@@ -196,7 +207,7 @@ def preprocess(df: pd.DataFrame, thr=0.99):
     return df
-# %% ../../nbs/core/03_ligand.ipynb 19
+# %% ../../nbs/core/03_ligand.ipynb #8d66039a-89ad-4da4-bee1-c37e78c59e36
 def get_rdkit_df(df: pd.DataFrame,
                  include_3d=False,
                  col='SMILES',
@@ -217,7 +228,7 @@ def get_rdkit_df(df: pd.DataFrame,
     return out
-# %% ../../nbs/core/03_ligand.ipynb 25
+# %% ../../nbs/core/03_ligand.ipynb #40788d6f-b180-4b07-9b01-b8d607939523
 def get_fp(SMILES,
            name='ecfp',
            ELEMENTS_PER_WORKER = 1_000_000):
@@ -242,12 +253,12 @@ def get_fp(SMILES,
     return np.concatenate(middle_parts)
-# %% ../../nbs/core/03_ligand.ipynb 29
+# %% ../../nbs/core/03_ligand.ipynb #a1a77f98-f3c6-462c-9e5e-c4caf18a40a2
 def compress_fp(array):
     "Compress binary fingerprints by packing the bits of each row into bytes with np.packbits"
     packed = np.packbits(array, axis=1)
     return packed
-# %% ../../nbs/core/03_ligand.ipynb 47
+# %% ../../nbs/core/03_ligand.ipynb #d27a7e74-83cd-414b-8c96-c8439535fe6f
 @numba.njit(parallel=True)
 def tanimoto_numba(fps):
     "Get a NxN matrix of tanimoto similarity among N compounds."
@@ -263,12 +274,12 @@ def tanimoto_numba(fps):
             result[j, i] = sim  # fill symmetric position
     return result
-# %% ../../nbs/core/03_ligand.ipynb 50
+# %% ../../nbs/core/03_ligand.ipynb #9e6d6bf4-061c-4f13-b916-9d7366fbde63
 def hash_fp(fp_row):
     "Return the SHA256 hex digest of the raw bytes of a fingerprint row"
     hasher = hashlib.sha256()
     hasher.update(fp_row.tobytes())
     return hasher.hexdigest()
-# %% ../../nbs/core/03_ligand.ipynb 52
+# %% ../../nbs/core/03_ligand.ipynb #07ad9560-7afd-4c16-8614-c9ab39afbbec
 def get_same_mol_group(df, smi_col='SMILES'):
     "Assign a group number to the same compounds by utilizing hash sha256 to encode morgan fp and find same molecule."
     df = df.copy()

kdock/core/plot.py CHANGED Viewed

@@ -2,10 +2,10 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/04_plot.ipynb.
-# %% auto 0
+# %% auto #0
 __all__ = ['reduce_feature', 'set_sns', 'plot_2d', 'plot_corr']
-# %% ../../nbs/core/04_plot.ipynb 3
+# %% ../../nbs/core/04_plot.ipynb #1fdc92f2
 import pandas as pd, seaborn as sns
 from fastcore.meta import delegates
 from matplotlib import pyplot as plt
@@ -18,7 +18,7 @@ from sklearn.decomposition import PCA
 from sklearn.manifold import TSNE
 from umap import UMAP
-# %% ../../nbs/core/04_plot.ipynb 6
+# %% ../../nbs/core/04_plot.ipynb #2e65eb15
 def reduce_feature(data, # df or numpy array
                    method='pca', # dimensionality reduction method, accept both capital and lower case
                    complexity=20, # None for PCA; perfplexity for TSNE, recommend: 30; n_neigbors for UMAP, recommend: 15
@@ -53,13 +53,13 @@ def reduce_feature(data, # df or numpy array
     return embedding_df
-# %% ../../nbs/core/04_plot.ipynb 10
+# %% ../../nbs/core/04_plot.ipynb #3012ced7
 def set_sns():
     "Apply the seaborn defaults used here: 300 dpi for figures and saved files, 'notebook' context, 'ticks' style"
     dpi_settings = {"figure.dpi":300, 'savefig.dpi':300}
     # sns.set goes first; the context and style calls are then applied on top of it
     sns.set(rc=dpi_settings)
     sns.set_context('notebook')
     sns.set_style("ticks")
-# %% ../../nbs/core/04_plot.ipynb 12
+# %% ../../nbs/core/04_plot.ipynb #e284443d
 @delegates(sns.scatterplot)
 def plot_2d(X: pd.DataFrame, # a dataframe that has first column to be x, and second column to be y
             **kwargs, # arguments for sns.scatterplot
@@ -68,7 +68,7 @@ def plot_2d(X: pd.DataFrame, # a dataframe that has first column to be x, and se
     plt.figure(figsize=(7,7))
     sns.scatterplot(data = X,x=X.columns[0],y=X.columns[1],alpha=0.7,**kwargs)
-# %% ../../nbs/core/04_plot.ipynb 14
+# %% ../../nbs/core/04_plot.ipynb #afb87d13
 def plot_corr(x,#a column of df
               y,#a column of df
               xlabel=None,# x axis label

kdock/core/protein.py CHANGED Viewed

@@ -1,10 +1,10 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/02_protein.ipynb.
-# %% auto 0
+# %% auto #0
 __all__ = ['get_uniprot_seq', 'get_uniprot_features', 'get_uniprot_kd', 'get_uniprot_type', 'apply_mut_single',
            'apply_mut_complex', 'compare_seq']
-# %% ../../nbs/core/02_protein.ipynb 3
+# %% ../../nbs/core/02_protein.ipynb #d1ba401f-ec26-4182-986a-e40837f9a915
 import pandas as pd
 import requests,re
 from functools import lru_cache
@@ -12,7 +12,7 @@ from functools import lru_cache
 # for compare seq
 from Bio.Align import PairwiseAligner
-# %% ../../nbs/core/02_protein.ipynb 5
+# %% ../../nbs/core/02_protein.ipynb #81c804f5-a4d8-4b99-a47e-3355779a1bbd
 @lru_cache()
 def get_uniprot_seq(uniprot_id):
     "Queries the UniProt database to retrieve the protein sequence for a given UniProt ID."
@@ -29,7 +29,7 @@ def get_uniprot_seq(uniprot_id):
     else:
         return f"Error: Unable to retrieve sequence for UniProt ID {uniprot_id}. Status code: {response.status_code}"
-# %% ../../nbs/core/02_protein.ipynb 7
+# %% ../../nbs/core/02_protein.ipynb #68c733a5-bbc9-4373-b9d2-23c545e74316
 @lru_cache()
 def get_uniprot_features(uniprot_id):
     "Given uniprot_id, get specific region for uniprot features."
@@ -63,7 +63,7 @@ def get_uniprot_features(uniprot_id):
     else:
         raise ValueError(f"Failed to retrieve UniProt features for {uniprot_id}")
-# %% ../../nbs/core/02_protein.ipynb 9
+# %% ../../nbs/core/02_protein.ipynb #26d793bb-3f87-4e84-bcf6-6ca62a794b32
 def get_uniprot_kd(uniprot_id):
     "Query 'Domain: Protein kinase' based on UniProt ID and get its sequence info."
     data = get_uniprot_features(uniprot_id)
@@ -86,7 +86,7 @@ def get_uniprot_kd(uniprot_id):
     return out
-# %% ../../nbs/core/02_protein.ipynb 11
+# %% ../../nbs/core/02_protein.ipynb #4005a8aa-f8a1-4b99-819e-d7c08d487bf2
 def get_uniprot_type(uniprot_id,type_='Signal'):
     "Get region sequences based on UniProt ID features."
     data = get_uniprot_features(uniprot_id)
@@ -118,7 +118,7 @@ def get_uniprot_type(uniprot_id,type_='Signal'):
     return out
-# %% ../../nbs/core/02_protein.ipynb 16
+# %% ../../nbs/core/02_protein.ipynb #f06c0c1d-829d-4f9e-bcc2-9457050faaf4
 def apply_mut_single(seq, # protein sequence
            *mutations, # e.g., E709A
            start_pos=1, # if the protein sequence does not start from index 1, indicate the start index to match the mutations
@@ -142,7 +142,7 @@ def apply_mut_single(seq, # protein sequence
     return ''.join(seq_list)
-# %% ../../nbs/core/02_protein.ipynb 18
+# %% ../../nbs/core/02_protein.ipynb #5c66ef12-8348-4dc6-affc-04858c1411f5
 def apply_mut_complex(seq, # protein sequence
                       mut, # mutation (e.g., G776delinsVC/S783C, G778dupGSP)
                       start_pos=1, # if truncated protein sequence, indicate where it starts to match the position of mutation
@@ -196,7 +196,7 @@ def apply_mut_complex(seq, # protein sequence
     return ''.join(seq)
-# %% ../../nbs/core/02_protein.ipynb 22
+# %% ../../nbs/core/02_protein.ipynb #5ac69a8c-fe83-48fc-926f-b99a15624ce3
 def compare_seq(
     seq1: str,
     seq2: str,

kdock/core/utils.py CHANGED Viewed

@@ -1,9 +1,9 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/01_utils.ipynb.
-# %% auto 0
+# %% auto #0
 __all__ = ['rglob', 'copy_files', 'get_rec_lig', 'get_box', 'view_mol', 'view_complex']
-# %% ../../nbs/core/01_utils.ipynb 3
+# %% ../../nbs/core/01_utils.ipynb #61fddcbc
 from pathlib import Path
 import subprocess,shutil,zipfile
 import numpy as np
@@ -11,7 +11,7 @@ import numpy as np
 import py3Dmol
 from rdkit import Chem
-# %% ../../nbs/core/01_utils.ipynb 6
+# %% ../../nbs/core/01_utils.ipynb #2cb26b85
 def rglob(path, pattern, max_depth):
     "Get a file list given folder depths"
     base_path = Path(path).resolve()
@@ -19,7 +19,7 @@ def rglob(path, pattern, max_depth):
         if len(path.relative_to(base_path).parts) <= max_depth:
             yield path
-# %% ../../nbs/core/01_utils.ipynb 8
+# %% ../../nbs/core/01_utils.ipynb #8d003773
 def copy_files(file_list, dest_dir):
     "Copy a list of files to the destination directory, or zip them if dest_dir ends with .zip."
     dest_path = Path(dest_dir)
@@ -37,7 +37,7 @@ def copy_files(file_list, dest_dir):
             shutil.copy2(file_path, dest_path / file_path.name)
         print(f'Copied {len(file_list)} files to {dest_path}')
-# %% ../../nbs/core/01_utils.ipynb 11
+# %% ../../nbs/core/01_utils.ipynb #3ffd3d10
 def get_rec_lig(pdb_id: str, # pdb id for download
                             lig_id: str, # ligand id shown on the protein page
                             out_dir = '.', # directory path to save pdb files
@@ -81,7 +81,7 @@ def get_rec_lig(pdb_id: str, # pdb id for download
     return str(rec_file), str(lig_sdf_file)
-# %% ../../nbs/core/01_utils.ipynb 14
+# %% ../../nbs/core/01_utils.ipynb #491e748e
 def get_box(sdf_file, autobox_add=4.0,tolist=False):
     "Get the box coordinates of ligand.sdf; mimic GNINA's --autobox_ligand behavior."
     mol = Chem.SDMolSupplier(str(sdf_file), removeHs=False)[0]
@@ -107,7 +107,7 @@ def get_box(sdf_file, autobox_add=4.0,tolist=False):
     }
     return list(box_dict.values()) if tolist else box_dict
-# %% ../../nbs/core/01_utils.ipynb 18
+# %% ../../nbs/core/01_utils.ipynb #cdf59cb8
 def view_mol(file, #sdf or pdb file
             ):
     "Visualize pdb or sdf file"
@@ -118,7 +118,7 @@ def view_mol(file, #sdf or pdb file
     v.zoomTo()
     v.show()
-# %% ../../nbs/core/01_utils.ipynb 20
+# %% ../../nbs/core/01_utils.ipynb #83cd0e6a
 def view_complex(receptor,           # protein file
                  ligand,             # ligand (green), or docked ligand
                  ori_ligand=None,    # original ligand (yellow)

kdock/gnina/dock.py CHANGED Viewed

@@ -1,9 +1,9 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/gnina/04_gnina_docking.ipynb.
-# %% auto 0
+# %% auto #0
 __all__ = ['setup_gnina_local', 'setup_gnina_docker', 'extract_gnina_dock', 'gnina_dock']
-# %% ../../nbs/gnina/04_gnina_docking.ipynb 3
+# %% ../../nbs/gnina/04_gnina_docking.ipynb #e20fcc7c
 # basics
 import re,subprocess, py3Dmol
 from tqdm import tqdm
@@ -14,7 +14,7 @@ import pandas as pd,numpy as np
 from rdkit import Chem
 from rdkit.Chem import AllChem
-# %% ../../nbs/gnina/04_gnina_docking.ipynb 8
+# %% ../../nbs/gnina/04_gnina_docking.ipynb #1ce62365
 def setup_gnina_local(version='v1.3'):
     "Download and install gnina in the current directory"
     # Check CUDA availability
@@ -35,14 +35,14 @@ def setup_gnina_local(version='v1.3'):
     print('Finish setup!')
-# %% ../../nbs/gnina/04_gnina_docking.ipynb 13
+# %% ../../nbs/gnina/04_gnina_docking.ipynb #c259e599
 def setup_gnina_docker():
     "Pull gnina docker image"
     print("Pulling GNINA Docker image: gnina/gnina")
     subprocess.run(["docker", "pull", "gnina/gnina"], check=True,stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     print("GNINA Docker image is ready.")
-# %% ../../nbs/gnina/04_gnina_docking.ipynb 18
+# %% ../../nbs/gnina/04_gnina_docking.ipynb #a68899f4
 def extract_gnina_dock(gnina_output):
     "Extract values from gnina output"
     mode1_line = re.search(r'\b1\s+(-?\d+\.\d+)\s+(-?\d+\.\d+)\s+(-?\d+\.\d+)\b', gnina_output)
@@ -56,7 +56,7 @@ def extract_gnina_dock(gnina_output):
     return None
-# %% ../../nbs/gnina/04_gnina_docking.ipynb 19
+# %% ../../nbs/gnina/04_gnina_docking.ipynb #78d04091
 def gnina_dock(receptor, # receptor file
               ligand, # ligand file
               autobox_ligand, # ligand file isolated from the complex
@@ -85,7 +85,7 @@ def gnina_dock(receptor, # receptor file
     return values
-# %% ../../nbs/gnina/04_gnina_docking.ipynb 21
+# %% ../../nbs/gnina/04_gnina_docking.ipynb #8e868b74
 def gnina_dock(df,
                  ID_col = 'ID',
                  smi_col = 'SMILES',

kdock/gnina/rescore.py CHANGED Viewed

@@ -1,10 +1,10 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/gnina/05_gnina_AF3_rescore.ipynb.
-# %% auto 0
+# %% auto #0
 __all__ = ['ChainSelect', 'rename_residues', 'split_cif', 'pdb2sdf', 'prepare_rec_lig', 'gnina_rescore_local',
            'gnina_rescore_docker', 'extract_gnina_rescore', 'get_gnina_rescore', 'get_gnina_rescore_folder']
-# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb 3
+# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #37eb344f-6e5c-4350-b0fd-962340a4bf0d
 import pandas as pd
 import re, os, subprocess, py3Dmol
 from Bio.PDB import MMCIFParser, PDBIO, Select
@@ -15,7 +15,7 @@ from fastcore.all import L
 from tqdm.contrib.concurrent import process_map
 from functools import partial
-# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb 5
+# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #2e53d110-a6a6-4d46-8b04-9e7a76069995
 class ChainSelect(Select):
     "Select chain to save"
     def __init__(self, chain_ids):
@@ -23,7 +23,7 @@ class ChainSelect(Select):
     def accept_chain(self, chain):
         return chain.get_id() in self.chain_ids
-# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb 6
+# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #44e31366-70c4-4b9a-9e27-603442683fed
 def rename_residues(structure, chain_id, new_resname='LIG'):
     "Rename residue name from LIG_L to LIG as LIG_L exceeds lengths and leads to error in RDKit"
     for model in structure:
@@ -32,7 +32,7 @@ def rename_residues(structure, chain_id, new_resname='LIG'):
                 for residue in chain:
                     residue.resname = new_resname
-# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb 7
+# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #a760f9d5-7aba-47fc-b47f-593a862587f9
 def split_cif(cif_path, rec_chain_id,lig_chain_id, rec_pdb_path, lig_pdb_path):
     "Split AF3 output CIF to protein and ligand PDBs"
     parser = MMCIFParser(QUIET=True)
@@ -43,7 +43,7 @@ def split_cif(cif_path, rec_chain_id,lig_chain_id, rec_pdb_path, lig_pdb_path):
     io.save(str(rec_pdb_path), ChainSelect(rec_chain_id))  # receptor
     io.save(str(lig_pdb_path), ChainSelect(lig_chain_id))  # ligand
-# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb 8
+# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #ecf8c5a3-84e5-4c96-a97a-1f22e1d52b4e
 def pdb2sdf(pdb_path, sdf_path):
     "Convert ligand pdb to sdf file"
     mol = Chem.MolFromPDBFile(pdb_path, sanitize=True, removeHs=False)
@@ -56,7 +56,7 @@ def pdb2sdf(pdb_path, sdf_path):
         print('Conversion failed for:', pdb_path)
         return pdb_path
-# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb 9
+# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #223e66d6-f08e-4af8-96f9-cf2a78e9dfc1
 def prepare_rec_lig(cif_path, rec_chain_id, lig_chain_id, rec_pdb_path, lig_pdb_path):
     "Split AF3 cif to protein.pdb (chainA) and ligand.sdf (chainL) "
@@ -70,7 +70,7 @@ def prepare_rec_lig(cif_path, rec_chain_id, lig_chain_id, rec_pdb_path, lig_pdb_
         pass
     return failed
-# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb 15
+# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #e2d3a635-ce64-4e0a-a235-e659f191121f
 def gnina_rescore_local(protein_pdb,  # receptor file
                         ligand_sdf,   # ligand file
                         CNN_affinity=True,
@@ -91,7 +91,7 @@ def gnina_rescore_local(protein_pdb,  # receptor file
     result = subprocess.run(command, capture_output=True, text=True)
     return result.stdout
-# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb 17
+# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #e1d09a6e-8323-4503-b80c-5d5f95889b8f
 def gnina_rescore_docker(protein_pdb,
                          ligand_sdf,
                          CNN_affinity=True,
@@ -124,7 +124,7 @@ def gnina_rescore_docker(protein_pdb,
     result = subprocess.run(command, capture_output=True, text=True)
     return result.stdout
-# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb 19
+# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #c9bd835a-b955-469e-a732-edbad6e2ddee
 def extract_gnina_rescore(txt):
     """Extract GNINA output metrics into a dictionary (partial match allowed)."""
     result = {}
@@ -145,7 +145,7 @@ def extract_gnina_rescore(txt):
     return result
-# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb 24
+# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #b21d71d8-a420-4e7b-8882-1caa55e84dc2
 def get_gnina_rescore(cif_path,
                       rec_chain_id='A',
                       lig_chain_id='L',
@@ -170,7 +170,7 @@ def get_gnina_rescore(cif_path,
         gnina_output = gnina_rescore_docker(rec_path,lig_path,CNN_affinity,vinardo)
     return extract_gnina_rescore(gnina_output)
-# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb 29
+# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #1e80f250-2c22-45e9-8a97-6f68c2e8514d
 def get_gnina_rescore_folder(cif_folder,
                              rec_chain_id='A',
                              lig_chain_id='L',

kdock/px/core.py CHANGED Viewed

@@ -1,13 +1,13 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/protenix/07_protenix.ipynb.
-# %% auto 0
+# %% auto #0
 __all__ = ['get_single_job', 'get_single_protein_ligand_json', 'get_protein_ligand_df_json', 'get_virtual_screening_json']
-# %% ../../nbs/protenix/07_protenix.ipynb 6
+# %% ../../nbs/protenix/07_protenix.ipynb #b64cf79f-8926-4928-81c0-45d90ffcf425
 import json
 from pathlib import Path
-# %% ../../nbs/protenix/07_protenix.ipynb 9
+# %% ../../nbs/protenix/07_protenix.ipynb #d883059f-1b5c-4abc-af97-f8be1f24e2c9
 def get_single_job(job_name, protein_seq, msa_dir, SMILES=None,CCD=None):
     "Get protenix json format of protein and ligand."
@@ -40,7 +40,7 @@ def get_single_job(job_name, protein_seq, msa_dir, SMILES=None,CCD=None):
         ]
     }
-# %% ../../nbs/protenix/07_protenix.ipynb 11
+# %% ../../nbs/protenix/07_protenix.ipynb #42c3b0ef-c991-401f-9c8c-a1e0dbf5ed2d
 def get_single_protein_ligand_json(job_name,
                                    protein_seq,
                                    msa_dir,
@@ -59,7 +59,7 @@ def get_single_protein_ligand_json(job_name,
     return data
-# %% ../../nbs/protenix/07_protenix.ipynb 16
+# %% ../../nbs/protenix/07_protenix.ipynb #129512dd-a788-4593-9986-1b1f2c27d4b8
 def get_protein_ligand_df_json(df,
                                id_col,
                                seq_col,
@@ -96,7 +96,7 @@ def get_protein_ligand_df_json(df,
     return all_jobs
-# %% ../../nbs/protenix/07_protenix.ipynb 19
+# %% ../../nbs/protenix/07_protenix.ipynb #b6ed9bf7-4a0b-4230-bb89-89fb30f4025e
 def get_virtual_screening_json(df,
                                protein_seq,
                                msa_dir,

kdock/px/dock.py CHANGED Viewed

@@ -1,9 +1,9 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/protenix/06_proteinix_dock.ipynb.
-# %% auto 0
+# %% auto #0
 __all__ = ['capture_output', 'get_protenix_dock', 'get_protenix_vina_dock', 'json2sdf']
-# %% ../../nbs/protenix/06_proteinix_dock.ipynb 11
+# %% ../../nbs/protenix/06_proteinix_dock.ipynb #97f1b541-c8d4-4d9f-bf5b-a081269a0a70
 # kdock
 from ..core.utils import *
@@ -28,7 +28,7 @@ except ImportError as e:
     write_ligand_to_sdf = None
-# %% ../../nbs/protenix/06_proteinix_dock.ipynb 15
+# %% ../../nbs/protenix/06_proteinix_dock.ipynb #b885d4cd-7783-4595-8803-a74fbeb24a38
 @contextlib.contextmanager
 def capture_output(log_path):
     with open(log_path, 'a') as log_file:  # Append mode prevents overwriting
@@ -56,7 +56,7 @@ def capture_output(log_path):
             os.close(old_stdout_fd)
             os.close(old_stderr_fd)
-# %% ../../nbs/protenix/06_proteinix_dock.ipynb 16
+# %% ../../nbs/protenix/06_proteinix_dock.ipynb #77c25ec1-7edd-4e18-8a64-61bff5071f3a
 def get_protenix_dock(receptor_pdb, # pdb path
                       ligand_sdf, # sdf path
                       box:list, # center xyz + size xyz
@@ -81,7 +81,7 @@ def get_protenix_dock(receptor_pdb, # pdb path
     print(f'Docking complete. Results in: {result_dir}')
     print('Cache dir:', cache_dir)
-# %% ../../nbs/protenix/06_proteinix_dock.ipynb 20
+# %% ../../nbs/protenix/06_proteinix_dock.ipynb #59033dc5-d02c-4a1c-a4ef-f56ef9364ff8
 def get_protenix_vina_dock(receptor_pdb, # pdb path
                       ligand_sdf, # sdf path
                       box:list, # center xyz + size xyz
@@ -100,7 +100,7 @@ def get_protenix_vina_dock(receptor_pdb, # pdb path
     print(f'Docking complete. Results in: {result_dir}')
-# %% ../../nbs/protenix/06_proteinix_dock.ipynb 23
+# %% ../../nbs/protenix/06_proteinix_dock.ipynb #ce3d018b-6762-450f-b2aa-bcdb942445c8
 def json2sdf(json_path,
              sdf_path=None, # .sdf to be saved
              ):

{kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/METADATA RENAMED Viewed

@@ -1,20 +1,17 @@
 Metadata-Version: 2.4
 Name: kdock
-Version: 0.0.2
+Version: 0.0.4
 Summary: A collection of docking tools
-Home-page: https://github.com/sky1ove/kdock
-Author: Lily Cai
-Author-email: lcai888666@gmail.com
-License: Apache Software License 2.0
-Keywords: nbdev jupyter notebook python
-Classifier: Development Status :: 4 - Beta
-Classifier: Intended Audience :: Developers
+Author-email: Lily Cai <lcai888666@gmail.com>
+License: Apache-2.0
+Project-URL: Repository, https://github.com/sky1ove/kdock
+Project-URL: Documentation, https://sky1ove.github.io/kdock
+Keywords: nbdev,jupyter,notebook,python
 Classifier: Natural Language :: English
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Intended Audience :: Developers
+Classifier: Development Status :: 3 - Alpha
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
@@ -35,22 +32,12 @@ Requires-Dist: openpyxl
 Requires-Dist: biopython
 Requires-Dist: requests
 Requires-Dist: scikit-fingerprints
+Requires-Dist: pubchempy
 Provides-Extra: dev
 Requires-Dist: nbdev; extra == "dev"
 Requires-Dist: ipykernel; extra == "dev"
-Dynamic: author
-Dynamic: author-email
-Dynamic: classifier
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: home-page
-Dynamic: keywords
-Dynamic: license
+Requires-Dist: twine; extra == "dev"
 Dynamic: license-file
-Dynamic: provides-extra
-Dynamic: requires-dist
-Dynamic: requires-python
-Dynamic: summary
 # kdock
@@ -59,8 +46,13 @@ Dynamic: summary
 ## Installation
-Install latest from the GitHub
-[repository](https://github.com/sky1ove/kdock):
+Install stable version from Pypi:
+``` sh
+$ pip install kdock
+```
+Install latest from the GitHub:
 ``` sh
 $ pip install -U git+https://github.com/sky1ove/kdock.git

kdock-0.0.4.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,27 @@
+kdock/__init__.py,sha256=1mptEzQihbdyqqzMgdns_j5ZGK9gz7hR2bsgA_TnjO4,22
+kdock/_modidx.py,sha256=wDTqXS4lSFzownRvr2ldeTcZH8t6kXKkxaN3FuLKs9g,16155
+kdock/af3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kdock/af3/analyze.py,sha256=Fl_Edh1tKLC4Bkqi14LFQv0fNLZyD9GCKvc3oSDAnMo,6111
+kdock/af3/docker.py,sha256=_CKbagpQaWB43DfhiLOuDI7xA-XX5mp_m1WBclWtJ44,4822
+kdock/af3/json.py,sha256=nB7j9T61crClEUA8sDwwiIHkrMGLf3KlxTiK54vM1BI,9877
+kdock/af3/protein_pairs.py,sha256=p7D2u_g7N_xQ0r8xEjlDWvmwbwnIVKYk3AxqyN-XSls,3878
+kdock/boltz/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kdock/boltz/dock.py,sha256=Tp7PoTYKBmOutTxCg8ATRQWbgZKG8lWv2DnxXSWAAuY,2841
+kdock/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kdock/core/data.py,sha256=_T2TjYrfifjGj6mZelacCVHbSywqWhKf9awOtp7nIqQ,2779
+kdock/core/ligand.py,sha256=8ndoNrjfWFDxdSk8iw-JBn918Z6eu55bv_t6AAREgz0,11660
+kdock/core/plot.py,sha256=_CRiVLpBH3rp0pv9YrT7t8Y9hGYPpTtwJMvXPS7Smkg,3332
+kdock/core/protein.py,sha256=DPhd6sHrAybQK9VBP0gC1_ZfwZ4sD_l_HiytpsKnowQ,10709
+kdock/core/utils.py,sha256=2M7FyQfUsRsGQLG892p8B1vFiPZpl1tV0RNl9Pv3EKI,5723
+kdock/gnina/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kdock/gnina/dock.py,sha256=LconAw4yFXJmR4JWhf6AlVouapC84Dy07nWi7aHOGXs,4401
+kdock/gnina/rescore.py,sha256=LhWikIrWM1lRf4otVMFfLRcvx0M0sUfSCj7cIqSQ8Jg,7870
+kdock/px/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kdock/px/core.py,sha256=T_SkMqT56pfJQoI8A1U6gzf8OsqFXlheOIuj-jEvhiI,4715
+kdock/px/dock.py,sha256=_RA3mDP_KOmhEI0s24Vz9z4cyuk8u-CXy3TO_fKWGME,4682
+kdock-0.0.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+kdock-0.0.4.dist-info/METADATA,sha256=5fbtbMYp22xHndW0kREt7H7gqjyUQvgQbpUspK33JBY,1838
+kdock-0.0.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+kdock-0.0.4.dist-info/entry_points.txt,sha256=YwnlRC6hsrK3yz6gon_F9IGMri124qmi8VfsymI1Fz0,32
+kdock-0.0.4.dist-info/top_level.txt,sha256=HfWUyFy5aRDePTmNrMCw2m5HNtpfBhbpm6zbXjfsVnA,6
+kdock-0.0.4.dist-info/RECORD,,

{kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

kdock-0.0.2.dist-info/RECORD DELETED Viewed

@@ -1,25 +0,0 @@
-kdock/__init__.py,sha256=QvlVh4JTl3JL7jQAja76yKtT-IvF4631ASjWY1wS6AQ,22
-kdock/_modidx.py,sha256=TaXYpAMNa4XCw-wAUH1LZaMQ2uzJvDcIPFfWGhtUD08,15422
-kdock/af3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kdock/af3/analyze.py,sha256=cdVkWx3bW293XF-SgXBW54zDjXd3nWY3j_hUPfPD7AE,5827
-kdock/af3/docker.py,sha256=0jTP7lFQ6kGRostX_aemWAXpnGf-c2w3ICGIJYxz_b8,4609
-kdock/af3/json.py,sha256=2h6ixv1PTy7KvPzepMvVHb9QuvhSebjTS1Kn5q9YrUs,9522
-kdock/af3/protein_pairs.py,sha256=-Ade5JjSpz2j8hOG3L1T9IO67QiONhPP6ukv4wyDTy8,3595
-kdock/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kdock/core/data.py,sha256=XmV_65YTgdsWygG9GifjWOkP-5D9mzbzQI19AlS0yxc,2392
-kdock/core/ligand.py,sha256=PSDPnceov6nLkmtMagkEexJ3eTn6v1N7uGOj3swiYSs,10631
-kdock/core/plot.py,sha256=kbkIDqksiOXTvisG5TcjEHkZSImisV1Yxl7r_CGOmNo,3294
-kdock/core/protein.py,sha256=y0BzCPRphbYY4aW6DBCiRbbMv8GQXMZy4n7lzr4_E90,10424
-kdock/core/utils.py,sha256=Lpnu6mWNFOIkxJExp39Gei9EHlWCMn-ZOFiDUesrvoU,5670
-kdock/gnina/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kdock/gnina/dock.py,sha256=BY2lvmLplDq-x6VTOhckBvVWmtlMfbAoB14W2dxXtAw,4356
-kdock/gnina/rescore.py,sha256=n2Nwa8c9fyrNfwTC7UuZn-8kb9CqHZPr25sjCumYFXk,7478
-kdock/px/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kdock/px/core.py,sha256=P_dmbv0pyT1TY5wlq0r-b32x6BEzNWyeTkoy1C_gK6A,4537
-kdock/px/dock.py,sha256=WZEUG-m0IUDmVZ0CJ_oKZm1mcgCvI_pnVUzj3QLv1io,4506
-kdock-0.0.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-kdock-0.0.2.dist-info/METADATA,sha256=xcQCel5oQm2FL8VwmxnCYz6nfwq4cuklPlqRssULfd8,2122
-kdock-0.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-kdock-0.0.2.dist-info/entry_points.txt,sha256=YwnlRC6hsrK3yz6gon_F9IGMri124qmi8VfsymI1Fz0,32
-kdock-0.0.2.dist-info/top_level.txt,sha256=HfWUyFy5aRDePTmNrMCw2m5HNtpfBhbpm6zbXjfsVnA,6
-kdock-0.0.2.dist-info/RECORD,,

{kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

kdock 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl

kdock 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl