kdock 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kdock/__init__.py +1 -1
- kdock/_modidx.py +7 -1
- kdock/af3/analyze.py +9 -9
- kdock/af3/docker.py +7 -7
- kdock/af3/json.py +11 -11
- kdock/af3/protein_pairs.py +9 -9
- kdock/boltz/__init__.py +0 -0
- kdock/boltz/dock.py +107 -0
- kdock/core/data.py +17 -7
- kdock/core/ligand.py +29 -18
- kdock/core/plot.py +6 -6
- kdock/core/protein.py +9 -9
- kdock/core/utils.py +8 -8
- kdock/gnina/dock.py +7 -7
- kdock/gnina/rescore.py +12 -12
- kdock/px/core.py +6 -6
- kdock/px/dock.py +6 -6
- {kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/METADATA +19 -27
- kdock-0.0.4.dist-info/RECORD +27 -0
- {kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/WHEEL +1 -1
- kdock-0.0.2.dist-info/RECORD +0 -25
- {kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/entry_points.txt +0 -0
- {kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/licenses/LICENSE +0 -0
- {kdock-0.0.2.dist-info → kdock-0.0.4.dist-info}/top_level.txt +0 -0
kdock/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.2"
|
|
1
|
+
__version__ = "0.0.4"
|
kdock/_modidx.py
CHANGED
|
@@ -42,6 +42,10 @@ d = { 'settings': { 'branch': 'main',
|
|
|
42
42
|
'kdock/af3/protein_pairs.py'),
|
|
43
43
|
'kdock.af3.protein_pairs.get_protein_subjson': ( 'af3/protein_pairs.html#get_protein_subjson',
|
|
44
44
|
'kdock/af3/protein_pairs.py')},
|
|
45
|
+
'kdock.boltz.dock': { 'kdock.boltz.dock.plot_scatter_spearman': ( 'core/boltz/dock.html#plot_scatter_spearman',
|
|
46
|
+
'kdock/boltz/dock.py'),
|
|
47
|
+
'kdock.boltz.dock.prepare_boltz': ('core/boltz/dock.html#prepare_boltz', 'kdock/boltz/dock.py'),
|
|
48
|
+
'kdock.boltz.dock.run_boltz': ('core/boltz/dock.html#run_boltz', 'kdock/boltz/dock.py')},
|
|
45
49
|
'kdock.core.data': { 'kdock.core.data.Collins': ('core/data.html#collins', 'kdock/core/data.py'),
|
|
46
50
|
'kdock.core.data.Collins.get_antibiotics_2k': ( 'core/data.html#collins.get_antibiotics_2k',
|
|
47
51
|
'kdock/core/data.py'),
|
|
@@ -54,8 +58,10 @@ d = { 'settings': { 'branch': 'main',
|
|
|
54
58
|
'kdock.core.data.Kras.get_mirati_g12d_raw': ( 'core/data.html#kras.get_mirati_g12d_raw',
|
|
55
59
|
'kdock/core/data.py'),
|
|
56
60
|
'kdock.core.data.Kras.get_seq': ('core/data.html#kras.get_seq', 'kdock/core/data.py'),
|
|
57
|
-
'kdock.core.data.fetch_csv': ('core/data.html#fetch_csv', 'kdock/core/data.py')},
|
|
61
|
+
'kdock.core.data.fetch_csv': ('core/data.html#fetch_csv', 'kdock/core/data.py'),
|
|
62
|
+
'kdock.core.data.name2smi': ('core/data.html#name2smi', 'kdock/core/data.py')},
|
|
58
63
|
'kdock.core.ligand': { 'kdock.core.ligand.compress_fp': ('core/ligand.html#compress_fp', 'kdock/core/ligand.py'),
|
|
64
|
+
'kdock.core.ligand.contain_acrylamide': ('core/ligand.html#contain_acrylamide', 'kdock/core/ligand.py'),
|
|
59
65
|
'kdock.core.ligand.get_fp': ('core/ligand.html#get_fp', 'kdock/core/ligand.py'),
|
|
60
66
|
'kdock.core.ligand.get_rdkit': ('core/ligand.html#get_rdkit', 'kdock/core/ligand.py'),
|
|
61
67
|
'kdock.core.ligand.get_rdkit_3d': ('core/ligand.html#get_rdkit_3d', 'kdock/core/ligand.py'),
|
kdock/af3/analyze.py
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/03_analyze.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['read_summary_json', 'get_summary_df', 'process_summary_df', 'get_top_cases', 'get_3d_report', 'get_report',
|
|
5
5
|
'copy_file']
|
|
6
6
|
|
|
7
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
7
|
+
# %% ../../nbs/af3/03_analyze.ipynb #bfdd4fae-37c7-4835-afb1-524e5a731add
|
|
8
8
|
import json, shutil, pandas as pd
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
import plotly.graph_objects as go
|
|
11
11
|
|
|
12
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
12
|
+
# %% ../../nbs/af3/03_analyze.ipynb #7b982142-f161-414b-b893-56234282cfb7
|
|
13
13
|
def read_summary_json(json_path):
|
|
14
14
|
"Read json file to dictionary"
|
|
15
15
|
json_path = Path(json_path)
|
|
@@ -29,7 +29,7 @@ def read_summary_json(json_path):
|
|
|
29
29
|
row[key] = value
|
|
30
30
|
return row
|
|
31
31
|
|
|
32
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
32
|
+
# %% ../../nbs/af3/03_analyze.ipynb #6efc38e3-f8d8-4c3c-b068-f5a37a4af057
|
|
33
33
|
def get_summary_df(output_dir):
|
|
34
34
|
"Pack the summary json from the output folder to the df"
|
|
35
35
|
|
|
@@ -37,7 +37,7 @@ def get_summary_df(output_dir):
|
|
|
37
37
|
print(len(path_list),'summary_confidences.json files detected')
|
|
38
38
|
return pd.DataFrame(list(map(read_summary_json,path_list)))
|
|
39
39
|
|
|
40
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
40
|
+
# %% ../../nbs/af3/03_analyze.ipynb #2ae7e00d-f285-4fc1-a40c-af0f64d74376
|
|
41
41
|
def process_summary_df(df,generate_report=False):
|
|
42
42
|
"Post process the json-converted pandas df; remove redundant columns; available for pairs"
|
|
43
43
|
|
|
@@ -90,7 +90,7 @@ def process_summary_df(df,generate_report=False):
|
|
|
90
90
|
|
|
91
91
|
return df
|
|
92
92
|
|
|
93
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
93
|
+
# %% ../../nbs/af3/03_analyze.ipynb #e1af07e2-b86a-492d-b8d7-0fd2f8d90779
|
|
94
94
|
def get_top_cases(df,n=30):
|
|
95
95
|
"Get top cases from the metric"
|
|
96
96
|
idxs = set()
|
|
@@ -109,7 +109,7 @@ def get_top_cases(df,n=30):
|
|
|
109
109
|
|
|
110
110
|
return list(idxs)
|
|
111
111
|
|
|
112
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
112
|
+
# %% ../../nbs/af3/03_analyze.ipynb #89900087-ff2c-48e3-bbe7-3077de2d1f32
|
|
113
113
|
def get_3d_report(df,index_list, x='iptm',y='ptm',z='chain_pair_pae_min_add',save_dir='af_report'):
|
|
114
114
|
"Generate 3d plot html file given case index and x, y, z colname"
|
|
115
115
|
annotation = df.index.where(df.index.isin(index_list),'').str.split('_').str[1]
|
|
@@ -140,7 +140,7 @@ def get_3d_report(df,index_list, x='iptm',y='ptm',z='chain_pair_pae_min_add',sav
|
|
|
140
140
|
fig.write_html(Path(save_dir)/'3d_scatter_plot.html',full_html=True)
|
|
141
141
|
print('Exported the html to 3d_scatter_plot.html')
|
|
142
142
|
|
|
143
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
143
|
+
# %% ../../nbs/af3/03_analyze.ipynb #02cab7f4-a581-43fb-a0b9-6df5ce6d1d08
|
|
144
144
|
def get_report(out_dir,save_dir='af_report'):
|
|
145
145
|
"Generate summary report based on summary_confidences file; return summary df and top cases"
|
|
146
146
|
out = get_summary_df(out_dir)
|
|
@@ -153,7 +153,7 @@ def get_report(out_dir,save_dir='af_report'):
|
|
|
153
153
|
|
|
154
154
|
return out, top_cases
|
|
155
155
|
|
|
156
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
156
|
+
# %% ../../nbs/af3/03_analyze.ipynb #11319fd1-0bee-4e8c-b224-9eed52b8cbb2
|
|
157
157
|
def copy_file(idx_name, source_dir, dest_dir):
|
|
158
158
|
"Copy all model cif generated by AF3 to the new dest folder"
|
|
159
159
|
source_path = Path(source_dir)/f"{idx_name}/{idx_name}_model.cif"
|
kdock/af3/docker.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/01_docker.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['get_docker_command', 'docker_single_full', 'docker_multi_full', 'docker_multi_msa', 'docker_multi_infer']
|
|
5
5
|
|
|
6
|
-
# %% ../../nbs/af3/01_docker.ipynb
|
|
6
|
+
# %% ../../nbs/af3/01_docker.ipynb #57b6887a-a451-462e-9452-d850d3028317
|
|
7
7
|
from fastcore.meta import delegates
|
|
8
8
|
|
|
9
|
-
# %% ../../nbs/af3/01_docker.ipynb
|
|
9
|
+
# %% ../../nbs/af3/01_docker.ipynb #10f9c68f-0374-4cb2-b10e-65bb0107844f
|
|
10
10
|
def get_docker_command(
|
|
11
11
|
input_dir="af_input", # Directory for input data
|
|
12
12
|
output_dir="af_output", # Directory for output results
|
|
@@ -80,7 +80,7 @@ def get_docker_command(
|
|
|
80
80
|
docker_command = "\n".join(cmd_parts)
|
|
81
81
|
print(docker_command)
|
|
82
82
|
|
|
83
|
-
# %% ../../nbs/af3/01_docker.ipynb
|
|
83
|
+
# %% ../../nbs/af3/01_docker.ipynb #5e5c9de1-0b4b-433f-90a8-02fa755aafc3
|
|
84
84
|
@delegates(get_docker_command)
|
|
85
85
|
def docker_single_full(json_path,output_dir,cache_dir=False,**kwargs):
|
|
86
86
|
"Single json task with full pipeline."
|
|
@@ -90,7 +90,7 @@ def docker_single_full(json_path,output_dir,cache_dir=False,**kwargs):
|
|
|
90
90
|
cache_dir=cache_dir,
|
|
91
91
|
**kwargs)
|
|
92
92
|
|
|
93
|
-
# %% ../../nbs/af3/01_docker.ipynb
|
|
93
|
+
# %% ../../nbs/af3/01_docker.ipynb #996f65a3-64ee-4288-aed7-3a0acba73175
|
|
94
94
|
@delegates(get_docker_command)
|
|
95
95
|
def docker_multi_full(input_dir,output_dir,**kwargs):
|
|
96
96
|
"Folder of json as input with full pipeline."
|
|
@@ -99,7 +99,7 @@ def docker_multi_full(input_dir,output_dir,**kwargs):
|
|
|
99
99
|
output_dir=output_dir,
|
|
100
100
|
**kwargs)
|
|
101
101
|
|
|
102
|
-
# %% ../../nbs/af3/01_docker.ipynb
|
|
102
|
+
# %% ../../nbs/af3/01_docker.ipynb #7512a43c-28c2-41f8-b4c5-4ddf1e56f716
|
|
103
103
|
@delegates(get_docker_command)
|
|
104
104
|
def docker_multi_msa(input_dir,output_dir,search_only=True,**kwargs):
|
|
105
105
|
"MSA search only, without structure inference; CPU only."
|
|
@@ -109,7 +109,7 @@ def docker_multi_msa(input_dir,output_dir,search_only=True,**kwargs):
|
|
|
109
109
|
search_only=search_only,
|
|
110
110
|
**kwargs)
|
|
111
111
|
|
|
112
|
-
# %% ../../nbs/af3/01_docker.ipynb
|
|
112
|
+
# %% ../../nbs/af3/01_docker.ipynb #403f1d8f-6055-486a-a0f9-bc4b2ca8feaa
|
|
113
113
|
@delegates(get_docker_command)
|
|
114
114
|
def docker_multi_infer(input_dir,output_dir,skip_search=True,**kwargs):
|
|
115
115
|
"Infer only with pre-calculated MSA; GPU is needed."
|
kdock/af3/json.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/00_json.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['dump_json', 'get_protein_json', 'read_json', 'get_protein_smiles_json', 'get_protein_ccdcode_json',
|
|
5
5
|
'assign_atom_names_from_graph', 'mol_to_ccd_text', 'sdf2ccd', 'get_protein_ccd_json', 'split_nfolder']
|
|
6
6
|
|
|
7
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
7
|
+
# %% ../../nbs/af3/00_json.ipynb #1d0467e3-9fcb-4e07-a59f-58405b640950
|
|
8
8
|
import re, shutil, json, pandas as pd, numpy as np
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
|
|
@@ -14,13 +14,13 @@ from rdkit import Chem
|
|
|
14
14
|
|
|
15
15
|
from Bio.PDB import PDBParser
|
|
16
16
|
|
|
17
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
17
|
+
# %% ../../nbs/af3/00_json.ipynb #62063f37-32aa-4a12-8e57-29ef4fff1413
|
|
18
18
|
def dump_json(data, save_path):
|
|
19
19
|
"Save json data into a file"
|
|
20
20
|
with open(save_path,'w') as f:
|
|
21
21
|
json.dump(data,f,indent=4)
|
|
22
22
|
|
|
23
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
23
|
+
# %% ../../nbs/af3/00_json.ipynb #14d9fdb5-7e05-47f6-b596-9d751f9f5618
|
|
24
24
|
def get_protein_json(name, # job name
|
|
25
25
|
seq, # aa sequence
|
|
26
26
|
save_path=None, # .json
|
|
@@ -48,13 +48,13 @@ def get_protein_json(name, # job name
|
|
|
48
48
|
dump_json(json_data,save_path)
|
|
49
49
|
return json_data
|
|
50
50
|
|
|
51
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
51
|
+
# %% ../../nbs/af3/00_json.ipynb #9bb34b3b-e90f-4112-a947-aee8a8610335
|
|
52
52
|
def read_json(file_path):
|
|
53
53
|
with open(file_path,'r') as f:
|
|
54
54
|
data = json.load(f)
|
|
55
55
|
return data
|
|
56
56
|
|
|
57
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
57
|
+
# %% ../../nbs/af3/00_json.ipynb #b9490613-30cd-4cf9-93a7-0e53a6645932
|
|
58
58
|
def get_protein_smiles_json(smi_id:str,
|
|
59
59
|
SMILES:str,
|
|
60
60
|
protein_json, # json type
|
|
@@ -88,7 +88,7 @@ def get_protein_smiles_json(smi_id:str,
|
|
|
88
88
|
dump_json(json_data,save_path)
|
|
89
89
|
return json_data
|
|
90
90
|
|
|
91
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
91
|
+
# %% ../../nbs/af3/00_json.ipynb #300bb051-4246-400e-bdc3-dd43e4da47d1
|
|
92
92
|
def get_protein_ccdcode_json(protein_json, # dict with protein sequence
|
|
93
93
|
ccd_code, # str or list of str
|
|
94
94
|
job_id: str, # job/task ID
|
|
@@ -128,7 +128,7 @@ def get_protein_ccdcode_json(protein_json, # dict with protein sequence
|
|
|
128
128
|
|
|
129
129
|
return json_data
|
|
130
130
|
|
|
131
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
131
|
+
# %% ../../nbs/af3/00_json.ipynb #e850527d-826d-4c7f-9a66-bf307c44b809
|
|
132
132
|
# Mapping bond types to mmCIF-compatible values
|
|
133
133
|
_RDKIT_BOND_TYPE_TO_MMCIF = {
|
|
134
134
|
rd_chem.BondType.SINGLE: 'SING',
|
|
@@ -208,7 +208,7 @@ def mol_to_ccd_text(mol, component_id, pdbx_smiles=None, include_hydrogens=False
|
|
|
208
208
|
|
|
209
209
|
return "\n".join(lines)
|
|
210
210
|
|
|
211
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
211
|
+
# %% ../../nbs/af3/00_json.ipynb #c099dc24-d29e-49a1-9cbb-d14e7bc2a3de
|
|
212
212
|
def sdf2ccd(sdf_path,
|
|
213
213
|
CCD_name='lig-1', # do not use '_'; use as less letter as possible, 'lig-any' leads to extra ligands
|
|
214
214
|
):
|
|
@@ -218,7 +218,7 @@ def sdf2ccd(sdf_path,
|
|
|
218
218
|
mol = supplier[0] # Get the first molecule
|
|
219
219
|
return mol_to_ccd_text(mol,CCD_name)
|
|
220
220
|
|
|
221
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
221
|
+
# %% ../../nbs/af3/00_json.ipynb #3953bc6b-3bc0-40e5-a87b-08eb640b8fcf
|
|
222
222
|
def get_protein_ccd_json(protein_json, # dict with protein sequence
|
|
223
223
|
rec_residue_num:int, # 1-indexed, for bondedAtomPairs, e.g., ["A", 145, "SG"]
|
|
224
224
|
rec_atom_id:str, # for bondedAtomPairs, e.g., ["A", 145, "SG"]
|
|
@@ -262,7 +262,7 @@ def get_protein_ccd_json(protein_json, # dict with protein sequence
|
|
|
262
262
|
|
|
263
263
|
return json_data
|
|
264
264
|
|
|
265
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
265
|
+
# %% ../../nbs/af3/00_json.ipynb #c7f16eec-082a-4233-b3ed-b96d58ecedb4
|
|
266
266
|
def split_nfolder(folder_dir,
|
|
267
267
|
n=4):
|
|
268
268
|
"Move json files from a folder into subfolders (folder_0, folder_1, ..., folder_N)."
|
kdock/af3/protein_pairs.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/02_protein_pairs.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['get_colabfold_cmd', 'copy_a3m', 'a3m_to_seq', 'get_protein_subjson', 'dump_json_folder', 'get_multi_protein_json',
|
|
5
5
|
'generate_pair_df']
|
|
6
6
|
|
|
7
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
7
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #92000c0e-6470-423c-9c55-9972a341282b
|
|
8
8
|
import os, json, shutil, pandas as pd
|
|
9
9
|
from tqdm import tqdm
|
|
10
10
|
from itertools import combinations
|
|
@@ -12,12 +12,12 @@ from pathlib import Path
|
|
|
12
12
|
from .json import *
|
|
13
13
|
from .docker import *
|
|
14
14
|
|
|
15
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
15
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #3f3ae31b-287e-4810-9e10-db969c3fcc6b
|
|
16
16
|
def get_colabfold_cmd(csv_path,project_name):
|
|
17
17
|
print('Run below in terminal:')
|
|
18
18
|
print(f'\n colabfold_batch {csv_path} msa_{project_name} --msa-only')
|
|
19
19
|
|
|
20
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
20
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #9fb90cd0-3260-4dd1-a169-340a75633ab0
|
|
21
21
|
def copy_a3m(a3m_dir: str, # Path to the source directory containing .a3m files.
|
|
22
22
|
dest_dir: str, # Path to the destination directory where files will be copied
|
|
23
23
|
):
|
|
@@ -33,12 +33,12 @@ def copy_a3m(a3m_dir: str, # Path to the source directory containing .a3m files.
|
|
|
33
33
|
|
|
34
34
|
print(f"Copied {len(files)} a3m files from {a3m_dir} to {dest_dir}")
|
|
35
35
|
|
|
36
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
36
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #def3928f-c895-479f-9e88-093fdf3892da
|
|
37
37
|
def a3m_to_seq(file_path:Path):
|
|
38
38
|
"Get protein sequence from a3m file"
|
|
39
39
|
return file_path.read_text().splitlines()[2] # protein sequence is located on line 2
|
|
40
40
|
|
|
41
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
41
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #03da4db8-60b5-409e-889a-f44b2fb27705
|
|
42
42
|
def get_protein_subjson(gene_name, a3m_dir=".",idx = 'A',run_template=True):
|
|
43
43
|
"Get subjson (protein part) with colabfold unpairedMSA .a3m path"
|
|
44
44
|
file_path = Path(a3m_dir)/f"{gene_name}.a3m"
|
|
@@ -55,13 +55,13 @@ def get_protein_subjson(gene_name, a3m_dir=".",idx = 'A',run_template=True):
|
|
|
55
55
|
|
|
56
56
|
return json_data
|
|
57
57
|
|
|
58
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
58
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #76c918a2-630a-48ac-a682-501cfe4a522f
|
|
59
59
|
def dump_json_folder(json_data, folder):
|
|
60
60
|
"Save json under a folder"
|
|
61
61
|
file_path = Path(folder)/f"{json_data['name']}.json"
|
|
62
62
|
with open(file_path,'w') as f: json.dump(json_data,f,indent=4)
|
|
63
63
|
|
|
64
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
64
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #8e97b4ea-17ec-48dd-bca0-c2f11763205a
|
|
65
65
|
def get_multi_protein_json(gene_list,a3m_dir,run_template=True,save_folder=None):
|
|
66
66
|
'Get json of multiple proteins, with unpaired MSA path indicated (from colabfold MSA)'
|
|
67
67
|
sequences = []
|
|
@@ -82,7 +82,7 @@ def get_multi_protein_json(gene_list,a3m_dir,run_template=True,save_folder=None)
|
|
|
82
82
|
dump_json_folder(json_data,save_folder)
|
|
83
83
|
return json_data
|
|
84
84
|
|
|
85
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
85
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #8cc62d64-4643-49a3-a7ed-be7231abb0b9
|
|
86
86
|
def generate_pair_df(gene_list,self_pair=True):
|
|
87
87
|
"Unique pair genes in a gene list"
|
|
88
88
|
pairs = list(combinations(gene_list, 2))
|
kdock/boltz/__init__.py
ADDED
|
File without changes
|
kdock/boltz/dock.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/boltz/00_dock.ipynb.
|
|
2
|
+
|
|
3
|
+
# %% auto #0
|
|
4
|
+
__all__ = ['prepare_boltz', 'run_boltz', 'plot_scatter_spearman']
|
|
5
|
+
|
|
6
|
+
# %% ../../nbs/core/boltz/00_dock.ipynb #3b60daa9
|
|
7
|
+
# basics
|
|
8
|
+
import subprocess
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
# %% ../../nbs/core/boltz/00_dock.ipynb #71a6ad95
|
|
12
|
+
def prepare_boltz(seq: str, # Amino acid sequence of the protein the protein
|
|
13
|
+
smiles: str, # SMILES string of the ligand
|
|
14
|
+
fname: str, # Output filename (should end with .yaml)
|
|
15
|
+
):
|
|
16
|
+
"Create a YAML file for protein-ligand affinity prediction."
|
|
17
|
+
yaml_content = f"""version: 1
|
|
18
|
+
sequences:
|
|
19
|
+
- protein:
|
|
20
|
+
id: "A"
|
|
21
|
+
sequence: "{seq}"
|
|
22
|
+
- ligand:
|
|
23
|
+
id: "B"
|
|
24
|
+
smiles: "{smiles}"
|
|
25
|
+
properties:
|
|
26
|
+
- affinity:
|
|
27
|
+
binder: "B"
|
|
28
|
+
"""
|
|
29
|
+
with open(fname, "w") as f:
|
|
30
|
+
f.write(yaml_content)
|
|
31
|
+
|
|
32
|
+
# %% ../../nbs/core/boltz/00_dock.ipynb #486e2576
|
|
33
|
+
def run_boltz(file_list:list[Path], # list of .yaml path in Pathlib object
|
|
34
|
+
api_key):
|
|
35
|
+
|
|
36
|
+
"Run Boltz-Lab predictions for a list of YAML files."
|
|
37
|
+
|
|
38
|
+
# config key
|
|
39
|
+
subprocess.run(
|
|
40
|
+
["boltz-lab", "config", "--api-key", api_key.strip()],
|
|
41
|
+
check=True
|
|
42
|
+
)
|
|
43
|
+
failed = []
|
|
44
|
+
|
|
45
|
+
for file in file_list:
|
|
46
|
+
print(f"\nSubmitting: {str(file)}")
|
|
47
|
+
|
|
48
|
+
result = subprocess.run(
|
|
49
|
+
["boltz-lab", "predict", str(file),
|
|
50
|
+
"--no-wait", # for batch run, so no need to wait the results til the next
|
|
51
|
+
"--name",file.stem, # job name appeared in boltz
|
|
52
|
+
],
|
|
53
|
+
capture_output=True,
|
|
54
|
+
text=True,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
if result.returncode != 0: failed.append(file.name)
|
|
58
|
+
|
|
59
|
+
print(result.stdout)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
print("\n======== SUMMARY ========")
|
|
63
|
+
print(f"Total: {len(file_list)}")
|
|
64
|
+
print(f"Failed: {len(failed)}")
|
|
65
|
+
|
|
66
|
+
return failed
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# %% ../../nbs/core/boltz/00_dock.ipynb #45b0e6aa
|
|
70
|
+
import matplotlib.pyplot as plt
|
|
71
|
+
import seaborn as sns, numpy as np
|
|
72
|
+
from scipy.stats import spearmanr
|
|
73
|
+
|
|
74
|
+
# %% ../../nbs/core/boltz/00_dock.ipynb #eec74db7
|
|
75
|
+
def plot_scatter_spearman(data, x, y, ax=None):
|
|
76
|
+
"""
|
|
77
|
+
Plot scatter + Spearman correlation and p-value annotation.
|
|
78
|
+
"""
|
|
79
|
+
if ax is None:
|
|
80
|
+
ax = plt.gca()
|
|
81
|
+
|
|
82
|
+
# Drop NA
|
|
83
|
+
sub = data[[x, y]].dropna()
|
|
84
|
+
x_vals = sub[x]
|
|
85
|
+
y_vals = sub[y]
|
|
86
|
+
|
|
87
|
+
# Compute Spearman
|
|
88
|
+
rho, p = spearmanr(x_vals, y_vals)
|
|
89
|
+
|
|
90
|
+
# Plot
|
|
91
|
+
sns.scatterplot(data=sub, x=x, y=y, ax=ax)
|
|
92
|
+
|
|
93
|
+
# Annotate
|
|
94
|
+
text = f"Spearman ρ = {rho:.3f}\np = {p:.2e}"
|
|
95
|
+
ax.text(
|
|
96
|
+
0.98, 0.98, # x, y in axes fraction
|
|
97
|
+
text,
|
|
98
|
+
transform=ax.transAxes,
|
|
99
|
+
ha='right', # horizontal align
|
|
100
|
+
va='top', # vertical align
|
|
101
|
+
fontsize=11,
|
|
102
|
+
bbox=dict(boxstyle="round", facecolor="white", alpha=0.7)
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
ax.set_title(f'{x} vs {y}')
|
|
106
|
+
|
|
107
|
+
return rho, p
|
kdock/core/data.py
CHANGED
|
@@ -1,22 +1,32 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/00_data.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
4
|
-
__all__ = ['BASE_URL', 'fetch_csv', 'Collins', 'Kras']
|
|
3
|
+
# %% auto #0
|
|
4
|
+
__all__ = ['BASE_URL', 'name2smi', 'fetch_csv', 'Collins', 'Kras']
|
|
5
5
|
|
|
6
|
-
# %% ../../nbs/core/00_data.ipynb
|
|
6
|
+
# %% ../../nbs/core/00_data.ipynb #e61ba13c
|
|
7
7
|
import pandas as pd
|
|
8
8
|
import requests
|
|
9
9
|
from functools import lru_cache
|
|
10
|
+
import pubchempy as pcp
|
|
10
11
|
|
|
11
|
-
# %% ../../nbs/core/00_data.ipynb
|
|
12
|
+
# %% ../../nbs/core/00_data.ipynb #b97285b8
|
|
13
|
+
def name2smi(name):
|
|
14
|
+
"Given a compound name, get SMILES in PubChem database. "
|
|
15
|
+
compounds = pcp.get_compounds(name, 'name')
|
|
16
|
+
if not compounds: return None
|
|
17
|
+
# get the first rank
|
|
18
|
+
c = compounds[0]
|
|
19
|
+
return c.smiles # can also return c.cid compound ID
|
|
20
|
+
|
|
21
|
+
# %% ../../nbs/core/00_data.ipynb #209ffb82
|
|
12
22
|
BASE_URL = "https://github.com/sky1ove/kdock/raw/main/"
|
|
13
23
|
|
|
14
|
-
# %% ../../nbs/core/00_data.ipynb
|
|
24
|
+
# %% ../../nbs/core/00_data.ipynb #d7e2b62a
|
|
15
25
|
@lru_cache()
|
|
16
26
|
def fetch_csv(url):
|
|
17
27
|
return pd.read_csv(url)
|
|
18
28
|
|
|
19
|
-
# %% ../../nbs/core/00_data.ipynb
|
|
29
|
+
# %% ../../nbs/core/00_data.ipynb #1f1a2239
|
|
20
30
|
class Collins:
|
|
21
31
|
"A class of loading compound datasets from Collins lab."
|
|
22
32
|
|
|
@@ -45,7 +55,7 @@ class Collins:
|
|
|
45
55
|
"""
|
|
46
56
|
return fetch_csv(BASE_URL + "dataset/antibiotics_enzyme.csv")
|
|
47
57
|
|
|
48
|
-
# %% ../../nbs/core/00_data.ipynb
|
|
58
|
+
# %% ../../nbs/core/00_data.ipynb #16424d50
|
|
49
59
|
class Kras:
|
|
50
60
|
"A class of fetching various KRAS datasets."
|
|
51
61
|
@staticmethod
|
kdock/core/ligand.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/03_ligand.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
4
|
-
__all__ = ['plot_drug', 'rdkit_conformer', 'tanimoto', 'get_rdkit', 'get_rdkit_3d', 'get_rdkit_all', 'remove_hi_corr',
|
|
5
|
-
'preprocess', 'get_rdkit_df', 'get_fp', 'compress_fp', 'tanimoto_numba', 'hash_fp', 'get_same_mol_group']
|
|
3
|
+
# %% auto #0
|
|
4
|
+
__all__ = ['contain_acrylamide', 'plot_drug', 'rdkit_conformer', 'tanimoto', 'get_rdkit', 'get_rdkit_3d', 'get_rdkit_all',
|
|
5
|
+
'remove_hi_corr', 'preprocess', 'get_rdkit_df', 'get_fp', 'compress_fp', 'tanimoto_numba', 'hash_fp',
|
|
6
|
+
'get_same_mol_group']
|
|
6
7
|
|
|
7
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
8
|
+
# %% ../../nbs/core/03_ligand.ipynb #b02d7ca8-fcbe-4c95-a0bc-4c1a1471578c
|
|
8
9
|
import pandas as pd, numpy as np
|
|
9
10
|
from rdkit import Chem,RDLogger,DataStructs
|
|
10
11
|
from rdkit.Chem import Draw, rdDepictor, Descriptors,Descriptors3D, AllChem
|
|
@@ -16,7 +17,17 @@ from tqdm.contrib.concurrent import process_map
|
|
|
16
17
|
from tqdm import tqdm
|
|
17
18
|
RDLogger.DisableLog('rdApp.warning')
|
|
18
19
|
|
|
19
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
20
|
+
# %% ../../nbs/core/03_ligand.ipynb #e9c0f1db
|
|
21
|
+
def contain_acrylamide(smiles,
|
|
22
|
+
ACRYLAMIDE_SMARTS="C=CC(=O)N", # SMARTS pattern for acrylamide
|
|
23
|
+
):
|
|
24
|
+
"Check if the SMILES contain acrylamide (can form covalent bond with cysteine in protein)"
|
|
25
|
+
acrylamide_pattern = Chem.MolFromSmarts(ACRYLAMIDE_SMARTS)
|
|
26
|
+
mol = Chem.MolFromSmiles(smiles)
|
|
27
|
+
if mol is None: raise ValueError("Invalid SMILES")
|
|
28
|
+
return mol.HasSubstructMatch(acrylamide_pattern)
|
|
29
|
+
|
|
30
|
+
# %% ../../nbs/core/03_ligand.ipynb #18c2b546
|
|
20
31
|
def plot_drug(drug_dict, flip_list=None, save_path=None):
|
|
21
32
|
flip_list = flip_list or []
|
|
22
33
|
mols = []
|
|
@@ -55,7 +66,7 @@ def plot_drug(drug_dict, flip_list=None, save_path=None):
|
|
|
55
66
|
# display(SVG(svg_str))
|
|
56
67
|
return svg_obj
|
|
57
68
|
|
|
58
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
69
|
+
# %% ../../nbs/core/03_ligand.ipynb #3136f2ba-17fc-49a8-ab9e-15107d39cb62
|
|
59
70
|
def rdkit_conformer(SMILES, # SMILES string
|
|
60
71
|
output=None, # file ".sdf" to be saved
|
|
61
72
|
method='ETKDG', # Optimization method, can be 'UFF', 'MMFF' or 'ETKDGv3'
|
|
@@ -98,7 +109,7 @@ def rdkit_conformer(SMILES, # SMILES string
|
|
|
98
109
|
w.close()
|
|
99
110
|
return mol
|
|
100
111
|
|
|
101
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
112
|
+
# %% ../../nbs/core/03_ligand.ipynb #6b5f7fdd-89e2-481e-9f37-f999fccf3c86
|
|
102
113
|
def tanimoto(df, # df with SMILES and ID columns
|
|
103
114
|
smiles_col='SMILES', # colname of SMILES
|
|
104
115
|
id_col='ID', # colname of compound ID
|
|
@@ -136,7 +147,7 @@ def tanimoto(df, # df with SMILES and ID columns
|
|
|
136
147
|
|
|
137
148
|
return result_df
|
|
138
149
|
|
|
139
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
150
|
+
# %% ../../nbs/core/03_ligand.ipynb #9268af86-13e9-4051-8300-635c12e00186
|
|
140
151
|
def get_rdkit(SMILES:str):
|
|
141
152
|
"""
|
|
142
153
|
Extract chemical features from SMILES
|
|
@@ -145,7 +156,7 @@ def get_rdkit(SMILES:str):
|
|
|
145
156
|
mol = Chem.MolFromSmiles(SMILES)
|
|
146
157
|
return Descriptors.CalcMolDescriptors(mol)
|
|
147
158
|
|
|
148
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
159
|
+
# %% ../../nbs/core/03_ligand.ipynb #e003dc50-9fac-44f1-b2dd-15e9ecd812ce
|
|
149
160
|
def get_rdkit_3d(SMILES:str):
|
|
150
161
|
"Extract 3d features from SMILES"
|
|
151
162
|
mol = Chem.MolFromSmiles(SMILES)
|
|
@@ -154,14 +165,14 @@ def get_rdkit_3d(SMILES:str):
|
|
|
154
165
|
AllChem.UFFOptimizeMolecule(mol)
|
|
155
166
|
return Descriptors3D.CalcMolDescriptors3D(mol)
|
|
156
167
|
|
|
157
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
168
|
+
# %% ../../nbs/core/03_ligand.ipynb #7512ba88-69bf-4aaa-97b5-3624ad63184c
|
|
158
169
|
def get_rdkit_all(SMILES:str):
|
|
159
170
|
"Extract chemical features and 3d features from SMILES"
|
|
160
171
|
feat = get_rdkit(SMILES)
|
|
161
172
|
feat_3d = get_rdkit_3d(SMILES)
|
|
162
173
|
return feat|feat_3d
|
|
163
174
|
|
|
164
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
175
|
+
# %% ../../nbs/core/03_ligand.ipynb #081e1e1c-5b9e-4297-886c-3f17e3594dfa
|
|
165
176
|
def remove_hi_corr(df: pd.DataFrame,
|
|
166
177
|
thr=0.99 # threshold
|
|
167
178
|
):
|
|
@@ -171,7 +182,7 @@ def remove_hi_corr(df: pd.DataFrame,
|
|
|
171
182
|
to_drop = [column for column in upper.columns if any(upper[column] > thr)]
|
|
172
183
|
return df.drop(to_drop, axis=1), to_drop
|
|
173
184
|
|
|
174
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
185
|
+
# %% ../../nbs/core/03_ligand.ipynb #bdaca119-3060-4df6-b86f-1a2e9adf0950
|
|
175
186
|
def preprocess(df: pd.DataFrame, thr=0.99):
|
|
176
187
|
"Remove features with no variance, and highly correlated features based on threshold."
|
|
177
188
|
col_ori = df.columns
|
|
@@ -196,7 +207,7 @@ def preprocess(df: pd.DataFrame, thr=0.99):
|
|
|
196
207
|
|
|
197
208
|
return df
|
|
198
209
|
|
|
199
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
210
|
+
# %% ../../nbs/core/03_ligand.ipynb #8d66039a-89ad-4da4-bee1-c37e78c59e36
|
|
200
211
|
def get_rdkit_df(df: pd.DataFrame,
|
|
201
212
|
include_3d=False,
|
|
202
213
|
col='SMILES',
|
|
@@ -217,7 +228,7 @@ def get_rdkit_df(df: pd.DataFrame,
|
|
|
217
228
|
|
|
218
229
|
return out
|
|
219
230
|
|
|
220
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
231
|
+
# %% ../../nbs/core/03_ligand.ipynb #40788d6f-b180-4b07-9b01-b8d607939523
|
|
221
232
|
def get_fp(SMILES,
|
|
222
233
|
name='ecfp',
|
|
223
234
|
ELEMENTS_PER_WORKER = 1_000_000):
|
|
@@ -242,12 +253,12 @@ def get_fp(SMILES,
|
|
|
242
253
|
|
|
243
254
|
return np.concatenate(middle_parts)
|
|
244
255
|
|
|
245
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
256
|
+
# %% ../../nbs/core/03_ligand.ipynb #a1a77f98-f3c6-462c-9e5e-c4caf18a40a2
|
|
246
257
|
def compress_fp(array):
|
|
247
258
|
"Compress binary finterprints using np.packbits"
|
|
248
259
|
return np.packbits(array,axis=1)
|
|
249
260
|
|
|
250
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
261
|
+
# %% ../../nbs/core/03_ligand.ipynb #d27a7e74-83cd-414b-8c96-c8439535fe6f
|
|
251
262
|
@numba.njit(parallel=True)
|
|
252
263
|
def tanimoto_numba(fps):
|
|
253
264
|
"Get a NxN matrix of tanimoto similarity among N compounds."
|
|
@@ -263,12 +274,12 @@ def tanimoto_numba(fps):
|
|
|
263
274
|
result[j, i] = sim # fill symmetric position
|
|
264
275
|
return result
|
|
265
276
|
|
|
266
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
277
|
+
# %% ../../nbs/core/03_ligand.ipynb #9e6d6bf4-061c-4f13-b916-9d7366fbde63
|
|
267
278
|
def hash_fp(fp_row):
|
|
268
279
|
"Hash a binary fingerprint row using SHA256"
|
|
269
280
|
return hashlib.sha256(fp_row.tobytes()).hexdigest()
|
|
270
281
|
|
|
271
|
-
# %% ../../nbs/core/03_ligand.ipynb
|
|
282
|
+
# %% ../../nbs/core/03_ligand.ipynb #07ad9560-7afd-4c16-8614-c9ab39afbbec
|
|
272
283
|
def get_same_mol_group(df, smi_col='SMILES'):
|
|
273
284
|
"Assign a group number to the same compounds by utilizing hash sha256 to encode morgan fp and find same molecule."
|
|
274
285
|
df = df.copy()
|
kdock/core/plot.py
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/04_plot.ipynb.
|
|
4
4
|
|
|
5
|
-
# %% auto 0
|
|
5
|
+
# %% auto #0
|
|
6
6
|
__all__ = ['reduce_feature', 'set_sns', 'plot_2d', 'plot_corr']
|
|
7
7
|
|
|
8
|
-
# %% ../../nbs/core/04_plot.ipynb
|
|
8
|
+
# %% ../../nbs/core/04_plot.ipynb #1fdc92f2
|
|
9
9
|
import pandas as pd, seaborn as sns
|
|
10
10
|
from fastcore.meta import delegates
|
|
11
11
|
from matplotlib import pyplot as plt
|
|
@@ -18,7 +18,7 @@ from sklearn.decomposition import PCA
|
|
|
18
18
|
from sklearn.manifold import TSNE
|
|
19
19
|
from umap import UMAP
|
|
20
20
|
|
|
21
|
-
# %% ../../nbs/core/04_plot.ipynb
|
|
21
|
+
# %% ../../nbs/core/04_plot.ipynb #2e65eb15
|
|
22
22
|
def reduce_feature(data, # df or numpy array
|
|
23
23
|
method='pca', # dimensionality reduction method, accept both capital and lower case
|
|
24
24
|
complexity=20, # None for PCA; perfplexity for TSNE, recommend: 30; n_neigbors for UMAP, recommend: 15
|
|
@@ -53,13 +53,13 @@ def reduce_feature(data, # df or numpy array
|
|
|
53
53
|
|
|
54
54
|
return embedding_df
|
|
55
55
|
|
|
56
|
-
# %% ../../nbs/core/04_plot.ipynb
|
|
56
|
+
# %% ../../nbs/core/04_plot.ipynb #3012ced7
|
|
57
57
|
def set_sns():
|
|
58
58
|
sns.set(rc={"figure.dpi":300, 'savefig.dpi':300})
|
|
59
59
|
sns.set_context('notebook')
|
|
60
60
|
sns.set_style("ticks")
|
|
61
61
|
|
|
62
|
-
# %% ../../nbs/core/04_plot.ipynb
|
|
62
|
+
# %% ../../nbs/core/04_plot.ipynb #e284443d
|
|
63
63
|
@delegates(sns.scatterplot)
|
|
64
64
|
def plot_2d(X: pd.DataFrame, # a dataframe that has first column to be x, and second column to be y
|
|
65
65
|
**kwargs, # arguments for sns.scatterplot
|
|
@@ -68,7 +68,7 @@ def plot_2d(X: pd.DataFrame, # a dataframe that has first column to be x, and se
|
|
|
68
68
|
plt.figure(figsize=(7,7))
|
|
69
69
|
sns.scatterplot(data = X,x=X.columns[0],y=X.columns[1],alpha=0.7,**kwargs)
|
|
70
70
|
|
|
71
|
-
# %% ../../nbs/core/04_plot.ipynb
|
|
71
|
+
# %% ../../nbs/core/04_plot.ipynb #afb87d13
|
|
72
72
|
def plot_corr(x,#a column of df
|
|
73
73
|
y,#a column of df
|
|
74
74
|
xlabel=None,# x axis label
|
kdock/core/protein.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/02_protein.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['get_uniprot_seq', 'get_uniprot_features', 'get_uniprot_kd', 'get_uniprot_type', 'apply_mut_single',
|
|
5
5
|
'apply_mut_complex', 'compare_seq']
|
|
6
6
|
|
|
7
|
-
# %% ../../nbs/core/02_protein.ipynb
|
|
7
|
+
# %% ../../nbs/core/02_protein.ipynb #d1ba401f-ec26-4182-986a-e40837f9a915
|
|
8
8
|
import pandas as pd
|
|
9
9
|
import requests,re
|
|
10
10
|
from functools import lru_cache
|
|
@@ -12,7 +12,7 @@ from functools import lru_cache
|
|
|
12
12
|
# for compare seq
|
|
13
13
|
from Bio.Align import PairwiseAligner
|
|
14
14
|
|
|
15
|
-
# %% ../../nbs/core/02_protein.ipynb
|
|
15
|
+
# %% ../../nbs/core/02_protein.ipynb #81c804f5-a4d8-4b99-a47e-3355779a1bbd
|
|
16
16
|
@lru_cache()
|
|
17
17
|
def get_uniprot_seq(uniprot_id):
|
|
18
18
|
"Queries the UniProt database to retrieve the protein sequence for a given UniProt ID."
|
|
@@ -29,7 +29,7 @@ def get_uniprot_seq(uniprot_id):
|
|
|
29
29
|
else:
|
|
30
30
|
return f"Error: Unable to retrieve sequence for UniProt ID {uniprot_id}. Status code: {response.status_code}"
|
|
31
31
|
|
|
32
|
-
# %% ../../nbs/core/02_protein.ipynb
|
|
32
|
+
# %% ../../nbs/core/02_protein.ipynb #68c733a5-bbc9-4373-b9d2-23c545e74316
|
|
33
33
|
@lru_cache()
|
|
34
34
|
def get_uniprot_features(uniprot_id):
|
|
35
35
|
"Given uniprot_id, get specific region for uniprot features."
|
|
@@ -63,7 +63,7 @@ def get_uniprot_features(uniprot_id):
|
|
|
63
63
|
else:
|
|
64
64
|
raise ValueError(f"Failed to retrieve UniProt features for {uniprot_id}")
|
|
65
65
|
|
|
66
|
-
# %% ../../nbs/core/02_protein.ipynb
|
|
66
|
+
# %% ../../nbs/core/02_protein.ipynb #26d793bb-3f87-4e84-bcf6-6ca62a794b32
|
|
67
67
|
def get_uniprot_kd(uniprot_id):
|
|
68
68
|
"Query 'Domain: Protein kinase' based on UniProt ID and get its sequence info."
|
|
69
69
|
data = get_uniprot_features(uniprot_id)
|
|
@@ -86,7 +86,7 @@ def get_uniprot_kd(uniprot_id):
|
|
|
86
86
|
|
|
87
87
|
return out
|
|
88
88
|
|
|
89
|
-
# %% ../../nbs/core/02_protein.ipynb
|
|
89
|
+
# %% ../../nbs/core/02_protein.ipynb #4005a8aa-f8a1-4b99-819e-d7c08d487bf2
|
|
90
90
|
def get_uniprot_type(uniprot_id,type_='Signal'):
|
|
91
91
|
"Get region sequences based on UniProt ID features."
|
|
92
92
|
data = get_uniprot_features(uniprot_id)
|
|
@@ -118,7 +118,7 @@ def get_uniprot_type(uniprot_id,type_='Signal'):
|
|
|
118
118
|
|
|
119
119
|
return out
|
|
120
120
|
|
|
121
|
-
# %% ../../nbs/core/02_protein.ipynb
|
|
121
|
+
# %% ../../nbs/core/02_protein.ipynb #f06c0c1d-829d-4f9e-bcc2-9457050faaf4
|
|
122
122
|
def apply_mut_single(seq, # protein sequence
|
|
123
123
|
*mutations, # e.g., E709A
|
|
124
124
|
start_pos=1, # if the protein sequence does not start from index 1, indicate the start index to match the mutations
|
|
@@ -142,7 +142,7 @@ def apply_mut_single(seq, # protein sequence
|
|
|
142
142
|
|
|
143
143
|
return ''.join(seq_list)
|
|
144
144
|
|
|
145
|
-
# %% ../../nbs/core/02_protein.ipynb
|
|
145
|
+
# %% ../../nbs/core/02_protein.ipynb #5c66ef12-8348-4dc6-affc-04858c1411f5
|
|
146
146
|
def apply_mut_complex(seq, # protein sequence
|
|
147
147
|
mut, # mutation (e.g., G776delinsVC/S783C, G778dupGSP)
|
|
148
148
|
start_pos=1, # if truncated protein sequence, indicate where it starts to match the position of mutation
|
|
@@ -196,7 +196,7 @@ def apply_mut_complex(seq, # protein sequence
|
|
|
196
196
|
|
|
197
197
|
return ''.join(seq)
|
|
198
198
|
|
|
199
|
-
# %% ../../nbs/core/02_protein.ipynb
|
|
199
|
+
# %% ../../nbs/core/02_protein.ipynb #5ac69a8c-fe83-48fc-926f-b99a15624ce3
|
|
200
200
|
def compare_seq(
|
|
201
201
|
seq1: str,
|
|
202
202
|
seq2: str,
|
kdock/core/utils.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/01_utils.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['rglob', 'copy_files', 'get_rec_lig', 'get_box', 'view_mol', 'view_complex']
|
|
5
5
|
|
|
6
|
-
# %% ../../nbs/core/01_utils.ipynb
|
|
6
|
+
# %% ../../nbs/core/01_utils.ipynb #61fddcbc
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
import subprocess,shutil,zipfile
|
|
9
9
|
import numpy as np
|
|
@@ -11,7 +11,7 @@ import numpy as np
|
|
|
11
11
|
import py3Dmol
|
|
12
12
|
from rdkit import Chem
|
|
13
13
|
|
|
14
|
-
# %% ../../nbs/core/01_utils.ipynb
|
|
14
|
+
# %% ../../nbs/core/01_utils.ipynb #2cb26b85
|
|
15
15
|
def rglob(path, pattern, max_depth):
|
|
16
16
|
"Get a file list given folder depths"
|
|
17
17
|
base_path = Path(path).resolve()
|
|
@@ -19,7 +19,7 @@ def rglob(path, pattern, max_depth):
|
|
|
19
19
|
if len(path.relative_to(base_path).parts) <= max_depth:
|
|
20
20
|
yield path
|
|
21
21
|
|
|
22
|
-
# %% ../../nbs/core/01_utils.ipynb
|
|
22
|
+
# %% ../../nbs/core/01_utils.ipynb #8d003773
|
|
23
23
|
def copy_files(file_list, dest_dir):
|
|
24
24
|
"Copy a list of files to the destination directory, or zip them if dest_dir ends with .zip."
|
|
25
25
|
dest_path = Path(dest_dir)
|
|
@@ -37,7 +37,7 @@ def copy_files(file_list, dest_dir):
|
|
|
37
37
|
shutil.copy2(file_path, dest_path / file_path.name)
|
|
38
38
|
print(f'Copied {len(file_list)} files to {dest_path}')
|
|
39
39
|
|
|
40
|
-
# %% ../../nbs/core/01_utils.ipynb
|
|
40
|
+
# %% ../../nbs/core/01_utils.ipynb #3ffd3d10
|
|
41
41
|
def get_rec_lig(pdb_id: str, # pdb id for download
|
|
42
42
|
lig_id: str, # ligand id shown on the protein page
|
|
43
43
|
out_dir = '.', # directory path to save pdb files
|
|
@@ -81,7 +81,7 @@ def get_rec_lig(pdb_id: str, # pdb id for download
|
|
|
81
81
|
|
|
82
82
|
return str(rec_file), str(lig_sdf_file)
|
|
83
83
|
|
|
84
|
-
# %% ../../nbs/core/01_utils.ipynb
|
|
84
|
+
# %% ../../nbs/core/01_utils.ipynb #491e748e
|
|
85
85
|
def get_box(sdf_file, autobox_add=4.0,tolist=False):
|
|
86
86
|
"Get the box coordinates of ligand.sdf; mimic GNINA's --autobox_ligand behavior."
|
|
87
87
|
mol = Chem.SDMolSupplier(str(sdf_file), removeHs=False)[0]
|
|
@@ -107,7 +107,7 @@ def get_box(sdf_file, autobox_add=4.0,tolist=False):
|
|
|
107
107
|
}
|
|
108
108
|
return list(box_dict.values()) if tolist else box_dict
|
|
109
109
|
|
|
110
|
-
# %% ../../nbs/core/01_utils.ipynb
|
|
110
|
+
# %% ../../nbs/core/01_utils.ipynb #cdf59cb8
|
|
111
111
|
def view_mol(file, #sdf or pdb file
|
|
112
112
|
):
|
|
113
113
|
"Visualize pdb or sdf file"
|
|
@@ -118,7 +118,7 @@ def view_mol(file, #sdf or pdb file
|
|
|
118
118
|
v.zoomTo()
|
|
119
119
|
v.show()
|
|
120
120
|
|
|
121
|
-
# %% ../../nbs/core/01_utils.ipynb
|
|
121
|
+
# %% ../../nbs/core/01_utils.ipynb #83cd0e6a
|
|
122
122
|
def view_complex(receptor, # protein file
|
|
123
123
|
ligand, # ligand (green), or docked ligand
|
|
124
124
|
ori_ligand=None, # original ligand (yellow)
|
kdock/gnina/dock.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/gnina/04_gnina_docking.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['setup_gnina_local', 'setup_gnina_docker', 'extract_gnina_dock', 'gnina_dock']
|
|
5
5
|
|
|
6
|
-
# %% ../../nbs/gnina/04_gnina_docking.ipynb
|
|
6
|
+
# %% ../../nbs/gnina/04_gnina_docking.ipynb #e20fcc7c
|
|
7
7
|
# basics
|
|
8
8
|
import re,subprocess, py3Dmol
|
|
9
9
|
from tqdm import tqdm
|
|
@@ -14,7 +14,7 @@ import pandas as pd,numpy as np
|
|
|
14
14
|
from rdkit import Chem
|
|
15
15
|
from rdkit.Chem import AllChem
|
|
16
16
|
|
|
17
|
-
# %% ../../nbs/gnina/04_gnina_docking.ipynb
|
|
17
|
+
# %% ../../nbs/gnina/04_gnina_docking.ipynb #1ce62365
|
|
18
18
|
def setup_gnina_local(version='v1.3'):
|
|
19
19
|
"Download and install gnina in the current directory"
|
|
20
20
|
# Check CUDA availability
|
|
@@ -35,14 +35,14 @@ def setup_gnina_local(version='v1.3'):
|
|
|
35
35
|
|
|
36
36
|
print('Finish setup!')
|
|
37
37
|
|
|
38
|
-
# %% ../../nbs/gnina/04_gnina_docking.ipynb
|
|
38
|
+
# %% ../../nbs/gnina/04_gnina_docking.ipynb #c259e599
|
|
39
39
|
def setup_gnina_docker():
|
|
40
40
|
"Pull gnina docker image"
|
|
41
41
|
print("Pulling GNINA Docker image: gnina/gnina")
|
|
42
42
|
subprocess.run(["docker", "pull", "gnina/gnina"], check=True,stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
43
43
|
print("GNINA Docker image is ready.")
|
|
44
44
|
|
|
45
|
-
# %% ../../nbs/gnina/04_gnina_docking.ipynb
|
|
45
|
+
# %% ../../nbs/gnina/04_gnina_docking.ipynb #a68899f4
|
|
46
46
|
def extract_gnina_dock(gnina_output):
|
|
47
47
|
"Extract values from gnina output"
|
|
48
48
|
mode1_line = re.search(r'\b1\s+(-?\d+\.\d+)\s+(-?\d+\.\d+)\s+(-?\d+\.\d+)\b', gnina_output)
|
|
@@ -56,7 +56,7 @@ def extract_gnina_dock(gnina_output):
|
|
|
56
56
|
|
|
57
57
|
return None
|
|
58
58
|
|
|
59
|
-
# %% ../../nbs/gnina/04_gnina_docking.ipynb
|
|
59
|
+
# %% ../../nbs/gnina/04_gnina_docking.ipynb #78d04091
|
|
60
60
|
def gnina_dock(receptor, # receptor file
|
|
61
61
|
ligand, # ligand file
|
|
62
62
|
autobox_ligand, # ligand file isolated from the complex
|
|
@@ -85,7 +85,7 @@ def gnina_dock(receptor, # receptor file
|
|
|
85
85
|
|
|
86
86
|
return values
|
|
87
87
|
|
|
88
|
-
# %% ../../nbs/gnina/04_gnina_docking.ipynb
|
|
88
|
+
# %% ../../nbs/gnina/04_gnina_docking.ipynb #8e868b74
|
|
89
89
|
def gnina_dock(df,
|
|
90
90
|
ID_col = 'ID',
|
|
91
91
|
smi_col = 'SMILES',
|
kdock/gnina/rescore.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/gnina/05_gnina_AF3_rescore.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['ChainSelect', 'rename_residues', 'split_cif', 'pdb2sdf', 'prepare_rec_lig', 'gnina_rescore_local',
|
|
5
5
|
'gnina_rescore_docker', 'extract_gnina_rescore', 'get_gnina_rescore', 'get_gnina_rescore_folder']
|
|
6
6
|
|
|
7
|
-
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb
|
|
7
|
+
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #37eb344f-6e5c-4350-b0fd-962340a4bf0d
|
|
8
8
|
import pandas as pd
|
|
9
9
|
import re, os, subprocess, py3Dmol
|
|
10
10
|
from Bio.PDB import MMCIFParser, PDBIO, Select
|
|
@@ -15,7 +15,7 @@ from fastcore.all import L
|
|
|
15
15
|
from tqdm.contrib.concurrent import process_map
|
|
16
16
|
from functools import partial
|
|
17
17
|
|
|
18
|
-
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb
|
|
18
|
+
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #2e53d110-a6a6-4d46-8b04-9e7a76069995
|
|
19
19
|
class ChainSelect(Select):
|
|
20
20
|
"Select chain to save"
|
|
21
21
|
def __init__(self, chain_ids):
|
|
@@ -23,7 +23,7 @@ class ChainSelect(Select):
|
|
|
23
23
|
def accept_chain(self, chain):
|
|
24
24
|
return chain.get_id() in self.chain_ids
|
|
25
25
|
|
|
26
|
-
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb
|
|
26
|
+
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #44e31366-70c4-4b9a-9e27-603442683fed
|
|
27
27
|
def rename_residues(structure, chain_id, new_resname='LIG'):
|
|
28
28
|
"Rename residue name from LIG_L to LIG as LIG_L exceeds lengths and leads to error in RDKit"
|
|
29
29
|
for model in structure:
|
|
@@ -32,7 +32,7 @@ def rename_residues(structure, chain_id, new_resname='LIG'):
|
|
|
32
32
|
for residue in chain:
|
|
33
33
|
residue.resname = new_resname
|
|
34
34
|
|
|
35
|
-
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb
|
|
35
|
+
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #a760f9d5-7aba-47fc-b47f-593a862587f9
|
|
36
36
|
def split_cif(cif_path, rec_chain_id,lig_chain_id, rec_pdb_path, lig_pdb_path):
|
|
37
37
|
"Split AF3 output CIF to protein and ligand PDBs"
|
|
38
38
|
parser = MMCIFParser(QUIET=True)
|
|
@@ -43,7 +43,7 @@ def split_cif(cif_path, rec_chain_id,lig_chain_id, rec_pdb_path, lig_pdb_path):
|
|
|
43
43
|
io.save(str(rec_pdb_path), ChainSelect(rec_chain_id)) # receptor
|
|
44
44
|
io.save(str(lig_pdb_path), ChainSelect(lig_chain_id)) # ligand
|
|
45
45
|
|
|
46
|
-
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb
|
|
46
|
+
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #ecf8c5a3-84e5-4c96-a97a-1f22e1d52b4e
|
|
47
47
|
def pdb2sdf(pdb_path, sdf_path):
|
|
48
48
|
"Convert ligand pdb to sdf file"
|
|
49
49
|
mol = Chem.MolFromPDBFile(pdb_path, sanitize=True, removeHs=False)
|
|
@@ -56,7 +56,7 @@ def pdb2sdf(pdb_path, sdf_path):
|
|
|
56
56
|
print('Conversion failed for:', pdb_path)
|
|
57
57
|
return pdb_path
|
|
58
58
|
|
|
59
|
-
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb
|
|
59
|
+
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #223e66d6-f08e-4af8-96f9-cf2a78e9dfc1
|
|
60
60
|
def prepare_rec_lig(cif_path, rec_chain_id, lig_chain_id, rec_pdb_path, lig_pdb_path):
|
|
61
61
|
"Split AF3 cif to protein.pdb (chainA) and ligand.sdf (chainL) "
|
|
62
62
|
|
|
@@ -70,7 +70,7 @@ def prepare_rec_lig(cif_path, rec_chain_id, lig_chain_id, rec_pdb_path, lig_pdb_
|
|
|
70
70
|
pass
|
|
71
71
|
return failed
|
|
72
72
|
|
|
73
|
-
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb
|
|
73
|
+
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #e2d3a635-ce64-4e0a-a235-e659f191121f
|
|
74
74
|
def gnina_rescore_local(protein_pdb, # receptor file
|
|
75
75
|
ligand_sdf, # ligand file
|
|
76
76
|
CNN_affinity=True,
|
|
@@ -91,7 +91,7 @@ def gnina_rescore_local(protein_pdb, # receptor file
|
|
|
91
91
|
result = subprocess.run(command, capture_output=True, text=True)
|
|
92
92
|
return result.stdout
|
|
93
93
|
|
|
94
|
-
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb
|
|
94
|
+
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #e1d09a6e-8323-4503-b80c-5d5f95889b8f
|
|
95
95
|
def gnina_rescore_docker(protein_pdb,
|
|
96
96
|
ligand_sdf,
|
|
97
97
|
CNN_affinity=True,
|
|
@@ -124,7 +124,7 @@ def gnina_rescore_docker(protein_pdb,
|
|
|
124
124
|
result = subprocess.run(command, capture_output=True, text=True)
|
|
125
125
|
return result.stdout
|
|
126
126
|
|
|
127
|
-
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb
|
|
127
|
+
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #c9bd835a-b955-469e-a732-edbad6e2ddee
|
|
128
128
|
def extract_gnina_rescore(txt):
|
|
129
129
|
"""Extract GNINA output metrics into a dictionary (partial match allowed)."""
|
|
130
130
|
result = {}
|
|
@@ -145,7 +145,7 @@ def extract_gnina_rescore(txt):
|
|
|
145
145
|
|
|
146
146
|
return result
|
|
147
147
|
|
|
148
|
-
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb
|
|
148
|
+
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #b21d71d8-a420-4e7b-8882-1caa55e84dc2
|
|
149
149
|
def get_gnina_rescore(cif_path,
|
|
150
150
|
rec_chain_id='A',
|
|
151
151
|
lig_chain_id='L',
|
|
@@ -170,7 +170,7 @@ def get_gnina_rescore(cif_path,
|
|
|
170
170
|
gnina_output = gnina_rescore_docker(rec_path,lig_path,CNN_affinity,vinardo)
|
|
171
171
|
return extract_gnina_rescore(gnina_output)
|
|
172
172
|
|
|
173
|
-
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb
|
|
173
|
+
# %% ../../nbs/gnina/05_gnina_AF3_rescore.ipynb #1e80f250-2c22-45e9-8a97-6f68c2e8514d
|
|
174
174
|
def get_gnina_rescore_folder(cif_folder,
|
|
175
175
|
rec_chain_id='A',
|
|
176
176
|
lig_chain_id='L',
|
kdock/px/core.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/protenix/07_protenix.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['get_single_job', 'get_single_protein_ligand_json', 'get_protein_ligand_df_json', 'get_virtual_screening_json']
|
|
5
5
|
|
|
6
|
-
# %% ../../nbs/protenix/07_protenix.ipynb
|
|
6
|
+
# %% ../../nbs/protenix/07_protenix.ipynb #b64cf79f-8926-4928-81c0-45d90ffcf425
|
|
7
7
|
import json
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
|
|
10
|
-
# %% ../../nbs/protenix/07_protenix.ipynb
|
|
10
|
+
# %% ../../nbs/protenix/07_protenix.ipynb #d883059f-1b5c-4abc-af97-f8be1f24e2c9
|
|
11
11
|
def get_single_job(job_name, protein_seq, msa_dir, SMILES=None,CCD=None):
|
|
12
12
|
"Get protenix json format of protein and ligand."
|
|
13
13
|
|
|
@@ -40,7 +40,7 @@ def get_single_job(job_name, protein_seq, msa_dir, SMILES=None,CCD=None):
|
|
|
40
40
|
]
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
-
# %% ../../nbs/protenix/07_protenix.ipynb
|
|
43
|
+
# %% ../../nbs/protenix/07_protenix.ipynb #42c3b0ef-c991-401f-9c8c-a1e0dbf5ed2d
|
|
44
44
|
def get_single_protein_ligand_json(job_name,
|
|
45
45
|
protein_seq,
|
|
46
46
|
msa_dir,
|
|
@@ -59,7 +59,7 @@ def get_single_protein_ligand_json(job_name,
|
|
|
59
59
|
|
|
60
60
|
return data
|
|
61
61
|
|
|
62
|
-
# %% ../../nbs/protenix/07_protenix.ipynb
|
|
62
|
+
# %% ../../nbs/protenix/07_protenix.ipynb #129512dd-a788-4593-9986-1b1f2c27d4b8
|
|
63
63
|
def get_protein_ligand_df_json(df,
|
|
64
64
|
id_col,
|
|
65
65
|
seq_col,
|
|
@@ -96,7 +96,7 @@ def get_protein_ligand_df_json(df,
|
|
|
96
96
|
return all_jobs
|
|
97
97
|
|
|
98
98
|
|
|
99
|
-
# %% ../../nbs/protenix/07_protenix.ipynb
|
|
99
|
+
# %% ../../nbs/protenix/07_protenix.ipynb #b6ed9bf7-4a0b-4230-bb89-89fb30f4025e
|
|
100
100
|
def get_virtual_screening_json(df,
|
|
101
101
|
protein_seq,
|
|
102
102
|
msa_dir,
|
kdock/px/dock.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/protenix/06_proteinix_dock.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['capture_output', 'get_protenix_dock', 'get_protenix_vina_dock', 'json2sdf']
|
|
5
5
|
|
|
6
|
-
# %% ../../nbs/protenix/06_proteinix_dock.ipynb
|
|
6
|
+
# %% ../../nbs/protenix/06_proteinix_dock.ipynb #97f1b541-c8d4-4d9f-bf5b-a081269a0a70
|
|
7
7
|
# kdock
|
|
8
8
|
from ..core.utils import *
|
|
9
9
|
|
|
@@ -28,7 +28,7 @@ except ImportError as e:
|
|
|
28
28
|
write_ligand_to_sdf = None
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
# %% ../../nbs/protenix/06_proteinix_dock.ipynb
|
|
31
|
+
# %% ../../nbs/protenix/06_proteinix_dock.ipynb #b885d4cd-7783-4595-8803-a74fbeb24a38
|
|
32
32
|
@contextlib.contextmanager
|
|
33
33
|
def capture_output(log_path):
|
|
34
34
|
with open(log_path, 'a') as log_file: # Append mode prevents overwriting
|
|
@@ -56,7 +56,7 @@ def capture_output(log_path):
|
|
|
56
56
|
os.close(old_stdout_fd)
|
|
57
57
|
os.close(old_stderr_fd)
|
|
58
58
|
|
|
59
|
-
# %% ../../nbs/protenix/06_proteinix_dock.ipynb
|
|
59
|
+
# %% ../../nbs/protenix/06_proteinix_dock.ipynb #77c25ec1-7edd-4e18-8a64-61bff5071f3a
|
|
60
60
|
def get_protenix_dock(receptor_pdb, # pdb path
|
|
61
61
|
ligand_sdf, # sdf path
|
|
62
62
|
box:list, # center xyz + size xyz
|
|
@@ -81,7 +81,7 @@ def get_protenix_dock(receptor_pdb, # pdb path
|
|
|
81
81
|
print(f'Docking complete. Results in: {result_dir}')
|
|
82
82
|
print('Cache dir:', cache_dir)
|
|
83
83
|
|
|
84
|
-
# %% ../../nbs/protenix/06_proteinix_dock.ipynb
|
|
84
|
+
# %% ../../nbs/protenix/06_proteinix_dock.ipynb #59033dc5-d02c-4a1c-a4ef-f56ef9364ff8
|
|
85
85
|
def get_protenix_vina_dock(receptor_pdb, # pdb path
|
|
86
86
|
ligand_sdf, # sdf path
|
|
87
87
|
box:list, # center xyz + size xyz
|
|
@@ -100,7 +100,7 @@ def get_protenix_vina_dock(receptor_pdb, # pdb path
|
|
|
100
100
|
|
|
101
101
|
print(f'Docking complete. Results in: {result_dir}')
|
|
102
102
|
|
|
103
|
-
# %% ../../nbs/protenix/06_proteinix_dock.ipynb
|
|
103
|
+
# %% ../../nbs/protenix/06_proteinix_dock.ipynb #ce3d018b-6762-450f-b2aa-bcdb942445c8
|
|
104
104
|
def json2sdf(json_path,
|
|
105
105
|
sdf_path=None, # .sdf to be saved
|
|
106
106
|
):
|
|
@@ -1,20 +1,17 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kdock
|
|
3
|
-
Version: 0.0.2
|
|
3
|
+
Version: 0.0.4
|
|
4
4
|
Summary: A collection of docking tools
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
Keywords: nbdev
|
|
10
|
-
Classifier: Development Status :: 4 - Beta
|
|
11
|
-
Classifier: Intended Audience :: Developers
|
|
5
|
+
Author-email: Lily Cai <lcai888666@gmail.com>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Repository, https://github.com/sky1ove/kdock
|
|
8
|
+
Project-URL: Documentation, https://sky1ove.github.io/kdock
|
|
9
|
+
Keywords: nbdev,jupyter,notebook,python
|
|
12
10
|
Classifier: Natural Language :: English
|
|
13
|
-
Classifier:
|
|
14
|
-
Classifier:
|
|
15
|
-
Classifier: Programming Language :: Python :: 3
|
|
16
|
-
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
18
15
|
Requires-Python: >=3.9
|
|
19
16
|
Description-Content-Type: text/markdown
|
|
20
17
|
License-File: LICENSE
|
|
@@ -35,22 +32,12 @@ Requires-Dist: openpyxl
|
|
|
35
32
|
Requires-Dist: biopython
|
|
36
33
|
Requires-Dist: requests
|
|
37
34
|
Requires-Dist: scikit-fingerprints
|
|
35
|
+
Requires-Dist: pubchempy
|
|
38
36
|
Provides-Extra: dev
|
|
39
37
|
Requires-Dist: nbdev; extra == "dev"
|
|
40
38
|
Requires-Dist: ipykernel; extra == "dev"
|
|
41
|
-
|
|
42
|
-
Dynamic: author-email
|
|
43
|
-
Dynamic: classifier
|
|
44
|
-
Dynamic: description
|
|
45
|
-
Dynamic: description-content-type
|
|
46
|
-
Dynamic: home-page
|
|
47
|
-
Dynamic: keywords
|
|
48
|
-
Dynamic: license
|
|
39
|
+
Requires-Dist: twine; extra == "dev"
|
|
49
40
|
Dynamic: license-file
|
|
50
|
-
Dynamic: provides-extra
|
|
51
|
-
Dynamic: requires-dist
|
|
52
|
-
Dynamic: requires-python
|
|
53
|
-
Dynamic: summary
|
|
54
41
|
|
|
55
42
|
# kdock
|
|
56
43
|
|
|
@@ -59,8 +46,13 @@ Dynamic: summary
|
|
|
59
46
|
|
|
60
47
|
## Installation
|
|
61
48
|
|
|
62
|
-
Install
|
|
63
|
-
|
|
49
|
+
Install the stable version from PyPI:
|
|
50
|
+
|
|
51
|
+
``` sh
|
|
52
|
+
$ pip install kdock
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Install the latest version from GitHub:
|
|
64
56
|
|
|
65
57
|
``` sh
|
|
66
58
|
$ pip install -U git+https://github.com/sky1ove/kdock.git
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
kdock/__init__.py,sha256=1mptEzQihbdyqqzMgdns_j5ZGK9gz7hR2bsgA_TnjO4,22
|
|
2
|
+
kdock/_modidx.py,sha256=wDTqXS4lSFzownRvr2ldeTcZH8t6kXKkxaN3FuLKs9g,16155
|
|
3
|
+
kdock/af3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
kdock/af3/analyze.py,sha256=Fl_Edh1tKLC4Bkqi14LFQv0fNLZyD9GCKvc3oSDAnMo,6111
|
|
5
|
+
kdock/af3/docker.py,sha256=_CKbagpQaWB43DfhiLOuDI7xA-XX5mp_m1WBclWtJ44,4822
|
|
6
|
+
kdock/af3/json.py,sha256=nB7j9T61crClEUA8sDwwiIHkrMGLf3KlxTiK54vM1BI,9877
|
|
7
|
+
kdock/af3/protein_pairs.py,sha256=p7D2u_g7N_xQ0r8xEjlDWvmwbwnIVKYk3AxqyN-XSls,3878
|
|
8
|
+
kdock/boltz/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
kdock/boltz/dock.py,sha256=Tp7PoTYKBmOutTxCg8ATRQWbgZKG8lWv2DnxXSWAAuY,2841
|
|
10
|
+
kdock/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
+
kdock/core/data.py,sha256=_T2TjYrfifjGj6mZelacCVHbSywqWhKf9awOtp7nIqQ,2779
|
|
12
|
+
kdock/core/ligand.py,sha256=8ndoNrjfWFDxdSk8iw-JBn918Z6eu55bv_t6AAREgz0,11660
|
|
13
|
+
kdock/core/plot.py,sha256=_CRiVLpBH3rp0pv9YrT7t8Y9hGYPpTtwJMvXPS7Smkg,3332
|
|
14
|
+
kdock/core/protein.py,sha256=DPhd6sHrAybQK9VBP0gC1_ZfwZ4sD_l_HiytpsKnowQ,10709
|
|
15
|
+
kdock/core/utils.py,sha256=2M7FyQfUsRsGQLG892p8B1vFiPZpl1tV0RNl9Pv3EKI,5723
|
|
16
|
+
kdock/gnina/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
+
kdock/gnina/dock.py,sha256=LconAw4yFXJmR4JWhf6AlVouapC84Dy07nWi7aHOGXs,4401
|
|
18
|
+
kdock/gnina/rescore.py,sha256=LhWikIrWM1lRf4otVMFfLRcvx0M0sUfSCj7cIqSQ8Jg,7870
|
|
19
|
+
kdock/px/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
+
kdock/px/core.py,sha256=T_SkMqT56pfJQoI8A1U6gzf8OsqFXlheOIuj-jEvhiI,4715
|
|
21
|
+
kdock/px/dock.py,sha256=_RA3mDP_KOmhEI0s24Vz9z4cyuk8u-CXy3TO_fKWGME,4682
|
|
22
|
+
kdock-0.0.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
23
|
+
kdock-0.0.4.dist-info/METADATA,sha256=5fbtbMYp22xHndW0kREt7H7gqjyUQvgQbpUspK33JBY,1838
|
|
24
|
+
kdock-0.0.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
25
|
+
kdock-0.0.4.dist-info/entry_points.txt,sha256=YwnlRC6hsrK3yz6gon_F9IGMri124qmi8VfsymI1Fz0,32
|
|
26
|
+
kdock-0.0.4.dist-info/top_level.txt,sha256=HfWUyFy5aRDePTmNrMCw2m5HNtpfBhbpm6zbXjfsVnA,6
|
|
27
|
+
kdock-0.0.4.dist-info/RECORD,,
|
kdock-0.0.2.dist-info/RECORD
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
kdock/__init__.py,sha256=QvlVh4JTl3JL7jQAja76yKtT-IvF4631ASjWY1wS6AQ,22
|
|
2
|
-
kdock/_modidx.py,sha256=TaXYpAMNa4XCw-wAUH1LZaMQ2uzJvDcIPFfWGhtUD08,15422
|
|
3
|
-
kdock/af3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
kdock/af3/analyze.py,sha256=cdVkWx3bW293XF-SgXBW54zDjXd3nWY3j_hUPfPD7AE,5827
|
|
5
|
-
kdock/af3/docker.py,sha256=0jTP7lFQ6kGRostX_aemWAXpnGf-c2w3ICGIJYxz_b8,4609
|
|
6
|
-
kdock/af3/json.py,sha256=2h6ixv1PTy7KvPzepMvVHb9QuvhSebjTS1Kn5q9YrUs,9522
|
|
7
|
-
kdock/af3/protein_pairs.py,sha256=-Ade5JjSpz2j8hOG3L1T9IO67QiONhPP6ukv4wyDTy8,3595
|
|
8
|
-
kdock/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
kdock/core/data.py,sha256=XmV_65YTgdsWygG9GifjWOkP-5D9mzbzQI19AlS0yxc,2392
|
|
10
|
-
kdock/core/ligand.py,sha256=PSDPnceov6nLkmtMagkEexJ3eTn6v1N7uGOj3swiYSs,10631
|
|
11
|
-
kdock/core/plot.py,sha256=kbkIDqksiOXTvisG5TcjEHkZSImisV1Yxl7r_CGOmNo,3294
|
|
12
|
-
kdock/core/protein.py,sha256=y0BzCPRphbYY4aW6DBCiRbbMv8GQXMZy4n7lzr4_E90,10424
|
|
13
|
-
kdock/core/utils.py,sha256=Lpnu6mWNFOIkxJExp39Gei9EHlWCMn-ZOFiDUesrvoU,5670
|
|
14
|
-
kdock/gnina/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
kdock/gnina/dock.py,sha256=BY2lvmLplDq-x6VTOhckBvVWmtlMfbAoB14W2dxXtAw,4356
|
|
16
|
-
kdock/gnina/rescore.py,sha256=n2Nwa8c9fyrNfwTC7UuZn-8kb9CqHZPr25sjCumYFXk,7478
|
|
17
|
-
kdock/px/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
-
kdock/px/core.py,sha256=P_dmbv0pyT1TY5wlq0r-b32x6BEzNWyeTkoy1C_gK6A,4537
|
|
19
|
-
kdock/px/dock.py,sha256=WZEUG-m0IUDmVZ0CJ_oKZm1mcgCvI_pnVUzj3QLv1io,4506
|
|
20
|
-
kdock-0.0.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
21
|
-
kdock-0.0.2.dist-info/METADATA,sha256=xcQCel5oQm2FL8VwmxnCYz6nfwq4cuklPlqRssULfd8,2122
|
|
22
|
-
kdock-0.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
23
|
-
kdock-0.0.2.dist-info/entry_points.txt,sha256=YwnlRC6hsrK3yz6gon_F9IGMri124qmi8VfsymI1Fz0,32
|
|
24
|
-
kdock-0.0.2.dist-info/top_level.txt,sha256=HfWUyFy5aRDePTmNrMCw2m5HNtpfBhbpm6zbXjfsVnA,6
|
|
25
|
-
kdock-0.0.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|