kdock 0.0.2__tar.gz → 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kdock-0.0.2/kdock.egg-info → kdock-0.0.4}/PKG-INFO +19 -27
- {kdock-0.0.2 → kdock-0.0.4}/README.md +7 -2
- kdock-0.0.4/kdock/__init__.py +1 -0
- {kdock-0.0.2 → kdock-0.0.4}/kdock/_modidx.py +7 -1
- {kdock-0.0.2 → kdock-0.0.4}/kdock/af3/analyze.py +9 -9
- {kdock-0.0.2 → kdock-0.0.4}/kdock/af3/docker.py +7 -7
- {kdock-0.0.2 → kdock-0.0.4}/kdock/af3/json.py +11 -11
- {kdock-0.0.2 → kdock-0.0.4}/kdock/af3/protein_pairs.py +9 -9
- kdock-0.0.4/kdock/boltz/dock.py +107 -0
- {kdock-0.0.2 → kdock-0.0.4}/kdock/core/data.py +17 -7
- {kdock-0.0.2 → kdock-0.0.4}/kdock/core/ligand.py +29 -18
- {kdock-0.0.2 → kdock-0.0.4}/kdock/core/plot.py +6 -6
- {kdock-0.0.2 → kdock-0.0.4}/kdock/core/protein.py +9 -9
- {kdock-0.0.2 → kdock-0.0.4}/kdock/core/utils.py +8 -8
- {kdock-0.0.2 → kdock-0.0.4}/kdock/gnina/dock.py +7 -7
- {kdock-0.0.2 → kdock-0.0.4}/kdock/gnina/rescore.py +12 -12
- kdock-0.0.4/kdock/px/__init__.py +0 -0
- {kdock-0.0.2 → kdock-0.0.4}/kdock/px/core.py +6 -6
- {kdock-0.0.2 → kdock-0.0.4}/kdock/px/dock.py +6 -6
- {kdock-0.0.2 → kdock-0.0.4/kdock.egg-info}/PKG-INFO +19 -27
- {kdock-0.0.2 → kdock-0.0.4}/kdock.egg-info/SOURCES.txt +2 -3
- {kdock-0.0.2 → kdock-0.0.4}/kdock.egg-info/requires.txt +2 -0
- kdock-0.0.4/pyproject.toml +33 -0
- kdock-0.0.2/kdock/__init__.py +0 -1
- kdock-0.0.2/kdock.egg-info/not-zip-safe +0 -1
- kdock-0.0.2/pyproject.toml +0 -11
- kdock-0.0.2/settings.ini +0 -40
- kdock-0.0.2/setup.py +0 -64
- {kdock-0.0.2 → kdock-0.0.4}/LICENSE +0 -0
- {kdock-0.0.2 → kdock-0.0.4}/MANIFEST.in +0 -0
- {kdock-0.0.2 → kdock-0.0.4}/kdock/af3/__init__.py +0 -0
- {kdock-0.0.2/kdock/core → kdock-0.0.4/kdock/boltz}/__init__.py +0 -0
- {kdock-0.0.2/kdock/gnina → kdock-0.0.4/kdock/core}/__init__.py +0 -0
- {kdock-0.0.2/kdock/px → kdock-0.0.4/kdock/gnina}/__init__.py +0 -0
- {kdock-0.0.2 → kdock-0.0.4}/kdock.egg-info/dependency_links.txt +0 -0
- {kdock-0.0.2 → kdock-0.0.4}/kdock.egg-info/entry_points.txt +0 -0
- {kdock-0.0.2 → kdock-0.0.4}/kdock.egg-info/top_level.txt +0 -0
- {kdock-0.0.2 → kdock-0.0.4}/setup.cfg +0 -0
|
@@ -1,20 +1,17 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kdock
|
|
3
|
-
Version: 0.0.2
|
|
3
|
+
Version: 0.0.4
|
|
4
4
|
Summary: A collection of docking tools
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
Keywords: nbdev
|
|
10
|
-
Classifier: Development Status :: 4 - Beta
|
|
11
|
-
Classifier: Intended Audience :: Developers
|
|
5
|
+
Author-email: Lily Cai <lcai888666@gmail.com>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Repository, https://github.com/sky1ove/kdock
|
|
8
|
+
Project-URL: Documentation, https://sky1ove.github.io/kdock
|
|
9
|
+
Keywords: nbdev,jupyter,notebook,python
|
|
12
10
|
Classifier: Natural Language :: English
|
|
13
|
-
Classifier:
|
|
14
|
-
Classifier:
|
|
15
|
-
Classifier: Programming Language :: Python :: 3
|
|
16
|
-
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
18
15
|
Requires-Python: >=3.9
|
|
19
16
|
Description-Content-Type: text/markdown
|
|
20
17
|
License-File: LICENSE
|
|
@@ -35,22 +32,12 @@ Requires-Dist: openpyxl
|
|
|
35
32
|
Requires-Dist: biopython
|
|
36
33
|
Requires-Dist: requests
|
|
37
34
|
Requires-Dist: scikit-fingerprints
|
|
35
|
+
Requires-Dist: pubchempy
|
|
38
36
|
Provides-Extra: dev
|
|
39
37
|
Requires-Dist: nbdev; extra == "dev"
|
|
40
38
|
Requires-Dist: ipykernel; extra == "dev"
|
|
41
|
-
|
|
42
|
-
Dynamic: author-email
|
|
43
|
-
Dynamic: classifier
|
|
44
|
-
Dynamic: description
|
|
45
|
-
Dynamic: description-content-type
|
|
46
|
-
Dynamic: home-page
|
|
47
|
-
Dynamic: keywords
|
|
48
|
-
Dynamic: license
|
|
39
|
+
Requires-Dist: twine; extra == "dev"
|
|
49
40
|
Dynamic: license-file
|
|
50
|
-
Dynamic: provides-extra
|
|
51
|
-
Dynamic: requires-dist
|
|
52
|
-
Dynamic: requires-python
|
|
53
|
-
Dynamic: summary
|
|
54
41
|
|
|
55
42
|
# kdock
|
|
56
43
|
|
|
@@ -59,8 +46,13 @@ Dynamic: summary
|
|
|
59
46
|
|
|
60
47
|
## Installation
|
|
61
48
|
|
|
62
|
-
Install
|
|
63
|
-
|
|
49
|
+
Install stable version from Pypi:
|
|
50
|
+
|
|
51
|
+
``` sh
|
|
52
|
+
$ pip install kdock
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Install latest from the GitHub:
|
|
64
56
|
|
|
65
57
|
``` sh
|
|
66
58
|
$ pip install -U git+https://github.com/sky1ove/kdock.git
|
|
@@ -5,8 +5,13 @@
|
|
|
5
5
|
|
|
6
6
|
## Installation
|
|
7
7
|
|
|
8
|
-
Install
|
|
9
|
-
|
|
8
|
+
Install stable version from Pypi:
|
|
9
|
+
|
|
10
|
+
``` sh
|
|
11
|
+
$ pip install kdock
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Install latest from the GitHub:
|
|
10
15
|
|
|
11
16
|
``` sh
|
|
12
17
|
$ pip install -U git+https://github.com/sky1ove/kdock.git
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.4"
|
|
@@ -42,6 +42,10 @@ d = { 'settings': { 'branch': 'main',
|
|
|
42
42
|
'kdock/af3/protein_pairs.py'),
|
|
43
43
|
'kdock.af3.protein_pairs.get_protein_subjson': ( 'af3/protein_pairs.html#get_protein_subjson',
|
|
44
44
|
'kdock/af3/protein_pairs.py')},
|
|
45
|
+
'kdock.boltz.dock': { 'kdock.boltz.dock.plot_scatter_spearman': ( 'core/boltz/dock.html#plot_scatter_spearman',
|
|
46
|
+
'kdock/boltz/dock.py'),
|
|
47
|
+
'kdock.boltz.dock.prepare_boltz': ('core/boltz/dock.html#prepare_boltz', 'kdock/boltz/dock.py'),
|
|
48
|
+
'kdock.boltz.dock.run_boltz': ('core/boltz/dock.html#run_boltz', 'kdock/boltz/dock.py')},
|
|
45
49
|
'kdock.core.data': { 'kdock.core.data.Collins': ('core/data.html#collins', 'kdock/core/data.py'),
|
|
46
50
|
'kdock.core.data.Collins.get_antibiotics_2k': ( 'core/data.html#collins.get_antibiotics_2k',
|
|
47
51
|
'kdock/core/data.py'),
|
|
@@ -54,8 +58,10 @@ d = { 'settings': { 'branch': 'main',
|
|
|
54
58
|
'kdock.core.data.Kras.get_mirati_g12d_raw': ( 'core/data.html#kras.get_mirati_g12d_raw',
|
|
55
59
|
'kdock/core/data.py'),
|
|
56
60
|
'kdock.core.data.Kras.get_seq': ('core/data.html#kras.get_seq', 'kdock/core/data.py'),
|
|
57
|
-
'kdock.core.data.fetch_csv': ('core/data.html#fetch_csv', 'kdock/core/data.py')},
|
|
61
|
+
'kdock.core.data.fetch_csv': ('core/data.html#fetch_csv', 'kdock/core/data.py'),
|
|
62
|
+
'kdock.core.data.name2smi': ('core/data.html#name2smi', 'kdock/core/data.py')},
|
|
58
63
|
'kdock.core.ligand': { 'kdock.core.ligand.compress_fp': ('core/ligand.html#compress_fp', 'kdock/core/ligand.py'),
|
|
64
|
+
'kdock.core.ligand.contain_acrylamide': ('core/ligand.html#contain_acrylamide', 'kdock/core/ligand.py'),
|
|
59
65
|
'kdock.core.ligand.get_fp': ('core/ligand.html#get_fp', 'kdock/core/ligand.py'),
|
|
60
66
|
'kdock.core.ligand.get_rdkit': ('core/ligand.html#get_rdkit', 'kdock/core/ligand.py'),
|
|
61
67
|
'kdock.core.ligand.get_rdkit_3d': ('core/ligand.html#get_rdkit_3d', 'kdock/core/ligand.py'),
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/03_analyze.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['read_summary_json', 'get_summary_df', 'process_summary_df', 'get_top_cases', 'get_3d_report', 'get_report',
|
|
5
5
|
'copy_file']
|
|
6
6
|
|
|
7
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
7
|
+
# %% ../../nbs/af3/03_analyze.ipynb #bfdd4fae-37c7-4835-afb1-524e5a731add
|
|
8
8
|
import json, shutil, pandas as pd
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
import plotly.graph_objects as go
|
|
11
11
|
|
|
12
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
12
|
+
# %% ../../nbs/af3/03_analyze.ipynb #7b982142-f161-414b-b893-56234282cfb7
|
|
13
13
|
def read_summary_json(json_path):
|
|
14
14
|
"Read json file to dictionary"
|
|
15
15
|
json_path = Path(json_path)
|
|
@@ -29,7 +29,7 @@ def read_summary_json(json_path):
|
|
|
29
29
|
row[key] = value
|
|
30
30
|
return row
|
|
31
31
|
|
|
32
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
32
|
+
# %% ../../nbs/af3/03_analyze.ipynb #6efc38e3-f8d8-4c3c-b068-f5a37a4af057
|
|
33
33
|
def get_summary_df(output_dir):
|
|
34
34
|
"Pack the summary json from the output folder to the df"
|
|
35
35
|
|
|
@@ -37,7 +37,7 @@ def get_summary_df(output_dir):
|
|
|
37
37
|
print(len(path_list),'summary_confidences.json files detected')
|
|
38
38
|
return pd.DataFrame(list(map(read_summary_json,path_list)))
|
|
39
39
|
|
|
40
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
40
|
+
# %% ../../nbs/af3/03_analyze.ipynb #2ae7e00d-f285-4fc1-a40c-af0f64d74376
|
|
41
41
|
def process_summary_df(df,generate_report=False):
|
|
42
42
|
"Post process the json-converted pandas df; remove redundant columns; available for pairs"
|
|
43
43
|
|
|
@@ -90,7 +90,7 @@ def process_summary_df(df,generate_report=False):
|
|
|
90
90
|
|
|
91
91
|
return df
|
|
92
92
|
|
|
93
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
93
|
+
# %% ../../nbs/af3/03_analyze.ipynb #e1af07e2-b86a-492d-b8d7-0fd2f8d90779
|
|
94
94
|
def get_top_cases(df,n=30):
|
|
95
95
|
"Get top cases from the metric"
|
|
96
96
|
idxs = set()
|
|
@@ -109,7 +109,7 @@ def get_top_cases(df,n=30):
|
|
|
109
109
|
|
|
110
110
|
return list(idxs)
|
|
111
111
|
|
|
112
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
112
|
+
# %% ../../nbs/af3/03_analyze.ipynb #89900087-ff2c-48e3-bbe7-3077de2d1f32
|
|
113
113
|
def get_3d_report(df,index_list, x='iptm',y='ptm',z='chain_pair_pae_min_add',save_dir='af_report'):
|
|
114
114
|
"Generate 3d plot html file given case index and x, y, z colname"
|
|
115
115
|
annotation = df.index.where(df.index.isin(index_list),'').str.split('_').str[1]
|
|
@@ -140,7 +140,7 @@ def get_3d_report(df,index_list, x='iptm',y='ptm',z='chain_pair_pae_min_add',sav
|
|
|
140
140
|
fig.write_html(Path(save_dir)/'3d_scatter_plot.html',full_html=True)
|
|
141
141
|
print('Exported the html to 3d_scatter_plot.html')
|
|
142
142
|
|
|
143
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
143
|
+
# %% ../../nbs/af3/03_analyze.ipynb #02cab7f4-a581-43fb-a0b9-6df5ce6d1d08
|
|
144
144
|
def get_report(out_dir,save_dir='af_report'):
|
|
145
145
|
"Generate summary report based on summary_confidences file; return summary df and top cases"
|
|
146
146
|
out = get_summary_df(out_dir)
|
|
@@ -153,7 +153,7 @@ def get_report(out_dir,save_dir='af_report'):
|
|
|
153
153
|
|
|
154
154
|
return out, top_cases
|
|
155
155
|
|
|
156
|
-
# %% ../../nbs/af3/03_analyze.ipynb
|
|
156
|
+
# %% ../../nbs/af3/03_analyze.ipynb #11319fd1-0bee-4e8c-b224-9eed52b8cbb2
|
|
157
157
|
def copy_file(idx_name, source_dir, dest_dir):
|
|
158
158
|
"Copy all model cif generated by AF3 to the new dest folder"
|
|
159
159
|
source_path = Path(source_dir)/f"{idx_name}/{idx_name}_model.cif"
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/01_docker.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['get_docker_command', 'docker_single_full', 'docker_multi_full', 'docker_multi_msa', 'docker_multi_infer']
|
|
5
5
|
|
|
6
|
-
# %% ../../nbs/af3/01_docker.ipynb
|
|
6
|
+
# %% ../../nbs/af3/01_docker.ipynb #57b6887a-a451-462e-9452-d850d3028317
|
|
7
7
|
from fastcore.meta import delegates
|
|
8
8
|
|
|
9
|
-
# %% ../../nbs/af3/01_docker.ipynb
|
|
9
|
+
# %% ../../nbs/af3/01_docker.ipynb #10f9c68f-0374-4cb2-b10e-65bb0107844f
|
|
10
10
|
def get_docker_command(
|
|
11
11
|
input_dir="af_input", # Directory for input data
|
|
12
12
|
output_dir="af_output", # Directory for output results
|
|
@@ -80,7 +80,7 @@ def get_docker_command(
|
|
|
80
80
|
docker_command = "\n".join(cmd_parts)
|
|
81
81
|
print(docker_command)
|
|
82
82
|
|
|
83
|
-
# %% ../../nbs/af3/01_docker.ipynb
|
|
83
|
+
# %% ../../nbs/af3/01_docker.ipynb #5e5c9de1-0b4b-433f-90a8-02fa755aafc3
|
|
84
84
|
@delegates(get_docker_command)
|
|
85
85
|
def docker_single_full(json_path,output_dir,cache_dir=False,**kwargs):
|
|
86
86
|
"Single json task with full pipeline."
|
|
@@ -90,7 +90,7 @@ def docker_single_full(json_path,output_dir,cache_dir=False,**kwargs):
|
|
|
90
90
|
cache_dir=cache_dir,
|
|
91
91
|
**kwargs)
|
|
92
92
|
|
|
93
|
-
# %% ../../nbs/af3/01_docker.ipynb
|
|
93
|
+
# %% ../../nbs/af3/01_docker.ipynb #996f65a3-64ee-4288-aed7-3a0acba73175
|
|
94
94
|
@delegates(get_docker_command)
|
|
95
95
|
def docker_multi_full(input_dir,output_dir,**kwargs):
|
|
96
96
|
"Folder of json as input with full pipeline."
|
|
@@ -99,7 +99,7 @@ def docker_multi_full(input_dir,output_dir,**kwargs):
|
|
|
99
99
|
output_dir=output_dir,
|
|
100
100
|
**kwargs)
|
|
101
101
|
|
|
102
|
-
# %% ../../nbs/af3/01_docker.ipynb
|
|
102
|
+
# %% ../../nbs/af3/01_docker.ipynb #7512a43c-28c2-41f8-b4c5-4ddf1e56f716
|
|
103
103
|
@delegates(get_docker_command)
|
|
104
104
|
def docker_multi_msa(input_dir,output_dir,search_only=True,**kwargs):
|
|
105
105
|
"MSA search only, without structure inference; CPU only."
|
|
@@ -109,7 +109,7 @@ def docker_multi_msa(input_dir,output_dir,search_only=True,**kwargs):
|
|
|
109
109
|
search_only=search_only,
|
|
110
110
|
**kwargs)
|
|
111
111
|
|
|
112
|
-
# %% ../../nbs/af3/01_docker.ipynb
|
|
112
|
+
# %% ../../nbs/af3/01_docker.ipynb #403f1d8f-6055-486a-a0f9-bc4b2ca8feaa
|
|
113
113
|
@delegates(get_docker_command)
|
|
114
114
|
def docker_multi_infer(input_dir,output_dir,skip_search=True,**kwargs):
|
|
115
115
|
"Infer only with pre-calculated MSA; GPU is needed."
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/00_json.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['dump_json', 'get_protein_json', 'read_json', 'get_protein_smiles_json', 'get_protein_ccdcode_json',
|
|
5
5
|
'assign_atom_names_from_graph', 'mol_to_ccd_text', 'sdf2ccd', 'get_protein_ccd_json', 'split_nfolder']
|
|
6
6
|
|
|
7
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
7
|
+
# %% ../../nbs/af3/00_json.ipynb #1d0467e3-9fcb-4e07-a59f-58405b640950
|
|
8
8
|
import re, shutil, json, pandas as pd, numpy as np
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
|
|
@@ -14,13 +14,13 @@ from rdkit import Chem
|
|
|
14
14
|
|
|
15
15
|
from Bio.PDB import PDBParser
|
|
16
16
|
|
|
17
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
17
|
+
# %% ../../nbs/af3/00_json.ipynb #62063f37-32aa-4a12-8e57-29ef4fff1413
|
|
18
18
|
def dump_json(data, save_path):
|
|
19
19
|
"Save json data into a file"
|
|
20
20
|
with open(save_path,'w') as f:
|
|
21
21
|
json.dump(data,f,indent=4)
|
|
22
22
|
|
|
23
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
23
|
+
# %% ../../nbs/af3/00_json.ipynb #14d9fdb5-7e05-47f6-b596-9d751f9f5618
|
|
24
24
|
def get_protein_json(name, # job name
|
|
25
25
|
seq, # aa sequence
|
|
26
26
|
save_path=None, # .json
|
|
@@ -48,13 +48,13 @@ def get_protein_json(name, # job name
|
|
|
48
48
|
dump_json(json_data,save_path)
|
|
49
49
|
return json_data
|
|
50
50
|
|
|
51
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
51
|
+
# %% ../../nbs/af3/00_json.ipynb #9bb34b3b-e90f-4112-a947-aee8a8610335
|
|
52
52
|
def read_json(file_path):
|
|
53
53
|
with open(file_path,'r') as f:
|
|
54
54
|
data = json.load(f)
|
|
55
55
|
return data
|
|
56
56
|
|
|
57
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
57
|
+
# %% ../../nbs/af3/00_json.ipynb #b9490613-30cd-4cf9-93a7-0e53a6645932
|
|
58
58
|
def get_protein_smiles_json(smi_id:str,
|
|
59
59
|
SMILES:str,
|
|
60
60
|
protein_json, # json type
|
|
@@ -88,7 +88,7 @@ def get_protein_smiles_json(smi_id:str,
|
|
|
88
88
|
dump_json(json_data,save_path)
|
|
89
89
|
return json_data
|
|
90
90
|
|
|
91
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
91
|
+
# %% ../../nbs/af3/00_json.ipynb #300bb051-4246-400e-bdc3-dd43e4da47d1
|
|
92
92
|
def get_protein_ccdcode_json(protein_json, # dict with protein sequence
|
|
93
93
|
ccd_code, # str or list of str
|
|
94
94
|
job_id: str, # job/task ID
|
|
@@ -128,7 +128,7 @@ def get_protein_ccdcode_json(protein_json, # dict with protein sequence
|
|
|
128
128
|
|
|
129
129
|
return json_data
|
|
130
130
|
|
|
131
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
131
|
+
# %% ../../nbs/af3/00_json.ipynb #e850527d-826d-4c7f-9a66-bf307c44b809
|
|
132
132
|
# Mapping bond types to mmCIF-compatible values
|
|
133
133
|
_RDKIT_BOND_TYPE_TO_MMCIF = {
|
|
134
134
|
rd_chem.BondType.SINGLE: 'SING',
|
|
@@ -208,7 +208,7 @@ def mol_to_ccd_text(mol, component_id, pdbx_smiles=None, include_hydrogens=False
|
|
|
208
208
|
|
|
209
209
|
return "\n".join(lines)
|
|
210
210
|
|
|
211
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
211
|
+
# %% ../../nbs/af3/00_json.ipynb #c099dc24-d29e-49a1-9cbb-d14e7bc2a3de
|
|
212
212
|
def sdf2ccd(sdf_path,
|
|
213
213
|
CCD_name='lig-1', # do not use '_'; use as less letter as possible, 'lig-any' leads to extra ligands
|
|
214
214
|
):
|
|
@@ -218,7 +218,7 @@ def sdf2ccd(sdf_path,
|
|
|
218
218
|
mol = supplier[0] # Get the first molecule
|
|
219
219
|
return mol_to_ccd_text(mol,CCD_name)
|
|
220
220
|
|
|
221
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
221
|
+
# %% ../../nbs/af3/00_json.ipynb #3953bc6b-3bc0-40e5-a87b-08eb640b8fcf
|
|
222
222
|
def get_protein_ccd_json(protein_json, # dict with protein sequence
|
|
223
223
|
rec_residue_num:int, # 1-indexed, for bondedAtomPairs, e.g., ["A", 145, "SG"]
|
|
224
224
|
rec_atom_id:str, # for bondedAtomPairs, e.g., ["A", 145, "SG"]
|
|
@@ -262,7 +262,7 @@ def get_protein_ccd_json(protein_json, # dict with protein sequence
|
|
|
262
262
|
|
|
263
263
|
return json_data
|
|
264
264
|
|
|
265
|
-
# %% ../../nbs/af3/00_json.ipynb
|
|
265
|
+
# %% ../../nbs/af3/00_json.ipynb #c7f16eec-082a-4233-b3ed-b96d58ecedb4
|
|
266
266
|
def split_nfolder(folder_dir,
|
|
267
267
|
n=4):
|
|
268
268
|
"Move json files from a folder into subfolders (folder_0, folder_1, ..., folder_N)."
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/02_protein_pairs.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
3
|
+
# %% auto #0
|
|
4
4
|
__all__ = ['get_colabfold_cmd', 'copy_a3m', 'a3m_to_seq', 'get_protein_subjson', 'dump_json_folder', 'get_multi_protein_json',
|
|
5
5
|
'generate_pair_df']
|
|
6
6
|
|
|
7
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
7
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #92000c0e-6470-423c-9c55-9972a341282b
|
|
8
8
|
import os, json, shutil, pandas as pd
|
|
9
9
|
from tqdm import tqdm
|
|
10
10
|
from itertools import combinations
|
|
@@ -12,12 +12,12 @@ from pathlib import Path
|
|
|
12
12
|
from .json import *
|
|
13
13
|
from .docker import *
|
|
14
14
|
|
|
15
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
15
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #3f3ae31b-287e-4810-9e10-db969c3fcc6b
|
|
16
16
|
def get_colabfold_cmd(csv_path,project_name):
|
|
17
17
|
print('Run below in terminal:')
|
|
18
18
|
print(f'\n colabfold_batch {csv_path} msa_{project_name} --msa-only')
|
|
19
19
|
|
|
20
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
20
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #9fb90cd0-3260-4dd1-a169-340a75633ab0
|
|
21
21
|
def copy_a3m(a3m_dir: str, # Path to the source directory containing .a3m files.
|
|
22
22
|
dest_dir: str, # Path to the destination directory where files will be copied
|
|
23
23
|
):
|
|
@@ -33,12 +33,12 @@ def copy_a3m(a3m_dir: str, # Path to the source directory containing .a3m files.
|
|
|
33
33
|
|
|
34
34
|
print(f"Copied {len(files)} a3m files from {a3m_dir} to {dest_dir}")
|
|
35
35
|
|
|
36
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
36
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #def3928f-c895-479f-9e88-093fdf3892da
|
|
37
37
|
def a3m_to_seq(file_path:Path):
|
|
38
38
|
"Get protein sequence from a3m file"
|
|
39
39
|
return file_path.read_text().splitlines()[2] # protein sequence is located on line 2
|
|
40
40
|
|
|
41
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
41
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #03da4db8-60b5-409e-889a-f44b2fb27705
|
|
42
42
|
def get_protein_subjson(gene_name, a3m_dir=".",idx = 'A',run_template=True):
|
|
43
43
|
"Get subjson (protein part) with colabfold unpairedMSA .a3m path"
|
|
44
44
|
file_path = Path(a3m_dir)/f"{gene_name}.a3m"
|
|
@@ -55,13 +55,13 @@ def get_protein_subjson(gene_name, a3m_dir=".",idx = 'A',run_template=True):
|
|
|
55
55
|
|
|
56
56
|
return json_data
|
|
57
57
|
|
|
58
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
58
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #76c918a2-630a-48ac-a682-501cfe4a522f
|
|
59
59
|
def dump_json_folder(json_data, folder):
|
|
60
60
|
"Save json under a folder"
|
|
61
61
|
file_path = Path(folder)/f"{json_data['name']}.json"
|
|
62
62
|
with open(file_path,'w') as f: json.dump(json_data,f,indent=4)
|
|
63
63
|
|
|
64
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
64
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #8e97b4ea-17ec-48dd-bca0-c2f11763205a
|
|
65
65
|
def get_multi_protein_json(gene_list,a3m_dir,run_template=True,save_folder=None):
|
|
66
66
|
'Get json of multiple proteins, with unpaired MSA path indicated (from colabfold MSA)'
|
|
67
67
|
sequences = []
|
|
@@ -82,7 +82,7 @@ def get_multi_protein_json(gene_list,a3m_dir,run_template=True,save_folder=None)
|
|
|
82
82
|
dump_json_folder(json_data,save_folder)
|
|
83
83
|
return json_data
|
|
84
84
|
|
|
85
|
-
# %% ../../nbs/af3/02_protein_pairs.ipynb
|
|
85
|
+
# %% ../../nbs/af3/02_protein_pairs.ipynb #8cc62d64-4643-49a3-a7ed-be7231abb0b9
|
|
86
86
|
def generate_pair_df(gene_list,self_pair=True):
|
|
87
87
|
"Unique pair genes in a gene list"
|
|
88
88
|
pairs = list(combinations(gene_list, 2))
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/boltz/00_dock.ipynb.
|
|
2
|
+
|
|
3
|
+
# %% auto #0
|
|
4
|
+
__all__ = ['prepare_boltz', 'run_boltz', 'plot_scatter_spearman']
|
|
5
|
+
|
|
6
|
+
# %% ../../nbs/core/boltz/00_dock.ipynb #3b60daa9
|
|
7
|
+
# basics
|
|
8
|
+
import subprocess
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
# %% ../../nbs/core/boltz/00_dock.ipynb #71a6ad95
|
|
12
|
+
def prepare_boltz(seq: str, # Amino acid sequence of the protein the protein
|
|
13
|
+
smiles: str, # SMILES string of the ligand
|
|
14
|
+
fname: str, # Output filename (should end with .yaml)
|
|
15
|
+
):
|
|
16
|
+
"Create a YAML file for protein-ligand affinity prediction."
|
|
17
|
+
yaml_content = f"""version: 1
|
|
18
|
+
sequences:
|
|
19
|
+
- protein:
|
|
20
|
+
id: "A"
|
|
21
|
+
sequence: "{seq}"
|
|
22
|
+
- ligand:
|
|
23
|
+
id: "B"
|
|
24
|
+
smiles: "{smiles}"
|
|
25
|
+
properties:
|
|
26
|
+
- affinity:
|
|
27
|
+
binder: "B"
|
|
28
|
+
"""
|
|
29
|
+
with open(fname, "w") as f:
|
|
30
|
+
f.write(yaml_content)
|
|
31
|
+
|
|
32
|
+
# %% ../../nbs/core/boltz/00_dock.ipynb #486e2576
|
|
33
|
+
def run_boltz(file_list:list[Path], # list of .yaml path in Pathlib object
|
|
34
|
+
api_key):
|
|
35
|
+
|
|
36
|
+
"Run Boltz-Lab predictions for a list of YAML files."
|
|
37
|
+
|
|
38
|
+
# config key
|
|
39
|
+
subprocess.run(
|
|
40
|
+
["boltz-lab", "config", "--api-key", api_key.strip()],
|
|
41
|
+
check=True
|
|
42
|
+
)
|
|
43
|
+
failed = []
|
|
44
|
+
|
|
45
|
+
for file in file_list:
|
|
46
|
+
print(f"\nSubmitting: {str(file)}")
|
|
47
|
+
|
|
48
|
+
result = subprocess.run(
|
|
49
|
+
["boltz-lab", "predict", str(file),
|
|
50
|
+
"--no-wait", # for batch run, so no need to wait the results til the next
|
|
51
|
+
"--name",file.stem, # job name appeared in boltz
|
|
52
|
+
],
|
|
53
|
+
capture_output=True,
|
|
54
|
+
text=True,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
if result.returncode != 0: failed.append(file.name)
|
|
58
|
+
|
|
59
|
+
print(result.stdout)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
print("\n======== SUMMARY ========")
|
|
63
|
+
print(f"Total: {len(file_list)}")
|
|
64
|
+
print(f"Failed: {len(failed)}")
|
|
65
|
+
|
|
66
|
+
return failed
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# %% ../../nbs/core/boltz/00_dock.ipynb #45b0e6aa
|
|
70
|
+
import matplotlib.pyplot as plt
|
|
71
|
+
import seaborn as sns, numpy as np
|
|
72
|
+
from scipy.stats import spearmanr
|
|
73
|
+
|
|
74
|
+
# %% ../../nbs/core/boltz/00_dock.ipynb #eec74db7
|
|
75
|
+
def plot_scatter_spearman(data, x, y, ax=None):
|
|
76
|
+
"""
|
|
77
|
+
Plot scatter + Spearman correlation and p-value annotation.
|
|
78
|
+
"""
|
|
79
|
+
if ax is None:
|
|
80
|
+
ax = plt.gca()
|
|
81
|
+
|
|
82
|
+
# Drop NA
|
|
83
|
+
sub = data[[x, y]].dropna()
|
|
84
|
+
x_vals = sub[x]
|
|
85
|
+
y_vals = sub[y]
|
|
86
|
+
|
|
87
|
+
# Compute Spearman
|
|
88
|
+
rho, p = spearmanr(x_vals, y_vals)
|
|
89
|
+
|
|
90
|
+
# Plot
|
|
91
|
+
sns.scatterplot(data=sub, x=x, y=y, ax=ax)
|
|
92
|
+
|
|
93
|
+
# Annotate
|
|
94
|
+
text = f"Spearman ρ = {rho:.3f}\np = {p:.2e}"
|
|
95
|
+
ax.text(
|
|
96
|
+
0.98, 0.98, # x, y in axes fraction
|
|
97
|
+
text,
|
|
98
|
+
transform=ax.transAxes,
|
|
99
|
+
ha='right', # horizontal align
|
|
100
|
+
va='top', # vertical align
|
|
101
|
+
fontsize=11,
|
|
102
|
+
bbox=dict(boxstyle="round", facecolor="white", alpha=0.7)
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
ax.set_title(f'{x} vs {y}')
|
|
106
|
+
|
|
107
|
+
return rho, p
|
|
@@ -1,22 +1,32 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/00_data.ipynb.
|
|
2
2
|
|
|
3
|
-
# %% auto 0
|
|
4
|
-
__all__ = ['BASE_URL', 'fetch_csv', 'Collins', 'Kras']
|
|
3
|
+
# %% auto #0
|
|
4
|
+
__all__ = ['BASE_URL', 'name2smi', 'fetch_csv', 'Collins', 'Kras']
|
|
5
5
|
|
|
6
|
-
# %% ../../nbs/core/00_data.ipynb
|
|
6
|
+
# %% ../../nbs/core/00_data.ipynb #e61ba13c
|
|
7
7
|
import pandas as pd
|
|
8
8
|
import requests
|
|
9
9
|
from functools import lru_cache
|
|
10
|
+
import pubchempy as pcp
|
|
10
11
|
|
|
11
|
-
# %% ../../nbs/core/00_data.ipynb
|
|
12
|
+
# %% ../../nbs/core/00_data.ipynb #b97285b8
|
|
13
|
+
def name2smi(name):
|
|
14
|
+
"Given a compound name, get SMILES in PubChem database. "
|
|
15
|
+
compounds = pcp.get_compounds(name, 'name')
|
|
16
|
+
if not compounds: return None
|
|
17
|
+
# get the first rank
|
|
18
|
+
c = compounds[0]
|
|
19
|
+
return c.smiles # can also return c.cid compound ID
|
|
20
|
+
|
|
21
|
+
# %% ../../nbs/core/00_data.ipynb #209ffb82
|
|
12
22
|
BASE_URL = "https://github.com/sky1ove/kdock/raw/main/"
|
|
13
23
|
|
|
14
|
-
# %% ../../nbs/core/00_data.ipynb
|
|
24
|
+
# %% ../../nbs/core/00_data.ipynb #d7e2b62a
|
|
15
25
|
@lru_cache()
|
|
16
26
|
def fetch_csv(url):
|
|
17
27
|
return pd.read_csv(url)
|
|
18
28
|
|
|
19
|
-
# %% ../../nbs/core/00_data.ipynb
|
|
29
|
+
# %% ../../nbs/core/00_data.ipynb #1f1a2239
|
|
20
30
|
class Collins:
|
|
21
31
|
"A class of loading compound datasets from Collins lab."
|
|
22
32
|
|
|
@@ -45,7 +55,7 @@ class Collins:
|
|
|
45
55
|
"""
|
|
46
56
|
return fetch_csv(BASE_URL + "dataset/antibiotics_enzyme.csv")
|
|
47
57
|
|
|
48
|
-
# %% ../../nbs/core/00_data.ipynb
|
|
58
|
+
# %% ../../nbs/core/00_data.ipynb #16424d50
|
|
49
59
|
class Kras:
|
|
50
60
|
"A class of fetching various KRAS datasets."
|
|
51
61
|
@staticmethod
|