kdock 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kdock/__init__.py +1 -0
- kdock/_modidx.py +131 -0
- kdock/af3/__init__.py +0 -0
- kdock/af3/analyze.py +162 -0
- kdock/af3/docker.py +120 -0
- kdock/af3/json.py +282 -0
- kdock/af3/protein_pairs.py +95 -0
- kdock/core/__init__.py +0 -0
- kdock/core/data.py +64 -0
- kdock/core/ligand.py +294 -0
- kdock/core/plot.py +89 -0
- kdock/core/protein.py +283 -0
- kdock/core/utils.py +156 -0
- kdock/gnina/__init__.py +0 -0
- kdock/gnina/dock.py +114 -0
- kdock/gnina/rescore.py +204 -0
- kdock/px/__init__.py +0 -0
- kdock/px/core.py +130 -0
- kdock/px/dock.py +117 -0
- kdock-0.0.2.dist-info/METADATA +80 -0
- kdock-0.0.2.dist-info/RECORD +25 -0
- kdock-0.0.2.dist-info/WHEEL +5 -0
- kdock-0.0.2.dist-info/entry_points.txt +2 -0
- kdock-0.0.2.dist-info/licenses/LICENSE +201 -0
- kdock-0.0.2.dist-info/top_level.txt +1 -0
kdock/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.2"
|
kdock/_modidx.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# Autogenerated by nbdev
|
|
2
|
+
|
|
3
|
+
d = { 'settings': { 'branch': 'main',
|
|
4
|
+
'doc_baseurl': '/kdock',
|
|
5
|
+
'doc_host': 'https://sky1ove.github.io',
|
|
6
|
+
'git_url': 'https://github.com/sky1ove/kdock',
|
|
7
|
+
'lib_path': 'kdock'},
|
|
8
|
+
'syms': { 'kdock.af3.analyze': { 'kdock.af3.analyze.copy_file': ('af3/analyze.html#copy_file', 'kdock/af3/analyze.py'),
|
|
9
|
+
'kdock.af3.analyze.get_3d_report': ('af3/analyze.html#get_3d_report', 'kdock/af3/analyze.py'),
|
|
10
|
+
'kdock.af3.analyze.get_report': ('af3/analyze.html#get_report', 'kdock/af3/analyze.py'),
|
|
11
|
+
'kdock.af3.analyze.get_summary_df': ('af3/analyze.html#get_summary_df', 'kdock/af3/analyze.py'),
|
|
12
|
+
'kdock.af3.analyze.get_top_cases': ('af3/analyze.html#get_top_cases', 'kdock/af3/analyze.py'),
|
|
13
|
+
'kdock.af3.analyze.process_summary_df': ('af3/analyze.html#process_summary_df', 'kdock/af3/analyze.py'),
|
|
14
|
+
'kdock.af3.analyze.read_summary_json': ('af3/analyze.html#read_summary_json', 'kdock/af3/analyze.py')},
|
|
15
|
+
'kdock.af3.docker': { 'kdock.af3.docker.docker_multi_full': ('af3/docker.html#docker_multi_full', 'kdock/af3/docker.py'),
|
|
16
|
+
'kdock.af3.docker.docker_multi_infer': ('af3/docker.html#docker_multi_infer', 'kdock/af3/docker.py'),
|
|
17
|
+
'kdock.af3.docker.docker_multi_msa': ('af3/docker.html#docker_multi_msa', 'kdock/af3/docker.py'),
|
|
18
|
+
'kdock.af3.docker.docker_single_full': ('af3/docker.html#docker_single_full', 'kdock/af3/docker.py'),
|
|
19
|
+
'kdock.af3.docker.get_docker_command': ('af3/docker.html#get_docker_command', 'kdock/af3/docker.py')},
|
|
20
|
+
'kdock.af3.json': { 'kdock.af3.json.assign_atom_names_from_graph': ( 'af3/json.html#assign_atom_names_from_graph',
|
|
21
|
+
'kdock/af3/json.py'),
|
|
22
|
+
'kdock.af3.json.dump_json': ('af3/json.html#dump_json', 'kdock/af3/json.py'),
|
|
23
|
+
'kdock.af3.json.get_protein_ccd_json': ('af3/json.html#get_protein_ccd_json', 'kdock/af3/json.py'),
|
|
24
|
+
'kdock.af3.json.get_protein_ccdcode_json': ('af3/json.html#get_protein_ccdcode_json', 'kdock/af3/json.py'),
|
|
25
|
+
'kdock.af3.json.get_protein_json': ('af3/json.html#get_protein_json', 'kdock/af3/json.py'),
|
|
26
|
+
'kdock.af3.json.get_protein_smiles_json': ('af3/json.html#get_protein_smiles_json', 'kdock/af3/json.py'),
|
|
27
|
+
'kdock.af3.json.mol_to_ccd_text': ('af3/json.html#mol_to_ccd_text', 'kdock/af3/json.py'),
|
|
28
|
+
'kdock.af3.json.read_json': ('af3/json.html#read_json', 'kdock/af3/json.py'),
|
|
29
|
+
'kdock.af3.json.sdf2ccd': ('af3/json.html#sdf2ccd', 'kdock/af3/json.py'),
|
|
30
|
+
'kdock.af3.json.split_nfolder': ('af3/json.html#split_nfolder', 'kdock/af3/json.py')},
|
|
31
|
+
'kdock.af3.protein_pairs': { 'kdock.af3.protein_pairs.a3m_to_seq': ( 'af3/protein_pairs.html#a3m_to_seq',
|
|
32
|
+
'kdock/af3/protein_pairs.py'),
|
|
33
|
+
'kdock.af3.protein_pairs.copy_a3m': ( 'af3/protein_pairs.html#copy_a3m',
|
|
34
|
+
'kdock/af3/protein_pairs.py'),
|
|
35
|
+
'kdock.af3.protein_pairs.dump_json_folder': ( 'af3/protein_pairs.html#dump_json_folder',
|
|
36
|
+
'kdock/af3/protein_pairs.py'),
|
|
37
|
+
'kdock.af3.protein_pairs.generate_pair_df': ( 'af3/protein_pairs.html#generate_pair_df',
|
|
38
|
+
'kdock/af3/protein_pairs.py'),
|
|
39
|
+
'kdock.af3.protein_pairs.get_colabfold_cmd': ( 'af3/protein_pairs.html#get_colabfold_cmd',
|
|
40
|
+
'kdock/af3/protein_pairs.py'),
|
|
41
|
+
'kdock.af3.protein_pairs.get_multi_protein_json': ( 'af3/protein_pairs.html#get_multi_protein_json',
|
|
42
|
+
'kdock/af3/protein_pairs.py'),
|
|
43
|
+
'kdock.af3.protein_pairs.get_protein_subjson': ( 'af3/protein_pairs.html#get_protein_subjson',
|
|
44
|
+
'kdock/af3/protein_pairs.py')},
|
|
45
|
+
'kdock.core.data': { 'kdock.core.data.Collins': ('core/data.html#collins', 'kdock/core/data.py'),
|
|
46
|
+
'kdock.core.data.Collins.get_antibiotics_2k': ( 'core/data.html#collins.get_antibiotics_2k',
|
|
47
|
+
'kdock/core/data.py'),
|
|
48
|
+
'kdock.core.data.Collins.get_antibiotics_39k': ( 'core/data.html#collins.get_antibiotics_39k',
|
|
49
|
+
'kdock/core/data.py'),
|
|
50
|
+
'kdock.core.data.Collins.get_antibiotics_enzyme': ( 'core/data.html#collins.get_antibiotics_enzyme',
|
|
51
|
+
'kdock/core/data.py'),
|
|
52
|
+
'kdock.core.data.Kras': ('core/data.html#kras', 'kdock/core/data.py'),
|
|
53
|
+
'kdock.core.data.Kras.get_mirati_g12d': ('core/data.html#kras.get_mirati_g12d', 'kdock/core/data.py'),
|
|
54
|
+
'kdock.core.data.Kras.get_mirati_g12d_raw': ( 'core/data.html#kras.get_mirati_g12d_raw',
|
|
55
|
+
'kdock/core/data.py'),
|
|
56
|
+
'kdock.core.data.Kras.get_seq': ('core/data.html#kras.get_seq', 'kdock/core/data.py'),
|
|
57
|
+
'kdock.core.data.fetch_csv': ('core/data.html#fetch_csv', 'kdock/core/data.py')},
|
|
58
|
+
'kdock.core.ligand': { 'kdock.core.ligand.compress_fp': ('core/ligand.html#compress_fp', 'kdock/core/ligand.py'),
|
|
59
|
+
'kdock.core.ligand.get_fp': ('core/ligand.html#get_fp', 'kdock/core/ligand.py'),
|
|
60
|
+
'kdock.core.ligand.get_rdkit': ('core/ligand.html#get_rdkit', 'kdock/core/ligand.py'),
|
|
61
|
+
'kdock.core.ligand.get_rdkit_3d': ('core/ligand.html#get_rdkit_3d', 'kdock/core/ligand.py'),
|
|
62
|
+
'kdock.core.ligand.get_rdkit_all': ('core/ligand.html#get_rdkit_all', 'kdock/core/ligand.py'),
|
|
63
|
+
'kdock.core.ligand.get_rdkit_df': ('core/ligand.html#get_rdkit_df', 'kdock/core/ligand.py'),
|
|
64
|
+
'kdock.core.ligand.get_same_mol_group': ('core/ligand.html#get_same_mol_group', 'kdock/core/ligand.py'),
|
|
65
|
+
'kdock.core.ligand.hash_fp': ('core/ligand.html#hash_fp', 'kdock/core/ligand.py'),
|
|
66
|
+
'kdock.core.ligand.plot_drug': ('core/ligand.html#plot_drug', 'kdock/core/ligand.py'),
|
|
67
|
+
'kdock.core.ligand.preprocess': ('core/ligand.html#preprocess', 'kdock/core/ligand.py'),
|
|
68
|
+
'kdock.core.ligand.rdkit_conformer': ('core/ligand.html#rdkit_conformer', 'kdock/core/ligand.py'),
|
|
69
|
+
'kdock.core.ligand.remove_hi_corr': ('core/ligand.html#remove_hi_corr', 'kdock/core/ligand.py'),
|
|
70
|
+
'kdock.core.ligand.tanimoto': ('core/ligand.html#tanimoto', 'kdock/core/ligand.py'),
|
|
71
|
+
'kdock.core.ligand.tanimoto_numba': ('core/ligand.html#tanimoto_numba', 'kdock/core/ligand.py')},
|
|
72
|
+
'kdock.core.plot': { 'kdock.core.plot.plot_2d': ('core/plot.html#plot_2d', 'kdock/core/plot.py'),
|
|
73
|
+
'kdock.core.plot.plot_corr': ('core/plot.html#plot_corr', 'kdock/core/plot.py'),
|
|
74
|
+
'kdock.core.plot.reduce_feature': ('core/plot.html#reduce_feature', 'kdock/core/plot.py'),
|
|
75
|
+
'kdock.core.plot.set_sns': ('core/plot.html#set_sns', 'kdock/core/plot.py')},
|
|
76
|
+
'kdock.core.protein': { 'kdock.core.protein.apply_mut_complex': ( 'core/protein.html#apply_mut_complex',
|
|
77
|
+
'kdock/core/protein.py'),
|
|
78
|
+
'kdock.core.protein.apply_mut_single': ('core/protein.html#apply_mut_single', 'kdock/core/protein.py'),
|
|
79
|
+
'kdock.core.protein.compare_seq': ('core/protein.html#compare_seq', 'kdock/core/protein.py'),
|
|
80
|
+
'kdock.core.protein.get_uniprot_features': ( 'core/protein.html#get_uniprot_features',
|
|
81
|
+
'kdock/core/protein.py'),
|
|
82
|
+
'kdock.core.protein.get_uniprot_kd': ('core/protein.html#get_uniprot_kd', 'kdock/core/protein.py'),
|
|
83
|
+
'kdock.core.protein.get_uniprot_seq': ('core/protein.html#get_uniprot_seq', 'kdock/core/protein.py'),
|
|
84
|
+
'kdock.core.protein.get_uniprot_type': ('core/protein.html#get_uniprot_type', 'kdock/core/protein.py')},
|
|
85
|
+
'kdock.core.utils': { 'kdock.core.utils.copy_files': ('core/utils.html#copy_files', 'kdock/core/utils.py'),
|
|
86
|
+
'kdock.core.utils.get_box': ('core/utils.html#get_box', 'kdock/core/utils.py'),
|
|
87
|
+
'kdock.core.utils.get_rec_lig': ('core/utils.html#get_rec_lig', 'kdock/core/utils.py'),
|
|
88
|
+
'kdock.core.utils.rglob': ('core/utils.html#rglob', 'kdock/core/utils.py'),
|
|
89
|
+
'kdock.core.utils.view_complex': ('core/utils.html#view_complex', 'kdock/core/utils.py'),
|
|
90
|
+
'kdock.core.utils.view_mol': ('core/utils.html#view_mol', 'kdock/core/utils.py')},
|
|
91
|
+
'kdock.gnina.dock': { 'kdock.gnina.dock.extract_gnina_dock': ( 'gnina/gnina_docking.html#extract_gnina_dock',
|
|
92
|
+
'kdock/gnina/dock.py'),
|
|
93
|
+
'kdock.gnina.dock.gnina_dock': ('gnina/gnina_docking.html#gnina_dock', 'kdock/gnina/dock.py'),
|
|
94
|
+
'kdock.gnina.dock.setup_gnina_docker': ( 'gnina/gnina_docking.html#setup_gnina_docker',
|
|
95
|
+
'kdock/gnina/dock.py'),
|
|
96
|
+
'kdock.gnina.dock.setup_gnina_local': ( 'gnina/gnina_docking.html#setup_gnina_local',
|
|
97
|
+
'kdock/gnina/dock.py')},
|
|
98
|
+
'kdock.gnina.rescore': { 'kdock.gnina.rescore.ChainSelect': ( 'gnina/gnina_af3_rescore.html#chainselect',
|
|
99
|
+
'kdock/gnina/rescore.py'),
|
|
100
|
+
'kdock.gnina.rescore.ChainSelect.__init__': ( 'gnina/gnina_af3_rescore.html#chainselect.__init__',
|
|
101
|
+
'kdock/gnina/rescore.py'),
|
|
102
|
+
'kdock.gnina.rescore.ChainSelect.accept_chain': ( 'gnina/gnina_af3_rescore.html#chainselect.accept_chain',
|
|
103
|
+
'kdock/gnina/rescore.py'),
|
|
104
|
+
'kdock.gnina.rescore.extract_gnina_rescore': ( 'gnina/gnina_af3_rescore.html#extract_gnina_rescore',
|
|
105
|
+
'kdock/gnina/rescore.py'),
|
|
106
|
+
'kdock.gnina.rescore.get_gnina_rescore': ( 'gnina/gnina_af3_rescore.html#get_gnina_rescore',
|
|
107
|
+
'kdock/gnina/rescore.py'),
|
|
108
|
+
'kdock.gnina.rescore.get_gnina_rescore_folder': ( 'gnina/gnina_af3_rescore.html#get_gnina_rescore_folder',
|
|
109
|
+
'kdock/gnina/rescore.py'),
|
|
110
|
+
'kdock.gnina.rescore.gnina_rescore_docker': ( 'gnina/gnina_af3_rescore.html#gnina_rescore_docker',
|
|
111
|
+
'kdock/gnina/rescore.py'),
|
|
112
|
+
'kdock.gnina.rescore.gnina_rescore_local': ( 'gnina/gnina_af3_rescore.html#gnina_rescore_local',
|
|
113
|
+
'kdock/gnina/rescore.py'),
|
|
114
|
+
'kdock.gnina.rescore.pdb2sdf': ('gnina/gnina_af3_rescore.html#pdb2sdf', 'kdock/gnina/rescore.py'),
|
|
115
|
+
'kdock.gnina.rescore.prepare_rec_lig': ( 'gnina/gnina_af3_rescore.html#prepare_rec_lig',
|
|
116
|
+
'kdock/gnina/rescore.py'),
|
|
117
|
+
'kdock.gnina.rescore.rename_residues': ( 'gnina/gnina_af3_rescore.html#rename_residues',
|
|
118
|
+
'kdock/gnina/rescore.py'),
|
|
119
|
+
'kdock.gnina.rescore.split_cif': ('gnina/gnina_af3_rescore.html#split_cif', 'kdock/gnina/rescore.py')},
|
|
120
|
+
'kdock.px.core': { 'kdock.px.core.get_protein_ligand_df_json': ( 'protenix/protenix.html#get_protein_ligand_df_json',
|
|
121
|
+
'kdock/px/core.py'),
|
|
122
|
+
'kdock.px.core.get_single_job': ('protenix/protenix.html#get_single_job', 'kdock/px/core.py'),
|
|
123
|
+
'kdock.px.core.get_single_protein_ligand_json': ( 'protenix/protenix.html#get_single_protein_ligand_json',
|
|
124
|
+
'kdock/px/core.py'),
|
|
125
|
+
'kdock.px.core.get_virtual_screening_json': ( 'protenix/protenix.html#get_virtual_screening_json',
|
|
126
|
+
'kdock/px/core.py')},
|
|
127
|
+
'kdock.px.dock': { 'kdock.px.dock.capture_output': ('protenix/proteinix_dock.html#capture_output', 'kdock/px/dock.py'),
|
|
128
|
+
'kdock.px.dock.get_protenix_dock': ('protenix/proteinix_dock.html#get_protenix_dock', 'kdock/px/dock.py'),
|
|
129
|
+
'kdock.px.dock.get_protenix_vina_dock': ( 'protenix/proteinix_dock.html#get_protenix_vina_dock',
|
|
130
|
+
'kdock/px/dock.py'),
|
|
131
|
+
'kdock.px.dock.json2sdf': ('protenix/proteinix_dock.html#json2sdf', 'kdock/px/dock.py')}}}
|
kdock/af3/__init__.py
ADDED
|
File without changes
|
kdock/af3/analyze.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/03_analyze.ipynb.
|
|
2
|
+
|
|
3
|
+
# %% auto 0
|
|
4
|
+
__all__ = ['read_summary_json', 'get_summary_df', 'process_summary_df', 'get_top_cases', 'get_3d_report', 'get_report',
|
|
5
|
+
'copy_file']
|
|
6
|
+
|
|
7
|
+
# %% ../../nbs/af3/03_analyze.ipynb 3
|
|
8
|
+
import json, shutil, pandas as pd
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import plotly.graph_objects as go
|
|
11
|
+
|
|
12
|
+
# %% ../../nbs/af3/03_analyze.ipynb 5
|
|
13
|
+
def read_summary_json(json_path):
    "Flatten one json file into a single-row dictionary: `ID` (file stem) plus the json fields"
    src = Path(json_path)
    payload = json.loads(src.read_text())

    # Scalars keep their key; lists become `key_i`; lists of lists become `key_i_j`
    flat = {"ID": src.stem}
    for name, entry in payload.items():
        if not isinstance(entry, list):
            flat[name] = entry
            continue
        for i, item in enumerate(entry):
            if not isinstance(item, list):
                flat[f"{name}_{i}"] = item
                continue
            flat.update({f"{name}_{i}_{j}": leaf for j, leaf in enumerate(item)})
    return flat
|
|
31
|
+
|
|
32
|
+
# %% ../../nbs/af3/03_analyze.ipynb 7
|
|
33
|
+
def get_summary_df(output_dir):
    "Collect every `*_summary_confidences.json` found recursively under `output_dir` into a df, one row per file"
    summary_files = [*Path(output_dir).rglob('*_summary_confidences.json')]
    print(len(summary_files),'summary_confidences.json files detected')
    rows = [read_summary_json(summary_file) for summary_file in summary_files]
    return pd.DataFrame(rows)
|
|
39
|
+
|
|
40
|
+
# %% ../../nbs/af3/03_analyze.ipynb 10
|
|
41
|
+
def process_summary_df(df,generate_report=False):
    """Post process the json-converted pandas df; remove redundant columns; available for pairs.

    Steps:
      1. Strip the `_summary_confidences` suffix from `ID` and use it as the index.
      2. Drop numeric columns whose standard deviation is (almost) zero.
      3. Drop chain-level columns that merely duplicate `iptm`, `chain_ptm_0` or `chain_ptm_1`.
      4. Optionally save a seaborn pairplot to `af_report.pdf` (`generate_report=True`).
      5. Add combined metrics: `iptm_ptm_add`, `chain_pair_pae_min_add`, `iptm_rnk`, `ptm_rnk`,
         `iptm_ptm_rnk_add`, `chain_pair_pae_min_add_rnk` and `iptm_pae_add_rnk`.

    The input dataframe is not modified; a new dataframe is returned.
    `iptm`, `ptm`, `chain_pair_pae_min_0_1` and `chain_pair_pae_min_1_0` must survive step 2,
    otherwise a KeyError is raised in step 5.
    """
    # `assign` builds a new frame, so the caller's `ID` column is left untouched
    df = df.assign(ID=df['ID'].str.replace('_summary_confidences','',regex=False)).set_index('ID')

    # drop zero std columns: usually contains chain_pair_pae_min_0_0 and has_clash
    # numeric_only keeps non-numeric columns from breaking std(); select by label so the
    # mask stays aligned even when some columns are skipped
    col_std = df.std(numeric_only=True)
    df = df.drop(columns=col_std.index[col_std<1e-10])

    # drop columns with same values as a reference column
    candidate_pairs = [('iptm', col) for col in
                       ('chain_iptm_0', 'chain_iptm_1', 'chain_pair_iptm_0_1', 'chain_pair_iptm_1_0')]
    candidate_pairs += [('chain_ptm_0', 'chain_pair_iptm_0_0'),
                        ('chain_ptm_1', 'chain_pair_iptm_1_1')]
    # either column of a pair may already be gone after the zero-std drop, so check first
    redundant_columns = [dup for ref, dup in candidate_pairs
                         if ref in df.columns and dup in df.columns and df[ref].equals(df[dup])]
    df = df.drop(columns=redundant_columns)

    if generate_report:
        # NOTE(review): the module never imports seaborn/matplotlib, so this branch used to
        # raise NameError; import them lazily since they are only needed for the report
        import seaborn as sns
        import matplotlib.pyplot as plt
        print('Generating pairplot graph report')
        sns.pairplot(data=df, corner=True)
        plt.savefig("af_report.pdf")
        plt.close()
        print('Export to af_report.pdf')

    # add iptm and ptm
    df['iptm_ptm_add'] = df['iptm']+df['ptm']

    # inter error add
    df['chain_pair_pae_min_add'] = df['chain_pair_pae_min_0_1']+df['chain_pair_pae_min_1_0']

    # rank iptm and ptm and add the rank, this method can ignore the different value range between iptm and ptm
    df['iptm_rnk'] = df['iptm'].rank(ascending=False)
    df['ptm_rnk'] = df['ptm'].rank(ascending=False)
    df['iptm_ptm_rnk_add'] = df['iptm_rnk']+df['ptm_rnk']

    # combine chain_pair_pae_min & iptm rank (lower pae is better, hence ascending rank)
    df['chain_pair_pae_min_add_rnk'] = df['chain_pair_pae_min_add'].rank()
    df['iptm_pae_add_rnk'] = df['chain_pair_pae_min_add_rnk'] + df['iptm_rnk']

    return df
|
|
92
|
+
|
|
93
|
+
# %% ../../nbs/af3/03_analyze.ipynb 12
|
|
94
|
+
def get_top_cases(df,n=30):
    "Return the index labels ranking in the best `n` rows of at least one metric (union, unordered)"
    higher_is_better = ('ranking_score', 'iptm', 'iptm_ptm_add')
    lower_is_better = (
        'iptm_ptm_rnk_add',
        'chain_pair_pae_min_add',
        'chain_pair_pae_min_0_1',
        'chain_pair_pae_min_1_0',
        'iptm_pae_add_rnk',
    )

    # Pair every metric with the selector that picks its best rows
    selectors = [(df.nlargest, metric) for metric in higher_is_better]
    selectors += [(df.nsmallest, metric) for metric in lower_is_better]

    picked = set()
    for select, metric in selectors:
        picked.update(select(n, metric).index)

    return list(picked)
|
|
111
|
+
|
|
112
|
+
# %% ../../nbs/af3/03_analyze.ipynb 14
|
|
113
|
+
def get_3d_report(df,index_list, x='iptm',y='ptm',z='chain_pair_pae_min_add',save_dir='af_report'):
    """Generate 3d plot html file given case index and x, y, z colname.

    Every row of `df` becomes one marker positioned by columns `x`, `y`, `z`. Rows whose index
    is in `index_list` are labelled with the second `_`-separated token of their index; all
    other rows get an empty placeholder and therefore no label.
    The figure is written to `<save_dir>/3d_scatter_plot.html`; `save_dir` is created if missing.
    """
    annotation = df.index.where(df.index.isin(index_list),'').str.split('_').str[1]

    # Create the 3D scatter plot
    fig = go.Figure(data=go.Scatter3d(
        x=df[x],
        y=df[y],
        z=df[z],
        mode='markers+text',
        text=annotation,  # Annotation using the index
        textposition="top center",
        marker=dict(size=8, color='blue', opacity=0.8),
    ))

    # Customize layout
    fig.update_layout(
        scene=dict(
            xaxis_title=x,
            yaxis_title=y,
            zaxis_title=z,
        ),
        title='3D Scatter Plot',
        autosize=True,
        height=3000,
    )

    # Make the function usable on its own: do not rely on the caller having created save_dir
    out_dir = Path(save_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    fig.write_html(out_dir/'3d_scatter_plot.html',full_html=True)
    print('Exported the html to 3d_scatter_plot.html')
|
|
142
|
+
|
|
143
|
+
# %% ../../nbs/af3/03_analyze.ipynb 16
|
|
144
|
+
def get_report(out_dir,save_dir='af_report'):
    """Generate summary report based on summary_confidences file; return summary df and top cases.

    Reads every summary json under `out_dir`, post-processes the table, selects the top cases,
    then writes `3d_scatter_plot.html` and `summary_confidences.csv` into `save_dir`.
    Returns the tuple `(processed df, list of top-case index labels)`.
    """
    out = process_summary_df(get_summary_df(out_dir))
    top_cases = get_top_cases(out)

    # parents=True so a nested save_dir such as 'reports/af' does not fail
    report_dir = Path(save_dir)
    report_dir.mkdir(parents=True, exist_ok=True)
    get_3d_report(out,top_cases,save_dir=save_dir)
    out.to_csv(report_dir/'summary_confidences.csv')

    return out, top_cases
|
|
155
|
+
|
|
156
|
+
# %% ../../nbs/af3/03_analyze.ipynb 18
|
|
157
|
+
def copy_file(idx_name, source_dir, dest_dir):
    """Copy the model cif of one job to the dest folder.

    Copies `<source_dir>/<idx_name>/<idx_name>_model.cif` to `<dest_dir>/<idx_name>_model.cif`
    and prints the two paths. `dest_dir` is created (including parents) when it does not exist.
    Raises FileNotFoundError if the source cif is missing.
    """
    source_path = Path(source_dir)/f"{idx_name}/{idx_name}_model.cif"
    dest_path = Path(dest_dir)/f"{idx_name}_model.cif"
    # shutil.copy does not create missing folders, so make sure the destination exists first
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(source_path,dest_path)
    print(f'Copying {str(source_path)} to {str(dest_path)}')
|
kdock/af3/docker.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/01_docker.ipynb.
|
|
2
|
+
|
|
3
|
+
# %% auto 0
|
|
4
|
+
__all__ = ['get_docker_command', 'docker_single_full', 'docker_multi_full', 'docker_multi_msa', 'docker_multi_infer']
|
|
5
|
+
|
|
6
|
+
# %% ../../nbs/af3/01_docker.ipynb 3
|
|
7
|
+
from fastcore.meta import delegates
|
|
8
|
+
|
|
9
|
+
# %% ../../nbs/af3/01_docker.ipynb 6
|
|
10
|
+
def get_docker_command(
    input_dir="af_input", # Directory for input data
    output_dir="af_output", # Directory for output results
    model_dir="af_model", # Directory containing models
    db_dir="af_database", # Directory for databases. If None, this option is omitted
    cache_dir="af_cache", # Directory for JAX compilation cache. If None, this option is omitted
    gpus=0, # GPU devices to allocate (e.g., 0,1), if None, omitted
    docker_name="sky1ove/alphafold3", # Docker image name
    embedding=False, # If True, includes the --save_embeddings=true flag
    skip_search=False, # if MSA is precalculated and present in json; If True, includes the --norun_data_pipeline flag
    search_only=False, # search MSA only; If True, skip_search is ignored and the --norun_inference flag is included
    json_path=None # Path to JSON file. If not None, uses json_file instead of input_dir
):

    "Generate a Docker run command for Alphafold with customizable parameters; the command is printed and returned."

    # Host paths are taken relative to $HOME. The first component of the input path is the
    # folder mounted at /root/af_input; the remainder is addressed inside the container.
    # str() lets path-like objects through as well as plain strings.
    host_path = str(json_path) if json_path else str(input_dir)
    parent_dir, has_subpath, sub_path = host_path.partition('/')

    # Start building the Docker command
    cmd_parts = ["docker run --rm \\",
                 f' --volume "$HOME/{parent_dir}:/root/af_input" \\',
                 f' --volume "$HOME/{output_dir}:/root/af_output" \\',
                 f' --volume "$HOME/{model_dir}:/root/models" \\']

    if db_dir:
        cmd_parts.append(f' --volume "$HOME/{db_dir}:/root/public_databases" \\')

    if cache_dir:
        cmd_parts.append(f' --volume "$HOME/{cache_dir}:/root/cache" \\')

    # gpus=0 is a valid device id, so only None disables the flag
    if gpus is not None:
        cmd_parts.append(f' --gpus "device={gpus}" \\')

    cmd_parts.extend([f' {docker_name} \\',
                      ' python run_alphafold.py \\'])

    if json_path:
        cmd_parts.append(f' --json_path=/root/af_input/{sub_path} \\')
    elif has_subpath:
        cmd_parts.append(f' --input_dir=/root/af_input/{sub_path} \\')
    else:
        cmd_parts.append(' --input_dir=/root/af_input \\')

    cmd_parts.extend([' --output_dir=/root/af_output \\',
                      ' --model_dir=/root/models \\'])

    if cache_dir:
        cmd_parts.append(' --jax_compilation_cache_dir=/root/cache \\')

    if embedding:
        cmd_parts.append(' --save_embeddings=true \\')

    # search_only wins over skip_search: the two flags are mutually exclusive
    if search_only:
        cmd_parts.append(' --norun_inference \\')
    elif skip_search:
        cmd_parts.append(' --norun_data_pipeline \\')

    # The last line must not end with a line-continuation backslash
    if cmd_parts[-1].endswith('\\'):
        cmd_parts[-1] = cmd_parts[-1].rstrip(' \\')

    docker_command = "\n".join(cmd_parts)
    print(docker_command)
    # Return it too, so callers (and the docker_* wrappers) can use the command programmatically
    return docker_command
|
|
82
|
+
|
|
83
|
+
# %% ../../nbs/af3/01_docker.ipynb 12
|
|
84
|
+
@delegates(get_docker_command)
def docker_single_full(json_path,output_dir,cache_dir=False,**kwargs):
    "Full pipeline on a single json file; the JAX cache options are left out unless `cache_dir` is given."
    # Fold the explicit arguments into the forwarded options and delegate in one call
    kwargs.update(json_path=json_path, output_dir=output_dir, cache_dir=cache_dir)
    return get_docker_command(**kwargs)
|
|
92
|
+
|
|
93
|
+
# %% ../../nbs/af3/01_docker.ipynb 16
|
|
94
|
+
@delegates(get_docker_command)
def docker_multi_full(input_dir,output_dir,**kwargs):
    "Full pipeline on a folder of json files."
    # Fold the explicit arguments into the forwarded options and delegate in one call
    kwargs.update(input_dir=input_dir, output_dir=output_dir)
    return get_docker_command(**kwargs)
|
|
101
|
+
|
|
102
|
+
# %% ../../nbs/af3/01_docker.ipynb 19
|
|
103
|
+
@delegates(get_docker_command)
def docker_multi_msa(input_dir,output_dir,search_only=True,**kwargs):
    "MSA search only on a folder of json files, without structure inference; CPU only."
    # NOTE(review): get_docker_command defaults to gpus=0, so a --gpus flag is still emitted
    # unless gpus=None is passed — confirm whether "CPU only" should imply gpus=None here
    kwargs.update(input_dir=input_dir, output_dir=output_dir, search_only=search_only)
    return get_docker_command(**kwargs)
|
|
111
|
+
|
|
112
|
+
# %% ../../nbs/af3/01_docker.ipynb 22
|
|
113
|
+
@delegates(get_docker_command)
def docker_multi_infer(input_dir,output_dir,skip_search=True,**kwargs):
    "Inference only on a folder of json files that already contain the MSA; GPU is needed."
    # Fold the explicit arguments into the forwarded options and delegate in one call
    kwargs.update(input_dir=input_dir, output_dir=output_dir, skip_search=skip_search)
    return get_docker_command(**kwargs)
|