kdock 0.0.2__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. {kdock-0.0.2/kdock.egg-info → kdock-0.0.4}/PKG-INFO +19 -27
  2. {kdock-0.0.2 → kdock-0.0.4}/README.md +7 -2
  3. kdock-0.0.4/kdock/__init__.py +1 -0
  4. {kdock-0.0.2 → kdock-0.0.4}/kdock/_modidx.py +7 -1
  5. {kdock-0.0.2 → kdock-0.0.4}/kdock/af3/analyze.py +9 -9
  6. {kdock-0.0.2 → kdock-0.0.4}/kdock/af3/docker.py +7 -7
  7. {kdock-0.0.2 → kdock-0.0.4}/kdock/af3/json.py +11 -11
  8. {kdock-0.0.2 → kdock-0.0.4}/kdock/af3/protein_pairs.py +9 -9
  9. kdock-0.0.4/kdock/boltz/dock.py +107 -0
  10. {kdock-0.0.2 → kdock-0.0.4}/kdock/core/data.py +17 -7
  11. {kdock-0.0.2 → kdock-0.0.4}/kdock/core/ligand.py +29 -18
  12. {kdock-0.0.2 → kdock-0.0.4}/kdock/core/plot.py +6 -6
  13. {kdock-0.0.2 → kdock-0.0.4}/kdock/core/protein.py +9 -9
  14. {kdock-0.0.2 → kdock-0.0.4}/kdock/core/utils.py +8 -8
  15. {kdock-0.0.2 → kdock-0.0.4}/kdock/gnina/dock.py +7 -7
  16. {kdock-0.0.2 → kdock-0.0.4}/kdock/gnina/rescore.py +12 -12
  17. kdock-0.0.4/kdock/px/__init__.py +0 -0
  18. {kdock-0.0.2 → kdock-0.0.4}/kdock/px/core.py +6 -6
  19. {kdock-0.0.2 → kdock-0.0.4}/kdock/px/dock.py +6 -6
  20. {kdock-0.0.2 → kdock-0.0.4/kdock.egg-info}/PKG-INFO +19 -27
  21. {kdock-0.0.2 → kdock-0.0.4}/kdock.egg-info/SOURCES.txt +2 -3
  22. {kdock-0.0.2 → kdock-0.0.4}/kdock.egg-info/requires.txt +2 -0
  23. kdock-0.0.4/pyproject.toml +33 -0
  24. kdock-0.0.2/kdock/__init__.py +0 -1
  25. kdock-0.0.2/kdock.egg-info/not-zip-safe +0 -1
  26. kdock-0.0.2/pyproject.toml +0 -11
  27. kdock-0.0.2/settings.ini +0 -40
  28. kdock-0.0.2/setup.py +0 -64
  29. {kdock-0.0.2 → kdock-0.0.4}/LICENSE +0 -0
  30. {kdock-0.0.2 → kdock-0.0.4}/MANIFEST.in +0 -0
  31. {kdock-0.0.2 → kdock-0.0.4}/kdock/af3/__init__.py +0 -0
  32. {kdock-0.0.2/kdock/core → kdock-0.0.4/kdock/boltz}/__init__.py +0 -0
  33. {kdock-0.0.2/kdock/gnina → kdock-0.0.4/kdock/core}/__init__.py +0 -0
  34. {kdock-0.0.2/kdock/px → kdock-0.0.4/kdock/gnina}/__init__.py +0 -0
  35. {kdock-0.0.2 → kdock-0.0.4}/kdock.egg-info/dependency_links.txt +0 -0
  36. {kdock-0.0.2 → kdock-0.0.4}/kdock.egg-info/entry_points.txt +0 -0
  37. {kdock-0.0.2 → kdock-0.0.4}/kdock.egg-info/top_level.txt +0 -0
  38. {kdock-0.0.2 → kdock-0.0.4}/setup.cfg +0 -0
@@ -1,20 +1,17 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kdock
3
- Version: 0.0.2
3
+ Version: 0.0.4
4
4
  Summary: A collection of docking tools
5
- Home-page: https://github.com/sky1ove/kdock
6
- Author: Lily Cai
7
- Author-email: lcai888666@gmail.com
8
- License: Apache Software License 2.0
9
- Keywords: nbdev jupyter notebook python
10
- Classifier: Development Status :: 4 - Beta
11
- Classifier: Intended Audience :: Developers
5
+ Author-email: Lily Cai <lcai888666@gmail.com>
6
+ License: Apache-2.0
7
+ Project-URL: Repository, https://github.com/sky1ove/kdock
8
+ Project-URL: Documentation, https://sky1ove.github.io/kdock
9
+ Keywords: nbdev,jupyter,notebook,python
12
10
  Classifier: Natural Language :: English
13
- Classifier: Programming Language :: Python :: 3.9
14
- Classifier: Programming Language :: Python :: 3.10
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3 :: Only
18
15
  Requires-Python: >=3.9
19
16
  Description-Content-Type: text/markdown
20
17
  License-File: LICENSE
@@ -35,22 +32,12 @@ Requires-Dist: openpyxl
35
32
  Requires-Dist: biopython
36
33
  Requires-Dist: requests
37
34
  Requires-Dist: scikit-fingerprints
35
+ Requires-Dist: pubchempy
38
36
  Provides-Extra: dev
39
37
  Requires-Dist: nbdev; extra == "dev"
40
38
  Requires-Dist: ipykernel; extra == "dev"
41
- Dynamic: author
42
- Dynamic: author-email
43
- Dynamic: classifier
44
- Dynamic: description
45
- Dynamic: description-content-type
46
- Dynamic: home-page
47
- Dynamic: keywords
48
- Dynamic: license
39
+ Requires-Dist: twine; extra == "dev"
49
40
  Dynamic: license-file
50
- Dynamic: provides-extra
51
- Dynamic: requires-dist
52
- Dynamic: requires-python
53
- Dynamic: summary
54
41
 
55
42
  # kdock
56
43
 
@@ -59,8 +46,13 @@ Dynamic: summary
59
46
 
60
47
  ## Installation
61
48
 
62
- Install latest from the GitHub
63
- [repository](https://github.com/sky1ove/kdock):
49
+ Install the stable version from PyPI:
50
+
51
+ ``` sh
52
+ $ pip install kdock
53
+ ```
54
+
55
+ Install the latest version from GitHub:
64
56
 
65
57
  ``` sh
66
58
  $ pip install -U git+https://github.com/sky1ove/kdock.git
@@ -5,8 +5,13 @@
5
5
 
6
6
  ## Installation
7
7
 
8
- Install latest from the GitHub
9
- [repository](https://github.com/sky1ove/kdock):
8
+ Install the stable version from PyPI:
9
+
10
+ ``` sh
11
+ $ pip install kdock
12
+ ```
13
+
14
+ Install the latest version from GitHub:
10
15
 
11
16
  ``` sh
12
17
  $ pip install -U git+https://github.com/sky1ove/kdock.git
@@ -0,0 +1 @@
1
+ __version__ = "0.0.4"
@@ -42,6 +42,10 @@ d = { 'settings': { 'branch': 'main',
42
42
  'kdock/af3/protein_pairs.py'),
43
43
  'kdock.af3.protein_pairs.get_protein_subjson': ( 'af3/protein_pairs.html#get_protein_subjson',
44
44
  'kdock/af3/protein_pairs.py')},
45
+ 'kdock.boltz.dock': { 'kdock.boltz.dock.plot_scatter_spearman': ( 'core/boltz/dock.html#plot_scatter_spearman',
46
+ 'kdock/boltz/dock.py'),
47
+ 'kdock.boltz.dock.prepare_boltz': ('core/boltz/dock.html#prepare_boltz', 'kdock/boltz/dock.py'),
48
+ 'kdock.boltz.dock.run_boltz': ('core/boltz/dock.html#run_boltz', 'kdock/boltz/dock.py')},
45
49
  'kdock.core.data': { 'kdock.core.data.Collins': ('core/data.html#collins', 'kdock/core/data.py'),
46
50
  'kdock.core.data.Collins.get_antibiotics_2k': ( 'core/data.html#collins.get_antibiotics_2k',
47
51
  'kdock/core/data.py'),
@@ -54,8 +58,10 @@ d = { 'settings': { 'branch': 'main',
54
58
  'kdock.core.data.Kras.get_mirati_g12d_raw': ( 'core/data.html#kras.get_mirati_g12d_raw',
55
59
  'kdock/core/data.py'),
56
60
  'kdock.core.data.Kras.get_seq': ('core/data.html#kras.get_seq', 'kdock/core/data.py'),
57
- 'kdock.core.data.fetch_csv': ('core/data.html#fetch_csv', 'kdock/core/data.py')},
61
+ 'kdock.core.data.fetch_csv': ('core/data.html#fetch_csv', 'kdock/core/data.py'),
62
+ 'kdock.core.data.name2smi': ('core/data.html#name2smi', 'kdock/core/data.py')},
58
63
  'kdock.core.ligand': { 'kdock.core.ligand.compress_fp': ('core/ligand.html#compress_fp', 'kdock/core/ligand.py'),
64
+ 'kdock.core.ligand.contain_acrylamide': ('core/ligand.html#contain_acrylamide', 'kdock/core/ligand.py'),
59
65
  'kdock.core.ligand.get_fp': ('core/ligand.html#get_fp', 'kdock/core/ligand.py'),
60
66
  'kdock.core.ligand.get_rdkit': ('core/ligand.html#get_rdkit', 'kdock/core/ligand.py'),
61
67
  'kdock.core.ligand.get_rdkit_3d': ('core/ligand.html#get_rdkit_3d', 'kdock/core/ligand.py'),
@@ -1,15 +1,15 @@
1
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/03_analyze.ipynb.
2
2
 
3
- # %% auto 0
3
+ # %% auto #0
4
4
  __all__ = ['read_summary_json', 'get_summary_df', 'process_summary_df', 'get_top_cases', 'get_3d_report', 'get_report',
5
5
  'copy_file']
6
6
 
7
- # %% ../../nbs/af3/03_analyze.ipynb 3
7
+ # %% ../../nbs/af3/03_analyze.ipynb #bfdd4fae-37c7-4835-afb1-524e5a731add
8
8
  import json, shutil, pandas as pd
9
9
  from pathlib import Path
10
10
  import plotly.graph_objects as go
11
11
 
12
- # %% ../../nbs/af3/03_analyze.ipynb 5
12
+ # %% ../../nbs/af3/03_analyze.ipynb #7b982142-f161-414b-b893-56234282cfb7
13
13
  def read_summary_json(json_path):
14
14
  "Read json file to dictionary"
15
15
  json_path = Path(json_path)
@@ -29,7 +29,7 @@ def read_summary_json(json_path):
29
29
  row[key] = value
30
30
  return row
31
31
 
32
- # %% ../../nbs/af3/03_analyze.ipynb 7
32
+ # %% ../../nbs/af3/03_analyze.ipynb #6efc38e3-f8d8-4c3c-b068-f5a37a4af057
33
33
  def get_summary_df(output_dir):
34
34
  "Pack the summary json from the output folder to the df"
35
35
 
@@ -37,7 +37,7 @@ def get_summary_df(output_dir):
37
37
  print(len(path_list),'summary_confidences.json files detected')
38
38
  return pd.DataFrame(list(map(read_summary_json,path_list)))
39
39
 
40
- # %% ../../nbs/af3/03_analyze.ipynb 10
40
+ # %% ../../nbs/af3/03_analyze.ipynb #2ae7e00d-f285-4fc1-a40c-af0f64d74376
41
41
  def process_summary_df(df,generate_report=False):
42
42
  "Post process the json-converted pandas df; remove redundant columns; available for pairs"
43
43
 
@@ -90,7 +90,7 @@ def process_summary_df(df,generate_report=False):
90
90
 
91
91
  return df
92
92
 
93
- # %% ../../nbs/af3/03_analyze.ipynb 12
93
+ # %% ../../nbs/af3/03_analyze.ipynb #e1af07e2-b86a-492d-b8d7-0fd2f8d90779
94
94
  def get_top_cases(df,n=30):
95
95
  "Get top cases from the metric"
96
96
  idxs = set()
@@ -109,7 +109,7 @@ def get_top_cases(df,n=30):
109
109
 
110
110
  return list(idxs)
111
111
 
112
- # %% ../../nbs/af3/03_analyze.ipynb 14
112
+ # %% ../../nbs/af3/03_analyze.ipynb #89900087-ff2c-48e3-bbe7-3077de2d1f32
113
113
  def get_3d_report(df,index_list, x='iptm',y='ptm',z='chain_pair_pae_min_add',save_dir='af_report'):
114
114
  "Generate 3d plot html file given case index and x, y, z colname"
115
115
  annotation = df.index.where(df.index.isin(index_list),'').str.split('_').str[1]
@@ -140,7 +140,7 @@ def get_3d_report(df,index_list, x='iptm',y='ptm',z='chain_pair_pae_min_add',sav
140
140
  fig.write_html(Path(save_dir)/'3d_scatter_plot.html',full_html=True)
141
141
  print('Exported the html to 3d_scatter_plot.html')
142
142
 
143
- # %% ../../nbs/af3/03_analyze.ipynb 16
143
+ # %% ../../nbs/af3/03_analyze.ipynb #02cab7f4-a581-43fb-a0b9-6df5ce6d1d08
144
144
  def get_report(out_dir,save_dir='af_report'):
145
145
  "Generate summary report based on summary_confidences file; return summary df and top cases"
146
146
  out = get_summary_df(out_dir)
@@ -153,7 +153,7 @@ def get_report(out_dir,save_dir='af_report'):
153
153
 
154
154
  return out, top_cases
155
155
 
156
- # %% ../../nbs/af3/03_analyze.ipynb 18
156
+ # %% ../../nbs/af3/03_analyze.ipynb #11319fd1-0bee-4e8c-b224-9eed52b8cbb2
157
157
  def copy_file(idx_name, source_dir, dest_dir):
158
158
  "Copy all model cif generated by AF3 to the new dest folder"
159
159
  source_path = Path(source_dir)/f"{idx_name}/{idx_name}_model.cif"
@@ -1,12 +1,12 @@
1
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/01_docker.ipynb.
2
2
 
3
- # %% auto 0
3
+ # %% auto #0
4
4
  __all__ = ['get_docker_command', 'docker_single_full', 'docker_multi_full', 'docker_multi_msa', 'docker_multi_infer']
5
5
 
6
- # %% ../../nbs/af3/01_docker.ipynb 3
6
+ # %% ../../nbs/af3/01_docker.ipynb #57b6887a-a451-462e-9452-d850d3028317
7
7
  from fastcore.meta import delegates
8
8
 
9
- # %% ../../nbs/af3/01_docker.ipynb 6
9
+ # %% ../../nbs/af3/01_docker.ipynb #10f9c68f-0374-4cb2-b10e-65bb0107844f
10
10
  def get_docker_command(
11
11
  input_dir="af_input", # Directory for input data
12
12
  output_dir="af_output", # Directory for output results
@@ -80,7 +80,7 @@ def get_docker_command(
80
80
  docker_command = "\n".join(cmd_parts)
81
81
  print(docker_command)
82
82
 
83
- # %% ../../nbs/af3/01_docker.ipynb 12
83
+ # %% ../../nbs/af3/01_docker.ipynb #5e5c9de1-0b4b-433f-90a8-02fa755aafc3
84
84
  @delegates(get_docker_command)
85
85
  def docker_single_full(json_path,output_dir,cache_dir=False,**kwargs):
86
86
  "Single json task with full pipeline."
@@ -90,7 +90,7 @@ def docker_single_full(json_path,output_dir,cache_dir=False,**kwargs):
90
90
  cache_dir=cache_dir,
91
91
  **kwargs)
92
92
 
93
- # %% ../../nbs/af3/01_docker.ipynb 16
93
+ # %% ../../nbs/af3/01_docker.ipynb #996f65a3-64ee-4288-aed7-3a0acba73175
94
94
  @delegates(get_docker_command)
95
95
  def docker_multi_full(input_dir,output_dir,**kwargs):
96
96
  "Folder of json as input with full pipeline."
@@ -99,7 +99,7 @@ def docker_multi_full(input_dir,output_dir,**kwargs):
99
99
  output_dir=output_dir,
100
100
  **kwargs)
101
101
 
102
- # %% ../../nbs/af3/01_docker.ipynb 19
102
+ # %% ../../nbs/af3/01_docker.ipynb #7512a43c-28c2-41f8-b4c5-4ddf1e56f716
103
103
  @delegates(get_docker_command)
104
104
  def docker_multi_msa(input_dir,output_dir,search_only=True,**kwargs):
105
105
  "MSA search only, without structure inference; CPU only."
@@ -109,7 +109,7 @@ def docker_multi_msa(input_dir,output_dir,search_only=True,**kwargs):
109
109
  search_only=search_only,
110
110
  **kwargs)
111
111
 
112
- # %% ../../nbs/af3/01_docker.ipynb 22
112
+ # %% ../../nbs/af3/01_docker.ipynb #403f1d8f-6055-486a-a0f9-bc4b2ca8feaa
113
113
  @delegates(get_docker_command)
114
114
  def docker_multi_infer(input_dir,output_dir,skip_search=True,**kwargs):
115
115
  "Infer only with pre-calculated MSA; GPU is needed."
@@ -1,10 +1,10 @@
1
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/00_json.ipynb.
2
2
 
3
- # %% auto 0
3
+ # %% auto #0
4
4
  __all__ = ['dump_json', 'get_protein_json', 'read_json', 'get_protein_smiles_json', 'get_protein_ccdcode_json',
5
5
  'assign_atom_names_from_graph', 'mol_to_ccd_text', 'sdf2ccd', 'get_protein_ccd_json', 'split_nfolder']
6
6
 
7
- # %% ../../nbs/af3/00_json.ipynb 2
7
+ # %% ../../nbs/af3/00_json.ipynb #1d0467e3-9fcb-4e07-a59f-58405b640950
8
8
  import re, shutil, json, pandas as pd, numpy as np
9
9
  from pathlib import Path
10
10
 
@@ -14,13 +14,13 @@ from rdkit import Chem
14
14
 
15
15
  from Bio.PDB import PDBParser
16
16
 
17
- # %% ../../nbs/af3/00_json.ipynb 4
17
+ # %% ../../nbs/af3/00_json.ipynb #62063f37-32aa-4a12-8e57-29ef4fff1413
18
18
  def dump_json(data, save_path):
19
19
  "Save json data into a file"
20
20
  with open(save_path,'w') as f:
21
21
  json.dump(data,f,indent=4)
22
22
 
23
- # %% ../../nbs/af3/00_json.ipynb 5
23
+ # %% ../../nbs/af3/00_json.ipynb #14d9fdb5-7e05-47f6-b596-9d751f9f5618
24
24
  def get_protein_json(name, # job name
25
25
  seq, # aa sequence
26
26
  save_path=None, # .json
@@ -48,13 +48,13 @@ def get_protein_json(name, # job name
48
48
  dump_json(json_data,save_path)
49
49
  return json_data
50
50
 
51
- # %% ../../nbs/af3/00_json.ipynb 9
51
+ # %% ../../nbs/af3/00_json.ipynb #9bb34b3b-e90f-4112-a947-aee8a8610335
52
52
  def read_json(file_path):
53
53
  with open(file_path,'r') as f:
54
54
  data = json.load(f)
55
55
  return data
56
56
 
57
- # %% ../../nbs/af3/00_json.ipynb 11
57
+ # %% ../../nbs/af3/00_json.ipynb #b9490613-30cd-4cf9-93a7-0e53a6645932
58
58
  def get_protein_smiles_json(smi_id:str,
59
59
  SMILES:str,
60
60
  protein_json, # json type
@@ -88,7 +88,7 @@ def get_protein_smiles_json(smi_id:str,
88
88
  dump_json(json_data,save_path)
89
89
  return json_data
90
90
 
91
- # %% ../../nbs/af3/00_json.ipynb 18
91
+ # %% ../../nbs/af3/00_json.ipynb #300bb051-4246-400e-bdc3-dd43e4da47d1
92
92
  def get_protein_ccdcode_json(protein_json, # dict with protein sequence
93
93
  ccd_code, # str or list of str
94
94
  job_id: str, # job/task ID
@@ -128,7 +128,7 @@ def get_protein_ccdcode_json(protein_json, # dict with protein sequence
128
128
 
129
129
  return json_data
130
130
 
131
- # %% ../../nbs/af3/00_json.ipynb 22
131
+ # %% ../../nbs/af3/00_json.ipynb #e850527d-826d-4c7f-9a66-bf307c44b809
132
132
  # Mapping bond types to mmCIF-compatible values
133
133
  _RDKIT_BOND_TYPE_TO_MMCIF = {
134
134
  rd_chem.BondType.SINGLE: 'SING',
@@ -208,7 +208,7 @@ def mol_to_ccd_text(mol, component_id, pdbx_smiles=None, include_hydrogens=False
208
208
 
209
209
  return "\n".join(lines)
210
210
 
211
- # %% ../../nbs/af3/00_json.ipynb 23
211
+ # %% ../../nbs/af3/00_json.ipynb #c099dc24-d29e-49a1-9cbb-d14e7bc2a3de
212
212
  def sdf2ccd(sdf_path,
213
213
  CCD_name='lig-1', # do not use '_'; use as few letters as possible; 'lig-any' leads to extra ligands
214
214
  ):
@@ -218,7 +218,7 @@ def sdf2ccd(sdf_path,
218
218
  mol = supplier[0] # Get the first molecule
219
219
  return mol_to_ccd_text(mol,CCD_name)
220
220
 
221
- # %% ../../nbs/af3/00_json.ipynb 26
221
+ # %% ../../nbs/af3/00_json.ipynb #3953bc6b-3bc0-40e5-a87b-08eb640b8fcf
222
222
  def get_protein_ccd_json(protein_json, # dict with protein sequence
223
223
  rec_residue_num:int, # 1-indexed, for bondedAtomPairs, e.g., ["A", 145, "SG"]
224
224
  rec_atom_id:str, # for bondedAtomPairs, e.g., ["A", 145, "SG"]
@@ -262,7 +262,7 @@ def get_protein_ccd_json(protein_json, # dict with protein sequence
262
262
 
263
263
  return json_data
264
264
 
265
- # %% ../../nbs/af3/00_json.ipynb 30
265
+ # %% ../../nbs/af3/00_json.ipynb #c7f16eec-082a-4233-b3ed-b96d58ecedb4
266
266
  def split_nfolder(folder_dir,
267
267
  n=4):
268
268
  "Move json files from a folder into subfolders (folder_0, folder_1, ..., folder_N)."
@@ -1,10 +1,10 @@
1
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/af3/02_protein_pairs.ipynb.
2
2
 
3
- # %% auto 0
3
+ # %% auto #0
4
4
  __all__ = ['get_colabfold_cmd', 'copy_a3m', 'a3m_to_seq', 'get_protein_subjson', 'dump_json_folder', 'get_multi_protein_json',
5
5
  'generate_pair_df']
6
6
 
7
- # %% ../../nbs/af3/02_protein_pairs.ipynb 4
7
+ # %% ../../nbs/af3/02_protein_pairs.ipynb #92000c0e-6470-423c-9c55-9972a341282b
8
8
  import os, json, shutil, pandas as pd
9
9
  from tqdm import tqdm
10
10
  from itertools import combinations
@@ -12,12 +12,12 @@ from pathlib import Path
12
12
  from .json import *
13
13
  from .docker import *
14
14
 
15
- # %% ../../nbs/af3/02_protein_pairs.ipynb 9
15
+ # %% ../../nbs/af3/02_protein_pairs.ipynb #3f3ae31b-287e-4810-9e10-db969c3fcc6b
16
16
  def get_colabfold_cmd(csv_path,project_name):
17
17
  print('Run below in terminal:')
18
18
  print(f'\n colabfold_batch {csv_path} msa_{project_name} --msa-only')
19
19
 
20
- # %% ../../nbs/af3/02_protein_pairs.ipynb 13
20
+ # %% ../../nbs/af3/02_protein_pairs.ipynb #9fb90cd0-3260-4dd1-a169-340a75633ab0
21
21
  def copy_a3m(a3m_dir: str, # Path to the source directory containing .a3m files.
22
22
  dest_dir: str, # Path to the destination directory where files will be copied
23
23
  ):
@@ -33,12 +33,12 @@ def copy_a3m(a3m_dir: str, # Path to the source directory containing .a3m files.
33
33
 
34
34
  print(f"Copied {len(files)} a3m files from {a3m_dir} to {dest_dir}")
35
35
 
36
- # %% ../../nbs/af3/02_protein_pairs.ipynb 17
36
+ # %% ../../nbs/af3/02_protein_pairs.ipynb #def3928f-c895-479f-9e88-093fdf3892da
37
37
  def a3m_to_seq(file_path:Path):
38
38
  "Get protein sequence from a3m file"
39
39
  return file_path.read_text().splitlines()[2] # protein sequence is located on line 2
40
40
 
41
- # %% ../../nbs/af3/02_protein_pairs.ipynb 19
41
+ # %% ../../nbs/af3/02_protein_pairs.ipynb #03da4db8-60b5-409e-889a-f44b2fb27705
42
42
  def get_protein_subjson(gene_name, a3m_dir=".",idx = 'A',run_template=True):
43
43
  "Get subjson (protein part) with colabfold unpairedMSA .a3m path"
44
44
  file_path = Path(a3m_dir)/f"{gene_name}.a3m"
@@ -55,13 +55,13 @@ def get_protein_subjson(gene_name, a3m_dir=".",idx = 'A',run_template=True):
55
55
 
56
56
  return json_data
57
57
 
58
- # %% ../../nbs/af3/02_protein_pairs.ipynb 22
58
+ # %% ../../nbs/af3/02_protein_pairs.ipynb #76c918a2-630a-48ac-a682-501cfe4a522f
59
59
  def dump_json_folder(json_data, folder):
60
60
  "Save json under a folder"
61
61
  file_path = Path(folder)/f"{json_data['name']}.json"
62
62
  with open(file_path,'w') as f: json.dump(json_data,f,indent=4)
63
63
 
64
- # %% ../../nbs/af3/02_protein_pairs.ipynb 23
64
+ # %% ../../nbs/af3/02_protein_pairs.ipynb #8e97b4ea-17ec-48dd-bca0-c2f11763205a
65
65
  def get_multi_protein_json(gene_list,a3m_dir,run_template=True,save_folder=None):
66
66
  'Get json of multiple proteins, with unpaired MSA path indicated (from colabfold MSA)'
67
67
  sequences = []
@@ -82,7 +82,7 @@ def get_multi_protein_json(gene_list,a3m_dir,run_template=True,save_folder=None)
82
82
  dump_json_folder(json_data,save_folder)
83
83
  return json_data
84
84
 
85
- # %% ../../nbs/af3/02_protein_pairs.ipynb 27
85
+ # %% ../../nbs/af3/02_protein_pairs.ipynb #8cc62d64-4643-49a3-a7ed-be7231abb0b9
86
86
  def generate_pair_df(gene_list,self_pair=True):
87
87
  "Unique pair genes in a gene list"
88
88
  pairs = list(combinations(gene_list, 2))
@@ -0,0 +1,107 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/boltz/00_dock.ipynb.
2
+
3
+ # %% auto #0
4
+ __all__ = ['prepare_boltz', 'run_boltz', 'plot_scatter_spearman']
5
+
6
+ # %% ../../nbs/core/boltz/00_dock.ipynb #3b60daa9
7
+ # basics
8
+ import subprocess
9
+ from pathlib import Path
10
+
11
+ # %% ../../nbs/core/boltz/00_dock.ipynb #71a6ad95
12
+ def prepare_boltz(seq: str, # Amino acid sequence of the protein
13
+ smiles: str, # SMILES string of the ligand
14
+ fname: str, # Output filename (should end with .yaml)
15
+ ):
16
+ "Create a YAML file for protein-ligand affinity prediction."
17
+ yaml_content = f"""version: 1
18
+ sequences:
19
+ - protein:
20
+ id: "A"
21
+ sequence: "{seq}"
22
+ - ligand:
23
+ id: "B"
24
+ smiles: "{smiles}"
25
+ properties:
26
+ - affinity:
27
+ binder: "B"
28
+ """
29
+ with open(fname, "w") as f:
30
+ f.write(yaml_content)
31
+
32
+ # %% ../../nbs/core/boltz/00_dock.ipynb #486e2576
33
+ def run_boltz(file_list:list[Path], # list of .yaml paths as pathlib.Path objects
34
+ api_key):
35
+
36
+ "Run Boltz-Lab predictions for a list of YAML files."
37
+
38
+ # config key
39
+ subprocess.run(
40
+ ["boltz-lab", "config", "--api-key", api_key.strip()],
41
+ check=True
42
+ )
43
+ failed = []
44
+
45
+ for file in file_list:
46
+ print(f"\nSubmitting: {str(file)}")
47
+
48
+ result = subprocess.run(
49
+ ["boltz-lab", "predict", str(file),
50
+ "--no-wait", # for batch run, so no need to wait the results til the next
51
+ "--name",file.stem, # job name appeared in boltz
52
+ ],
53
+ capture_output=True,
54
+ text=True,
55
+ )
56
+
57
+ if result.returncode != 0: failed.append(file.name)
58
+
59
+ print(result.stdout)
60
+
61
+
62
+ print("\n======== SUMMARY ========")
63
+ print(f"Total: {len(file_list)}")
64
+ print(f"Failed: {len(failed)}")
65
+
66
+ return failed
67
+
68
+
69
+ # %% ../../nbs/core/boltz/00_dock.ipynb #45b0e6aa
70
+ import matplotlib.pyplot as plt
71
+ import seaborn as sns, numpy as np
72
+ from scipy.stats import spearmanr
73
+
74
+ # %% ../../nbs/core/boltz/00_dock.ipynb #eec74db7
75
+ def plot_scatter_spearman(data, x, y, ax=None):
76
+ """
77
+ Plot scatter + Spearman correlation and p-value annotation.
78
+ """
79
+ if ax is None:
80
+ ax = plt.gca()
81
+
82
+ # Drop NA
83
+ sub = data[[x, y]].dropna()
84
+ x_vals = sub[x]
85
+ y_vals = sub[y]
86
+
87
+ # Compute Spearman
88
+ rho, p = spearmanr(x_vals, y_vals)
89
+
90
+ # Plot
91
+ sns.scatterplot(data=sub, x=x, y=y, ax=ax)
92
+
93
+ # Annotate
94
+ text = f"Spearman ρ = {rho:.3f}\np = {p:.2e}"
95
+ ax.text(
96
+ 0.98, 0.98, # x, y in axes fraction
97
+ text,
98
+ transform=ax.transAxes,
99
+ ha='right', # horizontal align
100
+ va='top', # vertical align
101
+ fontsize=11,
102
+ bbox=dict(boxstyle="round", facecolor="white", alpha=0.7)
103
+ )
104
+
105
+ ax.set_title(f'{x} vs {y}')
106
+
107
+ return rho, p
@@ -1,22 +1,32 @@
1
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/core/00_data.ipynb.
2
2
 
3
- # %% auto 0
4
- __all__ = ['BASE_URL', 'fetch_csv', 'Collins', 'Kras']
3
+ # %% auto #0
4
+ __all__ = ['BASE_URL', 'name2smi', 'fetch_csv', 'Collins', 'Kras']
5
5
 
6
- # %% ../../nbs/core/00_data.ipynb 3
6
+ # %% ../../nbs/core/00_data.ipynb #e61ba13c
7
7
  import pandas as pd
8
8
  import requests
9
9
  from functools import lru_cache
10
+ import pubchempy as pcp
10
11
 
11
- # %% ../../nbs/core/00_data.ipynb 7
12
+ # %% ../../nbs/core/00_data.ipynb #b97285b8
13
+ def name2smi(name):
14
+ "Given a compound name, get SMILES in PubChem database. "
15
+ compounds = pcp.get_compounds(name, 'name')
16
+ if not compounds: return None
17
+ # get the first rank
18
+ c = compounds[0]
19
+ return c.smiles # can also return c.cid compound ID
20
+
21
+ # %% ../../nbs/core/00_data.ipynb #209ffb82
12
22
  BASE_URL = "https://github.com/sky1ove/kdock/raw/main/"
13
23
 
14
- # %% ../../nbs/core/00_data.ipynb 8
24
+ # %% ../../nbs/core/00_data.ipynb #d7e2b62a
15
25
  @lru_cache()
16
26
  def fetch_csv(url):
17
27
  return pd.read_csv(url)
18
28
 
19
- # %% ../../nbs/core/00_data.ipynb 9
29
+ # %% ../../nbs/core/00_data.ipynb #1f1a2239
20
30
  class Collins:
21
31
  "A class of loading compound datasets from Collins lab."
22
32
 
@@ -45,7 +55,7 @@ class Collins:
45
55
  """
46
56
  return fetch_csv(BASE_URL + "dataset/antibiotics_enzyme.csv")
47
57
 
48
- # %% ../../nbs/core/00_data.ipynb 19
58
+ # %% ../../nbs/core/00_data.ipynb #16424d50
49
59
  class Kras:
50
60
  "A class of fetching various KRAS datasets."
51
61
  @staticmethod