biobb-io 4.2.0__py3-none-any.whl → 5.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biobb_io/__init__.py +3 -1
- biobb_io/api/__init__.py +31 -1
- biobb_io/api/alphafold.py +53 -22
- biobb_io/api/api_binding_site.py +59 -23
- biobb_io/api/canonical_fasta.py +55 -23
- biobb_io/api/common.py +177 -114
- biobb_io/api/ideal_sdf.py +53 -23
- biobb_io/api/ligand.py +53 -23
- biobb_io/api/memprotmd_sim.py +53 -22
- biobb_io/api/memprotmd_sim_list.py +44 -18
- biobb_io/api/memprotmd_sim_search.py +48 -21
- biobb_io/api/mmcif.py +53 -23
- biobb_io/api/pdb.py +57 -25
- biobb_io/api/pdb_cluster_zip.py +70 -32
- biobb_io/api/pdb_variants.py +108 -40
- biobb_io/api/structure_info.py +54 -22
- biobb_io/py.typed +0 -0
- {biobb_io-4.2.0.dist-info → biobb_io-5.0.1.dist-info}/METADATA +15 -16
- biobb_io-5.0.1.dist-info/RECORD +24 -0
- {biobb_io-4.2.0.dist-info → biobb_io-5.0.1.dist-info}/WHEEL +1 -1
- {biobb_io-4.2.0.dist-info → biobb_io-5.0.1.dist-info}/entry_points.txt +0 -1
- biobb_io/api/drugbank.py +0 -121
- biobb_io-4.2.0.dist-info/RECORD +0 -24
- {biobb_io-4.2.0.dist-info → biobb_io-5.0.1.dist-info}/LICENSE +0 -0
- {biobb_io-4.2.0.dist-info → biobb_io-5.0.1.dist-info}/top_level.txt +0 -0
biobb_io/api/common.py
CHANGED
|
@@ -1,39 +1,50 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
1
|
+
"""Common functions for package api"""
|
|
2
|
+
|
|
3
3
|
import json
|
|
4
|
-
import
|
|
4
|
+
import os
|
|
5
5
|
import re
|
|
6
6
|
import urllib.request
|
|
7
7
|
from pathlib import Path, PurePath
|
|
8
|
+
|
|
9
|
+
import requests
|
|
8
10
|
from biobb_common.tools import file_utils as fu
|
|
9
11
|
|
|
10
12
|
|
|
11
|
-
def check_output_path(path, argument, optional, out_log, classname):
|
|
12
|
-
"""
|
|
13
|
+
def check_output_path(path, argument, optional, out_log, classname) -> str:
|
|
14
|
+
"""Checks output file"""
|
|
13
15
|
if optional and not path:
|
|
14
|
-
return
|
|
16
|
+
return ""
|
|
15
17
|
if PurePath(path).parent and not Path(PurePath(path).parent).exists():
|
|
16
|
-
fu.log(classname +
|
|
17
|
-
raise SystemExit(classname +
|
|
18
|
+
fu.log(classname + ": Unexisting %s folder, exiting" % argument, out_log)
|
|
19
|
+
raise SystemExit(classname + ": Unexisting %s folder" % argument)
|
|
18
20
|
file_extension = PurePath(path).suffix
|
|
19
21
|
if not is_valid_file(file_extension[1:], argument):
|
|
20
|
-
fu.log(
|
|
21
|
-
|
|
22
|
+
fu.log(
|
|
23
|
+
classname
|
|
24
|
+
+ ": Format %s in %s file is not compatible"
|
|
25
|
+
% (file_extension[1:], argument),
|
|
26
|
+
out_log,
|
|
27
|
+
)
|
|
28
|
+
raise SystemExit(
|
|
29
|
+
classname
|
|
30
|
+
+ ": Format %s in %s file is not compatible"
|
|
31
|
+
% (file_extension[1:], argument)
|
|
32
|
+
)
|
|
22
33
|
return path
|
|
23
34
|
|
|
24
35
|
|
|
25
36
|
def is_valid_file(ext, argument):
|
|
26
|
-
"""
|
|
37
|
+
"""Checks if file format is compatible"""
|
|
27
38
|
formats = {
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
39
|
+
"output_sdf_path": ["sdf"],
|
|
40
|
+
"output_pdb_path": ["pdb"],
|
|
41
|
+
"output_simulations": ["json"],
|
|
42
|
+
"output_simulation": ["zip"],
|
|
43
|
+
"output_pdb_zip_path": ["zip"],
|
|
44
|
+
"output_mutations_list_txt": ["txt"],
|
|
45
|
+
"output_json_path": ["json"],
|
|
46
|
+
"output_fasta_path": ["fasta"],
|
|
47
|
+
"output_mmcif_path": ["mmcif", "cif"],
|
|
37
48
|
}
|
|
38
49
|
return ext in formats[argument]
|
|
39
50
|
|
|
@@ -44,15 +55,15 @@ def download_pdb(pdb_code, api_id, out_log=None, global_log=None):
|
|
|
44
55
|
String: Content of the pdb file.
|
|
45
56
|
"""
|
|
46
57
|
|
|
47
|
-
if api_id ==
|
|
58
|
+
if api_id == "mmb":
|
|
48
59
|
url = "https://mmb.irbbarcelona.org/api/pdb/" + pdb_code + "/coords/?"
|
|
49
|
-
elif api_id ==
|
|
60
|
+
elif api_id == "pdb":
|
|
50
61
|
url = "https://files.rcsb.org/download/" + pdb_code + ".pdb"
|
|
51
|
-
elif api_id ==
|
|
62
|
+
elif api_id == "pdbe":
|
|
52
63
|
url = "https://www.ebi.ac.uk/pdbe/entry-files/download/pdb" + pdb_code + ".ent"
|
|
53
64
|
|
|
54
65
|
fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
|
|
55
|
-
return requests.get(url).content.decode(
|
|
66
|
+
return requests.get(url).content.decode("utf-8")
|
|
56
67
|
|
|
57
68
|
|
|
58
69
|
def download_af(uniprot_code, out_log=None, global_log=None, classname=None):
|
|
@@ -66,11 +77,11 @@ def download_af(uniprot_code, out_log=None, global_log=None, classname=None):
|
|
|
66
77
|
fu.log("Downloading %s from: %s" % (uniprot_code, url), out_log, global_log)
|
|
67
78
|
|
|
68
79
|
r = requests.get(url)
|
|
69
|
-
if
|
|
70
|
-
fu.log(classname +
|
|
71
|
-
raise SystemExit(classname +
|
|
80
|
+
if r.status_code == 404:
|
|
81
|
+
fu.log(classname + ": Incorrect Uniprot Code: %s" % (uniprot_code), out_log)
|
|
82
|
+
raise SystemExit(classname + ": Incorrect Uniprot Code: %s" % (uniprot_code))
|
|
72
83
|
|
|
73
|
-
return r.content.decode(
|
|
84
|
+
return r.content.decode("utf-8")
|
|
74
85
|
|
|
75
86
|
|
|
76
87
|
def download_mmcif(pdb_code, api_id, out_log=None, global_log=None):
|
|
@@ -79,15 +90,15 @@ def download_mmcif(pdb_code, api_id, out_log=None, global_log=None):
|
|
|
79
90
|
String: Content of the mmcif file.
|
|
80
91
|
"""
|
|
81
92
|
|
|
82
|
-
if api_id ==
|
|
93
|
+
if api_id == "mmb":
|
|
83
94
|
url = "http://mmb.irbbarcelona.org/api/pdb/" + pdb_code + ".cif"
|
|
84
|
-
elif api_id ==
|
|
95
|
+
elif api_id == "pdb":
|
|
85
96
|
url = "https://files.rcsb.org/download/" + pdb_code + ".cif"
|
|
86
|
-
elif api_id ==
|
|
97
|
+
elif api_id == "pdbe":
|
|
87
98
|
url = "https://www.ebi.ac.uk/pdbe/entry-files/download/" + pdb_code + ".cif"
|
|
88
99
|
|
|
89
100
|
fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
|
|
90
|
-
return requests.get(url, verify=
|
|
101
|
+
return requests.get(url, verify=True).content.decode("utf-8")
|
|
91
102
|
|
|
92
103
|
|
|
93
104
|
def download_ligand(ligand_code, api_id, out_log=None, global_log=None):
|
|
@@ -96,12 +107,16 @@ def download_ligand(ligand_code, api_id, out_log=None, global_log=None):
|
|
|
96
107
|
String: Content of the ligand file.
|
|
97
108
|
"""
|
|
98
109
|
|
|
99
|
-
if api_id ==
|
|
110
|
+
if api_id == "mmb":
|
|
100
111
|
url = "http://mmb.irbbarcelona.org/api/pdbMonomer/" + ligand_code.lower()
|
|
101
|
-
text = requests.get(url, verify=
|
|
102
|
-
elif api_id ==
|
|
103
|
-
url =
|
|
104
|
-
|
|
112
|
+
text = requests.get(url, verify=True).content.decode("utf-8")
|
|
113
|
+
elif api_id == "pdbe":
|
|
114
|
+
url = (
|
|
115
|
+
"https://www.ebi.ac.uk/pdbe/static/files/pdbechem_v2/"
|
|
116
|
+
+ ligand_code.upper()
|
|
117
|
+
+ "_ideal.pdb"
|
|
118
|
+
)
|
|
119
|
+
text = urllib.request.urlopen(url).read().decode("utf-8")
|
|
105
120
|
|
|
106
121
|
fu.log("Downloading %s from: %s" % (ligand_code, url), out_log, global_log)
|
|
107
122
|
|
|
@@ -117,37 +132,28 @@ def download_fasta(pdb_code, api_id, out_log=None, global_log=None):
|
|
|
117
132
|
String: Content of the fasta file.
|
|
118
133
|
"""
|
|
119
134
|
|
|
120
|
-
if api_id ==
|
|
135
|
+
if api_id == "mmb":
|
|
121
136
|
url = "http://mmb.irbbarcelona.org/api/pdb/" + pdb_code + ".fasta"
|
|
122
|
-
elif api_id ==
|
|
137
|
+
elif api_id == "pdb":
|
|
123
138
|
url = "https://www.rcsb.org/fasta/entry/" + pdb_code
|
|
124
|
-
elif api_id ==
|
|
139
|
+
elif api_id == "pdbe":
|
|
125
140
|
url = "https://www.ebi.ac.uk/pdbe/entry/pdb/" + pdb_code + "/fasta"
|
|
126
141
|
|
|
127
142
|
fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
|
|
128
|
-
return requests.get(url, verify=
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
def download_drugbank(drugbank_id, url="https://www.drugbank.ca/structures/small_molecule_drugs/%s.sdf?type=3d", out_log=None, global_log=None):
|
|
132
|
-
"""
|
|
133
|
-
Returns:
|
|
134
|
-
String: Content of the component file.
|
|
135
|
-
"""
|
|
136
|
-
url = (url % drugbank_id)
|
|
137
|
-
|
|
138
|
-
fu.log("Downloading %s from: %s" % (drugbank_id, url), out_log, global_log)
|
|
139
|
-
|
|
140
|
-
text = requests.get(url, verify=False).content.decode('utf-8')
|
|
141
|
-
|
|
142
|
-
return text
|
|
143
|
+
return requests.get(url, verify=True).content.decode("utf-8")
|
|
143
144
|
|
|
144
145
|
|
|
145
|
-
def download_binding_site(
|
|
146
|
+
def download_binding_site(
|
|
147
|
+
pdb_code,
|
|
148
|
+
url="https://www.ebi.ac.uk/pdbe/api/pdb/entry/binding_sites/%s",
|
|
149
|
+
out_log=None,
|
|
150
|
+
global_log=None,
|
|
151
|
+
):
|
|
146
152
|
"""
|
|
147
153
|
Returns:
|
|
148
154
|
String: Content of the component file.
|
|
149
155
|
"""
|
|
150
|
-
url =
|
|
156
|
+
url = url % pdb_code
|
|
151
157
|
|
|
152
158
|
fu.log("Getting binding sites from: %s" % (url), out_log, global_log)
|
|
153
159
|
|
|
@@ -165,24 +171,37 @@ def download_ideal_sdf(ligand_code, api_id, out_log=None, global_log=None):
|
|
|
165
171
|
String: Content of the ideal sdf file.
|
|
166
172
|
"""
|
|
167
173
|
|
|
168
|
-
if api_id ==
|
|
169
|
-
url =
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
+
if api_id == "pdb":
|
|
175
|
+
url = (
|
|
176
|
+
"https://files.rcsb.org/ligands/download/"
|
|
177
|
+
+ ligand_code.upper()
|
|
178
|
+
+ "_ideal.sdf"
|
|
179
|
+
)
|
|
180
|
+
text = requests.get(url, verify=True).content.decode("utf-8")
|
|
181
|
+
elif api_id == "pdbe":
|
|
182
|
+
url = (
|
|
183
|
+
"https://www.ebi.ac.uk/pdbe/static/files/pdbechem_v2/"
|
|
184
|
+
+ ligand_code.upper()
|
|
185
|
+
+ "_ideal.sdf"
|
|
186
|
+
)
|
|
187
|
+
text = urllib.request.urlopen(url).read().decode("utf-8")
|
|
174
188
|
|
|
175
189
|
fu.log("Downloading %s from: %s" % (ligand_code, url), out_log, global_log)
|
|
176
190
|
|
|
177
191
|
return text
|
|
178
192
|
|
|
179
193
|
|
|
180
|
-
def download_str_info(
|
|
194
|
+
def download_str_info(
|
|
195
|
+
pdb_code,
|
|
196
|
+
url="http://mmb.irbbarcelona.org/api/pdb/%s.json",
|
|
197
|
+
out_log=None,
|
|
198
|
+
global_log=None,
|
|
199
|
+
):
|
|
181
200
|
"""
|
|
182
201
|
Returns:
|
|
183
202
|
String: Content of the JSON file.
|
|
184
203
|
"""
|
|
185
|
-
url =
|
|
204
|
+
url = url % pdb_code
|
|
186
205
|
|
|
187
206
|
fu.log("Getting structure info from: %s" % (url), out_log, global_log)
|
|
188
207
|
|
|
@@ -195,11 +214,15 @@ def download_str_info(pdb_code, url="http://mmb.irbbarcelona.org/api/pdb/%s.json
|
|
|
195
214
|
|
|
196
215
|
|
|
197
216
|
def write_pdb(pdb_string, output_pdb_path, filt=None, out_log=None, global_log=None):
|
|
198
|
-
"""
|
|
217
|
+
"""Writes and filters a PDB"""
|
|
199
218
|
fu.log("Writting pdb to: %s" % (output_pdb_path), out_log, global_log)
|
|
200
|
-
with open(output_pdb_path,
|
|
219
|
+
with open(output_pdb_path, "w") as output_pdb_file:
|
|
201
220
|
if filt:
|
|
202
|
-
fu.log(
|
|
221
|
+
fu.log(
|
|
222
|
+
"Filtering lines NOT starting with one of these words: %s" % str(filt),
|
|
223
|
+
out_log,
|
|
224
|
+
global_log,
|
|
225
|
+
)
|
|
203
226
|
for line in pdb_string.splitlines(True):
|
|
204
227
|
if line.strip().split()[0][0:6] in filt:
|
|
205
228
|
output_pdb_file.write(line)
|
|
@@ -208,23 +231,23 @@ def write_pdb(pdb_string, output_pdb_path, filt=None, out_log=None, global_log=N
|
|
|
208
231
|
|
|
209
232
|
|
|
210
233
|
def write_mmcif(mmcif_string, output_mmcif_path, out_log=None, global_log=None):
|
|
211
|
-
"""
|
|
234
|
+
"""Writes a mmcif"""
|
|
212
235
|
fu.log("Writting mmcif to: %s" % (output_mmcif_path), out_log, global_log)
|
|
213
|
-
with open(output_mmcif_path,
|
|
236
|
+
with open(output_mmcif_path, "w") as output_mmcif_file:
|
|
214
237
|
output_mmcif_file.write(mmcif_string)
|
|
215
238
|
|
|
216
239
|
|
|
217
240
|
def write_fasta(fasta_string, output_fasta_path, out_log=None, global_log=None):
|
|
218
|
-
"""
|
|
241
|
+
"""Writes a FASTA"""
|
|
219
242
|
fu.log("Writting FASTA to: %s" % (output_fasta_path), out_log, global_log)
|
|
220
|
-
with open(output_fasta_path,
|
|
243
|
+
with open(output_fasta_path, "w") as output_fasta_file:
|
|
221
244
|
output_fasta_file.write(fasta_string)
|
|
222
245
|
|
|
223
246
|
|
|
224
247
|
def write_sdf(sdf_string, output_sdf_path, out_log=None, global_log=None):
|
|
225
|
-
"""
|
|
248
|
+
"""Writes a SDF"""
|
|
226
249
|
fu.log("Writting sdf to: %s" % (output_sdf_path), out_log, global_log)
|
|
227
|
-
with open(output_sdf_path,
|
|
250
|
+
with open(output_sdf_path, "w") as output_sdf_file:
|
|
228
251
|
output_sdf_file.write(sdf_string)
|
|
229
252
|
|
|
230
253
|
|
|
@@ -236,15 +259,32 @@ def get_cluster_pdb_codes(pdb_code, cluster, out_log=None, global_log=None):
|
|
|
236
259
|
url = "http://mmb.irbbarcelona.org/api/pdb/"
|
|
237
260
|
pdb_codes = set()
|
|
238
261
|
|
|
239
|
-
url = url+pdb_code.lower()+
|
|
240
|
-
cluster_list = json.loads(requests.get(url, verify=
|
|
262
|
+
url = url + pdb_code.lower() + "/clusters/cl-" + str(cluster) + ".json"
|
|
263
|
+
cluster_list = json.loads(requests.get(url, verify=True).content.decode("utf-8"))[
|
|
264
|
+
"clusterMembers"
|
|
265
|
+
]
|
|
241
266
|
for elem in cluster_list:
|
|
242
|
-
pdb_codes.add(elem[
|
|
267
|
+
pdb_codes.add(elem["_id"].lower())
|
|
243
268
|
|
|
244
269
|
if out_log:
|
|
245
|
-
out_log.info(
|
|
270
|
+
out_log.info(
|
|
271
|
+
"Cluster: "
|
|
272
|
+
+ str(cluster)
|
|
273
|
+
+ " of pdb_code: "
|
|
274
|
+
+ pdb_code
|
|
275
|
+
+ "\n List: "
|
|
276
|
+
+ str(pdb_codes)
|
|
277
|
+
)
|
|
246
278
|
if global_log:
|
|
247
|
-
global_log.info(
|
|
279
|
+
global_log.info(
|
|
280
|
+
fu.get_logs_prefix()
|
|
281
|
+
+ "Cluster: "
|
|
282
|
+
+ str(cluster)
|
|
283
|
+
+ " of pdb_code: "
|
|
284
|
+
+ pdb_code
|
|
285
|
+
+ "\n List: "
|
|
286
|
+
+ str(pdb_codes)
|
|
287
|
+
)
|
|
248
288
|
|
|
249
289
|
return pdb_codes
|
|
250
290
|
|
|
@@ -255,67 +295,84 @@ def get_uniprot(pdb_code, url, out_log=None, global_log=None):
|
|
|
255
295
|
Returns:
|
|
256
296
|
str: UNIPROT code.
|
|
257
297
|
"""
|
|
258
|
-
url_uniprot_id =
|
|
259
|
-
uniprot_id = requests.get(url_uniprot_id, verify=
|
|
298
|
+
url_uniprot_id = url + "/pdb/" + pdb_code.lower() + "/entry/uniprotRefs/_id"
|
|
299
|
+
uniprot_id = requests.get(url_uniprot_id, verify=True).json()["uniprotRefs._id"][0]
|
|
260
300
|
|
|
261
301
|
if out_log:
|
|
262
|
-
out_log.info(
|
|
302
|
+
out_log.info(
|
|
303
|
+
"PDB code: " + pdb_code + " correspond to uniprot id: " + uniprot_id
|
|
304
|
+
)
|
|
263
305
|
if global_log:
|
|
264
|
-
global_log.info(
|
|
306
|
+
global_log.info(
|
|
307
|
+
"PDB code: " + pdb_code + " correspond to uniprot id: " + uniprot_id
|
|
308
|
+
)
|
|
265
309
|
|
|
266
310
|
return uniprot_id
|
|
267
311
|
|
|
268
312
|
|
|
269
|
-
def get_variants(
|
|
313
|
+
def get_variants(
|
|
314
|
+
uniprot_id, url="http://mmb.irbbarcelona.org/api", out_log=None, global_log=None
|
|
315
|
+
):
|
|
270
316
|
"""Returns the variants of the `uniprot_id` code.
|
|
271
317
|
|
|
272
318
|
Returns:
|
|
273
319
|
:obj:`list` of :obj:`str`: List of variants.
|
|
274
320
|
"""
|
|
275
|
-
url_uniprot_mut = (
|
|
276
|
-
|
|
321
|
+
url_uniprot_mut = (
|
|
322
|
+
url + "/uniprot/" + uniprot_id + "/entry/variants/vardata/mut/?varorig=humsavar"
|
|
323
|
+
)
|
|
324
|
+
variants = requests.get(url_uniprot_mut, verify=True).json()["variants.vardata.mut"]
|
|
277
325
|
variants = variants if variants else []
|
|
278
326
|
|
|
279
|
-
fu.log(
|
|
327
|
+
fu.log(
|
|
328
|
+
"Found: %d variants for uniprot id: %s" % (len(variants), uniprot_id),
|
|
329
|
+
out_log,
|
|
330
|
+
global_log,
|
|
331
|
+
)
|
|
280
332
|
return variants if variants else []
|
|
281
333
|
|
|
282
334
|
|
|
283
335
|
def write_json(json_string, output_json_path, out_log=None, global_log=None):
|
|
284
|
-
"""
|
|
336
|
+
"""Writes a JSON"""
|
|
285
337
|
fu.log("Writting json to: %s" % (output_json_path), out_log, global_log)
|
|
286
|
-
with open(output_json_path,
|
|
338
|
+
with open(output_json_path, "w") as output_json_file:
|
|
287
339
|
output_json_file.write(json_string)
|
|
288
340
|
|
|
289
341
|
|
|
290
342
|
def get_memprotmd_sim_list(out_log=None, global_log=None):
|
|
291
|
-
"""
|
|
343
|
+
"""Returns all available membrane-protein systems (simulations) from the MemProtMD DB using its REST API"""
|
|
292
344
|
|
|
293
|
-
fu.log(
|
|
345
|
+
fu.log(
|
|
346
|
+
"Getting all available membrane-protein systems (simulations) from the MemProtMD REST API",
|
|
347
|
+
out_log,
|
|
348
|
+
global_log,
|
|
349
|
+
)
|
|
294
350
|
|
|
295
351
|
url = "http://memprotmd.bioch.ox.ac.uk/api/simulations/all"
|
|
296
352
|
json_obj = requests.post(url).json()
|
|
297
353
|
json_string = json.dumps(json_obj, indent=4)
|
|
298
354
|
|
|
299
|
-
fu.log(
|
|
355
|
+
fu.log("Total number of simulations: %d" % (len(json_obj)), out_log, global_log)
|
|
300
356
|
|
|
301
357
|
return json_string
|
|
302
358
|
|
|
303
359
|
|
|
304
360
|
def get_memprotmd_sim_search(collection_name, keyword, out_log=None, global_log=None):
|
|
305
|
-
"""
|
|
361
|
+
"""Performs advanced searches in the MemProtMD DB using its REST API and a given keyword"""
|
|
306
362
|
|
|
307
|
-
fu.log(
|
|
363
|
+
fu.log(
|
|
364
|
+
"Getting search results from the MemProtMD REST API. Collection name: %s, keyword: %s"
|
|
365
|
+
% (collection_name, keyword),
|
|
366
|
+
out_log,
|
|
367
|
+
global_log,
|
|
368
|
+
)
|
|
308
369
|
|
|
309
370
|
url = "http://memprotmd.bioch.ox.ac.uk/api/search/advanced"
|
|
310
371
|
json_query = {
|
|
311
372
|
"collectionName": collection_name,
|
|
312
|
-
"query": {
|
|
313
|
-
|
|
314
|
-
},
|
|
315
|
-
"projection": {
|
|
316
|
-
"simulations": 1
|
|
317
|
-
},
|
|
318
|
-
"options": {}
|
|
373
|
+
"query": {"keywords": keyword},
|
|
374
|
+
"projection": {"simulations": 1},
|
|
375
|
+
"options": {},
|
|
319
376
|
}
|
|
320
377
|
|
|
321
378
|
json_obj = requests.post(url, json=json_query).json()
|
|
@@ -324,43 +381,49 @@ def get_memprotmd_sim_search(collection_name, keyword, out_log=None, global_log=
|
|
|
324
381
|
# get total number of simulation
|
|
325
382
|
list_kw = []
|
|
326
383
|
for sim_list in json_obj:
|
|
327
|
-
for sim in sim_list[
|
|
384
|
+
for sim in sim_list["simulations"]:
|
|
328
385
|
list_kw.append(sim)
|
|
329
386
|
|
|
330
|
-
fu.log(
|
|
387
|
+
fu.log("Total number of simulations: %d" % (len(list_kw)), out_log, global_log)
|
|
331
388
|
|
|
332
389
|
return json_string
|
|
333
390
|
|
|
334
391
|
|
|
335
392
|
def get_memprotmd_sim(pdb_code, output_file, out_log=None, global_log=None):
|
|
336
|
-
"""
|
|
393
|
+
"""Gets a single simulation from MemProtMD DB"""
|
|
337
394
|
|
|
338
|
-
fu.log(
|
|
395
|
+
fu.log("Getting simulation file from pdb code %s" % (pdb_code), out_log, global_log)
|
|
339
396
|
|
|
340
|
-
url =
|
|
397
|
+
url = (
|
|
398
|
+
"http://memprotmd.bioch.ox.ac.uk/data/memprotmd/simulations/"
|
|
399
|
+
+ pdb_code
|
|
400
|
+
+ "_default_dppc/files/run/at.zip"
|
|
401
|
+
)
|
|
341
402
|
response = requests.get(url)
|
|
342
403
|
|
|
343
|
-
open(output_file,
|
|
404
|
+
open(output_file, "wb").write(response.content)
|
|
344
405
|
|
|
345
406
|
fu.log("Saving output %s file" % (output_file), out_log, global_log)
|
|
346
407
|
|
|
347
408
|
|
|
348
409
|
def check_mandatory_property(property, name, out_log, classname):
|
|
349
|
-
"""
|
|
410
|
+
"""Checks mandatory properties"""
|
|
350
411
|
|
|
351
412
|
if not property:
|
|
352
|
-
fu.log(classname +
|
|
353
|
-
raise SystemExit(classname +
|
|
413
|
+
fu.log(classname + ": Unexisting %s property, exiting" % name, out_log)
|
|
414
|
+
raise SystemExit(classname + ": Unexisting %s property" % name)
|
|
354
415
|
return property
|
|
355
416
|
|
|
356
417
|
|
|
357
418
|
def check_uniprot_code(code, out_log, classname):
|
|
358
|
-
"""
|
|
419
|
+
"""Checks uniprot code"""
|
|
359
420
|
|
|
360
|
-
pattern = re.compile(
|
|
421
|
+
pattern = re.compile(
|
|
422
|
+
(r"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}")
|
|
423
|
+
)
|
|
361
424
|
|
|
362
425
|
if not pattern.match(code):
|
|
363
|
-
fu.log(classname +
|
|
364
|
-
raise SystemExit(classname +
|
|
426
|
+
fu.log(classname + ": Incorrect uniprot code for %s" % code, out_log)
|
|
427
|
+
raise SystemExit(classname + ": Incorrect uniprot code for %s" % code)
|
|
365
428
|
|
|
366
429
|
return True
|
biobb_io/api/ideal_sdf.py
CHANGED
|
@@ -1,11 +1,20 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
|
|
3
3
|
"""Module containing the IdealSdf class and the command line interface."""
|
|
4
|
+
|
|
4
5
|
import argparse
|
|
5
|
-
from
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
6
8
|
from biobb_common.configuration import settings
|
|
9
|
+
from biobb_common.generic.biobb_object import BiobbObject
|
|
7
10
|
from biobb_common.tools.file_utils import launchlogger
|
|
8
|
-
|
|
11
|
+
|
|
12
|
+
from biobb_io.api.common import (
|
|
13
|
+
check_mandatory_property,
|
|
14
|
+
check_output_path,
|
|
15
|
+
download_ideal_sdf,
|
|
16
|
+
write_sdf,
|
|
17
|
+
)
|
|
9
18
|
|
|
10
19
|
|
|
11
20
|
class IdealSdf(BiobbObject):
|
|
@@ -21,6 +30,7 @@ class IdealSdf(BiobbObject):
|
|
|
21
30
|
* **api_id** (*str*) - ("pdbe") Identifier of the PDB REST API from which the SDF structure will be downloaded. Values: pdbe (`PDB in Europe REST API <https://www.ebi.ac.uk/pdbe/pdbe-rest-api>`_), pdb (`RCSB PDB REST API <https://data.rcsb.org/>`_).
|
|
22
31
|
* **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
|
|
23
32
|
* **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
|
|
33
|
+
* **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
|
|
24
34
|
|
|
25
35
|
Examples:
|
|
26
36
|
This is a use example of how to use the building block from Python::
|
|
@@ -43,8 +53,7 @@ class IdealSdf(BiobbObject):
|
|
|
43
53
|
|
|
44
54
|
"""
|
|
45
55
|
|
|
46
|
-
def __init__(self, output_sdf_path,
|
|
47
|
-
properties=None, **kwargs) -> None:
|
|
56
|
+
def __init__(self, output_sdf_path, properties=None, **kwargs) -> None:
|
|
48
57
|
properties = properties or {}
|
|
49
58
|
|
|
50
59
|
# Call parent class constructor
|
|
@@ -52,13 +61,11 @@ class IdealSdf(BiobbObject):
|
|
|
52
61
|
self.locals_var_dict = locals().copy()
|
|
53
62
|
|
|
54
63
|
# Input/Output files
|
|
55
|
-
self.io_dict = {
|
|
56
|
-
"out": {"output_sdf_path": output_sdf_path}
|
|
57
|
-
}
|
|
64
|
+
self.io_dict = {"out": {"output_sdf_path": output_sdf_path}}
|
|
58
65
|
|
|
59
66
|
# Properties specific for BB
|
|
60
|
-
self.api_id = properties.get(
|
|
61
|
-
self.ligand_code = properties.get(
|
|
67
|
+
self.api_id = properties.get("api_id", "pdbe")
|
|
68
|
+
self.ligand_code = properties.get("ligand_code", None)
|
|
62
69
|
self.properties = properties
|
|
63
70
|
|
|
64
71
|
# Check the properties
|
|
@@ -66,8 +73,14 @@ class IdealSdf(BiobbObject):
|
|
|
66
73
|
self.check_arguments()
|
|
67
74
|
|
|
68
75
|
def check_data_params(self, out_log, err_log):
|
|
69
|
-
"""
|
|
70
|
-
self.output_sdf_path = check_output_path(
|
|
76
|
+
"""Checks all the input/output paths and parameters"""
|
|
77
|
+
self.output_sdf_path = check_output_path(
|
|
78
|
+
self.io_dict["out"]["output_sdf_path"],
|
|
79
|
+
"output_sdf_path",
|
|
80
|
+
False,
|
|
81
|
+
out_log,
|
|
82
|
+
self.__class__.__name__,
|
|
83
|
+
)
|
|
71
84
|
|
|
72
85
|
@launchlogger
|
|
73
86
|
def launch(self) -> int:
|
|
@@ -80,12 +93,16 @@ class IdealSdf(BiobbObject):
|
|
|
80
93
|
if self.check_restart():
|
|
81
94
|
return 0
|
|
82
95
|
|
|
83
|
-
check_mandatory_property(
|
|
96
|
+
check_mandatory_property(
|
|
97
|
+
self.ligand_code, "ligand_code", self.out_log, self.__class__.__name__
|
|
98
|
+
)
|
|
84
99
|
|
|
85
100
|
self.ligand_code = self.ligand_code.strip()
|
|
86
101
|
|
|
87
102
|
# Downloading PDB file
|
|
88
|
-
sdf_string = download_ideal_sdf(
|
|
103
|
+
sdf_string = download_ideal_sdf(
|
|
104
|
+
self.ligand_code, self.api_id, self.out_log, self.global_log
|
|
105
|
+
)
|
|
89
106
|
write_sdf(sdf_string, self.output_sdf_path, self.out_log, self.global_log)
|
|
90
107
|
|
|
91
108
|
self.check_arguments(output_files_created=True, raise_exception=False)
|
|
@@ -93,31 +110,44 @@ class IdealSdf(BiobbObject):
|
|
|
93
110
|
return 0
|
|
94
111
|
|
|
95
112
|
|
|
96
|
-
def ideal_sdf(output_sdf_path: str, properties: dict = None, **kwargs) -> int:
|
|
113
|
+
def ideal_sdf(output_sdf_path: str, properties: Optional[dict] = None, **kwargs) -> int:
|
|
97
114
|
"""Execute the :class:`IdealSdf <api.ideal_sdf.IdealSdf>` class and
|
|
98
115
|
execute the :meth:`launch() <api.ideal_sdf.IdealSdf.launch>` method."""
|
|
99
116
|
|
|
100
|
-
return IdealSdf(
|
|
101
|
-
|
|
117
|
+
return IdealSdf(
|
|
118
|
+
output_sdf_path=output_sdf_path, properties=properties, **kwargs
|
|
119
|
+
).launch()
|
|
102
120
|
|
|
103
121
|
|
|
104
122
|
def main():
|
|
105
123
|
"""Command line execution of this building block. Please check the command line documentation."""
|
|
106
|
-
parser = argparse.ArgumentParser(
|
|
107
|
-
|
|
124
|
+
parser = argparse.ArgumentParser(
|
|
125
|
+
description="This class is a wrapper for downloading an ideal SDF ligand from the Protein Data Bank.",
|
|
126
|
+
formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999),
|
|
127
|
+
)
|
|
128
|
+
parser.add_argument(
|
|
129
|
+
"-c",
|
|
130
|
+
"--config",
|
|
131
|
+
required=False,
|
|
132
|
+
help="This file can be a YAML file, JSON file or JSON string",
|
|
133
|
+
)
|
|
108
134
|
|
|
109
135
|
# Specific args of each building block
|
|
110
|
-
required_args = parser.add_argument_group(
|
|
111
|
-
required_args.add_argument(
|
|
136
|
+
required_args = parser.add_argument_group("required arguments")
|
|
137
|
+
required_args.add_argument(
|
|
138
|
+
"-o",
|
|
139
|
+
"--output_sdf_path",
|
|
140
|
+
required=True,
|
|
141
|
+
help="Path to the output SDF file. Accepted formats: sdf.",
|
|
142
|
+
)
|
|
112
143
|
|
|
113
144
|
args = parser.parse_args()
|
|
114
145
|
config = args.config if args.config else None
|
|
115
146
|
properties = settings.ConfReader(config=config).get_prop_dic()
|
|
116
147
|
|
|
117
148
|
# Specific call of each building block
|
|
118
|
-
ideal_sdf(output_sdf_path=args.output_sdf_path,
|
|
119
|
-
properties=properties)
|
|
149
|
+
ideal_sdf(output_sdf_path=args.output_sdf_path, properties=properties)
|
|
120
150
|
|
|
121
151
|
|
|
122
|
-
if __name__ ==
|
|
152
|
+
if __name__ == "__main__":
|
|
123
153
|
main()
|