biobb-io 4.2.0__py3-none-any.whl → 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biobb_io/__init__.py +3 -1
- biobb_io/api/__init__.py +33 -1
- biobb_io/api/alphafold.py +53 -22
- biobb_io/api/api_binding_site.py +59 -23
- biobb_io/api/canonical_fasta.py +55 -23
- biobb_io/api/common.py +185 -103
- biobb_io/api/drugbank.py +52 -22
- biobb_io/api/ideal_sdf.py +53 -23
- biobb_io/api/ligand.py +53 -23
- biobb_io/api/memprotmd_sim.py +53 -22
- biobb_io/api/memprotmd_sim_list.py +44 -18
- biobb_io/api/memprotmd_sim_search.py +48 -21
- biobb_io/api/mmcif.py +53 -23
- biobb_io/api/pdb.py +57 -25
- biobb_io/api/pdb_cluster_zip.py +70 -32
- biobb_io/api/pdb_variants.py +108 -40
- biobb_io/api/structure_info.py +54 -22
- biobb_io/py.typed +0 -0
- {biobb_io-4.2.0.dist-info → biobb_io-5.0.0.dist-info}/METADATA +15 -16
- biobb_io-5.0.0.dist-info/RECORD +25 -0
- {biobb_io-4.2.0.dist-info → biobb_io-5.0.0.dist-info}/WHEEL +1 -1
- biobb_io-4.2.0.dist-info/RECORD +0 -24
- {biobb_io-4.2.0.dist-info → biobb_io-5.0.0.dist-info}/LICENSE +0 -0
- {biobb_io-4.2.0.dist-info → biobb_io-5.0.0.dist-info}/entry_points.txt +0 -0
- {biobb_io-4.2.0.dist-info → biobb_io-5.0.0.dist-info}/top_level.txt +0 -0
biobb_io/api/common.py
CHANGED
|
@@ -1,39 +1,50 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
1
|
+
"""Common functions for package api"""
|
|
2
|
+
|
|
3
3
|
import json
|
|
4
|
-
import
|
|
4
|
+
import os
|
|
5
5
|
import re
|
|
6
6
|
import urllib.request
|
|
7
7
|
from pathlib import Path, PurePath
|
|
8
|
+
|
|
9
|
+
import requests
|
|
8
10
|
from biobb_common.tools import file_utils as fu
|
|
9
11
|
|
|
10
12
|
|
|
11
|
-
def check_output_path(path, argument, optional, out_log, classname):
|
|
12
|
-
"""
|
|
13
|
+
def check_output_path(path, argument, optional, out_log, classname) -> str:
|
|
14
|
+
"""Checks output file"""
|
|
13
15
|
if optional and not path:
|
|
14
|
-
return
|
|
16
|
+
return ""
|
|
15
17
|
if PurePath(path).parent and not Path(PurePath(path).parent).exists():
|
|
16
|
-
fu.log(classname +
|
|
17
|
-
raise SystemExit(classname +
|
|
18
|
+
fu.log(classname + ": Unexisting %s folder, exiting" % argument, out_log)
|
|
19
|
+
raise SystemExit(classname + ": Unexisting %s folder" % argument)
|
|
18
20
|
file_extension = PurePath(path).suffix
|
|
19
21
|
if not is_valid_file(file_extension[1:], argument):
|
|
20
|
-
fu.log(
|
|
21
|
-
|
|
22
|
+
fu.log(
|
|
23
|
+
classname
|
|
24
|
+
+ ": Format %s in %s file is not compatible"
|
|
25
|
+
% (file_extension[1:], argument),
|
|
26
|
+
out_log,
|
|
27
|
+
)
|
|
28
|
+
raise SystemExit(
|
|
29
|
+
classname
|
|
30
|
+
+ ": Format %s in %s file is not compatible"
|
|
31
|
+
% (file_extension[1:], argument)
|
|
32
|
+
)
|
|
22
33
|
return path
|
|
23
34
|
|
|
24
35
|
|
|
25
36
|
def is_valid_file(ext, argument):
|
|
26
|
-
"""
|
|
37
|
+
"""Checks if file format is compatible"""
|
|
27
38
|
formats = {
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
39
|
+
"output_sdf_path": ["sdf"],
|
|
40
|
+
"output_pdb_path": ["pdb"],
|
|
41
|
+
"output_simulations": ["json"],
|
|
42
|
+
"output_simulation": ["zip"],
|
|
43
|
+
"output_pdb_zip_path": ["zip"],
|
|
44
|
+
"output_mutations_list_txt": ["txt"],
|
|
45
|
+
"output_json_path": ["json"],
|
|
46
|
+
"output_fasta_path": ["fasta"],
|
|
47
|
+
"output_mmcif_path": ["mmcif", "cif"],
|
|
37
48
|
}
|
|
38
49
|
return ext in formats[argument]
|
|
39
50
|
|
|
@@ -44,15 +55,15 @@ def download_pdb(pdb_code, api_id, out_log=None, global_log=None):
|
|
|
44
55
|
String: Content of the pdb file.
|
|
45
56
|
"""
|
|
46
57
|
|
|
47
|
-
if api_id ==
|
|
58
|
+
if api_id == "mmb":
|
|
48
59
|
url = "https://mmb.irbbarcelona.org/api/pdb/" + pdb_code + "/coords/?"
|
|
49
|
-
elif api_id ==
|
|
60
|
+
elif api_id == "pdb":
|
|
50
61
|
url = "https://files.rcsb.org/download/" + pdb_code + ".pdb"
|
|
51
|
-
elif api_id ==
|
|
62
|
+
elif api_id == "pdbe":
|
|
52
63
|
url = "https://www.ebi.ac.uk/pdbe/entry-files/download/pdb" + pdb_code + ".ent"
|
|
53
64
|
|
|
54
65
|
fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
|
|
55
|
-
return requests.get(url).content.decode(
|
|
66
|
+
return requests.get(url).content.decode("utf-8")
|
|
56
67
|
|
|
57
68
|
|
|
58
69
|
def download_af(uniprot_code, out_log=None, global_log=None, classname=None):
|
|
@@ -66,11 +77,11 @@ def download_af(uniprot_code, out_log=None, global_log=None, classname=None):
|
|
|
66
77
|
fu.log("Downloading %s from: %s" % (uniprot_code, url), out_log, global_log)
|
|
67
78
|
|
|
68
79
|
r = requests.get(url)
|
|
69
|
-
if
|
|
70
|
-
fu.log(classname +
|
|
71
|
-
raise SystemExit(classname +
|
|
80
|
+
if r.status_code == 404:
|
|
81
|
+
fu.log(classname + ": Incorrect Uniprot Code: %s" % (uniprot_code), out_log)
|
|
82
|
+
raise SystemExit(classname + ": Incorrect Uniprot Code: %s" % (uniprot_code))
|
|
72
83
|
|
|
73
|
-
return r.content.decode(
|
|
84
|
+
return r.content.decode("utf-8")
|
|
74
85
|
|
|
75
86
|
|
|
76
87
|
def download_mmcif(pdb_code, api_id, out_log=None, global_log=None):
|
|
@@ -79,15 +90,15 @@ def download_mmcif(pdb_code, api_id, out_log=None, global_log=None):
|
|
|
79
90
|
String: Content of the mmcif file.
|
|
80
91
|
"""
|
|
81
92
|
|
|
82
|
-
if api_id ==
|
|
93
|
+
if api_id == "mmb":
|
|
83
94
|
url = "http://mmb.irbbarcelona.org/api/pdb/" + pdb_code + ".cif"
|
|
84
|
-
elif api_id ==
|
|
95
|
+
elif api_id == "pdb":
|
|
85
96
|
url = "https://files.rcsb.org/download/" + pdb_code + ".cif"
|
|
86
|
-
elif api_id ==
|
|
97
|
+
elif api_id == "pdbe":
|
|
87
98
|
url = "https://www.ebi.ac.uk/pdbe/entry-files/download/" + pdb_code + ".cif"
|
|
88
99
|
|
|
89
100
|
fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
|
|
90
|
-
return requests.get(url, verify=
|
|
101
|
+
return requests.get(url, verify=True).content.decode("utf-8")
|
|
91
102
|
|
|
92
103
|
|
|
93
104
|
def download_ligand(ligand_code, api_id, out_log=None, global_log=None):
|
|
@@ -96,12 +107,16 @@ def download_ligand(ligand_code, api_id, out_log=None, global_log=None):
|
|
|
96
107
|
String: Content of the ligand file.
|
|
97
108
|
"""
|
|
98
109
|
|
|
99
|
-
if api_id ==
|
|
110
|
+
if api_id == "mmb":
|
|
100
111
|
url = "http://mmb.irbbarcelona.org/api/pdbMonomer/" + ligand_code.lower()
|
|
101
|
-
text = requests.get(url, verify=
|
|
102
|
-
elif api_id ==
|
|
103
|
-
url =
|
|
104
|
-
|
|
112
|
+
text = requests.get(url, verify=True).content.decode("utf-8")
|
|
113
|
+
elif api_id == "pdbe":
|
|
114
|
+
url = (
|
|
115
|
+
"https://www.ebi.ac.uk/pdbe/static/files/pdbechem_v2/"
|
|
116
|
+
+ ligand_code.upper()
|
|
117
|
+
+ "_ideal.pdb"
|
|
118
|
+
)
|
|
119
|
+
text = urllib.request.urlopen(url).read().decode("utf-8")
|
|
105
120
|
|
|
106
121
|
fu.log("Downloading %s from: %s" % (ligand_code, url), out_log, global_log)
|
|
107
122
|
|
|
@@ -117,37 +132,47 @@ def download_fasta(pdb_code, api_id, out_log=None, global_log=None):
|
|
|
117
132
|
String: Content of the fasta file.
|
|
118
133
|
"""
|
|
119
134
|
|
|
120
|
-
if api_id ==
|
|
135
|
+
if api_id == "mmb":
|
|
121
136
|
url = "http://mmb.irbbarcelona.org/api/pdb/" + pdb_code + ".fasta"
|
|
122
|
-
elif api_id ==
|
|
137
|
+
elif api_id == "pdb":
|
|
123
138
|
url = "https://www.rcsb.org/fasta/entry/" + pdb_code
|
|
124
|
-
elif api_id ==
|
|
139
|
+
elif api_id == "pdbe":
|
|
125
140
|
url = "https://www.ebi.ac.uk/pdbe/entry/pdb/" + pdb_code + "/fasta"
|
|
126
141
|
|
|
127
142
|
fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
|
|
128
|
-
return requests.get(url, verify=
|
|
143
|
+
return requests.get(url, verify=True).content.decode("utf-8")
|
|
129
144
|
|
|
130
145
|
|
|
131
|
-
def download_drugbank(
|
|
146
|
+
def download_drugbank(
|
|
147
|
+
drugbank_id,
|
|
148
|
+
url="https://www.drugbank.ca/structures/small_molecule_drugs/%s.sdf?type=3d",
|
|
149
|
+
out_log=None,
|
|
150
|
+
global_log=None,
|
|
151
|
+
):
|
|
132
152
|
"""
|
|
133
153
|
Returns:
|
|
134
154
|
String: Content of the component file.
|
|
135
155
|
"""
|
|
136
|
-
url =
|
|
156
|
+
url = url % drugbank_id
|
|
137
157
|
|
|
138
158
|
fu.log("Downloading %s from: %s" % (drugbank_id, url), out_log, global_log)
|
|
139
159
|
|
|
140
|
-
text = requests.get(url, verify=
|
|
160
|
+
text = requests.get(url, verify=True).content.decode("utf-8")
|
|
141
161
|
|
|
142
162
|
return text
|
|
143
163
|
|
|
144
164
|
|
|
145
|
-
def download_binding_site(
|
|
165
|
+
def download_binding_site(
|
|
166
|
+
pdb_code,
|
|
167
|
+
url="https://www.ebi.ac.uk/pdbe/api/pdb/entry/binding_sites/%s",
|
|
168
|
+
out_log=None,
|
|
169
|
+
global_log=None,
|
|
170
|
+
):
|
|
146
171
|
"""
|
|
147
172
|
Returns:
|
|
148
173
|
String: Content of the component file.
|
|
149
174
|
"""
|
|
150
|
-
url =
|
|
175
|
+
url = url % pdb_code
|
|
151
176
|
|
|
152
177
|
fu.log("Getting binding sites from: %s" % (url), out_log, global_log)
|
|
153
178
|
|
|
@@ -165,24 +190,37 @@ def download_ideal_sdf(ligand_code, api_id, out_log=None, global_log=None):
|
|
|
165
190
|
String: Content of the ideal sdf file.
|
|
166
191
|
"""
|
|
167
192
|
|
|
168
|
-
if api_id ==
|
|
169
|
-
url =
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
193
|
+
if api_id == "pdb":
|
|
194
|
+
url = (
|
|
195
|
+
"https://files.rcsb.org/ligands/download/"
|
|
196
|
+
+ ligand_code.upper()
|
|
197
|
+
+ "_ideal.sdf"
|
|
198
|
+
)
|
|
199
|
+
text = requests.get(url, verify=True).content.decode("utf-8")
|
|
200
|
+
elif api_id == "pdbe":
|
|
201
|
+
url = (
|
|
202
|
+
"https://www.ebi.ac.uk/pdbe/static/files/pdbechem_v2/"
|
|
203
|
+
+ ligand_code.upper()
|
|
204
|
+
+ "_ideal.sdf"
|
|
205
|
+
)
|
|
206
|
+
text = urllib.request.urlopen(url).read().decode("utf-8")
|
|
174
207
|
|
|
175
208
|
fu.log("Downloading %s from: %s" % (ligand_code, url), out_log, global_log)
|
|
176
209
|
|
|
177
210
|
return text
|
|
178
211
|
|
|
179
212
|
|
|
180
|
-
def download_str_info(
|
|
213
|
+
def download_str_info(
|
|
214
|
+
pdb_code,
|
|
215
|
+
url="http://mmb.irbbarcelona.org/api/pdb/%s.json",
|
|
216
|
+
out_log=None,
|
|
217
|
+
global_log=None,
|
|
218
|
+
):
|
|
181
219
|
"""
|
|
182
220
|
Returns:
|
|
183
221
|
String: Content of the JSON file.
|
|
184
222
|
"""
|
|
185
|
-
url =
|
|
223
|
+
url = url % pdb_code
|
|
186
224
|
|
|
187
225
|
fu.log("Getting structure info from: %s" % (url), out_log, global_log)
|
|
188
226
|
|
|
@@ -195,11 +233,15 @@ def download_str_info(pdb_code, url="http://mmb.irbbarcelona.org/api/pdb/%s.json
|
|
|
195
233
|
|
|
196
234
|
|
|
197
235
|
def write_pdb(pdb_string, output_pdb_path, filt=None, out_log=None, global_log=None):
|
|
198
|
-
"""
|
|
236
|
+
"""Writes and filters a PDB"""
|
|
199
237
|
fu.log("Writting pdb to: %s" % (output_pdb_path), out_log, global_log)
|
|
200
|
-
with open(output_pdb_path,
|
|
238
|
+
with open(output_pdb_path, "w") as output_pdb_file:
|
|
201
239
|
if filt:
|
|
202
|
-
fu.log(
|
|
240
|
+
fu.log(
|
|
241
|
+
"Filtering lines NOT starting with one of these words: %s" % str(filt),
|
|
242
|
+
out_log,
|
|
243
|
+
global_log,
|
|
244
|
+
)
|
|
203
245
|
for line in pdb_string.splitlines(True):
|
|
204
246
|
if line.strip().split()[0][0:6] in filt:
|
|
205
247
|
output_pdb_file.write(line)
|
|
@@ -208,23 +250,23 @@ def write_pdb(pdb_string, output_pdb_path, filt=None, out_log=None, global_log=N
|
|
|
208
250
|
|
|
209
251
|
|
|
210
252
|
def write_mmcif(mmcif_string, output_mmcif_path, out_log=None, global_log=None):
|
|
211
|
-
"""
|
|
253
|
+
"""Writes a mmcif"""
|
|
212
254
|
fu.log("Writting mmcif to: %s" % (output_mmcif_path), out_log, global_log)
|
|
213
|
-
with open(output_mmcif_path,
|
|
255
|
+
with open(output_mmcif_path, "w") as output_mmcif_file:
|
|
214
256
|
output_mmcif_file.write(mmcif_string)
|
|
215
257
|
|
|
216
258
|
|
|
217
259
|
def write_fasta(fasta_string, output_fasta_path, out_log=None, global_log=None):
|
|
218
|
-
"""
|
|
260
|
+
"""Writes a FASTA"""
|
|
219
261
|
fu.log("Writting FASTA to: %s" % (output_fasta_path), out_log, global_log)
|
|
220
|
-
with open(output_fasta_path,
|
|
262
|
+
with open(output_fasta_path, "w") as output_fasta_file:
|
|
221
263
|
output_fasta_file.write(fasta_string)
|
|
222
264
|
|
|
223
265
|
|
|
224
266
|
def write_sdf(sdf_string, output_sdf_path, out_log=None, global_log=None):
|
|
225
|
-
"""
|
|
267
|
+
"""Writes a SDF"""
|
|
226
268
|
fu.log("Writting sdf to: %s" % (output_sdf_path), out_log, global_log)
|
|
227
|
-
with open(output_sdf_path,
|
|
269
|
+
with open(output_sdf_path, "w") as output_sdf_file:
|
|
228
270
|
output_sdf_file.write(sdf_string)
|
|
229
271
|
|
|
230
272
|
|
|
@@ -236,15 +278,32 @@ def get_cluster_pdb_codes(pdb_code, cluster, out_log=None, global_log=None):
|
|
|
236
278
|
url = "http://mmb.irbbarcelona.org/api/pdb/"
|
|
237
279
|
pdb_codes = set()
|
|
238
280
|
|
|
239
|
-
url = url+pdb_code.lower()+
|
|
240
|
-
cluster_list = json.loads(requests.get(url, verify=
|
|
281
|
+
url = url + pdb_code.lower() + "/clusters/cl-" + str(cluster) + ".json"
|
|
282
|
+
cluster_list = json.loads(requests.get(url, verify=True).content.decode("utf-8"))[
|
|
283
|
+
"clusterMembers"
|
|
284
|
+
]
|
|
241
285
|
for elem in cluster_list:
|
|
242
|
-
pdb_codes.add(elem[
|
|
286
|
+
pdb_codes.add(elem["_id"].lower())
|
|
243
287
|
|
|
244
288
|
if out_log:
|
|
245
|
-
out_log.info(
|
|
289
|
+
out_log.info(
|
|
290
|
+
"Cluster: "
|
|
291
|
+
+ str(cluster)
|
|
292
|
+
+ " of pdb_code: "
|
|
293
|
+
+ pdb_code
|
|
294
|
+
+ "\n List: "
|
|
295
|
+
+ str(pdb_codes)
|
|
296
|
+
)
|
|
246
297
|
if global_log:
|
|
247
|
-
global_log.info(
|
|
298
|
+
global_log.info(
|
|
299
|
+
fu.get_logs_prefix()
|
|
300
|
+
+ "Cluster: "
|
|
301
|
+
+ str(cluster)
|
|
302
|
+
+ " of pdb_code: "
|
|
303
|
+
+ pdb_code
|
|
304
|
+
+ "\n List: "
|
|
305
|
+
+ str(pdb_codes)
|
|
306
|
+
)
|
|
248
307
|
|
|
249
308
|
return pdb_codes
|
|
250
309
|
|
|
@@ -255,67 +314,84 @@ def get_uniprot(pdb_code, url, out_log=None, global_log=None):
|
|
|
255
314
|
Returns:
|
|
256
315
|
str: UNIPROT code.
|
|
257
316
|
"""
|
|
258
|
-
url_uniprot_id =
|
|
259
|
-
uniprot_id = requests.get(url_uniprot_id, verify=
|
|
317
|
+
url_uniprot_id = url + "/pdb/" + pdb_code.lower() + "/entry/uniprotRefs/_id"
|
|
318
|
+
uniprot_id = requests.get(url_uniprot_id, verify=True).json()["uniprotRefs._id"][0]
|
|
260
319
|
|
|
261
320
|
if out_log:
|
|
262
|
-
out_log.info(
|
|
321
|
+
out_log.info(
|
|
322
|
+
"PDB code: " + pdb_code + " correspond to uniprot id: " + uniprot_id
|
|
323
|
+
)
|
|
263
324
|
if global_log:
|
|
264
|
-
global_log.info(
|
|
325
|
+
global_log.info(
|
|
326
|
+
"PDB code: " + pdb_code + " correspond to uniprot id: " + uniprot_id
|
|
327
|
+
)
|
|
265
328
|
|
|
266
329
|
return uniprot_id
|
|
267
330
|
|
|
268
331
|
|
|
269
|
-
def get_variants(
|
|
332
|
+
def get_variants(
|
|
333
|
+
uniprot_id, url="http://mmb.irbbarcelona.org/api", out_log=None, global_log=None
|
|
334
|
+
):
|
|
270
335
|
"""Returns the variants of the `uniprot_id` code.
|
|
271
336
|
|
|
272
337
|
Returns:
|
|
273
338
|
:obj:`list` of :obj:`str`: List of variants.
|
|
274
339
|
"""
|
|
275
|
-
url_uniprot_mut = (
|
|
276
|
-
|
|
340
|
+
url_uniprot_mut = (
|
|
341
|
+
url + "/uniprot/" + uniprot_id + "/entry/variants/vardata/mut/?varorig=humsavar"
|
|
342
|
+
)
|
|
343
|
+
variants = requests.get(url_uniprot_mut, verify=True).json()["variants.vardata.mut"]
|
|
277
344
|
variants = variants if variants else []
|
|
278
345
|
|
|
279
|
-
fu.log(
|
|
346
|
+
fu.log(
|
|
347
|
+
"Found: %d variants for uniprot id: %s" % (len(variants), uniprot_id),
|
|
348
|
+
out_log,
|
|
349
|
+
global_log,
|
|
350
|
+
)
|
|
280
351
|
return variants if variants else []
|
|
281
352
|
|
|
282
353
|
|
|
283
354
|
def write_json(json_string, output_json_path, out_log=None, global_log=None):
|
|
284
|
-
"""
|
|
355
|
+
"""Writes a JSON"""
|
|
285
356
|
fu.log("Writting json to: %s" % (output_json_path), out_log, global_log)
|
|
286
|
-
with open(output_json_path,
|
|
357
|
+
with open(output_json_path, "w") as output_json_file:
|
|
287
358
|
output_json_file.write(json_string)
|
|
288
359
|
|
|
289
360
|
|
|
290
361
|
def get_memprotmd_sim_list(out_log=None, global_log=None):
|
|
291
|
-
"""
|
|
362
|
+
"""Returns all available membrane-protein systems (simulations) from the MemProtMD DB using its REST API"""
|
|
292
363
|
|
|
293
|
-
fu.log(
|
|
364
|
+
fu.log(
|
|
365
|
+
"Getting all available membrane-protein systems (simulations) from the MemProtMD REST API",
|
|
366
|
+
out_log,
|
|
367
|
+
global_log,
|
|
368
|
+
)
|
|
294
369
|
|
|
295
370
|
url = "http://memprotmd.bioch.ox.ac.uk/api/simulations/all"
|
|
296
371
|
json_obj = requests.post(url).json()
|
|
297
372
|
json_string = json.dumps(json_obj, indent=4)
|
|
298
373
|
|
|
299
|
-
fu.log(
|
|
374
|
+
fu.log("Total number of simulations: %d" % (len(json_obj)), out_log, global_log)
|
|
300
375
|
|
|
301
376
|
return json_string
|
|
302
377
|
|
|
303
378
|
|
|
304
379
|
def get_memprotmd_sim_search(collection_name, keyword, out_log=None, global_log=None):
|
|
305
|
-
"""
|
|
380
|
+
"""Performs advanced searches in the MemProtMD DB using its REST API and a given keyword"""
|
|
306
381
|
|
|
307
|
-
fu.log(
|
|
382
|
+
fu.log(
|
|
383
|
+
"Getting search results from the MemProtMD REST API. Collection name: %s, keyword: %s"
|
|
384
|
+
% (collection_name, keyword),
|
|
385
|
+
out_log,
|
|
386
|
+
global_log,
|
|
387
|
+
)
|
|
308
388
|
|
|
309
389
|
url = "http://memprotmd.bioch.ox.ac.uk/api/search/advanced"
|
|
310
390
|
json_query = {
|
|
311
391
|
"collectionName": collection_name,
|
|
312
|
-
"query": {
|
|
313
|
-
|
|
314
|
-
},
|
|
315
|
-
"projection": {
|
|
316
|
-
"simulations": 1
|
|
317
|
-
},
|
|
318
|
-
"options": {}
|
|
392
|
+
"query": {"keywords": keyword},
|
|
393
|
+
"projection": {"simulations": 1},
|
|
394
|
+
"options": {},
|
|
319
395
|
}
|
|
320
396
|
|
|
321
397
|
json_obj = requests.post(url, json=json_query).json()
|
|
@@ -324,43 +400,49 @@ def get_memprotmd_sim_search(collection_name, keyword, out_log=None, global_log=
|
|
|
324
400
|
# get total number of simulation
|
|
325
401
|
list_kw = []
|
|
326
402
|
for sim_list in json_obj:
|
|
327
|
-
for sim in sim_list[
|
|
403
|
+
for sim in sim_list["simulations"]:
|
|
328
404
|
list_kw.append(sim)
|
|
329
405
|
|
|
330
|
-
fu.log(
|
|
406
|
+
fu.log("Total number of simulations: %d" % (len(list_kw)), out_log, global_log)
|
|
331
407
|
|
|
332
408
|
return json_string
|
|
333
409
|
|
|
334
410
|
|
|
335
411
|
def get_memprotmd_sim(pdb_code, output_file, out_log=None, global_log=None):
|
|
336
|
-
"""
|
|
412
|
+
"""Gets a single simulation from MemProtMD DB"""
|
|
337
413
|
|
|
338
|
-
fu.log(
|
|
414
|
+
fu.log("Getting simulation file from pdb code %s" % (pdb_code), out_log, global_log)
|
|
339
415
|
|
|
340
|
-
url =
|
|
416
|
+
url = (
|
|
417
|
+
"http://memprotmd.bioch.ox.ac.uk/data/memprotmd/simulations/"
|
|
418
|
+
+ pdb_code
|
|
419
|
+
+ "_default_dppc/files/run/at.zip"
|
|
420
|
+
)
|
|
341
421
|
response = requests.get(url)
|
|
342
422
|
|
|
343
|
-
open(output_file,
|
|
423
|
+
open(output_file, "wb").write(response.content)
|
|
344
424
|
|
|
345
425
|
fu.log("Saving output %s file" % (output_file), out_log, global_log)
|
|
346
426
|
|
|
347
427
|
|
|
348
428
|
def check_mandatory_property(property, name, out_log, classname):
|
|
349
|
-
"""
|
|
429
|
+
"""Checks mandatory properties"""
|
|
350
430
|
|
|
351
431
|
if not property:
|
|
352
|
-
fu.log(classname +
|
|
353
|
-
raise SystemExit(classname +
|
|
432
|
+
fu.log(classname + ": Unexisting %s property, exiting" % name, out_log)
|
|
433
|
+
raise SystemExit(classname + ": Unexisting %s property" % name)
|
|
354
434
|
return property
|
|
355
435
|
|
|
356
436
|
|
|
357
437
|
def check_uniprot_code(code, out_log, classname):
|
|
358
|
-
"""
|
|
438
|
+
"""Checks uniprot code"""
|
|
359
439
|
|
|
360
|
-
pattern = re.compile(
|
|
440
|
+
pattern = re.compile(
|
|
441
|
+
(r"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}")
|
|
442
|
+
)
|
|
361
443
|
|
|
362
444
|
if not pattern.match(code):
|
|
363
|
-
fu.log(classname +
|
|
364
|
-
raise SystemExit(classname +
|
|
445
|
+
fu.log(classname + ": Incorrect uniprot code for %s" % code, out_log)
|
|
446
|
+
raise SystemExit(classname + ": Incorrect uniprot code for %s" % code)
|
|
365
447
|
|
|
366
448
|
return True
|
biobb_io/api/drugbank.py
CHANGED
|
@@ -1,11 +1,20 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
|
|
3
3
|
"""Module containing the Drugbank class and the command line interface."""
|
|
4
|
+
|
|
4
5
|
import argparse
|
|
5
|
-
from
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
6
8
|
from biobb_common.configuration import settings
|
|
9
|
+
from biobb_common.generic.biobb_object import BiobbObject
|
|
7
10
|
from biobb_common.tools.file_utils import launchlogger
|
|
8
|
-
|
|
11
|
+
|
|
12
|
+
from biobb_io.api.common import (
|
|
13
|
+
check_mandatory_property,
|
|
14
|
+
check_output_path,
|
|
15
|
+
download_drugbank,
|
|
16
|
+
write_sdf,
|
|
17
|
+
)
|
|
9
18
|
|
|
10
19
|
|
|
11
20
|
class Drugbank(BiobbObject):
|
|
@@ -20,6 +29,7 @@ class Drugbank(BiobbObject):
|
|
|
20
29
|
* **drugbank_id** (*str*) - (None) Drugbank component id.
|
|
21
30
|
* **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
|
|
22
31
|
* **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
|
|
32
|
+
* **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
|
|
23
33
|
|
|
24
34
|
Examples:
|
|
25
35
|
This is a use example of how to use the building block from Python::
|
|
@@ -41,8 +51,7 @@ class Drugbank(BiobbObject):
|
|
|
41
51
|
|
|
42
52
|
"""
|
|
43
53
|
|
|
44
|
-
def __init__(self, output_sdf_path,
|
|
45
|
-
properties=None, **kwargs) -> None:
|
|
54
|
+
def __init__(self, output_sdf_path, properties=None, **kwargs) -> None:
|
|
46
55
|
properties = properties or {}
|
|
47
56
|
|
|
48
57
|
# Call parent class constructor
|
|
@@ -50,12 +59,10 @@ class Drugbank(BiobbObject):
|
|
|
50
59
|
self.locals_var_dict = locals().copy()
|
|
51
60
|
|
|
52
61
|
# Input/Output files
|
|
53
|
-
self.io_dict = {
|
|
54
|
-
"out": {"output_sdf_path": output_sdf_path}
|
|
55
|
-
}
|
|
62
|
+
self.io_dict = {"out": {"output_sdf_path": output_sdf_path}}
|
|
56
63
|
|
|
57
64
|
# Properties specific for BB
|
|
58
|
-
self.drugbank_id = properties.get(
|
|
65
|
+
self.drugbank_id = properties.get("drugbank_id", None)
|
|
59
66
|
self.properties = properties
|
|
60
67
|
|
|
61
68
|
# Check the properties
|
|
@@ -63,8 +70,14 @@ class Drugbank(BiobbObject):
|
|
|
63
70
|
self.check_arguments()
|
|
64
71
|
|
|
65
72
|
def check_data_params(self, out_log, err_log):
|
|
66
|
-
"""
|
|
67
|
-
self.output_sdf_path = check_output_path(
|
|
73
|
+
"""Checks all the input/output paths and parameters"""
|
|
74
|
+
self.output_sdf_path = check_output_path(
|
|
75
|
+
self.io_dict["out"]["output_sdf_path"],
|
|
76
|
+
"output_sdf_path",
|
|
77
|
+
False,
|
|
78
|
+
out_log,
|
|
79
|
+
self.__class__.__name__,
|
|
80
|
+
)
|
|
68
81
|
|
|
69
82
|
@launchlogger
|
|
70
83
|
def launch(self) -> int:
|
|
@@ -77,13 +90,17 @@ class Drugbank(BiobbObject):
|
|
|
77
90
|
if self.check_restart():
|
|
78
91
|
return 0
|
|
79
92
|
|
|
80
|
-
check_mandatory_property(
|
|
93
|
+
check_mandatory_property(
|
|
94
|
+
self.drugbank_id, "drugbank_id", self.out_log, self.__class__.__name__
|
|
95
|
+
)
|
|
81
96
|
|
|
82
97
|
self.drugbank_id = self.drugbank_id.strip().lower()
|
|
83
98
|
url = "https://www.drugbank.ca/structures/small_molecule_drugs/%s.sdf?type=3d"
|
|
84
99
|
|
|
85
100
|
# Downloading SDF file
|
|
86
|
-
sdf_string = download_drugbank(
|
|
101
|
+
sdf_string = download_drugbank(
|
|
102
|
+
self.drugbank_id, url, self.out_log, self.global_log
|
|
103
|
+
)
|
|
87
104
|
write_sdf(sdf_string, self.output_sdf_path, self.out_log, self.global_log)
|
|
88
105
|
|
|
89
106
|
self.check_arguments(output_files_created=True, raise_exception=False)
|
|
@@ -91,31 +108,44 @@ class Drugbank(BiobbObject):
|
|
|
91
108
|
return 0
|
|
92
109
|
|
|
93
110
|
|
|
94
|
-
def drugbank(output_sdf_path: str, properties: dict = None, **kwargs) -> int:
|
|
111
|
+
def drugbank(output_sdf_path: str, properties: Optional[dict] = None, **kwargs) -> int:
|
|
95
112
|
"""Execute the :class:`Drugbank <api.drugbank.Drugbank>` class and
|
|
96
113
|
execute the :meth:`launch() <api.drugbank.Drugbank.launch>` method."""
|
|
97
114
|
|
|
98
|
-
return Drugbank(
|
|
99
|
-
|
|
115
|
+
return Drugbank(
|
|
116
|
+
output_sdf_path=output_sdf_path, properties=properties, **kwargs
|
|
117
|
+
).launch()
|
|
100
118
|
|
|
101
119
|
|
|
102
120
|
def main():
|
|
103
121
|
"""Command line execution of this building block. Please check the command line documentation."""
|
|
104
|
-
parser = argparse.ArgumentParser(
|
|
105
|
-
|
|
122
|
+
parser = argparse.ArgumentParser(
|
|
123
|
+
description="Download a component in SDF format from the Drugbank (https://www.drugbank.ca/).",
|
|
124
|
+
formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999),
|
|
125
|
+
)
|
|
126
|
+
parser.add_argument(
|
|
127
|
+
"-c",
|
|
128
|
+
"--config",
|
|
129
|
+
required=False,
|
|
130
|
+
help="This file can be a YAML file, JSON file or JSON string",
|
|
131
|
+
)
|
|
106
132
|
|
|
107
133
|
# Specific args of each building block
|
|
108
|
-
required_args = parser.add_argument_group(
|
|
109
|
-
required_args.add_argument(
|
|
134
|
+
required_args = parser.add_argument_group("required arguments")
|
|
135
|
+
required_args.add_argument(
|
|
136
|
+
"-o",
|
|
137
|
+
"--output_sdf_path",
|
|
138
|
+
required=True,
|
|
139
|
+
help="Path to the output SDF component file. Accepted formats: sdf.",
|
|
140
|
+
)
|
|
110
141
|
|
|
111
142
|
args = parser.parse_args()
|
|
112
143
|
config = args.config if args.config else None
|
|
113
144
|
properties = settings.ConfReader(config=config).get_prop_dic()
|
|
114
145
|
|
|
115
146
|
# Specific call of each building block
|
|
116
|
-
drugbank(output_sdf_path=args.output_sdf_path,
|
|
117
|
-
properties=properties)
|
|
147
|
+
drugbank(output_sdf_path=args.output_sdf_path, properties=properties)
|
|
118
148
|
|
|
119
149
|
|
|
120
|
-
if __name__ ==
|
|
150
|
+
if __name__ == "__main__":
|
|
121
151
|
main()
|