biotite 0.38.0__cp310-cp310-win_amd64.whl → 0.40.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +3 -3
- biotite/application/application.py +33 -28
- biotite/application/dssp/app.py +18 -18
- biotite/application/sra/__init__.py +5 -0
- biotite/application/sra/app.py +337 -55
- biotite/database/entrez/__init__.py +2 -1
- biotite/database/entrez/check.py +14 -3
- biotite/database/entrez/download.py +20 -13
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +38 -34
- biotite/database/pubchem/query.py +44 -44
- biotite/database/rcsb/download.py +19 -14
- biotite/database/rcsb/query.py +46 -46
- biotite/sequence/align/__init__.py +5 -1
- biotite/sequence/align/banded.c +1408 -1025
- biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/buckets.py +69 -0
- biotite/sequence/align/cigar.py +389 -0
- biotite/sequence/align/kmeralphabet.c +3220 -2850
- biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmersimilarity.c +713 -663
- biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cpp +68398 -0
- biotite/sequence/align/localgapped.c +1507 -1074
- biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.c +1143 -833
- biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.c +1569 -1092
- biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.c +1612 -1212
- biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.c +33259 -0
- biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/{kmertable.c → selector.c} +9129 -16497
- biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.c +685 -646
- biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/codec.c +1159 -841
- biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
- biotite/sequence/graphics/alignment.py +212 -2
- biotite/sequence/io/genbank/annotation.py +11 -11
- biotite/sequence/phylo/nj.c +684 -636
- biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.c +970 -673
- biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.c +672 -626
- biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
- biotite/structure/__init__.py +1 -1
- biotite/structure/atoms.py +1 -1
- biotite/structure/basepairs.py +7 -12
- biotite/structure/bonds.c +3861 -3749
- biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
- biotite/structure/celllist.c +727 -707
- biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
- biotite/structure/charges.c +1561 -1560
- biotite/structure/charges.cp310-win_amd64.pyd +0 -0
- biotite/structure/filter.py +30 -37
- biotite/structure/info/__init__.py +5 -8
- biotite/structure/info/atoms.py +25 -67
- biotite/structure/info/bonds.py +46 -100
- biotite/structure/info/ccd/README.rst +8 -0
- biotite/structure/info/ccd/amino_acids.txt +1646 -0
- biotite/structure/info/ccd/carbohydrates.txt +1133 -0
- biotite/structure/info/ccd/components.bcif +0 -0
- biotite/structure/info/ccd/nucleotides.txt +797 -0
- biotite/structure/info/ccd.py +95 -0
- biotite/structure/info/groups.py +90 -0
- biotite/structure/info/masses.py +21 -20
- biotite/structure/info/misc.py +11 -22
- biotite/structure/info/standardize.py +17 -12
- biotite/structure/io/__init__.py +2 -4
- biotite/structure/io/ctab.py +1 -1
- biotite/structure/io/general.py +37 -43
- biotite/structure/io/mmtf/__init__.py +3 -0
- biotite/structure/io/mmtf/convertarray.c +528 -365
- biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.c +725 -676
- biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.c +1070 -754
- biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.c +727 -677
- biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/file.py +34 -26
- biotite/structure/io/npz/__init__.py +3 -0
- biotite/structure/io/npz/file.py +21 -18
- biotite/structure/io/pdb/__init__.py +3 -3
- biotite/structure/io/pdb/file.py +72 -70
- biotite/structure/io/pdb/hybrid36.c +540 -478
- biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/file.py +82 -68
- biotite/structure/io/pdbx/__init__.py +13 -6
- biotite/structure/io/pdbx/bcif.py +649 -0
- biotite/structure/io/pdbx/cif.py +1028 -0
- biotite/structure/io/pdbx/component.py +243 -0
- biotite/structure/io/pdbx/convert.py +707 -359
- biotite/structure/io/pdbx/encoding.c +112813 -0
- biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/error.py +14 -0
- biotite/structure/io/pdbx/legacy.py +267 -0
- biotite/structure/molecules.py +151 -151
- biotite/structure/residues.py +40 -40
- biotite/structure/sasa.c +713 -644
- biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
- biotite/structure/superimpose.py +158 -115
- biotite/visualize.py +9 -11
- {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/METADATA +2 -2
- {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/RECORD +112 -102
- {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/WHEEL +1 -1
- biotite/structure/info/amino_acids.json +0 -1556
- biotite/structure/info/amino_acids.py +0 -42
- biotite/structure/info/carbohydrates.json +0 -1122
- biotite/structure/info/carbohydrates.py +0 -39
- biotite/structure/info/intra_bonds.msgpack +0 -0
- biotite/structure/info/link_types.msgpack +0 -1
- biotite/structure/info/nucleotides.json +0 -772
- biotite/structure/info/nucleotides.py +0 -39
- biotite/structure/info/residue_masses.msgpack +0 -0
- biotite/structure/info/residue_names.msgpack +0 -3
- biotite/structure/info/residues.msgpack +0 -0
- biotite/structure/io/pdbx/file.py +0 -652
- {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/LICENSE.rst +0 -0
- {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/top_level.txt +0 -0
biotite/database/entrez/query.py
CHANGED
|
@@ -11,13 +11,11 @@ import abc
|
|
|
11
11
|
from xml.etree import ElementTree
|
|
12
12
|
from .check import check_for_errors
|
|
13
13
|
from .dbnames import sanitize_database_name
|
|
14
|
+
from ..error import RequestError
|
|
15
|
+
from .key import get_api_key
|
|
14
16
|
|
|
15
17
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
_search_url = ("esearch.fcgi?db={:}"
|
|
19
|
-
"&term={:}"
|
|
20
|
-
"&retmax={:}")
|
|
18
|
+
_search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
|
21
19
|
|
|
22
20
|
class Query(metaclass=abc.ABCMeta):
|
|
23
21
|
"""
|
|
@@ -26,21 +24,21 @@ class Query(metaclass=abc.ABCMeta):
|
|
|
26
24
|
"""
|
|
27
25
|
def __init__(self):
|
|
28
26
|
pass
|
|
29
|
-
|
|
27
|
+
|
|
30
28
|
@abc.abstractmethod
|
|
31
29
|
def __str__(self):
|
|
32
30
|
pass
|
|
33
|
-
|
|
31
|
+
|
|
34
32
|
def __or__(self, operand):
|
|
35
33
|
if not isinstance(operand, Query):
|
|
36
34
|
operand = SimpleQuery(operand)
|
|
37
35
|
return CompositeQuery("OR", self, operand)
|
|
38
|
-
|
|
36
|
+
|
|
39
37
|
def __and__(self, operand):
|
|
40
38
|
if not isinstance(operand, Query):
|
|
41
39
|
operand = SimpleQuery(operand)
|
|
42
40
|
return CompositeQuery("AND", self, operand)
|
|
43
|
-
|
|
41
|
+
|
|
44
42
|
def __xor__(self, operand):
|
|
45
43
|
if not isinstance(operand, Query):
|
|
46
44
|
operand = SimpleQuery(operand)
|
|
@@ -51,21 +49,21 @@ class CompositeQuery(Query):
|
|
|
51
49
|
"""
|
|
52
50
|
A representation of an composite query
|
|
53
51
|
for the NCBI Entrez search service.
|
|
54
|
-
|
|
52
|
+
|
|
55
53
|
A composite query is a combination of two other queries,
|
|
56
54
|
combined either with an 'AND', 'OR' or 'NOT' operator.
|
|
57
55
|
|
|
58
56
|
Usually the user does not create instances of this class directly,
|
|
59
57
|
but :class:`Query` instances are combined with
|
|
60
58
|
``|`` (OR), ``&`` (AND) or ``^`` (NOT).
|
|
61
|
-
|
|
59
|
+
|
|
62
60
|
Parameters
|
|
63
61
|
----------
|
|
64
62
|
operator: str, {"AND", "OR", "NOT"}
|
|
65
63
|
The combination operator.
|
|
66
64
|
queries : iterable object of SimpleQuery
|
|
67
65
|
The queries to be combined.
|
|
68
|
-
|
|
66
|
+
|
|
69
67
|
Examples
|
|
70
68
|
--------
|
|
71
69
|
|
|
@@ -76,16 +74,16 @@ class CompositeQuery(Query):
|
|
|
76
74
|
>>> print(query)
|
|
77
75
|
("Escherichia coli"[Organism]) AND (90:100[Sequence Length])
|
|
78
76
|
"""
|
|
79
|
-
|
|
77
|
+
|
|
80
78
|
def __init__(self, operator, query1, query2):
|
|
81
79
|
super().__init__()
|
|
82
80
|
self._op = operator
|
|
83
81
|
self._q1 = query1
|
|
84
82
|
self._q2 = query2
|
|
85
|
-
|
|
83
|
+
|
|
86
84
|
def __str__(self):
|
|
87
85
|
return "({:}) {:} ({:})".format(str(self._q1), self._op, self._q2)
|
|
88
|
-
|
|
86
|
+
|
|
89
87
|
|
|
90
88
|
|
|
91
89
|
class SimpleQuery(Query):
|
|
@@ -96,7 +94,7 @@ class SimpleQuery(Query):
|
|
|
96
94
|
|
|
97
95
|
A list of available search fields with description can be found
|
|
98
96
|
`here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.
|
|
99
|
-
|
|
97
|
+
|
|
100
98
|
Parameters
|
|
101
99
|
----------
|
|
102
100
|
term: str
|
|
@@ -108,10 +106,10 @@ class SimpleQuery(Query):
|
|
|
108
106
|
`here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.
|
|
109
107
|
By default the field is omitted and all fields are searched in
|
|
110
108
|
for the term, implicitly.
|
|
111
|
-
|
|
109
|
+
|
|
112
110
|
Examples
|
|
113
111
|
--------
|
|
114
|
-
|
|
112
|
+
|
|
115
113
|
>>> query = SimpleQuery("Escherichia coli")
|
|
116
114
|
>>> print(query)
|
|
117
115
|
"Escherichia coli"
|
|
@@ -152,7 +150,7 @@ class SimpleQuery(Query):
|
|
|
152
150
|
term = f'"{term}"'
|
|
153
151
|
self._term = term
|
|
154
152
|
self._field = field
|
|
155
|
-
|
|
153
|
+
|
|
156
154
|
def __str__(self):
|
|
157
155
|
string = self._term
|
|
158
156
|
if self._field is not None:
|
|
@@ -164,9 +162,9 @@ def search(query, db_name, number=20):
|
|
|
164
162
|
r"""
|
|
165
163
|
Get all PDB IDs that meet the given query requirements,
|
|
166
164
|
via the NCBI ESearch service.
|
|
167
|
-
|
|
165
|
+
|
|
168
166
|
This function requires an internet connection.
|
|
169
|
-
|
|
167
|
+
|
|
170
168
|
Parameters
|
|
171
169
|
----------
|
|
172
170
|
query : Query
|
|
@@ -175,13 +173,13 @@ def search(query, db_name, number=20):
|
|
|
175
173
|
E-utility or common database name.
|
|
176
174
|
number : Query
|
|
177
175
|
The maximum number of UIDs that are obtained.
|
|
178
|
-
|
|
176
|
+
|
|
179
177
|
Returns
|
|
180
178
|
-------
|
|
181
179
|
ids : list of str
|
|
182
180
|
A list of strings containing all NCBI UIDs (accession number)
|
|
183
181
|
that meet the query requirements.
|
|
184
|
-
|
|
182
|
+
|
|
185
183
|
Warnings
|
|
186
184
|
--------
|
|
187
185
|
Even if you give valid input to this function, in rare cases the
|
|
@@ -194,7 +192,7 @@ def search(query, db_name, number=20):
|
|
|
194
192
|
-----
|
|
195
193
|
A list of available search fields with description can be found
|
|
196
194
|
`here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.
|
|
197
|
-
|
|
195
|
+
|
|
198
196
|
Examples
|
|
199
197
|
--------
|
|
200
198
|
>>> query = SimpleQuery("Escherichia coli", "Organism") & \
|
|
@@ -202,18 +200,24 @@ def search(query, db_name, number=20):
|
|
|
202
200
|
>>> ids = search(query, "nuccore", number=5)
|
|
203
201
|
>>> print(ids)
|
|
204
202
|
['...', '...', '...', '...', '...']
|
|
205
|
-
"""
|
|
206
|
-
|
|
207
|
-
(
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
203
|
+
"""
|
|
204
|
+
param_dict = {
|
|
205
|
+
"db": sanitize_database_name(db_name),
|
|
206
|
+
"term": str(query),
|
|
207
|
+
"retmax": str(number),
|
|
208
|
+
}
|
|
209
|
+
api_key = get_api_key()
|
|
210
|
+
if api_key is not None:
|
|
211
|
+
param_dict["api_key"] = api_key
|
|
212
|
+
r = requests.get(_search_url, params=param_dict)
|
|
213
213
|
xml_response = r.text
|
|
214
214
|
check_for_errors(xml_response)
|
|
215
|
-
|
|
215
|
+
try:
|
|
216
|
+
root = ElementTree.fromstring(xml_response)
|
|
217
|
+
except ElementTree.ParseError:
|
|
218
|
+
if len(xml_response) > 100:
|
|
219
|
+
xml_response = xml_response[:100] + "..."
|
|
220
|
+
raise RequestError(f"Invalid server response: {xml_response}")
|
|
216
221
|
xpath = ".//IdList/Id"
|
|
217
222
|
uids = [element.text for element in root.findall(xpath)]
|
|
218
223
|
return uids
|
|
219
|
-
|
|
@@ -84,12 +84,12 @@ class NameQuery(Query):
|
|
|
84
84
|
--------
|
|
85
85
|
|
|
86
86
|
>>> print(search(NameQuery("Alanine")))
|
|
87
|
-
[5950,
|
|
87
|
+
[5950, ..., ..., ...]
|
|
88
88
|
"""
|
|
89
89
|
|
|
90
90
|
def __init__(self, name):
|
|
91
91
|
self._name = name
|
|
92
|
-
|
|
92
|
+
|
|
93
93
|
def get_input_url_path(self):
|
|
94
94
|
return "compound/name"
|
|
95
95
|
|
|
@@ -107,7 +107,7 @@ class SmilesQuery(Query):
|
|
|
107
107
|
----------
|
|
108
108
|
smiles : str
|
|
109
109
|
The *SMILES* string.
|
|
110
|
-
|
|
110
|
+
|
|
111
111
|
Examples
|
|
112
112
|
--------
|
|
113
113
|
|
|
@@ -117,7 +117,7 @@ class SmilesQuery(Query):
|
|
|
117
117
|
|
|
118
118
|
def __init__(self, smiles):
|
|
119
119
|
self._smiles = smiles
|
|
120
|
-
|
|
120
|
+
|
|
121
121
|
def get_input_url_path(self):
|
|
122
122
|
return "compound/smiles"
|
|
123
123
|
|
|
@@ -134,7 +134,7 @@ class InchiQuery(Query):
|
|
|
134
134
|
----------
|
|
135
135
|
inchi : str
|
|
136
136
|
The *InChI* string.
|
|
137
|
-
|
|
137
|
+
|
|
138
138
|
Examples
|
|
139
139
|
--------
|
|
140
140
|
|
|
@@ -144,7 +144,7 @@ class InchiQuery(Query):
|
|
|
144
144
|
|
|
145
145
|
def __init__(self, inchi):
|
|
146
146
|
self._inchi = inchi
|
|
147
|
-
|
|
147
|
+
|
|
148
148
|
def get_input_url_path(self):
|
|
149
149
|
return "compound/inchi"
|
|
150
150
|
|
|
@@ -161,7 +161,7 @@ class InchiKeyQuery(Query):
|
|
|
161
161
|
----------
|
|
162
162
|
inchi_key : str
|
|
163
163
|
The *InChI* key.
|
|
164
|
-
|
|
164
|
+
|
|
165
165
|
Examples
|
|
166
166
|
--------
|
|
167
167
|
|
|
@@ -171,7 +171,7 @@ class InchiKeyQuery(Query):
|
|
|
171
171
|
|
|
172
172
|
def __init__(self, inchi_key):
|
|
173
173
|
self._inchi_key = inchi_key
|
|
174
|
-
|
|
174
|
+
|
|
175
175
|
def get_input_url_path(self):
|
|
176
176
|
return "compound/inchikey"
|
|
177
177
|
|
|
@@ -199,22 +199,22 @@ class FormulaQuery(Query):
|
|
|
199
199
|
The maximum number of matches that this query may return.
|
|
200
200
|
By default, the *PubChem* default value is used, which can be
|
|
201
201
|
considered unlimited.
|
|
202
|
-
|
|
202
|
+
|
|
203
203
|
Examples
|
|
204
204
|
--------
|
|
205
205
|
|
|
206
206
|
>>> print(search(FormulaQuery("C4H10", number=5)))
|
|
207
|
-
[7843,
|
|
207
|
+
[7843, ..., ..., ..., ...]
|
|
208
208
|
>>> atom_array = residue("ALA")
|
|
209
209
|
>>> print(search(FormulaQuery.from_atoms(atom_array, number=5)))
|
|
210
|
-
[5950,
|
|
210
|
+
[5950, ..., ..., ..., ...]
|
|
211
211
|
"""
|
|
212
212
|
|
|
213
213
|
def __init__(self, formula, allow_other_elements=False, number=None):
|
|
214
214
|
self._formula = formula
|
|
215
215
|
self._allow_other_elements = allow_other_elements
|
|
216
216
|
self._number = number
|
|
217
|
-
|
|
217
|
+
|
|
218
218
|
@staticmethod
|
|
219
219
|
def from_atoms(atoms, allow_other_elements=False, number=None):
|
|
220
220
|
"""
|
|
@@ -247,7 +247,7 @@ class FormulaQuery(Query):
|
|
|
247
247
|
for element in sorted_elements:
|
|
248
248
|
formula += _format_element(element, element_counter[element])
|
|
249
249
|
return FormulaQuery(formula, allow_other_elements, number)
|
|
250
|
-
|
|
250
|
+
|
|
251
251
|
def get_input_url_path(self):
|
|
252
252
|
# The 'fastformula' service seems not to accept the formula
|
|
253
253
|
# in the parameter section of the request
|
|
@@ -287,7 +287,7 @@ class StructureQuery(Query, metaclass=abc.ABCMeta):
|
|
|
287
287
|
sdf : str, optional
|
|
288
288
|
A query structure as SDF formatted string.
|
|
289
289
|
Usually :meth:`from_atoms()` is used to create the SDF from an
|
|
290
|
-
:class:`AtomArray`.
|
|
290
|
+
:class:`AtomArray`.
|
|
291
291
|
cid : int, optional
|
|
292
292
|
The query structure given as CID.
|
|
293
293
|
number : int, optional
|
|
@@ -351,7 +351,7 @@ class StructureQuery(Query, metaclass=abc.ABCMeta):
|
|
|
351
351
|
sdf = "\r\n".join(mol_file.lines) + "\r\n$$$$\r\n",
|
|
352
352
|
**kwargs
|
|
353
353
|
)
|
|
354
|
-
|
|
354
|
+
|
|
355
355
|
def get_input_url_path(self):
|
|
356
356
|
input_string = f"compound/{self.search_type()}/{self._query_key}"
|
|
357
357
|
if self._query_key == "cid":
|
|
@@ -384,7 +384,7 @@ class StructureQuery(Query, metaclass=abc.ABCMeta):
|
|
|
384
384
|
return {"sdf": self._query_val}
|
|
385
385
|
else:
|
|
386
386
|
return {}
|
|
387
|
-
|
|
387
|
+
|
|
388
388
|
@abc.abstractmethod
|
|
389
389
|
def search_type(self):
|
|
390
390
|
"""
|
|
@@ -434,7 +434,7 @@ class SuperOrSubstructureQuery(StructureQuery, metaclass=abc.ABCMeta):
|
|
|
434
434
|
sdf : str, optional
|
|
435
435
|
A query structure as SDF formatted string.
|
|
436
436
|
Usually :meth:`from_atoms()` is used to create the SDF from an
|
|
437
|
-
:class:`AtomArray`.
|
|
437
|
+
:class:`AtomArray`.
|
|
438
438
|
cid : int, optional
|
|
439
439
|
The query structure given as CID.
|
|
440
440
|
number : int, optional
|
|
@@ -463,7 +463,7 @@ class SuperOrSubstructureQuery(StructureQuery, metaclass=abc.ABCMeta):
|
|
|
463
463
|
stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
|
|
464
464
|
How to handle stereo.
|
|
465
465
|
(Default: 'ignore')
|
|
466
|
-
|
|
466
|
+
|
|
467
467
|
Notes
|
|
468
468
|
-----
|
|
469
469
|
Optional parameter descriptions are taken from the *PubChem* REST
|
|
@@ -488,7 +488,7 @@ class SuperOrSubstructureQuery(StructureQuery, metaclass=abc.ABCMeta):
|
|
|
488
488
|
self._options[option] = value
|
|
489
489
|
del kwargs[option]
|
|
490
490
|
super().__init__(**kwargs)
|
|
491
|
-
|
|
491
|
+
|
|
492
492
|
def search_options(self):
|
|
493
493
|
return self._options
|
|
494
494
|
|
|
@@ -514,7 +514,7 @@ class SuperstructureQuery(SuperOrSubstructureQuery):
|
|
|
514
514
|
sdf : str, optional
|
|
515
515
|
A query structure as SDF formatted string.
|
|
516
516
|
Usually :meth:`from_atoms()` is used to create the SDF from an
|
|
517
|
-
:class:`AtomArray`.
|
|
517
|
+
:class:`AtomArray`.
|
|
518
518
|
cid : int, optional
|
|
519
519
|
The query structure given as CID.
|
|
520
520
|
number : int, optional
|
|
@@ -543,7 +543,7 @@ class SuperstructureQuery(SuperOrSubstructureQuery):
|
|
|
543
543
|
stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
|
|
544
544
|
How to handle stereo.
|
|
545
545
|
(Default: 'ignore')
|
|
546
|
-
|
|
546
|
+
|
|
547
547
|
Notes
|
|
548
548
|
-----
|
|
549
549
|
Optional parameter descriptions are taken from the *PubChem* REST
|
|
@@ -555,11 +555,11 @@ class SuperstructureQuery(SuperOrSubstructureQuery):
|
|
|
555
555
|
|
|
556
556
|
>>> # CID of alanine
|
|
557
557
|
>>> print(search(SuperstructureQuery(cid=5950, number=5)))
|
|
558
|
-
[1032,
|
|
558
|
+
[1032, ..., ..., ..., ...]
|
|
559
559
|
>>> # AtomArray of alanine
|
|
560
560
|
>>> atom_array = residue("ALA")
|
|
561
561
|
>>> print(search(SuperstructureQuery.from_atoms(atom_array, number=5)))
|
|
562
|
-
[1032,
|
|
562
|
+
[1032, ..., ..., ..., ...]
|
|
563
563
|
"""
|
|
564
564
|
|
|
565
565
|
def search_type(self):
|
|
@@ -587,7 +587,7 @@ class SubstructureQuery(SuperOrSubstructureQuery):
|
|
|
587
587
|
sdf : str, optional
|
|
588
588
|
A query structure as SDF formatted string.
|
|
589
589
|
Usually :meth:`from_atoms()` is used to create the SDF from an
|
|
590
|
-
:class:`AtomArray`.
|
|
590
|
+
:class:`AtomArray`.
|
|
591
591
|
cid : int, optional
|
|
592
592
|
The query structure given as CID.
|
|
593
593
|
number : int, optional
|
|
@@ -616,7 +616,7 @@ class SubstructureQuery(SuperOrSubstructureQuery):
|
|
|
616
616
|
stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
|
|
617
617
|
How to handle stereo.
|
|
618
618
|
(Default: 'ignore')
|
|
619
|
-
|
|
619
|
+
|
|
620
620
|
Notes
|
|
621
621
|
-----
|
|
622
622
|
Optional parameter descriptions are taken from the *PubChem* REST
|
|
@@ -628,11 +628,11 @@ class SubstructureQuery(SuperOrSubstructureQuery):
|
|
|
628
628
|
|
|
629
629
|
>>> # CID of alanine
|
|
630
630
|
>>> print(search(SubstructureQuery(cid=5950, number=5)))
|
|
631
|
-
[5950,
|
|
631
|
+
[5950, ..., ..., ..., ...]
|
|
632
632
|
>>> # AtomArray of alanine
|
|
633
633
|
>>> atom_array = residue("ALA")
|
|
634
634
|
>>> print(search(SubstructureQuery.from_atoms(atom_array, number=5)))
|
|
635
|
-
[5950,
|
|
635
|
+
[5950, ..., ..., ..., ...]
|
|
636
636
|
"""
|
|
637
637
|
|
|
638
638
|
def search_type(self):
|
|
@@ -666,14 +666,14 @@ class SimilarityQuery(StructureQuery):
|
|
|
666
666
|
sdf : str, optional
|
|
667
667
|
A query structure as SDF formatted string.
|
|
668
668
|
Usually :meth:`from_atoms()` is used to create the SDF from an
|
|
669
|
-
:class:`AtomArray`.
|
|
669
|
+
:class:`AtomArray`.
|
|
670
670
|
cid : int, optional
|
|
671
671
|
The query structure given as CID.
|
|
672
672
|
number : int, optional
|
|
673
673
|
The maximum number of matches that this query may return.
|
|
674
674
|
By default, the *PubChem* default value is used, which can
|
|
675
675
|
be considered unlimited.
|
|
676
|
-
|
|
676
|
+
|
|
677
677
|
Notes
|
|
678
678
|
-----
|
|
679
679
|
The conformation based similarity measure uses *shape-Tanimoto* and
|
|
@@ -681,7 +681,7 @@ class SimilarityQuery(StructureQuery):
|
|
|
681
681
|
|
|
682
682
|
References
|
|
683
683
|
----------
|
|
684
|
-
|
|
684
|
+
|
|
685
685
|
.. footbibliography::
|
|
686
686
|
|
|
687
687
|
Examples
|
|
@@ -689,22 +689,22 @@ class SimilarityQuery(StructureQuery):
|
|
|
689
689
|
|
|
690
690
|
>>> # CID of alanine
|
|
691
691
|
>>> print(search(SimilarityQuery(cid=5950, threshold=1.0, number=5)))
|
|
692
|
-
[5950,
|
|
692
|
+
[5950, ..., ..., ..., ...]
|
|
693
693
|
>>> # AtomArray of alanine
|
|
694
694
|
>>> atom_array = residue("ALA")
|
|
695
695
|
>>> print(search(SimilarityQuery.from_atoms(atom_array, threshold=1.0, number=5)))
|
|
696
|
-
[5950,
|
|
696
|
+
[5950, ..., ..., ..., ...]
|
|
697
697
|
"""
|
|
698
698
|
|
|
699
699
|
def __init__(self, threshold=0.9, conformation_based=False, **kwargs):
|
|
700
700
|
self._threshold = threshold
|
|
701
701
|
self._conformation_based = conformation_based
|
|
702
702
|
super().__init__(**kwargs)
|
|
703
|
-
|
|
703
|
+
|
|
704
704
|
def search_type(self):
|
|
705
705
|
dim = "3d" if self._conformation_based else "2d"
|
|
706
706
|
return f"fastsimilarity_{dim}"
|
|
707
|
-
|
|
707
|
+
|
|
708
708
|
def search_options(self):
|
|
709
709
|
return {"threshold" : int(round(self._threshold * 100))}
|
|
710
710
|
|
|
@@ -730,14 +730,14 @@ class IdentityQuery(StructureQuery):
|
|
|
730
730
|
sdf : str, optional
|
|
731
731
|
A query structure as SDF formatted string.
|
|
732
732
|
Usually :meth:`from_atoms()` is used to create the SDF from an
|
|
733
|
-
:class:`AtomArray`.
|
|
733
|
+
:class:`AtomArray`.
|
|
734
734
|
cid : int, optional
|
|
735
735
|
The query structure given as CID.
|
|
736
736
|
number : int, optional
|
|
737
737
|
The maximum number of matches that this query may return.
|
|
738
738
|
By default, the *PubChem* default value is used, which can
|
|
739
739
|
be considered unlimited.
|
|
740
|
-
|
|
740
|
+
|
|
741
741
|
Examples
|
|
742
742
|
--------
|
|
743
743
|
|
|
@@ -753,10 +753,10 @@ class IdentityQuery(StructureQuery):
|
|
|
753
753
|
def __init__(self, identity_type="same_stereo_isotope", **kwargs):
|
|
754
754
|
self._identity_type = identity_type
|
|
755
755
|
super().__init__(**kwargs)
|
|
756
|
-
|
|
756
|
+
|
|
757
757
|
def search_type(self):
|
|
758
758
|
return "fastidentity"
|
|
759
|
-
|
|
759
|
+
|
|
760
760
|
def get_params(self):
|
|
761
761
|
# Use 'get_params()' instead of 'search_options()', since the
|
|
762
762
|
# parameter 'identity_type' in the REST API is *snake case*
|
|
@@ -764,7 +764,7 @@ class IdentityQuery(StructureQuery):
|
|
|
764
764
|
params = super().get_params()
|
|
765
765
|
params["identity_type"] = self._identity_type
|
|
766
766
|
return params
|
|
767
|
-
|
|
767
|
+
|
|
768
768
|
|
|
769
769
|
|
|
770
770
|
|
|
@@ -772,9 +772,9 @@ def search(query, throttle_threshold=0.5, return_throttle_status=False):
|
|
|
772
772
|
"""
|
|
773
773
|
Get all CIDs that meet the given query requirements,
|
|
774
774
|
via the PubChem REST API.
|
|
775
|
-
|
|
775
|
+
|
|
776
776
|
This function requires an internet connection.
|
|
777
|
-
|
|
777
|
+
|
|
778
778
|
Parameters
|
|
779
779
|
----------
|
|
780
780
|
query : Query
|
|
@@ -787,7 +787,7 @@ def search(query, throttle_threshold=0.5, return_throttle_status=False):
|
|
|
787
787
|
If ``None`` is given, the execution is never halted.
|
|
788
788
|
return_throttle_status : float, optional
|
|
789
789
|
If set to true, the :class:`ThrottleStatus` is also returned.
|
|
790
|
-
|
|
790
|
+
|
|
791
791
|
Returns
|
|
792
792
|
-------
|
|
793
793
|
ids : list of int
|
|
@@ -796,12 +796,12 @@ def search(query, throttle_threshold=0.5, return_throttle_status=False):
|
|
|
796
796
|
The :class:`ThrottleStatus` obtained from the server response.
|
|
797
797
|
This can be used for custom request throttling, for example.
|
|
798
798
|
Only returned, if `return_throttle_status` is set to true.
|
|
799
|
-
|
|
799
|
+
|
|
800
800
|
Examples
|
|
801
801
|
--------
|
|
802
802
|
|
|
803
803
|
>>> print(search(NameQuery("Alanine")))
|
|
804
|
-
[5950,
|
|
804
|
+
[5950, ..., ..., ...]
|
|
805
805
|
"""
|
|
806
806
|
# Use POST to be compatible with the larger payloads
|
|
807
807
|
# of structure searches
|
|
@@ -16,24 +16,25 @@ from ..error import RequestError
|
|
|
16
16
|
|
|
17
17
|
_standard_url = "https://files.rcsb.org/download/"
|
|
18
18
|
_mmtf_url = "https://mmtf.rcsb.org/v1.0/full/"
|
|
19
|
+
_bcif_url = "https://models.rcsb.org/"
|
|
19
20
|
_fasta_url = "https://www.rcsb.org/fasta/entry/"
|
|
20
21
|
|
|
21
|
-
_binary_formats = ["mmtf"]
|
|
22
|
+
_binary_formats = ["mmtf", "bcif"]
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
25
26
|
"""
|
|
26
27
|
Download structure files (or sequence files) from the RCSB PDB in
|
|
27
28
|
various formats.
|
|
28
|
-
|
|
29
|
+
|
|
29
30
|
This function requires an internet connection.
|
|
30
|
-
|
|
31
|
+
|
|
31
32
|
Parameters
|
|
32
33
|
----------
|
|
33
34
|
pdb_ids : str or iterable object of str
|
|
34
35
|
A single PDB ID or a list of PDB IDs of the structure(s)
|
|
35
36
|
to be downloaded.
|
|
36
|
-
format : {'pdb', 'pdbx', 'cif', 'mmcif', 'mmtf', 'fasta'}
|
|
37
|
+
format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'mmtf', 'fasta'}
|
|
37
38
|
The format of the files to be downloaded.
|
|
38
39
|
``'pdbx'``, ``'cif'`` and ``'mmcif'`` are synonyms for
|
|
39
40
|
the same format.
|
|
@@ -48,7 +49,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
48
49
|
the file is empty.
|
|
49
50
|
verbose: bool, optional
|
|
50
51
|
If set to true, the function will output the download progress.
|
|
51
|
-
|
|
52
|
+
|
|
52
53
|
Returns
|
|
53
54
|
-------
|
|
54
55
|
files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
|
|
@@ -58,7 +59,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
58
59
|
object) was given, a list of strings is returned.
|
|
59
60
|
If no `target_path` was given, the file contents are stored in
|
|
60
61
|
either :class:`StringIO` or :class:`BytesIO` objects.
|
|
61
|
-
|
|
62
|
+
|
|
62
63
|
Warnings
|
|
63
64
|
--------
|
|
64
65
|
Even if you give valid input to this function, in rare cases the
|
|
@@ -66,10 +67,10 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
66
67
|
In these cases the request should be retried.
|
|
67
68
|
When the issue occurs repeatedly, the error is probably in your
|
|
68
69
|
input.
|
|
69
|
-
|
|
70
|
+
|
|
70
71
|
Examples
|
|
71
72
|
--------
|
|
72
|
-
|
|
73
|
+
|
|
73
74
|
>>> import os.path
|
|
74
75
|
>>> file = fetch("1l2y", "cif", path_to_directory)
|
|
75
76
|
>>> print(os.path.basename(file))
|
|
@@ -88,21 +89,21 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
88
89
|
# Create the target folder, if not existing
|
|
89
90
|
if target_path is not None and not os.path.isdir(target_path):
|
|
90
91
|
os.makedirs(target_path)
|
|
91
|
-
|
|
92
|
+
|
|
92
93
|
files = []
|
|
93
94
|
for i, id in enumerate(pdb_ids):
|
|
94
95
|
# Verbose output
|
|
95
96
|
if verbose:
|
|
96
97
|
print(f"Fetching file {i+1:d} / {len(pdb_ids):d} ({id})...",
|
|
97
98
|
end="\r")
|
|
98
|
-
|
|
99
|
+
|
|
99
100
|
# Fetch file from database
|
|
100
101
|
if target_path is not None:
|
|
101
102
|
file = join(target_path, id + "." + format)
|
|
102
103
|
else:
|
|
103
104
|
# 'file = None' -> store content in a file-like object
|
|
104
105
|
file = None
|
|
105
|
-
|
|
106
|
+
|
|
106
107
|
if file is None \
|
|
107
108
|
or not isfile(file) \
|
|
108
109
|
or getsize(file) == 0 \
|
|
@@ -115,6 +116,10 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
115
116
|
r = requests.get(_standard_url + id + ".cif")
|
|
116
117
|
content = r.text
|
|
117
118
|
_assert_valid_file(content, id)
|
|
119
|
+
elif format in ["bcif"]:
|
|
120
|
+
r = requests.get(_bcif_url + id + ".bcif")
|
|
121
|
+
content = r.content
|
|
122
|
+
_assert_valid_file(r.text, id)
|
|
118
123
|
elif format == "mmtf":
|
|
119
124
|
r = requests.get(_mmtf_url + id)
|
|
120
125
|
content = r.content
|
|
@@ -125,7 +130,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
125
130
|
_assert_valid_file(content, id)
|
|
126
131
|
else:
|
|
127
132
|
raise ValueError(f"Format '{format}' is not supported")
|
|
128
|
-
|
|
133
|
+
|
|
129
134
|
if file is None:
|
|
130
135
|
if format in _binary_formats:
|
|
131
136
|
file = io.BytesIO(content)
|
|
@@ -135,7 +140,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
135
140
|
mode = "wb+" if format in _binary_formats else "w+"
|
|
136
141
|
with open(file, mode) as f:
|
|
137
142
|
f.write(content)
|
|
138
|
-
|
|
143
|
+
|
|
139
144
|
files.append(file)
|
|
140
145
|
if verbose:
|
|
141
146
|
print("\nDone")
|
|
@@ -153,7 +158,7 @@ def _assert_valid_file(response_text, pdb_id):
|
|
|
153
158
|
"""
|
|
154
159
|
# Structure file and FASTA file retrieval
|
|
155
160
|
# have different error messages
|
|
156
|
-
if any(err_msg in response_text for err_msg in [
|
|
161
|
+
if len(response_text) == 0 or any(err_msg in response_text for err_msg in [
|
|
157
162
|
"404 Not Found",
|
|
158
163
|
"<title>RCSB Protein Data Bank Error Page</title>",
|
|
159
164
|
"No fasta files were found.",
|