biotite 1.0.1__cp311-cp311-win_amd64.whl → 1.2.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/application.py +3 -3
- biotite/application/autodock/app.py +1 -1
- biotite/application/blast/webapp.py +1 -1
- biotite/application/clustalo/app.py +1 -1
- biotite/application/dssp/app.py +13 -3
- biotite/application/localapp.py +36 -2
- biotite/application/msaapp.py +10 -10
- biotite/application/muscle/app3.py +5 -18
- biotite/application/muscle/app5.py +5 -5
- biotite/application/sra/app.py +0 -5
- biotite/application/util.py +22 -2
- biotite/application/viennarna/rnaalifold.py +8 -8
- biotite/application/viennarna/rnaplot.py +9 -3
- biotite/application/viennarna/util.py +1 -1
- biotite/application/webapp.py +1 -1
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +191 -0
- biotite/database/entrez/dbnames.py +10 -0
- biotite/database/entrez/download.py +9 -10
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +5 -4
- biotite/database/pubchem/download.py +6 -6
- biotite/database/pubchem/error.py +10 -0
- biotite/database/pubchem/query.py +12 -23
- biotite/database/rcsb/download.py +3 -2
- biotite/database/rcsb/query.py +8 -9
- biotite/database/uniprot/check.py +22 -17
- biotite/database/uniprot/download.py +3 -6
- biotite/database/uniprot/query.py +4 -5
- biotite/file.py +14 -2
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +16 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +198 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1226 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +15 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +71 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/align/__init__.py +0 -4
- biotite/sequence/align/alignment.py +49 -14
- biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/banded.pyx +26 -26
- biotite/sequence/align/cigar.py +2 -2
- biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +19 -2
- biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +58 -48
- biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localgapped.pyx +47 -47
- biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.pyx +10 -10
- biotite/sequence/align/matrix.py +284 -57
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.pyx +35 -35
- biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +2 -2
- biotite/sequence/align/statistics.py +1 -1
- biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +5 -2
- biotite/sequence/annotation.py +19 -13
- biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +1 -2
- biotite/sequence/graphics/alignment.py +25 -39
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
- biotite/sequence/graphics/colorschemes.py +44 -11
- biotite/sequence/graphics/dendrogram.py +4 -2
- biotite/sequence/graphics/features.py +2 -2
- biotite/sequence/graphics/logo.py +10 -12
- biotite/sequence/io/fasta/convert.py +1 -2
- biotite/sequence/io/fasta/file.py +1 -1
- biotite/sequence/io/fastq/file.py +3 -3
- biotite/sequence/io/genbank/file.py +3 -3
- biotite/sequence/io/genbank/sequence.py +2 -0
- biotite/sequence/io/gff/convert.py +1 -1
- biotite/sequence/io/gff/file.py +1 -2
- biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +105 -29
- biotite/sequence/search.py +0 -1
- biotite/sequence/seqtypes.py +136 -8
- biotite/sequence/sequence.py +1 -2
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +6 -3
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +163 -66
- biotite/structure/basepairs.py +26 -26
- biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +79 -25
- biotite/structure/box.py +19 -21
- biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
- biotite/structure/celllist.pyx +83 -67
- biotite/structure/chains.py +5 -37
- biotite/structure/charges.cp311-win_amd64.pyd +0 -0
- biotite/structure/compare.py +420 -13
- biotite/structure/density.py +1 -1
- biotite/structure/dotbracket.py +27 -28
- biotite/structure/filter.py +8 -8
- biotite/structure/geometry.py +74 -127
- biotite/structure/hbond.py +17 -19
- biotite/structure/info/__init__.py +1 -0
- biotite/structure/info/atoms.py +24 -15
- biotite/structure/info/bonds.py +12 -6
- biotite/structure/info/ccd.py +125 -34
- biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
- biotite/structure/info/groups.py +62 -19
- biotite/structure/info/masses.py +9 -6
- biotite/structure/info/misc.py +15 -22
- biotite/structure/info/radii.py +92 -22
- biotite/structure/info/standardize.py +4 -4
- biotite/structure/integrity.py +4 -6
- biotite/structure/io/general.py +2 -2
- biotite/structure/io/gro/file.py +8 -9
- biotite/structure/io/mol/convert.py +1 -1
- biotite/structure/io/mol/ctab.py +33 -28
- biotite/structure/io/mol/mol.py +1 -1
- biotite/structure/io/mol/sdf.py +80 -53
- biotite/structure/io/pdb/convert.py +4 -3
- biotite/structure/io/pdb/file.py +85 -25
- biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/file.py +36 -36
- biotite/structure/io/pdbx/__init__.py +1 -0
- biotite/structure/io/pdbx/bcif.py +54 -15
- biotite/structure/io/pdbx/cif.py +92 -66
- biotite/structure/io/pdbx/component.py +15 -4
- biotite/structure/io/pdbx/compress.py +321 -0
- biotite/structure/io/pdbx/convert.py +410 -75
- biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/encoding.pyx +98 -17
- biotite/structure/io/trajfile.py +9 -6
- biotite/structure/io/util.py +38 -0
- biotite/structure/mechanics.py +0 -1
- biotite/structure/molecules.py +141 -156
- biotite/structure/pseudoknots.py +7 -13
- biotite/structure/repair.py +2 -4
- biotite/structure/residues.py +13 -24
- biotite/structure/rings.py +335 -0
- biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
- biotite/structure/sasa.pyx +2 -1
- biotite/structure/segments.py +69 -11
- biotite/structure/sequence.py +0 -1
- biotite/structure/sse.py +0 -2
- biotite/structure/superimpose.py +74 -62
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +12 -25
- biotite/structure/util.py +76 -4
- biotite/version.py +9 -4
- biotite/visualize.py +111 -1
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/METADATA +6 -2
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/RECORD +173 -143
- biotite/structure/info/ccd/README.rst +0 -8
- biotite/structure/info/ccd/amino_acids.txt +0 -1663
- biotite/structure/info/ccd/carbohydrates.txt +0 -1135
- biotite/structure/info/ccd/nucleotides.txt +0 -798
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database.afdb"
|
|
6
|
+
__author__ = "Patrick Kunzmann, Alex Carlin"
|
|
7
|
+
__all__ = ["fetch"]
|
|
8
|
+
|
|
9
|
+
import io
|
|
10
|
+
import re
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from xml.etree import ElementTree
|
|
13
|
+
import requests
|
|
14
|
+
from biotite.database.error import RequestError
|
|
15
|
+
|
|
16
|
+
# Base URL that `_get_file_url()` queries for the metadata of a prediction
_METADATA_URL = "https://alphafold.com/api/prediction"
# Formats whose content is handled as raw bytes instead of text
_BINARY_FORMATS = ["bcif"]
# Adopted from https://www.uniprot.org/help/accession_numbers
# The two alternatives are listed on separate lines
_UNIPROT_PATTERN = (
    r"[OPQ][0-9][A-Z0-9]{3}[0-9]"
    r"|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
    """
    Download predicted protein structures from the AlphaFold DB.

    This function requires an internet connection.

    Parameters
    ----------
    ids : str or iterable object of str
        A single ID or a list of IDs of the file(s) to be downloaded.
        They can be either UniProt IDs (e.g. ``P12345``) or AlphaFold DB IDs
        (e.g. ``AF-P12345-F1``).
        Any iterable is accepted, including generators.
    format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'fasta'}
        The format of the files to be downloaded.
        ``'pdbx'`` and ``'mmcif'`` are aliases for ``'cif'``.
    target_path : str, optional
        The target directory of the downloaded files.
        The directory is created, if it does not exist yet.
        By default, the file content is stored in a file-like object
        (`StringIO` or `BytesIO`, respectively).
    overwrite : bool, optional
        If true, existing files will be overwritten.
        Otherwise the respective file will only be downloaded if the file does not
        exist yet in the specified target directory or if the file is empty.
    verbose : bool, optional
        If true, the function will output the download progress.

    Returns
    -------
    files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
        The file path(s) to the downloaded files.
        If a single string (a single ID) was given in `ids`, a single element is
        returned.
        If a list (or other iterable object) was given, a list is returned.
        If no `target_path` was given, the file contents are stored in either
        ``StringIO`` or ``BytesIO`` objects.

    Raises
    ------
    ValueError
        If `format` is not one of the supported formats.

    Examples
    --------

    >>> from pathlib import Path
    >>> file = fetch("P12345", "cif", path_to_directory)
    >>> print(Path(file).name)
    P12345.cif
    >>> files = fetch(["P12345", "Q8K9I1"], "cif", path_to_directory)
    >>> print([Path(file).name for file in files])
    ['P12345.cif', 'Q8K9I1.cif']
    """
    if format not in ["pdb", "pdbx", "cif", "mmcif", "bcif", "fasta"]:
        raise ValueError(f"Format '{format}' is not supported")
    if format in ["pdbx", "mmcif"]:
        format = "cif"

    # If only a single ID is present,
    # put it into a single element list
    if isinstance(ids, str):
        ids = [ids]
        single_element = True
    else:
        # Materialize the iterable: the progress output needs 'len()',
        # which generators and other one-shot iterables do not provide
        ids = list(ids)
        single_element = False
    if target_path is not None:
        target_path = Path(target_path)
        target_path.mkdir(parents=True, exist_ok=True)

    files = []
    for i, entry_id in enumerate(ids):
        # Verbose output
        if verbose:
            print(
                f"Fetching file {i + 1:d} / {len(ids):d} ({entry_id})...",
                end="\r",
            )
        # Fetch file from database
        if target_path is not None:
            file = target_path / f"{entry_id}.{format}"
        else:
            # 'file = None' -> store content in a file-like object
            file = None
        # An existing non-empty file is reused, unless overwriting is requested
        if file is None or not file.is_file() or file.stat().st_size == 0 or overwrite:
            is_binary = format in _BINARY_FORMATS
            file_response = requests.get(_get_file_url(entry_id, format))
            _assert_valid_file(file_response, entry_id)
            content = file_response.content if is_binary else file_response.text

            if file is None:
                file = io.BytesIO(content) if is_binary else io.StringIO(content)
            else:
                mode = "wb+" if is_binary else "w+"
                with open(file, mode) as f:
                    f.write(content)

        files.append(file)
    if verbose:
        print("\nDone")

    # Return paths as strings
    files = [file.as_posix() if isinstance(file, Path) else file for file in files]
    # If input was a single ID, return only a single element
    return files[0] if single_element else files
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _get_file_url(id, format):
    """
    Look up the download URL of a file via the ``prediction`` API endpoint.

    Parameters
    ----------
    id : str
        The ID of the file to be downloaded.
    format : str
        The format of the file to be downloaded.

    Returns
    -------
    file_url : str
        The URL of the file to be downloaded.

    Raises
    ------
    RequestError
        If the endpoint returns an empty result for the given ID.
    """
    response = requests.get(f"{_METADATA_URL}/{_extract_id(id)}")
    entries = response.json()
    if len(entries) == 0:
        raise RequestError(f"ID {id} is invalid")
    # A list of length 1 is always returned, if the response is valid
    entry = entries[0]
    # The URL for each format is stored under the key '<format>Url'
    return entry[f"{format}Url"]
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _extract_id(id):
    """
    Extract an AFDB compatible UniProt ID from the given qualifier.
    The qualifier may be

    - Directly the UniProt ID (e.g. ``P12345``) (trivial case)
    - Entry ID, as also returned by the RCSB search API (e.g. ``AF-P12345-F1``)

    Parameters
    ----------
    id : str
        The qualifier to extract the UniProt ID from.

    Returns
    -------
    uniprot_id : str
        The UniProt ID.

    Raises
    ------
    ValueError
        If the qualifier contains nothing that looks like a UniProt ID.
    """
    # The first occurrence of the pattern anywhere in the qualifier is used
    found = re.search(_UNIPROT_PATTERN, id)
    if found is not None:
        return found.group()
    raise ValueError(f"Cannot extract AFDB identifier from '{id}'")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _assert_valid_file(response, id):
    """
    Check whether the response is an actual structure file
    or whether the response is a *404* error due to an invalid UniProt ID.

    Parameters
    ----------
    response : Response
        The response of the file download request.
        Only its ``text`` attribute is read.
    id : str
        The ID the file was requested for.
        It is only used in the error message.

    Raises
    ------
    RequestError
        If the response is empty or if it is an XML document
        with an ``Error`` root element.
    """
    if len(response.text) == 0:
        raise RequestError(f"Received no response for '{id}'")
    try:
        root = ElementTree.fromstring(response.text)
    except ElementTree.ParseError:
        # This is not XML -> the response is probably a valid file
        return
    if root.tag == "Error":
        # 'findtext()' falls back to the default instead of failing
        # with an 'AttributeError', if there is no 'Message' element
        message = root.findtext("Message", default="Unknown error")
        raise RequestError(f"Error while fetching '{id}': {message}")
|
|
@@ -80,6 +80,16 @@ def sanitize_database_name(db_name):
|
|
|
80
80
|
database name is not existing.
|
|
81
81
|
|
|
82
82
|
Only for internal usage in ``download.py`` and ``query.py``.
|
|
83
|
+
|
|
84
|
+
Parameters
|
|
85
|
+
----------
|
|
86
|
+
db_name : str
|
|
87
|
+
Entrez database name.
|
|
88
|
+
|
|
89
|
+
Returns
|
|
90
|
+
-------
|
|
91
|
+
name : str
|
|
92
|
+
E-utility database name.
|
|
83
93
|
"""
|
|
84
94
|
if db_name in _db_names.keys():
|
|
85
95
|
# Convert into E-utility database name
|
|
@@ -54,17 +54,16 @@ def fetch(
|
|
|
54
54
|
db_name : str
|
|
55
55
|
E-utility or common database name.
|
|
56
56
|
ret_type : str
|
|
57
|
-
Retrieval type
|
|
57
|
+
Retrieval type.
|
|
58
58
|
ret_mode : str, optional
|
|
59
|
-
Retrieval mode
|
|
59
|
+
Retrieval mode.
|
|
60
60
|
overwrite : bool, optional
|
|
61
61
|
If true, existing files will be overwritten. Otherwise the
|
|
62
62
|
respective file will only be downloaded if the file does not
|
|
63
63
|
exist yet in the specified target directory or if the file is
|
|
64
|
-
empty.
|
|
65
|
-
verbose: bool, optional
|
|
64
|
+
empty.
|
|
65
|
+
verbose : bool, optional
|
|
66
66
|
If true, the function will output the download progress.
|
|
67
|
-
(Default: False)
|
|
68
67
|
|
|
69
68
|
Returns
|
|
70
69
|
-------
|
|
@@ -84,9 +83,9 @@ def fetch(
|
|
|
84
83
|
When the issue occurs repeatedly, the error is probably in your
|
|
85
84
|
input.
|
|
86
85
|
|
|
87
|
-
See
|
|
86
|
+
See Also
|
|
88
87
|
--------
|
|
89
|
-
fetch_single_file
|
|
88
|
+
fetch_single_file : Fetch multiple entries as a single file.
|
|
90
89
|
|
|
91
90
|
Examples
|
|
92
91
|
--------
|
|
@@ -111,7 +110,7 @@ def fetch(
|
|
|
111
110
|
for i, id in enumerate(uids):
|
|
112
111
|
# Verbose output
|
|
113
112
|
if verbose:
|
|
114
|
-
print(f"Fetching file {i+1:d} / {len(uids):d} ({id})...", end="\r")
|
|
113
|
+
print(f"Fetching file {i + 1:d} / {len(uids):d} ({id})...", end="\r")
|
|
115
114
|
# Fetch file from database
|
|
116
115
|
if target_path is not None:
|
|
117
116
|
file = join(target_path, id + "." + suffix)
|
|
@@ -188,9 +187,9 @@ def fetch_single_file(
|
|
|
188
187
|
When the issue occurs repeatedly, the error is probably in your
|
|
189
188
|
input.
|
|
190
189
|
|
|
191
|
-
See
|
|
190
|
+
See Also
|
|
192
191
|
--------
|
|
193
|
-
fetch
|
|
192
|
+
fetch : Fetch one or multiple entries as separate files.
|
|
194
193
|
"""
|
|
195
194
|
if (
|
|
196
195
|
file_name is not None
|
biotite/database/entrez/key.py
CHANGED
biotite/database/entrez/query.py
CHANGED
|
@@ -60,9 +60,9 @@ class CompositeQuery(Query):
|
|
|
60
60
|
|
|
61
61
|
Parameters
|
|
62
62
|
----------
|
|
63
|
-
operator: str, {"AND", "OR", "NOT"}
|
|
63
|
+
operator : str, {"AND", "OR", "NOT"}
|
|
64
64
|
The combination operator.
|
|
65
|
-
|
|
65
|
+
query1, query2 : SimpleQuery
|
|
66
66
|
The queries to be combined.
|
|
67
67
|
|
|
68
68
|
Examples
|
|
@@ -97,7 +97,7 @@ class SimpleQuery(Query):
|
|
|
97
97
|
|
|
98
98
|
Parameters
|
|
99
99
|
----------
|
|
100
|
-
term: str
|
|
100
|
+
term : str
|
|
101
101
|
The search term.
|
|
102
102
|
field : str, optional
|
|
103
103
|
The field to search the term in.
|
|
@@ -173,7 +173,8 @@ class SimpleQuery(Query):
|
|
|
173
173
|
"SUBS",
|
|
174
174
|
"WORD",
|
|
175
175
|
"TI",
|
|
176
|
-
"TITL"
|
|
176
|
+
"TITL",
|
|
177
|
+
"VOL",
|
|
177
178
|
]
|
|
178
179
|
|
|
179
180
|
def __init__(self, term, field=None):
|
|
@@ -41,22 +41,22 @@ def fetch(
|
|
|
41
41
|
to be downloaded.
|
|
42
42
|
format : {'sdf', 'asnt', 'asnb', 'xml', 'json', 'jsonp', 'png'}
|
|
43
43
|
The format of the files to be downloaded.
|
|
44
|
+
target_path : str, optional
|
|
45
|
+
The target directory of the downloaded files.
|
|
46
|
+
By default, the file content is stored in a file-like object
|
|
47
|
+
(:class:`StringIO` or :class:`BytesIO`, respectively).
|
|
44
48
|
as_structural_formula : bool, optional
|
|
45
49
|
If set to true, the structural formula is downloaded instead of
|
|
46
50
|
a 3D conformer.
|
|
47
51
|
This means that coordinates lie in the xy-plane and represent
|
|
48
52
|
the positions atoms would have in a structural formula
|
|
49
53
|
representation.
|
|
50
|
-
target_path : str, optional
|
|
51
|
-
The target directory of the downloaded files.
|
|
52
|
-
By default, the file content is stored in a file-like object
|
|
53
|
-
(:class:`StringIO` or :class:`BytesIO`, respectively).
|
|
54
54
|
overwrite : bool, optional
|
|
55
55
|
If true, existing files will be overwritten.
|
|
56
56
|
Otherwise the respective file will only be downloaded, if the
|
|
57
57
|
file does not exist yet in the specified target directory or if
|
|
58
58
|
the file is empty.
|
|
59
|
-
verbose: bool, optional
|
|
59
|
+
verbose : bool, optional
|
|
60
60
|
If set to true, the function will output the download progress.
|
|
61
61
|
throttle_threshold : float or None, optional
|
|
62
62
|
A value between 0 and 1.
|
|
@@ -114,7 +114,7 @@ def fetch(
|
|
|
114
114
|
raise TypeError("CIDs must be given as integers, not as string")
|
|
115
115
|
# Verbose output
|
|
116
116
|
if verbose:
|
|
117
|
-
print(f"Fetching file {i+1:d} / {len(cids):d} ({cid})...", end="\r")
|
|
117
|
+
print(f"Fetching file {i + 1:d} / {len(cids):d} ({cid})...", end="\r")
|
|
118
118
|
|
|
119
119
|
# Fetch file from database
|
|
120
120
|
if target_path is not None:
|
|
@@ -11,6 +11,16 @@ def parse_error_details(response_text):
|
|
|
11
11
|
"""
|
|
12
12
|
Parse the ``Detail: ...`` or alternatively ``Message: ...`` part of
|
|
13
13
|
an error response.
|
|
14
|
+
|
|
15
|
+
Parameters
|
|
16
|
+
----------
|
|
17
|
+
response_text : str
|
|
18
|
+
The text of the response.
|
|
19
|
+
|
|
20
|
+
Returns
|
|
21
|
+
-------
|
|
22
|
+
error_details : str
|
|
23
|
+
The error details.
|
|
14
24
|
"""
|
|
15
25
|
for message_line_indicator in ["Detail: ", "Message: "]:
|
|
16
26
|
for line in response_text.splitlines():
|
|
@@ -240,6 +240,11 @@ class FormulaQuery(Query):
|
|
|
240
240
|
The maximum number of matches that this query may return.
|
|
241
241
|
By default, the *PubChem* default value is used, which can
|
|
242
242
|
be considered unlimited.
|
|
243
|
+
|
|
244
|
+
Returns
|
|
245
|
+
-------
|
|
246
|
+
query : FormulaQuery
|
|
247
|
+
The query.
|
|
243
248
|
"""
|
|
244
249
|
element_counter = collections.Counter(atoms.element)
|
|
245
250
|
formula = ""
|
|
@@ -327,7 +332,7 @@ class StructureQuery(Query, metaclass=abc.ABCMeta):
|
|
|
327
332
|
)
|
|
328
333
|
if not query_key_found:
|
|
329
334
|
raise TypeError(
|
|
330
|
-
"Expected exactly one of 'smiles', 'smarts', 'inchi', 'sdf'
|
|
335
|
+
"Expected exactly one of 'smiles', 'smarts', 'inchi', 'sdf' or 'cid'"
|
|
331
336
|
)
|
|
332
337
|
if "number" in kwargs:
|
|
333
338
|
self._number = kwargs["number"]
|
|
@@ -348,8 +353,13 @@ class StructureQuery(Query, metaclass=abc.ABCMeta):
|
|
|
348
353
|
----------
|
|
349
354
|
atoms : AtomArray or AtomArrayStack
|
|
350
355
|
The query structure.
|
|
351
|
-
**kwargs
|
|
356
|
+
*args, **kwargs
|
|
352
357
|
See the constructor for additional options.
|
|
358
|
+
|
|
359
|
+
Returns
|
|
360
|
+
-------
|
|
361
|
+
query : StructureQuery
|
|
362
|
+
The query object.
|
|
353
363
|
"""
|
|
354
364
|
mol_file = MOLFile()
|
|
355
365
|
mol_file.set_structure(atoms)
|
|
@@ -448,26 +458,19 @@ class SuperOrSubstructureQuery(StructureQuery, metaclass=abc.ABCMeta):
|
|
|
448
458
|
be considered unlimited.
|
|
449
459
|
match_charges : bool, optional
|
|
450
460
|
If set to true, atoms must match the specified charge.
|
|
451
|
-
(Default: False)
|
|
452
461
|
match_tautomers : bool, optional
|
|
453
462
|
If set to true, allow match to tautomers of the given structure.
|
|
454
|
-
(Default: False)
|
|
455
463
|
rings_not_embedded : bool, optional
|
|
456
464
|
If set to true, rings may not be embedded in a larger system.
|
|
457
|
-
(Default: False)
|
|
458
465
|
single_double_bonds_match : bool, optional
|
|
459
466
|
If set to true, single or double bonds match aromatic bonds.
|
|
460
|
-
(Default: True)
|
|
461
467
|
chains_match_rings : bool, optional
|
|
462
468
|
If set to true, chain bonds in the query may match rings in
|
|
463
469
|
hits.
|
|
464
|
-
(Default: True)
|
|
465
470
|
strip_hydrogen : bool, optional
|
|
466
471
|
If set to true, remove any explicit hydrogens before searching.
|
|
467
|
-
(Default: False)
|
|
468
472
|
stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
|
|
469
473
|
How to handle stereo.
|
|
470
|
-
(Default: 'ignore')
|
|
471
474
|
|
|
472
475
|
Notes
|
|
473
476
|
-----
|
|
@@ -528,26 +531,19 @@ class SuperstructureQuery(SuperOrSubstructureQuery):
|
|
|
528
531
|
be considered unlimited.
|
|
529
532
|
match_charges : bool, optional
|
|
530
533
|
If set to true, atoms must match the specified charge.
|
|
531
|
-
(Default: False)
|
|
532
534
|
match_tautomers : bool, optional
|
|
533
535
|
If set to true, allow match to tautomers of the given structure.
|
|
534
|
-
(Default: False)
|
|
535
536
|
rings_not_embedded : bool, optional
|
|
536
537
|
If set to true, rings may not be embedded in a larger system.
|
|
537
|
-
(Default: False)
|
|
538
538
|
single_double_bonds_match : bool, optional
|
|
539
539
|
If set to true, single or double bonds match aromatic bonds.
|
|
540
|
-
(Default: True)
|
|
541
540
|
chains_match_rings : bool, optional
|
|
542
541
|
If set to true, chain bonds in the query may match rings in
|
|
543
542
|
hits.
|
|
544
|
-
(Default: True)
|
|
545
543
|
strip_hydrogen : bool, optional
|
|
546
544
|
If set to true, remove any explicit hydrogens before searching.
|
|
547
|
-
(Default: False)
|
|
548
545
|
stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
|
|
549
546
|
How to handle stereo.
|
|
550
|
-
(Default: 'ignore')
|
|
551
547
|
|
|
552
548
|
Notes
|
|
553
549
|
-----
|
|
@@ -601,26 +597,19 @@ class SubstructureQuery(SuperOrSubstructureQuery):
|
|
|
601
597
|
be considered unlimited.
|
|
602
598
|
match_charges : bool, optional
|
|
603
599
|
If set to true, atoms must match the specified charge.
|
|
604
|
-
(Default: False)
|
|
605
600
|
match_tautomers : bool, optional
|
|
606
601
|
If set to true, allow match to tautomers of the given structure.
|
|
607
|
-
(Default: False)
|
|
608
602
|
rings_not_embedded : bool, optional
|
|
609
603
|
If set to true, rings may not be embedded in a larger system.
|
|
610
|
-
(Default: False)
|
|
611
604
|
single_double_bonds_match : bool, optional
|
|
612
605
|
If set to true, single or double bonds match aromatic bonds.
|
|
613
|
-
(Default: True)
|
|
614
606
|
chains_match_rings : bool, optional
|
|
615
607
|
If set to true, chain bonds in the query may match rings in
|
|
616
608
|
hits.
|
|
617
|
-
(Default: True)
|
|
618
609
|
strip_hydrogen : bool, optional
|
|
619
610
|
If set to true, remove any explicit hydrogens before searching.
|
|
620
|
-
(Default: False)
|
|
621
611
|
stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
|
|
622
612
|
How to handle stereo.
|
|
623
|
-
(Default: 'ignore')
|
|
624
613
|
|
|
625
614
|
Notes
|
|
626
615
|
-----
|
|
@@ -44,7 +44,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
44
44
|
Otherwise the respective file will only be downloaded, if the
|
|
45
45
|
file does not exist yet in the specified target directory or if
|
|
46
46
|
the file is empty.
|
|
47
|
-
verbose: bool, optional
|
|
47
|
+
verbose : bool, optional
|
|
48
48
|
If set to true, the function will output the download progress.
|
|
49
49
|
|
|
50
50
|
Returns
|
|
@@ -91,7 +91,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
91
91
|
for i, id in enumerate(pdb_ids):
|
|
92
92
|
# Verbose output
|
|
93
93
|
if verbose:
|
|
94
|
-
print(f"Fetching file {i+1:d} / {len(pdb_ids):d} ({id})...", end="\r")
|
|
94
|
+
print(f"Fetching file {i + 1:d} / {len(pdb_ids):d} ({id})...", end="\r")
|
|
95
95
|
|
|
96
96
|
# Fetch file from database
|
|
97
97
|
if target_path is not None:
|
|
@@ -152,6 +152,7 @@ def _assert_valid_file(response_text, pdb_id):
|
|
|
152
152
|
for err_msg in [
|
|
153
153
|
"404 Not Found",
|
|
154
154
|
"<title>RCSB Protein Data Bank Error Page</title>",
|
|
155
|
+
"<title>PDB Archive over AWS</title>",
|
|
155
156
|
"No fasta files were found.",
|
|
156
157
|
"No valid PDB IDs were submitted.",
|
|
157
158
|
]
|
biotite/database/rcsb/query.py
CHANGED
|
@@ -146,9 +146,9 @@ class BasicQuery(SingleQuery):
|
|
|
146
146
|
Examples
|
|
147
147
|
--------
|
|
148
148
|
|
|
149
|
-
>>> query = BasicQuery("
|
|
149
|
+
>>> query = BasicQuery("Miniprotein Construct")
|
|
150
150
|
>>> print(sorted(search(query)))
|
|
151
|
-
['1L2Y'
|
|
151
|
+
['1L2Y']
|
|
152
152
|
"""
|
|
153
153
|
|
|
154
154
|
def __init__(self, term):
|
|
@@ -257,7 +257,7 @@ class FieldQuery(SingleQuery):
|
|
|
257
257
|
"exists",
|
|
258
258
|
]:
|
|
259
259
|
raise TypeError(
|
|
260
|
-
f"Constructor got an unexpected keyword argument
|
|
260
|
+
f"Constructor got an unexpected keyword argument '{self._operator}'"
|
|
261
261
|
)
|
|
262
262
|
|
|
263
263
|
# Convert dates into ISO 8601
|
|
@@ -346,9 +346,9 @@ class SequenceQuery(SingleQuery):
|
|
|
346
346
|
--------
|
|
347
347
|
|
|
348
348
|
>>> sequence = "NLYIQWLKDGGPSSGRPPPS"
|
|
349
|
-
>>> query = SequenceQuery(sequence, scope="protein", min_identity=0.
|
|
349
|
+
>>> query = SequenceQuery(sequence, scope="protein", min_identity=0.95)
|
|
350
350
|
>>> print(sorted(search(query)))
|
|
351
|
-
['1L2Y', '
|
|
351
|
+
['1L2Y', '2LDJ', '9G22', '9G2N', '9G2O', '9G31', '9G32', '9GDL', '9GDN', '9GDT', '9GDU', '9GE1']
|
|
352
352
|
"""
|
|
353
353
|
|
|
354
354
|
def __init__(self, sequence, scope, min_identity=0.0, max_expect_value=10000000.0):
|
|
@@ -441,7 +441,7 @@ class StructureQuery(SingleQuery):
|
|
|
441
441
|
|
|
442
442
|
>>> query = StructureQuery("1L2Y", chain="A")
|
|
443
443
|
>>> print(sorted(search(query)))
|
|
444
|
-
['1L2Y', '1RIJ', '2JOF', '2LDJ', '2M7D', '7MQS']
|
|
444
|
+
['1L2Y', '1RIJ', '2JOF', '2LDJ', '2M7D', '7MQS', '9DPF']
|
|
445
445
|
"""
|
|
446
446
|
|
|
447
447
|
def __init__(self, pdb_id, chain=None, assembly=None, strict=True):
|
|
@@ -868,7 +868,7 @@ def search(
|
|
|
868
868
|
... query, return_type="polymer_entity", return_groups=True,
|
|
869
869
|
... group_by=UniprotGrouping(sort_by="rcsb_accession_info.initial_release_date"),
|
|
870
870
|
... ))
|
|
871
|
-
|
|
871
|
+
{'P24297': ['5NW3_1'], 'P27707': ['4JLJ_1'], 'P80176': ['5D8V_1'], 'O29777': ['7R0H_1'], 'P01542': ['3NIR_1', '1EJG_1']}
|
|
872
872
|
"""
|
|
873
873
|
query_dict = _initialize_query_dict(query, return_type, group_by, content_types)
|
|
874
874
|
|
|
@@ -944,8 +944,7 @@ def _initialize_query_dict(query, return_type, group_by, content_types):
|
|
|
944
944
|
if group_by is not None:
|
|
945
945
|
if not group_by.is_compatible_return_type(return_type):
|
|
946
946
|
raise ValueError(
|
|
947
|
-
f"Return type '{return_type}' is not compatible "
|
|
948
|
-
f"with the given Grouping"
|
|
947
|
+
f"Return type '{return_type}' is not compatible with the given Grouping"
|
|
949
948
|
)
|
|
950
949
|
request_options["group_by"] = group_by.get_content()
|
|
951
950
|
|
|
@@ -10,26 +10,31 @@ from biotite.database.error import RequestError
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
# Taken from https://www.uniprot.org/help/api_retrieve_entries
|
|
13
|
-
def assert_valid_response(
|
|
13
|
+
def assert_valid_response(response):
    """
    Checks whether the response is valid.

    Parameters
    ----------
    response : Response
        The response of :func:`requests.get()`.
        Its ``content`` and ``status_code`` attributes are inspected.

    Raises
    ------
    RequestError
        If the response has no content or if its status code is one of
        400, 404, 410, 500 or 503.
    """
    if len(response.content) == 0:
        raise RequestError("No content returned")
    # Status codes that are reported as errors, mapped to their message
    error_messages = {
        400: "Bad request. There is a problem with your input.",
        404: "Not found. The resource you requested doesn't exist.",
        410: "Gone. The resource you requested was removed.",
        500: (
            "Internal server error. "
            "Most likely a temporary problem, "
            "but if the problem persists please contact UniProt team."
        ),
        503: "Service not available. The server is being updated, try again later.",
    }
    message = error_messages.get(response.status_code)
    # Any other status code is accepted silently
    if message is not None:
        raise RequestError(message)
|
|
@@ -41,7 +41,6 @@ def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
41
41
|
Download files from the UniProt in various formats.
|
|
42
42
|
|
|
43
43
|
Available databases are UniProtKB, UniRef and UniParc.
|
|
44
|
-
|
|
45
44
|
This function requires an internet connection.
|
|
46
45
|
|
|
47
46
|
Parameters
|
|
@@ -58,11 +57,9 @@ def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
58
57
|
overwrite : bool, optional
|
|
59
58
|
If true, existing files will be overwritten. Otherwise the
|
|
60
59
|
respective file will only be downloaded if the file does not
|
|
61
|
-
exist yet in the specified target directory
|
|
62
|
-
|
|
63
|
-
verbose: bool, optional
|
|
60
|
+
exist yet in the specified target directory.
|
|
61
|
+
verbose : bool, optional
|
|
64
62
|
If true, the function will output the download progress.
|
|
65
|
-
(Default: False)
|
|
66
63
|
|
|
67
64
|
Returns
|
|
68
65
|
-------
|
|
@@ -111,7 +108,7 @@ def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
111
108
|
if format in ["fasta", "gff", "txt", "xml", "rdf", "tab"]:
|
|
112
109
|
r = requests.get(_fetch_url + db_name + "/" + id + "." + format)
|
|
113
110
|
content = r.text
|
|
114
|
-
assert_valid_response(r
|
|
111
|
+
assert_valid_response(r)
|
|
115
112
|
else:
|
|
116
113
|
raise ValueError(f"Format '{format}' is not supported")
|
|
117
114
|
if file is None:
|
|
@@ -50,9 +50,9 @@ class CompositeQuery(Query):
|
|
|
50
50
|
|
|
51
51
|
Parameters
|
|
52
52
|
----------
|
|
53
|
-
operator: str, {"AND", "OR", "NOT"}
|
|
53
|
+
operator : str, {"AND", "OR", "NOT"}
|
|
54
54
|
The combination operator.
|
|
55
|
-
|
|
55
|
+
query1, query2 : SimpleQuery
|
|
56
56
|
The queries to be combined.
|
|
57
57
|
"""
|
|
58
58
|
|
|
@@ -114,7 +114,7 @@ class SimpleQuery(Query):
|
|
|
114
114
|
The list of possible fields and the required search term
|
|
115
115
|
formatting can be found
|
|
116
116
|
`here <https://www.uniprot.org/help/query-fields>`_.
|
|
117
|
-
term: str
|
|
117
|
+
term : str
|
|
118
118
|
The search term.
|
|
119
119
|
"""
|
|
120
120
|
|
|
@@ -264,7 +264,6 @@ def search(query, number=500):
|
|
|
264
264
|
The search query.
|
|
265
265
|
number : int
|
|
266
266
|
The maximum number of IDs that are obtained.
|
|
267
|
-
(Default: 500)
|
|
268
267
|
|
|
269
268
|
Returns
|
|
270
269
|
-------
|
|
@@ -289,5 +288,5 @@ def search(query, number=500):
|
|
|
289
288
|
params = {"query": str(query), "format": "list", "size": str(number)}
|
|
290
289
|
r = requests.get(_base_url, params=params)
|
|
291
290
|
content = r.text
|
|
292
|
-
assert_valid_response(r
|
|
291
|
+
assert_valid_response(r)
|
|
293
292
|
return content.split("\n")[:-1]
|