biotite 0.39.0__cp310-cp310-win_amd64.whl → 0.41.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +3 -3
- biotite/application/dssp/app.py +18 -18
- biotite/database/pubchem/download.py +23 -23
- biotite/database/pubchem/query.py +7 -7
- biotite/database/rcsb/download.py +19 -14
- biotite/file.py +17 -9
- biotite/sequence/align/banded.c +258 -237
- biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/cigar.py +60 -15
- biotite/sequence/align/kmeralphabet.c +243 -222
- biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmersimilarity.c +215 -196
- biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cpp +233 -205
- biotite/sequence/align/localgapped.c +258 -237
- biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.c +235 -214
- biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.c +255 -234
- biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.c +274 -253
- biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.c +215 -196
- biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.c +217 -197
- biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.c +215 -195
- biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/annotation.py +2 -2
- biotite/sequence/codec.c +235 -214
- biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
- biotite/sequence/io/fasta/convert.py +27 -24
- biotite/sequence/phylo/nj.c +215 -196
- biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.c +227 -202
- biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.c +215 -196
- biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
- biotite/structure/__init__.py +2 -0
- biotite/structure/basepairs.py +7 -12
- biotite/structure/bonds.c +1437 -1279
- biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
- biotite/structure/celllist.c +217 -197
- biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
- biotite/structure/charges.c +1052 -1101
- biotite/structure/charges.cp310-win_amd64.pyd +0 -0
- biotite/structure/dotbracket.py +2 -0
- biotite/structure/filter.py +30 -37
- biotite/structure/info/__init__.py +5 -8
- biotite/structure/info/atoms.py +31 -68
- biotite/structure/info/bonds.py +47 -101
- biotite/structure/info/ccd/README.rst +8 -0
- biotite/structure/info/ccd/amino_acids.txt +1663 -0
- biotite/structure/info/ccd/carbohydrates.txt +1135 -0
- biotite/structure/info/ccd/components.bcif +0 -0
- biotite/structure/info/ccd/nucleotides.txt +798 -0
- biotite/structure/info/ccd.py +95 -0
- biotite/structure/info/groups.py +90 -0
- biotite/structure/info/masses.py +21 -20
- biotite/structure/info/misc.py +78 -25
- biotite/structure/info/standardize.py +17 -12
- biotite/structure/integrity.py +19 -70
- biotite/structure/io/__init__.py +2 -4
- biotite/structure/io/ctab.py +12 -106
- biotite/structure/io/general.py +167 -181
- biotite/structure/io/gro/file.py +16 -16
- biotite/structure/io/mmtf/__init__.py +3 -0
- biotite/structure/io/mmtf/convertarray.c +219 -198
- biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.c +217 -197
- biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.c +225 -204
- biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.c +215 -196
- biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/file.py +34 -26
- biotite/structure/io/mol/__init__.py +4 -2
- biotite/structure/io/mol/convert.py +71 -7
- biotite/structure/io/mol/ctab.py +414 -0
- biotite/structure/io/mol/header.py +116 -0
- biotite/structure/io/mol/{file.py → mol.py} +69 -82
- biotite/structure/io/mol/sdf.py +909 -0
- biotite/structure/io/npz/__init__.py +3 -0
- biotite/structure/io/npz/file.py +21 -18
- biotite/structure/io/pdb/__init__.py +3 -3
- biotite/structure/io/pdb/file.py +89 -34
- biotite/structure/io/pdb/hybrid36.c +63 -43
- biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/file.py +32 -32
- biotite/structure/io/pdbx/__init__.py +12 -6
- biotite/structure/io/pdbx/bcif.py +648 -0
- biotite/structure/io/pdbx/cif.py +1032 -0
- biotite/structure/io/pdbx/component.py +246 -0
- biotite/structure/io/pdbx/convert.py +858 -386
- biotite/structure/io/pdbx/encoding.c +112813 -0
- biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/legacy.py +267 -0
- biotite/structure/molecules.py +151 -151
- biotite/structure/repair.py +253 -0
- biotite/structure/sasa.c +215 -196
- biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
- biotite/structure/sequence.py +112 -0
- biotite/structure/superimpose.py +618 -116
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
- biotite/structure/info/amino_acids.json +0 -1556
- biotite/structure/info/amino_acids.py +0 -42
- biotite/structure/info/carbohydrates.json +0 -1122
- biotite/structure/info/carbohydrates.py +0 -39
- biotite/structure/info/intra_bonds.msgpack +0 -0
- biotite/structure/info/link_types.msgpack +0 -1
- biotite/structure/info/nucleotides.json +0 -772
- biotite/structure/info/nucleotides.py +0 -39
- biotite/structure/info/residue_masses.msgpack +0 -0
- biotite/structure/info/residue_names.msgpack +0 -3
- biotite/structure/info/residues.msgpack +0 -0
- biotite/structure/io/pdbx/file.py +0 -652
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
biotite/__init__.py
CHANGED
|
@@ -9,11 +9,11 @@ it does provide utilities and base classes used by a lot of *Biotite*'s
|
|
|
9
9
|
modules.
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
|
-
__version__ = "0.39.0"
|
|
12
|
+
__version__ = "0.41.0"
|
|
13
13
|
__name__ = "biotite"
|
|
14
14
|
__author__ = "Patrick Kunzmann"
|
|
15
15
|
|
|
16
16
|
from .file import *
|
|
17
17
|
from .temp import *
|
|
18
|
-
from .copyable import *
|
|
19
|
-
from .visualize import *
|
|
18
|
+
from .copyable import *
|
|
19
|
+
from .visualize import *
|
biotite/application/dssp/app.py
CHANGED
|
@@ -9,7 +9,7 @@ __all__ = ["DsspApp"]
|
|
|
9
9
|
from tempfile import NamedTemporaryFile
|
|
10
10
|
from ..localapp import LocalApp, cleanup_tempfile
|
|
11
11
|
from ..application import AppState, requires_state
|
|
12
|
-
from ...structure.io.pdbx.file import PDBxFile
|
|
12
|
+
from ...structure.io.pdbx.cif import CIFFile
|
|
13
13
|
from ...structure.io.pdbx.convert import set_structure
|
|
14
14
|
import numpy as np
|
|
15
15
|
|
|
@@ -18,13 +18,13 @@ class DsspApp(LocalApp):
|
|
|
18
18
|
r"""
|
|
19
19
|
Annotate the secondary structure of a protein structure using the
|
|
20
20
|
*DSSP* software.
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
Internally this creates a :class:`Popen` instance, which handles
|
|
23
23
|
the execution.
|
|
24
|
-
|
|
24
|
+
|
|
25
25
|
DSSP differentiates between 8 different types of secondary
|
|
26
26
|
structure elements:
|
|
27
|
-
|
|
27
|
+
|
|
28
28
|
- C: loop, coil or irregular
|
|
29
29
|
- H: :math:`{\alpha}`-helix
|
|
30
30
|
- B: :math:`{\beta}`-bridge
|
|
@@ -32,15 +32,15 @@ class DsspApp(LocalApp):
|
|
|
32
32
|
- G: 3 :sub:`10`-helix
|
|
33
33
|
- I: :math:`{\pi}`-helix
|
|
34
34
|
- T: hydrogen bonded turn
|
|
35
|
-
- S: bend
|
|
36
|
-
|
|
35
|
+
- S: bend
|
|
36
|
+
|
|
37
37
|
Parameters
|
|
38
38
|
----------
|
|
39
39
|
atom_array : AtomArray
|
|
40
40
|
The atom array to be annotated.
|
|
41
41
|
bin_path : str, optional
|
|
42
42
|
Path of the *DDSP* binary.
|
|
43
|
-
|
|
43
|
+
|
|
44
44
|
Examples
|
|
45
45
|
--------
|
|
46
46
|
|
|
@@ -51,7 +51,7 @@ class DsspApp(LocalApp):
|
|
|
51
51
|
['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'T' 'T' 'G' 'G' 'G' 'G' 'T' 'C' 'C' 'C'
|
|
52
52
|
'C' 'C']
|
|
53
53
|
"""
|
|
54
|
-
|
|
54
|
+
|
|
55
55
|
def __init__(self, atom_array, bin_path="mkdssp"):
|
|
56
56
|
super().__init__(bin_path)
|
|
57
57
|
|
|
@@ -77,15 +77,15 @@ class DsspApp(LocalApp):
|
|
|
77
77
|
self._out_file = NamedTemporaryFile("r", suffix=".dssp", delete=False)
|
|
78
78
|
|
|
79
79
|
def run(self):
|
|
80
|
-
in_file = PDBxFile()
|
|
81
|
-
set_structure(in_file, self._array, data_block="DSSP_INPUT")
|
|
80
|
+
in_file = CIFFile()
|
|
81
|
+
set_structure(in_file, self._array)
|
|
82
82
|
in_file.write(self._in_file)
|
|
83
83
|
self._in_file.flush()
|
|
84
84
|
self.set_arguments(
|
|
85
85
|
["-i", self._in_file.name, "-o", self._out_file.name]
|
|
86
86
|
)
|
|
87
87
|
super().run()
|
|
88
|
-
|
|
88
|
+
|
|
89
89
|
def evaluate(self):
|
|
90
90
|
super().evaluate()
|
|
91
91
|
lines = self._out_file.read().split("\n")
|
|
@@ -106,17 +106,17 @@ class DsspApp(LocalApp):
|
|
|
106
106
|
for i, line in enumerate(lines):
|
|
107
107
|
self._sse[i] = line[16]
|
|
108
108
|
self._sse[self._sse == " "] = "C"
|
|
109
|
-
|
|
109
|
+
|
|
110
110
|
def clean_up(self):
|
|
111
111
|
super().clean_up()
|
|
112
112
|
cleanup_tempfile(self._in_file)
|
|
113
113
|
cleanup_tempfile(self._out_file)
|
|
114
|
-
|
|
114
|
+
|
|
115
115
|
@requires_state(AppState.JOINED)
|
|
116
116
|
def get_sse(self):
|
|
117
117
|
"""
|
|
118
118
|
Get the resulting secondary structure assignment.
|
|
119
|
-
|
|
119
|
+
|
|
120
120
|
Returns
|
|
121
121
|
-------
|
|
122
122
|
sse : ndarray, dtype="U1"
|
|
@@ -124,22 +124,22 @@ class DsspApp(LocalApp):
|
|
|
124
124
|
corresponding to the residues in the input atom array.
|
|
125
125
|
"""
|
|
126
126
|
return self._sse
|
|
127
|
-
|
|
127
|
+
|
|
128
128
|
@staticmethod
|
|
129
129
|
def annotate_sse(atom_array, bin_path="mkdssp"):
|
|
130
130
|
"""
|
|
131
131
|
Perform a secondary structure assignment to an atom array.
|
|
132
|
-
|
|
132
|
+
|
|
133
133
|
This is a convenience function, that wraps the :class:`DsspApp`
|
|
134
134
|
execution.
|
|
135
|
-
|
|
135
|
+
|
|
136
136
|
Parameters
|
|
137
137
|
----------
|
|
138
138
|
atom_array : AtomArray
|
|
139
139
|
The atom array to be annotated.
|
|
140
140
|
bin_path : str, optional
|
|
141
141
|
Path of the DDSP binary.
|
|
142
|
-
|
|
142
|
+
|
|
143
143
|
Returns
|
|
144
144
|
-------
|
|
145
145
|
sse : ndarray, dtype="U1"
|
biotite/database/pubchem/download.py
CHANGED
|
@@ -26,9 +26,9 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
|
|
|
26
26
|
throttle_threshold=0.5, return_throttle_status=False):
|
|
27
27
|
"""
|
|
28
28
|
Download structure files from *PubChem* in various formats.
|
|
29
|
-
|
|
29
|
+
|
|
30
30
|
This function requires an internet connection.
|
|
31
|
-
|
|
31
|
+
|
|
32
32
|
Parameters
|
|
33
33
|
----------
|
|
34
34
|
cids : int or iterable object or int
|
|
@@ -62,7 +62,7 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
|
|
|
62
62
|
return_throttle_status : float, optional
|
|
63
63
|
If set to true, the :class:`ThrottleStatus` of the final request
|
|
64
64
|
is also returned.
|
|
65
|
-
|
|
65
|
+
|
|
66
66
|
Returns
|
|
67
67
|
-------
|
|
68
68
|
files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
|
|
@@ -78,10 +78,10 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
|
|
|
78
78
|
of the final response is returned.
|
|
79
79
|
This can be used for custom request throttling, for example.
|
|
80
80
|
Only returned, if `return_throttle_status` is set to true.
|
|
81
|
-
|
|
81
|
+
|
|
82
82
|
Examples
|
|
83
83
|
--------
|
|
84
|
-
|
|
84
|
+
|
|
85
85
|
>>> import os.path
|
|
86
86
|
>>> file = fetch(2244, "sdf", path_to_directory)
|
|
87
87
|
>>> print(os.path.basename(file))
|
|
@@ -100,7 +100,7 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
|
|
|
100
100
|
# Create the target folder, if not existing
|
|
101
101
|
if target_path is not None and not isdir(target_path):
|
|
102
102
|
os.makedirs(target_path)
|
|
103
|
-
|
|
103
|
+
|
|
104
104
|
files = []
|
|
105
105
|
for i, cid in enumerate(cids):
|
|
106
106
|
# Prevent IDs as strings, this could be a common error, as other
|
|
@@ -111,14 +111,14 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
|
|
|
111
111
|
if verbose:
|
|
112
112
|
print(f"Fetching file {i+1:d} / {len(cids):d} ({cid})...",
|
|
113
113
|
end="\r")
|
|
114
|
-
|
|
114
|
+
|
|
115
115
|
# Fetch file from database
|
|
116
116
|
if target_path is not None:
|
|
117
117
|
file = join(target_path, str(cid) + "." + format)
|
|
118
118
|
else:
|
|
119
119
|
# 'file = None' -> store content in a file-like object
|
|
120
120
|
file = None
|
|
121
|
-
|
|
121
|
+
|
|
122
122
|
if file is None \
|
|
123
123
|
or not isfile(file) \
|
|
124
124
|
or getsize(file) == 0 \
|
|
@@ -130,12 +130,12 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
|
|
|
130
130
|
)
|
|
131
131
|
if not r.ok:
|
|
132
132
|
raise RequestError(parse_error_details(r.text))
|
|
133
|
-
|
|
133
|
+
|
|
134
134
|
if format.lower() in _binary_formats:
|
|
135
135
|
content = r.content
|
|
136
136
|
else:
|
|
137
137
|
content = r.text
|
|
138
|
-
|
|
138
|
+
|
|
139
139
|
if file is None:
|
|
140
140
|
if format in _binary_formats:
|
|
141
141
|
file = io.BytesIO(content)
|
|
@@ -145,11 +145,11 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
|
|
|
145
145
|
mode = "wb+" if format in _binary_formats else "w+"
|
|
146
146
|
with open(file, mode) as f:
|
|
147
147
|
f.write(content)
|
|
148
|
-
|
|
148
|
+
|
|
149
149
|
throttle_status = ThrottleStatus.from_response(r)
|
|
150
150
|
if throttle_threshold is not None:
|
|
151
151
|
throttle_status.wait_if_busy(throttle_threshold)
|
|
152
|
-
|
|
152
|
+
|
|
153
153
|
files.append(file)
|
|
154
154
|
if verbose:
|
|
155
155
|
print("\nDone")
|
|
@@ -168,9 +168,9 @@ def fetch_property(cids, name,
|
|
|
168
168
|
throttle_threshold=0.5, return_throttle_status=False):
|
|
169
169
|
"""
|
|
170
170
|
Download the given property for the given CID(s).
|
|
171
|
-
|
|
171
|
+
|
|
172
172
|
This function requires an internet connection.
|
|
173
|
-
|
|
173
|
+
|
|
174
174
|
Parameters
|
|
175
175
|
----------
|
|
176
176
|
cids : int or iterable object or int
|
|
@@ -189,7 +189,7 @@ def fetch_property(cids, name,
|
|
|
189
189
|
return_throttle_status : float, optional
|
|
190
190
|
If set to true, the :class:`ThrottleStatus` of the final request
|
|
191
191
|
is also returned.
|
|
192
|
-
|
|
192
|
+
|
|
193
193
|
Returns
|
|
194
194
|
-------
|
|
195
195
|
property : str or list of str
|
|
@@ -202,23 +202,23 @@ def fetch_property(cids, name,
|
|
|
202
202
|
The :class:`ThrottleStatus` obtained from the server response.
|
|
203
203
|
This can be used for custom request throttling, for example.
|
|
204
204
|
Only returned, if `return_throttle_status` is set to true.
|
|
205
|
-
|
|
205
|
+
|
|
206
206
|
Examples
|
|
207
207
|
--------
|
|
208
|
-
|
|
208
|
+
|
|
209
209
|
>>> butane_cids = np.array(search(FormulaQuery("C4H10")))
|
|
210
210
|
>>> # Filter natural isotopes...
|
|
211
211
|
>>> n_iso = np.array(fetch_property(butane_cids, "IsotopeAtomCount"), dtype=int)
|
|
212
212
|
>>> # ...and neutral compounds
|
|
213
213
|
>>> charge = np.array(fetch_property(butane_cids, "Charge"), dtype=int)
|
|
214
214
|
>>> butane_cids = butane_cids[(n_iso == 0) & (charge == 0)]
|
|
215
|
-
>>> print(butane_cids.tolist())
|
|
216
|
-
[7843,
|
|
215
|
+
>>> print(sorted(butane_cids.tolist()))
|
|
216
|
+
[6360, 7843, 18402699, 19029854, 19048342, 157632982, 158271732, 158934736, 161295599, 161897780]
|
|
217
217
|
>>> # Get the IUPAC names for each compound
|
|
218
218
|
>>> iupac_names = fetch_property(butane_cids, "IUPACName")
|
|
219
219
|
>>> # Compounds with multiple molecules use ';' as separator
|
|
220
220
|
>>> print(iupac_names)
|
|
221
|
-
['butane', '2-methylpropane', '
|
|
221
|
+
['butane', '2-methylpropane', 'methane;prop-1-ene', 'ethane;ethene', 'cyclopropane;methane', 'cyclobutane;molecular hydrogen', 'acetylene;methane', 'carbanide;propane', 'carbanylium;propane', 'methylcyclopropane;molecular hydrogen']
|
|
222
222
|
"""
|
|
223
223
|
# If only a single CID is present,
|
|
224
224
|
# put it into a single element list
|
|
@@ -227,13 +227,13 @@ def fetch_property(cids, name,
|
|
|
227
227
|
single_element = True
|
|
228
228
|
else:
|
|
229
229
|
single_element = False
|
|
230
|
-
|
|
230
|
+
|
|
231
231
|
# Property names may only contain letters and numbers
|
|
232
232
|
if not name.isalnum():
|
|
233
233
|
raise ValueError(
|
|
234
234
|
f"Property '{name}' contains invalid characters"
|
|
235
235
|
)
|
|
236
|
-
|
|
236
|
+
|
|
237
237
|
# Use TXT format instead of CSV to avoid issues with ',' characters
|
|
238
238
|
# within table elements
|
|
239
239
|
r = requests.post(
|
|
@@ -245,7 +245,7 @@ def fetch_property(cids, name,
|
|
|
245
245
|
throttle_status = ThrottleStatus.from_response(r)
|
|
246
246
|
if throttle_threshold is not None:
|
|
247
247
|
throttle_status.wait_if_busy(throttle_threshold)
|
|
248
|
-
|
|
248
|
+
|
|
249
249
|
# Each line contains the property for one CID
|
|
250
250
|
properties = r.text.splitlines()
|
|
251
251
|
|
biotite/database/pubchem/query.py
CHANGED
|
@@ -16,7 +16,7 @@ import requests
|
|
|
16
16
|
from .error import parse_error_details
|
|
17
17
|
from .throttle import ThrottleStatus
|
|
18
18
|
from ..error import RequestError
|
|
19
|
-
from ...structure.io.mol.file import MOLFile
|
|
19
|
+
from ...structure.io.mol.mol import MOLFile
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
_base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/"
|
|
@@ -84,7 +84,7 @@ class NameQuery(Query):
|
|
|
84
84
|
--------
|
|
85
85
|
|
|
86
86
|
>>> print(search(NameQuery("Alanine")))
|
|
87
|
-
[5950, ...,
|
|
87
|
+
[5950, ..., ...]
|
|
88
88
|
"""
|
|
89
89
|
|
|
90
90
|
def __init__(self, name):
|
|
@@ -204,10 +204,10 @@ class FormulaQuery(Query):
|
|
|
204
204
|
--------
|
|
205
205
|
|
|
206
206
|
>>> print(search(FormulaQuery("C4H10", number=5)))
|
|
207
|
-
[
|
|
207
|
+
[..., ..., ..., ..., ...]
|
|
208
208
|
>>> atom_array = residue("ALA")
|
|
209
209
|
>>> print(search(FormulaQuery.from_atoms(atom_array, number=5)))
|
|
210
|
-
[
|
|
210
|
+
[..., ..., ..., ..., ...]
|
|
211
211
|
"""
|
|
212
212
|
|
|
213
213
|
def __init__(self, formula, allow_other_elements=False, number=None):
|
|
@@ -555,11 +555,11 @@ class SuperstructureQuery(SuperOrSubstructureQuery):
|
|
|
555
555
|
|
|
556
556
|
>>> # CID of alanine
|
|
557
557
|
>>> print(search(SuperstructureQuery(cid=5950, number=5)))
|
|
558
|
-
[
|
|
558
|
+
[..., ..., ..., ..., ...]
|
|
559
559
|
>>> # AtomArray of alanine
|
|
560
560
|
>>> atom_array = residue("ALA")
|
|
561
561
|
>>> print(search(SuperstructureQuery.from_atoms(atom_array, number=5)))
|
|
562
|
-
[
|
|
562
|
+
[..., ..., ..., ..., ...]
|
|
563
563
|
"""
|
|
564
564
|
|
|
565
565
|
def search_type(self):
|
|
@@ -801,7 +801,7 @@ def search(query, throttle_threshold=0.5, return_throttle_status=False):
|
|
|
801
801
|
--------
|
|
802
802
|
|
|
803
803
|
>>> print(search(NameQuery("Alanine")))
|
|
804
|
-
[5950, ...,
|
|
804
|
+
[5950, ..., ...]
|
|
805
805
|
"""
|
|
806
806
|
# Use POST to be compatible with the larger payloads
|
|
807
807
|
# of structure searches
|
biotite/database/rcsb/download.py
CHANGED
|
@@ -16,24 +16,25 @@ from ..error import RequestError
|
|
|
16
16
|
|
|
17
17
|
_standard_url = "https://files.rcsb.org/download/"
|
|
18
18
|
_mmtf_url = "https://mmtf.rcsb.org/v1.0/full/"
|
|
19
|
+
_bcif_url = "https://models.rcsb.org/"
|
|
19
20
|
_fasta_url = "https://www.rcsb.org/fasta/entry/"
|
|
20
21
|
|
|
21
|
-
_binary_formats = ["mmtf"]
|
|
22
|
+
_binary_formats = ["mmtf", "bcif"]
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
25
26
|
"""
|
|
26
27
|
Download structure files (or sequence files) from the RCSB PDB in
|
|
27
28
|
various formats.
|
|
28
|
-
|
|
29
|
+
|
|
29
30
|
This function requires an internet connection.
|
|
30
|
-
|
|
31
|
+
|
|
31
32
|
Parameters
|
|
32
33
|
----------
|
|
33
34
|
pdb_ids : str or iterable object of str
|
|
34
35
|
A single PDB ID or a list of PDB IDs of the structure(s)
|
|
35
36
|
to be downloaded.
|
|
36
|
-
format : {'pdb', 'pdbx', 'cif', 'mmcif', 'mmtf', 'fasta'}
|
|
37
|
+
format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'mmtf', 'fasta'}
|
|
37
38
|
The format of the files to be downloaded.
|
|
38
39
|
``'pdbx'``, ``'cif'`` and ``'mmcif'`` are synonyms for
|
|
39
40
|
the same format.
|
|
@@ -48,7 +49,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
48
49
|
the file is empty.
|
|
49
50
|
verbose: bool, optional
|
|
50
51
|
If set to true, the function will output the download progress.
|
|
51
|
-
|
|
52
|
+
|
|
52
53
|
Returns
|
|
53
54
|
-------
|
|
54
55
|
files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
|
|
@@ -58,7 +59,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
58
59
|
object) was given, a list of strings is returned.
|
|
59
60
|
If no `target_path` was given, the file contents are stored in
|
|
60
61
|
either :class:`StringIO` or :class:`BytesIO` objects.
|
|
61
|
-
|
|
62
|
+
|
|
62
63
|
Warnings
|
|
63
64
|
--------
|
|
64
65
|
Even if you give valid input to this function, in rare cases the
|
|
@@ -66,10 +67,10 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
66
67
|
In these cases the request should be retried.
|
|
67
68
|
When the issue occurs repeatedly, the error is probably in your
|
|
68
69
|
input.
|
|
69
|
-
|
|
70
|
+
|
|
70
71
|
Examples
|
|
71
72
|
--------
|
|
72
|
-
|
|
73
|
+
|
|
73
74
|
>>> import os.path
|
|
74
75
|
>>> file = fetch("1l2y", "cif", path_to_directory)
|
|
75
76
|
>>> print(os.path.basename(file))
|
|
@@ -88,21 +89,21 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
88
89
|
# Create the target folder, if not existing
|
|
89
90
|
if target_path is not None and not os.path.isdir(target_path):
|
|
90
91
|
os.makedirs(target_path)
|
|
91
|
-
|
|
92
|
+
|
|
92
93
|
files = []
|
|
93
94
|
for i, id in enumerate(pdb_ids):
|
|
94
95
|
# Verbose output
|
|
95
96
|
if verbose:
|
|
96
97
|
print(f"Fetching file {i+1:d} / {len(pdb_ids):d} ({id})...",
|
|
97
98
|
end="\r")
|
|
98
|
-
|
|
99
|
+
|
|
99
100
|
# Fetch file from database
|
|
100
101
|
if target_path is not None:
|
|
101
102
|
file = join(target_path, id + "." + format)
|
|
102
103
|
else:
|
|
103
104
|
# 'file = None' -> store content in a file-like object
|
|
104
105
|
file = None
|
|
105
|
-
|
|
106
|
+
|
|
106
107
|
if file is None \
|
|
107
108
|
or not isfile(file) \
|
|
108
109
|
or getsize(file) == 0 \
|
|
@@ -115,6 +116,10 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
115
116
|
r = requests.get(_standard_url + id + ".cif")
|
|
116
117
|
content = r.text
|
|
117
118
|
_assert_valid_file(content, id)
|
|
119
|
+
elif format in ["bcif"]:
|
|
120
|
+
r = requests.get(_bcif_url + id + ".bcif")
|
|
121
|
+
content = r.content
|
|
122
|
+
_assert_valid_file(r.text, id)
|
|
118
123
|
elif format == "mmtf":
|
|
119
124
|
r = requests.get(_mmtf_url + id)
|
|
120
125
|
content = r.content
|
|
@@ -125,7 +130,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
125
130
|
_assert_valid_file(content, id)
|
|
126
131
|
else:
|
|
127
132
|
raise ValueError(f"Format '{format}' is not supported")
|
|
128
|
-
|
|
133
|
+
|
|
129
134
|
if file is None:
|
|
130
135
|
if format in _binary_formats:
|
|
131
136
|
file = io.BytesIO(content)
|
|
@@ -135,7 +140,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
135
140
|
mode = "wb+" if format in _binary_formats else "w+"
|
|
136
141
|
with open(file, mode) as f:
|
|
137
142
|
f.write(content)
|
|
138
|
-
|
|
143
|
+
|
|
139
144
|
files.append(file)
|
|
140
145
|
if verbose:
|
|
141
146
|
print("\nDone")
|
|
@@ -153,7 +158,7 @@ def _assert_valid_file(response_text, pdb_id):
|
|
|
153
158
|
"""
|
|
154
159
|
# Structure file and FASTA file retrieval
|
|
155
160
|
# have different error messages
|
|
156
|
-
if any(err_msg in response_text for err_msg in [
|
|
161
|
+
if len(response_text) == 0 or any(err_msg in response_text for err_msg in [
|
|
157
162
|
"404 Not Found",
|
|
158
163
|
"<title>RCSB Protein Data Bank Error Page</title>",
|
|
159
164
|
"No fasta files were found.",
|
biotite/file.py
CHANGED
|
@@ -4,7 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite"
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
|
-
__all__ = ["File", "TextFile", "InvalidFileError"]
|
|
7
|
+
__all__ = ["File", "TextFile", "InvalidFileError",
|
|
8
|
+
"SerializationError", "DeserializationError"]
|
|
8
9
|
|
|
9
10
|
import abc
|
|
10
11
|
import io
|
|
@@ -38,13 +39,13 @@ class File(Copyable, metaclass=abc.ABCMeta):
|
|
|
38
39
|
def read(cls, file):
|
|
39
40
|
"""
|
|
40
41
|
Parse a file (or file-like object).
|
|
41
|
-
|
|
42
|
+
|
|
42
43
|
Parameters
|
|
43
44
|
----------
|
|
44
45
|
file : file-like object or str
|
|
45
46
|
The file to be read.
|
|
46
47
|
Alternatively a file path can be supplied.
|
|
47
|
-
|
|
48
|
+
|
|
48
49
|
Returns
|
|
49
50
|
-------
|
|
50
51
|
file_object : File
|
|
@@ -74,7 +75,7 @@ class File(Copyable, metaclass=abc.ABCMeta):
|
|
|
74
75
|
def write(self, file):
|
|
75
76
|
"""
|
|
76
77
|
Write the contents of this :class:`File` object into a file.
|
|
77
|
-
|
|
78
|
+
|
|
78
79
|
Parameters
|
|
79
80
|
----------
|
|
80
81
|
file_name : file-like object or str
|
|
@@ -90,7 +91,7 @@ class TextFile(File, metaclass=abc.ABCMeta):
|
|
|
90
91
|
When reading a file, the text content is saved as list of strings,
|
|
91
92
|
one for each line.
|
|
92
93
|
When writing a file, this list is written into the file.
|
|
93
|
-
|
|
94
|
+
|
|
94
95
|
Attributes
|
|
95
96
|
----------
|
|
96
97
|
lines : list
|
|
@@ -121,13 +122,13 @@ class TextFile(File, metaclass=abc.ABCMeta):
|
|
|
121
122
|
def read_iter(file):
|
|
122
123
|
"""
|
|
123
124
|
Create an iterator over each line of the given text file.
|
|
124
|
-
|
|
125
|
+
|
|
125
126
|
Parameters
|
|
126
127
|
----------
|
|
127
128
|
file : file-like object or str
|
|
128
129
|
The file to be read.
|
|
129
130
|
Alternatively a file path can be supplied.
|
|
130
|
-
|
|
131
|
+
|
|
131
132
|
Yields
|
|
132
133
|
------
|
|
133
134
|
line : str
|
|
@@ -147,7 +148,7 @@ class TextFile(File, metaclass=abc.ABCMeta):
|
|
|
147
148
|
"""
|
|
148
149
|
Write the contents of this object into a file
|
|
149
150
|
(or file-like object).
|
|
150
|
-
|
|
151
|
+
|
|
151
152
|
Parameters
|
|
152
153
|
----------
|
|
153
154
|
file : file-like object or str
|
|
@@ -174,7 +175,7 @@ class TextFile(File, metaclass=abc.ABCMeta):
|
|
|
174
175
|
Hence, this static method may save a large amount of memory if
|
|
175
176
|
a large file should be written, especially if the `lines`
|
|
176
177
|
are provided as generator.
|
|
177
|
-
|
|
178
|
+
|
|
178
179
|
Parameters
|
|
179
180
|
----------
|
|
180
181
|
file : file-like object or str
|
|
@@ -211,6 +212,13 @@ class InvalidFileError(Exception):
|
|
|
211
212
|
pass
|
|
212
213
|
|
|
213
214
|
|
|
215
|
+
class SerializationError(Exception):
|
|
216
|
+
pass
|
|
217
|
+
|
|
218
|
+
class DeserializationError(Exception):
|
|
219
|
+
pass
|
|
220
|
+
|
|
221
|
+
|
|
214
222
|
def wrap_string(text, width):
|
|
215
223
|
"""
|
|
216
224
|
A much simpler and hence much more efficient version of
|