biotite 0.41.2__cp310-cp310-macosx_11_0_arm64.whl → 1.0.1__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cpython-310-darwin.so +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +246 -236
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cpython-310-darwin.so +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cpython-310-darwin.so +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cpython-310-darwin.so +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +83 -78
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +140 -110
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +260 -258
- biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
- biotite/structure/io/trajfile.py +90 -107
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cpython-310-darwin.so +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
- biotite-1.0.1.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/io/pdb/file.py
CHANGED
|
@@ -8,20 +8,23 @@ __all__ = ["PDBFile"]
|
|
|
8
8
|
|
|
9
9
|
import warnings
|
|
10
10
|
import numpy as np
|
|
11
|
-
from
|
|
12
|
-
from
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from ...filter import (
|
|
11
|
+
from biotite.file import InvalidFileError, TextFile
|
|
12
|
+
from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
|
|
13
|
+
from biotite.structure.bonds import BondList, connect_via_residue_names
|
|
14
|
+
from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
|
|
15
|
+
from biotite.structure.error import BadStructureError
|
|
16
|
+
from biotite.structure.filter import (
|
|
18
17
|
filter_first_altloc,
|
|
19
18
|
filter_highest_occupancy_altloc,
|
|
20
19
|
filter_solvent,
|
|
21
20
|
)
|
|
22
|
-
from
|
|
23
|
-
|
|
24
|
-
|
|
21
|
+
from biotite.structure.io.pdb.hybrid36 import (
|
|
22
|
+
decode_hybrid36,
|
|
23
|
+
encode_hybrid36,
|
|
24
|
+
max_hybrid36_number,
|
|
25
|
+
)
|
|
26
|
+
from biotite.structure.repair import infer_elements
|
|
27
|
+
from biotite.structure.util import matrix_rotate
|
|
25
28
|
|
|
26
29
|
_PDB_MAX_ATOMS = 99999
|
|
27
30
|
_PDB_MAX_RESIDUES = 9999
|
|
@@ -82,6 +85,7 @@ class PDBFile(TextFile):
|
|
|
82
85
|
>>> file.set_structure(array_stack_mod)
|
|
83
86
|
>>> file.write(os.path.join(path_to_directory, "1l2y_mod.pdb"))
|
|
84
87
|
"""
|
|
88
|
+
|
|
85
89
|
@classmethod
|
|
86
90
|
def read(cls, file):
|
|
87
91
|
file = super().read(file)
|
|
@@ -91,7 +95,6 @@ class PDBFile(TextFile):
|
|
|
91
95
|
file._index_models_and_atoms()
|
|
92
96
|
return file
|
|
93
97
|
|
|
94
|
-
|
|
95
98
|
def get_remark(self, number):
|
|
96
99
|
r"""
|
|
97
100
|
Get the lines containing the *REMARK* records with the given
|
|
@@ -140,7 +143,8 @@ class PDBFile(TextFile):
|
|
|
140
143
|
remark_string = f"REMARK {number:>3d}"
|
|
141
144
|
# Find lines and omit ``REMARK XXX `` part
|
|
142
145
|
remark_lines = [
|
|
143
|
-
line[CONTENT_START_COLUMN:]
|
|
146
|
+
line[CONTENT_START_COLUMN:]
|
|
147
|
+
for line in self.lines
|
|
144
148
|
if line.startswith(remark_string)
|
|
145
149
|
]
|
|
146
150
|
if len(remark_lines) == 0:
|
|
@@ -149,7 +153,6 @@ class PDBFile(TextFile):
|
|
|
149
153
|
remark_lines = remark_lines[1:]
|
|
150
154
|
return remark_lines
|
|
151
155
|
|
|
152
|
-
|
|
153
156
|
def get_model_count(self):
|
|
154
157
|
"""
|
|
155
158
|
Get the number of models contained in the PDB file.
|
|
@@ -161,7 +164,6 @@ class PDBFile(TextFile):
|
|
|
161
164
|
"""
|
|
162
165
|
return len(self._model_start_i)
|
|
163
166
|
|
|
164
|
-
|
|
165
167
|
def get_coord(self, model=None):
|
|
166
168
|
"""
|
|
167
169
|
Get only the coordinates from the PDB file.
|
|
@@ -239,21 +241,21 @@ class PDBFile(TextFile):
|
|
|
239
241
|
if model is None:
|
|
240
242
|
coord = np.zeros(
|
|
241
243
|
(len(self._model_start_i), self._get_model_length(), 3),
|
|
242
|
-
dtype=np.float32
|
|
244
|
+
dtype=np.float32,
|
|
243
245
|
)
|
|
244
246
|
m = 0
|
|
245
247
|
i = 0
|
|
246
248
|
for line_i in self._atom_line_i:
|
|
247
249
|
if (
|
|
248
|
-
m < len(self._model_start_i)-1
|
|
249
|
-
and line_i > self._model_start_i[m+1]
|
|
250
|
+
m < len(self._model_start_i) - 1
|
|
251
|
+
and line_i > self._model_start_i[m + 1]
|
|
250
252
|
):
|
|
251
253
|
m += 1
|
|
252
254
|
i = 0
|
|
253
255
|
line = self.lines[line_i]
|
|
254
|
-
coord[m,i,0] = float(line[_coord_x])
|
|
255
|
-
coord[m,i,1] = float(line[_coord_y])
|
|
256
|
-
coord[m,i,2] = float(line[_coord_z])
|
|
256
|
+
coord[m, i, 0] = float(line[_coord_x])
|
|
257
|
+
coord[m, i, 1] = float(line[_coord_y])
|
|
258
|
+
coord[m, i, 2] = float(line[_coord_z])
|
|
257
259
|
i += 1
|
|
258
260
|
return coord
|
|
259
261
|
|
|
@@ -262,12 +264,11 @@ class PDBFile(TextFile):
|
|
|
262
264
|
coord = np.zeros((len(coord_i), 3), dtype=np.float32)
|
|
263
265
|
for i, line_i in enumerate(coord_i):
|
|
264
266
|
line = self.lines[line_i]
|
|
265
|
-
coord[i,0] = float(line[_coord_x])
|
|
266
|
-
coord[i,1] = float(line[_coord_y])
|
|
267
|
-
coord[i,2] = float(line[_coord_z])
|
|
267
|
+
coord[i, 0] = float(line[_coord_x])
|
|
268
|
+
coord[i, 1] = float(line[_coord_y])
|
|
269
|
+
coord[i, 2] = float(line[_coord_z])
|
|
268
270
|
return coord
|
|
269
271
|
|
|
270
|
-
|
|
271
272
|
def get_b_factor(self, model=None):
|
|
272
273
|
"""
|
|
273
274
|
Get only the B-factors from the PDB file.
|
|
@@ -300,20 +301,19 @@ class PDBFile(TextFile):
|
|
|
300
301
|
"""
|
|
301
302
|
if model is None:
|
|
302
303
|
b_factor = np.zeros(
|
|
303
|
-
(len(self._model_start_i), self._get_model_length()),
|
|
304
|
-
dtype=np.float32
|
|
304
|
+
(len(self._model_start_i), self._get_model_length()), dtype=np.float32
|
|
305
305
|
)
|
|
306
306
|
m = 0
|
|
307
307
|
i = 0
|
|
308
308
|
for line_i in self._atom_line_i:
|
|
309
309
|
if (
|
|
310
|
-
m < len(self._model_start_i)-1
|
|
311
|
-
and line_i > self._model_start_i[m+1]
|
|
310
|
+
m < len(self._model_start_i) - 1
|
|
311
|
+
and line_i > self._model_start_i[m + 1]
|
|
312
312
|
):
|
|
313
313
|
m += 1
|
|
314
314
|
i = 0
|
|
315
315
|
line = self.lines[line_i]
|
|
316
|
-
b_factor[m,i] = float(line[_temp_f])
|
|
316
|
+
b_factor[m, i] = float(line[_temp_f])
|
|
317
317
|
i += 1
|
|
318
318
|
return b_factor
|
|
319
319
|
|
|
@@ -325,9 +325,9 @@ class PDBFile(TextFile):
|
|
|
325
325
|
b_factor[i] = float(line[_temp_f])
|
|
326
326
|
return b_factor
|
|
327
327
|
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
328
|
+
def get_structure(
|
|
329
|
+
self, model=None, altloc="first", extra_fields=[], include_bonds=False
|
|
330
|
+
):
|
|
331
331
|
"""
|
|
332
332
|
Get an :class:`AtomArray` or :class:`AtomArrayStack` from the PDB file.
|
|
333
333
|
|
|
@@ -391,17 +391,17 @@ class PDBFile(TextFile):
|
|
|
391
391
|
array = AtomArray(len(coord_i))
|
|
392
392
|
|
|
393
393
|
# Create mandatory and optional annotation arrays
|
|
394
|
-
chain_id
|
|
395
|
-
res_id
|
|
396
|
-
ins_code
|
|
397
|
-
res_name
|
|
398
|
-
hetero
|
|
394
|
+
chain_id = np.zeros(array.array_length(), array.chain_id.dtype)
|
|
395
|
+
res_id = np.zeros(array.array_length(), array.res_id.dtype)
|
|
396
|
+
ins_code = np.zeros(array.array_length(), array.ins_code.dtype)
|
|
397
|
+
res_name = np.zeros(array.array_length(), array.res_name.dtype)
|
|
398
|
+
hetero = np.zeros(array.array_length(), array.hetero.dtype)
|
|
399
399
|
atom_name = np.zeros(array.array_length(), array.atom_name.dtype)
|
|
400
|
-
element
|
|
400
|
+
element = np.zeros(array.array_length(), array.element.dtype)
|
|
401
401
|
atom_id_raw = np.zeros(array.array_length(), "U5")
|
|
402
|
-
charge_raw
|
|
402
|
+
charge_raw = np.zeros(array.array_length(), "U2")
|
|
403
403
|
occupancy = np.zeros(array.array_length(), float)
|
|
404
|
-
b_factor
|
|
404
|
+
b_factor = np.zeros(array.array_length(), float)
|
|
405
405
|
altloc_id = np.zeros(array.array_length(), dtype="U1")
|
|
406
406
|
|
|
407
407
|
# Fill annotation array
|
|
@@ -425,13 +425,11 @@ class PDBFile(TextFile):
|
|
|
425
425
|
occupancy[i] = float(line[_occupancy].strip())
|
|
426
426
|
b_factor[i] = float(line[_temp_f].strip())
|
|
427
427
|
|
|
428
|
-
if include_bonds or
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
dtype=int
|
|
434
|
-
)
|
|
428
|
+
if include_bonds or (extra_fields is not None and "atom_id" in extra_fields):
|
|
429
|
+
# The atom IDs are only required in these two cases
|
|
430
|
+
atom_id = np.array(
|
|
431
|
+
[decode_hybrid36(raw_id.item()) for raw_id in atom_id_raw], dtype=int
|
|
432
|
+
)
|
|
435
433
|
else:
|
|
436
434
|
atom_id = None
|
|
437
435
|
|
|
@@ -444,16 +442,16 @@ class PDBFile(TextFile):
|
|
|
444
442
|
array.atom_name = atom_name
|
|
445
443
|
array.element = element
|
|
446
444
|
|
|
447
|
-
for field in
|
|
445
|
+
for field in extra_fields if extra_fields is not None else []:
|
|
448
446
|
if field == "atom_id":
|
|
449
447
|
# Copy is necessary to avoid double masking in
|
|
450
448
|
# later altloc ID filtering
|
|
451
449
|
array.set_annotation("atom_id", atom_id.copy())
|
|
452
450
|
elif field == "charge":
|
|
453
451
|
charge = np.array(charge_raw)
|
|
454
|
-
array.set_annotation(
|
|
455
|
-
charge == " ", "0", charge
|
|
456
|
-
)
|
|
452
|
+
array.set_annotation(
|
|
453
|
+
"charge", np.where(charge == " ", "0", charge).astype(int)
|
|
454
|
+
)
|
|
457
455
|
elif field == "occupancy":
|
|
458
456
|
array.set_annotation("occupancy", occupancy)
|
|
459
457
|
elif field == "b_factor":
|
|
@@ -485,7 +483,10 @@ class PDBFile(TextFile):
|
|
|
485
483
|
m = 0
|
|
486
484
|
i = 0
|
|
487
485
|
for line_i in self._atom_line_i:
|
|
488
|
-
if
|
|
486
|
+
if (
|
|
487
|
+
m < len(self._model_start_i) - 1
|
|
488
|
+
and line_i > self._model_start_i[m + 1]
|
|
489
|
+
):
|
|
489
490
|
m += 1
|
|
490
491
|
i = 0
|
|
491
492
|
line = self.lines[line_i]
|
|
@@ -506,9 +507,7 @@ class PDBFile(TextFile):
|
|
|
506
507
|
alpha = np.deg2rad(float(line[_alpha]))
|
|
507
508
|
beta = np.deg2rad(float(line[_beta]))
|
|
508
509
|
gamma = np.deg2rad(float(line[_gamma]))
|
|
509
|
-
box = vectors_from_unitcell(
|
|
510
|
-
len_a, len_b, len_c, alpha, beta, gamma
|
|
511
|
-
)
|
|
510
|
+
box = vectors_from_unitcell(len_a, len_b, len_c, alpha, beta, gamma)
|
|
512
511
|
except ValueError:
|
|
513
512
|
# File contains invalid 'CRYST1' record
|
|
514
513
|
warnings.warn(
|
|
@@ -526,9 +525,7 @@ class PDBFile(TextFile):
|
|
|
526
525
|
|
|
527
526
|
# Filter altloc IDs
|
|
528
527
|
if altloc == "occupancy":
|
|
529
|
-
filter = filter_highest_occupancy_altloc(
|
|
530
|
-
array, altloc_id, occupancy
|
|
531
|
-
)
|
|
528
|
+
filter = filter_highest_occupancy_altloc(array, altloc_id, occupancy)
|
|
532
529
|
array = array[..., filter]
|
|
533
530
|
atom_id = atom_id[filter] if atom_id is not None else None
|
|
534
531
|
elif altloc == "first":
|
|
@@ -548,7 +545,6 @@ class PDBFile(TextFile):
|
|
|
548
545
|
|
|
549
546
|
return array
|
|
550
547
|
|
|
551
|
-
|
|
552
548
|
def set_structure(self, array, hybrid36=False):
|
|
553
549
|
"""
|
|
554
550
|
Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
|
|
@@ -596,39 +592,42 @@ class PDBFile(TextFile):
|
|
|
596
592
|
occupancy = np.char.array(np.full(natoms, " 1.00", dtype="U6"))
|
|
597
593
|
if "charge" in annot_categories:
|
|
598
594
|
charge = np.char.array(
|
|
599
|
-
[
|
|
600
|
-
|
|
601
|
-
|
|
595
|
+
[
|
|
596
|
+
str(np.abs(charge)) + "+"
|
|
597
|
+
if charge > 0
|
|
598
|
+
else (str(np.abs(charge)) + "-" if charge < 0 else "")
|
|
599
|
+
for charge in array.get_annotation("charge")
|
|
600
|
+
]
|
|
602
601
|
)
|
|
603
602
|
else:
|
|
604
603
|
charge = np.char.array(np.full(natoms, " ", dtype="U2"))
|
|
605
604
|
|
|
606
605
|
if hybrid36:
|
|
607
|
-
pdb_atom_id = np.char.array(
|
|
608
|
-
|
|
609
|
-
)
|
|
610
|
-
pdb_res_id = np.char.array(
|
|
611
|
-
[encode_hybrid36(i, 4) for i in array.res_id]
|
|
612
|
-
)
|
|
606
|
+
pdb_atom_id = np.char.array([encode_hybrid36(i, 5) for i in atom_id])
|
|
607
|
+
pdb_res_id = np.char.array([encode_hybrid36(i, 4) for i in array.res_id])
|
|
613
608
|
else:
|
|
614
609
|
# Atom IDs are supported up to 99999,
|
|
615
610
|
# but negative IDs are also possible
|
|
616
|
-
pdb_atom_id = np.char.array(
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
)
|
|
611
|
+
pdb_atom_id = np.char.array(
|
|
612
|
+
np.where(
|
|
613
|
+
atom_id > 0, ((atom_id - 1) % _PDB_MAX_ATOMS) + 1, atom_id
|
|
614
|
+
).astype(str)
|
|
615
|
+
)
|
|
621
616
|
# Residue IDs are supported up to 9999,
|
|
622
617
|
# but negative IDs are also possible
|
|
623
|
-
pdb_res_id = np.char.array(
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
618
|
+
pdb_res_id = np.char.array(
|
|
619
|
+
np.where(
|
|
620
|
+
array.res_id > 0,
|
|
621
|
+
((array.res_id - 1) % _PDB_MAX_RESIDUES) + 1,
|
|
622
|
+
array.res_id,
|
|
623
|
+
).astype(str)
|
|
624
|
+
)
|
|
628
625
|
|
|
629
626
|
names = np.char.array(
|
|
630
|
-
[
|
|
631
|
-
|
|
627
|
+
[
|
|
628
|
+
f" {atm}" if len(elem) == 1 and len(atm) < 4 else atm
|
|
629
|
+
for atm, elem in zip(array.atom_name, array.element)
|
|
630
|
+
]
|
|
632
631
|
)
|
|
633
632
|
res_names = np.char.array(array.res_name)
|
|
634
633
|
chain_ids = np.char.array(array.chain_id)
|
|
@@ -637,17 +636,20 @@ class PDBFile(TextFile):
|
|
|
637
636
|
elements = np.char.array(array.element)
|
|
638
637
|
|
|
639
638
|
first_half = (
|
|
640
|
-
record.ljust(6)
|
|
641
|
-
pdb_atom_id.rjust(5)
|
|
642
|
-
spaces
|
|
643
|
-
names.ljust(4)
|
|
644
|
-
|
|
645
|
-
|
|
639
|
+
record.ljust(6)
|
|
640
|
+
+ pdb_atom_id.rjust(5)
|
|
641
|
+
+ spaces
|
|
642
|
+
+ names.ljust(4)
|
|
643
|
+
+ spaces
|
|
644
|
+
+ res_names.rjust(3)
|
|
645
|
+
+ spaces
|
|
646
|
+
+ chain_ids
|
|
647
|
+
+ pdb_res_id.rjust(4)
|
|
648
|
+
+ ins_codes.rjust(1)
|
|
646
649
|
)
|
|
647
650
|
|
|
648
651
|
second_half = (
|
|
649
|
-
occupancy + b_factor + 10 * spaces +
|
|
650
|
-
elements.rjust(2) + charge.rjust(2)
|
|
652
|
+
occupancy + b_factor + 10 * spaces + elements.rjust(2) + charge.rjust(2)
|
|
651
653
|
)
|
|
652
654
|
|
|
653
655
|
coords = array.coord
|
|
@@ -674,9 +676,10 @@ class PDBFile(TextFile):
|
|
|
674
676
|
self.lines.append(f"MODEL {model_num:4}")
|
|
675
677
|
# Bundle non-coordinate data to simplify iteration
|
|
676
678
|
self.lines.extend(
|
|
677
|
-
[
|
|
678
|
-
|
|
679
|
-
|
|
679
|
+
[
|
|
680
|
+
f"{start:27} {x:>8.3f}{y:>8.3f}{z:>8.3f}{end:26}"
|
|
681
|
+
for start, (x, y, z), end in zip(first_half, coord_i, second_half)
|
|
682
|
+
]
|
|
680
683
|
)
|
|
681
684
|
if is_stack:
|
|
682
685
|
self.lines.append("ENDMDL")
|
|
@@ -688,18 +691,15 @@ class PDBFile(TextFile):
|
|
|
688
691
|
hetero_indices = np.where(array.hetero & ~filter_solvent(array))[0]
|
|
689
692
|
bond_array = array.bonds.as_array()
|
|
690
693
|
bond_array = bond_array[
|
|
691
|
-
np.isin(bond_array[:,0], hetero_indices)
|
|
692
|
-
np.isin(bond_array[:,1], hetero_indices)
|
|
693
|
-
(array.res_id
|
|
694
|
-
(array.chain_id[bond_array[:,0]] != array.chain_id[bond_array[:,1]])
|
|
694
|
+
np.isin(bond_array[:, 0], hetero_indices)
|
|
695
|
+
| np.isin(bond_array[:, 1], hetero_indices)
|
|
696
|
+
| (array.res_id[bond_array[:, 0]] != array.res_id[bond_array[:, 1]])
|
|
697
|
+
| (array.chain_id[bond_array[:, 0]] != array.chain_id[bond_array[:, 1]])
|
|
695
698
|
]
|
|
696
|
-
self._set_bonds(
|
|
697
|
-
BondList(array.array_length(), bond_array), pdb_atom_id
|
|
698
|
-
)
|
|
699
|
+
self._set_bonds(BondList(array.array_length(), bond_array), pdb_atom_id)
|
|
699
700
|
|
|
700
701
|
self._index_models_and_atoms()
|
|
701
702
|
|
|
702
|
-
|
|
703
703
|
def list_assemblies(self):
|
|
704
704
|
"""
|
|
705
705
|
List the biological assemblies that are available for the
|
|
@@ -727,14 +727,16 @@ class PDBFile(TextFile):
|
|
|
727
727
|
raise InvalidFileError(
|
|
728
728
|
"File does not contain assembly information (REMARK 300)"
|
|
729
729
|
)
|
|
730
|
-
return [
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
730
|
+
return [assembly_id.strip() for assembly_id in remark_lines[0][12:].split(",")]
|
|
731
|
+
|
|
732
|
+
def get_assembly(
|
|
733
|
+
self,
|
|
734
|
+
assembly_id=None,
|
|
735
|
+
model=None,
|
|
736
|
+
altloc="first",
|
|
737
|
+
extra_fields=[],
|
|
738
|
+
include_bonds=False,
|
|
739
|
+
):
|
|
738
740
|
"""
|
|
739
741
|
Build the given biological assembly.
|
|
740
742
|
|
|
@@ -829,18 +831,16 @@ class PDBFile(TextFile):
|
|
|
829
831
|
if assembly_start_i is None:
|
|
830
832
|
if assembly_id is None:
|
|
831
833
|
raise InvalidFileError(
|
|
832
|
-
"File does not contain transformation "
|
|
833
|
-
"expressions for assemblies"
|
|
834
|
+
"File does not contain transformation " "expressions for assemblies"
|
|
834
835
|
)
|
|
835
836
|
else:
|
|
836
|
-
raise KeyError(
|
|
837
|
-
|
|
838
|
-
)
|
|
839
|
-
assembly_lines = remark_lines[assembly_start_i : assembly_stop_i]
|
|
837
|
+
raise KeyError(f"The assembly ID '{assembly_id}' is not found")
|
|
838
|
+
assembly_lines = remark_lines[assembly_start_i:assembly_stop_i]
|
|
840
839
|
|
|
841
840
|
# Get transformations for a set of chains
|
|
842
841
|
chain_set_start_indices = [
|
|
843
|
-
i
|
|
842
|
+
i
|
|
843
|
+
for i, line in enumerate(assembly_lines)
|
|
844
844
|
if line.startswith("APPLY THE FOLLOWING TO CHAINS")
|
|
845
845
|
]
|
|
846
846
|
# Add exclusive stop at end of records
|
|
@@ -848,17 +848,17 @@ class PDBFile(TextFile):
|
|
|
848
848
|
assembly = None
|
|
849
849
|
for i in range(len(chain_set_start_indices) - 1):
|
|
850
850
|
start = chain_set_start_indices[i]
|
|
851
|
-
stop = chain_set_start_indices[i+1]
|
|
851
|
+
stop = chain_set_start_indices[i + 1]
|
|
852
852
|
# Read affected chain IDs from the following line(s)
|
|
853
853
|
affected_chain_ids = []
|
|
854
854
|
transform_start = None
|
|
855
|
-
for j, line in enumerate(assembly_lines[start
|
|
856
|
-
if line.startswith("APPLY THE FOLLOWING TO CHAINS:") or
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
855
|
+
for j, line in enumerate(assembly_lines[start:stop]):
|
|
856
|
+
if line.startswith("APPLY THE FOLLOWING TO CHAINS:") or line.startswith(
|
|
857
|
+
" AND CHAINS:"
|
|
858
|
+
):
|
|
859
|
+
affected_chain_ids += [
|
|
860
|
+
chain_id.strip() for chain_id in line[30:].split(",")
|
|
861
|
+
]
|
|
862
862
|
else:
|
|
863
863
|
# Chain specification has finished
|
|
864
864
|
# BIOMT lines start directly after chain specification
|
|
@@ -866,11 +866,9 @@ class PDBFile(TextFile):
|
|
|
866
866
|
break
|
|
867
867
|
# Parse transformations from BIOMT lines
|
|
868
868
|
if transform_start is None:
|
|
869
|
-
raise InvalidFileError(
|
|
870
|
-
"No 'BIOMT' records found for chosen assembly"
|
|
871
|
-
)
|
|
869
|
+
raise InvalidFileError("No 'BIOMT' records found for chosen assembly")
|
|
872
870
|
rotations, translations = _parse_transformations(
|
|
873
|
-
assembly_lines[transform_start
|
|
871
|
+
assembly_lines[transform_start:stop]
|
|
874
872
|
)
|
|
875
873
|
# Filter affected chains
|
|
876
874
|
sub_structure = structure[
|
|
@@ -888,9 +886,9 @@ class PDBFile(TextFile):
|
|
|
888
886
|
|
|
889
887
|
return assembly
|
|
890
888
|
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
889
|
+
def get_symmetry_mates(
|
|
890
|
+
self, model=None, altloc="first", extra_fields=[], include_bonds=False
|
|
891
|
+
):
|
|
894
892
|
"""
|
|
895
893
|
Build a structure model containing all symmetric copies
|
|
896
894
|
of the structure within a single unit cell, given by the space
|
|
@@ -971,27 +969,15 @@ class PDBFile(TextFile):
|
|
|
971
969
|
"File does not contain crystallographic symmetry "
|
|
972
970
|
"information (REMARK 350)"
|
|
973
971
|
)
|
|
974
|
-
transform_lines = [
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
rotations, translations = _parse_transformations(
|
|
978
|
-
transform_lines
|
|
979
|
-
)
|
|
980
|
-
return _apply_transformations(
|
|
981
|
-
structure, rotations, translations
|
|
982
|
-
)
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
972
|
+
transform_lines = [line for line in remark_lines if line.startswith(" SMTRY")]
|
|
973
|
+
rotations, translations = _parse_transformations(transform_lines)
|
|
974
|
+
return _apply_transformations(structure, rotations, translations)
|
|
986
975
|
|
|
987
976
|
def _index_models_and_atoms(self):
|
|
988
977
|
# Line indices where a new model starts
|
|
989
978
|
self._model_start_i = np.array(
|
|
990
|
-
[
|
|
991
|
-
|
|
992
|
-
if self.lines[i].startswith(("MODEL"))
|
|
993
|
-
],
|
|
994
|
-
dtype=int
|
|
979
|
+
[i for i in range(len(self.lines)) if self.lines[i].startswith(("MODEL"))],
|
|
980
|
+
dtype=int,
|
|
995
981
|
)
|
|
996
982
|
if len(self._model_start_i) == 0:
|
|
997
983
|
# It could be an empty file or a file with a single model,
|
|
@@ -1005,13 +991,13 @@ class PDBFile(TextFile):
|
|
|
1005
991
|
# Line indices with ATOM or HETATM records
|
|
1006
992
|
self._atom_line_i = np.array(
|
|
1007
993
|
[
|
|
1008
|
-
i
|
|
994
|
+
i
|
|
995
|
+
for i in range(len(self.lines))
|
|
1009
996
|
if self.lines[i].startswith(("ATOM", "HETATM"))
|
|
1010
997
|
],
|
|
1011
|
-
dtype=int
|
|
998
|
+
dtype=int,
|
|
1012
999
|
)
|
|
1013
1000
|
|
|
1014
|
-
|
|
1015
1001
|
def _get_atom_record_indices_for_model(self, model):
|
|
1016
1002
|
last_model = len(self._model_start_i)
|
|
1017
1003
|
if model == 0:
|
|
@@ -1020,12 +1006,11 @@ class PDBFile(TextFile):
|
|
|
1020
1006
|
model = last_model + model + 1 if model < 0 else model
|
|
1021
1007
|
|
|
1022
1008
|
if model < last_model:
|
|
1023
|
-
line_filter = (
|
|
1024
|
-
|
|
1025
|
-
(self._atom_line_i < self._model_start_i[model ])
|
|
1009
|
+
line_filter = (self._atom_line_i >= self._model_start_i[model - 1]) & (
|
|
1010
|
+
self._atom_line_i < self._model_start_i[model]
|
|
1026
1011
|
)
|
|
1027
1012
|
elif model == last_model:
|
|
1028
|
-
line_filter =
|
|
1013
|
+
line_filter = self._atom_line_i >= self._model_start_i[model - 1]
|
|
1029
1014
|
else:
|
|
1030
1015
|
raise ValueError(
|
|
1031
1016
|
f"The file has {last_model} models, "
|
|
@@ -1033,7 +1018,6 @@ class PDBFile(TextFile):
|
|
|
1033
1018
|
)
|
|
1034
1019
|
return self._atom_line_i[line_filter]
|
|
1035
1020
|
|
|
1036
|
-
|
|
1037
1021
|
def _get_model_length(self):
|
|
1038
1022
|
"""
|
|
1039
1023
|
Determine length of models and check that all models
|
|
@@ -1043,11 +1027,13 @@ class PDBFile(TextFile):
|
|
|
1043
1027
|
length = None
|
|
1044
1028
|
for model_i in range(len(self._model_start_i)):
|
|
1045
1029
|
model_start = self._model_start_i[model_i]
|
|
1046
|
-
model_stop =
|
|
1047
|
-
|
|
1030
|
+
model_stop = (
|
|
1031
|
+
self._model_start_i[model_i + 1]
|
|
1032
|
+
if model_i + 1 < n_models
|
|
1033
|
+
else len(self.lines)
|
|
1034
|
+
)
|
|
1048
1035
|
model_length = np.count_nonzero(
|
|
1049
|
-
(self._atom_line_i >= model_start) &
|
|
1050
|
-
(self._atom_line_i < model_stop)
|
|
1036
|
+
(self._atom_line_i >= model_start) & (self._atom_line_i < model_stop)
|
|
1051
1037
|
)
|
|
1052
1038
|
if length is None:
|
|
1053
1039
|
length = model_length
|
|
@@ -1058,26 +1044,22 @@ class PDBFile(TextFile):
|
|
|
1058
1044
|
)
|
|
1059
1045
|
return length
|
|
1060
1046
|
|
|
1061
|
-
|
|
1062
1047
|
def _get_bonds(self, atom_ids):
|
|
1063
|
-
conect_lines = [line for line in self.lines
|
|
1064
|
-
if line.startswith("CONECT")]
|
|
1048
|
+
conect_lines = [line for line in self.lines if line.startswith("CONECT")]
|
|
1065
1049
|
|
|
1066
1050
|
# Mapping from atom ids to indices in an AtomArray
|
|
1067
|
-
atom_id_to_index = np.zeros(atom_ids[-1]+1, dtype=int)
|
|
1051
|
+
atom_id_to_index = np.zeros(atom_ids[-1] + 1, dtype=int)
|
|
1068
1052
|
try:
|
|
1069
1053
|
for i, id in enumerate(atom_ids):
|
|
1070
1054
|
atom_id_to_index[id] = i
|
|
1071
1055
|
except IndexError as e:
|
|
1072
|
-
raise InvalidFileError(
|
|
1073
|
-
"Atom IDs are not strictly increasing"
|
|
1074
|
-
) from e
|
|
1056
|
+
raise InvalidFileError("Atom IDs are not strictly increasing") from e
|
|
1075
1057
|
|
|
1076
1058
|
bonds = []
|
|
1077
1059
|
for line in conect_lines:
|
|
1078
|
-
center_id = atom_id_to_index[decode_hybrid36(line[6
|
|
1060
|
+
center_id = atom_id_to_index[decode_hybrid36(line[6:11])]
|
|
1079
1061
|
for i in range(11, 31, 5):
|
|
1080
|
-
id_string = line[i : i+5]
|
|
1062
|
+
id_string = line[i : i + 5]
|
|
1081
1063
|
try:
|
|
1082
1064
|
id = atom_id_to_index[decode_hybrid36(id_string)]
|
|
1083
1065
|
except ValueError:
|
|
@@ -1089,7 +1071,6 @@ class PDBFile(TextFile):
|
|
|
1089
1071
|
# is equal to the length of the AtomArray
|
|
1090
1072
|
return BondList(len(atom_ids), np.array(bonds, dtype=np.uint32))
|
|
1091
1073
|
|
|
1092
|
-
|
|
1093
1074
|
def _set_bonds(self, bond_list, atom_ids):
|
|
1094
1075
|
# Bond type is unused since PDB does not support bond orders
|
|
1095
1076
|
bonds, _ = bond_list.get_all_bonds()
|
|
@@ -1136,9 +1117,7 @@ def _parse_transformations(lines):
|
|
|
1136
1117
|
# transformation index) are not used
|
|
1137
1118
|
transformations = [float(e) for e in line.split()[2:]]
|
|
1138
1119
|
if len(transformations) != 4:
|
|
1139
|
-
raise InvalidFileError(
|
|
1140
|
-
"Invalid number of transformation vector elements"
|
|
1141
|
-
)
|
|
1120
|
+
raise InvalidFileError("Invalid number of transformation vector elements")
|
|
1142
1121
|
rotations[transformation_i, component_i, :] = transformations[:3]
|
|
1143
1122
|
translations[transformation_i, component_i] = transformations[3]
|
|
1144
1123
|
|
|
@@ -1237,4 +1216,4 @@ def _number_of_integer_digits(values):
|
|
|
1237
1216
|
n_digits = 0
|
|
1238
1217
|
n_digits = max(n_digits, len(str(np.min(values))))
|
|
1239
1218
|
n_digits = max(n_digits, len(str(np.max(values))))
|
|
1240
|
-
return n_digits
|
|
1219
|
+
return n_digits
|
|
Binary file
|