biotite 0.40.0__cp310-cp310-win_amd64.whl → 0.41.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +1 -1
- biotite/database/pubchem/download.py +23 -23
- biotite/database/pubchem/query.py +7 -7
- biotite/file.py +17 -9
- biotite/sequence/align/banded.c +119 -119
- biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/cigar.py +60 -15
- biotite/sequence/align/kmeralphabet.c +119 -119
- biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmersimilarity.c +119 -119
- biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cpp +119 -119
- biotite/sequence/align/localgapped.c +119 -119
- biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.c +119 -119
- biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.c +119 -119
- biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.c +119 -119
- biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.c +119 -119
- biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.c +119 -119
- biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.c +119 -119
- biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/annotation.py +2 -2
- biotite/sequence/codec.c +119 -119
- biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
- biotite/sequence/io/fasta/convert.py +27 -24
- biotite/sequence/phylo/nj.c +119 -119
- biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.c +119 -119
- biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.c +119 -119
- biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
- biotite/structure/__init__.py +2 -0
- biotite/structure/bonds.c +1124 -915
- biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
- biotite/structure/celllist.c +119 -119
- biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
- biotite/structure/charges.c +119 -119
- biotite/structure/charges.cp310-win_amd64.pyd +0 -0
- biotite/structure/dotbracket.py +2 -0
- biotite/structure/info/atoms.py +6 -1
- biotite/structure/info/bonds.py +1 -1
- biotite/structure/info/ccd/amino_acids.txt +17 -0
- biotite/structure/info/ccd/carbohydrates.txt +2 -0
- biotite/structure/info/ccd/components.bcif +0 -0
- biotite/structure/info/ccd/nucleotides.txt +1 -0
- biotite/structure/info/misc.py +69 -5
- biotite/structure/integrity.py +19 -70
- biotite/structure/io/ctab.py +12 -106
- biotite/structure/io/general.py +157 -165
- biotite/structure/io/gro/file.py +16 -16
- biotite/structure/io/mmtf/convertarray.c +119 -119
- biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.c +119 -119
- biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.c +119 -119
- biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.c +119 -119
- biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mol/__init__.py +4 -2
- biotite/structure/io/mol/convert.py +71 -7
- biotite/structure/io/mol/ctab.py +414 -0
- biotite/structure/io/mol/header.py +116 -0
- biotite/structure/io/mol/{file.py → mol.py} +69 -82
- biotite/structure/io/mol/sdf.py +909 -0
- biotite/structure/io/pdb/file.py +84 -31
- biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/__init__.py +0 -1
- biotite/structure/io/pdbx/bcif.py +2 -3
- biotite/structure/io/pdbx/cif.py +9 -5
- biotite/structure/io/pdbx/component.py +4 -1
- biotite/structure/io/pdbx/convert.py +203 -79
- biotite/structure/io/pdbx/encoding.c +119 -119
- biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
- biotite/structure/repair.py +253 -0
- biotite/structure/sasa.c +119 -119
- biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
- biotite/structure/sequence.py +112 -0
- biotite/structure/superimpose.py +472 -13
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/METADATA +2 -2
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/RECORD +89 -85
- biotite/structure/io/pdbx/error.py +0 -14
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +0 -0
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
biotite/structure/io/pdb/file.py
CHANGED
|
@@ -12,7 +12,7 @@ from ...atoms import AtomArray, AtomArrayStack, repeat
|
|
|
12
12
|
from ...bonds import BondList, connect_via_residue_names
|
|
13
13
|
from ...box import vectors_from_unitcell, unitcell_from_vectors
|
|
14
14
|
from ....file import TextFile, InvalidFileError
|
|
15
|
-
from
|
|
15
|
+
from ...repair import infer_elements
|
|
16
16
|
from ...error import BadStructureError
|
|
17
17
|
from ...filter import (
|
|
18
18
|
filter_first_altloc,
|
|
@@ -23,6 +23,9 @@ from ...util import matrix_rotate
|
|
|
23
23
|
from .hybrid36 import encode_hybrid36, decode_hybrid36, max_hybrid36_number
|
|
24
24
|
|
|
25
25
|
|
|
26
|
+
_PDB_MAX_ATOMS = 99999
|
|
27
|
+
_PDB_MAX_RESIDUES = 9999
|
|
28
|
+
|
|
26
29
|
# slice objects for readability
|
|
27
30
|
# ATOM/HETATM
|
|
28
31
|
_record = slice(0, 6)
|
|
@@ -460,15 +463,14 @@ class PDBFile(TextFile):
|
|
|
460
463
|
|
|
461
464
|
# Replace empty strings for elements with guessed types
|
|
462
465
|
# This is used e.g. for PDB files created by Gromacs
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
for idx in range(len(array.element)):
|
|
466
|
-
if not array.element[idx]:
|
|
467
|
-
atom_name = array.atom_name[idx]
|
|
468
|
-
array.element[idx] = guess_element(atom_name)
|
|
469
|
-
rep_num += 1
|
|
466
|
+
empty_element_mask = array.element == ""
|
|
467
|
+
if empty_element_mask.any():
|
|
470
468
|
warnings.warn(
|
|
471
|
-
"{} elements
|
|
469
|
+
f"{np.count_nonzero(empty_element_mask)} elements "
|
|
470
|
+
"were guessed from atom name"
|
|
471
|
+
)
|
|
472
|
+
array.element[empty_element_mask] = infer_elements(
|
|
473
|
+
array.atom_name[empty_element_mask]
|
|
472
474
|
)
|
|
473
475
|
|
|
474
476
|
# Fill in coordinates
|
|
@@ -574,6 +576,8 @@ class PDBFile(TextFile):
|
|
|
574
576
|
records are also written for all non-water hetero residues
|
|
575
577
|
and all inter-residue connections.
|
|
576
578
|
"""
|
|
579
|
+
_check_pdb_compatibility(array, hybrid36)
|
|
580
|
+
|
|
577
581
|
natoms = array.array_length()
|
|
578
582
|
annot_categories = array.get_annotation_categories()
|
|
579
583
|
record = np.char.array(np.where(array.hetero, "HETATM", "ATOM"))
|
|
@@ -599,25 +603,6 @@ class PDBFile(TextFile):
|
|
|
599
603
|
else:
|
|
600
604
|
charge = np.char.array(np.full(natoms, " ", dtype="U2"))
|
|
601
605
|
|
|
602
|
-
# Do checks on atom array (stack)
|
|
603
|
-
if hybrid36:
|
|
604
|
-
max_atoms = max_hybrid36_number(5)
|
|
605
|
-
max_residues = max_hybrid36_number(4)
|
|
606
|
-
else:
|
|
607
|
-
max_atoms, max_residues = 99999, 9999
|
|
608
|
-
if array.array_length() > max_atoms:
|
|
609
|
-
warnings.warn(f"More then {max_atoms:,} atoms per model")
|
|
610
|
-
if (array.res_id > max_residues).any():
|
|
611
|
-
warnings.warn(f"Residue IDs exceed {max_residues:,}")
|
|
612
|
-
if np.isnan(array.coord).any():
|
|
613
|
-
raise BadStructureError("Coordinates contain 'NaN' values")
|
|
614
|
-
if any([len(name) > 1 for name in array.chain_id]):
|
|
615
|
-
raise BadStructureError("Some chain IDs exceed 1 character")
|
|
616
|
-
if any([len(name) > 3 for name in array.res_name]):
|
|
617
|
-
raise BadStructureError("Some residue names exceed 3 characters")
|
|
618
|
-
if any([len(name) > 4 for name in array.atom_name]):
|
|
619
|
-
raise BadStructureError("Some atom names exceed 4 characters")
|
|
620
|
-
|
|
621
606
|
if hybrid36:
|
|
622
607
|
pdb_atom_id = np.char.array(
|
|
623
608
|
[encode_hybrid36(i, 5) for i in atom_id]
|
|
@@ -630,14 +615,14 @@ class PDBFile(TextFile):
|
|
|
630
615
|
# but negative IDs are also possible
|
|
631
616
|
pdb_atom_id = np.char.array(np.where(
|
|
632
617
|
atom_id > 0,
|
|
633
|
-
((atom_id - 1) %
|
|
618
|
+
((atom_id - 1) % _PDB_MAX_ATOMS) + 1,
|
|
634
619
|
atom_id
|
|
635
620
|
).astype(str))
|
|
636
621
|
# Residue IDs are supported up to 9999,
|
|
637
622
|
# but negative IDs are also possible
|
|
638
623
|
pdb_res_id = np.char.array(np.where(
|
|
639
624
|
array.res_id > 0,
|
|
640
|
-
((array.res_id - 1) %
|
|
625
|
+
((array.res_id - 1) % _PDB_MAX_RESIDUES) + 1,
|
|
641
626
|
array.res_id
|
|
642
627
|
).astype(str))
|
|
643
628
|
|
|
@@ -1184,4 +1169,72 @@ def _apply_transformations(structure, rotations, translations):
|
|
|
1184
1169
|
coord += translation
|
|
1185
1170
|
assembly_coord[i] = coord
|
|
1186
1171
|
|
|
1187
|
-
return repeat(structure, assembly_coord)
|
|
1172
|
+
return repeat(structure, assembly_coord)
|
|
1173
|
+
|
|
1174
|
+
|
|
1175
|
+
def _check_pdb_compatibility(array, hybrid36):
|
|
1176
|
+
annot_categories = array.get_annotation_categories()
|
|
1177
|
+
|
|
1178
|
+
if hybrid36:
|
|
1179
|
+
max_atoms = max_hybrid36_number(5)
|
|
1180
|
+
max_residues = max_hybrid36_number(4)
|
|
1181
|
+
else:
|
|
1182
|
+
max_atoms, max_residues = _PDB_MAX_ATOMS, _PDB_MAX_RESIDUES
|
|
1183
|
+
if "atom_id" in annot_categories:
|
|
1184
|
+
max_atom_id = np.max(array.atom_id)
|
|
1185
|
+
else:
|
|
1186
|
+
max_atom_id = array.array_length()
|
|
1187
|
+
|
|
1188
|
+
if max_atom_id > max_atoms:
|
|
1189
|
+
warnings.warn(f"Atom IDs exceed {max_atoms:,}, will be wrapped")
|
|
1190
|
+
if (array.res_id > max_residues).any():
|
|
1191
|
+
warnings.warn(f"Residue IDs exceed {max_residues:,}, will be wrapped")
|
|
1192
|
+
if np.isnan(array.coord).any():
|
|
1193
|
+
raise BadStructureError("Coordinates contain 'NaN' values")
|
|
1194
|
+
if any([len(name) > 1 for name in array.chain_id]):
|
|
1195
|
+
raise BadStructureError("Some chain IDs exceed 1 character")
|
|
1196
|
+
if any([len(name) > 3 for name in array.res_name]):
|
|
1197
|
+
raise BadStructureError("Some residue names exceed 3 characters")
|
|
1198
|
+
if any([len(name) > 4 for name in array.atom_name]):
|
|
1199
|
+
raise BadStructureError("Some atom names exceed 4 characters")
|
|
1200
|
+
for i, coord_name in enumerate(["x", "y", "z"]):
|
|
1201
|
+
n_coord_digits = _number_of_integer_digits(array.coord[..., i])
|
|
1202
|
+
if n_coord_digits > 4:
|
|
1203
|
+
raise BadStructureError(
|
|
1204
|
+
f"4 pre-decimal columns for {coord_name}-coordinates are "
|
|
1205
|
+
f"available, but array would require {n_coord_digits}"
|
|
1206
|
+
)
|
|
1207
|
+
if "b_factor" in annot_categories:
|
|
1208
|
+
n_b_factor_digits = _number_of_integer_digits(array.b_factor)
|
|
1209
|
+
if n_b_factor_digits > 3:
|
|
1210
|
+
raise BadStructureError(
|
|
1211
|
+
"3 pre-decimal columns for B-factor are available, "
|
|
1212
|
+
f"but array would require {n_b_factor_digits}"
|
|
1213
|
+
)
|
|
1214
|
+
if "occupancy" in annot_categories:
|
|
1215
|
+
n_occupancy_digits = _number_of_integer_digits(array.occupancy)
|
|
1216
|
+
if n_occupancy_digits > 3:
|
|
1217
|
+
raise BadStructureError(
|
|
1218
|
+
"3 pre-decimal columns for occupancy are available, "
|
|
1219
|
+
f"but array would require {n_occupancy_digits}"
|
|
1220
|
+
)
|
|
1221
|
+
if "charge" in annot_categories:
|
|
1222
|
+
# The sign can be omitted, as it is put into the adjacent column
|
|
1223
|
+
n_charge_digits = _number_of_integer_digits(np.abs(array.charge))
|
|
1224
|
+
if n_charge_digits > 1:
|
|
1225
|
+
raise BadStructureError(
|
|
1226
|
+
"1 column for charge is available, "
|
|
1227
|
+
f"but array would require {n_charge_digits}"
|
|
1228
|
+
)
|
|
1229
|
+
|
|
1230
|
+
|
|
1231
|
+
def _number_of_integer_digits(values):
|
|
1232
|
+
"""
|
|
1233
|
+
Get the maximum number of characters needed to represent the
|
|
1234
|
+
pre-decimal positions of the given numeric values.
|
|
1235
|
+
"""
|
|
1236
|
+
values = values.astype(int, copy=False)
|
|
1237
|
+
n_digits = 0
|
|
1238
|
+
n_digits = max(n_digits, len(str(np.min(values))))
|
|
1239
|
+
n_digits = max(n_digits, len(str(np.max(values))))
|
|
1240
|
+
return n_digits
|
|
Binary file
|
|
@@ -12,9 +12,8 @@ import numpy as np
|
|
|
12
12
|
import msgpack
|
|
13
13
|
from .component import _Component, _HierarchicalContainer, MaskValue
|
|
14
14
|
from .encoding import decode_stepwise, encode_stepwise, deserialize_encoding, \
|
|
15
|
-
create_uncompressed_encoding
|
|
16
|
-
from
|
|
17
|
-
from ....file import File, is_binary, is_open_compatible
|
|
15
|
+
create_uncompressed_encoding
|
|
16
|
+
from ....file import File, is_binary, is_open_compatible, SerializationError
|
|
18
17
|
|
|
19
18
|
|
|
20
19
|
class BinaryCIFData(_Component):
|
biotite/structure/io/pdbx/cif.py
CHANGED
|
@@ -11,8 +11,8 @@ import shlex
|
|
|
11
11
|
from collections.abc import MutableMapping, Sequence
|
|
12
12
|
import numpy as np
|
|
13
13
|
from .component import _Component, MaskValue
|
|
14
|
-
from
|
|
15
|
-
|
|
14
|
+
from ....file import File, is_open_compatible, is_text, DeserializationError, \
|
|
15
|
+
SerializationError
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
UNICODE_CHAR_SIZE = 4
|
|
@@ -402,7 +402,9 @@ class CIFCategory(_Component, MutableMapping):
|
|
|
402
402
|
f"while the first column has row_count {self._row_count}"
|
|
403
403
|
)
|
|
404
404
|
|
|
405
|
-
if self._row_count ==
|
|
405
|
+
if self._row_count == 0:
|
|
406
|
+
raise ValueError("At least one row is required")
|
|
407
|
+
elif self._row_count == 1:
|
|
406
408
|
lines = self._serialize_single()
|
|
407
409
|
else:
|
|
408
410
|
lines = self._serialize_looped()
|
|
@@ -766,7 +768,7 @@ class CIFFile(_Component, File, MutableMapping):
|
|
|
766
768
|
|
|
767
769
|
@property
|
|
768
770
|
def lines(self):
|
|
769
|
-
return
|
|
771
|
+
return self.serialize().splitlines()
|
|
770
772
|
|
|
771
773
|
@property
|
|
772
774
|
def block(self):
|
|
@@ -978,7 +980,7 @@ def _to_single(lines, is_looped):
|
|
|
978
980
|
processed_lines[out_i - 1] += " " + shlex.quote(multi_line_str)
|
|
979
981
|
in_i = j + 1
|
|
980
982
|
|
|
981
|
-
elif not is_looped and lines[in_i][0]
|
|
983
|
+
elif not is_looped and lines[in_i][0] != "_":
|
|
982
984
|
# Singleline value in the line after the corresponding key
|
|
983
985
|
processed_lines[out_i - 1] += " " + lines[in_i]
|
|
984
986
|
in_i += 1
|
|
@@ -1025,4 +1027,6 @@ def _multiline(value):
|
|
|
1025
1027
|
def _arrayfy(data):
|
|
1026
1028
|
if not isinstance(data, (Sequence, np.ndarray)) or isinstance(data, str):
|
|
1027
1029
|
data = [data]
|
|
1030
|
+
elif len(data) == 0:
|
|
1031
|
+
raise ValueError("Array must contain at least one element")
|
|
1028
1032
|
return np.asarray(data)
|
|
@@ -14,7 +14,7 @@ __all__ = ["MaskValue"]
|
|
|
14
14
|
from enum import IntEnum
|
|
15
15
|
from abc import ABCMeta, abstractmethod
|
|
16
16
|
from collections.abc import MutableMapping
|
|
17
|
-
from
|
|
17
|
+
from ....file import SerializationError, DeserializationError
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class MaskValue(IntEnum):
|
|
@@ -105,6 +105,9 @@ class _Component(metaclass=ABCMeta):
|
|
|
105
105
|
"""
|
|
106
106
|
raise NotImplementedError()
|
|
107
107
|
|
|
108
|
+
def __str__(self):
|
|
109
|
+
return str(self.serialize())
|
|
110
|
+
|
|
108
111
|
|
|
109
112
|
class _HierarchicalContainer(_Component, MutableMapping,
|
|
110
113
|
metaclass=ABCMeta):
|