biotite 1.1.0__cp311-cp311-win_amd64.whl → 1.3.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/application.py +3 -3
- biotite/application/autodock/app.py +1 -1
- biotite/application/blast/webapp.py +1 -1
- biotite/application/clustalo/app.py +1 -1
- biotite/application/localapp.py +2 -2
- biotite/application/msaapp.py +10 -10
- biotite/application/muscle/app3.py +3 -3
- biotite/application/muscle/app5.py +3 -3
- biotite/application/sra/app.py +0 -5
- biotite/application/util.py +21 -1
- biotite/application/viennarna/rnaalifold.py +8 -8
- biotite/application/viennarna/rnaplot.py +10 -8
- biotite/application/viennarna/util.py +1 -1
- biotite/application/webapp.py +1 -1
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +191 -0
- biotite/database/entrez/dbnames.py +10 -0
- biotite/database/entrez/download.py +9 -10
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +5 -4
- biotite/database/pubchem/download.py +6 -6
- biotite/database/pubchem/error.py +10 -0
- biotite/database/pubchem/query.py +12 -23
- biotite/database/rcsb/download.py +3 -2
- biotite/database/rcsb/query.py +2 -3
- biotite/database/uniprot/check.py +2 -2
- biotite/database/uniprot/download.py +2 -5
- biotite/database/uniprot/query.py +3 -4
- biotite/file.py +14 -2
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1226 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/align/__init__.py +0 -4
- biotite/sequence/align/alignment.py +33 -11
- biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/banded.pyx +22 -22
- biotite/sequence/align/cigar.py +2 -2
- biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +2 -2
- biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +6 -6
- biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localgapped.pyx +47 -47
- biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.pyx +10 -10
- biotite/sequence/align/matrix.py +12 -3
- biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +1 -2
- biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.pyx +37 -39
- biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +2 -2
- biotite/sequence/align/statistics.py +1 -1
- biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +2 -2
- biotite/sequence/annotation.py +19 -13
- biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +1 -2
- biotite/sequence/graphics/alignment.py +25 -39
- biotite/sequence/graphics/dendrogram.py +4 -2
- biotite/sequence/graphics/features.py +2 -2
- biotite/sequence/graphics/logo.py +10 -12
- biotite/sequence/io/fasta/convert.py +1 -2
- biotite/sequence/io/fasta/file.py +1 -1
- biotite/sequence/io/fastq/file.py +3 -3
- biotite/sequence/io/genbank/file.py +3 -3
- biotite/sequence/io/genbank/sequence.py +2 -0
- biotite/sequence/io/gff/convert.py +1 -1
- biotite/sequence/io/gff/file.py +1 -2
- biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +19 -25
- biotite/sequence/search.py +0 -1
- biotite/sequence/seqtypes.py +12 -5
- biotite/sequence/sequence.py +1 -2
- biotite/structure/__init__.py +2 -0
- biotite/structure/alphabet/i3d.py +1 -2
- biotite/structure/alphabet/pb.py +1 -2
- biotite/structure/alphabet/unkerasify.py +8 -2
- biotite/structure/atoms.py +35 -27
- biotite/structure/basepairs.py +39 -40
- biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +8 -5
- biotite/structure/box.py +159 -23
- biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
- biotite/structure/celllist.pyx +83 -68
- biotite/structure/chains.py +17 -55
- biotite/structure/charges.cp311-win_amd64.pyd +0 -0
- biotite/structure/compare.py +420 -13
- biotite/structure/density.py +1 -1
- biotite/structure/dotbracket.py +31 -32
- biotite/structure/filter.py +8 -8
- biotite/structure/geometry.py +15 -15
- biotite/structure/graphics/rna.py +19 -16
- biotite/structure/hbond.py +18 -21
- biotite/structure/info/atoms.py +11 -2
- biotite/structure/info/ccd.py +0 -2
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +0 -3
- biotite/structure/info/misc.py +0 -1
- biotite/structure/info/radii.py +92 -22
- biotite/structure/info/standardize.py +1 -2
- biotite/structure/integrity.py +4 -6
- biotite/structure/io/general.py +2 -2
- biotite/structure/io/gro/file.py +8 -9
- biotite/structure/io/mol/convert.py +1 -1
- biotite/structure/io/mol/ctab.py +33 -28
- biotite/structure/io/mol/mol.py +1 -1
- biotite/structure/io/mol/sdf.py +39 -13
- biotite/structure/io/pdb/convert.py +86 -5
- biotite/structure/io/pdb/file.py +90 -24
- biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/file.py +4 -4
- biotite/structure/io/pdbx/bcif.py +22 -7
- biotite/structure/io/pdbx/cif.py +20 -7
- biotite/structure/io/pdbx/component.py +6 -0
- biotite/structure/io/pdbx/compress.py +71 -34
- biotite/structure/io/pdbx/convert.py +429 -77
- biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/encoding.pyx +39 -23
- biotite/structure/io/trajfile.py +9 -6
- biotite/structure/io/util.py +38 -0
- biotite/structure/mechanics.py +0 -1
- biotite/structure/molecules.py +0 -15
- biotite/structure/pseudoknots.py +13 -19
- biotite/structure/repair.py +2 -4
- biotite/structure/residues.py +20 -48
- biotite/structure/rings.py +335 -0
- biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
- biotite/structure/sasa.pyx +30 -30
- biotite/structure/segments.py +123 -9
- biotite/structure/sequence.py +0 -1
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +0 -2
- biotite/structure/superimpose.py +75 -253
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +232 -26
- biotite/structure/util.py +3 -3
- biotite/version.py +9 -4
- biotite/visualize.py +111 -1
- {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/METADATA +8 -36
- {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/RECORD +160 -138
- {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/WHEEL +1 -1
- {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/io/pdb/file.py
CHANGED
|
@@ -24,6 +24,7 @@ from biotite.structure.io.pdb.hybrid36 import (
|
|
|
24
24
|
encode_hybrid36,
|
|
25
25
|
max_hybrid36_number,
|
|
26
26
|
)
|
|
27
|
+
from biotite.structure.io.util import number_of_integer_digits
|
|
27
28
|
from biotite.structure.repair import infer_elements
|
|
28
29
|
from biotite.structure.util import matrix_rotate
|
|
29
30
|
|
|
@@ -70,10 +71,10 @@ class PDBFile(TextFile):
|
|
|
70
71
|
records cannot be written.
|
|
71
72
|
Additionally, *REMARK* records can be read
|
|
72
73
|
|
|
73
|
-
See
|
|
74
|
+
See Also
|
|
74
75
|
--------
|
|
75
|
-
CIFFile
|
|
76
|
-
BinaryCIFFile
|
|
76
|
+
CIFFile : Interface to CIF files, a modern replacement for PDB files.
|
|
77
|
+
BinaryCIFFile : Interface to BinaryCIF files, a binary variant of CIF files.
|
|
77
78
|
|
|
78
79
|
Examples
|
|
79
80
|
--------
|
|
@@ -597,7 +598,7 @@ class PDBFile(TextFile):
|
|
|
597
598
|
The array or stack to be saved into this file. If a stack
|
|
598
599
|
is given, each array in the stack is saved as separate
|
|
599
600
|
model.
|
|
600
|
-
hybrid36: bool, optional
|
|
601
|
+
hybrid36 : bool, optional
|
|
601
602
|
Defines whether the file should be written in hybrid-36
|
|
602
603
|
format.
|
|
603
604
|
|
|
@@ -894,7 +895,7 @@ class PDBFile(TextFile):
|
|
|
894
895
|
if assembly_start_i is None:
|
|
895
896
|
if assembly_id is None:
|
|
896
897
|
raise InvalidFileError(
|
|
897
|
-
"File does not contain transformation
|
|
898
|
+
"File does not contain transformation expressions for assemblies"
|
|
898
899
|
)
|
|
899
900
|
else:
|
|
900
901
|
raise KeyError(f"The assembly ID '{assembly_id}' is not found")
|
|
@@ -953,7 +954,7 @@ class PDBFile(TextFile):
|
|
|
953
954
|
|
|
954
955
|
return assembly
|
|
955
956
|
|
|
956
|
-
def
|
|
957
|
+
def get_unit_cell(
|
|
957
958
|
self, model=None, altloc="first", extra_fields=[], include_bonds=False
|
|
958
959
|
):
|
|
959
960
|
"""
|
|
@@ -1020,7 +1021,7 @@ class PDBFile(TextFile):
|
|
|
1020
1021
|
|
|
1021
1022
|
>>> import os.path
|
|
1022
1023
|
>>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
|
|
1023
|
-
>>> atoms_in_unit_cell = file.
|
|
1024
|
+
>>> atoms_in_unit_cell = file.get_unit_cell(model=1)
|
|
1024
1025
|
"""
|
|
1025
1026
|
# Get base structure
|
|
1026
1027
|
structure = self.get_structure(
|
|
@@ -1040,6 +1041,83 @@ class PDBFile(TextFile):
|
|
|
1040
1041
|
rotations, translations = _parse_transformations(transform_lines)
|
|
1041
1042
|
return _apply_transformations(structure, rotations, translations)
|
|
1042
1043
|
|
|
1044
|
+
def get_symmetry_mates(
|
|
1045
|
+
self, model=None, altloc="first", extra_fields=[], include_bonds=False
|
|
1046
|
+
):
|
|
1047
|
+
"""
|
|
1048
|
+
Build a structure model containing all symmetric copies
|
|
1049
|
+
of the structure within a single unit cell, given by the space
|
|
1050
|
+
group.
|
|
1051
|
+
|
|
1052
|
+
This function receives the data from ``REMARK 290`` records in
|
|
1053
|
+
the file.
|
|
1054
|
+
Consequently, this remark must be present in the file, which is
|
|
1055
|
+
usually only true for crystal structures.
|
|
1056
|
+
|
|
1057
|
+
DEPRECATED: Use :meth:`get_unit_cell()` instead.
|
|
1058
|
+
|
|
1059
|
+
Parameters
|
|
1060
|
+
----------
|
|
1061
|
+
model : int, optional
|
|
1062
|
+
If this parameter is given, the function will return an
|
|
1063
|
+
:class:`AtomArray` from the atoms corresponding to the given
|
|
1064
|
+
model number (starting at 1).
|
|
1065
|
+
Negative values are used to index models starting from the
|
|
1066
|
+
last model instead of the first model.
|
|
1067
|
+
If this parameter is omitted, an :class:`AtomArrayStack`
|
|
1068
|
+
containing all models will be returned, even if the
|
|
1069
|
+
structure contains only one model.
|
|
1070
|
+
altloc : {'first', 'occupancy', 'all'}
|
|
1071
|
+
This parameter defines how *altloc* IDs are handled:
|
|
1072
|
+
- ``'first'`` - Use atoms that have the first
|
|
1073
|
+
*altloc* ID appearing in a residue.
|
|
1074
|
+
- ``'occupancy'`` - Use atoms that have the *altloc* ID
|
|
1075
|
+
with the highest occupancy for a residue.
|
|
1076
|
+
- ``'all'`` - Use all atoms.
|
|
1077
|
+
Note that this leads to duplicate atoms.
|
|
1078
|
+
When this option is chosen, the ``altloc_id``
|
|
1079
|
+
annotation array is added to the returned structure.
|
|
1080
|
+
extra_fields : list of str, optional
|
|
1081
|
+
The strings in the list are optional annotation categories
|
|
1082
|
+
that should be stored in the output array or stack.
|
|
1083
|
+
These are valid values:
|
|
1084
|
+
``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
|
|
1085
|
+
``'charge'``.
|
|
1086
|
+
include_bonds : bool, optional
|
|
1087
|
+
If set to true, a :class:`BondList` will be created for the
|
|
1088
|
+
resulting :class:`AtomArray` containing the bond information
|
|
1089
|
+
from the file.
|
|
1090
|
+
Bonds, whose order could not be determined from the
|
|
1091
|
+
*Chemical Component Dictionary*
|
|
1092
|
+
(e.g. especially inter-residue bonds),
|
|
1093
|
+
have :attr:`BondType.ANY`, since the PDB format itself does
|
|
1094
|
+
not support bond orders.
|
|
1095
|
+
|
|
1096
|
+
Returns
|
|
1097
|
+
-------
|
|
1098
|
+
symmetry_mates : AtomArray or AtomArrayStack
|
|
1099
|
+
All atoms within a single unit cell.
|
|
1100
|
+
The return type depends on the `model` parameter.
|
|
1101
|
+
|
|
1102
|
+
Notes
|
|
1103
|
+
-----
|
|
1104
|
+
To expand the structure beyond a single unit cell, use
|
|
1105
|
+
:func:`repeat_box()` with the return value as its
|
|
1106
|
+
input.
|
|
1107
|
+
|
|
1108
|
+
Examples
|
|
1109
|
+
--------
|
|
1110
|
+
|
|
1111
|
+
>>> import os.path
|
|
1112
|
+
>>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
|
|
1113
|
+
>>> atoms_in_unit_cell = file.get_symmetry_mates(model=1)
|
|
1114
|
+
"""
|
|
1115
|
+
warnings.warn(
|
|
1116
|
+
"'get_symmetry_mates()' is deprecated, use 'get_unit_cell()' instead",
|
|
1117
|
+
DeprecationWarning,
|
|
1118
|
+
)
|
|
1119
|
+
return self.get_unit_cell(model, altloc, extra_fields, include_bonds)
|
|
1120
|
+
|
|
1043
1121
|
def _index_models_and_atoms(self):
|
|
1044
1122
|
# Line indices where a new model starts
|
|
1045
1123
|
self._model_start_i = np.array(
|
|
@@ -1106,7 +1184,7 @@ class PDBFile(TextFile):
|
|
|
1106
1184
|
length = model_length
|
|
1107
1185
|
if model_length != length:
|
|
1108
1186
|
raise InvalidFileError(
|
|
1109
|
-
f"Model {model_i+1} has {model_length} atoms, "
|
|
1187
|
+
f"Model {model_i + 1} has {model_length} atoms, "
|
|
1110
1188
|
f"but model 1 has {length} atoms, must be equal"
|
|
1111
1189
|
)
|
|
1112
1190
|
return length
|
|
@@ -1248,21 +1326,21 @@ def _check_pdb_compatibility(array, hybrid36):
|
|
|
1248
1326
|
if any([len(name) > 4 for name in array.atom_name]):
|
|
1249
1327
|
raise BadStructureError("Some atom names exceed 4 characters")
|
|
1250
1328
|
for i, coord_name in enumerate(["x", "y", "z"]):
|
|
1251
|
-
n_coord_digits =
|
|
1329
|
+
n_coord_digits = number_of_integer_digits(array.coord[..., i])
|
|
1252
1330
|
if n_coord_digits > 4:
|
|
1253
1331
|
raise BadStructureError(
|
|
1254
1332
|
f"4 pre-decimal columns for {coord_name}-coordinates are "
|
|
1255
1333
|
f"available, but array would require {n_coord_digits}"
|
|
1256
1334
|
)
|
|
1257
1335
|
if "b_factor" in annot_categories:
|
|
1258
|
-
n_b_factor_digits =
|
|
1336
|
+
n_b_factor_digits = number_of_integer_digits(array.b_factor)
|
|
1259
1337
|
if n_b_factor_digits > 3:
|
|
1260
1338
|
raise BadStructureError(
|
|
1261
1339
|
"3 pre-decimal columns for B-factor are available, "
|
|
1262
1340
|
f"but array would require {n_b_factor_digits}"
|
|
1263
1341
|
)
|
|
1264
1342
|
if "occupancy" in annot_categories:
|
|
1265
|
-
n_occupancy_digits =
|
|
1343
|
+
n_occupancy_digits = number_of_integer_digits(array.occupancy)
|
|
1266
1344
|
if n_occupancy_digits > 3:
|
|
1267
1345
|
raise BadStructureError(
|
|
1268
1346
|
"3 pre-decimal columns for occupancy are available, "
|
|
@@ -1270,21 +1348,9 @@ def _check_pdb_compatibility(array, hybrid36):
|
|
|
1270
1348
|
)
|
|
1271
1349
|
if "charge" in annot_categories:
|
|
1272
1350
|
# The sign can be omitted as it is put into the adjacent column
|
|
1273
|
-
n_charge_digits =
|
|
1351
|
+
n_charge_digits = number_of_integer_digits(np.abs(array.charge))
|
|
1274
1352
|
if n_charge_digits > 1:
|
|
1275
1353
|
raise BadStructureError(
|
|
1276
1354
|
"1 column for charge is available, "
|
|
1277
1355
|
f"but array would require {n_charge_digits}"
|
|
1278
1356
|
)
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
def _number_of_integer_digits(values):
|
|
1282
|
-
"""
|
|
1283
|
-
Get the maximum number of characters needed to represent the
|
|
1284
|
-
pre-decimal positions of the given numeric values.
|
|
1285
|
-
"""
|
|
1286
|
-
values = values.astype(int, copy=False)
|
|
1287
|
-
n_digits = 0
|
|
1288
|
-
n_digits = max(n_digits, len(str(np.min(values))))
|
|
1289
|
-
n_digits = max(n_digits, len(str(np.max(values))))
|
|
1290
|
-
return n_digits
|
|
Binary file
|
|
@@ -525,9 +525,9 @@ class PDBQTFile(TextFile):
|
|
|
525
525
|
f"{atoms.chain_id[i]:1}"
|
|
526
526
|
f"{atoms.res_id[i]:>4d}"
|
|
527
527
|
f"{atoms.ins_code[i]:1} "
|
|
528
|
-
f"{atoms.coord[i,0]:>8.3f}"
|
|
529
|
-
f"{atoms.coord[i,1]:>8.3f}"
|
|
530
|
-
f"{atoms.coord[i,2]:>8.3f}"
|
|
528
|
+
f"{atoms.coord[i, 0]:>8.3f}"
|
|
529
|
+
f"{atoms.coord[i, 1]:>8.3f}"
|
|
530
|
+
f"{atoms.coord[i, 2]:>8.3f}"
|
|
531
531
|
f"{occupancy[i]:>6.2f}"
|
|
532
532
|
f"{b_factor[i]:>6.2f} "
|
|
533
533
|
f"{charges[i]:>6.3f} "
|
|
@@ -604,7 +604,7 @@ class PDBQTFile(TextFile):
|
|
|
604
604
|
length = model_length
|
|
605
605
|
if model_length != length:
|
|
606
606
|
raise InvalidFileError(
|
|
607
|
-
f"Model {model_i+1} has {model_length} atoms, "
|
|
607
|
+
f"Model {model_i + 1} has {model_length} atoms, "
|
|
608
608
|
f"but model 1 has {length} atoms, must be equal"
|
|
609
609
|
)
|
|
610
610
|
return length
|
|
@@ -195,7 +195,7 @@ class BinaryCIFColumn(_Component):
|
|
|
195
195
|
mask = BinaryCIFData(mask)
|
|
196
196
|
if len(data) != len(mask):
|
|
197
197
|
raise IndexError(
|
|
198
|
-
f"Data has length {len(data)},
|
|
198
|
+
f"Data has length {len(data)}, but mask has length {len(mask)}"
|
|
199
199
|
)
|
|
200
200
|
self._data = data
|
|
201
201
|
self._mask = mask
|
|
@@ -256,6 +256,11 @@ class BinaryCIFColumn(_Component):
|
|
|
256
256
|
``MaskValue.INAPPLICABLE`` or ``MaskValue.MISSING``.
|
|
257
257
|
By default, masked elements are converted to ``'.'`` or
|
|
258
258
|
``'?'`` depending on the :class:`MaskValue`.
|
|
259
|
+
|
|
260
|
+
Returns
|
|
261
|
+
-------
|
|
262
|
+
array : ndarray
|
|
263
|
+
The column data as array.
|
|
259
264
|
"""
|
|
260
265
|
if dtype is None:
|
|
261
266
|
dtype = self._data.array.dtype
|
|
@@ -341,12 +346,15 @@ class BinaryCIFCategory(_HierarchicalContainer):
|
|
|
341
346
|
into a :class:`BinaryCIFColumn`).
|
|
342
347
|
By default, an empty category is created.
|
|
343
348
|
Each column must have the same length.
|
|
349
|
+
row_count : int, optional
|
|
350
|
+
The number of rows in the category.
|
|
344
351
|
|
|
345
352
|
Attributes
|
|
346
353
|
----------
|
|
347
354
|
row_count : int
|
|
348
355
|
The number of rows in the category, i.e. the length of each
|
|
349
356
|
column.
|
|
357
|
+
By default, the row count is determined when the first column is added.
|
|
350
358
|
|
|
351
359
|
Examples
|
|
352
360
|
--------
|
|
@@ -526,6 +534,19 @@ class BinaryCIFFile(File, _HierarchicalContainer):
|
|
|
526
534
|
object, use the high-level :func:`get_structure()` or
|
|
527
535
|
:func:`set_structure()` function respectively.
|
|
528
536
|
|
|
537
|
+
Parameters
|
|
538
|
+
----------
|
|
539
|
+
blocks : dict (str -> BinaryCIFBlock), optional
|
|
540
|
+
The initial blocks of the file.
|
|
541
|
+
Maps the block names to the corresponding :class:`BinaryCIFBlock` objects.
|
|
542
|
+
By default no initial blocks are added.
|
|
543
|
+
|
|
544
|
+
Attributes
|
|
545
|
+
----------
|
|
546
|
+
block : BinaryCIFBlock
|
|
547
|
+
The sole block of the file.
|
|
548
|
+
If the file contains multiple blocks, an exception is raised.
|
|
549
|
+
|
|
529
550
|
Notes
|
|
530
551
|
-----
|
|
531
552
|
The content of *BinaryCIF* files is lazily deserialized:
|
|
@@ -534,12 +555,6 @@ class BinaryCIFFile(File, _HierarchicalContainer):
|
|
|
534
555
|
The decoded :class:`BinaryCIFBlock`/:class:`BinaryCIFCategory`
|
|
535
556
|
objects are cached for subsequent accesses.
|
|
536
557
|
|
|
537
|
-
Attributes
|
|
538
|
-
----------
|
|
539
|
-
block : BinaryCIFBlock
|
|
540
|
-
The sole block of the file.
|
|
541
|
-
If the file contains multiple blocks, an exception is raised.
|
|
542
|
-
|
|
543
558
|
Examples
|
|
544
559
|
--------
|
|
545
560
|
Read a *BinaryCIF* file and access its content:
|
biotite/structure/io/pdbx/cif.py
CHANGED
|
@@ -149,7 +149,7 @@ class CIFColumn:
|
|
|
149
149
|
mask = CIFData(mask, np.uint8)
|
|
150
150
|
if len(mask) != len(data):
|
|
151
151
|
raise IndexError(
|
|
152
|
-
f"Data has length {len(data)},
|
|
152
|
+
f"Data has length {len(data)}, but mask has length {len(mask)}"
|
|
153
153
|
)
|
|
154
154
|
self._data = data
|
|
155
155
|
self._mask = mask
|
|
@@ -215,6 +215,11 @@ class CIFColumn:
|
|
|
215
215
|
``MaskValue.INAPPLICABLE`` or ``MaskValue.MISSING``.
|
|
216
216
|
By default, masked elements are converted to ``'.'`` or
|
|
217
217
|
``'?'`` depending on the :class:`MaskValue`.
|
|
218
|
+
|
|
219
|
+
Returns
|
|
220
|
+
-------
|
|
221
|
+
array : ndarray
|
|
222
|
+
The column data as array.
|
|
218
223
|
"""
|
|
219
224
|
if self._mask is None:
|
|
220
225
|
return self._data.array.astype(dtype, copy=False)
|
|
@@ -721,6 +726,19 @@ class CIFFile(_Component, File, MutableMapping):
|
|
|
721
726
|
use the high-level :func:`get_structure()` or
|
|
722
727
|
:func:`set_structure()` function respectively.
|
|
723
728
|
|
|
729
|
+
Parameters
|
|
730
|
+
----------
|
|
731
|
+
blocks : dict (str -> CIFBlock), optional
|
|
732
|
+
The initial blocks of the file.
|
|
733
|
+
Maps the block names to the corresponding :class:`CIFBlock` objects.
|
|
734
|
+
By default no initial blocks are added.
|
|
735
|
+
|
|
736
|
+
Attributes
|
|
737
|
+
----------
|
|
738
|
+
block : CIFBlock
|
|
739
|
+
The sole block of the file.
|
|
740
|
+
If the file contains multiple blocks, an exception is raised.
|
|
741
|
+
|
|
724
742
|
Notes
|
|
725
743
|
-----
|
|
726
744
|
The content of CIF files is lazily deserialized:
|
|
@@ -731,12 +749,6 @@ class CIFFile(_Component, File, MutableMapping):
|
|
|
731
749
|
The deserialized :class:`CIFBlock`/:class:`CIFCategory` objects
|
|
732
750
|
are cached for subsequent accesses.
|
|
733
751
|
|
|
734
|
-
Attributes
|
|
735
|
-
----------
|
|
736
|
-
block : CIFBlock
|
|
737
|
-
The sole block of the file.
|
|
738
|
-
If the file contains multiple blocks, an exception is raised.
|
|
739
|
-
|
|
740
752
|
Examples
|
|
741
753
|
--------
|
|
742
754
|
Read a CIF file and access its content:
|
|
@@ -884,6 +896,7 @@ class CIFFile(_Component, File, MutableMapping):
|
|
|
884
896
|
block = CIFBlock.deserialize(block)
|
|
885
897
|
except Exception:
|
|
886
898
|
raise DeserializationError(f"Failed to deserialize block '{key}'")
|
|
899
|
+
block.name = key
|
|
887
900
|
# Update with deserialized object
|
|
888
901
|
self._blocks[key] = block
|
|
889
902
|
return block
|
|
@@ -120,6 +120,12 @@ class _HierarchicalContainer(_Component, MutableMapping, metaclass=ABCMeta):
|
|
|
120
120
|
A component is only deserialized from the serialized data, if it
|
|
121
121
|
is accessed.
|
|
122
122
|
The deserialized component is then cached in the container.
|
|
123
|
+
|
|
124
|
+
Parameters
|
|
125
|
+
----------
|
|
126
|
+
elements : dict, optional
|
|
127
|
+
The initial elements of the container.
|
|
128
|
+
By default no initial elements are added.
|
|
123
129
|
"""
|
|
124
130
|
|
|
125
131
|
def __init__(self, elements=None):
|
|
@@ -3,6 +3,7 @@ __name__ = "biotite.structure.io.pdbx"
|
|
|
3
3
|
__author__ = "Patrick Kunzmann"
|
|
4
4
|
|
|
5
5
|
import itertools
|
|
6
|
+
import warnings
|
|
6
7
|
import msgpack
|
|
7
8
|
import numpy as np
|
|
8
9
|
import biotite.structure.io.pdbx.bcif as bcif
|
|
@@ -17,7 +18,7 @@ from biotite.structure.io.pdbx.encoding import (
|
|
|
17
18
|
)
|
|
18
19
|
|
|
19
20
|
|
|
20
|
-
def compress(data, float_tolerance=1e-6):
|
|
21
|
+
def compress(data, float_tolerance=None, rtol=1e-6, atol=1e-4):
|
|
21
22
|
"""
|
|
22
23
|
Try to reduce the size of a *BinaryCIF* file (or block, category, etc.) by testing
|
|
23
24
|
different data encodings for each data array and selecting the one, which results in
|
|
@@ -27,6 +28,14 @@ def compress(data, float_tolerance=1e-6):
|
|
|
27
28
|
----------
|
|
28
29
|
data : BinaryCIFFile or BinaryCIFBlock or BinaryCIFCategory or BinaryCIFColumn or BinaryCIFData
|
|
29
30
|
The data to compress.
|
|
31
|
+
float_tolerance : float, optional
|
|
32
|
+
The relative error that is accepted when compressing floating point numbers.
|
|
33
|
+
DEPRECATED: Use `rtol` instead.
|
|
34
|
+
rtol, atol : float, optional
|
|
35
|
+
The compression factor of floating point numbers is chosen such that
|
|
36
|
+
either the relative (`rtol`) or absolute (`atol`) tolerance is fulfilled
|
|
37
|
+
for each value, i.e. the difference between the compressed and uncompressed
|
|
38
|
+
value is smaller than the tolerance.
|
|
30
39
|
|
|
31
40
|
Returns
|
|
32
41
|
-------
|
|
@@ -35,8 +44,6 @@ def compress(data, float_tolerance=1e-6):
|
|
|
35
44
|
If no improved compression is found for a :class:`BinaryCIFData` array,
|
|
36
45
|
the input data is kept.
|
|
37
46
|
Hence, the return value is no deep copy of the input data.
|
|
38
|
-
float_tolerance : float, optional
|
|
39
|
-
The relative error that is accepted when compressing floating point numbers.
|
|
40
47
|
|
|
41
48
|
Examples
|
|
42
49
|
--------
|
|
@@ -58,55 +65,70 @@ def compress(data, float_tolerance=1e-6):
|
|
|
58
65
|
>>> print(f"{len(compressed_file.read()) // 1000} KB")
|
|
59
66
|
111 KB
|
|
60
67
|
"""
|
|
68
|
+
if float_tolerance is not None:
|
|
69
|
+
warnings.warn(
|
|
70
|
+
"The 'float_tolerance' parameter is deprecated, use 'rtol' instead",
|
|
71
|
+
DeprecationWarning,
|
|
72
|
+
)
|
|
73
|
+
|
|
61
74
|
match type(data):
|
|
62
75
|
case bcif.BinaryCIFFile:
|
|
63
|
-
return _compress_file(data,
|
|
76
|
+
return _compress_file(data, rtol, atol)
|
|
64
77
|
case bcif.BinaryCIFBlock:
|
|
65
|
-
return _compress_block(data,
|
|
78
|
+
return _compress_block(data, rtol, atol)
|
|
66
79
|
case bcif.BinaryCIFCategory:
|
|
67
|
-
return _compress_category(data,
|
|
80
|
+
return _compress_category(data, rtol, atol)
|
|
68
81
|
case bcif.BinaryCIFColumn:
|
|
69
|
-
return _compress_column(data,
|
|
82
|
+
return _compress_column(data, rtol, atol)
|
|
70
83
|
case bcif.BinaryCIFData:
|
|
71
|
-
return _compress_data(data,
|
|
84
|
+
return _compress_data(data, rtol, atol)
|
|
72
85
|
case _:
|
|
73
86
|
raise TypeError(f"Unsupported type {type(data).__name__}")
|
|
74
87
|
|
|
75
88
|
|
|
76
|
-
def _compress_file(bcif_file,
|
|
89
|
+
def _compress_file(bcif_file, rtol, atol):
|
|
77
90
|
compressed_file = bcif.BinaryCIFFile()
|
|
78
91
|
for block_name, bcif_block in bcif_file.items():
|
|
79
|
-
|
|
92
|
+
try:
|
|
93
|
+
compressed_block = _compress_block(bcif_block, rtol, atol)
|
|
94
|
+
except Exception:
|
|
95
|
+
raise ValueError(f"Failed to compress block '{block_name}'")
|
|
80
96
|
compressed_file[block_name] = compressed_block
|
|
81
97
|
return compressed_file
|
|
82
98
|
|
|
83
99
|
|
|
84
|
-
def _compress_block(bcif_block,
|
|
100
|
+
def _compress_block(bcif_block, rtol, atol):
|
|
85
101
|
compressed_block = bcif.BinaryCIFBlock()
|
|
86
102
|
for category_name, bcif_category in bcif_block.items():
|
|
87
|
-
|
|
103
|
+
try:
|
|
104
|
+
compressed_category = _compress_category(bcif_category, rtol, atol)
|
|
105
|
+
except Exception:
|
|
106
|
+
raise ValueError(f"Failed to compress category '{category_name}'")
|
|
88
107
|
compressed_block[category_name] = compressed_category
|
|
89
108
|
return compressed_block
|
|
90
109
|
|
|
91
110
|
|
|
92
|
-
def _compress_category(bcif_category,
|
|
111
|
+
def _compress_category(bcif_category, rtol, atol):
|
|
93
112
|
compressed_category = bcif.BinaryCIFCategory()
|
|
94
113
|
for column_name, bcif_column in bcif_category.items():
|
|
95
|
-
|
|
114
|
+
try:
|
|
115
|
+
compressed_column = _compress_column(bcif_column, rtol, atol)
|
|
116
|
+
except Exception:
|
|
117
|
+
raise ValueError(f"Failed to compress column '{column_name}'")
|
|
96
118
|
compressed_category[column_name] = compressed_column
|
|
97
119
|
return compressed_category
|
|
98
120
|
|
|
99
121
|
|
|
100
|
-
def _compress_column(bcif_column,
|
|
101
|
-
data = _compress_data(bcif_column.data,
|
|
122
|
+
def _compress_column(bcif_column, rtol, atol):
|
|
123
|
+
data = _compress_data(bcif_column.data, rtol, atol)
|
|
102
124
|
if bcif_column.mask is not None:
|
|
103
|
-
mask = _compress_data(bcif_column.mask,
|
|
125
|
+
mask = _compress_data(bcif_column.mask, rtol, atol)
|
|
104
126
|
else:
|
|
105
127
|
mask = None
|
|
106
128
|
return bcif.BinaryCIFColumn(data, mask)
|
|
107
129
|
|
|
108
130
|
|
|
109
|
-
def _compress_data(bcif_data,
|
|
131
|
+
def _compress_data(bcif_data, rtol, atol):
|
|
110
132
|
array = bcif_data.array
|
|
111
133
|
if len(array) == 1:
|
|
112
134
|
# No need to compress a single value -> Use default uncompressed encoding
|
|
@@ -123,16 +145,28 @@ def _compress_data(bcif_data, float_tolerance):
|
|
|
123
145
|
return bcif.BinaryCIFData(array, [encoding])
|
|
124
146
|
|
|
125
147
|
elif np.issubdtype(array.dtype, np.floating):
|
|
148
|
+
if not np.isfinite(array).all():
|
|
149
|
+
# NaN/inf values cannot be represented by integers
|
|
150
|
+
# -> do not use integer encoding
|
|
151
|
+
return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
|
|
126
152
|
to_integer_encoding = FixedPointEncoding(
|
|
127
|
-
10 ** _get_decimal_places(array,
|
|
153
|
+
10 ** _get_decimal_places(array, rtol, atol)
|
|
128
154
|
)
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
# The float array is smaller -> encode it directly as bytes
|
|
155
|
+
try:
|
|
156
|
+
integer_array = to_integer_encoding.encode(array)
|
|
157
|
+
except ValueError:
|
|
158
|
+
# With the given tolerances integer underflow/overflow would occur
|
|
159
|
+
# -> do not use integer encoding
|
|
135
160
|
return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
|
|
161
|
+
else:
|
|
162
|
+
best_encoding, size_compressed = _find_best_integer_compression(
|
|
163
|
+
integer_array
|
|
164
|
+
)
|
|
165
|
+
if size_compressed < _data_size_in_file(bcif.BinaryCIFData(array)):
|
|
166
|
+
return bcif.BinaryCIFData(array, [to_integer_encoding] + best_encoding)
|
|
167
|
+
else:
|
|
168
|
+
# The float array is smaller -> encode it directly as bytes
|
|
169
|
+
return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
|
|
136
170
|
|
|
137
171
|
elif np.issubdtype(array.dtype, np.integer):
|
|
138
172
|
array = _to_smallest_integer_type(array)
|
|
@@ -273,7 +307,7 @@ def _data_size_in_file(data):
|
|
|
273
307
|
return len(bytes_in_file)
|
|
274
308
|
|
|
275
309
|
|
|
276
|
-
def _get_decimal_places(array,
|
|
310
|
+
def _get_decimal_places(array, rtol, atol):
|
|
277
311
|
"""
|
|
278
312
|
Get the number of decimal places in a floating point array.
|
|
279
313
|
|
|
@@ -281,21 +315,24 @@ def _get_decimal_places(array, tol):
|
|
|
281
315
|
----------
|
|
282
316
|
array : numpy.ndarray
|
|
283
317
|
The array to analyze.
|
|
284
|
-
|
|
285
|
-
The relative tolerance allowed when the values are cut off after
|
|
286
|
-
number of decimal places.
|
|
318
|
+
rtol, atol : float, optional
|
|
319
|
+
The relative and absolute tolerance allowed when the values are cut off after
|
|
320
|
+
the returned number of decimal places.
|
|
287
321
|
|
|
288
322
|
Returns
|
|
289
323
|
-------
|
|
290
324
|
decimals : int
|
|
291
325
|
The number of decimal places.
|
|
292
326
|
"""
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
327
|
+
if rtol <= 0 and atol <= 0:
|
|
328
|
+
raise ValueError("At least one of 'rtol' and 'atol' must be greater than 0")
|
|
329
|
+
# 0 would give NaN when rounding on decimals
|
|
330
|
+
array = array[array != 0]
|
|
331
|
+
for decimals in itertools.count(start=min(0, -_order_magnitude(array))):
|
|
297
332
|
error = np.abs(np.round(array, decimals) - array)
|
|
298
|
-
if
|
|
333
|
+
if decimals == 100:
|
|
334
|
+
raise
|
|
335
|
+
if np.all((error < rtol * np.abs(array)) | (error < atol)):
|
|
299
336
|
return decimals
|
|
300
337
|
|
|
301
338
|
|