biotite 1.5.0__cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This module contains internal abstract classes for representing parts
|
|
7
|
+
of CIF/BinaryCIF files, such as categories and columns.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
__name__ = "biotite.structure.io.pdbx"
|
|
11
|
+
__author__ = "Patrick Kunzmann"
|
|
12
|
+
__all__ = ["MaskValue"]
|
|
13
|
+
|
|
14
|
+
from abc import ABCMeta, abstractmethod
|
|
15
|
+
from collections.abc import MutableMapping
|
|
16
|
+
from enum import IntEnum
|
|
17
|
+
from biotite.file import DeserializationError, SerializationError
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class MaskValue(IntEnum):
    """
    Enumeration of the states an entry of a mask array can take.

    - `PRESENT` : The row holds an actual value.
    - `INAPPLICABLE` : No value is applicable or appropriate for the
      row (``.`` in *CIF*).
      Depending on the column, this may also stand for the default
      value of that column.
    - `MISSING` : The value of the row is missing or unknown
      (``?`` in *CIF*).
    """

    # The integer values are part of the enum's contract: keep them stable
    PRESENT = 0
    INAPPLICABLE = 1
    MISSING = 2
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class _Component(metaclass=ABCMeta):
    """
    Abstract root of every component of a CIF/BinaryCIF file.
    """

    @staticmethod
    def subcomponent_class():
        """
        Tell which class the components stored inside this component
        have.

        Returns
        -------
        subcomponent_class : type
            The class of the subcomponent.
            ``None`` is returned, if this component already represents
            the lowest level, i.e. it does not contain subcomponents.
        """
        # Default for leaf components: subclasses with children override this
        return None

    @staticmethod
    def supercomponent_class():
        """
        Tell which class the component containing this component has.

        Returns
        -------
        supercomponent_class : type
            The class of the supercomponent.
            ``None`` is returned, if this component already represents
            the highest level, i.e. it is not contained in another
            component.
        """
        # Default for root components: contained subclasses override this
        return None

    @staticmethod
    @abstractmethod
    def deserialize(content):
        """
        Build this component from its serialized representation.

        Parameters
        ----------
        content : str or dict
            The content to be deserialized.
            Its type depends on the file format:
            For *CIF* files it is the text of the lines that represent
            this component.
            For *BinaryCIF* files it is a dictionary parsed from the
            *MessagePack* data.
        """
        raise NotImplementedError()

    @abstractmethod
    def serialize(self):
        """
        Turn this component into a Python object that can be written
        to a file.

        Returns
        -------
        content : str or dict
            The serialized content.
            Its type depends on the file format:
            For *CIF* files it is the text of the lines that represent
            this component.
            For *BinaryCIF* files it is a dictionary that can be
            encoded into *MessagePack*.
        """
        raise NotImplementedError()

    def __str__(self):
        # The string form is simply the serialized form rendered as text
        serialized = self.serialize()
        return str(serialized)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class _HierarchicalContainer(_Component, MutableMapping, metaclass=ABCMeta):
    """
    A container for hierarchical data in BinaryCIF files.
    For example, the file contains multiple blocks, each block contains
    multiple categories and each category contains multiple columns.

    It uses lazy deserialization:
    A component is only deserialized from the serialized data, if it
    is accessed.
    The deserialized component is then cached in the container.

    Parameters
    ----------
    elements : dict, optional
        The initial elements of the container.
        Each value must either be an instance of the class returned by
        :meth:`subcomponent_class()` or a ``dict`` (the still serialized
        form of such an instance).
        The dictionary is stored as given, i.e. it is not copied.
        By default no initial elements are added.

    Raises
    ------
    TypeError
        If a value in `elements` is neither a ``dict`` nor an instance
        of the subcomponent class.
    """

    def __init__(self, elements=None):
        if elements is None:
            elements = {}
        for element in elements.values():
            # A 'dict' is accepted as well, as it is the serialized form
            # that is deserialized lazily on first access
            if not isinstance(element, (dict, self.subcomponent_class())):
                raise TypeError(
                    f"Expected '{self.subcomponent_class().__name__}', "
                    f"but got '{type(element).__name__}'"
                )
        self._elements = elements

    @staticmethod
    def _deserialize_elements(content, take_key_from):
        """
        Lazily deserialize the elements of this container.

        Parameters
        ----------
        content : iterable of dict
            The serialized content describing the elements for this
            container.
            Each item is the serialized form of one element.
        take_key_from : str
            The key in each element of `content`, whose value is used as
            the key for the respective element.

        Returns
        -------
        elements : dict
            The elements that should be stored in this container.
            This return value can be given to the constructor.
        """
        elements = {}
        for serialized_element in content:
            key = serialized_element[take_key_from]
            # Lazy deserialization
            # -> keep serialized for now and deserialize later if needed
            elements[key] = serialized_element
        return elements

    def _serialize_elements(self, store_key_in=None):
        """
        Serialize the elements that are stored in this container.

        Each element that is still serialized (due to lazy
        deserialization), is kept as it is.

        Parameters
        ----------
        store_key_in : str, optional
            If given, the key of each element is stored as value in the
            serialized element.
            This is basically the reverse operation of `take_key_from` in
            :meth:`_deserialize_elements()`.
            For an element that is still stored in serialized form, the
            key is written into that stored object itself.

        Returns
        -------
        serialized_elements : list
            The serialized form of each element, in the iteration order
            of this container.

        Raises
        ------
        SerializationError
            If the ``serialize()`` method of an element fails.
        """
        serialized_elements = []
        for key, element in self._elements.items():
            if isinstance(element, self.subcomponent_class()):
                try:
                    serialized_element = element.serialize()
                except Exception as err:
                    # Name the failing element, but keep the root cause
                    raise SerializationError(
                        f"Failed to serialize element '{key}'"
                    ) from err
            else:
                # Element is already stored in serialized form
                serialized_element = element
            if store_key_in is not None:
                serialized_element[store_key_in] = key
            serialized_elements.append(serialized_element)
        return serialized_elements

    def __getitem__(self, key):
        element = self._elements[key]
        if not isinstance(element, self.subcomponent_class()):
            # Element is stored in serialized form
            # -> must be deserialized first
            try:
                element = self.subcomponent_class().deserialize(element)
            except Exception as err:
                raise DeserializationError(
                    f"Failed to deserialize element '{key}'"
                ) from err
            # Update container with deserialized object
            self._elements[key] = element
        return element

    def __setitem__(self, key, element):
        if isinstance(element, self.subcomponent_class()):
            pass
        elif isinstance(element, _HierarchicalContainer):
            # A common mistake may be to use the wrong container type
            raise TypeError(
                f"Expected '{self.subcomponent_class().__name__}', "
                f"but got '{type(element).__name__}'"
            )
        else:
            # Any other value is treated as serialized content
            try:
                element = self.subcomponent_class().deserialize(element)
            except Exception as err:
                raise DeserializationError(
                    "Failed to deserialize given value"
                ) from err
        self._elements[key] = element

    def __delitem__(self, key):
        del self._elements[key]

    # Implement `__contains__()` explicitly,
    # because the mixin method unnecessarily deserializes the value, if available
    def __contains__(self, key):
        return key in self._elements

    def __iter__(self):
        return iter(self._elements)

    def __len__(self):
        return len(self._elements)

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return False
        if set(self.keys()) != set(other.keys()):
            return False
        # Item access deserializes lazily stored elements on both sides,
        # so deserialized objects are compared with each other
        for key in self.keys():
            if self[key] != other[key]:
                return False
        return True
|
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
__all__ = ["compress"]
|
|
2
|
+
__name__ = "biotite.structure.io.pdbx"
|
|
3
|
+
__author__ = "Patrick Kunzmann"
|
|
4
|
+
|
|
5
|
+
import itertools
|
|
6
|
+
import warnings
|
|
7
|
+
import msgpack
|
|
8
|
+
import numpy as np
|
|
9
|
+
import biotite.structure.io.pdbx.bcif as bcif
|
|
10
|
+
from biotite.structure.io.pdbx.bcif import _encode_numpy as encode_numpy
|
|
11
|
+
from biotite.structure.io.pdbx.encoding import (
|
|
12
|
+
ByteArrayEncoding,
|
|
13
|
+
DeltaEncoding,
|
|
14
|
+
FixedPointEncoding,
|
|
15
|
+
IntegerPackingEncoding,
|
|
16
|
+
RunLengthEncoding,
|
|
17
|
+
StringArrayEncoding,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def compress(data, float_tolerance=None, rtol=1e-6, atol=1e-4):
|
|
22
|
+
"""
|
|
23
|
+
Try to reduce the size of a *BinaryCIF* file (or block, category, etc.) by testing
|
|
24
|
+
different data encodings for each data array and selecting the one, which results in
|
|
25
|
+
the smallest size.
|
|
26
|
+
|
|
27
|
+
Parameters
|
|
28
|
+
----------
|
|
29
|
+
data : BinaryCIFFile or BinaryCIFBlock or BinaryCIFCategory or BinaryCIFColumn or BinaryCIFData
|
|
30
|
+
The data to compress.
|
|
31
|
+
float_tolerance : float, optional
|
|
32
|
+
The relative error that is accepted when compressing floating point numbers.
|
|
33
|
+
DEPRECATED: Use `rtol` instead.
|
|
34
|
+
rtol, atol : float, optional
|
|
35
|
+
The compression factor of floating point numbers is chosen such that
|
|
36
|
+
either the relative (`rtol`) or absolute (`atol`) tolerance is fulfilled
|
|
37
|
+
for each value, i.e. the difference between the compressed and uncompressed
|
|
38
|
+
value is smaller than the tolerance.
|
|
39
|
+
|
|
40
|
+
Returns
|
|
41
|
+
-------
|
|
42
|
+
compressed_file : BinaryCIFFile or BinaryCIFBlock or BinaryCIFCategory or BinaryCIFColumn or BinaryCIFData
|
|
43
|
+
The compressed data with the same type as the input data.
|
|
44
|
+
If no improved compression is found for a :class:`BinaryCIFData` array,
|
|
45
|
+
the input data is kept.
|
|
46
|
+
Hence, the return value is no deep copy of the input data.
|
|
47
|
+
|
|
48
|
+
Examples
|
|
49
|
+
--------
|
|
50
|
+
|
|
51
|
+
>>> from io import BytesIO
|
|
52
|
+
>>> pdbx_file = BinaryCIFFile()
|
|
53
|
+
>>> set_structure(pdbx_file, atom_array_stack)
|
|
54
|
+
>>> # Write uncompressed file
|
|
55
|
+
>>> uncompressed_file = BytesIO()
|
|
56
|
+
>>> pdbx_file.write(uncompressed_file)
|
|
57
|
+
>>> _ = uncompressed_file.seek(0)
|
|
58
|
+
>>> print(f"{len(uncompressed_file.read()) // 1000} KB")
|
|
59
|
+
937 KB
|
|
60
|
+
>>> # Write compressed file
|
|
61
|
+
>>> pdbx_file = compress(pdbx_file)
|
|
62
|
+
>>> compressed_file = BytesIO()
|
|
63
|
+
>>> pdbx_file.write(compressed_file)
|
|
64
|
+
>>> _ = compressed_file.seek(0)
|
|
65
|
+
>>> print(f"{len(compressed_file.read()) // 1000} KB")
|
|
66
|
+
114 KB
|
|
67
|
+
"""
|
|
68
|
+
if float_tolerance is not None:
|
|
69
|
+
warnings.warn(
|
|
70
|
+
"The 'float_tolerance' parameter is deprecated, use 'rtol' instead",
|
|
71
|
+
DeprecationWarning,
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
match type(data):
|
|
75
|
+
case bcif.BinaryCIFFile:
|
|
76
|
+
return _compress_file(data, rtol, atol)
|
|
77
|
+
case bcif.BinaryCIFBlock:
|
|
78
|
+
return _compress_block(data, rtol, atol)
|
|
79
|
+
case bcif.BinaryCIFCategory:
|
|
80
|
+
return _compress_category(data, rtol, atol)
|
|
81
|
+
case bcif.BinaryCIFColumn:
|
|
82
|
+
return _compress_column(data, rtol, atol)
|
|
83
|
+
case bcif.BinaryCIFData:
|
|
84
|
+
return _compress_data(data, rtol, atol)
|
|
85
|
+
case _:
|
|
86
|
+
raise TypeError(f"Unsupported type {type(data).__name__}")
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _compress_file(bcif_file, rtol, atol):
    """
    Compress each block of a file and collect the results in a new
    :class:`BinaryCIFFile`.

    Parameters
    ----------
    bcif_file : BinaryCIFFile
        The file whose blocks are compressed.
    rtol, atol : float
        The tolerances that are passed on to the block compression.

    Returns
    -------
    compressed_file : BinaryCIFFile
        A new file that maps each block name to its compressed block.

    Raises
    ------
    ValueError
        If the compression of a block fails.
        The original exception is attached as the cause.
    """
    compressed_file = bcif.BinaryCIFFile()
    for block_name, bcif_block in bcif_file.items():
        try:
            compressed_block = _compress_block(bcif_block, rtol, atol)
        except Exception as err:
            # Name the failing block, but keep the root cause
            raise ValueError(f"Failed to compress block '{block_name}'") from err
        compressed_file[block_name] = compressed_block
    return compressed_file
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _compress_block(bcif_block, rtol, atol):
    """
    Compress every category of a *BinaryCIF* block.

    Parameters
    ----------
    bcif_block : BinaryCIFBlock
        The block whose categories are compressed.
    rtol, atol : float
        The tolerances that are passed through to the category compression.

    Returns
    -------
    compressed_block : BinaryCIFBlock
        A new block containing the compressed categories under their original
        names.

    Raises
    ------
    ValueError
        If the compression of a category fails.
        The triggering exception is attached as ``__cause__``.
    """
    compressed_block = bcif.BinaryCIFBlock()
    for category_name, bcif_category in bcif_block.items():
        try:
            compressed_category = _compress_category(bcif_category, rtol, atol)
        except Exception as err:
            # Chain explicitly, so the traceback marks the original error as the
            # direct cause instead of an error raised during handling
            raise ValueError(
                f"Failed to compress category '{category_name}'"
            ) from err
        compressed_block[category_name] = compressed_category
    return compressed_block
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _compress_category(bcif_category, rtol, atol):
    """
    Compress every column of a *BinaryCIF* category.

    Parameters
    ----------
    bcif_category : BinaryCIFCategory
        The category whose columns are compressed.
    rtol, atol : float
        The tolerances that are passed through to the column compression.

    Returns
    -------
    compressed_category : BinaryCIFCategory
        A new category containing the compressed columns under their original
        names.

    Raises
    ------
    ValueError
        If the compression of a column fails.
        The triggering exception is attached as ``__cause__``.
    """
    compressed_category = bcif.BinaryCIFCategory()
    for column_name, bcif_column in bcif_category.items():
        try:
            compressed_column = _compress_column(bcif_column, rtol, atol)
        except Exception as err:
            # Chain explicitly, so the traceback marks the original error as the
            # direct cause instead of an error raised during handling
            raise ValueError(f"Failed to compress column '{column_name}'") from err
        compressed_category[column_name] = compressed_column
    return compressed_category
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _compress_column(bcif_column, rtol, atol):
    """
    Compress the data of a *BinaryCIF* column and, if present, its mask.

    Parameters
    ----------
    bcif_column : BinaryCIFColumn
        The column to compress.
    rtol, atol : float
        The tolerances that are passed through to the data compression.

    Returns
    -------
    compressed_column : BinaryCIFColumn
        A new column built from the compressed data and the compressed mask
        (or ``None``, if the input column has no mask).
    """
    compressed_values = _compress_data(bcif_column.data, rtol, atol)
    original_mask = bcif_column.mask
    compressed_mask = (
        None
        if original_mask is None
        else _compress_data(original_mask, rtol, atol)
    )
    return bcif.BinaryCIFColumn(compressed_values, compressed_mask)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _compress_data(bcif_data, rtol, atol):
    """
    Select space-saving encodings for a single *BinaryCIF* data array.

    The strategy depends on the data type of the array:
    strings get a string array encoding with compressed index and offset arrays,
    floats are converted into fixed-point integers if this is possible within the
    tolerances and actually smaller, and integers are cast to the smallest fitting
    integer type before the best integer compression is searched.

    Parameters
    ----------
    bcif_data : BinaryCIFData
        The data to compress.
    rtol, atol : float
        The relative and absolute tolerance used for determining the number of
        decimal places kept for floating point values.

    Returns
    -------
    compressed_data : BinaryCIFData
        New data wrapping the array together with the chosen encodings.

    Raises
    ------
    TypeError
        If the array is neither a string, floating point nor integer array.
    """
    array = bcif_data.array

    # A single value is not worth compressing -> no explicit encoding is given
    if len(array) == 1:
        return bcif.BinaryCIFData(array)

    # --- Strings ---
    if np.issubdtype(array.dtype, np.str_):
        # The sub-encodings start out empty and are filled in below
        string_encoding = StringArrayEncoding(data_encoding=[], offset_encoding=[])
        # Encoding once makes the unique strings available on the encoding
        index_array = string_encoding.encode(array)
        string_lengths = [len(s) for s in string_encoding.strings]
        offset_array = np.cumsum([0] + string_lengths)
        string_encoding.data_encoding = _find_best_integer_compression(index_array)
        string_encoding.offset_encoding = _find_best_integer_compression(
            offset_array
        )
        return bcif.BinaryCIFData(array, [string_encoding])

    # --- Floats ---
    if np.issubdtype(array.dtype, np.floating):
        if not np.isfinite(array).all():
            # Integers have no representation for NaN/inf
            # -> store the floats directly as bytes
            return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
        fixed_point_encoding = FixedPointEncoding(
            10 ** _get_decimal_places(array, rtol, atol)
        )
        try:
            fixed_point_array = fixed_point_encoding.encode(array)
        except ValueError:
            # The requested precision would cause integer underflow/overflow
            # -> store the floats directly as bytes
            return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
        integer_encodings = _find_best_integer_compression(fixed_point_array)
        via_integers = bcif.BinaryCIFData(
            array, [fixed_point_encoding] + integer_encodings
        )
        as_plain_bytes = bcif.BinaryCIFData(array, [ByteArrayEncoding()])
        if _data_size_in_file(via_integers) < _data_size_in_file(as_plain_bytes):
            return via_integers
        # The integer route does not save space -> keep the plain float bytes
        return as_plain_bytes

    # --- Integers ---
    if np.issubdtype(array.dtype, np.integer):
        array = _to_smallest_integer_type(array)
        return bcif.BinaryCIFData(array, _find_best_integer_compression(array))

    raise TypeError(f"Unsupported data type {array.dtype}")
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _find_best_integer_compression(array):
    """
    Search for the encoding sequence that serializes an integer array into the
    fewest bytes.

    Every combination of optional delta encoding, optional run-length encoding and
    optional integer packing (into 1 or 2 bytes) is tried.
    Each candidate sequence ends with a byte array encoding.

    Parameters
    ----------
    array : numpy.ndarray
        The integer array to find the encodings for.

    Returns
    -------
    best_encoding_sequence : list
        The encoding objects of the smallest candidate, in the order they are
        applied.
    """
    winner = None
    winner_size = np.inf

    for with_delta in (False, True):
        # Stage 1: optional delta encoding
        delta_chain = []
        delta_array = array
        if with_delta:
            delta = DeltaEncoding()
            delta_array = delta.encode(array)
            delta_chain = [delta]

        for with_run_length in (False, True):
            # Stage 2: optional run-length encoding,
            # building on the stage 1 result instead of starting over
            rle_chain = delta_chain
            rle_array = delta_array
            if with_run_length:
                run_length = RunLengthEncoding()
                rle_array = run_length.encode(delta_array)
                rle_chain = delta_chain + [run_length]

            for packed_byte_count in (None, 1, 2):
                # Stage 3: optional integer packing
                packed_chain = rle_chain
                packed_array = rle_array
                if packed_byte_count is not None:
                    # Cheap pre-check that spares computing a packed array
                    # that would blow up in size
                    estimated_bytes = _estimate_packed_length(
                        rle_array, packed_byte_count
                    )
                    if estimated_bytes >= rle_array.nbytes:
                        # No size reduction expected from packing
                        continue
                    packing = IntegerPackingEncoding(packed_byte_count)
                    packed_array = packing.encode(rle_array)
                    packed_chain = rle_chain + [packing]

                # Final stage: conversion into bytes
                to_bytes = ByteArrayEncoding()
                payload = to_bytes.encode(packed_array)
                candidate = packed_chain + [to_bytes]

                # The serialized form is assembled by hand rather than via
                # BinaryCIFData, because the encoded bytes already exist
                # and encoding the array a second time would be wasted effort
                candidate_size = _data_size_in_file(
                    {
                        "data": payload,
                        "encoding": [enc.serialize() for enc in candidate],
                    }
                )
                if candidate_size < winner_size:
                    winner = candidate
                    winner_size = candidate_size

    return winner
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _estimate_packed_length(array, packed_byte_count):
|
|
244
|
+
"""
|
|
245
|
+
Estimate the length of an integer array after packing it with a given number of
|
|
246
|
+
bytes.
|
|
247
|
+
|
|
248
|
+
Parameters
|
|
249
|
+
----------
|
|
250
|
+
array : numpy.ndarray
|
|
251
|
+
The array to pack.
|
|
252
|
+
packed_byte_count : int
|
|
253
|
+
The number of bytes used for packing.
|
|
254
|
+
|
|
255
|
+
Returns
|
|
256
|
+
-------
|
|
257
|
+
length : int
|
|
258
|
+
The estimated length of the packed array.
|
|
259
|
+
"""
|
|
260
|
+
# Use int64 to avoid integer overflow in the following line
|
|
261
|
+
max_val_per_element = np.int64(2 ** (8 * packed_byte_count))
|
|
262
|
+
n_bytes_per_element = packed_byte_count * (np.abs(array // max_val_per_element) + 1)
|
|
263
|
+
return np.sum(n_bytes_per_element, dtype=np.int64)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _to_smallest_integer_type(array):
|
|
267
|
+
"""
|
|
268
|
+
Convert an integer array to the smallest possible integer type, that is still able
|
|
269
|
+
to represent all values in the array.
|
|
270
|
+
|
|
271
|
+
Parameters
|
|
272
|
+
----------
|
|
273
|
+
array : numpy.ndarray
|
|
274
|
+
The array to convert.
|
|
275
|
+
|
|
276
|
+
Returns
|
|
277
|
+
-------
|
|
278
|
+
array : numpy.ndarray
|
|
279
|
+
The converted array.
|
|
280
|
+
"""
|
|
281
|
+
if array.min() >= 0:
|
|
282
|
+
for dtype in [np.uint8, np.uint16, np.uint32, np.uint64]:
|
|
283
|
+
if np.all(array <= np.iinfo(dtype).max):
|
|
284
|
+
return array.astype(dtype)
|
|
285
|
+
for dtype in [np.int8, np.int16, np.int32, np.int64]:
|
|
286
|
+
if np.all(array >= np.iinfo(dtype).min) and np.all(
|
|
287
|
+
array <= np.iinfo(dtype).max
|
|
288
|
+
):
|
|
289
|
+
return array.astype(dtype)
|
|
290
|
+
raise ValueError("Array is out of bounds for all integer types")
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _data_size_in_file(data):
    """
    Measure how many bytes the data occupies once it is written into a
    *BinaryCIF* file.

    Parameters
    ----------
    data : BinaryCIFData or dict
        The data array whose size is measured.
        A :class:`BinaryCIFData` object is serialized first; anything else is
        taken as already serialized data.

    Returns
    -------
    size : int
        The number of bytes of the *MessagePack* representation of the data.
    """
    serialized = data.serialize() if isinstance(data, bcif.BinaryCIFData) else data
    return len(msgpack.packb(serialized, use_bin_type=True, default=encode_numpy))
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def _get_decimal_places(array, rtol, atol):
|
|
315
|
+
"""
|
|
316
|
+
Get the number of decimal places in a floating point array.
|
|
317
|
+
|
|
318
|
+
Parameters
|
|
319
|
+
----------
|
|
320
|
+
array : numpy.ndarray
|
|
321
|
+
The array to analyze.
|
|
322
|
+
rtol, atol : float, optional
|
|
323
|
+
The relative and absolute tolerance allowed when the values are cut off after
|
|
324
|
+
the returned number of decimal places.
|
|
325
|
+
|
|
326
|
+
Returns
|
|
327
|
+
-------
|
|
328
|
+
decimals : int
|
|
329
|
+
The number of decimal places.
|
|
330
|
+
"""
|
|
331
|
+
if rtol <= 0 and atol <= 0:
|
|
332
|
+
raise ValueError("At least one of 'rtol' and 'atol' must be greater than 0")
|
|
333
|
+
# 0 would give NaN when rounding on decimals
|
|
334
|
+
array = array[array != 0]
|
|
335
|
+
for decimals in itertools.count(start=min(0, -_order_magnitude(array))):
|
|
336
|
+
error = np.abs(np.round(array, decimals) - array)
|
|
337
|
+
if decimals == 100:
|
|
338
|
+
raise
|
|
339
|
+
if np.all((error < rtol * np.abs(array)) | (error < atol)):
|
|
340
|
+
return decimals
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _order_magnitude(array):
|
|
344
|
+
"""
|
|
345
|
+
Get the order of magnitude of floating point values.
|
|
346
|
+
|
|
347
|
+
Parameters
|
|
348
|
+
----------
|
|
349
|
+
array : ndarray, dtype=float
|
|
350
|
+
The value to analyze.
|
|
351
|
+
|
|
352
|
+
Returns
|
|
353
|
+
-------
|
|
354
|
+
magnitude : int
|
|
355
|
+
The order of magnitude, i.e. the maximum exponent a number in the array would
|
|
356
|
+
have in scientific notation, if only one digit is left of the decimal point.
|
|
357
|
+
"""
|
|
358
|
+
array = array[array != 0]
|
|
359
|
+
if len(array) == 0:
|
|
360
|
+
# No non-zero values -> define order of magnitude as 0
|
|
361
|
+
return 0
|
|
362
|
+
return int(np.max(np.floor(np.log10(np.abs(array)))).item())
|