PyPI - biotite - Versions diffs - 0.41.1__cp312-cp312-macosx_10_16_arm64.whl - Mend

biotite 0.41.1__cp312-cp312-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show

biotite/__init__.py +19 -0
biotite/application/__init__.py +43 -0
biotite/application/application.py +265 -0
biotite/application/autodock/__init__.py +12 -0
biotite/application/autodock/app.py +505 -0
biotite/application/blast/__init__.py +14 -0
biotite/application/blast/alignment.py +83 -0
biotite/application/blast/webapp.py +421 -0
biotite/application/clustalo/__init__.py +12 -0
biotite/application/clustalo/app.py +238 -0
biotite/application/dssp/__init__.py +12 -0
biotite/application/dssp/app.py +152 -0
biotite/application/localapp.py +306 -0
biotite/application/mafft/__init__.py +12 -0
biotite/application/mafft/app.py +122 -0
biotite/application/msaapp.py +374 -0
biotite/application/muscle/__init__.py +13 -0
biotite/application/muscle/app3.py +254 -0
biotite/application/muscle/app5.py +171 -0
biotite/application/sra/__init__.py +18 -0
biotite/application/sra/app.py +456 -0
biotite/application/tantan/__init__.py +12 -0
biotite/application/tantan/app.py +222 -0
biotite/application/util.py +59 -0
biotite/application/viennarna/__init__.py +18 -0
biotite/application/viennarna/rnaalifold.py +304 -0
biotite/application/viennarna/rnafold.py +269 -0
biotite/application/viennarna/rnaplot.py +187 -0
biotite/application/viennarna/util.py +72 -0
biotite/application/webapp.py +77 -0
biotite/copyable.py +71 -0
biotite/database/__init__.py +23 -0
biotite/database/entrez/__init__.py +15 -0
biotite/database/entrez/check.py +61 -0
biotite/database/entrez/dbnames.py +89 -0
biotite/database/entrez/download.py +223 -0
biotite/database/entrez/key.py +44 -0
biotite/database/entrez/query.py +223 -0
biotite/database/error.py +15 -0
biotite/database/pubchem/__init__.py +21 -0
biotite/database/pubchem/download.py +260 -0
biotite/database/pubchem/error.py +20 -0
biotite/database/pubchem/query.py +827 -0
biotite/database/pubchem/throttle.py +99 -0
biotite/database/rcsb/__init__.py +13 -0
biotite/database/rcsb/download.py +167 -0
biotite/database/rcsb/query.py +959 -0
biotite/database/uniprot/__init__.py +13 -0
biotite/database/uniprot/check.py +32 -0
biotite/database/uniprot/download.py +134 -0
biotite/database/uniprot/query.py +209 -0
biotite/file.py +251 -0
biotite/sequence/__init__.py +73 -0
biotite/sequence/align/__init__.py +49 -0
biotite/sequence/align/alignment.py +658 -0
biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
biotite/sequence/align/banded.pyx +652 -0
biotite/sequence/align/buckets.py +69 -0
biotite/sequence/align/cigar.py +434 -0
biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +574 -0
biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmersimilarity.pyx +233 -0
biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +3400 -0
biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/localgapped.pyx +892 -0
biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/localungapped.pyx +279 -0
biotite/sequence/align/matrix.py +405 -0
biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
biotite/sequence/align/matrix_data/GONNET.mat +26 -0
biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
biotite/sequence/align/matrix_data/MATCH.mat +25 -0
biotite/sequence/align/matrix_data/NUC.mat +25 -0
biotite/sequence/align/matrix_data/PAM10.mat +34 -0
biotite/sequence/align/matrix_data/PAM100.mat +34 -0
biotite/sequence/align/matrix_data/PAM110.mat +34 -0
biotite/sequence/align/matrix_data/PAM120.mat +34 -0
biotite/sequence/align/matrix_data/PAM130.mat +34 -0
biotite/sequence/align/matrix_data/PAM140.mat +34 -0
biotite/sequence/align/matrix_data/PAM150.mat +34 -0
biotite/sequence/align/matrix_data/PAM160.mat +34 -0
biotite/sequence/align/matrix_data/PAM170.mat +34 -0
biotite/sequence/align/matrix_data/PAM180.mat +34 -0
biotite/sequence/align/matrix_data/PAM190.mat +34 -0
biotite/sequence/align/matrix_data/PAM20.mat +34 -0
biotite/sequence/align/matrix_data/PAM200.mat +34 -0
biotite/sequence/align/matrix_data/PAM210.mat +34 -0
biotite/sequence/align/matrix_data/PAM220.mat +34 -0
biotite/sequence/align/matrix_data/PAM230.mat +34 -0
biotite/sequence/align/matrix_data/PAM240.mat +34 -0
biotite/sequence/align/matrix_data/PAM250.mat +34 -0
biotite/sequence/align/matrix_data/PAM260.mat +34 -0
biotite/sequence/align/matrix_data/PAM270.mat +34 -0
biotite/sequence/align/matrix_data/PAM280.mat +34 -0
biotite/sequence/align/matrix_data/PAM290.mat +34 -0
biotite/sequence/align/matrix_data/PAM30.mat +34 -0
biotite/sequence/align/matrix_data/PAM300.mat +34 -0
biotite/sequence/align/matrix_data/PAM310.mat +34 -0
biotite/sequence/align/matrix_data/PAM320.mat +34 -0
biotite/sequence/align/matrix_data/PAM330.mat +34 -0
biotite/sequence/align/matrix_data/PAM340.mat +34 -0
biotite/sequence/align/matrix_data/PAM350.mat +34 -0
biotite/sequence/align/matrix_data/PAM360.mat +34 -0
biotite/sequence/align/matrix_data/PAM370.mat +34 -0
biotite/sequence/align/matrix_data/PAM380.mat +34 -0
biotite/sequence/align/matrix_data/PAM390.mat +34 -0
biotite/sequence/align/matrix_data/PAM40.mat +34 -0
biotite/sequence/align/matrix_data/PAM400.mat +34 -0
biotite/sequence/align/matrix_data/PAM410.mat +34 -0
biotite/sequence/align/matrix_data/PAM420.mat +34 -0
biotite/sequence/align/matrix_data/PAM430.mat +34 -0
biotite/sequence/align/matrix_data/PAM440.mat +34 -0
biotite/sequence/align/matrix_data/PAM450.mat +34 -0
biotite/sequence/align/matrix_data/PAM460.mat +34 -0
biotite/sequence/align/matrix_data/PAM470.mat +34 -0
biotite/sequence/align/matrix_data/PAM480.mat +34 -0
biotite/sequence/align/matrix_data/PAM490.mat +34 -0
biotite/sequence/align/matrix_data/PAM50.mat +34 -0
biotite/sequence/align/matrix_data/PAM500.mat +34 -0
biotite/sequence/align/matrix_data/PAM60.mat +34 -0
biotite/sequence/align/matrix_data/PAM70.mat +34 -0
biotite/sequence/align/matrix_data/PAM80.mat +34 -0
biotite/sequence/align/matrix_data/PAM90.mat +34 -0
biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
biotite/sequence/align/multiple.pyx +620 -0
biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
biotite/sequence/align/pairwise.pyx +587 -0
biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
biotite/sequence/align/permutation.pyx +305 -0
biotite/sequence/align/primes.txt +821 -0
biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
biotite/sequence/align/selector.pyx +956 -0
biotite/sequence/align/statistics.py +265 -0
biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
biotite/sequence/align/tracetable.pxd +64 -0
biotite/sequence/align/tracetable.pyx +370 -0
biotite/sequence/alphabet.py +566 -0
biotite/sequence/annotation.py +829 -0
biotite/sequence/codec.cpython-312-darwin.so +0 -0
biotite/sequence/codec.pyx +155 -0
biotite/sequence/codon.py +466 -0
biotite/sequence/codon_tables.txt +202 -0
biotite/sequence/graphics/__init__.py +33 -0
biotite/sequence/graphics/alignment.py +1034 -0
biotite/sequence/graphics/color_schemes/autumn.json +51 -0
biotite/sequence/graphics/color_schemes/blossom.json +51 -0
biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
biotite/sequence/graphics/color_schemes/flower.json +51 -0
biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
biotite/sequence/graphics/color_schemes/ocean.json +51 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
biotite/sequence/graphics/color_schemes/spring.json +51 -0
biotite/sequence/graphics/color_schemes/sunset.json +51 -0
biotite/sequence/graphics/color_schemes/wither.json +51 -0
biotite/sequence/graphics/colorschemes.py +139 -0
biotite/sequence/graphics/dendrogram.py +184 -0
biotite/sequence/graphics/features.py +510 -0
biotite/sequence/graphics/logo.py +110 -0
biotite/sequence/graphics/plasmid.py +661 -0
biotite/sequence/io/__init__.py +12 -0
biotite/sequence/io/fasta/__init__.py +22 -0
biotite/sequence/io/fasta/convert.py +273 -0
biotite/sequence/io/fasta/file.py +278 -0
biotite/sequence/io/fastq/__init__.py +19 -0
biotite/sequence/io/fastq/convert.py +120 -0
biotite/sequence/io/fastq/file.py +551 -0
biotite/sequence/io/genbank/__init__.py +17 -0
biotite/sequence/io/genbank/annotation.py +277 -0
biotite/sequence/io/genbank/file.py +575 -0
biotite/sequence/io/genbank/metadata.py +324 -0
biotite/sequence/io/genbank/sequence.py +172 -0
biotite/sequence/io/general.py +192 -0
biotite/sequence/io/gff/__init__.py +26 -0
biotite/sequence/io/gff/convert.py +133 -0
biotite/sequence/io/gff/file.py +434 -0
biotite/sequence/phylo/__init__.py +36 -0
biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/nj.pyx +221 -0
biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/tree.pyx +1169 -0
biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/upgma.pyx +164 -0
biotite/sequence/profile.py +456 -0
biotite/sequence/search.py +116 -0
biotite/sequence/seqtypes.py +556 -0
biotite/sequence/sequence.py +374 -0
biotite/structure/__init__.py +132 -0
biotite/structure/atoms.py +1455 -0
biotite/structure/basepairs.py +1415 -0
biotite/structure/bonds.cpython-312-darwin.so +0 -0
biotite/structure/bonds.pyx +1933 -0
biotite/structure/box.py +592 -0
biotite/structure/celllist.cpython-312-darwin.so +0 -0
biotite/structure/celllist.pyx +849 -0
biotite/structure/chains.py +298 -0
biotite/structure/charges.cpython-312-darwin.so +0 -0
biotite/structure/charges.pyx +520 -0
biotite/structure/compare.py +274 -0
biotite/structure/density.py +114 -0
biotite/structure/dotbracket.py +216 -0
biotite/structure/error.py +31 -0
biotite/structure/filter.py +585 -0
biotite/structure/geometry.py +697 -0
biotite/structure/graphics/__init__.py +13 -0
biotite/structure/graphics/atoms.py +226 -0
biotite/structure/graphics/rna.py +282 -0
biotite/structure/hbond.py +409 -0
biotite/structure/info/__init__.py +25 -0
biotite/structure/info/atom_masses.json +121 -0
biotite/structure/info/atoms.py +82 -0
biotite/structure/info/bonds.py +145 -0
biotite/structure/info/ccd/README.rst +8 -0
biotite/structure/info/ccd/amino_acids.txt +1663 -0
biotite/structure/info/ccd/carbohydrates.txt +1135 -0
biotite/structure/info/ccd/components.bcif +0 -0
biotite/structure/info/ccd/nucleotides.txt +798 -0
biotite/structure/info/ccd.py +95 -0
biotite/structure/info/groups.py +90 -0
biotite/structure/info/masses.py +123 -0
biotite/structure/info/misc.py +144 -0
biotite/structure/info/radii.py +197 -0
biotite/structure/info/standardize.py +196 -0
biotite/structure/integrity.py +268 -0
biotite/structure/io/__init__.py +30 -0
biotite/structure/io/ctab.py +72 -0
biotite/structure/io/dcd/__init__.py +13 -0
biotite/structure/io/dcd/file.py +65 -0
biotite/structure/io/general.py +257 -0
biotite/structure/io/gro/__init__.py +14 -0
biotite/structure/io/gro/file.py +343 -0
biotite/structure/io/mmtf/__init__.py +21 -0
biotite/structure/io/mmtf/assembly.py +214 -0
biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/convertarray.pyx +341 -0
biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/convertfile.pyx +501 -0
biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/decode.pyx +152 -0
biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/encode.pyx +183 -0
biotite/structure/io/mmtf/file.py +233 -0
biotite/structure/io/mol/__init__.py +20 -0
biotite/structure/io/mol/convert.py +115 -0
biotite/structure/io/mol/ctab.py +414 -0
biotite/structure/io/mol/header.py +116 -0
biotite/structure/io/mol/mol.py +193 -0
biotite/structure/io/mol/sdf.py +916 -0
biotite/structure/io/netcdf/__init__.py +13 -0
biotite/structure/io/netcdf/file.py +63 -0
biotite/structure/io/npz/__init__.py +20 -0
biotite/structure/io/npz/file.py +152 -0
biotite/structure/io/pdb/__init__.py +20 -0
biotite/structure/io/pdb/convert.py +293 -0
biotite/structure/io/pdb/file.py +1240 -0
biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
biotite/structure/io/pdb/hybrid36.pyx +242 -0
biotite/structure/io/pdbqt/__init__.py +15 -0
biotite/structure/io/pdbqt/convert.py +107 -0
biotite/structure/io/pdbqt/file.py +640 -0
biotite/structure/io/pdbx/__init__.py +23 -0
biotite/structure/io/pdbx/bcif.py +648 -0
biotite/structure/io/pdbx/cif.py +1032 -0
biotite/structure/io/pdbx/component.py +246 -0
biotite/structure/io/pdbx/convert.py +1597 -0
biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +950 -0
biotite/structure/io/pdbx/legacy.py +267 -0
biotite/structure/io/tng/__init__.py +13 -0
biotite/structure/io/tng/file.py +46 -0
biotite/structure/io/trajfile.py +710 -0
biotite/structure/io/trr/__init__.py +13 -0
biotite/structure/io/trr/file.py +46 -0
biotite/structure/io/xtc/__init__.py +13 -0
biotite/structure/io/xtc/file.py +46 -0
biotite/structure/mechanics.py +75 -0
biotite/structure/molecules.py +353 -0
biotite/structure/pseudoknots.py +642 -0
biotite/structure/rdf.py +243 -0
biotite/structure/repair.py +253 -0
biotite/structure/residues.py +562 -0
biotite/structure/resutil.py +178 -0
biotite/structure/sasa.cpython-312-darwin.so +0 -0
biotite/structure/sasa.pyx +322 -0
biotite/structure/sequence.py +112 -0
biotite/structure/sse.py +327 -0
biotite/structure/superimpose.py +727 -0
biotite/structure/transform.py +504 -0
biotite/structure/util.py +98 -0
biotite/temp.py +86 -0
biotite/version.py +16 -0
biotite/visualize.py +251 -0
biotite-0.41.1.dist-info/METADATA +187 -0
biotite-0.41.1.dist-info/RECORD +340 -0
biotite-0.41.1.dist-info/WHEEL +4 -0
biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0

biotite/structure/io/mmtf/convertfile.pyx ADDED Viewed

@@ -0,0 +1,501 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.structure.io.mmtf"
+__author__ = "Patrick Kunzmann"
+__all__ = ["get_model_count", "get_structure"]
+cimport cython
+cimport numpy as np
+import numpy as np
+from .file import MMTFFile
+from ...atoms import Atom, AtomArray, AtomArrayStack
+from ...bonds import BondList
+from ...error import BadStructureError
+from ...filter import filter_first_altloc, filter_highest_occupancy_altloc
+from ...residues import get_residue_starts
+from ...box import vectors_from_unitcell
+from ....file import InvalidFileError
+ctypedef np.int8_t int8
+ctypedef np.int16_t int16
+ctypedef np.int32_t int32
+ctypedef np.uint8_t uint8
+ctypedef np.uint16_t uint16
+ctypedef np.uint32_t uint32
+ctypedef np.uint64_t uint64
+ctypedef np.float32_t float32
+def get_model_count(file):
+    """
+    Return how many models are stored in a MMTF file.
+    Parameters
+    ----------
+    file : MMTFFile
+        The file object to inspect.
+    Returns
+    -------
+    model_count : int
+        The value of the ``"numModels"`` field of the file.
+    """
+    model_count = file["numModels"]
+    return model_count
+def get_structure(file, model=None, altloc="first",
+                  extra_fields=[], include_bonds=False):
+    """
+    get_structure(file, model=None, altloc="first", extra_fields=[],
+                  include_bonds=False)
+    Get an :class:`AtomArray` or :class:`AtomArrayStack` from the MMTF file.
+    Parameters
+    ----------
+    file : MMTFFile
+        The file object.
+    model : int, optional
+        If this parameter is given, the function will return an
+        :class:`AtomArray` from the atoms corresponding to the given
+        model number (starting at 1).
+        Negative values are used to index models starting from the last
+        model instead of the first model.
+        If this parameter is omitted, an :class:`AtomArrayStack`
+        containing all models will be returned, even if the structure
+        contains only one model.
+    altloc : {'first', 'occupancy', 'all'}
+        This parameter defines how *altloc* IDs are handled:
+            - ``'first'`` - Use atoms that have the first *altloc* ID
+              appearing in a residue.
+            - ``'occupancy'`` - Use atoms that have the *altloc* ID
+              with the highest occupancy for a residue.
+              If the file contains no occupancy information,
+              ``'first'`` is used instead.
+            - ``'all'`` - Use all atoms.
+              Note that this leads to duplicate atoms.
+              When this option is chosen, the ``altloc_id`` annotation
+              array is added to the returned structure.
+        This parameter is only evaluated, if the file contains
+        *altloc* IDs.
+    extra_fields : list of str, optional
+        The strings in the list are optional annotation categories
+        that should be stored in the output array or stack.
+        These are valid values:
+        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
+        The list is only read by this function, it is never modified.
+    include_bonds : bool, optional
+        If set to true, a :class:`BondList` will be created for the
+        resulting :class:`AtomArray` containing the bond information
+        from the file.
+    Returns
+    -------
+    array : AtomArray or AtomArrayStack
+        The return type depends on the `model` parameter.
+    Raises
+    ------
+    InvalidFileError
+        If `model` is omitted and the models in the file have an
+        unequal amount of atoms.
+    ValueError
+        If `model` is 0 or refers to a model that does not exist in the
+        file, or if `altloc` is not a valid option.
+    Examples
+    --------
+    >>> import os.path
+    >>> file = MMTFFile.read(os.path.join(path_to_structures, "1l2y.mmtf"))
+    >>> array = get_structure(file, model=1)
+    >>> print(array.array_length())
+    304
+    >>> stack = get_structure(file)
+    >>> print(stack.stack_depth(), stack.array_length())
+    38 304
+    """
+    cdef int i
+    # Obtain (and potentially decode) required arrays/values from file
+    cdef int model_count = file["numModels"]
+    cdef np.ndarray chain_names = file["chainNameList"]
+    cdef int32[:] chains_per_model = np.array(file["chainsPerModel"], np.int32)
+    cdef int32[:] res_per_chain = np.array(file["groupsPerChain"], np.int32)
+    cdef int32[:] res_type_i = file["groupTypeList"]
+    cdef np.ndarray index_list = file["groupIdList"]
+    cdef int32[:] res_ids = index_list
+    cdef np.ndarray x_coord = file["xCoordList"]
+    cdef np.ndarray y_coord = file["yCoordList"]
+    cdef np.ndarray z_coord = file["zCoordList"]
+    # Optional field -> 'None' if the file has no occupancy information
+    cdef np.ndarray occupancy = file.get("occupancyList")
+    cdef np.ndarray b_factor
+    if "b_factor" in extra_fields:
+        b_factor = file["bFactorList"]
+    cdef np.ndarray atom_ids
+    if "atom_id" in extra_fields:
+        atom_ids = file["atomIdList"]
+    # Optional fields -> 'None' if missing in the file
+    cdef np.ndarray all_altloc_ids
+    cdef np.ndarray inscode
+    all_altloc_ids = file.get("altLocList")
+    inscode = file.get("insCodeList")
+    # Create arrays from 'groupList' list of dictionaries
+    cdef list group_list = file["groupList"]
+    cdef list non_hetero_list = ["L-PEPTIDE LINKING", "PEPTIDE LINKING",
+                                 "DNA LINKING", "RNA LINKING"]
+    # Determine per-residue-count and maximum count
+    # of atoms in each residue
+    cdef np.ndarray atoms_per_res = np.zeros(len(group_list), dtype=np.int32)
+    for i in range(len(group_list)):
+        atoms_per_res[i] = len(group_list[i]["atomNameList"])
+    cdef int32 max_atoms_per_res = np.max(atoms_per_res)
+    # Create the arrays
+    # The 2D arrays are padded to the size of the largest residue type
+    cdef np.ndarray res_names = np.zeros(len(group_list), dtype="U5")
+    cdef np.ndarray hetero_res = np.zeros(len(group_list), dtype=bool)
+    cdef np.ndarray atom_names = np.zeros((len(group_list), max_atoms_per_res),
+                                          dtype="U6")
+    cdef np.ndarray elements = np.zeros((len(group_list), max_atoms_per_res),
+                                        dtype="U2")
+    cdef np.ndarray charges = np.zeros((len(group_list), max_atoms_per_res),
+                                          dtype=np.int32)
+    # Fill the arrays
+    for i in range(len(group_list)):
+        residue = group_list[i]
+        res_names[i] = residue["groupName"]
+        hetero_res[i] = (residue["chemCompType"] not in non_hetero_list)
+        atom_names[i, :atoms_per_res[i]] = residue["atomNameList"]
+        elements[i, :atoms_per_res[i]] = residue["elementList"]
+        charges[i, :atoms_per_res[i]] = residue["formalChargeList"]
+    # Create the atom array (stack)
+    cdef int depth, length
+    cdef int start_i, stop_i
+    cdef bint extra_charge
+    cdef np.ndarray altloc_ids
+    # Occupancy restricted to the atoms of the returned model(s),
+    # so that it is aligned atom-by-atom with 'altloc_ids'
+    cdef np.ndarray model_occupancy = None
+    # The number of atoms in each model is required in both cases below
+    lengths = _get_model_lengths(res_type_i, chains_per_model,
+                                 res_per_chain, atoms_per_res)
+    if model is None:
+        # Check if each model has the same amount of atoms
+        # If not, raise exception
+        if (lengths != lengths[0]).any():
+            raise InvalidFileError("The models in the file have unequal "
+                                   "amount of atoms, give an explicit "
+                                   "model instead")
+        length = lengths[0]
+        depth = model_count
+        array = AtomArrayStack(depth, length)
+        array.coord = np.stack(
+            [x_coord,
+             y_coord,
+             z_coord],
+             axis=1
+        ).reshape(depth, length, 3)
+        # Create altloc array for the final filtering
+        # Annotations are shared by all models of a stack
+        # -> only the values of the first model are used
+        if all_altloc_ids is not None:
+            altloc_ids = all_altloc_ids[:length]
+        else:
+            altloc_ids = None
+        if occupancy is not None:
+            model_occupancy = occupancy[:length]
+        extra_charge = False
+        if "ins_code" in extra_fields:
+            array.add_annotation("ins_code", "U1")
+        if "charge" in extra_fields:
+            extra_charge = True
+            array.add_annotation("charge", int)
+        if "atom_id" in extra_fields:
+            array.set_annotation("atom_id", atom_ids[:length])
+        if "b_factor" in extra_fields:
+            array.set_annotation("b_factor", b_factor[:length])
+        if "occupancy" in extra_fields:
+            array.set_annotation("occupancy", occupancy[:length])
+        _fill_annotations(1, array, extra_charge,
+                          chain_names, chains_per_model, res_per_chain,
+                          res_type_i, res_ids, inscode, atoms_per_res,
+                          res_names, hetero_res, atom_names, elements, charges)
+        if include_bonds:
+            array.bonds = _create_bond_list(
+                1, file["bondAtomList"], file["bondOrderList"],
+                0, length, file["numAtoms"], group_list, res_type_i,
+                atoms_per_res, res_per_chain, chains_per_model
+            )
+    else:
+        if model == 0:
+            raise ValueError("The model index must not be 0")
+        # Remember the value given by the user for the error message
+        requested_model = model
+        # Negative models mean model index starting from last model
+        if model < 0:
+            model = len(lengths) + model + 1
+        # 'model < 1' catches negative values that reach before the
+        # first model: Without this check such a value would be used
+        # as wrapping index into 'lengths'
+        if model < 1 or model > len(lengths):
+            raise ValueError(
+                f"The file has {len(lengths)} models, "
+                f"the given model {requested_model} does not exist"
+            )
+        length = lengths[model-1]
+        # Indices to filter coords and some annotations
+        # for the specified model
+        start_i = np.sum(lengths[:model-1])
+        stop_i = start_i + length
+        array = AtomArray(length)
+        array.coord[:,0] = x_coord[start_i : stop_i]
+        array.coord[:,1] = y_coord[start_i : stop_i]
+        array.coord[:,2] = z_coord[start_i : stop_i]
+        # Create altloc array for the final filtering
+        if all_altloc_ids is not None:
+            altloc_ids = np.array(all_altloc_ids[start_i : stop_i], dtype="U1")
+        else:
+            altloc_ids = None
+        if occupancy is not None:
+            model_occupancy = occupancy[start_i : stop_i]
+        extra_charge = False
+        if "charge" in extra_fields:
+            extra_charge = True
+            array.add_annotation("charge", int)
+        if "atom_id" in extra_fields:
+            array.set_annotation("atom_id", atom_ids[start_i : stop_i])
+        if "b_factor" in extra_fields:
+            array.set_annotation("b_factor", b_factor[start_i : stop_i])
+        if "occupancy" in extra_fields:
+            array.set_annotation("occupancy", occupancy[start_i : stop_i])
+        _fill_annotations(model, array, extra_charge,
+                          chain_names, chains_per_model, res_per_chain,
+                          res_type_i, res_ids, inscode, atoms_per_res,
+                          res_names, hetero_res, atom_names, elements, charges)
+        if include_bonds:
+            array.bonds = _create_bond_list(
+                model, file["bondAtomList"], file["bondOrderList"],
+                start_i, stop_i, file["numAtoms"], group_list, res_type_i,
+                atoms_per_res, res_per_chain, chains_per_model
+            )
+    # Get box
+    if "unitCell" in file:
+        a_len, b_len, c_len, alpha, beta, gamma = file["unitCell"]
+        # The angles are converted from degrees to radians
+        alpha = np.deg2rad(alpha)
+        beta  = np.deg2rad(beta )
+        gamma = np.deg2rad(gamma)
+        box = vectors_from_unitcell(
+            a_len, b_len, c_len, alpha, beta, gamma
+        )
+        if isinstance(array, AtomArrayStack):
+            # Each model in the stack gets the same box
+            array.box = np.repeat(
+                box[np.newaxis, ...], array.stack_depth(), axis=0
+            )
+        else:
+            # AtomArray
+            array.box = box
+    # Filter altloc IDs and return
+    if altloc_ids is None:
+        return array
+    elif altloc == "occupancy" and model_occupancy is not None:
+        return array[
+            ...,
+            filter_highest_occupancy_altloc(
+                array, altloc_ids, model_occupancy
+            )
+        ]
+    # 'first' is also fallback if file has no occupancy information
+    elif altloc == "first" or altloc == "occupancy":
+        return array[..., filter_first_altloc(array, altloc_ids)]
+    elif altloc == "all":
+        array.set_annotation("altloc_id", altloc_ids)
+        return array
+    else:
+        raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
+def _get_model_lengths(int32[:] res_type_i,
+                       int32[:] chains_per_model,
+                       int32[:] res_per_chain,
+                       int32[:] atoms_per_res):
+    """
+    Count the atoms of each model by walking through all residues
+    in the order they appear in `res_type_i`.
+    The returned array contains one ``int32`` atom count for each entry
+    in `chains_per_model`.
+    """
+    cdef int[:] atoms_in_models = np.zeros(len(chains_per_model), np.int32)
+    cdef int model_idx = 0
+    cdef int chain_idx = 0
+    cdef int residue_idx
+    cdef int running_atom_count = 0
+    cdef int residues_seen_in_chain = 0
+    cdef int chains_seen_in_model = 0
+    # 'res_type_i' has one entry for each residue in the file
+    for residue_idx in range(res_type_i.shape[0]):
+        # The residue type is the index into the per-type atom counts
+        running_atom_count += atoms_per_res[res_type_i[residue_idx]]
+        residues_seen_in_chain += 1
+        if residues_seen_in_chain == res_per_chain[chain_idx]:
+            # Current chain is complete
+            # -> continue with the next chain, starting without residues
+            chain_idx += 1
+            chains_seen_in_model += 1
+            residues_seen_in_chain = 0
+        if chains_seen_in_model == chains_per_model[model_idx]:
+            # Current model is complete
+            # -> store its atom count and start over for the next model
+            atoms_in_models[model_idx] = running_atom_count
+            running_atom_count = 0
+            chains_seen_in_model = 0
+            model_idx += 1
+    return np.asarray(atoms_in_models)
+def _fill_annotations(int model, array,
+                      bint extra_charge,
+                      np.ndarray chain_names,
+                      int32[:] chains_per_model,
+                      int32[:] res_per_chain,
+                      int32[:] res_type_i,
+                      int32[:] res_ids,
+                      np.ndarray res_inscodes,
+                      np.ndarray atoms_per_res,
+                      np.ndarray res_names,
+                      np.ndarray hetero_res,
+                      np.ndarray atom_names,
+                      np.ndarray elements,
+                      np.ndarray charges):
+    """
+    Fill the annotation arrays of `array` in-place with the values of
+    the given model.
+    The ``chain_id``, ``res_id``, ``ins_code``, ``res_name``,
+    ``hetero``, ``atom_name`` and ``element`` annotations are written,
+    as well as ``charge``, if `extra_charge` is true.
+    Parameters
+    ----------
+    model : int
+        The model number (starting at 1) to take the values from.
+    array : AtomArray or AtomArrayStack
+        The structure whose annotation arrays are filled.
+    extra_charge : bool
+        If true, the ``charge`` annotation of `array` is filled, too.
+    chain_names : ndarray
+        The chain ID for each chain.
+    chains_per_model, res_per_chain : int32 memoryview
+        The number of chains in each model and the number of residues
+        in each chain, respectively.
+    res_type_i : int32 memoryview
+        For each residue the index of its residue type, i.e. the index
+        into the first dimension of the per-type arrays below.
+    res_ids : int32 memoryview
+        The residue ID for each residue.
+    res_inscodes : ndarray or None
+        The insertion code for each residue.
+        If it is ``None``, an empty insertion code is written.
+    atoms_per_res, res_names, hetero_res, atom_names, elements, charges : ndarray
+        The per-type values: Number of atoms, residue name,
+        hetero flag, atom names, elements and charges.
+    """
+    # Get annotation arrays from atom array (stack)
+    cdef np.ndarray chain_id  = array.chain_id
+    cdef np.ndarray res_id    = array.res_id
+    cdef np.ndarray ins_code  = array.ins_code
+    cdef np.ndarray res_name  = array.res_name
+    cdef np.ndarray hetero    = array.hetero
+    cdef np.ndarray atom_name = array.atom_name
+    cdef np.ndarray element   = array.element
+    if extra_charge:
+        charge = array.charge
+    cdef int model_i = 0
+    cdef int chain_i = 0
+    cdef int res_i
+    cdef int atom_i = 0
+    cdef int res_count_in_chain = 0
+    cdef int chain_count_in_model = 0
+    cdef int atom_index_in_res
+    cdef chain_id_for_chain
+    cdef res_name_for_res
+    cdef inscode_for_res
+    cdef bint hetero_for_res
+    cdef int res_id_for_res
+    cdef int type_i
+    # If the file provides no insertion codes, 'inscode_for_res' is
+    # never assigned in the loop below, but it is written for each atom
+    # -> use an empty insertion code as default
+    inscode_for_res = ""
+    # The length of 'res_type_i'
+    # is equal to the total number of residues
+    for res_i in range(res_type_i.shape[0]):
+        # Wait for the data of the given model
+        if model_i == model-1:
+            chain_id_for_chain = chain_names[chain_i]
+            res_id_for_res = res_ids[res_i]
+            if res_inscodes is not None:
+                inscode_for_res = res_inscodes[res_i]
+            type_i = res_type_i[res_i]
+            res_name_for_res = res_names[type_i]
+            hetero_for_res = hetero_res[type_i]
+            # Residue-level values are repeated for each atom
+            # of the residue
+            for atom_index_in_res in range(atoms_per_res[type_i]):
+                chain_id[atom_i]  = chain_id_for_chain
+                res_id[atom_i]    = res_id_for_res
+                ins_code[atom_i]  = inscode_for_res
+                hetero[atom_i]    = hetero_for_res
+                res_name[atom_i]  = res_name_for_res
+                atom_name[atom_i] = atom_names[type_i][atom_index_in_res]
+                element[atom_i]   = elements[type_i][atom_index_in_res].upper()
+                if extra_charge:
+                    charge[atom_i] = charges[type_i][atom_index_in_res]
+                atom_i += 1
+        elif model_i > model-1:
+            # The given model has already been parsed
+            # -> parsing is finished
+            break
+        res_count_in_chain += 1
+        if res_count_in_chain == res_per_chain[chain_i]:
+            # Chain is full -> Bump chain index and reset residue count
+            res_count_in_chain = 0
+            chain_i += 1
+            chain_count_in_model += 1
+        if chain_count_in_model == chains_per_model[model_i]:
+            # Model is full -> Bump model index and reset chain count
+            chain_count_in_model = 0
+            model_i += 1
+def _create_bond_list(int model, np.ndarray bonds, np.ndarray bond_types,
+                      int model_start, int model_stop, int atom_count,
+                      list group_list, int32[:] res_type_i,
+                      int32[:] atoms_per_res,
+                      int32[:] res_per_chain, int32[:] chains_per_model):
+    """
+    Build the bond list of one model from intra-residue and
+    inter-residue bonds.
+
+    Parameters
+    ----------
+    model : int
+        The model to read; compared as ``model - 1`` against the
+        zero-based running model index.
+    bonds : ndarray
+        Atom indices of the inter-residue bonds.
+        Reshaped to ``(len(bond_types), 2)``.
+    bond_types : ndarray
+        Bond type for each inter-residue bond.
+    model_start, model_stop : int
+        Start and stop of the slice that is taken from the
+        inter-residue bond list.
+    atom_count : int
+        Atom count given to the inter-residue bond list.
+    group_list : list of dict
+        One entry per residue type, each providing the keys
+        ``"bondAtomList"`` and ``"bondOrderList"``.
+    res_type_i : int32 memoryview
+        For each residue the index into `group_list`.
+    atoms_per_res : int32 memoryview
+        Number of atoms for each residue type.
+    res_per_chain : int32 memoryview
+        Number of residues in each chain.
+    chains_per_model : int32 memoryview
+        Number of chains in each model.
+
+    Returns
+    -------
+    bond_list : BondList
+        The sliced inter-residue bonds merged with the intra-residue
+        bonds of the requested model.
+    """
+    cdef int i
+    # Determine per-residue-count and maximum count
+    # of bonds in each residue
+    cdef int32[:] bonds_per_res = np.zeros(len(group_list), dtype=np.int32)
+    for i in range(len(group_list)):
+        bonds_per_res[i] = len(group_list[i]["bondOrderList"])
+    cdef int32 max_bonds_per_res = np.max(bonds_per_res)
+    # Create arrays for intra-residue bonds and bond types
+    # Rows beyond 'bonds_per_res[i]' are padding and never read
+    cdef np.ndarray intra_bonds = np.zeros(
+        (len(group_list), max_bonds_per_res, 3), dtype=np.uint32
+    )
+    # Dictionary for groupList entry
+    cdef dict residue
+    # Fill the array
+    for i in range(len(group_list)):
+        residue = group_list[i]
+        # Convert the flat atom index list only once per residue type
+        bonds_in_residue = np.array(residue["bondAtomList"], dtype=np.uint32)
+        intra_bonds[i, :bonds_per_res[i], :2] = \
+            bonds_in_residue.reshape((-1, 2))
+        intra_bonds[i, :bonds_per_res[i], 2] = residue["bondOrderList"]
+    # Unify intra-residue bonds to one BondList
+    cdef int model_i = 0
+    cdef int chain_i = 0
+    cdef int res_i
+    cdef int res_count_in_chain = 0
+    cdef int chain_count_in_model = 0
+    cdef int type_i
+    intra_bond_list = BondList(0)
+    # The length of 'res_type_i'
+    # is equal to the total number of residues
+    for res_i in range(res_type_i.shape[0]):
+        # Wait for the data of the given model
+        if model_i == model-1:
+            type_i = res_type_i[res_i]
+            bond_list_per_res = BondList(
+                atoms_per_res[type_i],
+                intra_bonds[type_i, :bonds_per_res[type_i]]
+            )
+            intra_bond_list += bond_list_per_res
+        elif model_i > model-1:
+            # The given model has already been parsed
+            # -> parsing is finished
+            break
+        res_count_in_chain += 1
+        if res_count_in_chain == res_per_chain[chain_i]:
+            # Chain is full -> Bump chain index and reset residue count
+            res_count_in_chain = 0
+            chain_i += 1
+            chain_count_in_model += 1
+        if chain_count_in_model == chains_per_model[model_i]:
+            # Model is full -> Bump model index and reset chain count
+            chain_count_in_model = 0
+            model_i += 1
+    # Add inter-residue bonds to BondList
+    cdef np.ndarray inter_bonds = np.zeros((len(bond_types), 3),
+                                           dtype=np.uint32)
+    inter_bonds[:,:2] = bonds.reshape((len(bond_types), 2))
+    inter_bonds[:,2] = bond_types
+    inter_bond_list = BondList(atom_count, inter_bonds)
+    inter_bond_list = inter_bond_list[model_start : model_stop]
+    global_bond_list = inter_bond_list.merge(intra_bond_list)
+    return global_bond_list

biotite/structure/io/mmtf/decode.cpython-312-darwin.so ADDED Viewed

Binary file

biotite/structure/io/mmtf/decode.pyx ADDED Viewed

@@ -0,0 +1,152 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+# Module metadata: the module name is overridden with the name of the
+# subpackage and 'decode_array' is the only name listed for export
+__name__ = "biotite.structure.io.mmtf"
+__author__ = "Patrick Kunzmann"
+__all__ = ["decode_array"]
+cimport cython
+cimport numpy as np
+import numpy as np
+# Short aliases for the fixed-width NumPy C types,
+# used for typed memoryviews and C variables in this module
+ctypedef np.int8_t int8
+ctypedef np.int16_t int16
+ctypedef np.int32_t int32
+ctypedef np.uint8_t uint8
+ctypedef np.uint16_t uint16
+ctypedef np.uint32_t uint32
+ctypedef np.uint64_t uint64
+ctypedef np.float32_t float32
+def decode_array(int codec, bytes raw_bytes, int param):
+    """
+    Decode the raw bytes of an encoded array according to the given
+    codec.
+
+    Parameters
+    ----------
+    codec : int
+        The ID of the codec (1 to 15) the data is encoded with.
+    raw_bytes : bytes
+        The encoded data. Numeric values are read as big-endian.
+    param : int
+        The codec-specific parameter:
+        the string length for codec 5 and the divisor for the
+        integer-encoded codecs 9 to 13. Not read by the other codecs.
+
+    Returns
+    -------
+    array : ndarray
+        The decoded array.
+
+    Raises
+    ------
+    ValueError
+        If `codec` is not one of the known codec IDs.
+    """
+    cdef np.ndarray array
+    # Pass-through: 32-bit floating-point number array
+    if codec == 1:
+        array = np.frombuffer(raw_bytes, dtype=">f4").astype(np.float32)
+        return array
+    # Pass-through: 8-bit signed integer array
+    elif codec == 2:
+        array = np.frombuffer(raw_bytes, dtype=">i1").astype(np.int8)
+        return array
+    # Pass-through: 16-bit signed integer array
+    elif codec == 3:
+        array = np.frombuffer(raw_bytes, dtype=">i2").astype(np.int16)
+        return array
+    # Pass-through: 32-bit signed integer array
+    elif codec == 4:
+        array = np.frombuffer(raw_bytes, dtype=">i4").astype(np.int32)
+        return array
+    # UTF8/ASCII fixed-length string array
+    elif codec == 5:
+        array = np.frombuffer(raw_bytes, np.dtype("S" + str(param)))
+        return array.astype(np.dtype("U" + str(param)))
+    # Run-length encoded character array
+    elif codec == 6:
+        array = np.frombuffer(raw_bytes, dtype=">i4").astype(np.int32)
+        # Reinterpret each decoded 32-bit integer as a single character
+        return np.frombuffer(_decode_run_length(array), dtype="U1")
+    # Run-length encoded 32-bit signed integer array
+    elif codec == 7:
+        array = np.frombuffer(raw_bytes, dtype=">i4").astype(np.int32)
+        return _decode_run_length(array)
+    # Delta & run-length encoded 32-bit signed integer array
+    elif codec == 8:
+        array = np.frombuffer(raw_bytes, dtype=">i4").astype(np.int32)
+        return _decode_delta(
+               _decode_run_length(array))
+    # Integer & run-length encoded 32-bit floating-point number array
+    elif codec == 9:
+        array = np.frombuffer(raw_bytes, dtype=">i4").astype(np.int32)
+        return _decode_integer(param,
+               _decode_run_length(array))
+    # Integer & delta encoded
+    # & two-byte-packed 32-bit floating-point number array
+    elif codec == 10:
+        array = np.frombuffer(raw_bytes, dtype=">i2").astype(np.int16)
+        return _decode_integer(param,
+               _decode_delta(
+               _decode_packed(array)))
+    # Integer encoded 32-bit floating-point number array
+    elif codec == 11:
+        array = np.frombuffer(raw_bytes, dtype=">i2").astype(np.int16)
+        return _decode_integer(param, array)
+    # Integer & two-byte-packed 32-bit floating-point number array
+    elif codec == 12:
+        array = np.frombuffer(raw_bytes, dtype=">i2").astype(np.int16)
+        return _decode_integer(param,
+               _decode_packed(array))
+    # Integer & one-byte-packed 32-bit floating-point number array
+    elif codec == 13:
+        array = np.frombuffer(raw_bytes, dtype=">i1").astype(np.int8)
+        return _decode_integer(param,
+               _decode_packed(array))
+    # Two-byte-packed 32-bit signed integer array
+    elif codec == 14:
+        array = np.frombuffer(raw_bytes, dtype=">i2").astype(np.int16)
+        return _decode_packed(array)
+    # One-byte-packed 32-bit signed integer array
+    elif codec == 15:
+        array = np.frombuffer(raw_bytes, dtype=">i1").astype(np.int8)
+        return _decode_packed(array)
+    else:
+        # f-string, so that the offending ID appears in the message
+        raise ValueError(f"Unknown codec with ID {codec}")
+def _decode_delta(np.ndarray array):
+    """
+    Revert delta encoding: each output element is the sum of all input
+    elements up to and including its position, as 32-bit integers.
+    """
+    decoded = np.cumsum(array, dtype=np.int32)
+    return decoded
+def _decode_run_length(int32[:] array):
+    """
+    Revert run-length encoding.
+
+    The input is read as consecutive ``(value, repeat)`` pairs:
+    elements at even positions are the values, elements at odd
+    positions give how often the preceding value is repeated.
+    Returns the expanded values as 32-bit integer array.
+    """
+    cdef int n_encoded = array.shape[0]
+    cdef int total = 0
+    cdef int src, dst
+    cdef int run_value, run_length
+    # The output size is the sum of all run lengths (odd positions)
+    src = 1
+    while src < n_encoded:
+        total += array[src]
+        src += 2
+    cdef int32[:] expanded = np.zeros(total, dtype=np.int32)
+    # Write each value 'run_length' times into the next free section
+    dst = 0
+    src = 0
+    while src < n_encoded:
+        run_value = array[src]
+        run_length = array[src+1]
+        expanded[dst : dst+run_length] = run_value
+        dst += run_length
+        src += 2
+    return np.asarray(expanded)
+# Fused type for the element type of the input of '_decode_packed()':
+# either 8-bit or 16-bit signed integers
+ctypedef fused PackedType:
+    int8
+    int16
+def _decode_packed(PackedType[:] array):
+    """
+    Revert recursive indexing ("packing") of integers.
+
+    Input elements are summed up until an element is encountered that
+    is neither the minimum nor the maximum of the input integer type.
+    Each such sum becomes one element of the returned 32-bit integer
+    array. A trailing run consisting only of limit values yields no
+    output element.
+    """
+    cdef int lower_limit, upper_limit
+    if PackedType is int8:
+        limits = np.iinfo(np.int8)
+    else:
+        limits = np.iinfo(np.int16)
+    lower_limit = limits.min
+    upper_limit = limits.max
+    cdef int pos, n_values = 0
+    cdef int current, accumulated = 0
+    # The output can never be longer than the input:
+    # this length is reached if no element is a limit value
+    cdef int32[:] unpacked = np.zeros(array.shape[0], dtype=np.int32)
+    for pos in range(array.shape[0]):
+        current = array[pos]
+        accumulated += current
+        if current != upper_limit and current != lower_limit:
+            # Value is complete -> store it and start the next one
+            unpacked[n_values] = accumulated
+            accumulated = 0
+            n_values += 1
+    # Cut off the unused tail of the output
+    return np.asarray(unpacked[:n_values])
+def _decode_integer(int divisor, np.ndarray array):
+    """
+    Revert integer encoding: divide the integer values by `divisor`
+    and return the quotients as 32-bit floating-point numbers.
+    """
+    quotients = np.divide(array, divisor, dtype=np.float32)
+    return quotients

biotite/structure/io/mmtf/encode.cpython-312-darwin.so ADDED Viewed

Binary file