PyPI - biotite - Versions diffs - 0.41.1__cp312-cp312-macosx_10_16_x86_64.whl - Mend

biotite 0.41.1__cp312-cp312-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show

biotite/__init__.py +19 -0
biotite/application/__init__.py +43 -0
biotite/application/application.py +265 -0
biotite/application/autodock/__init__.py +12 -0
biotite/application/autodock/app.py +505 -0
biotite/application/blast/__init__.py +14 -0
biotite/application/blast/alignment.py +83 -0
biotite/application/blast/webapp.py +421 -0
biotite/application/clustalo/__init__.py +12 -0
biotite/application/clustalo/app.py +238 -0
biotite/application/dssp/__init__.py +12 -0
biotite/application/dssp/app.py +152 -0
biotite/application/localapp.py +306 -0
biotite/application/mafft/__init__.py +12 -0
biotite/application/mafft/app.py +122 -0
biotite/application/msaapp.py +374 -0
biotite/application/muscle/__init__.py +13 -0
biotite/application/muscle/app3.py +254 -0
biotite/application/muscle/app5.py +171 -0
biotite/application/sra/__init__.py +18 -0
biotite/application/sra/app.py +456 -0
biotite/application/tantan/__init__.py +12 -0
biotite/application/tantan/app.py +222 -0
biotite/application/util.py +59 -0
biotite/application/viennarna/__init__.py +18 -0
biotite/application/viennarna/rnaalifold.py +304 -0
biotite/application/viennarna/rnafold.py +269 -0
biotite/application/viennarna/rnaplot.py +187 -0
biotite/application/viennarna/util.py +72 -0
biotite/application/webapp.py +77 -0
biotite/copyable.py +71 -0
biotite/database/__init__.py +23 -0
biotite/database/entrez/__init__.py +15 -0
biotite/database/entrez/check.py +61 -0
biotite/database/entrez/dbnames.py +89 -0
biotite/database/entrez/download.py +223 -0
biotite/database/entrez/key.py +44 -0
biotite/database/entrez/query.py +223 -0
biotite/database/error.py +15 -0
biotite/database/pubchem/__init__.py +21 -0
biotite/database/pubchem/download.py +260 -0
biotite/database/pubchem/error.py +20 -0
biotite/database/pubchem/query.py +827 -0
biotite/database/pubchem/throttle.py +99 -0
biotite/database/rcsb/__init__.py +13 -0
biotite/database/rcsb/download.py +167 -0
biotite/database/rcsb/query.py +959 -0
biotite/database/uniprot/__init__.py +13 -0
biotite/database/uniprot/check.py +32 -0
biotite/database/uniprot/download.py +134 -0
biotite/database/uniprot/query.py +209 -0
biotite/file.py +251 -0
biotite/sequence/__init__.py +73 -0
biotite/sequence/align/__init__.py +49 -0
biotite/sequence/align/alignment.py +658 -0
biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
biotite/sequence/align/banded.pyx +652 -0
biotite/sequence/align/buckets.py +69 -0
biotite/sequence/align/cigar.py +434 -0
biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +574 -0
biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmersimilarity.pyx +233 -0
biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +3400 -0
biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/localgapped.pyx +892 -0
biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/localungapped.pyx +279 -0
biotite/sequence/align/matrix.py +405 -0
biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
biotite/sequence/align/matrix_data/GONNET.mat +26 -0
biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
biotite/sequence/align/matrix_data/MATCH.mat +25 -0
biotite/sequence/align/matrix_data/NUC.mat +25 -0
biotite/sequence/align/matrix_data/PAM10.mat +34 -0
biotite/sequence/align/matrix_data/PAM100.mat +34 -0
biotite/sequence/align/matrix_data/PAM110.mat +34 -0
biotite/sequence/align/matrix_data/PAM120.mat +34 -0
biotite/sequence/align/matrix_data/PAM130.mat +34 -0
biotite/sequence/align/matrix_data/PAM140.mat +34 -0
biotite/sequence/align/matrix_data/PAM150.mat +34 -0
biotite/sequence/align/matrix_data/PAM160.mat +34 -0
biotite/sequence/align/matrix_data/PAM170.mat +34 -0
biotite/sequence/align/matrix_data/PAM180.mat +34 -0
biotite/sequence/align/matrix_data/PAM190.mat +34 -0
biotite/sequence/align/matrix_data/PAM20.mat +34 -0
biotite/sequence/align/matrix_data/PAM200.mat +34 -0
biotite/sequence/align/matrix_data/PAM210.mat +34 -0
biotite/sequence/align/matrix_data/PAM220.mat +34 -0
biotite/sequence/align/matrix_data/PAM230.mat +34 -0
biotite/sequence/align/matrix_data/PAM240.mat +34 -0
biotite/sequence/align/matrix_data/PAM250.mat +34 -0
biotite/sequence/align/matrix_data/PAM260.mat +34 -0
biotite/sequence/align/matrix_data/PAM270.mat +34 -0
biotite/sequence/align/matrix_data/PAM280.mat +34 -0
biotite/sequence/align/matrix_data/PAM290.mat +34 -0
biotite/sequence/align/matrix_data/PAM30.mat +34 -0
biotite/sequence/align/matrix_data/PAM300.mat +34 -0
biotite/sequence/align/matrix_data/PAM310.mat +34 -0
biotite/sequence/align/matrix_data/PAM320.mat +34 -0
biotite/sequence/align/matrix_data/PAM330.mat +34 -0
biotite/sequence/align/matrix_data/PAM340.mat +34 -0
biotite/sequence/align/matrix_data/PAM350.mat +34 -0
biotite/sequence/align/matrix_data/PAM360.mat +34 -0
biotite/sequence/align/matrix_data/PAM370.mat +34 -0
biotite/sequence/align/matrix_data/PAM380.mat +34 -0
biotite/sequence/align/matrix_data/PAM390.mat +34 -0
biotite/sequence/align/matrix_data/PAM40.mat +34 -0
biotite/sequence/align/matrix_data/PAM400.mat +34 -0
biotite/sequence/align/matrix_data/PAM410.mat +34 -0
biotite/sequence/align/matrix_data/PAM420.mat +34 -0
biotite/sequence/align/matrix_data/PAM430.mat +34 -0
biotite/sequence/align/matrix_data/PAM440.mat +34 -0
biotite/sequence/align/matrix_data/PAM450.mat +34 -0
biotite/sequence/align/matrix_data/PAM460.mat +34 -0
biotite/sequence/align/matrix_data/PAM470.mat +34 -0
biotite/sequence/align/matrix_data/PAM480.mat +34 -0
biotite/sequence/align/matrix_data/PAM490.mat +34 -0
biotite/sequence/align/matrix_data/PAM50.mat +34 -0
biotite/sequence/align/matrix_data/PAM500.mat +34 -0
biotite/sequence/align/matrix_data/PAM60.mat +34 -0
biotite/sequence/align/matrix_data/PAM70.mat +34 -0
biotite/sequence/align/matrix_data/PAM80.mat +34 -0
biotite/sequence/align/matrix_data/PAM90.mat +34 -0
biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
biotite/sequence/align/multiple.pyx +620 -0
biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
biotite/sequence/align/pairwise.pyx +587 -0
biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
biotite/sequence/align/permutation.pyx +305 -0
biotite/sequence/align/primes.txt +821 -0
biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
biotite/sequence/align/selector.pyx +956 -0
biotite/sequence/align/statistics.py +265 -0
biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
biotite/sequence/align/tracetable.pxd +64 -0
biotite/sequence/align/tracetable.pyx +370 -0
biotite/sequence/alphabet.py +566 -0
biotite/sequence/annotation.py +829 -0
biotite/sequence/codec.cpython-312-darwin.so +0 -0
biotite/sequence/codec.pyx +155 -0
biotite/sequence/codon.py +466 -0
biotite/sequence/codon_tables.txt +202 -0
biotite/sequence/graphics/__init__.py +33 -0
biotite/sequence/graphics/alignment.py +1034 -0
biotite/sequence/graphics/color_schemes/autumn.json +51 -0
biotite/sequence/graphics/color_schemes/blossom.json +51 -0
biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
biotite/sequence/graphics/color_schemes/flower.json +51 -0
biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
biotite/sequence/graphics/color_schemes/ocean.json +51 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
biotite/sequence/graphics/color_schemes/spring.json +51 -0
biotite/sequence/graphics/color_schemes/sunset.json +51 -0
biotite/sequence/graphics/color_schemes/wither.json +51 -0
biotite/sequence/graphics/colorschemes.py +139 -0
biotite/sequence/graphics/dendrogram.py +184 -0
biotite/sequence/graphics/features.py +510 -0
biotite/sequence/graphics/logo.py +110 -0
biotite/sequence/graphics/plasmid.py +661 -0
biotite/sequence/io/__init__.py +12 -0
biotite/sequence/io/fasta/__init__.py +22 -0
biotite/sequence/io/fasta/convert.py +273 -0
biotite/sequence/io/fasta/file.py +278 -0
biotite/sequence/io/fastq/__init__.py +19 -0
biotite/sequence/io/fastq/convert.py +120 -0
biotite/sequence/io/fastq/file.py +551 -0
biotite/sequence/io/genbank/__init__.py +17 -0
biotite/sequence/io/genbank/annotation.py +277 -0
biotite/sequence/io/genbank/file.py +575 -0
biotite/sequence/io/genbank/metadata.py +324 -0
biotite/sequence/io/genbank/sequence.py +172 -0
biotite/sequence/io/general.py +192 -0
biotite/sequence/io/gff/__init__.py +26 -0
biotite/sequence/io/gff/convert.py +133 -0
biotite/sequence/io/gff/file.py +434 -0
biotite/sequence/phylo/__init__.py +36 -0
biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/nj.pyx +221 -0
biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/tree.pyx +1169 -0
biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/upgma.pyx +164 -0
biotite/sequence/profile.py +456 -0
biotite/sequence/search.py +116 -0
biotite/sequence/seqtypes.py +556 -0
biotite/sequence/sequence.py +374 -0
biotite/structure/__init__.py +132 -0
biotite/structure/atoms.py +1455 -0
biotite/structure/basepairs.py +1415 -0
biotite/structure/bonds.cpython-312-darwin.so +0 -0
biotite/structure/bonds.pyx +1933 -0
biotite/structure/box.py +592 -0
biotite/structure/celllist.cpython-312-darwin.so +0 -0
biotite/structure/celllist.pyx +849 -0
biotite/structure/chains.py +298 -0
biotite/structure/charges.cpython-312-darwin.so +0 -0
biotite/structure/charges.pyx +520 -0
biotite/structure/compare.py +274 -0
biotite/structure/density.py +114 -0
biotite/structure/dotbracket.py +216 -0
biotite/structure/error.py +31 -0
biotite/structure/filter.py +585 -0
biotite/structure/geometry.py +697 -0
biotite/structure/graphics/__init__.py +13 -0
biotite/structure/graphics/atoms.py +226 -0
biotite/structure/graphics/rna.py +282 -0
biotite/structure/hbond.py +409 -0
biotite/structure/info/__init__.py +25 -0
biotite/structure/info/atom_masses.json +121 -0
biotite/structure/info/atoms.py +82 -0
biotite/structure/info/bonds.py +145 -0
biotite/structure/info/ccd/README.rst +8 -0
biotite/structure/info/ccd/amino_acids.txt +1663 -0
biotite/structure/info/ccd/carbohydrates.txt +1135 -0
biotite/structure/info/ccd/components.bcif +0 -0
biotite/structure/info/ccd/nucleotides.txt +798 -0
biotite/structure/info/ccd.py +95 -0
biotite/structure/info/groups.py +90 -0
biotite/structure/info/masses.py +123 -0
biotite/structure/info/misc.py +144 -0
biotite/structure/info/radii.py +197 -0
biotite/structure/info/standardize.py +196 -0
biotite/structure/integrity.py +268 -0
biotite/structure/io/__init__.py +30 -0
biotite/structure/io/ctab.py +72 -0
biotite/structure/io/dcd/__init__.py +13 -0
biotite/structure/io/dcd/file.py +65 -0
biotite/structure/io/general.py +257 -0
biotite/structure/io/gro/__init__.py +14 -0
biotite/structure/io/gro/file.py +343 -0
biotite/structure/io/mmtf/__init__.py +21 -0
biotite/structure/io/mmtf/assembly.py +214 -0
biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/convertarray.pyx +341 -0
biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/convertfile.pyx +501 -0
biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/decode.pyx +152 -0
biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/encode.pyx +183 -0
biotite/structure/io/mmtf/file.py +233 -0
biotite/structure/io/mol/__init__.py +20 -0
biotite/structure/io/mol/convert.py +115 -0
biotite/structure/io/mol/ctab.py +414 -0
biotite/structure/io/mol/header.py +116 -0
biotite/structure/io/mol/mol.py +193 -0
biotite/structure/io/mol/sdf.py +916 -0
biotite/structure/io/netcdf/__init__.py +13 -0
biotite/structure/io/netcdf/file.py +63 -0
biotite/structure/io/npz/__init__.py +20 -0
biotite/structure/io/npz/file.py +152 -0
biotite/structure/io/pdb/__init__.py +20 -0
biotite/structure/io/pdb/convert.py +293 -0
biotite/structure/io/pdb/file.py +1240 -0
biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
biotite/structure/io/pdb/hybrid36.pyx +242 -0
biotite/structure/io/pdbqt/__init__.py +15 -0
biotite/structure/io/pdbqt/convert.py +107 -0
biotite/structure/io/pdbqt/file.py +640 -0
biotite/structure/io/pdbx/__init__.py +23 -0
biotite/structure/io/pdbx/bcif.py +648 -0
biotite/structure/io/pdbx/cif.py +1032 -0
biotite/structure/io/pdbx/component.py +246 -0
biotite/structure/io/pdbx/convert.py +1597 -0
biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +950 -0
biotite/structure/io/pdbx/legacy.py +267 -0
biotite/structure/io/tng/__init__.py +13 -0
biotite/structure/io/tng/file.py +46 -0
biotite/structure/io/trajfile.py +710 -0
biotite/structure/io/trr/__init__.py +13 -0
biotite/structure/io/trr/file.py +46 -0
biotite/structure/io/xtc/__init__.py +13 -0
biotite/structure/io/xtc/file.py +46 -0
biotite/structure/mechanics.py +75 -0
biotite/structure/molecules.py +353 -0
biotite/structure/pseudoknots.py +642 -0
biotite/structure/rdf.py +243 -0
biotite/structure/repair.py +253 -0
biotite/structure/residues.py +562 -0
biotite/structure/resutil.py +178 -0
biotite/structure/sasa.cpython-312-darwin.so +0 -0
biotite/structure/sasa.pyx +322 -0
biotite/structure/sequence.py +112 -0
biotite/structure/sse.py +327 -0
biotite/structure/superimpose.py +727 -0
biotite/structure/transform.py +504 -0
biotite/structure/util.py +98 -0
biotite/temp.py +86 -0
biotite/version.py +16 -0
biotite/visualize.py +251 -0
biotite-0.41.1.dist-info/METADATA +187 -0
biotite-0.41.1.dist-info/RECORD +340 -0
biotite-0.41.1.dist-info/WHEEL +4 -0
biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0

biotite/structure/info/standardize.py ADDED Viewed

@@ -0,0 +1,196 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.structure.info"
+__author__ = "Patrick Kunzmann"
+__all__ = ["standardize_order"]
+import warnings
+import numpy as np
+from .ccd import get_from_ccd
+from ..residues import get_residue_starts
+from ..error import BadStructureError
def standardize_order(atoms):
    """
    Compute an index array that brings the atoms of each residue of an
    :class:`AtomArray` or :class:`AtomArrayStack` into the standard
    *RCSB PDB* atom order.

    The standard order of a residue is taken from the corresponding
    reference residue in the official *Chemical Component Dictionary*.
    Atoms of an input residue that do not appear in the reference
    residue are placed at the end of that residue.
    An example for such atoms are optional hydrogen atoms that appear
    due to protonation.
    Residues that are not found in the *CCD* keep their current atom
    order and a warning is issued.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        Input structure with atoms that are potentially not in the
        *standard* order.

    Returns
    -------
    indices : ndarray, dtype=int, shape=(n,)
        When this index array is applied on the input `atoms`,
        the atoms for each residue are reordered to obtain the
        standard *RCSB PDB* atom order.

    Raises
    ------
    BadStructureError
        If the input `atoms` have duplicate atoms (same atom name)
        within a residue.

    Examples
    --------

    Use a single residue as example.

    >>> residue = atom_array[atom_array.res_id == 1]
    >>> print(residue)
        A       1  ASN N      N        -8.901    4.127   -0.555
        A       1  ASN CA     C        -8.608    3.135   -1.618
        A       1  ASN C      C        -7.117    2.964   -1.897
        A       1  ASN O      O        -6.634    1.849   -1.758
        A       1  ASN CB     C        -9.437    3.396   -2.889
        A       1  ASN CG     C       -10.915    3.130   -2.611
        A       1  ASN OD1    O       -11.269    2.700   -1.524
        A       1  ASN ND2    N       -11.806    3.406   -3.543
        A       1  ASN H1     H        -8.330    3.957    0.261
        A       1  ASN H2     H        -8.740    5.068   -0.889
        A       1  ASN H3     H        -9.877    4.041   -0.293
        A       1  ASN HA     H        -8.930    2.162   -1.239
        A       1  ASN HB2    H        -9.310    4.417   -3.193
        A       1  ASN HB3    H        -9.108    2.719   -3.679
        A       1  ASN HD21   H       -11.572    3.791   -4.444
        A       1  ASN HD22   H       -12.757    3.183   -3.294

    Reverse the atom array.
    Consequently, this also changes the atom order within the residue.

    >>> reordered = residue[np.arange(len(residue))[::-1]]
    >>> print(reordered)
        A       1  ASN HD22   H       -12.757    3.183   -3.294
        A       1  ASN HD21   H       -11.572    3.791   -4.444
        A       1  ASN HB3    H        -9.108    2.719   -3.679
        A       1  ASN HB2    H        -9.310    4.417   -3.193
        A       1  ASN HA     H        -8.930    2.162   -1.239
        A       1  ASN H3     H        -9.877    4.041   -0.293
        A       1  ASN H2     H        -8.740    5.068   -0.889
        A       1  ASN H1     H        -8.330    3.957    0.261
        A       1  ASN ND2    N       -11.806    3.406   -3.543
        A       1  ASN OD1    O       -11.269    2.700   -1.524
        A       1  ASN CG     C       -10.915    3.130   -2.611
        A       1  ASN CB     C        -9.437    3.396   -2.889
        A       1  ASN O      O        -6.634    1.849   -1.758
        A       1  ASN C      C        -7.117    2.964   -1.897
        A       1  ASN CA     C        -8.608    3.135   -1.618
        A       1  ASN N      N        -8.901    4.127   -0.555

    The order is restored with the exception of the N-terminus protonation.

    >>> restored = reordered[info.standardize_order(reordered)]
    >>> print(restored)
        A       1  ASN N      N        -8.901    4.127   -0.555
        A       1  ASN CA     C        -8.608    3.135   -1.618
        A       1  ASN C      C        -7.117    2.964   -1.897
        A       1  ASN O      O        -6.634    1.849   -1.758
        A       1  ASN CB     C        -9.437    3.396   -2.889
        A       1  ASN CG     C       -10.915    3.130   -2.611
        A       1  ASN OD1    O       -11.269    2.700   -1.524
        A       1  ASN ND2    N       -11.806    3.406   -3.543
        A       1  ASN H2     H        -8.740    5.068   -0.889
        A       1  ASN HA     H        -8.930    2.162   -1.239
        A       1  ASN HB2    H        -9.310    4.417   -3.193
        A       1  ASN HB3    H        -9.108    2.719   -3.679
        A       1  ASN HD21   H       -11.572    3.791   -4.444
        A       1  ASN HD22   H       -12.757    3.183   -3.294
        A       1  ASN H3     H        -9.877    4.041   -0.293
        A       1  ASN H1     H        -8.330    3.957    0.261
    """
    order = np.zeros(atoms.array_length(), dtype=int)

    # The exclusive stop gives every residue, including the last one,
    # a well-defined [first, last) interval
    boundaries = get_residue_starts(atoms, add_exclusive_stop=True)
    for first, last in zip(boundaries[:-1], boundaries[1:]):
        residue_name = atoms.res_name[first]
        reference_names = get_from_ccd(
            "chem_comp_atom", residue_name, "atom_id"
        )

        if reference_names is not None:
            # `_reorder()` works residue-local -> shift by residue start
            order[first:last] = first + _reorder(
                atoms.atom_name[first:last], reference_names
            )
        else:
            # Unknown residue: there is no reference order to apply
            warnings.warn(
                f"Residue '{residue_name}' is not in the CCD, "
                f"keeping current atom order"
            )
            order[first:last] = np.arange(first, last)

    return order
def _reorder(origin, target):
    """
    Create indices to `origin`, that change the order of `origin`,
    so that the order is the same as in `target`.

    Indices for elements of `target` that are not in `origin`
    are ignored.
    Indices for elements of `origin` that are not in `target`
    are appended to the end of the returned array.

    Parameters
    ----------
    origin : ndarray, dtype=str
        The atom names to reorder.
    target : ndarray, dtype=str
        The atom names in target order.

    Returns
    -------
    indices : ndarray, dtype=int
        Indices for `origin` that change the order of `origin`
        to the order of `target`.

    Raises
    ------
    BadStructureError
        If an element of `target` appears more than once in `origin`.
    """
    # Compare every target name with every origin name;
    # as `np.where()` iterates row-major, the hits are sorted by their
    # position in `target`, i.e. `origin_hits` is already in target order
    target_hits, origin_hits = np.where(
        target[:, np.newaxis] == origin[np.newaxis, :]
    )

    # A target name that is hit more than once means that `origin`
    # contains this name multiple times
    counts = np.bincount(target_hits, minlength=len(target))
    duplicate_indices = np.where(counts > 1)[0]
    if len(duplicate_indices) > 0:
        # Report the first duplicate name
        duplicate_name = target[duplicate_indices[0]]
        raise BadStructureError(
            f"Input structure has duplicate atom '{duplicate_name}'"
        )

    if len(origin_hits) < len(origin):
        # `origin` has atoms that are unknown to `target`
        # -> Identify these atoms and append them to the end
        found_mask = np.bincount(
            origin_hits, minlength=len(origin)
        ).astype(bool)
        return np.concatenate([
            origin_hits,
            np.where(~found_mask)[0]
        ])
    return origin_hits

biotite/structure/integrity.py ADDED Viewed

@@ -0,0 +1,268 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+"""
+This module allows checking of atom arrays and atom array stacks for
+errors in the structure.
+"""
+__name__ = "biotite.structure"
+__author__ = "Patrick Kunzmann, Daniel Bauer"
+__all__ = ["check_id_continuity", "check_atom_id_continuity",
+           "check_res_id_continuity", "check_backbone_continuity",
+           "check_duplicate_atoms", "check_bond_continuity",
+           "check_linear_continuity"]
+import numpy as np
+import warnings
+from .atoms import AtomArray, AtomArrayStack
+from .filter import (
+    filter_peptide_backbone, filter_phosphate_backbone, filter_linear_bond_continuity)
+from .box import coord_to_fraction
def _check_continuity(array):
    """
    Find the positions in `array` whose value is neither equal to
    nor exactly one larger than the preceding value.

    Parameters
    ----------
    array : ndarray
        The values to be checked.

    Returns
    -------
    discontinuity : ndarray, dtype=int
        The indices of the elements after a discontinuity.
    """
    steps = np.diff(array)
    # Only a step of 0 or 1 counts as continuous
    is_break = ~((steps == 0) | (steps == 1))
    # `steps[i]` describes the transition to element `i+1`
    return np.nonzero(is_break)[0] + 1
def check_id_continuity(array):
    """
    Check if the residue IDs are incremented by more than 1 or
    decremented, from one atom to the next one.

    An increment by more than 1 is a strong clue for missing residues,
    a decrement probably means the start of a new chain.

    DEPRECATED: Use :func:`check_res_id_continuity()` instead.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be checked.

    Returns
    -------
    discontinuity : ndarray, dtype=int
        Contains the indices of atoms after a discontinuity.
    """
    deprecation_message = (
        "'check_id_continuity()' is deprecated, "
        "use 'check_res_id_continuity()' instead"
    )
    warnings.warn(deprecation_message, DeprecationWarning)
    # The replacement function does the actual work
    discontinuity = check_res_id_continuity(array)
    return discontinuity
def check_atom_id_continuity(array):
    """
    Check if the atom IDs are incremented by more than 1 or
    decremented, from one atom to the next one.

    An increment by more than 1 is a strong clue for missing atoms.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be checked.
        Its ``atom_id`` annotation is read.

    Returns
    -------
    discontinuity : ndarray, dtype=int
        Contains the indices of atoms after a discontinuity.
    """
    return _check_continuity(array.atom_id)
def check_res_id_continuity(array):
    """
    Check if the residue IDs are incremented by more than 1 or
    decremented, from one atom to the next one.

    An increment by more than 1 is a strong clue for missing residues,
    a decrement probably means the start of a new chain.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be checked.
        Its ``res_id`` annotation is read.

    Returns
    -------
    discontinuity : ndarray, dtype=int
        Contains the indices of atoms after a discontinuity.
    """
    return _check_continuity(array.res_id)
def check_bond_continuity(array, min_len=1.2, max_len=1.8):
    """
    Check if the peptide or phosphate backbone atoms have a
    non-reasonable distance to the next residue.

    A large or very small distance is a very strong clue that there is
    no bond between those atoms, therefore the chain is discontinued.

    DEPRECATED: Please use :func:`check_backbone_continuity` for the
    same functionality.

    Parameters
    ----------
    array : AtomArray
        The array to be checked.
    min_len, max_len : float, optional
        The interval in which the atom-atom distance is evaluated as
        bond.

    Returns
    -------
    discontinuity : ndarray, dtype=int
        Contains the indices of atoms after a discontinuity.
    """
    deprecation_message = "Reimplemented into `check_backbone_continuity()`"
    warnings.warn(deprecation_message, DeprecationWarning)
    # Simply forward all arguments to the replacement function
    discontinuity = check_backbone_continuity(array, min_len, max_len)
    return discontinuity
def check_linear_continuity(array, min_len=1.2, max_len=1.8):
    """
    Check linear (consecutive) bond continuity of atoms in atom array.

    Parameters
    ----------
    array : AtomArray
        Arbitrary structure.
    min_len : float, optional
        Minimum bond length.
    max_len : float, optional
        Maximum bond length.

    Returns
    -------
    discontinuity : ndarray, dtype=int
        Indices of `array` corresponding to atoms where the bond
        with the preceding atom is beyond the provided bounds.

    See Also
    --------
    biotite.structure.filter.filter_linear_bond_continuity :
        A function to filter for atoms preserving the continuity
        (used here).
    biotite.structure.bonds.BondList :
        A class that doesn't depend on the atoms' order to identify
        bonds.
    """
    is_continuous = filter_linear_bond_continuity(array, min_len, max_len)
    # An entry of the continuity mask describes the transition from an
    # atom to its successor; the last entry has no successor and is
    # dropped.
    # A broken transition at position `i` is reported at the succeeding
    # atom `i+1`, hence the shift by one.
    # The first atom has no predecessor and is therefore never reported.
    broken_transitions = ~is_continuous[:-1]
    return np.nonzero(broken_transitions)[0] + 1
def check_backbone_continuity(array, min_len=1.2, max_len=1.8):
    """
    Check if the (peptide or phosphate) backbone atoms have a
    non-reasonable distance to the next atom.

    A large or very small distance is a very strong clue that there is
    no bond between those atoms, therefore the chain is discontinued.

    Parameters
    ----------
    array : AtomArray
        The array to be checked.
    min_len, max_len : float, optional
        The interval in which the atom-atom distance is evaluated as
        bond.

    Returns
    -------
    discontinuity : ndarray, dtype=int
        Contains the indices of atoms after a discontinuity.

    See Also
    --------
    filter_linear_bond_continuity :
        A function to filter for atoms preserving the continuity.
    filter_peptide_backbone :
        A function to filter for peptide backbone atoms.
    filter_phosphate_backbone :
        A function to filter for phosphate backbone atoms.
    """
    is_backbone = (
        filter_peptide_backbone(array) | filter_phosphate_backbone(array)
    )
    is_continuous = filter_linear_bond_continuity(
        array[is_backbone], min_len, max_len
    )
    # The continuity mask describes the transition from a backbone atom
    # to the next backbone atom:
    # Invert it and shift it by one, so that the atom *after* a broken
    # transition is marked; the first backbone atom is never marked
    breaks_in_backbone = np.insert(~is_continuous[:-1], 0, False)
    # Map the mask for the backbone atoms back onto the complete array
    breaks = np.full_like(is_backbone, False)
    breaks[is_backbone] = breaks_in_backbone
    return np.nonzero(breaks)[0]
def check_duplicate_atoms(array):
    """
    Check if a structure contains duplicate atoms, i.e. two atoms in a
    structure have the same annotations (coordinates may be different).

    Duplicate atoms may appear, when a structure has occupancy for an
    atom at two or more positions or when the *altloc* positions are
    improperly read.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be checked.

    Returns
    -------
    duplicate : ndarray, dtype=int
        Contains the indices of duplicate atoms.
        The first occurrence of an atom is not counted as duplicate.
        The array is empty (but still of integer type), if there are
        no duplicate atoms.
    """
    annots = [
        array.get_annotation(category)
        for category in array.get_annotation_categories()
    ]
    duplicates = []
    for i in range(1, array.array_length()):
        # Start with the assumption that all atoms in the array
        # until index i are duplicates of the atom at index i
        is_duplicate = np.full(i, True, dtype=bool)
        for annot in annots:
            # For each annotation array filter out the atoms until
            # index i that have an unequal annotation
            # to the atom at index i
            is_duplicate &= (annot[:i] == annot[i])
        # After checking all annotation arrays,
        # if there still is any duplicate to the atom at index i,
        # add i to the list of duplicate atom indices
        if is_duplicate.any():
            duplicates.append(i)
    # The explicit dtype is required for the case without duplicates:
    # NumPy would create a float array from an empty list, which cannot
    # be used as index array
    return np.array(duplicates, dtype=int)
def check_in_box(array):
    r"""
    Check if a structure contains atoms whose position is outside the
    box.

    Coordinates are outside the box, when they cannot be represented by
    a linear combination of the box vectors with scalar factors
    :math:`0 \le a_i < 1`.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be checked.

    Returns
    -------
    outside : ndarray, dtype=int
        Contains the indices of atoms outside the atom array's box.

    Raises
    ------
    TypeError
        If the structure has no box.
    """
    if array.box is None:
        raise TypeError("Structure has no box")
    box = array.box
    # NOTE(review): `array` itself (not `array.coord`) is handed to
    # `coord_to_fraction()` - confirm that the helper accepts atom arrays
    fractions = coord_to_fraction(array, box)
    # An atom is inside the box, if all of its fractions are in [0, 1)
    is_inside = ((fractions >= 0) & (fractions < 1)).all(axis=-1)
    # Report the offending atoms, i.e. the ones *outside* the box
    # NOTE(review): for an `AtomArrayStack` the mask presumably has one
    # row per model, so the first index axis would refer to models
    # instead of atoms - confirm the intended behavior for stacks
    return np.where(~is_inside)[0]

biotite/structure/io/__init__.py ADDED Viewed

@@ -0,0 +1,30 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+"""
+A subpackage for reading and writing structure related data.
+Macromolecular structure files (PDB, PDBx/mmCIF, BinaryCIF, etc.) and
+small molecule files (MOL, SDF, etc.) can be used
+to load an :class:`AtomArray` or :class:`AtomArrayStack`.
+Since the data model for the :class:`AtomArray` and
+:class:`AtomArrayStack` class does not support duplicate atoms,
+only one *altloc* can be chosen for each atom. Hence, the number of
+atoms may be lower in the atom array (stack) than in the respective
+structure file.
+The recommended format for reading structure files is *BinaryCIF*.
+It has by far the shortest parsing time and file size.
+Besides the mentioned structure formats, Gromacs trajectory files can be
+loaded, if `mdtraj` is installed.
+"""
+__name__ = "biotite.structure.io"
+__author__ = "Patrick Kunzmann"
+from .ctab import *
+from .general import *
+from .trajfile import *

biotite/structure/io/ctab.py ADDED Viewed

@@ -0,0 +1,72 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+# Report this module under the name of its parent subpackage
+# instead of its own file-based module name
+__name__ = "biotite.structure.io"
+__author__ = "Patrick Kunzmann"
+# The names exposed by a star import of this module
+__all__ = ["read_structure_from_ctab", "write_structure_to_ctab"]
+import warnings
+from ..bonds import BondType
+def read_structure_from_ctab(ctab_lines):
+    """
+    Parse a *MDL* connection table (Ctab) to obtain an
+    :class:`AtomArray`. :footcite:`Dalby1992`.
+    DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.
+    Parameters
+    ----------
+    ctab_lines : lines of str
+        The lines containing the *ctab*.
+        Must begin with the *counts* line and end with the `M END` line
+    Returns
+    -------
+    atoms : AtomArray
+        This :class:`AtomArray` contains the optional ``charge``
+        annotation and has an associated :class:`BondList`.
+    References
+    ----------
+    .. footbibliography::
+    """
+    warnings.warn("Moved to biotite.structure.io.mol.ctab", DeprecationWarning)
+    from biotite.structure.io.mol.ctab import read_structure_from_ctab
+    return read_structure_from_ctab(ctab_lines)
+def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
+    """
+    Convert an :class:`AtomArray` into a
+    *MDL* connection table (Ctab). :footcite:`Dalby1992`
+    DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.
+    Parameters
+    ----------
+    atoms : AtomArray
+        The array must have an associated :class:`BondList`.
+    Returns
+    -------
+    ctab_lines : lines of str
+        The lines containing the *ctab*.
+        The lines begin with the *counts* line and end with the `M END`
+        .line
+    default_bond_type : BondType
+        Bond type fallback in the *Bond block* if a bond has no bond_type
+        defined in *atoms* array. By default, each bond is treated as
+        :attr:`BondType.ANY`.
+    References
+    ----------
+    .. footbibliography::
+    """
+    warnings.warn("Moved to biotite.structure.io.mol.ctab", DeprecationWarning)
+    from biotite.structure.io.mol.ctab import write_structure_to_ctab
+    return write_structure_to_ctab(atoms, default_bond_type)

biotite/structure/io/dcd/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+"""
+This subpackage is used for reading and writing trajectories in the
+DCD format used by software like *CHARMM*, *OpenMM* and *NAMD*.
+"""
+__name__ = "biotite.structure.io.dcd"
+__author__ = "Patrick Kunzmann"
+from .file import *

biotite/structure/io/dcd/file.py ADDED Viewed

@@ -0,0 +1,65 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+# Report this module under the name of its parent subpackage
+# instead of its own file-based module name
+__name__ = "biotite.structure.io.dcd"
+__author__ = "Patrick Kunzmann"
+# The names exposed by a star import of this module
+__all__ = ["DCDFile"]
+import numpy as np
+from ..trajfile import TrajectoryFile
+from ...box import vectors_from_unitcell, unitcell_from_vectors
+class DCDFile(TrajectoryFile):
+    """
+    This file class represents a DCD trajectory file.
+    """
+    @classmethod
+    def traj_type(cls):
+        import mdtraj.formats as traj
+        return traj.DCDTrajectoryFile
+    @classmethod
+    def process_read_values(cls, read_values):
+        # .netcdf files use Angstrom
+        coord = read_values[0]
+        cell_lengths = read_values[1]
+        cell_angles = read_values[2]
+        if cell_lengths is None or cell_angles is None:
+             box = None
+        else:
+            box = np.stack(
+                [vectors_from_unitcell(a, b, c, alpha, beta, gamma)
+                for (a, b, c), (alpha, beta, gamma)
+                in zip(cell_lengths, np.deg2rad(cell_angles))],
+                axis=0
+            )
+        return coord, box, None
+    @classmethod
+    def prepare_write_values(cls, coord, box, time):
+        xyz = coord.astype(np.float32, copy=False) \
+              if coord is not None else None
+        if box is None:
+            cell_lengths = None
+            cell_angles  = None
+        else:
+            cell_lengths = np.zeros((len(box), 3), dtype=np.float32)
+            cell_angles  = np.zeros((len(box), 3), dtype=np.float32)
+            for i, model_box in enumerate(box):
+                a, b, c, alpha, beta, gamma = unitcell_from_vectors(model_box)
+                cell_lengths[i] = np.array((a, b, c))
+                cell_angles[i] = np.rad2deg((alpha, beta, gamma))
+        return {
+            "xyz" : xyz,
+            "cell_lengths" : cell_lengths,
+            "cell_angles" : cell_angles,
+        }
+    def set_time(self, time):
+        """
+        Reject simulation time values, as writing them is not
+        supported for this trajectory file.
+        Parameters
+        ----------
+        time : object
+            The simulation time. Any value other than ``None`` is
+            rejected.
+        Raises
+        ------
+        NotImplementedError
+            If `time` is not ``None``.
+        """
+        if time is not None:
+            raise NotImplementedError(
+                "This trajectory file does not support writing simulation time"
+            )