biotite 1.1.0__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +159 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +452 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +57 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +206 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +91 -0
- biotite/database/entrez/download.py +229 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +262 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +20 -0
- biotite/database/pubchem/query.py +830 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +159 -0
- biotite/database/rcsb/query.py +964 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +129 -0
- biotite/database/uniprot/query.py +293 -0
- biotite/file.py +232 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +203 -0
- biotite/sequence/align/alignment.py +680 -0
- biotite/sequence/align/banded.cpython-313-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-darwin.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-darwin.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +622 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +620 -0
- biotite/sequence/align/pairwise.cpython-313-darwin.so +0 -0
- biotite/sequence/align/pairwise.pyx +587 -0
- biotite/sequence/align/permutation.cpython-313-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-darwin.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +830 -0
- biotite/sequence/codec.cpython-313-darwin.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +477 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1115 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +229 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +104 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +284 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +171 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +450 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-darwin.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-darwin.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-darwin.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +567 -0
- biotite/sequence/search.py +118 -0
- biotite/sequence/seqtypes.py +713 -0
- biotite/sequence/sequence.py +374 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +133 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +110 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +171 -0
- biotite/structure/alphabet/unkerasify.py +122 -0
- biotite/structure/atoms.py +1554 -0
- biotite/structure/basepairs.py +1404 -0
- biotite/structure/bonds.cpython-313-darwin.so +0 -0
- biotite/structure/bonds.pyx +1972 -0
- biotite/structure/box.py +588 -0
- biotite/structure/celllist.cpython-313-darwin.so +0 -0
- biotite/structure/celllist.pyx +849 -0
- biotite/structure/chains.py +314 -0
- biotite/structure/charges.cpython-313-darwin.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +274 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +214 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +590 -0
- biotite/structure/geometry.py +655 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +295 -0
- biotite/structure/hbond.py +428 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +81 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +202 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +131 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +138 -0
- biotite/structure/info/radii.py +197 -0
- biotite/structure/info/standardize.py +186 -0
- biotite/structure/integrity.py +215 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +344 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +415 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +914 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +307 -0
- biotite/structure/io/pdb/file.py +1290 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-darwin.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +656 -0
- biotite/structure/io/pdbx/cif.py +1075 -0
- biotite/structure/io/pdbx/component.py +245 -0
- biotite/structure/io/pdbx/compress.py +321 -0
- biotite/structure/io/pdbx/convert.py +1745 -0
- biotite/structure/io/pdbx/encoding.cpython-313-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1031 -0
- biotite/structure/io/trajfile.py +693 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +73 -0
- biotite/structure/molecules.py +352 -0
- biotite/structure/pseudoknots.py +628 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +304 -0
- biotite/structure/residues.py +572 -0
- biotite/structure/sasa.cpython-313-darwin.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +178 -0
- biotite/structure/sequence.py +111 -0
- biotite/structure/sse.py +308 -0
- biotite/structure/superimpose.py +689 -0
- biotite/structure/transform.py +530 -0
- biotite/structure/util.py +168 -0
- biotite/version.py +16 -0
- biotite/visualize.py +265 -0
- biotite-1.1.0.dist-info/METADATA +190 -0
- biotite-1.1.0.dist-info/RECORD +332 -0
- biotite-1.1.0.dist-info/WHEEL +4 -0
- biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This module provides functions for calculation of characteristic values when
|
|
7
|
+
comparing multiple structures with each other.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
__name__ = "biotite.structure"
|
|
11
|
+
__author__ = "Patrick Kunzmann"
|
|
12
|
+
__all__ = ["rmsd", "rmspd", "rmsf", "average"]
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
from biotite.structure.atoms import AtomArrayStack, coord
|
|
16
|
+
from biotite.structure.geometry import index_distance
|
|
17
|
+
from biotite.structure.util import vector_dot
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def rmsd(reference, subject):
    r"""
    Calculate the RMSD between two structures.

    The *root-mean-square-deviation* (RMSD) expresses how strongly each
    model of a structure deviates, averaged over its atoms, from a
    reference structure.
    It is defined as:

    .. math:: RMSD = \sqrt{ \frac{1}{n} \sum\limits_{i=1}^n (x_i - x_{ref,i})^2}

    Parameters
    ----------
    reference : AtomArray or ndarray, dtype=float, shape=(n,3)
        The reference structure.
        Alternatively, coordinates can be provided directly as
        :class:`ndarray`.
    subject : AtomArray or AtomArrayStack or ndarray, dtype=float, shape=(n,3) or shape=(m,n,3)
        Structure(s) to be compared with `reference`.
        Alternatively, coordinates can be provided directly as
        :class:`ndarray`.

    Returns
    -------
    rmsd : float or ndarray, dtype=float, shape=(m,)
        RMSD between subject and reference.
        If subject is an :class:`AtomArray` a float is returned.
        If subject is an :class:`AtomArrayStack` a :class:`ndarray`
        containing the RMSD for each model is returned.

    See Also
    --------
    rmsf

    Notes
    -----
    This function does not superimpose the subject to its reference.
    In most cases :func:`superimpose()` should be called prior to this
    function.

    Examples
    --------

    Calculate the RMSD of all models to the first model:

    >>> superimposed, _ = superimpose(atom_array, atom_array_stack)
    >>> rms = rmsd(atom_array, superimposed)
    >>> print(np.around(rms, decimals=3))
    [0.000 1.928 2.103 2.209 1.806 2.172 2.704 1.360 2.337 1.818 1.879 2.471
     1.939 2.035 2.167 1.789 1.653 2.348 2.247 2.529 1.583 2.115 2.131 2.050
     2.512 2.666 2.206 2.397 2.328 1.868 2.316 1.984 2.124 1.761 2.642 1.721
     2.571 2.579]
    """
    # Per-atom squared displacement; the last axis indexes the atoms
    squared_deviation = _sq_euclidian(reference, subject)
    # Average over the atoms of each model, then take the root
    mean_squared_deviation = np.mean(squared_deviation, axis=-1)
    return np.sqrt(mean_squared_deviation)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def rmspd(reference, subject, periodic=False, box=None):
    r"""
    Calculate the RMSD of atom pair distances for given structures
    relative to those found in a reference structure.

    Unlike the standard RMSD, the *root-mean-square-pairwise-deviation*
    (RMSPD) is a fit-free method to determine deviations between
    a structure and a preset reference.

    .. math:: RMSPD = \sqrt{ \frac{1}{n^2} \sum\limits_{i=1}^n \sum\limits_{j \neq i}^n (d_{ij} - d_{ref,ij})^2}

    Parameters
    ----------
    reference : AtomArray or ndarray, dtype=float, shape=(n,3)
        The reference structure.
        Alternatively, coordinates can be provided directly as
        :class:`ndarray`.
    subject : AtomArray or AtomArrayStack or ndarray, dtype=float, shape=(n,3) or shape=(m,n,3)
        Structure(s) to be compared with `reference`.
        Alternatively, coordinates can be provided directly as
        :class:`ndarray`.
    periodic : bool, optional
        If set to true, periodic boundary conditions are taken into
        account (minimum-image convention).
        The `box` attribute of `reference` and `subject`, respectively,
        is used for calculation.
        An alternative box can be provided via the `box` parameter.
        By default, periodicity is ignored.
    box : ndarray, shape=(3,3) or shape=(m,3,3), optional
        If this parameter is set, the given box is used instead of the
        `box` attribute of `reference` and `subject`.

    Returns
    -------
    rmspd : float or ndarray, dtype=float, shape=(m,)
        Atom pair distance RMSD between subject and reference.
        If subject is an :class:`AtomArray` a float is returned.
        If subject is an :class:`AtomArrayStack` a :class:`ndarray`
        containing the RMSD for each model is returned.

    Warnings
    --------
    Internally, this function uses :func:`index_distance()`.
    For non-orthorhombic boxes (at least one angle deviates from
    90 degrees), periodic boundary conditions should be corrected
    prior to the computation of RMSPDs with `periodic` set to false
    to ensure correct results.
    (e.g. with :func:`remove_pbc()`).

    See Also
    --------
    index_distance
    remove_pbc
    rmsd
    """
    # Take the atom count from the coordinates instead of calling
    # 'array_length()', so that a plain coordinate ndarray is accepted
    # as reference, as documented
    reflen = coord(reference).shape[-2]
    # Enumerate all ordered index pairs (i, j) of the reference atoms;
    # the pairs with i == j have zero distance in both structures and
    # hence do not contribute to the sum
    index_i = np.repeat(np.arange(reflen), reflen)
    index_j = np.tile(np.arange(reflen), reflen)
    pairs = np.stack([index_i, index_j]).T
    refdist = index_distance(reference, pairs, periodic=periodic, box=box)
    subjdist = index_distance(subject, pairs, periodic=periodic, box=box)

    # sqrt(sum / n^2) == sqrt(sum) / n
    rmspd = np.sqrt(np.sum((subjdist - refdist) ** 2, axis=-1)) / reflen
    return rmspd
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def rmsf(reference, subject):
    r"""
    Calculate the RMSF between two structures.

    The *root-mean-square-fluctuation* (RMSF) gives, for each atom, its
    positional deviation from a reference structure, averaged over all
    models.
    Usually the reference structure is the average over all models.
    The RMSF is defined as:

    .. math:: RMSF(i) = \sqrt{ \frac{1}{T} \sum\limits_{t=1}^T (x_i(t) - x_{ref,i})^2}

    Parameters
    ----------
    reference : AtomArray or ndarray, dtype=float, shape=(n,3)
        The reference structure.
        Alternatively, coordinates can be provided directly as
        :class:`ndarray`.
    subject : AtomArrayStack or ndarray, dtype=float, shape=(m,n,3)
        Structures to be compared with `reference`.
        The time *t* is represented by the models in the
        :class:`AtomArrayStack`.
        Alternatively, coordinates can be provided directly as
        :class:`ndarray`.

    Returns
    -------
    rmsf : ndarray, dtype=float, shape=(n,)
        RMSF between subject and reference structure.
        Each element gives the RMSF for the atom at the respective
        index.

    See Also
    --------
    rmsd

    Notes
    -----
    This function does not superimpose the subject to its reference.
    In most cases :func:`superimpose()` should be called prior to this
    function.

    Examples
    --------

    Calculate the :math:`C_\alpha` RMSF of all models to the average
    model:

    >>> ca = atom_array_stack[:, atom_array_stack.atom_name == "CA"]
    >>> ca_average = average(ca)
    >>> ca, _ = superimpose(ca_average, ca)
    >>> print(rmsf(ca_average, ca))
    [1.372 0.360 0.265 0.261 0.288 0.204 0.196 0.306 0.353 0.238 0.266 0.317
     0.358 0.448 0.586 0.369 0.332 0.396 0.410 0.968]
    """
    # Per-atom squared displacement for every model
    squared_deviation = _sq_euclidian(reference, subject)
    # Average over the second-to-last axis (the models), then take the root
    mean_squared_deviation = np.mean(squared_deviation, axis=-2)
    return np.sqrt(mean_squared_deviation)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def average(atoms):
    """
    Calculate an average structure.

    The coordinates of the returned structure are the mean of the
    coordinates of the input models.

    Parameters
    ----------
    atoms : AtomArrayStack or ndarray, dtype=float, shape=(m,n,3)
        The structure models to be averaged.
        Alternatively, coordinates can be provided directly as
        :class:`ndarray`.

    Returns
    -------
    average : AtomArray or ndarray, dtype=float, shape=(n,3)
        Structure with averaged atom coordinates.
        If `atoms` is an :class:`ndarray`, an :class:`ndarray` is
        returned as well.

    Raises
    ------
    TypeError
        If the coordinates of `atoms` are not three-dimensional.

    See Also
    --------
    rmsd, rmsf

    Notes
    -----
    The calculated average structure is not suitable for visualization
    or geometric calculations, since bond lengths and angles will
    deviate from meaningful values.
    This method is rather useful to provide a reference structure for
    calculation of e.g. the RMSD or RMSF.
    """
    model_coords = coord(atoms)
    if model_coords.ndim != 3:
        raise TypeError("Expected an AtomArrayStack or an ndarray with shape (m,n,3)")

    # Collapse the model axis
    averaged_coords = np.mean(model_coords, axis=0)
    if not isinstance(atoms, AtomArrayStack):
        # Plain coordinates in -> plain coordinates out
        return averaged_coords

    # Reuse a copy of the first model as template for the result
    # and overwrite its coordinates
    template = atoms[0].copy()
    template.coord = averaged_coords
    return template
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _sq_euclidian(reference, subject):
    """
    Calculate the squared euclidian distance between corresponding
    atoms of two structures.

    Parameters
    ----------
    reference : AtomArray or ndarray, dtype=float, shape=(n,3)
        Reference structure.
    subject : AtomArray or AtomArrayStack or ndarray, dtype=float, shape=(n,3) or shape=(m,n,3)
        Structure(s) whose atoms squared euclidian distance to
        `reference` is measured.

    Returns
    -------
    ndarray, dtype=float, shape=(n,) or shape=(m,n)
        Squared euclidian distance between subject and reference.
        If subject is an :class:`AtomArray` a 1-D array is returned.
        If subject is an :class:`AtomArrayStack` a 2-D array is
        returned.
        In this case the first dimension indexes the AtomArray.

    Raises
    ------
    TypeError
        If the coordinates of `reference` are not two-dimensional.
    """
    ref_xyz = coord(reference)
    subj_xyz = coord(subject)
    if ref_xyz.ndim != 2:
        raise TypeError(
            "Expected an AtomArray or an ndarray with shape (n,3) as reference"
        )
    # The reference coordinates are subtracted from the subject coordinates
    displacement = subj_xyz - ref_xyz
    return vector_dot(displacement, displacement)
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This module provides functions to calculate atomistic densities.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.structure"
|
|
10
|
+
__author__ = "Daniel Bauer"
|
|
11
|
+
__all__ = ["density"]
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from biotite.structure.atoms import coord
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def density(atoms, selection=None, delta=1.0, bins=None, density=False, weights=None):
    r"""
    Compute the density of the selected atoms.

    This creates a 3d histogram over the coordinates of selected atoms.
    By default, the grid for the histogram is built based on the
    coordinates of the given `atoms` with an even gridspacing of
    `delta` in all three dimensions.
    Alternatively, a custom grid can be used.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack or ndarray, shape=(n,3) or shape=(m,n,3)
        The density is calculated based on these atoms.
        Alternatively, the coordinates can be directly provided as
        `ndarray`.
    selection : ndarray, dtype=bool, shape=(n,), optional
        Boolean mask for `atoms` to calculate the density only on a set
        of atoms.
    delta : float, optional
        Distance between grid points for density calculation (in Å).
    bins : int or sequence of scalars or str, optional
        Bins for the histogram.

        - If `bins` is an `int`, it defines the number of bins.
        - If `bins` is a sequence, it defines the bin edges, ignoring
          the actual coordinates of the `atoms` selection.
        - If `bins` is a string, it defines the function used to
          calculate the bins.

        See :func:`numpy.histogramdd()` for further details.
    density : boolean, optional
        If False, the number of samples in each bin is returned.
        Otherwise, returns the probability density function of each bin.
        See :func:`numpy.histogramdd()` for further details.
    weights : array-like, shape=(n,) or shape=(m,n), optional
        An array of values to weight the contribution of *n* atoms in
        *m* models.
        If the shape is *(n,)*, the weights will be interpreted as
        *per atom*.
        A shape of *(m,n)* allows to additionally weight atoms on a
        *per model* basis.

    Returns
    -------
    H : ndarray, dtype=float
        The three-dimensional histogram of the selected atoms.
        The histogram takes the atoms in all models into account.
        The length of the histogram depends on `atoms` coordinates and
        `delta`, or the supplied `bins` input parameter.
    edges : list of ndarray, dtype=float
        A list containing the 3 arrays describing the bin edges.
    """
    coords = coord(atoms)

    is_stack = coords.ndim == 3

    # Define the grid for coordinate binning based on coordinates of
    # supplied atoms
    # This makes the binning independent of a supplied box vector and
    # fluctuating box dimensions are not a problem
    # However, this means that the user has to make sure the region of
    # interest is in the center of the box, i.e. by centering the
    # investigated protein in the box.
    if bins is None:
        # Reduce over models and atoms for a stack, over atoms otherwise
        axis = (0, 1) if is_stack else 0
        grid_min = np.min(coords, axis=axis)
        grid_max = np.max(coords, axis=axis)
        bins = [
            np.arange(grid_min[dim], grid_max[dim] + delta, delta)
            for dim in range(3)
        ]

    if selection is None:
        selected_coords = coords
    else:
        selected_coords = coords[..., selection, :]

    # Reshape the coords into Nx3
    flat_coords = selected_coords.reshape(
        (np.prod(selected_coords.shape[:-1]), 3)
    )

    # We need a weight value per coordinate, but input might be per atom
    if weights is not None:
        # Accept any array-like (e.g. a list), not only an ndarray
        weights = np.asarray(weights)
        if is_stack and weights.ndim < 2:
            # Repeat the per-atom weights once for each model
            weights = np.tile(weights, len(selected_coords))
        weights = weights.reshape(flat_coords.shape[0])

    # Calculate the histogram
    hist = np.histogramdd(flat_coords, bins=bins, density=density, weights=weights)
    return hist
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This module handles conversion of RNA structures to
|
|
7
|
+
dot-bracket-notation.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
# Override the module name so that it reads as the subpackage name
# instead of the name of this submodule.
__name__ = "biotite.structure"
__author__ = "Tom David Müller"
# Names exported by a star import of this module.
__all__ = ["dot_bracket_from_structure", "dot_bracket", "base_pairs_from_dot_bracket"]
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
from biotite.structure.basepairs import base_pairs
|
|
16
|
+
from biotite.structure.pseudoknots import pseudoknots
|
|
17
|
+
from biotite.structure.residues import get_residue_count, get_residue_positions
|
|
18
|
+
|
|
19
|
+
# Symbols of the dot-bracket-letter notation.
# `dot_bracket()` indexes these by pseudoknot order; the opening and the
# closing symbol found at the same index form one bracket type.
_OPENING_BRACKETS = "([{<ABCDEFGHIJKLMNOPQRSTUVWXYZ"
_CLOSING_BRACKETS = ")]}>abcdefghijklmnopqrstuvwxyz"
# Byte versions of the symbols, used for writing into a ``bytearray``.
_OPENING_BRACKETS_BYTES = bytes(_OPENING_BRACKETS, "utf-8")
_CLOSING_BRACKETS_BYTES = bytes(_CLOSING_BRACKETS, "utf-8")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def dot_bracket_from_structure(
    nucleic_acid_strand, scores=None, max_pseudoknot_order=None
):
    """
    Represent a nucleic-acid-strand in dot-bracket-letter-notation
    (DBL-notation). :footcite:`Antczak2018`

    The base pairs are obtained from :func:`base_pairs()`, mapped to
    residue positions and then passed on to :func:`dot_bracket()`.

    Parameters
    ----------
    nucleic_acid_strand : AtomArray
        The nucleic acid strand to be represented in DBL-notation.
    scores : ndarray, dtype=int, shape=(n,) (default: None)
        The score for each base pair, which is passed on to
        :func:`pseudoknots()`.
    max_pseudoknot_order : int (default: None)
        The maximum pseudoknot order to be found. If a base pair would
        be of a higher order, it is represented as unpaired. If ``None``
        is given, all base pairs are evaluated.

    Returns
    -------
    notations : list [str, ...]
        The DBL-notation for each solution from :func:`pseudoknots()`.
        If the strand contains no base pair, the list holds a single
        notation in which every residue is unpaired (``.``).

    See Also
    --------
    base_pairs
    pseudoknots
    dot_bracket

    References
    ----------

    .. footbibliography::
    """
    basepairs = base_pairs(nucleic_acid_strand)
    length = get_residue_count(nucleic_acid_strand)
    if len(basepairs) == 0:
        # Without any pairing interaction every residue is unpaired:
        # the notation still needs one symbol per residue
        return ["." * length]
    # Convert the base pair entries into residue positions, i.e. the
    # positions the symbols have in the notation string
    basepairs = get_residue_positions(nucleic_acid_strand, basepairs)
    return dot_bracket(
        basepairs, length, scores=scores, max_pseudoknot_order=max_pseudoknot_order
    )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def dot_bracket(basepairs, length, scores=None, max_pseudoknot_order=None):
    """
    Represent a nucleic acid strand in dot-bracket-letter-notation
    (DBL-notation). :footcite:`Antczak2018`

    The nucleic acid strand is represented as nucleotide sequence,
    where the nucleotides are counted continuously from zero.

    Parameters
    ----------
    basepairs : ndarray, shape=(n,2)
        Each row corresponds to the positions of the bases in the
        strand.
    length : int
        The number of bases in the strand.
    scores : ndarray, dtype=int, shape=(n,) (default: None)
        The score for each base pair, which is passed on to
        :func:`pseudoknots()`
    max_pseudoknot_order : int (default: None)
        The maximum pseudoknot order to be found. If a base pair would
        be of a higher order, it is represented as unpaired. If ``None``
        is given, all pseudoknot orders are evaluated.

    Returns
    -------
    notations : list [str, ...]
        The DBL-notation for each solution from :func:`pseudoknots()`.
        If `basepairs` is empty, a single notation consisting only of
        unpaired symbols (``.``) is returned.

    Examples
    --------
    The sequence ``ACGTC`` has a length of 5. If there was to be a
    pairing interaction between the ``A`` and ``T``, `basepairs` would
    have the form:

    >>> import numpy as np
    >>> basepairs = np.array([[0, 3]])

    The DBL Notation can then be found with ``dot_bracket()``:

    >>> dot_bracket(basepairs, 5)[0]
    '(..).'


    See Also
    --------
    dot_bracket_from_structure
    base_pairs
    pseudoknots

    References
    ----------

    .. footbibliography::
    """
    basepairs = np.asarray(basepairs)
    if basepairs.size == 0:
        # No pairing interaction at all: every position is unpaired.
        # Handling this here also accepts empty inputs that are not
        # two-dimensional (e.g. ``[]``), which the sorting along
        # ``axis=1`` below would reject
        return ["." * length]

    # Make sure the lower residue is on the left for each row
    basepairs = np.sort(basepairs, axis=1)

    # Get pseudoknot order
    pseudoknot_order = pseudoknots(
        basepairs, scores=scores, max_pseudoknot_order=max_pseudoknot_order
    )

    # Each optimal pseudoknot order solution is represented in
    # dot-bracket-notation: start with all positions unpaired and
    # overwrite the positions of the paired bases
    notations = [bytearray(b"." * length) for _ in range(len(pseudoknot_order))]
    for notation, solution in zip(notations, pseudoknot_order):
        for (opening_pos, closing_pos), order in zip(basepairs, solution):
            if order == -1:
                # An order of -1 leaves both positions unpaired
                continue
            # The pseudoknot order selects the bracket type
            notation[opening_pos] = _OPENING_BRACKETS_BYTES[order]
            notation[closing_pos] = _CLOSING_BRACKETS_BYTES[order]
    return [notation.decode() for notation in notations]
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def base_pairs_from_dot_bracket(dot_bracket_notation):
    """
    Extract the base pairs from a nucleic-acid-strand in
    dot-bracket-letter-notation (DBL-notation). :footcite:`Antczak2018`

    The nucleic acid strand is represented as nucleotide sequence,
    where the nucleotides are counted continuously from zero.

    Parameters
    ----------
    dot_bracket_notation : str
        The DBL-notation.

    Returns
    -------
    basepairs : ndarray, shape=(n,2)
        Each row corresponds to the positions of the bases in the
        sequence.
        The rows are sorted by the position of the opening bracket.
        If the notation contains no base pair, an empty array with
        shape ``(0, 2)`` is returned.

    Raises
    ------
    ValueError
        If the notation contains a character that is neither ``.`` nor
        a bracket symbol, a closing bracket without a preceding opening
        bracket of the same type, or an opening bracket that is never
        closed.

    Examples
    --------
    The notation string ``'(..).'`` contains a base pair between the
    indices 0 and 3. This pairing interaction can be extracted
    conveniently by the use of :func:`base_pairs_from_dot_bracket()`:

    >>> base_pairs_from_dot_bracket('(..).')
    array([[0, 3]])

    See Also
    --------
    dot_bracket

    References
    ----------

    .. footbibliography::
    """
    basepairs = []
    # One stack of still open positions for each bracket type
    opened_brackets = [[] for _ in range(len(_OPENING_BRACKETS))]

    # Iterate through input string and extract base pairs
    for pos, symbol in enumerate(dot_bracket_notation):
        if symbol == ".":
            # Unpaired position
            continue

        opening_index = _OPENING_BRACKETS.find(symbol)
        if opening_index != -1:
            # Remember the position until the matching closing bracket
            # of the same type is encountered
            opened_brackets[opening_index].append(pos)
            continue

        closing_index = _CLOSING_BRACKETS.find(symbol)
        if closing_index == -1:
            raise ValueError(f"'{symbol}' is an invalid character for DBL-notation")

        # For each closing bracket, the base pair consists of the
        # current position and the most recently opened position of
        # the same bracket type
        open_positions = opened_brackets[closing_index]
        if not open_positions:
            raise ValueError(
                f"Invalid DBL-notation, closing bracket '{symbol}' at "
                f"position {pos} has no matching opening bracket"
            )
        basepairs.append((open_positions.pop(), pos))

    if any(opened_brackets):
        raise ValueError(
            "Invalid DBL-notation, not all opening brackets have a "
            "closing bracket"
        )

    if not basepairs:
        # Keep the documented two-column shape for a notation
        # without any base pair
        return np.empty((0, 2), dtype=int)

    # Sort the base pair indices in ascending order
    basepairs = np.array(basepairs)
    return basepairs[np.argsort(basepairs[:, 0])]
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This module contains all possible errors of the `structure` subpackage.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
# Override the module name so that it reads as the subpackage name
# instead of the name of this submodule.
__name__ = "biotite.structure"
__author__ = "Patrick Kunzmann"
# Names exported by a star import of this module.
__all__ = [
    "BadStructureError",
    "IncompleteStructureWarning",
    "UnexpectedStructureWarning",
]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class BadStructureError(Exception):
    """
    Error signalling that a structure is unsuitable for the operation
    it was passed to.
    """
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class IncompleteStructureWarning(Warning):
    """
    Warning category signalling that a structure is incomplete.
    """
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class UnexpectedStructureWarning(Warning):
    """
    Warning category signalling that a structure was not expected.
    """
|