biotite 1.1.0__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +159 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +452 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +57 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +206 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +91 -0
- biotite/database/entrez/download.py +229 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +262 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +20 -0
- biotite/database/pubchem/query.py +830 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +159 -0
- biotite/database/rcsb/query.py +964 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +129 -0
- biotite/database/uniprot/query.py +293 -0
- biotite/file.py +232 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +203 -0
- biotite/sequence/align/alignment.py +680 -0
- biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +622 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +620 -0
- biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +587 -0
- biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +830 -0
- biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +477 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1115 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +229 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +104 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +284 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +171 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +450 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +567 -0
- biotite/sequence/search.py +118 -0
- biotite/sequence/seqtypes.py +713 -0
- biotite/sequence/sequence.py +374 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +133 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +110 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +171 -0
- biotite/structure/alphabet/unkerasify.py +122 -0
- biotite/structure/atoms.py +1554 -0
- biotite/structure/basepairs.py +1404 -0
- biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +1972 -0
- biotite/structure/box.py +588 -0
- biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +849 -0
- biotite/structure/chains.py +314 -0
- biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +274 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +214 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +590 -0
- biotite/structure/geometry.py +655 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +295 -0
- biotite/structure/hbond.py +428 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +81 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +202 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +131 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +138 -0
- biotite/structure/info/radii.py +197 -0
- biotite/structure/info/standardize.py +186 -0
- biotite/structure/integrity.py +215 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +344 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +415 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +914 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +307 -0
- biotite/structure/io/pdb/file.py +1290 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +656 -0
- biotite/structure/io/pdbx/cif.py +1075 -0
- biotite/structure/io/pdbx/component.py +245 -0
- biotite/structure/io/pdbx/compress.py +321 -0
- biotite/structure/io/pdbx/convert.py +1745 -0
- biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1031 -0
- biotite/structure/io/trajfile.py +693 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +73 -0
- biotite/structure/molecules.py +352 -0
- biotite/structure/pseudoknots.py +628 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +304 -0
- biotite/structure/residues.py +572 -0
- biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +178 -0
- biotite/structure/sequence.py +111 -0
- biotite/structure/sse.py +308 -0
- biotite/structure/superimpose.py +689 -0
- biotite/structure/transform.py +530 -0
- biotite/structure/util.py +168 -0
- biotite/version.py +16 -0
- biotite/visualize.py +265 -0
- biotite-1.1.0.dist-info/METADATA +190 -0
- biotite-1.1.0.dist-info/RECORD +332 -0
- biotite-1.1.0.dist-info/WHEEL +6 -0
- biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Use this module to calculate the Solvent Accessible Surface Area (SASA) of
|
|
7
|
+
a protein or single atoms.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
__name__ = "biotite.structure"
|
|
11
|
+
__author__ = "Patrick Kunzmann"
|
|
12
|
+
__all__ = ["sasa"]
|
|
13
|
+
|
|
14
|
+
cimport cython
|
|
15
|
+
cimport numpy as np
|
|
16
|
+
from libc.stdlib cimport malloc, free
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
from .celllist import CellList
|
|
20
|
+
from .filter import filter_solvent, filter_monoatomic_ions
|
|
21
|
+
from .info.radii import vdw_radius_protor, vdw_radius_single
|
|
22
|
+
|
|
23
|
+
ctypedef np.uint8_t np_bool
|
|
24
|
+
ctypedef np.int64_t int64
|
|
25
|
+
ctypedef np.float32_t float32
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
         bint ignore_ions=True, int point_number=1000,
         point_distr="Fibonacci", vdw_radii="ProtOr"):
    """
    sasa(array, probe_radius=1.4, atom_filter=None, ignore_ions=True,
         point_number=1000, point_distr="Fibonacci", vdw_radii="ProtOr")

    Calculate the Solvent Accessible Surface Area (SASA) of a protein.

    This function uses the Shrake-Rupley ("rolling probe")
    algorithm :footcite:`Shrake1973`:
    Every atom is occupied by an evenly distributed point mesh. The
    points that can be reached by the "rolling probe", are surface
    accessible.

    Parameters
    ----------
    array : AtomArray
        The protein model to calculate the SASA for.
    probe_radius : float, optional
        The VdW-radius of the solvent molecules (default: 1.4).
    atom_filter : ndarray, dtype=bool, optional
        If this parameter is given, SASA is only calculated for the
        filtered atoms.
    ignore_ions : bool, optional
        If true, all monoatomic ions are removed before SASA calculation
        (default: True).
    point_number : int, optional
        The number of points in the mesh occupying each atom for SASA
        calculation (default: 1000). The SASA calculation time is
        proportional to the amount of sphere points.
    point_distr : str or function, optional
        If a function is given, the function is used to calculate the
        point distribution for the mesh (the function must take `int`
        *n* as parameter and return a *(n x 3)* :class:`ndarray`).
        The per-point area and the initial number of accessible points
        are both derived from `point_number`, so the function is
        expected to return exactly `point_number` points.
        Alternatively a string can be given to choose a built-in
        distribution:

            - **Fibonacci** - Distribute points using a golden section
              spiral.

        By default *Fibonacci* is used.
    vdw_radii : str or ndarray, dtype=float, optional
        Indicates the set of VdW radii to be used. If an `array`-length
        :class:`ndarray` is given, each atom gets the radius at the
        corresponding index. Radii given for atoms that are not used in
        SASA calculation (e.g. solvent atoms) can have arbitrary values
        (e.g. `NaN`). If instead a string is given, one of the
        built-in sets is used:

            - **ProtOr** - A set, which does not require hydrogen atoms
              in the model. Suitable for crystal structures.
              :footcite:`Tsai1999`
            - **Single** - A set, which uses a defined VdW radius for
              every single atom, therefore hydrogen atoms are required
              in the model (e.g. NMR elucidated structures).
              :footcite:`Bondi1964`

        By default *ProtOr* is used.


    Returns
    -------
    sasa : ndarray, dtype=float, shape=(n,)
        Atom-wise SASA. `NaN` for atoms where SASA has not been
        calculated
        (solvent atoms, hydrogen atoms (ProtOr), atoms not in
        `atom_filter`).

    Raises
    ------
    ValueError
        If `point_distr` is neither a callable nor a known distribution
        name, if a `vdw_radii` array does not have one entry per atom,
        or if the atom coordinates, the occluding atoms or the sphere
        points are empty.
    KeyError
        If `vdw_radii` is a string that does not name a built-in set.

    References
    ----------

    .. footbibliography::

    """
    cdef int i=0, j=0, k=0, adj_atom_i=0, rel_atom_i=0

    cdef np.ndarray sasa_filter
    cdef np.ndarray occl_filter
    if atom_filter is not None:
        # Filter for all atoms to calculate SASA for
        sasa_filter = np.array(atom_filter, dtype=bool)
    else:
        sasa_filter = np.ones(len(array), dtype=bool)
    # Filter for all atoms that are considered for occlusion calculation
    # sasa_filter is subfilter of occlusion_filter
    occl_filter = np.ones(len(array), dtype=bool)
    # Remove water residues, since it is the solvent
    filter = ~filter_solvent(array)
    sasa_filter = sasa_filter & filter
    occl_filter = occl_filter & filter
    if ignore_ions:
        filter = ~filter_monoatomic_ions(array)
        sasa_filter = sasa_filter & filter
        occl_filter = occl_filter & filter

    cdef np.ndarray sphere_points
    if callable(point_distr):
        sphere_points = point_distr(point_number)
    elif point_distr == "Fibonacci":
        sphere_points = _create_fibonacci_points(point_number)
    else:
        raise ValueError(f"'{point_distr}' is not a valid point distribution")
    sphere_points = sphere_points.astype(np.float32)

    cdef np.ndarray radii
    if isinstance(vdw_radii, np.ndarray):
        radii = vdw_radii.astype(np.float32)
        if len(radii) != array.array_length():
            raise ValueError(
                f"Amount VdW radii ({len(radii)}) and "
                f"amount of atoms ({array.array_length()}) are not equal"
            )
    elif vdw_radii == "ProtOr":
        # The ProtOr set does not use hydrogen atoms
        filter = (array.element != "H")
        sasa_filter = sasa_filter & filter
        occl_filter = occl_filter & filter
        radii = np.full(len(array), np.nan, dtype=np.float32)
        for i in np.arange(len(radii))[occl_filter]:
            rad = vdw_radius_protor(array.res_name[i], array.atom_name[i])
            # 1.8 is default radius
            radii[i] = rad if rad is not None else 1.8
    elif vdw_radii == "Single":
        radii = np.full(len(array), np.nan, dtype=np.float32)
        for i in np.arange(len(radii))[occl_filter]:
            rad = vdw_radius_single(array.element[i])
            # 1.8 is used as fallback radius
            # NOTE(review): the comment here formerly stated 1.5 while
            # the code uses 1.8 -- confirm which value is intended
            radii[i] = rad if rad is not None else 1.8
    else:
        raise KeyError(f"'{vdw_radii}' is not a valid radii set")
    # Increase atom radii by probe size ("rolling probe")
    radii += probe_radius

    # Memoryview for filter
    # Problem with creating boolean memoryviews
    # -> Type uint8 is used
    cdef np_bool[:] sasa_filter_view = np.frombuffer(sasa_filter,
                                                     dtype=np.uint8)

    cdef np.ndarray occl_r = radii[occl_filter]
    # Atom array containing occluding atoms
    occl_array = array[occl_filter]

    # Memoryviews for coordinates of entire (main) array
    # and for coordinates of occluding atom array
    cdef float32[:,:] main_coord = array.coord.astype(np.float32,
                                                      copy=False)
    cdef float32[:,:] occl_coord = occl_array.coord.astype(np.float32,
                                                           copy=False)
    # Memoryviews for sphere points
    cdef float32[:,:] sphere_coord = sphere_points
    # Check if any of these arrays are empty to prevent segfault
    if (
        main_coord.shape[0] == 0
        or occl_coord.shape[0] == 0
        or sphere_coord.shape[0] == 0
    ):
        raise ValueError("Coordinates are empty")
    # Memoryviews for radii of SASA and occluding atoms
    # and for their squares
    cdef float32[:] atom_radii = radii
    cdef float32[:] atom_radii_sq = radii * radii
    cdef float32[:] occl_radii = occl_r
    cdef float32[:] occl_radii_sq = occl_r * occl_r
    # Memoryview for atomwise SASA
    cdef float32[:] sasa = np.full(len(array), np.nan, dtype=np.float32)

    # Area of a sphere point on a unit sphere
    cdef float32 area_per_point = 4.0 * np.pi / point_number

    # Define further statically typed variables
    # that are needed for SASA calculation
    cdef int n_accesible = 0
    cdef float32 radius = 0
    cdef float32 radius_sq = 0
    cdef float32 adj_radius = 0
    cdef float32 adj_radius_sq = 0
    cdef float32 dist_sq = 0
    cdef float32 point_x = 0
    cdef float32 point_y = 0
    cdef float32 point_z = 0
    cdef float32 atom_x = 0
    cdef float32 atom_y = 0
    cdef float32 atom_z = 0
    cdef float32 occl_x = 0
    cdef float32 occl_y = 0
    cdef float32 occl_z = 0
    cdef float32[:,:] relevant_occl_coord = None

    # Cell size is as large as the maximum distance,
    # where two atoms can intersect.
    # Therefore intersecting atoms are always in the same or adjacent cell.
    cell_list = CellList(occl_array, np.max(radii[occl_filter])*2)
    cdef np.ndarray cell_indices
    cdef int[:,:] cell_indices_view
    cdef int max_adj_list_length = 0
    cdef int array_length = array.array_length()

    cell_indices = cell_list.get_atoms_in_cells(array.coord)
    cell_indices_view = cell_indices
    # The adjacency list of atom `i` is read as `cell_indices_view[i, j]`,
    # i.e. `j` runs along the second axis.
    # Hence, the loop bound must be the length of that axis:
    # As bounds checking is disabled, a larger bound would read past the
    # end of a row that is completely filled (no terminating -1)
    max_adj_list_length = cell_indices.shape[1]

    # Later on, this array stores coordinates for actual
    # occluding atoms for a certain atom to calculate the
    # SASA for
    # The first three indices of the second axis
    # are x, y and z, the last one is the squared radius
    # This list is as long as the maximal length of a list of
    # adjacent atoms
    relevant_occl_coord = np.zeros((max_adj_list_length, 4),
                                   dtype=np.float32)

    # Actual SASA calculation
    for i in range(array_length):
        # First level: The atoms to calculate SASA for
        if not sasa_filter_view[i]:
            # SASA is not calculated for this atom
            continue
        n_accesible = point_number
        atom_x = main_coord[i,0]
        atom_y = main_coord[i,1]
        atom_z = main_coord[i,2]
        radius = atom_radii[i]
        radius_sq = atom_radii_sq[i]
        # Find occluding atoms from list of adjacent atoms
        rel_atom_i = 0
        for j in range(max_adj_list_length):
            # Remove all atoms, where the distance to the relevant atom
            # is larger than the sum of the radii,
            # since those atoms do not touch
            # If distance is 0, it is the same atom,
            # and the atom is removed from the list as well
            adj_atom_i = cell_indices_view[i,j]
            if adj_atom_i == -1:
                # -1 means end of list
                break
            occl_x = occl_coord[adj_atom_i,0]
            occl_y = occl_coord[adj_atom_i,1]
            occl_z = occl_coord[adj_atom_i,2]
            adj_radius = occl_radii[adj_atom_i]
            adj_radius_sq = occl_radii_sq[adj_atom_i]
            dist_sq = distance_sq(atom_x, atom_y, atom_z,
                                  occl_x, occl_y, occl_z)
            if dist_sq != 0 \
               and dist_sq < (adj_radius+radius) * (adj_radius+radius):
                relevant_occl_coord[rel_atom_i,0] = occl_x
                relevant_occl_coord[rel_atom_i,1] = occl_y
                relevant_occl_coord[rel_atom_i,2] = occl_z
                relevant_occl_coord[rel_atom_i,3] = adj_radius_sq
                rel_atom_i += 1
        for j in range(sphere_coord.shape[0]):
            # Second level: The sphere points for that atom
            # Transform sphere point to sphere of current atom
            point_x = sphere_coord[j,0] * radius + atom_x
            point_y = sphere_coord[j,1] * radius + atom_y
            point_z = sphere_coord[j,2] * radius + atom_z
            for k in range(rel_atom_i):
                # Third level: Compare point to occluding atoms
                dist_sq = distance_sq(point_x, point_y, point_z,
                                      relevant_occl_coord[k, 0],
                                      relevant_occl_coord[k, 1],
                                      relevant_occl_coord[k, 2])
                # Compare squared distance
                # to squared radius of occluding atom
                # (Squared radius is relevant_occl_coord[k, 3])
                if dist_sq < relevant_occl_coord[k, 3]:
                    # Point is occluded
                    # -> Continue with next point
                    n_accesible -= 1
                    break
        # Scale the area of the accessible points from the unit sphere
        # to the sphere of this atom (radius includes the probe radius)
        sasa[i] = area_per_point * n_accesible * radius_sq
    return np.asarray(sasa)
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
cdef inline float32 distance_sq(float32 x1, float32 y1, float32 z1,
                                float32 x2, float32 y2, float32 z2):
    # Squared Euclidean distance between the points
    # (x1, y1, z1) and (x2, y2, z2).
    # No square root is taken: the result is the sum of the squared
    # coordinate differences.
    cdef float32 delta_x = x2 - x1
    cdef float32 delta_y = y2 - y1
    cdef float32 delta_z = z2 - z1
    return delta_x*delta_x + delta_y*delta_y + delta_z*delta_z
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _create_fibonacci_points(n):
    """
    Get an array of approximately equidistant points on the surface of
    a unit sphere using a golden section spiral.

    Parameters
    ----------
    n : int
        The number of points to create.

    Returns
    -------
    coords : ndarray, dtype=float, shape=(n,3)
        The x, y and z coordinates of the points.
        Each point has a distance of 1 to the origin.
        For ``n == 0`` an empty array of shape *(0,3)* is returned.
    """
    if n == 0:
        # '1.0/n' below is undefined for an empty mesh:
        # Return an empty point array instead of failing with a
        # ZeroDivisionError, so the caller can handle the empty mesh
        return np.zeros((0, 3))
    # Each point is rotated by the golden angle relative to the
    # previous one
    phi = (3 - np.sqrt(5)) * np.pi * np.arange(n)
    # The points are evenly spaced along the z-axis,
    # with a margin of 1/n to both poles
    z = np.linspace(1 - 1.0/n, 1.0/n - 1, n)
    # Radius of the circle obtained by cutting the unit sphere at height z
    radius = np.sqrt(1 - z*z)
    coords = np.zeros((n, 3))
    coords[:,0] = radius * np.cos(phi)
    coords[:,1] = radius * np.sin(phi)
    coords[:,2] = z
    return coords
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.structure"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = [
|
|
8
|
+
"apply_segment_wise",
|
|
9
|
+
"spread_segment_wise",
|
|
10
|
+
"get_segment_masks",
|
|
11
|
+
"get_segment_starts_for",
|
|
12
|
+
"get_segment_positions",
|
|
13
|
+
"segment_iter",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def apply_segment_wise(starts, data, function, axis=None):
    """
    Generalized version of :func:`apply_residue_wise()` for
    residues and chains.

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    data : ndarray
        The data the function is applied on.
        It is sliced along its first dimension into one chunk per
        segment.
    function : callable
        The function that is called with the data chunk of each
        segment.
    axis : int, optional
        If given, it is forwarded to `function` as ``axis`` keyword
        argument.

    Returns
    -------
    processed_data : ndarray or None
        The return values of `function`, one entry per segment.
        ``None`` if `starts` describes no segment at all.
    """
    n_segments = len(starts) - 1
    # The 'axis' keyword is only passed on if the caller asked for it
    extra_kwargs = {} if axis is None else {"axis": axis}

    result = None
    for i, (start, stop) in enumerate(zip(starts[:-1], starts[1:])):
        value = function(data[start:stop], **extra_kwargs)
        if result is None:
            # Shape and dtype of the result are only known after the
            # function has been evaluated for the first segment
            if isinstance(value, np.ndarray):
                # One array per segment
                result_shape = (n_segments,) + value.shape
                result_dtype = value.dtype
            else:
                # One scalar per segment -> one dimensional result
                result_shape = n_segments
                result_dtype = type(value)
            result = np.zeros(result_shape, dtype=result_dtype)
        result[i] = value
    return result
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def spread_segment_wise(starts, input_data):
    """
    Generalized version of :func:`spread_residue_wise()`
    for residues and chains.

    Each segment-wise value is copied to every position that belongs
    to the respective segment.

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    input_data : array-like
        The data to be spread.
        The first dimension holds one entry per segment.
        Additional dimensions are kept as they are.

    Returns
    -------
    output_data : ndarray
        The spread data.
        The length of its first dimension is equal to the exclusive
        stop in `starts`, the remaining dimensions are taken from
        `input_data`.
    """
    # Also accept lists/tuples, which have no 'dtype' attribute
    input_data = np.asarray(input_data)
    # Keep trailing dimensions, so that a vector per segment
    # can be spread as well
    output_data = np.zeros(
        (starts[-1],) + input_data.shape[1:], dtype=input_data.dtype
    )
    for i, (start, stop) in enumerate(zip(starts[:-1], starts[1:])):
        output_data[start:stop] = input_data[i]
    return output_data
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_segment_masks(starts, indices):
    """
    Generalized version of :func:`get_residue_masks()`
    for residues and chains.

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    indices : array-like, dtype=int
        The indices to get the segment masks for.
        Negative indices are not supported.

    Returns
    -------
    masks : ndarray, shape=(k,n), dtype=bool
        One mask for each of the *k* given indices.
        A mask is true for all positions that are part of the segment
        the respective index is located in.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the exclusive
        stop in `starts`.
    """
    indices = np.asarray(indices)
    length = starts[-1]

    # Validate before allocating the potentially large mask array
    if (indices < 0).any():
        raise ValueError("This function does not support negative indices")
    out_of_range = indices >= length
    if out_of_range.any():
        # Report the offending index value itself,
        # not its position within 'indices'
        index = indices[out_of_range][0]
        raise ValueError(
            f"Index {index} is out of range for an atom array with length {length}"
        )

    masks = np.zeros((len(indices), length), dtype=bool)
    # As 'starts' contains the exclusive stop and all indices are smaller
    # than it, 'point + 1' is always a valid position in 'starts'
    insertion_points = np.searchsorted(starts, indices, side="right") - 1
    for i, point in enumerate(insertion_points):
        masks[i, starts[point] : starts[point + 1]] = True

    return masks
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def get_segment_starts_for(starts, indices):
    """
    Generalized version of :func:`get_residue_starts_for()`
    for residues and chains.

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    indices : array-like, dtype=int
        The indices to get the segment starts for.
        Negative indices are not supported.

    Returns
    -------
    segment_starts : ndarray, dtype=int
        For each given index the start index of the segment the index
        is located in.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the exclusive
        stop in `starts`.
    """
    indices = np.asarray(indices)
    length = starts[-1]
    # Remove exclusive stop
    starts = starts[:-1]

    if (indices < 0).any():
        raise ValueError("This function does not support negative indices")
    out_of_range = indices >= length
    if out_of_range.any():
        # Report the offending index value itself,
        # not its position within 'indices'
        index = indices[out_of_range][0]
        raise ValueError(
            f"Index {index} is out of range for an atom array with length {length}"
        )

    # Position of the last segment start that is <= the respective index
    insertion_points = np.searchsorted(starts, indices, side="right") - 1
    return starts[insertion_points]
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def get_segment_positions(starts, indices):
    """
    Generalized version of :func:`get_residue_positions()`
    for residues and chains.

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    indices : array-like, dtype=int
        The indices to get the segment positions for.
        Negative indices are not supported.

    Returns
    -------
    positions : ndarray, dtype=int
        For each given index the position of the segment the index is
        located in, i.e. ``0`` for the first segment, ``1`` for the
        second one and so on.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the exclusive
        stop in `starts`.
    """
    indices = np.asarray(indices)
    length = starts[-1]
    # Remove exclusive stop
    starts = starts[:-1]

    if (indices < 0).any():
        raise ValueError("This function does not support negative indices")
    out_of_range = indices >= length
    if out_of_range.any():
        # Report the offending index value itself,
        # not its position within 'indices'
        index = indices[out_of_range][0]
        raise ValueError(
            f"Index {index} is out of range for an atom array with length {length}"
        )

    # Position of the last segment start that is <= the respective index
    return np.searchsorted(starts, indices, side="right") - 1
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def segment_iter(array, starts):
    """
    Generalized version of :func:`residue_iter()`
    for residues and chains.

    Parameters
    ----------
    array : object
        The object to be split into segments.
        It must support indexing with an ellipsis followed by a slice.
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.

    Yields
    ------
    segment : object
        The part of `array` that belongs to a single segment.
        The segments are yielded in the order given by `starts`.
    """
    # Each pair of consecutive entries in 'starts' encloses one segment
    for seg_start, seg_stop in zip(starts[:-1], starts[1:]):
        yield array[..., seg_start:seg_stop]
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Function for converting a structure into a sequence.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.structure"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
__all__ = ["to_sequence"]
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
15
|
+
from biotite.structure.chains import get_chain_starts
|
|
16
|
+
from biotite.structure.error import BadStructureError
|
|
17
|
+
from biotite.structure.info.groups import amino_acid_names, nucleotide_names
|
|
18
|
+
from biotite.structure.info.misc import one_letter_code
|
|
19
|
+
from biotite.structure.residues import get_residues
|
|
20
|
+
|
|
21
|
+
HETERO_PLACEHOLDER = "."
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def to_sequence(atoms, allow_hetero=False):
    """
    Convert each chain in a structure into a sequence.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure.
        May contain multiple chains.
        Each chain must be either a peptide or a nucleic acid.
    allow_hetero : bool, optional
        If true, residues inside an amino acid or nucleotide chain,
        that have no one-letter code, are replaced by the respective
        '*any*' symbol (`"X"` or `"N"`, respectively).
        The same is true for amino acids in nucleotide chains and vice
        versa.
        By default, an exception is raised.

    Returns
    -------
    sequences : list of Sequence, length=n
        The sequence for each chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    Raises
    ------
    BadStructureError
        If a chain contains neither amino acids nor nucleotides, or if
        `allow_hetero` is false and a chain contains a residue that
        does not fit to the type of the chain.

    Notes
    -----
    Residues are considered amino acids or nucleotides based on their
    appearance in :func:`info.amino_acid_names()` or
    :func:`info.nucleotide_names()`, respectively.
    A chain is treated as peptide, if it contains more amino acids than
    nucleotides, otherwise it is treated as nucleic acid.

    In peptides the symbols ``U`` and ``O`` are replaced by ``C`` and
    ``K``, respectively.
    In nucleic acids ``U`` is replaced by ``T``.

    Examples
    --------

    >>> sequences, chain_starts = to_sequence(atom_array)
    >>> print(sequences)
    [ProteinSequence("NLYIQWLKDGGPSSGRPPPS")]

    """
    sequences = []
    chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True)
    for start, stop in zip(chain_start_indices[:-1], chain_start_indices[1:]):
        # Index the atom dimension explicitly:
        # A plain slice on an AtomArrayStack would select models
        # instead of atoms
        chain = atoms[..., start:stop]
        _, residues = get_residues(chain)
        one_letter_symbols = np.array(
            [one_letter_code(res) or HETERO_PLACEHOLDER for res in residues]
        )
        hetero_mask = one_letter_symbols == HETERO_PLACEHOLDER

        # Classify each residue only once
        # and reuse the masks in the branches below
        is_amino_acid = np.isin(residues, amino_acid_names())
        is_nucleotide = np.isin(residues, nucleotide_names())
        aa_count = np.count_nonzero(is_amino_acid)
        nuc_count = np.count_nonzero(is_nucleotide)
        if aa_count == 0 and nuc_count == 0:
            raise BadStructureError(
                f"Chain {chain.chain_id[0]} contains neither amino acids "
                "nor nucleotides"
            )
        elif aa_count > nuc_count:
            # Chain is a peptide
            hetero_mask |= ~is_amino_acid
            if not allow_hetero and np.any(hetero_mask):
                hetero_indices = np.where(hetero_mask)[0]
                raise BadStructureError(
                    f"Hetero residue(s) "
                    f"{', '.join(residues[hetero_indices])} in peptide"
                )
            one_letter_symbols[hetero_mask] = "X"
            # Replace selenocysteine and pyrrolysine
            one_letter_symbols[one_letter_symbols == "U"] = "C"
            one_letter_symbols[one_letter_symbols == "O"] = "K"
            sequences.append(ProteinSequence("".join(one_letter_symbols)))
        else:
            # Chain is a nucleic acid
            hetero_mask |= ~is_nucleotide
            if not allow_hetero and np.any(hetero_mask):
                hetero_indices = np.where(hetero_mask)[0]
                raise BadStructureError(
                    f"Hetero residue(s) "
                    f"{', '.join(residues[hetero_indices])} in nucleic acid"
                )
            one_letter_symbols[hetero_mask] = "N"
            # Replace uracil
            one_letter_symbols[one_letter_symbols == "U"] = "T"
            sequences.append(NucleotideSequence("".join(one_letter_symbols)))

    # Remove exclusive stop
    return sequences, chain_start_indices[:-1]
|