biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,716 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This module provides utility for handling data on residue level, rather than
|
|
7
|
+
atom level.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
__name__ = "biotite.structure"
|
|
11
|
+
__author__ = "Patrick Kunzmann"
|
|
12
|
+
__all__ = [
|
|
13
|
+
"get_residue_starts",
|
|
14
|
+
"apply_residue_wise",
|
|
15
|
+
"spread_residue_wise",
|
|
16
|
+
"get_residue_masks",
|
|
17
|
+
"get_residue_starts_for",
|
|
18
|
+
"get_residue_positions",
|
|
19
|
+
"get_all_residue_positions",
|
|
20
|
+
"get_residues",
|
|
21
|
+
"get_residue_count",
|
|
22
|
+
"residue_iter",
|
|
23
|
+
"get_atom_name_indices",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
import numpy as np
|
|
27
|
+
from biotite.structure.segments import (
|
|
28
|
+
apply_segment_wise,
|
|
29
|
+
get_all_segment_positions,
|
|
30
|
+
get_segment_masks,
|
|
31
|
+
get_segment_positions,
|
|
32
|
+
get_segment_starts,
|
|
33
|
+
get_segment_starts_for,
|
|
34
|
+
segment_iter,
|
|
35
|
+
spread_segment_wise,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_residue_starts(array, add_exclusive_stop=False, extra_categories=()):
    """
    Get indices for an atom array, each indicating the beginning of
    a residue.

    A new residue starts, either when the chain ID, sym ID, residue ID,
    insertion code or residue name changes from one to the next atom.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The atom array (stack) to get the residue starts from.
    add_exclusive_stop : bool, optional
        If true, the exclusive stop of the input atom array, i.e.
        ``array.array_length()``, is added to the returned array of
        start indices as last element.
    extra_categories : tuple of str, optional
        Additional annotation categories that induce the start of a new residue,
        when their values change from one atom to the next.

    Returns
    -------
    starts : ndarray, dtype=int
        The start indices of residues in `array`.

    Notes
    -----
    This method is internally used by all other residue-related
    functions.

    Examples
    --------

    >>> print(get_residue_starts(atom_array))
    [  0  16  35  56  75  92 116 135 157 169 176 183 197 208 219 226 250 264
     278 292]
    >>> print(get_residue_starts(atom_array, add_exclusive_stop=True))
    [  0  16  35  56  75  92 116 135 157 169 176 183 197 208 219 226 250 264
     278 292 304]
    """
    # The residue name belongs to the documented residue identity: without it,
    # consecutive residues that only differ in their name would be merged
    categories = ["chain_id", "res_id", "ins_code", "res_name"]
    categories.extend(extra_categories)
    # The sym ID is only compared if the array actually carries this annotation
    if "sym_id" in array.get_annotation_categories():
        categories.append("sym_id")
    return get_segment_starts(array, add_exclusive_stop, equal_categories=categories)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def apply_residue_wise(array, data, function, axis=None):
    """
    Evaluate a function separately on the data of each residue.

    The atom array (stack) is divided into intervals, one per residue.
    The data array (:class:`ndarray`), which has the same length as the
    atom array (stack), is partitioned into the same intervals and each
    interval (also an :class:`ndarray`) is passed to `function`.
    The return values are collected in the resulting :class:`ndarray`,
    so that each of its elements corresponds to one residue.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The atom array (stack) to determine the residues from.
    data : ndarray
        The data, whose intervals are the parameter for `function`.
        Must have same length as `array`.
    function : function
        The `function` must have either the form *f(data)* or
        *f(data, axis)* in case `axis` is given.
        Every `function` call must return a value with the same shape
        and data type.
    axis : int, optional
        This value is given to the `axis` parameter of `function`.

    Returns
    -------
    processed_data : ndarray
        Residue-wise evaluation of `data` by `function`.
        The size of the first dimension of this array is equal to the
        amount of residues.

    Examples
    --------
    Calculate residue-wise SASA from atom-wise SASA of a 20 residue
    peptide.

    >>> sasa_per_atom = sasa(atom_array)
    >>> print(len(sasa_per_atom))
    304
    >>> sasa_per_residue = apply_residue_wise(atom_array, sasa_per_atom, np.nansum)
    >>> print(len(sasa_per_residue))
    20
    >>> print(sasa_per_residue)
    [157.979 117.136  94.983 115.485 113.583  23.471  93.013 144.173  61.561
      38.885   0.792 114.053 108.568  27.888  83.583 113.016 114.318  74.281
      47.811 172.035]

    Calculate the centroids of each residue for the same peptide.

    >>> print(len(atom_array))
    304
    >>> centroids = apply_residue_wise(atom_array, atom_array.coord,
    ...     np.average, axis=0)
    >>> print(len(centroids))
    20
    >>> print(centroids)
    [[-9.582  3.378 -2.073]
     [-4.670  5.816 -1.860]
     [-2.461  3.060  3.076]
     [-7.211 -0.396  1.013]
     [-4.698 -1.080 -4.284]
     [ 1.172  0.206  1.038]
     [-2.160 -2.245  3.541]
     [-3.682 -5.540 -2.895]
     [ 0.711 -5.409 -2.549]
     [ 2.002 -6.322  1.695]
     [ 2.799 -3.140  2.327]
     [ 5.901 -2.489  4.845]
     [ 6.754 -6.712  3.094]
     [ 5.699 -5.101 -1.209]
     [ 9.295 -2.970 -1.835]
     [ 5.518 -1.521 -3.473]
     [ 7.219  3.673 -0.684]
     [ 4.007  4.364  2.674]
     [ 0.341  5.575 -0.254]
     [ 1.194 10.416  1.130]]
    """
    # Residue start indices plus the exclusive stop of the array are handed
    # to the generic segment-wise implementation
    residue_bounds = get_residue_starts(array, add_exclusive_stop=True)
    return apply_segment_wise(residue_bounds, data, function, axis)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def spread_residue_wise(array, input_data):
    """
    Turn residue-wise data into atom-wise data.

    Every atom receives the value of the residue it belongs to:

    ``output_data[i] = input_data[j]``,
    where *i* is incremented from atom to atom and
    *j* is incremented every residue change.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The atom array (stack) to determine the residues from.
    input_data : ndarray
        The data to be spread.
        The length of the 0-th axis must be equal to the amount of different
        residue IDs in `array`.

    Returns
    -------
    output_data : ndarray
        Residue-wise spread `input_data`.
        Length is the same as `array_length()` of `array`.

    Examples
    --------
    Spread secondary structure annotation to every atom of a 20 residue
    peptide (with 304 atoms).

    >>> sse = annotate_sse(atom_array)
    >>> print(len(sse))
    20
    >>> print(sse)
    ['c' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c'
     'c' 'c']
    >>> atom_wise_sse = spread_residue_wise(atom_array, sse)
    >>> print(len(atom_wise_sse))
    304
    >>> print(atom_wise_sse)
    ['c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'a' 'a'
     'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a'
     'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a'
     'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a'
     'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a'
     'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a'
     'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a'
     'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a'
     'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a'
     'a' 'a' 'a' 'a' 'a' 'a' 'a' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c'
     'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c'
     'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c'
     'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c'
     'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c'
     'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c'
     'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c'
     'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c']
    """
    # Delegate to the generic segment implementation, using the residue
    # start indices (including the exclusive stop) as segment boundaries
    return spread_segment_wise(
        get_residue_starts(array, add_exclusive_stop=True), input_data
    )
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def get_residue_masks(array, indices):
    """
    For each given atom index, get a boolean mask that selects the
    residue this atom belongs to.

    Parameters
    ----------
    array : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The atom array (stack) to determine the residues from.
    indices : ndarray, dtype=int, shape=(k,)
        These indices indicate the atoms to get the corresponding
        residues for.
        Negative indices are not allowed.

    Returns
    -------
    residues_masks : ndarray, dtype=bool, shape=(k,n)
        Multiple boolean masks, one for each given index in `indices`.
        Each array masks the atoms that belong to the same residue as
        the atom at the given index.

    Examples
    --------

    >>> indices = [5, 42]
    >>> residue_masks = get_residue_masks(atom_array, indices)
    >>> print(atom_array[indices[0]])
        A       1  ASN CG     C       -10.915    3.130   -2.611
    >>> print(atom_array[residue_masks[0]])
        A       1  ASN N      N        -8.901    4.127   -0.555
        A       1  ASN CA     C        -8.608    3.135   -1.618
        A       1  ASN C      C        -7.117    2.964   -1.897
        A       1  ASN O      O        -6.634    1.849   -1.758
        A       1  ASN CB     C        -9.437    3.396   -2.889
        A       1  ASN CG     C       -10.915    3.130   -2.611
        A       1  ASN OD1    O       -11.269    2.700   -1.524
        A       1  ASN ND2    N       -11.806    3.406   -3.543
        A       1  ASN H1     H        -8.330    3.957    0.261
        A       1  ASN H2     H        -8.740    5.068   -0.889
        A       1  ASN H3     H        -9.877    4.041   -0.293
        A       1  ASN HA     H        -8.930    2.162   -1.239
        A       1  ASN HB2    H        -9.310    4.417   -3.193
        A       1  ASN HB3    H        -9.108    2.719   -3.679
        A       1  ASN HD21   H       -11.572    3.791   -4.444
        A       1  ASN HD22   H       -12.757    3.183   -3.294
    >>> print(atom_array[indices[1]])
        A       3  TYR CD2    C        -1.820    4.326    3.332
    >>> print(atom_array[residue_masks[1]])
        A       3  TYR N      N        -4.354    3.455   -0.111
        A       3  TYR CA     C        -3.690    2.738    0.981
        A       3  TYR C      C        -4.102    1.256    1.074
        A       3  TYR O      O        -3.291    0.409    1.442
        A       3  TYR CB     C        -3.964    3.472    2.302
        A       3  TYR CG     C        -2.824    3.339    3.290
        A       3  TYR CD1    C        -2.746    2.217    4.138
        A       3  TYR CD2    C        -1.820    4.326    3.332
        A       3  TYR CE1    C        -1.657    2.076    5.018
        A       3  TYR CE2    C        -0.725    4.185    4.205
        A       3  TYR CZ     C        -0.639    3.053    5.043
        A       3  TYR OH     O         0.433    2.881    5.861
        A       3  TYR H      H        -4.934    4.245    0.120
        A       3  TYR HA     H        -2.615    2.768    0.796
        A       3  TYR HB2    H        -4.117    4.513    2.091
        A       3  TYR HB3    H        -4.886    3.096    2.750
        A       3  TYR HD1    H        -3.513    1.456    4.101
        A       3  TYR HD2    H        -1.877    5.200    2.695
        A       3  TYR HE1    H        -1.576    1.221    5.669
        A       3  TYR HE2    H         0.033    4.952    4.233
        A       3  TYR HH     H         1.187    3.395    5.567
    """
    # The residue start indices (with the exclusive stop appended) serve as
    # segment boundaries for the generic mask computation
    residue_bounds = get_residue_starts(array, add_exclusive_stop=True)
    return get_segment_masks(residue_bounds, indices)
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def get_residue_starts_for(array, indices):
    """
    Map each given atom index to the start index of the residue
    that atom belongs to.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The atom array (stack) to determine the residues from.
    indices : ndarray, dtype=int, shape=(k,)
        These indices point to the atoms to get the corresponding
        residue starts for.
        Negative indices are not allowed.

    Returns
    -------
    start_indices : ndarray, dtype=int, shape=(k,)
        The indices that point to the residue starts for the input
        `indices`.

    Examples
    --------

    >>> indices = [5, 42]
    >>> residue_starts = get_residue_starts_for(atom_array, indices)
    >>> print(residue_starts)
    [ 0 35]
    >>> print(atom_array[indices[0]])
        A       1  ASN CG     C       -10.915    3.130   -2.611
    >>> print(atom_array[residue_starts[0]])
        A       1  ASN N      N        -8.901    4.127   -0.555
    >>> print(atom_array[indices[1]])
        A       3  TYR CD2    C        -1.820    4.326    3.332
    >>> print(atom_array[residue_starts[1]])
        A       3  TYR N      N        -4.354    3.455   -0.111
    """
    # Look up the given atom indices within the residue boundaries
    # (start indices plus exclusive stop) via the generic segment function
    return get_segment_starts_for(
        get_residue_starts(array, add_exclusive_stop=True), indices
    )
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def get_residue_positions(array, indices):
    """
    Look up, for each given atom index, the position of the residue
    this atom belongs to in the input `array`.

    Residue positions are counted from zero: the first residue in the
    atom array has the position ``0``, the second residue has the
    position ``1``, etc.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The atom array (stack) whose residues are considered.
    indices : ndarray, dtype=int, shape=(k,)
        The indices of the atoms for which the residue position is
        looked up.
        Negative indices are not allowed.

    Returns
    -------
    residue_indices : ndarray, dtype=int, shape=(k,)
        For each entry in `indices`, the position of the
        corresponding residue.

    See Also
    --------
    get_all_residue_positions :
        Similar to this function, but for all atoms in the :class:`struc.AtomArray`.

    Examples
    --------
    >>> atom_index = [5, 42]
    >>> print(atom_array.res_name[atom_index])
    ['ASN' 'TYR']
    >>> _, residues = get_residues(atom_array)
    >>> print(residues)
    ['ASN' 'LEU' 'TYR' 'ILE' 'GLN' 'TRP' 'LEU' 'LYS' 'ASP' 'GLY' 'GLY' 'PRO'
    'SER' 'SER' 'GLY' 'ARG' 'PRO' 'PRO' 'PRO' 'SER']
    >>> residue_index = get_residue_positions(atom_array, atom_index)
    >>> print(residue_index)
    [0 2]
    >>> print(residues[residue_index])
    ['ASN' 'TYR']
    """
    # The residue starts are requested with the exclusive stop appended
    return get_segment_positions(
        get_residue_starts(array, add_exclusive_stop=True), indices
    )
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def get_all_residue_positions(array):
    """
    Look up, for every atom, the position of the residue this atom
    belongs to in the input `array`.

    Residue positions are counted from zero: the first residue in the
    atom array has the position ``0``, the second residue has the
    position ``1``, etc.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The atom array (stack) whose residues are considered.

    Returns
    -------
    residue_indices : ndarray, dtype=int, shape=(n,)
        The position of the corresponding residue, with one entry for
        each atom in `array`.

    See Also
    --------
    get_residue_positions :
        Similar to this function, but for a given subset of atom indices.

    Examples
    --------
    >>> print(get_all_residue_positions(atom_array))
    [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
    1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2
    2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
    3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5
    5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6
    6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7
    7 7 7 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8
    8 9 9 9 9 9 9 9 10 10 10 10 10 10 10 11 11 11 11 11 11 11 11 11
    11 11 11 11 11 12 12 12 12 12 12 12 12 12 12 12 13 13 13 13 13 13 13 13
    13 13 13 14 14 14 14 14 14 14 15 15 15 15 15 15 15 15 15 15 15 15 15 15
    15 15 15 15 15 15 15 15 15 15 16 16 16 16 16 16 16 16 16 16 16 16 16 16
    17 17 17 17 17 17 17 17 17 17 17 17 17 17 18 18 18 18 18 18 18 18 18 18
    18 18 18 18 19 19 19 19 19 19 19 19 19 19 19 19]
    """
    n_atoms = array.array_length()
    # The residue starts are requested with the exclusive stop appended
    return get_all_segment_positions(
        get_residue_starts(array, add_exclusive_stop=True), n_atoms
    )
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def get_residues(array):
    """
    Collect the residue IDs and residue names of an atom array (stack).

    The residues appear in the returned arrays in the same order as
    they occur in the array (stack).

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The atom array (stack) whose residues are considered.

    Returns
    -------
    ids : ndarray, dtype=int
        List of residue IDs.
    names : ndarray, dtype="U5"
        List of residue names.

    Examples
    --------
    Get the residue names of a 20 residue peptide.

    >>> print(atom_array.res_name)
    ['ASN' 'ASN' 'ASN' 'ASN' 'ASN' 'ASN' 'ASN' 'ASN' 'ASN' 'ASN' 'ASN' 'ASN'
    'ASN' 'ASN' 'ASN' 'ASN' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU'
    'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'TYR'
    'TYR' 'TYR' 'TYR' 'TYR' 'TYR' 'TYR' 'TYR' 'TYR' 'TYR' 'TYR' 'TYR' 'TYR'
    'TYR' 'TYR' 'TYR' 'TYR' 'TYR' 'TYR' 'TYR' 'TYR' 'ILE' 'ILE' 'ILE' 'ILE'
    'ILE' 'ILE' 'ILE' 'ILE' 'ILE' 'ILE' 'ILE' 'ILE' 'ILE' 'ILE' 'ILE' 'ILE'
    'ILE' 'ILE' 'ILE' 'GLN' 'GLN' 'GLN' 'GLN' 'GLN' 'GLN' 'GLN' 'GLN' 'GLN'
    'GLN' 'GLN' 'GLN' 'GLN' 'GLN' 'GLN' 'GLN' 'GLN' 'TRP' 'TRP' 'TRP' 'TRP'
    'TRP' 'TRP' 'TRP' 'TRP' 'TRP' 'TRP' 'TRP' 'TRP' 'TRP' 'TRP' 'TRP' 'TRP'
    'TRP' 'TRP' 'TRP' 'TRP' 'TRP' 'TRP' 'TRP' 'TRP' 'LEU' 'LEU' 'LEU' 'LEU'
    'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU' 'LEU'
    'LEU' 'LEU' 'LEU' 'LYS' 'LYS' 'LYS' 'LYS' 'LYS' 'LYS' 'LYS' 'LYS' 'LYS'
    'LYS' 'LYS' 'LYS' 'LYS' 'LYS' 'LYS' 'LYS' 'LYS' 'LYS' 'LYS' 'LYS' 'LYS'
    'LYS' 'ASP' 'ASP' 'ASP' 'ASP' 'ASP' 'ASP' 'ASP' 'ASP' 'ASP' 'ASP' 'ASP'
    'ASP' 'GLY' 'GLY' 'GLY' 'GLY' 'GLY' 'GLY' 'GLY' 'GLY' 'GLY' 'GLY' 'GLY'
    'GLY' 'GLY' 'GLY' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO'
    'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'SER' 'SER' 'SER' 'SER' 'SER' 'SER' 'SER'
    'SER' 'SER' 'SER' 'SER' 'SER' 'SER' 'SER' 'SER' 'SER' 'SER' 'SER' 'SER'
    'SER' 'SER' 'SER' 'GLY' 'GLY' 'GLY' 'GLY' 'GLY' 'GLY' 'GLY' 'ARG' 'ARG'
    'ARG' 'ARG' 'ARG' 'ARG' 'ARG' 'ARG' 'ARG' 'ARG' 'ARG' 'ARG' 'ARG' 'ARG'
    'ARG' 'ARG' 'ARG' 'ARG' 'ARG' 'ARG' 'ARG' 'ARG' 'ARG' 'ARG' 'PRO' 'PRO'
    'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO'
    'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO'
    'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO' 'PRO'
    'PRO' 'PRO' 'PRO' 'PRO' 'SER' 'SER' 'SER' 'SER' 'SER' 'SER' 'SER' 'SER'
    'SER' 'SER' 'SER' 'SER']
    >>> ids, names = get_residues(atom_array)
    >>> print(names)
    ['ASN' 'LEU' 'TYR' 'ILE' 'GLN' 'TRP' 'LEU' 'LYS' 'ASP' 'GLY' 'GLY' 'PRO'
    'SER' 'SER' 'GLY' 'ARG' 'PRO' 'PRO' 'PRO' 'SER']
    """
    # The annotations of the first atom of each residue represent the residue
    first_atoms = get_residue_starts(array)
    res_ids = array.res_id[first_atoms]
    res_names = array.res_name[first_atoms]
    return res_ids, res_names
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def get_residue_count(array):
    """
    Count the residues in an atom array (stack).

    The count is determined from the `res_id` and `chain_id` annotation.
    Each time the residue ID or chain ID changes,
    the count is incremented. Special rules apply to hetero residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The atom array (stack) whose residues are counted.

    Returns
    -------
    count : int
        Amount of residues.
    """
    # One start index is reported per residue
    residue_starts = get_residue_starts(array)
    return len(residue_starts)
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
def residue_iter(array):
    """
    Iterate over the residues of an atom array (stack), one residue
    at a time.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The atom array (stack) to iterate over.

    Yields
    ------
    residue : AtomArray or AtomArrayStack
        A single residue of the input `array`.

    Examples
    --------

    >>> for res in residue_iter(atom_array[:50]):
    ...     print("New residue")
    ...     print(res)
    ...     print()
    New residue
    A 1 ASN N N -8.901 4.127 -0.555
    A 1 ASN CA C -8.608 3.135 -1.618
    A 1 ASN C C -7.117 2.964 -1.897
    A 1 ASN O O -6.634 1.849 -1.758
    A 1 ASN CB C -9.437 3.396 -2.889
    A 1 ASN CG C -10.915 3.130 -2.611
    A 1 ASN OD1 O -11.269 2.700 -1.524
    A 1 ASN ND2 N -11.806 3.406 -3.543
    A 1 ASN H1 H -8.330 3.957 0.261
    A 1 ASN H2 H -8.740 5.068 -0.889
    A 1 ASN H3 H -9.877 4.041 -0.293
    A 1 ASN HA H -8.930 2.162 -1.239
    A 1 ASN HB2 H -9.310 4.417 -3.193
    A 1 ASN HB3 H -9.108 2.719 -3.679
    A 1 ASN HD21 H -11.572 3.791 -4.444
    A 1 ASN HD22 H -12.757 3.183 -3.294
    <BLANKLINE>
    New residue
    A 2 LEU N N -6.379 4.031 -2.228
    A 2 LEU CA C -4.923 4.002 -2.452
    A 2 LEU C C -4.136 3.187 -1.404
    A 2 LEU O O -3.391 2.274 -1.760
    A 2 LEU CB C -4.411 5.450 -2.619
    A 2 LEU CG C -4.795 6.450 -1.495
    A 2 LEU CD1 C -3.612 6.803 -0.599
    A 2 LEU CD2 C -5.351 7.748 -2.084
    A 2 LEU H H -6.821 4.923 -2.394
    A 2 LEU HA H -4.750 3.494 -3.403
    A 2 LEU HB2 H -3.340 5.414 -2.672
    A 2 LEU HB3 H -4.813 5.817 -3.564
    A 2 LEU HG H -5.568 6.022 -0.858
    A 2 LEU HD11 H -3.207 5.905 -0.146
    A 2 LEU HD12 H -2.841 7.304 -1.183
    A 2 LEU HD13 H -3.929 7.477 0.197
    A 2 LEU HD21 H -4.607 8.209 -2.736
    A 2 LEU HD22 H -6.255 7.544 -2.657
    A 2 LEU HD23 H -5.592 8.445 -1.281
    <BLANKLINE>
    New residue
    A 3 TYR N N -4.354 3.455 -0.111
    A 3 TYR CA C -3.690 2.738 0.981
    A 3 TYR C C -4.102 1.256 1.074
    A 3 TYR O O -3.291 0.409 1.442
    A 3 TYR CB C -3.964 3.472 2.302
    A 3 TYR CG C -2.824 3.339 3.290
    A 3 TYR CD1 C -2.746 2.217 4.138
    A 3 TYR CD2 C -1.820 4.326 3.332
    A 3 TYR CE1 C -1.657 2.076 5.018
    A 3 TYR CE2 C -0.725 4.185 4.205
    A 3 TYR CZ C -0.639 3.053 5.043
    A 3 TYR OH O 0.433 2.881 5.861
    A 3 TYR H H -4.934 4.245 0.120
    A 3 TYR HA H -2.615 2.768 0.796
    A 3 TYR HB2 H -4.117 4.513 2.091
    <BLANKLINE>
    """
    # The exclusive stop is appended to the residue starts
    residue_bounds = get_residue_starts(array, add_exclusive_stop=True)
    yield from segment_iter(array, residue_bounds)
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
def get_atom_name_indices(atoms, atom_names):
    """
    For each residue, get the index of the atom with the given atom name.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        Search for the indices of the given atom names in this structure.
    atom_names : list of str, length=p
        The names of the atoms to get the indices of.
        An entry may also be ``None``: in this case the atoms whose
        ``hetero`` annotation is set are selected for that column,
        instead of matching an atom name.

    Returns
    -------
    indices : ndarray, dtype=int, shape=(k, p)
        For every residue and atom name, the return value contains the atom index in
        the :class:`AtomArray` where the sought atom name is located.
        Where the atom name is not present in a residue, the array is filled with `-1`.
        If `atoms` contains no atoms at all, an empty array with shape ``(0, p)``
        is returned.

    Examples
    --------

    >>> indices = get_atom_name_indices(atom_array, ["CA", "CB"])
    >>> print(indices)
    [[ 1 4]
    [ 17 20]
    [ 36 39]
    [ 57 60]
    [ 76 79]
    [ 93 96]
    [117 120]
    [136 139]
    [158 161]
    [170 -1]
    [177 -1]
    [184 187]
    [198 201]
    [209 212]
    [220 -1]
    [227 230]
    [251 254]
    [265 268]
    [279 282]
    [293 296]]
    >>> for row in indices:
    ...     for index in row:
    ...         if index != -1:
    ...             print(atom_array[index])
    ...     print()
    A 1 ASN CA C -8.608 3.135 -1.618
    A 1 ASN CB C -9.437 3.396 -2.889
    <BLANKLINE>
    A 2 LEU CA C -4.923 4.002 -2.452
    A 2 LEU CB C -4.411 5.450 -2.619
    <BLANKLINE>
    A 3 TYR CA C -3.690 2.738 0.981
    A 3 TYR CB C -3.964 3.472 2.302
    <BLANKLINE>
    A 4 ILE CA C -5.857 -0.449 0.613
    A 4 ILE CB C -7.386 -0.466 0.343
    <BLANKLINE>
    A 5 GLN CA C -4.122 -1.167 -2.743
    A 5 GLN CB C -4.292 -0.313 -4.013
    <BLANKLINE>
    A 6 TRP CA C -0.716 -0.631 -0.993
    A 6 TRP CB C -0.221 0.703 -0.417
    <BLANKLINE>
    A 7 LEU CA C -1.641 -2.932 1.963
    A 7 LEU CB C -2.710 -2.645 3.033
    <BLANKLINE>
    A 8 LYS CA C -3.024 -5.791 -0.269
    A 8 LYS CB C -4.224 -5.697 -1.232
    <BLANKLINE>
    A 9 ASP CA C 0.466 -6.016 -1.905
    A 9 ASP CB C 1.033 -4.839 -2.724
    <BLANKLINE>
    A 10 GLY CA C 2.060 -6.618 1.593
    <BLANKLINE>
    A 11 GLY CA C 2.626 -2.967 2.723
    <BLANKLINE>
    A 12 PRO CA C 6.333 -2.533 3.806
    A 12 PRO CB C 6.740 -2.387 5.279
    <BLANKLINE>
    A 13 SER CA C 7.049 -6.179 2.704
    A 13 SER CB C 6.458 -7.371 3.472
    <BLANKLINE>
    A 14 SER CA C 6.389 -5.315 -1.015
    A 14 SER CB C 4.914 -4.993 -1.265
    <BLANKLINE>
    A 15 GLY CA C 9.451 -3.116 -1.870
    <BLANKLINE>
    A 16 ARG CA C 7.289 0.084 -2.054
    A 16 ARG CB C 6.110 -0.243 -2.994
    <BLANKLINE>
    A 17 PRO CA C 6.782 3.088 0.345
    A 17 PRO CB C 7.554 4.394 0.119
    <BLANKLINE>
    A 18 PRO CA C 3.287 4.031 1.686
    A 18 PRO CB C 3.035 4.190 3.187
    <BLANKLINE>
    A 19 PRO CA C 1.185 6.543 -0.353
    A 19 PRO CB C 0.048 6.014 -1.229
    <BLANKLINE>
    A 20 SER CA C 0.852 10.027 1.285
    A 20 SER CB C 1.972 11.071 1.284
    <BLANKLINE>
    """
    # Without any atom there is no last residue position to derive the
    # number of residues from: indexing with ``[-1]`` below would raise
    # an IndexError, hence the empty result is returned directly
    if atoms.array_length() == 0:
        return np.full((0, len(atom_names)), fill_value=-1, dtype=int)

    residue_indices = get_all_residue_positions(atoms)
    # Residue positions are counted from 0, so the position of the last
    # atom's residue plus one gives the number of rows
    indices = np.full(
        (residue_indices[-1] + 1, len(atom_names)), fill_value=-1, dtype=int
    )
    for i, atom_name in enumerate(atom_names):
        if atom_name is None:
            # ``None`` selects the atoms flagged as hetero
            atom_name_indices = np.where(atoms.hetero)[0]
        else:
            atom_name_indices = np.where(atoms.atom_name == atom_name)[0]
        # Write each matching atom index into the row of its residue;
        # rows without a match keep the fill value -1
        indices[residue_indices[atom_name_indices], i] = atom_name_indices
    return indices
|