biotite 1.5.0__cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,591 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This module provides utility functions for creating filters on atom
|
|
7
|
+
arrays and atom array stacks.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
# NOTE(review): the module name is overridden here, presumably so that the
# functions defined in this file are presented as members of the
# ``biotite.structure`` subpackage - confirm against the subpackage's
# ``__init__``.
__name__ = "biotite.structure"
__author__ = "Patrick Kunzmann, Tom David Müller"
# Public names exported by this module
__all__ = [
    "filter_solvent",
    "filter_monoatomic_ions",
    "filter_nucleotides",
    "filter_canonical_nucleotides",
    "filter_amino_acids",
    "filter_canonical_amino_acids",
    "filter_carbohydrates",
    "filter_intersection",
    "filter_first_altloc",
    "filter_highest_occupancy_altloc",
    "filter_peptide_backbone",
    "filter_phosphate_backbone",
    "filter_linear_bond_continuity",
    "filter_polymer",
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
from functools import partial
|
|
31
|
+
import numpy as np
|
|
32
|
+
from biotite.structure.atoms import array as atom_array
|
|
33
|
+
from biotite.structure.info.groups import (
|
|
34
|
+
amino_acid_names,
|
|
35
|
+
carbohydrate_names,
|
|
36
|
+
nucleotide_names,
|
|
37
|
+
)
|
|
38
|
+
from biotite.structure.residues import get_residue_count, get_residue_starts
|
|
39
|
+
|
|
40
|
+
# Residue names that `filter_canonical_amino_acids()` accepts
_canonical_aa_list = [
    "ALA",
    "ARG",
    "ASN",
    "ASP",
    "CYS",
    "GLN",
    "GLU",
    "GLY",
    "HIS",
    "ILE",
    "LEU",
    "LYS",
    "MET",
    "PHE",
    "PRO",
    "PYL",
    "SER",
    "THR",
    "TRP",
    "TYR",
    "VAL",
    "SEC",
]
# Residue names that `filter_canonical_nucleotides()` accepts
_canonical_nucleotide_list = ["A", "DA", "G", "DG", "C", "DC", "U", "DT"]

# Residue names of solvent molecules not only in CCD, but also from modeling software
_solvent_list = ["HOH", "DOD", "SOL", "WAT", "H2O", "TIP3", "TIP4", "TIP5"]

# Atom names selected by `filter_peptide_backbone()`
_peptide_backbone_atoms = ["N", "CA", "C"]
# Atom names selected by `filter_phosphate_backbone()`
_phosphate_backbone_atoms = ["P", "O5'", "C5'", "C4'", "C3'", "O3'"]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def filter_monoatomic_ions(array):
    """
    Select the atoms of an atom array that are monoatomic ions
    (for example sodium or chloride ions).

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        A mask that is `True` at every index of `array` whose atom is a
        monoatomic ion.
    """
    # Only for a monoatomic ion does the residue name
    # coincide with the element name of the atom
    residue_names = array.res_name
    elements = array.element
    return residue_names == elements
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def filter_solvent(array):
    """
    Select the atoms of an array that belong to solvent molecules.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        A mask that is `True` at every index of `array` whose atom is
        part of the solvent.
    """
    # Solvent is recognized purely by residue name
    residue_names = array.res_name
    return np.isin(residue_names, _solvent_list)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def filter_canonical_nucleotides(array):
    """
    Select the atoms of an array that belong to canonical nucleotides.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        A mask that is `True` at every index of `array` whose atom is
        part of a canonical nucleotide.
    """
    # Canonical nucleotides are recognized purely by residue name
    residue_names = array.res_name
    return np.isin(residue_names, _canonical_nucleotide_list)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def filter_nucleotides(array):
    """
    Select the atoms of an array that belong to nucleotides.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        A mask that is `True` at every index of `array` whose atom is
        part of a nucleotide.

    Notes
    -----
    Nucleotides are identified according to the PDB chemical component
    dictionary. A residue is considered a nucleotide if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``DNA LINKING``, ``DNA OH 3 PRIME TERMINUS``,
    ``DNA OH 5 PRIME TERMINUS``, ``L-DNA LINKING``, ``L-RNA LINKING``,
    ``RNA LINKING``, ``RNA OH 3 PRIME TERMINUS``,
    ``RNA OH 5 PRIME TERMINUS``
    """
    residue_names = array.res_name
    known_nucleotides = nucleotide_names()
    return np.isin(residue_names, known_nucleotides)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def filter_canonical_amino_acids(array):
    """
    Select the atoms of an array that belong to canonical amino acid
    residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        A mask that is `True` at every index of `array` whose atom is
        part of a canonical amino acid residue.
    """
    # Canonical amino acids are recognized purely by residue name
    residue_names = array.res_name
    return np.isin(residue_names, _canonical_aa_list)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def filter_amino_acids(array):
    """
    Select the atoms of an array that belong to amino acid residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        A mask that is `True` at every index of `array` whose atom is
        part of an amino acid residue.

    Notes
    -----
    Amino acids are identified according to the PDB chemical component
    dictionary. A residue is considered an amino acid if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``D-BETA-PEPTIDE, C-GAMMA LINKING``,
    ``D-GAMMA-PEPTIDE, C-DELTA LINKING``,
    ``D-PEPTIDE LINKING``, ``D-PEPTIDE NH3 AMINO TERMINUS``,
    ``L-BETA-PEPTIDE, C-GAMMA LINKING``,
    ``L-GAMMA-PEPTIDE, C-DELTA LINKING``,
    ``L-PEPTIDE COOH CARBOXY TERMINUS``, ``L-PEPTIDE LINKING``,
    ``L-PEPTIDE NH3 AMINO TERMINUS``, ``PEPTIDE LINKING``
    """
    residue_names = array.res_name
    known_amino_acids = amino_acid_names()
    return np.isin(residue_names, known_amino_acids)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def filter_carbohydrates(array):
    """
    Select the atoms of an array that belong to carbohydrates.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        A mask that is `True` at every index of `array` whose atom is
        part of a carbohydrate.

    Notes
    -----
    Carbohydrates are identified according to the PDB chemical component
    dictionary. A residue is considered a carbohydrate if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``D-SACCHARIDE``, ``D-SACCHARIDE, ALPHA LINKING``,
    ``D-SACCHARIDE, BETA LINKING``, ``L-SACCHARIDE``,
    ``L-SACCHARIDE, ALPHA LINKING``, ``L-SACCHARIDE, BETA LINKING``,
    ``SACCHARIDE``
    """
    residue_names = array.res_name
    known_carbohydrates = carbohydrate_names()
    return np.isin(residue_names, known_carbohydrates)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _filter_atom_names(array, atom_names):
    """
    Create a mask that is `True` for every atom in `array` whose
    ``atom_name`` annotation is contained in `atom_names`.
    """
    names = array.atom_name
    return np.isin(names, atom_names)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def filter_peptide_backbone(array):
    """
    Select the peptide backbone atoms of an array.

    These are the "N", "CA" and "C" atoms of amino acids.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        A mask that is `True` at every index of `array` whose atom is
        part of the peptide backbone.
    """
    # An atom qualifies only if both its name and its residue match
    has_backbone_name = _filter_atom_names(array, _peptide_backbone_atoms)
    in_amino_acid = filter_amino_acids(array)
    return has_backbone_name & in_amino_acid
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def filter_phosphate_backbone(array):
    """
    Select the phosphate backbone atoms of an array.

    These are the P, O5', C5', C4', C3', and O3' atoms of nucleotides.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        A mask that is ``True`` at every index of `array` whose atom is
        part of the phosphate backbone.
    """
    # An atom qualifies only if both its name and its residue match
    has_backbone_name = _filter_atom_names(array, _phosphate_backbone_atoms)
    in_nucleotide = filter_nucleotides(array)
    return has_backbone_name & in_nucleotide
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
    """
    Filter for atoms such that their bond length with the next atom
    lies within the provided boundaries.

    The result will depend on the atoms' order.
    For instance, consider a molecule:

    .. code-block:: none

               C3
               |
            C1-C2-C4

    If the order corresponds to ``[C1, C2, C4, C3]``, the output will be
    ``[True, True, False, True]``.
    Note that the trailing atom will always evaluate to ``True``.

    Parameters
    ----------
    array : AtomArray
        The array to filter.
    min_len : float
        Minimum bond length.
    max_len : float
        Maximum bond length.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where an atom
        has a bond length with the next atom within [`min_len`, `max_len`]
        boundaries.
        For an array without any atoms an empty mask is returned.

    Notes
    -----
    Note that this function purely uses distances between consecutive atoms.
    A potential ``BondList`` is not considered here.
    """
    coord = array.coord
    if len(coord) == 0:
        # Without this guard the trailing `True` appended below would yield
        # a mask of length 1, which could not be used to index `array`
        return np.zeros(0, dtype=bool)
    # Distance between each atom and its successor in the array
    dist = np.linalg.norm(np.diff(coord, axis=0), axis=1)
    mask = (dist >= min_len) & (dist <= max_len)
    # The last atom has no successor and is always accepted
    return np.append(mask, True)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def _is_polymer(array, min_size, pol_type):
    """
    Check whether `array` contains at least `min_size` residues of the
    monomer class selected by `pol_type`.

    The monomer class is chosen by the first letter of `pol_type`:
    ``"p"`` selects amino acids, ``"n"`` nucleotides and ``"c"``
    carbohydrates.
    Any other value raises a :class:`ValueError`.
    """
    monomer_filters = (
        ("p", filter_amino_acids),
        ("n", filter_nucleotides),
        ("c", filter_carbohydrates),
    )
    for prefix, candidate in monomer_filters:
        if pol_type.startswith(prefix):
            select_monomers = candidate
            break
    else:
        raise ValueError(f"Unsupported polymer type {pol_type}")

    # Count only the residues that pass the monomer filter
    monomers = array[select_monomers(array)]
    return get_residue_count(monomers) >= min_size
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def filter_polymer(array, min_size=2, pol_type="peptide"):
    """
    Select the atoms that are part of a consecutive standard
    macromolecular polymer entity.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to filter.
    min_size : int
        The minimum number of monomers.
    pol_type : str
        The polymer type, either ``"peptide"``, ``"nucleotide"``, or ``"carbohydrate"``.
        Abbreviations are supported: ``"p"``, ``"pep"``, ``"n"``, etc.

    Returns
    -------
    filter : ndarray, dtype=bool
        A mask that is `True` at every index of `array` whose atom belongs
        to a consecutive polymer entity with at least `min_size` monomers.
    """
    # Import `check_res_id_continuity` here to avoid circular imports
    from biotite.structure.integrity import check_res_id_continuity

    # Cut the array into segments at the indices reported by
    # `check_res_id_continuity()`
    discontinuity_idx = check_res_id_continuity(array)
    segments = np.split(array, discontinuity_idx)
    # Each segment is judged as a whole:
    # all of its atoms share the same mask value
    segment_masks = [
        np.full(
            len(segment),
            _is_polymer(atom_array(segment), min_size=min_size, pol_type=pol_type),
            dtype=bool,
        )
        for segment in segments
    ]
    return np.concatenate(segment_masks)
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def filter_intersection(array, intersect):
    """
    Filter all atoms of one array that exist also in another array.

    An atom is defined as existent in the second array, if there is an
    atom in the second array that has the same annotation values in all
    categories that exist in both arrays.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.
    intersect : AtomArray
        Atoms in `array` that also exist in `intersect` are filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        exists also in `intersect`.

    Examples
    --------

    Creating an atom array from atoms:

    >>> array1 = AtomArray(length=5)
    >>> array1.chain_id = np.array(["A","B","C","D","E"])
    >>> array2 = AtomArray(length=3)
    >>> array2.chain_id = np.array(["D","B","C"])
    >>> array1 = array1[filter_intersection(array1, array2)]
    >>> print(array1.chain_id)
    ['B' 'C' 'D']
    """
    intersect_categories = intersect.get_annotation_categories()
    # Check atom equality only for categories,
    # which exist in both arrays
    shared_categories = [
        category
        for category in array.get_annotation_categories()
        if category in intersect_categories
    ]
    # Look up every annotation array once up front instead of
    # once per atom and category inside the nested loop below
    annotation_pairs = [
        (array.get_annotation(category), intersect.get_annotation(category))
        for category in shared_categories
    ]

    n_atoms = array.array_length()
    n_intersect = intersect.array_length()
    # Named `mask` to avoid shadowing the `filter` builtin
    mask = np.zeros(n_atoms, dtype=bool)
    for i in range(n_atoms):
        # Atoms of `intersect` that still agree with atom `i`
        # in every category checked so far
        matches = np.full(n_intersect, True, dtype=bool)
        for own_values, other_values in annotation_pairs:
            matches &= other_values == own_values[i]
        mask[i] = matches.any()
    return mask
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def filter_first_altloc(atoms, altloc_ids):
    """
    Select, for each residue, the atoms carrying the first *altloc* ID
    that appears in that residue.

    Structure files (PDB, PDBx) allow for duplicate atom records,
    in case a residue is found in multiple alternate locations
    (*altloc*).
    This function is used to remove such duplicate atoms by choosing a
    single *altloc ID* for an atom with other *altlocs* being removed.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The unfiltered structure to be filtered.
    altloc_ids : ndarray, shape=(n,), dtype='U1'
        An array containing the alternate location IDs for each
        atom in `atoms`.
        Can contain `'.'`, `'?'`, `' '`, `''` or a letter at each
        position.

    Returns
    -------
    filter : ndarray, dtype=bool
        For each residue, this array is True in the following cases:

        - The atom has no altloc ID (`'.'`, `'?'`, `' '`, `''`).
        - The atom has the same altloc ID (e.g. `'A'`, `'B'`, etc.)
          as the first atom in the residue that has an altloc ID.

    Notes
    -----
    The function will be rarely used by the end user, since this kind
    of filtering is usually automatically performed, when the structure
    is loaded from a file.
    The exception are structures that were read with `altloc` set to
    `True`.

    Examples
    --------

    >>> atoms = array([
    ...     Atom(coord=[1, 2, 3], res_id=1, atom_name="CA"),
    ...     Atom(coord=[4, 5, 6], res_id=1, atom_name="CB"),
    ...     Atom(coord=[6, 5, 4], res_id=1, atom_name="CB")
    ... ])
    >>> altloc_ids = np.array([".", "A", "B"])
    >>> filtered = atoms[filter_first_altloc(atoms, altloc_ids)]
    >>> print(filtered)
    1 CA 1.000 2.000 3.000
    1 CB 4.000 5.000 6.000
    """
    # Atoms without any altloc code are always kept
    keep = np.isin(altloc_ids, [".", "?", " ", ""])

    # Per residue, additionally keep the atoms
    # that share the first letter altloc ID
    boundaries = get_residue_starts(atoms, add_exclusive_stop=True)
    for start, stop in zip(boundaries[:-1], boundaries[1:]):
        ids_in_residue = altloc_ids[start:stop]
        first_id = next((loc for loc in ids_in_residue if loc.isalpha()), None)
        if first_id is None:
            # No letter altloc ID in this residue -> nothing to add
            continue
        keep[start:stop] |= ids_in_residue == first_id

    return keep
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def filter_highest_occupancy_altloc(atoms, altloc_ids, occupancies):
    """
    Select, for each residue, the atoms carrying the *altloc* ID
    whose occupancy is highest within that residue.

    Structure files (PDB, PDBx) allow for duplicate atom records,
    in case a residue is found in multiple alternate locations
    (*altloc*).
    This function is used to remove such duplicate atoms by choosing a
    single *altloc ID* for an atom with other *altlocs* being removed.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The unfiltered structure to be filtered.
    altloc_ids : ndarray, shape=(n,), dtype='U1'
        An array containing the alternate location IDs for each
        atom in `atoms`.
        Can contain `'.'`, `'?'`, `' '`, `''` or a letter at each
        position.
    occupancies : ndarray, shape=(n,), dtype=float
        An array containing the occupancy values for each atom in
        `atoms`.

    Returns
    -------
    filter : ndarray, dtype=bool
        For each residue, this array is True in the following cases:

        - The atom has no altloc ID
          (``'.'``, ``'?'``, ``' '``, ``''``).
        - The atom has the altloc ID (e.g. ``'A'``, ``'B'``, etc.),
          of which the corresponding occupancy values are highest
          for the **entire** residue.

    Notes
    -----
    The function will be rarely used by the end user, since this kind
    of filtering is usually automatically performed, when the structure
    is loaded from a file.
    The exception are structures that were read with ``altloc`` set to
    ``True``.

    Examples
    --------

    >>> atoms = array([
    ...     Atom(coord=[1, 2, 3], res_id=1, atom_name="CA"),
    ...     Atom(coord=[4, 5, 6], res_id=1, atom_name="CB"),
    ...     Atom(coord=[6, 5, 4], res_id=1, atom_name="CB")
    ... ])
    >>> altloc_ids = np.array([".", "A", "B"])
    >>> occupancies = np.array([1.0, 0.1, 0.9])
    >>> filtered = atoms[filter_highest_occupancy_altloc(
    ...     atoms, altloc_ids, occupancies
    ... )]
    >>> print(filtered)
    1 CA 1.000 2.000 3.000
    1 CB 6.000 5.000 4.000
    """
    # Atoms without any altloc code are always kept
    keep = np.isin(altloc_ids, [".", "?", " ", ""])

    # Per residue, additionally keep the atoms of the altloc ID
    # with the highest sum of occupancies
    boundaries = get_residue_starts(atoms, add_exclusive_stop=True)
    for start, stop in zip(boundaries[:-1], boundaries[1:]):
        res_occupancies = occupancies[start:stop]
        res_altloc_ids = altloc_ids[start:stop]

        # Sorted, so that the alphabetically first ID wins a tie
        candidates = sorted({loc for loc in res_altloc_ids if loc.isalpha()})
        if not candidates:
            # No letter altloc ID in this residue -> nothing to add
            continue

        best_sum = -1.0
        best_id = None
        for candidate in candidates:
            total = np.sum(res_occupancies[res_altloc_ids == candidate])
            # Strict comparison: a later ID must exceed the current best
            if total > best_sum:
                best_sum, best_id = total, candidate
        keep[start:stop] |= res_altloc_ids == best_id

    return keep
|