biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Conversion of structures into the *Protein Blocks* structural alphabet.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.structure.alphabet"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
__all__ = ["ProteinBlocksSequence", "to_protein_blocks"]
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from biotite.sequence.alphabet import LetterAlphabet
|
|
15
|
+
from biotite.sequence.sequence import Sequence
|
|
16
|
+
from biotite.structure.chains import get_chain_starts
|
|
17
|
+
from biotite.structure.geometry import dihedral_backbone
|
|
18
|
+
|
|
19
|
+
# PB reference angles, adapted from PBxplore
#
# Each row holds the eight reference backbone dihedral angles of one
# protein block. The row index is used directly as symbol code, so the
# 16 rows correspond to the letters 'a' to 'p' of the
# ``ProteinBlocksSequence`` alphabet in this order.
# For the residue at position ``i`` the columns are compared against
#     psi(i-2), phi(i-1), psi(i-1), phi(i), psi(i), phi(i+1), psi(i+1), phi(i+2)
# The values are compared with measured angles converted to degrees.
PB_ANGLES = np.array(
    [
        [41.14, 75.53, 13.92, -99.80, 131.88, -96.27, 122.08, -99.68],
        [108.24, -90.12, 119.54, -92.21, -18.06, -128.93, 147.04, -99.90],
        [-11.61, -105.66, 94.81, -106.09, 133.56, -106.93, 135.97, -100.63],
        [141.98, -112.79, 132.20, -114.79, 140.11, -111.05, 139.54, -103.16],
        [133.25, -112.37, 137.64, -108.13, 133.00, -87.30, 120.54, 77.40],
        [116.40, -105.53, 129.32, -96.68, 140.72, -74.19, -26.65, -94.51],
        [0.40, -81.83, 4.91, -100.59, 85.50, -71.65, 130.78, 84.98],
        [119.14, -102.58, 130.83, -67.91, 121.55, 76.25, -2.95, -90.88],
        [130.68, -56.92, 119.26, 77.85, 10.42, -99.43, 141.40, -98.01],
        [114.32, -121.47, 118.14, 82.88, -150.05, -83.81, 23.35, -85.82],
        [117.16, -95.41, 140.40, -59.35, -29.23, -72.39, -25.08, -76.16],
        [139.20, -55.96, -32.70, -68.51, -26.09, -74.44, -22.60, -71.74],
        [-39.62, -64.73, -39.52, -65.54, -38.88, -66.89, -37.76, -70.19],
        [-35.34, -65.03, -38.12, -66.34, -29.51, -89.10, -2.91, 77.90],
        [-45.29, -67.44, -27.72, -87.27, 5.13, 77.49, 30.71, -93.23],
        [-27.09, -86.14, 0.30, 59.85, 21.51, -96.30, 132.67, -92.91],
    ]
)  # fmt: skip
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ProteinBlocksSequence(Sequence):
    """
    Representation of a structure in the *Protein Blocks* structural alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    sequence : iterable object, optional
        The *Protein Blocks* sequence.
        This may either be a list or a string.
        May take upper or lower case letters.
        By default the sequence is empty.

    See Also
    --------
    to_protein_blocks : Create *Protein Blocks* sequences from a structure.

    References
    ----------

    .. footbibliography::
    """

    # The 16 protein blocks 'a' to 'p', followed by the symbol for
    # positions where no protein block could be assigned
    alphabet = LetterAlphabet("abcdefghijklmnopz")
    undefined_symbol = "z"

    def __init__(self, sequence=""):
        # The alphabet contains lower case letters only, hence both
        # strings and other iterables of symbols are normalized to lower
        # case before they are passed on for encoding
        if isinstance(sequence, str):
            sequence = sequence.lower()
        else:
            sequence = [symbol.lower() for symbol in sequence]
        super().__init__(sequence)

    def get_alphabet(self):
        return ProteinBlocksSequence.alphabet

    def remove_undefined(self):
        """
        Remove undefined symbols from the sequence.

        Returns
        -------
        filtered_sequence : ProteinBlocksSequence
            The sequence without undefined symbols.
        """
        undefined_code = ProteinBlocksSequence.alphabet.encode(
            ProteinBlocksSequence.undefined_symbol
        )
        filtered_code = self.code[self.code != undefined_code]
        # Assign the code directly to avoid a decode/encode round trip
        filtered_sequence = ProteinBlocksSequence()
        filtered_sequence.code = filtered_code
        return filtered_sequence
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def to_protein_blocks(atoms):
    """
    Translate every chain of a structure into the *Protein Blocks*
    structural alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    atoms : AtomArray
        The structure to be encoded.
        It is allowed to comprise more than one chain.

    Returns
    -------
    sequences : list of Sequence, length=n
        One *Protein Blocks* sequence for each chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        For each chain, the index of its first atom.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> sequences, chain_starts = to_protein_blocks(atom_array)
    >>> print(sequences[0])
    zzmmmmmnopjmnopacdzz
    """
    # With the exclusive stop appended, each pair of consecutive entries
    # delimits exactly one chain
    boundaries = get_chain_starts(atoms, add_exclusive_stop=True)
    first_atoms = boundaries[:-1]
    sequences = [
        _to_protein_blocks(atoms[begin:end])
        for begin, end in zip(first_atoms, boundaries[1:])
    ]
    return sequences, first_atoms
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _to_protein_blocks(chain):
    """
    Assign a protein block to each position of a single chain.

    Parameters
    ----------
    chain : AtomArray
        The atoms to encode; the public entry point passes one chain at
        a time.

    Returns
    -------
    pb_sequence : ProteinBlocksSequence
        One symbol per entry of the dihedral angle arrays.
        Positions with incomplete angle windows, which always includes
        the first and last two positions, get the undefined symbol.
    """
    unknown_code = ProteinBlocksSequence.alphabet.encode(
        ProteinBlocksSequence.undefined_symbol
    )

    phi, psi, _ = dihedral_backbone(chain)

    # For position i a window spans the angles
    # psi(i-2), phi(i-1), psi(i-1), phi(i), psi(i), phi(i+1), psi(i+1), phi(i+2)
    # Each entry gives the source angles and the slice that aligns them
    # with the positions 2 to n-2, which have a complete window
    column_sources = (
        (psi, slice(None, -4)),
        (phi, slice(1, -3)),
        (psi, slice(1, -3)),
        (phi, slice(2, -2)),
        (psi, slice(2, -2)),
        (phi, slice(3, -1)),
        (psi, slice(3, -1)),
        (phi, slice(4, None)),
    )
    window_angles = np.full((len(phi), 8), np.nan)
    for column, (angles, residue_slice) in enumerate(column_sources):
        window_angles[2:-2, column] = angles[residue_slice]
    window_angles = np.rad2deg(window_angles)

    # Deviation of every reference PB from every window,
    # wrapped into the range [-180, 180)
    deviation = PB_ANGLES[:, np.newaxis] - window_angles[np.newaxis, :]
    deviation = (deviation + 180) % 360 - 180
    # Summed squared deviation: ranks the PBs like the angle RMSD does
    squared_deviation = np.sum(deviation**2, axis=-1)

    # A NaN marks missing atoms/residues or chain ends:
    # these positions keep the undefined symbol
    codes = np.full(len(window_angles), unknown_code, dtype=np.uint8)
    is_assignable = ~np.isnan(squared_deviation).any(axis=0)
    # Choose the PB with the lowest deviation from its reference angles;
    # the index of that PB is at the same time its symbol code
    codes[is_assignable] = np.argmin(squared_deviation[:, is_assignable], axis=0)

    # Wrap the symbol codes into a sequence object
    pb_sequence = ProteinBlocksSequence()
    pb_sequence.code = codes
    return pb_sequence
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Parser for extracting weights from Keras files.
|
|
7
|
+
|
|
8
|
+
Adapted from `moof2k/kerasify <https://github.com/moof2k/kerasify>`_.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
__name__ = "biotite.structure.alphabet"
|
|
12
|
+
__author__ = "Martin Larralde"
|
|
13
|
+
__all__ = ["load_kerasify"]
|
|
14
|
+
|
|
15
|
+
import enum
|
|
16
|
+
import functools
|
|
17
|
+
import itertools
|
|
18
|
+
import struct
|
|
19
|
+
import numpy as np
|
|
20
|
+
from biotite.structure.alphabet.layers import DenseLayer, Layer
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class LayerType(enum.IntEnum):
    """
    Integer identifiers for the kind of a serialized layer.

    ``KerasifyParser`` reads such an identifier as unsigned integer at
    the start of each layer and currently only handles ``DENSE``;
    all other members lead to a ``NotImplementedError`` there.
    """

    DENSE = 1
    CONVOLUTION2D = 2
    FLATTEN = 3
    ELU = 4
    ACTIVATION = 5
    MAXPOOLING2D = 6
    LSTM = 7
    EMBEDDING = 8
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ActivationType(enum.IntEnum):
    """
    Integer identifiers for the activation function of a layer.

    ``KerasifyParser`` reads such an identifier as unsigned integer at
    the end of a dense layer and only accepts ``LINEAR`` and ``RELU``;
    all other members lead to a ``NotImplementedError`` there.
    """

    LINEAR = 1
    RELU = 2
    SOFTPLUS = 3
    SIGMOID = 4
    TANH = 5
    HARD_SIGMOID = 6
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class KerasifyParser:
    """
    An incomplete parser for model files serialized with `kerasify`.

    The parser is an iterator that yields one layer per iteration.

    Parameters
    ----------
    file : file-like
        The ``.kerasify`` file to parse.
        It must be opened in binary mode and provide ``readinto()``.

    Raises
    ------
    EOFError
        If the file ends before all expected bytes could be read.

    Notes
    -----
    Only dense layers are supported, since the ``foldseek`` VQ-VAE model
    is only using 3 dense layers.
    """

    def __init__(self, file) -> None:
        self.file = file
        # Scratch buffer that is reused for every read and grown on demand
        self.buffer = bytearray(1024)
        # The file starts with the number of layers
        (self.n_layers,) = self._get("I")

    def read(self):
        """
        Parse the next layer from the file.

        Returns
        -------
        layer : DenseLayer or None
            The parsed layer.
            ``None``, if all layers have already been read.

        Raises
        ------
        NotImplementedError
            If the layer type or its activation function is not supported.
        EOFError
            If the file ends within the layer.
        """
        if self.n_layers == 0:
            return None

        self.n_layers -= 1
        layer_type = LayerType(self._get("I")[0])
        if layer_type == LayerType.DENSE:
            # Layout: weight rows, weight columns, bias count,
            # weight values, bias values, activation type
            (w0,) = self._get("I")
            (w1,) = self._get("I")
            (b0,) = self._get("I")
            # The view returned by `_read()` is only valid until the next
            # read, hence the values are copied immediately
            weights = (
                np.frombuffer(self._read(f"={w0 * w1}f"), dtype="f4")
                .reshape(w0, w1)
                .copy()
            )
            biases = np.frombuffer(self._read(f"={b0}f"), dtype="f4").copy()
            activation = ActivationType(self._get("I")[0])
            if activation not in (ActivationType.LINEAR, ActivationType.RELU):
                raise NotImplementedError(
                    f"Unsupported activation type: {activation!r}"
                )
            return DenseLayer(weights, biases, activation == ActivationType.RELU)
        else:
            raise NotImplementedError(f"Unsupported layer type: {layer_type!r}")

    def __iter__(self):
        return self

    def __next__(self) -> "Layer":
        layer = self.read()
        if layer is None:
            raise StopIteration
        return layer

    def _read(self, format: str) -> memoryview:
        """
        Read as many bytes as `format` requires into the scratch buffer.

        Parameters
        ----------
        format : str
            A ``struct`` format string that determines the number of bytes.

        Returns
        -------
        view : memoryview
            A view on the scratch buffer containing the bytes read.
            Its content is overwritten by the next read.

        Raises
        ------
        EOFError
            If the file ends before all bytes could be read.
        """
        n = struct.calcsize(format)
        if len(self.buffer) < n:
            self.buffer.extend(bytes(n - len(self.buffer)))
        v = memoryview(self.buffer)[:n]
        # `readinto()` may deliver fewer bytes than requested:
        # continue until the view is filled, and fail on a premature end
        # instead of silently parsing stale buffer content
        n_read = 0
        while n_read < n:
            n_chunk = self.file.readinto(v[n_read:])  # type: ignore
            if not n_chunk:
                raise EOFError(
                    f"Unexpected end of file: "
                    f"expected {n} bytes, but got only {n_read}"
                )
            n_read += n_chunk
        return v

    def _get(self, format: str):
        """
        Read and unpack the values described by `format`.

        Parameters
        ----------
        format : str
            A ``struct`` format string.

        Returns
        -------
        values : tuple
            The unpacked values.
        """
        v = self._read(format)
        return struct.unpack(format, v)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@functools.cache
def load_kerasify(file_path):
    """
    Read all model layers stored in a ``.kerasify`` file.

    The result is cached, so the same path is parsed only once.

    Parameters
    ----------
    file_path : str
        Location of the ``.kerasify`` file.

    Returns
    -------
    layers : tuple of Layer
        The layers of the model, in the order they appear in the file.
    """
    with open(file_path, "rb") as file:
        parser = KerasifyParser(file)
        # Exhaust the parser while the file is still open
        layers = tuple(parser)
    return layers
|