biotite 1.5.0__cp314-cp314-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-314-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-314-darwin.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-314-darwin.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-314-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-314-darwin.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-314-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-314-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-314-darwin.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-314-darwin.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-314-darwin.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-314-darwin.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-314-darwin.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-314-darwin.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-314-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-314-darwin.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Conversion of structures into the *Protein Blocks* structural alphabet.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.structure.alphabet"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
__all__ = ["ProteinBlocksSequence", "to_protein_blocks"]
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from biotite.sequence.alphabet import LetterAlphabet
|
|
15
|
+
from biotite.sequence.sequence import Sequence
|
|
16
|
+
from biotite.structure.chains import get_chain_starts
|
|
17
|
+
from biotite.structure.geometry import dihedral_backbone
|
|
18
|
+
|
|
19
|
+
# PB reference angles, adapted from PBxplore
# One row per Protein Block; the row index is used directly as symbol code of
# the chosen block in `_to_protein_blocks()` (16 rows for the 16 defined
# symbols, the undefined symbol has no reference row).
# Each row holds eight backbone dihedral angles in degrees, compared against
# the measured angles of the residue window around residue i in the order
# psi(i-2), phi(i-1), psi(i-1), phi(i), psi(i), phi(i+1), psi(i+1), phi(i+2)
PB_ANGLES = np.array(
    [
        [41.14, 75.53, 13.92, -99.80, 131.88, -96.27, 122.08, -99.68],
        [108.24, -90.12, 119.54, -92.21, -18.06, -128.93, 147.04, -99.90],
        [-11.61, -105.66, 94.81, -106.09, 133.56, -106.93, 135.97, -100.63],
        [141.98, -112.79, 132.20, -114.79, 140.11, -111.05, 139.54, -103.16],
        [133.25, -112.37, 137.64, -108.13, 133.00, -87.30, 120.54, 77.40],
        [116.40, -105.53, 129.32, -96.68, 140.72, -74.19, -26.65, -94.51],
        [0.40, -81.83, 4.91, -100.59, 85.50, -71.65, 130.78, 84.98],
        [119.14, -102.58, 130.83, -67.91, 121.55, 76.25, -2.95, -90.88],
        [130.68, -56.92, 119.26, 77.85, 10.42, -99.43, 141.40, -98.01],
        [114.32, -121.47, 118.14, 82.88, -150.05, -83.81, 23.35, -85.82],
        [117.16, -95.41, 140.40, -59.35, -29.23, -72.39, -25.08, -76.16],
        [139.20, -55.96, -32.70, -68.51, -26.09, -74.44, -22.60, -71.74],
        [-39.62, -64.73, -39.52, -65.54, -38.88, -66.89, -37.76, -70.19],
        [-35.34, -65.03, -38.12, -66.34, -29.51, -89.10, -2.91, 77.90],
        [-45.29, -67.44, -27.72, -87.27, 5.13, 77.49, 30.71, -93.23],
        [-27.09, -86.14, 0.30, 59.85, 21.51, -96.30, 132.67, -92.91],
    ]
)  # fmt: skip
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ProteinBlocksSequence(Sequence):
    """
    Representation of a structure in the *Protein Blocks* structural alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    sequence : iterable object, optional
        The *Protein Blocks* sequence.
        This may either be a list or a string.
        May take upper or lower case letters:
        the symbols are converted to lower case before they are encoded.
        By default the sequence is empty.

    See Also
    --------
    to_protein_blocks : Create *Protein Blocks* sequences from a structure.

    References
    ----------

    .. footbibliography::
    """

    # 16 block symbols plus the trailing symbol for undefined positions
    alphabet = LetterAlphabet("abcdefghijklmnopz")
    undefined_symbol = "z"

    def __init__(self, sequence=""):
        # The alphabet contains lower case letters only, so both kinds of
        # input need to be normalized to lower case.
        # (Upper-casing the symbols of a list would make every symbol
        # unknown to the alphabet.)
        if isinstance(sequence, str):
            sequence = sequence.lower()
        else:
            sequence = [symbol.lower() for symbol in sequence]
        super().__init__(sequence)

    def get_alphabet(self):
        """
        Get the alphabet shared by all *Protein Blocks* sequences.

        Returns
        -------
        alphabet : LetterAlphabet
            The class-level alphabet.
        """
        return ProteinBlocksSequence.alphabet

    def remove_undefined(self):
        """
        Remove undefined symbols from the sequence.

        Returns
        -------
        filtered_sequence : ProteinBlocksSequence
            The sequence without undefined symbols.
        """
        undefined_code = ProteinBlocksSequence.alphabet.encode(
            ProteinBlocksSequence.undefined_symbol
        )
        # Filter on the symbol codes directly and assign them to a new empty
        # sequence, so that no symbols need to be decoded and encoded again
        filtered_code = self.code[self.code != undefined_code]
        filtered_sequence = ProteinBlocksSequence()
        filtered_sequence.code = filtered_code
        return filtered_sequence
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def to_protein_blocks(atoms):
    """
    Translate every chain of a structure into the *Protein Blocks*
    structural alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    atoms : AtomArray
        The atom array to encode.
        May contain multiple chains.

    Returns
    -------
    sequences : list of Sequence, length=n
        The encoded *Protein Blocks* sequence for each peptide chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> sequences, chain_starts = to_protein_blocks(atom_array)
    >>> print(sequences[0])
    zzmmmmmnopjmnopacdzz
    """
    # The exclusive stop appended to the chain starts closes the last chain,
    # hence each pair of consecutive boundaries delimits exactly one chain
    boundaries = get_chain_starts(atoms, add_exclusive_stop=True)
    chain_starts = boundaries[:-1]
    chain_stops = boundaries[1:]
    sequences = [
        _to_protein_blocks(atoms[first:last])
        for first, last in zip(chain_starts, chain_stops)
    ]
    return sequences, chain_starts
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _to_protein_blocks(chain):
    """
    Encode a single chain into a *Protein Blocks* sequence.

    Parameters
    ----------
    chain : AtomArray
        The atoms of one chain.

    Returns
    -------
    pb_sequence : ProteinBlocksSequence
        The encoded sequence, one symbol per backbone dihedral angle entry.
        Positions without a complete set of angles get the undefined symbol.
    """
    unknown_code = ProteinBlocksSequence.alphabet.encode(
        ProteinBlocksSequence.undefined_symbol
    )

    phi, psi, _ = dihedral_backbone(chain)

    # Residue i is characterized by the angles of the window i-2 ... i+2:
    # psi(i-2), phi(i-1), psi(i-1), phi(i), psi(i), phi(i+1), psi(i+1), phi(i+2)
    # The slices shift each angle array accordingly
    windows = (
        psi[:-4],
        phi[1:-3],
        psi[1:-3],
        phi[2:-2],
        psi[2:-2],
        phi[3:-1],
        psi[3:-1],
        phi[4:],
    )
    # The first and last two positions have no complete window and stay NaN
    measured = np.full((len(phi), 8), np.nan)
    for column, window in enumerate(windows):
        measured[2:-2, column] = window
    measured = np.rad2deg(measured)

    # Deviation of each reference block to each position,
    # wrapped into the interval [-180, 180)
    deviation = (PB_ANGLES[:, np.newaxis] - measured[np.newaxis, :] + 180) % 360 - 180
    # The sum of squares is sufficient here, as only the minimum is of interest
    rmsda = np.sum(deviation**2, axis=-1)

    # Where RMSDA is NaN (missing atoms/residues or chain ends)
    # the symbol stays undefined
    codes = np.full(len(measured), unknown_code, dtype=np.uint8)
    is_defined = ~np.isnan(rmsda).any(axis=0)
    # Choose the PB, where the RMSDA to the reference angles is lowest
    # Due to the definition of Biotite symbol codes
    # the index of the chosen PB is directly the symbol code
    codes[is_defined] = np.argmin(rmsda[:, is_defined], axis=0)

    # Wrap the symbol codes into a sequence object
    # without decoding them into symbols first
    pb_sequence = ProteinBlocksSequence()
    pb_sequence.code = codes
    return pb_sequence
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Parser for extracting weights from Keras files.
|
|
7
|
+
|
|
8
|
+
Adapted from `moof2k/kerasify <https://github.com/moof2k/kerasify>`_.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
__name__ = "biotite.structure.alphabet"
|
|
12
|
+
__author__ = "Martin Larralde"
|
|
13
|
+
__all__ = ["load_kerasify"]
|
|
14
|
+
|
|
15
|
+
import enum
|
|
16
|
+
import functools
|
|
17
|
+
import itertools
|
|
18
|
+
import struct
|
|
19
|
+
import numpy as np
|
|
20
|
+
from biotite.structure.alphabet.layers import DenseLayer, Layer
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class LayerType(enum.IntEnum):
    """
    Integer tags identifying the kind of a serialized layer.

    `KerasifyParser.read()` reads one such tag at the beginning of each
    layer.
    Only ``DENSE`` is handled there, every other member leads to a
    ``NotImplementedError``.
    """

    # The values are part of the file format and must not be renumbered
    # NOTE(review): presumably mirrors the numbering used by the kerasify
    # serializer - confirm against upstream before adding members
    DENSE = 1
    CONVOLUTION2D = 2
    FLATTEN = 3
    ELU = 4
    ACTIVATION = 5
    MAXPOOLING2D = 6
    LSTM = 7
    EMBEDDING = 8
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ActivationType(enum.IntEnum):
    """
    Integer tags identifying the activation function of a serialized layer.

    `KerasifyParser.read()` reads one such tag after the biases of a dense
    layer.
    Only ``LINEAR`` and ``RELU`` are accepted there, every other member leads
    to a ``NotImplementedError``.
    """

    # The values are part of the file format and must not be renumbered
    # NOTE(review): presumably mirrors the numbering used by the kerasify
    # serializer - confirm against upstream before adding members
    LINEAR = 1
    RELU = 2
    SOFTPLUS = 3
    SIGMOID = 4
    TANH = 5
    HARD_SIGMOID = 6
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class KerasifyParser:
    """
    An incomplete parser for model files serialized with `kerasify`.

    The parser is an iterator: each iteration parses and yields the next
    layer of the file, until the number of layers announced in the file
    header is exhausted.

    Parameters
    ----------
    file : file-like
        The ``.kerasify`` file to parse.
        It must be opened in binary mode and support ``readinto()``.

    Notes
    -----
    Only dense layers are supported, since the ``foldseek`` VQ-VAE model
    is only using 3 dense layers.
    """

    def __init__(self, file) -> None:
        self.file = file
        # Scratch buffer that is reused for every read and grown on demand
        self.buffer = bytearray(1024)
        # The file starts with the number of layers that follow
        (self.n_layers,) = self._get("I")

    def read(self):
        """
        Parse the next layer from the file.

        Returns
        -------
        layer : DenseLayer or None
            The parsed layer or ``None``, if all layers have been read already.

        Raises
        ------
        NotImplementedError
            If the layer is not a dense layer or its activation function is
            neither linear nor ReLU.
        ValueError
            If the layer or activation tag is not a member of the respective
            enum.
        EOFError
            If the file ends before the layer is complete.
        """
        if self.n_layers == 0:
            return None

        self.n_layers -= 1
        layer_type = LayerType(self._get("I")[0])
        if layer_type == LayerType.DENSE:
            # Dense layer: weight matrix shape and bias length come first,
            # followed by the weights, the biases and the activation tag
            (w0,) = self._get("I")
            (w1,) = self._get("I")
            (b0,) = self._get("I")
            # The arrays are views on the shared scratch buffer,
            # so they must be copied before the next read overwrites it
            weights = (
                np.frombuffer(self._read(f"={w0 * w1}f"), dtype="f4")
                .reshape(w0, w1)
                .copy()
            )
            biases = np.frombuffer(self._read(f"={b0}f"), dtype="f4").copy()
            activation = ActivationType(self._get("I")[0])
            if activation not in (ActivationType.LINEAR, ActivationType.RELU):
                raise NotImplementedError(
                    f"Unsupported activation type: {activation!r}"
                )
            return DenseLayer(weights, biases, activation == ActivationType.RELU)
        else:
            raise NotImplementedError(f"Unsupported layer type: {layer_type!r}")

    def __iter__(self):
        return self

    def __next__(self) -> Layer:
        layer = self.read()
        if layer is None:
            raise StopIteration
        return layer

    def _read(self, format: str) -> memoryview:
        """
        Read as many bytes from the file as the given ``struct`` format
        requires.

        Parameters
        ----------
        format : str
            The ``struct`` format string determining the number of bytes.

        Returns
        -------
        view : memoryview
            A view on the internal scratch buffer containing the read bytes.
            It is only valid until the next read.

        Raises
        ------
        EOFError
            If the file ends before the requested number of bytes is read.
        """
        n = struct.calcsize(format)
        if len(self.buffer) < n:
            self.buffer.extend(bytes(n - len(self.buffer)))
        v = memoryview(self.buffer)[:n]
        # A single 'readinto()' may deliver fewer bytes than requested
        # (end of file or unbuffered stream).
        # Without this check the stale content of the scratch buffer would be
        # silently interpreted as model data.
        n_read = 0
        while n_read < n:
            n_chunk = self.file.readinto(v[n_read:])  # type: ignore
            if not n_chunk:
                raise EOFError(
                    f"Unexpected end of file: expected {n} bytes, got {n_read}"
                )
            n_read += n_chunk
        return v

    def _get(self, format: str):
        """
        Read and unpack the values described by the given ``struct`` format.

        Parameters
        ----------
        format : str
            The ``struct`` format string.

        Returns
        -------
        values : tuple
            The unpacked values.
        """
        v = self._read(format)
        return struct.unpack(format, v)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@functools.cache
def load_kerasify(file_path):
    """
    Load the model layers from a ``.kerasify`` file.

    The result is cached per `file_path`: repeated calls with the same path
    return the same tuple without reading the file again.

    Parameters
    ----------
    file_path : str
        The path to the ``.kerasify`` file.

    Returns
    -------
    layers : tuple of Layer
        The model layers.
    """
    with open(file_path, "rb") as kerasify_file:
        parser = KerasifyParser(kerasify_file)
        # Exhaust the parser while the file is still open
        layers = tuple(parser)
    return layers
|