biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for converting structures to structural alphabet sequences.
|
|
7
|
+
|
|
8
|
+
Structural alphabets represent the local geometry of each residue in a structure as
|
|
9
|
+
a symbol in a sequence.
|
|
10
|
+
This allows using sequence-based functionality from :mod:`biotite.sequence` on
|
|
11
|
+
structural data.
|
|
12
|
+
|
|
13
|
+
For each supported structural alphabet, this subpackage provides a conversion function
|
|
14
|
+
that converts each chain of a given structure into a :class:`Sequence` object from the
|
|
15
|
+
respective structural alphabet.
|
|
16
|
+
|
|
17
|
+
Note that the structural alphabets use lower-case letters as symbols, in order to
|
|
18
|
+
distinguish them better from the nucleotide and amino acid alphabets.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
__name__ = "biotite.structure.alphabet"
|
|
22
|
+
__author__ = "Martin Larralde, Patrick Kunzmann"
|
|
23
|
+
|
|
24
|
+
from .i3d import *
|
|
25
|
+
from .pb import *
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Implementation of the encoder neural network adapted from ``foldseek``.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.structure.alphabet"
|
|
10
|
+
__author__ = "Martin Larralde"
|
|
11
|
+
__all__ = ["Encoder", "VirtualCenterEncoder", "PartnerIndexEncoder", "FeatureEncoder"]
|
|
12
|
+
|
|
13
|
+
import abc
|
|
14
|
+
from importlib.resources import files as resource_files
|
|
15
|
+
import numpy
|
|
16
|
+
import numpy.ma
|
|
17
|
+
from biotite.structure.alphabet.layers import CentroidLayer, Model
|
|
18
|
+
from biotite.structure.alphabet.unkerasify import load_kerasify
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class _BaseEncoder(abc.ABC):
|
|
22
|
+
@abc.abstractmethod
|
|
23
|
+
def encode(self, ca, cb, n, c):
|
|
24
|
+
"""
|
|
25
|
+
Encode the given atom coordinates to a different representation.
|
|
26
|
+
|
|
27
|
+
Parameters
|
|
28
|
+
----------
|
|
29
|
+
ca, cb, n, c : ndarray, shape=(n, 3), dtype=float
|
|
30
|
+
The coordinates of the ``CA``, ``CB``, ``N`` and ``C`` atoms for each
|
|
31
|
+
residue.
|
|
32
|
+
*NaN* if missing, e.g. ``CB`` for glycine.
|
|
33
|
+
|
|
34
|
+
Returns
|
|
35
|
+
-------
|
|
36
|
+
encoded : MaskedArray, shape=(n, m), dtype=float
|
|
37
|
+
The encoded representation.
|
|
38
|
+
"""
|
|
39
|
+
raise NotImplementedError
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class VirtualCenterEncoder(_BaseEncoder):
    r"""
    An encoder for converting a protein structure to a virtual center.

    For each residue, the coordinates of the virtual center are computed
    from the coordinates of the ``CA``, ``CB`` and ``N`` atoms. The virtual center
    :math:`V` is defined by the angle :math:`\theta = \angle V C_{\alpha} C_{\beta}`,
    the dihedral angle :math:`\tau = \angle V C_{\alpha} C_{\beta} N` and the length
    :math:`l = |V - C_{\alpha}|`. The default parameters used
    in ``foldseek`` were selected after optimization on a validation set.

    Parameters
    ----------
    distance_alpha_beta : float
        The default distance between the ``CA`` and ``CB`` atoms to use when
        reconstructing missing *Cβ* coordinates.
    distance_alpha_v : float
        The distance between the virtual center *V* and the ``CA`` atom, used to compute
        the virtual center coordinates.
    theta : float
        The angle θ between the virtual center *V*, the ``CA`` and ``CB`` atoms, used to
        compute the virtual center coordinates.
        Given in degrees.
    tau : float
        The dihedral angle τ between the virtual center *V* and the ``CA``, ``CB``
        and ``N`` atoms, used to compute the virtual center coordinates.
        Given in degrees.
    """

    _DISTANCE_ALPHA_BETA = 1.5336

    def __init__(
        self,
        *,
        distance_alpha_beta=_DISTANCE_ALPHA_BETA,
        distance_alpha_v=2.0,
        theta=270.0,
        tau=0.0,
    ):
        self.distance_alpha_beta = distance_alpha_beta
        self.distance_alpha_v = distance_alpha_v
        # The angle setters also cache the sine and cosine of the angle.
        self.theta = theta
        self.tau = tau

    @property
    def theta(self):
        # Stored in radians, exposed in degrees.
        return numpy.rad2deg(self._theta)

    @theta.setter
    def theta(self, theta):
        radians = numpy.deg2rad(theta)
        self._theta = radians
        self._cos_theta = numpy.cos(radians)
        self._sin_theta = numpy.sin(radians)

    @property
    def tau(self):
        # Stored in radians, exposed in degrees.
        return numpy.rad2deg(self._tau)

    @tau.setter
    def tau(self, tau):
        radians = numpy.deg2rad(tau)
        self._tau = radians
        self._cos_tau = numpy.cos(radians)
        self._sin_tau = numpy.sin(radians)

    @staticmethod
    def _rotate(vector, axis, cos_angle, sin_angle):
        """
        Rotate each row of `vector` about the matching row of `axis`
        (Rodrigues' rotation formula), given the cosine and sine of the
        rotation angle.
        """
        projection = (axis * vector).sum(axis=-1).reshape(-1, 1)
        return (
            vector * cos_angle
            + numpy.cross(axis, vector) * sin_angle
            + axis * projection * (1 - cos_angle)
        )

    def _compute_virtual_center(self, ca, cb, n):
        """
        Compute the virtual center of each residue from its ``CA``, ``CB``
        and ``N`` coordinates.
        """
        assert ca.shape == n.shape
        assert ca.shape == cb.shape
        alpha_to_beta = cb - ca
        alpha_to_n = n - ca
        # normal angle: rotate CA->CB by theta about the normal of the
        # plane spanned by CA->CB and CA->N
        plane_normal = _normalize(
            numpy.cross(alpha_to_beta, alpha_to_n, axis=-1), inplace=True
        )
        center = self._rotate(
            alpha_to_beta, plane_normal, self._cos_theta, self._sin_theta
        )
        # dihedral angle: rotate the result by tau about the CA->N direction
        n_direction = _normalize(alpha_to_n, inplace=True)
        center = self._rotate(center, n_direction, self._cos_tau, self._sin_tau)
        # apply final vector to Cα
        center *= self.distance_alpha_v
        center += ca
        return center

    def _approximate_cb_position(self, ca, n, c):
        """
        Approximate the position of ``CB`` from the backbone atoms.
        """
        assert ca.shape == n.shape
        assert ca.shape == c.shape
        to_c = _normalize(c - ca, inplace=True)
        to_n = _normalize(n - ca, inplace=True)
        third_of_c = to_c / 3.0

        # `to_n` is reused as the buffer for the first basis vector
        to_n += third_of_c
        basis_1 = to_n
        # the second basis vector must be built before `basis_1` is normalized
        basis_2 = numpy.cross(to_c, basis_1, axis=-1)
        unit_1 = _normalize(basis_1, inplace=True)
        unit_2 = _normalize(basis_2, inplace=True)

        cb_approx = (numpy.sqrt(8) / 3.0) * (
            (-unit_1 / 2.0) - (unit_2 * numpy.sqrt(3) / 2.0)
        ) - third_of_c
        cb_approx *= self.distance_alpha_beta
        cb_approx += ca
        return cb_approx

    def _create_nan_mask(self, ca, n, c):
        """
        Mask every residue (row) for which ``CA``, ``N`` or ``C`` contains
        at least one *NaN* value.

        The returned boolean array has shape ``(n, 3)``, i.e. the
        per-residue flag is repeated for each coordinate.
        """
        incomplete = numpy.isnan(ca).max(axis=1)
        for coord in (n, c):
            incomplete = incomplete | numpy.isnan(coord).max(axis=1)
        return incomplete.repeat(3).reshape(-1, 3)

    def encode(self, ca, cb, n, c):
        """
        Compute the virtual center for each residue.

        Parameters
        ----------
        ca, cb, n, c : ndarray, shape=(n, 3), dtype=float
            The coordinates of the ``CA``, ``CB``, ``N`` and ``C`` atoms
            for each residue.
            *NaN* if missing, e.g. ``CB`` for glycine.

        Returns
        -------
        encoded : MaskedArray, shape=(n, 3), dtype=float
            The virtual center coordinates.
            Residues with a *NaN* in ``CA``, ``N`` or ``C`` are masked.
        """
        ca, cb, n, c = (numpy.asarray(coord) for coord in (ca, cb, n, c))

        assert ca.shape == cb.shape
        assert ca.shape == c.shape
        assert ca.shape == n.shape

        # fix CB positions if needed
        missing_cb = numpy.isnan(cb)
        if numpy.any(missing_cb):
            known_cb = ~missing_cb
            # fill a fresh array instead of writing into the caller's `cb`
            rebuilt_cb = self._approximate_cb_position(ca, n, c)
            rebuilt_cb[known_cb] = cb[known_cb]
            cb = rebuilt_cb
        # compute virtual center
        virtual_center = self._compute_virtual_center(ca, cb, n)
        # mask residues without coordinates
        residue_mask = self._create_nan_mask(ca, n, c)
        return numpy.ma.masked_array(
            virtual_center,
            mask=residue_mask,
            fill_value=numpy.nan,
        )
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class PartnerIndexEncoder(_BaseEncoder):
    """
    An encoder for converting a protein structure to partner indices.

    For each residue, the coordinates of the virtual center are computed from the
    coordinates of the ``CA``, ``CB`` and ``N`` atoms.
    A pairwise distance matrix is then created, and the index of the closest partner
    residue is extracted for each position.
    """

    def __init__(self):
        self.vc_encoder = VirtualCenterEncoder()

    def _find_residue_partners(
        self,
        x,
    ):
        """
        Find the index of the closest other residue for each residue.

        Parameters
        ----------
        x : MaskedArray or ndarray, shape=(n, 3), dtype=float
            The virtual center coordinates of each residue.

        Returns
        -------
        partner_index : ndarray, shape=(n,), dtype=int
            For each residue the row index of its closest partner.
            Empty if `x` contains no residues.
        """
        # An empty chain has no partners; indexing the first/last residue
        # below would otherwise raise an IndexError.
        if len(x) == 0:
            return numpy.empty(0, dtype=numpy.intp)
        # compute pairwise squared distance matrix
        # via |a - b|^2 = |a|^2 - 2 a.b + |b|^2
        r = numpy.sum(x * x, axis=-1).reshape(-1, 1)
        # NaN in the first and last entry turns the corresponding rows and
        # columns of the matrix into NaN, which excludes the terminal
        # residues as partner candidates
        r[0] = r[-1] = numpy.nan
        D = r - 2 * numpy.ma.dot(x, x.T) + r.T
        # avoid selecting residue itself as the best
        D[numpy.diag_indices_from(D)] = numpy.inf
        # get the closest non-masked residue:
        # NaN distances are replaced so that any finite distance is preferred
        return numpy.nan_to_num(D, copy=False, nan=numpy.inf).argmin(axis=1)

    def encode(self, ca, cb, n, c):
        """
        Compute the partner index for each residue.

        Parameters
        ----------
        ca, cb, n, c : ndarray, shape=(n, 3), dtype=float
            The coordinates of the ``CA``, ``CB``, ``N`` and ``C`` atoms
            for each residue.
            *NaN* if missing, e.g. ``CB`` for glycine.

        Returns
        -------
        partner_index : ndarray, shape=(n,), dtype=int
            The index of the closest partner residue for each residue.
        """
        # encode backbone atoms to virtual center
        vc = self.vc_encoder.encode(ca, cb, n, c)
        # find closest neighbor for each residue
        return self._find_residue_partners(vc)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class FeatureEncoder(_BaseEncoder):
    """
    An encoder for converting a protein structure to structural descriptors.

    For each residue ``i`` and its partner residue ``j`` (as found by
    :class:`PartnerIndexEncoder`), ten descriptors are computed from the
    ``CA`` coordinates.
    """

    def __init__(self):
        self.partner_index_encoder = PartnerIndexEncoder()
        # share the virtual center encoder with the partner index encoder
        self.vc_encoder = self.partner_index_encoder.vc_encoder

    def _calc_conformation_descriptors(self, ca, partner_index, dtype=numpy.float32):
        """
        Compute the descriptors for all residues except the first and last.

        Parameters
        ----------
        ca : ndarray, shape=(n, 3), dtype=float
            The ``CA`` coordinates of each residue.
        partner_index : ndarray, shape=(n,), dtype=int
            The index of the partner residue of each residue.
        dtype : dtype, optional
            The data type of the returned array.

        Returns
        -------
        desc : ndarray, shape=(n, 10)
            The descriptors.
            The rows of the first and last residue are left at zero.
        """
        # build arrays of indices to use for vectorized angles
        i = numpy.arange(1, ca.shape[-2] - 1)
        j = partner_index[i]
        # compute conformational descriptors
        # unit vectors along the CA trace around residue i ...
        u1 = _normalize(ca[..., i, :] - ca[..., i - 1, :], inplace=True)
        u2 = _normalize(ca[..., i + 1, :] - ca[..., i, :], inplace=True)
        # ... around the partner residue j ...
        u3 = _normalize(ca[..., j, :] - ca[..., j - 1, :], inplace=True)
        u4 = _normalize(ca[..., j + 1, :] - ca[..., j, :], inplace=True)
        # ... and from residue i to its partner j
        u5 = _normalize(ca[..., j, :] - ca[..., i, :], inplace=True)
        desc = numpy.zeros((ca.shape[0], 10), dtype=dtype)
        # columns 0-6: dot products of the unit vectors above
        desc[i, 0] = numpy.sum(u1 * u2, axis=-1)
        desc[i, 1] = numpy.sum(u3 * u4, axis=-1)
        desc[i, 2] = numpy.sum(u1 * u5, axis=-1)
        desc[i, 3] = numpy.sum(u3 * u5, axis=-1)
        desc[i, 4] = numpy.sum(u1 * u4, axis=-1)
        desc[i, 5] = numpy.sum(u2 * u3, axis=-1)
        desc[i, 6] = numpy.sum(u1 * u3, axis=-1)
        # column 7: CA-CA distance between residue and partner
        desc[i, 7] = numpy.linalg.norm(ca[i] - ca[j], axis=-1)
        # column 8: sequence separation, clipped to [-4, 4]
        desc[i, 8] = numpy.clip(j - i, -4, 4)
        # column 9: signed logarithm of the sequence separation
        desc[i, 9] = numpy.copysign(numpy.log(numpy.abs(j - i) + 1), j - i)
        return desc

    def _create_descriptor_mask(self, mask, partner_index):
        """
        Create the mask for the descriptors.

        A residue is masked if it, its partner or any of their direct
        sequence neighbors is masked.
        The first and last residue are always masked.

        Parameters
        ----------
        mask : ndarray, shape=(n,), dtype=bool
            The per-residue mask of the virtual centers.
        partner_index : ndarray, shape=(n,), dtype=int
            The index of the partner residue of each residue.

        Returns
        -------
        out : ndarray, shape=(n, 10), dtype=bool
            The descriptor mask, identical across all ten columns.
        """
        i = numpy.arange(1, mask.shape[0] - 1)
        j = partner_index[i]
        out = numpy.zeros((mask.shape[0], 10), dtype=numpy.bool_)
        # `newaxis` instead of an explicit `reshape(n - 2, 1)`, as the latter
        # fails for an empty mask
        out[1:-1, :] |= (
            mask[i - 1] | mask[i] | mask[i + 1] | mask[j - 1] | mask[j] | mask[j + 1]
        )[:, numpy.newaxis]
        # slices instead of `out[0]`/`out[-1]` to tolerate an empty mask
        out[:1] = True
        out[-1:] = True
        return out

    def encode(self, ca, cb, n, c):
        """
        Compute the structural descriptors for each residue.

        Parameters
        ----------
        ca, cb, n, c : array-like, shape=(n, 3), dtype=float
            The coordinates of the ``CA``, ``CB``, ``N`` and ``C`` atoms
            for each residue.
            *NaN* if missing, e.g. ``CB`` for glycine.

        Returns
        -------
        encoded : MaskedArray, shape=(n, 10), dtype=float
            The descriptors for each residue.
        """
        # `ca` is indexed with index arrays below, so a plain sequence
        # must be converted first
        ca = numpy.asarray(ca)
        # encode backbone atoms to virtual center
        vc = self.vc_encoder.encode(ca, cb, n, c)
        # find closest neighbor for each residue
        partner_index = self.partner_index_encoder._find_residue_partners(vc)
        # build position features from residue angles
        descriptors = self._calc_conformation_descriptors(ca, partner_index)
        # create mask
        mask = self._create_descriptor_mask(vc.mask[:, 0], partner_index)
        return numpy.ma.masked_array(
            descriptors,
            mask=mask,
            fill_value=numpy.nan,
        )
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
class Encoder(_BaseEncoder):
    """
    An encoder for converting a protein structure to 3di states.

    The structural descriptors from :class:`FeatureEncoder` are passed
    through a model built from the bundled ``encoder_weights_3di.kerasify``
    layers followed by a :class:`CentroidLayer` over the centroids below.
    """

    # Fill value reported for masked residues
    _INVALID_STATE = 2
    # One two-dimensional centroid per state
    _CENTROIDS = numpy.array(
        [
            [-1.0729, -0.3600],
            [-0.1356, -1.8914],
            [0.4948, -0.4205],
            [-0.9874, 0.8128],
            [-1.6621, -0.4259],
            [2.1394, 0.0486],
            [1.5558, -0.1503],
            [2.9179, 1.1437],
            [-2.8814, 0.9956],
            [-1.1400, -2.0068],
            [3.2025, 1.7356],
            [1.7769, -1.3037],
            [0.6901, -1.2554],
            [-1.1061, -1.3397],
            [2.1495, -0.8030],
            [2.3060, -1.4988],
            [2.5522, 0.6046],
            [0.7786, -2.1660],
            [-2.3030, 0.3813],
            [1.0290, 0.8772],
        ]
    )

    def __init__(self):
        self.feature_encoder = FeatureEncoder()
        # The weights file is shipped as a resource of this package
        weights_file = resource_files(__package__).joinpath(
            "encoder_weights_3di.kerasify"
        )
        network_layers = load_kerasify(weights_file)
        centroid_layer = CentroidLayer(self._CENTROIDS)
        self.vae_encoder = Model(network_layers + (centroid_layer,))

    def encode(
        self,
        ca,
        cb,
        n,
        c,
    ):
        """
        Compute the 3di state for each residue.

        Parameters
        ----------
        ca, cb, n, c : ndarray, shape=(n, 3), dtype=float
            The coordinates of the ``CA``, ``CB``, ``N`` and ``C`` atoms
            for each residue.
            *NaN* if missing, e.g. ``CB`` for glycine.

        Returns
        -------
        encoded : MaskedArray, shape=(n,)
            The state of each residue.
            Residues whose descriptors are masked are masked here as well.
        """
        descriptors = self.feature_encoder.encode(ca, cb, n, c)
        # The model sees the raw descriptor data; masking is re-applied after
        states = self.vae_encoder(descriptors.data)
        # All descriptor columns share one mask, so the first one suffices
        residue_mask = descriptors.mask[:, 0]
        return numpy.ma.masked_array(
            states,
            mask=residue_mask,
            fill_value=self._INVALID_STATE,
        )
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def _normalize(x, *, inplace=False):
    """
    Scale the vectors along the last axis of `x` to unit length.

    Vectors whose norm is zero are not divided and keep their
    original values.

    Parameters
    ----------
    x : ndarray
        The vectors to normalize.
    inplace : bool, optional
        If true, the result is written into `x` itself.
        Otherwise `x` is left untouched and a new array is returned.

    Returns
    -------
    normalized : ndarray
        The normalized vectors.
        This is `x` itself, if `inplace` is true.
    """
    norm = numpy.linalg.norm(x, axis=-1).reshape(*x.shape[:-1], 1)
    if not inplace:
        # With 'out=None' the entries excluded via 'where' would remain
        # uninitialized memory in the result.
        # Hence, the division always writes into an initialized array:
        # a copy of the input having the dtype of the division result
        x = numpy.array(
            x, dtype=numpy.result_type(x, norm), copy=True, subok=True
        )
    return numpy.divide(x, norm, out=x, where=norm != 0)
|
|
Binary file
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
NumPy port of the ``foldseek`` code for encoding structures to 3di.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.structure.alphabet"
|
|
10
|
+
__author__ = "Martin Larralde"
|
|
11
|
+
__all__ = ["I3DSequence", "to_3di"]
|
|
12
|
+
|
|
13
|
+
import warnings
|
|
14
|
+
from biotite.sequence.alphabet import LetterAlphabet
|
|
15
|
+
from biotite.sequence.sequence import Sequence
|
|
16
|
+
from biotite.structure.alphabet.encoder import Encoder
|
|
17
|
+
from biotite.structure.chains import get_chain_starts
|
|
18
|
+
from biotite.structure.util import coord_for_atom_name_per_residue
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class I3DSequence(Sequence):
    """
    Representation of a structure in the 3Di structural alphabet.
    :footcite:`VanKempen2024`

    Parameters
    ----------
    sequence : iterable object, optional
        The 3Di sequence.
        This may either be a list or a string.
        May take upper or lower case letters.
        By default the sequence is empty.

    See Also
    --------
    to_3di : Create 3Di sequences from a structure.

    References
    ----------

    .. footbibliography::
    """

    # The symbols of the alphabet are lower case letters
    alphabet = LetterAlphabet("acdefghiklmnpqrstvwy")
    undefined_symbol = "d"

    def __init__(self, sequence=""):
        # The alphabet only contains lower case symbols:
        # Both, strings and other iterables of symbols, must be
        # converted to lower case to be encodable
        if isinstance(sequence, str):
            sequence = sequence.lower()
        else:
            sequence = [symbol.lower() for symbol in sequence]
        super().__init__(sequence)

    def get_alphabet(self):
        return I3DSequence.alphabet

    def __repr__(self):
        return f'I3DSequence("{"".join(self.symbols)}")'
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def to_3di(atoms):
    """
    Encode each chain in the given structure to the 3Di structure alphabet.
    :footcite:`VanKempen2024`

    Parameters
    ----------
    atoms : AtomArray
        The atom array to encode.
        May contain multiple chains.

    Returns
    -------
    sequences : list of Sequence, length=n
        The encoded 3Di sequence for each peptide chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> sequences, chain_starts = to_3di(atom_array)
    >>> print(sequences[0])
    dqqvvcvvcpnvvnvdhgdd
    """
    sequences = []
    # The last element is the exclusive stop of the final chain
    chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True)
    # Creating an encoder loads the network weights from a resource file,
    # hence a single instance is shared by all chains
    encoder = Encoder()
    for start, stop in zip(chain_start_indices[:-1], chain_start_indices[1:]):
        chain = atoms[start:stop]
        sequence = I3DSequence()
        if chain.array_length() == 0:
            # The sequence stays empty for such a chain
            warnings.warn("Ignoring empty chain")
        else:
            # Masked positions are replaced by the encoder's fill value
            sequence.code = encoder.encode(
                *coord_for_atom_name_per_residue(chain, ["CA", "CB", "N", "C"]),
            ).filled()
        sequences.append(sequence)
    return sequences, chain_start_indices[:-1]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Implementation of the neural network layers used in ``foldseek``.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.structure.alphabet"
|
|
10
|
+
__author__ = "Martin Larralde"
|
|
11
|
+
__all__ = ["Layer", "DenseLayer", "CentroidLayer", "Model"]
|
|
12
|
+
|
|
13
|
+
import abc
|
|
14
|
+
import functools
|
|
15
|
+
import numpy
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Layer(abc.ABC):
    """
    Abstract base class of all network layers.

    A layer is a callable that takes the input `x` and returns the
    transformed output.
    """

    @abc.abstractmethod
    def __call__(self, x):
        """
        Apply the layer to the input `x`.

        Must be overridden by each concrete layer.
        """
        raise NotImplementedError
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DenseLayer(Layer):
    """
    Layer computing ``x @ weights + biases``, optionally followed by a
    ReLU activation.

    Parameters
    ----------
    weights : array-like
        The weight matrix.
    biases : array-like, optional
        The biases added to the matrix product.
        By default zeros, with one entry per column of `weights`, are
        used.
    activation : bool, optional
        If true, the ReLU function is applied to the output.
    """

    def __init__(self, weights, biases=None, activation: bool = True):
        self.activation = activation
        self.weights = numpy.asarray(weights)
        self.biases = (
            numpy.zeros(self.weights.shape[1])
            if biases is None
            else numpy.asarray(biases)
        )

    def __call__(self, x):
        result = numpy.asarray(x) @ self.weights
        # The matrix product is a fresh object,
        # so it can be modified in-place
        result += self.biases
        if not self.activation:
            return result
        return _relu(result, out=result)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class CentroidLayer(Layer):
    """
    Layer assigning each input row to the index of its closest centroid.

    Parameters
    ----------
    centroids : array-like
        The centroids, one per row.
    """

    def __init__(self, centroids) -> None:
        self.centroids = numpy.asarray(centroids)
        # Squared norm of each centroid, stored as a single row
        centroid_sq_norms = numpy.sum(self.centroids**2, axis=1)
        self.r2 = centroid_sq_norms.reshape(-1, 1).T

    def __call__(self, x):
        # Pairwise squared distances via |a - b|^2 = |a|^2 - 2ab + |b|^2
        point_sq_norms = numpy.sum(x**2, axis=1).reshape(-1, 1)
        cross_terms = 2 * x @ self.centroids.T
        sq_distances = point_sq_norms - cross_terms + self.r2
        # The index of the closest centroid is the state of each row
        states = numpy.empty(sq_distances.shape[0], dtype=numpy.uint8)
        sq_distances.argmin(axis=1, out=states)
        return states
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class Model:
    """
    A sequential stack of layers.

    Parameters
    ----------
    layers : iterable of callable, optional
        The layers in the order they are applied.
        By default the model has no layers and returns its input
        unchanged.
    """

    def __init__(self, layers=()):
        self.layers = list(layers)

    def __call__(self, x):
        """
        Feed `x` through all layers and return the final output.
        """
        output = x
        for layer in self.layers:
            output = layer(output)
        return output
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _relu(
    x,
    out=None,
    *,
    where=True,
    casting="same_kind",
    order="K",
    dtype=None,
    subok=True,
):
    """
    Rectified linear unit: the element-wise maximum of `x` and zero.

    All parameters apart from `x` are passed on unchanged to
    :func:`numpy.maximum`.
    """
    ufunc_options = {
        "out": out,
        "where": where,
        "casting": casting,
        "order": order,
        "dtype": dtype,
        "subok": subok,
    }
    return numpy.maximum(0.0, x, **ufunc_options)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2013 Poulain, A. G. de Brevern
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|