biotite 1.5.0__cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
Binary file
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.sequence"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["encode_chars", "decode_to_chars", "map_sequence_code"]
|
|
8
|
+
|
|
9
|
+
cimport cython
|
|
10
|
+
cimport numpy as np
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
ctypedef np.int64_t int64
|
|
16
|
+
ctypedef np.uint8_t uint8
|
|
17
|
+
ctypedef np.uint16_t uint16
|
|
18
|
+
ctypedef np.uint32_t uint32
|
|
19
|
+
ctypedef np.uint64_t uint64
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def encode_chars(const unsigned char[:] alphabet not None,
                 const unsigned char[:] symbols not None):
    """
    Encode an array of symbols into an array of symbol codes.

    Only works for symbols that are printable ASCII characters.

    Parameters
    ----------
    alphabet : ndarray, shape=(n,), dtype=np.ubyte
        The alphabet as array of ASCII values.
        The element at index *i* is the ASCII value of the symbol that
        has the symbol code *i*.
    symbols : ndarray, shape=(m,), dtype=np.ubyte
        The symbols (ASCII values) to be encoded.

    Returns
    -------
    code : ndarray, shape=(m,), dtype=np.uint8
        The encoded symbols.

    Raises
    ------
    AlphabetError
        If a symbol is not part of the alphabet.
    """
    # Bounds checking is disabled for this function, hence the index
    # must be able to hold every valid array position:
    # a 32 bit 'int' would overflow for more than 2^31-1 symbols
    cdef Py_ssize_t i
    # The last symbol code of the alphabet + 1 is always illegal
    # Since this code cannot occur from symbol encoding
    # it can be later used to check for illegal symbols
    cdef uint8 illegal_code = alphabet.shape[0]
    # An array based map that maps from symbol to code
    # Since an unsigned char can take 256 different values (0-255)
    # the size of the map is known at compile time
    cdef uint8 sym_to_code[256]
    # Initially fill the map with the illegal symbol
    # Consequently, the map will later return the illegal symbol
    # when indexed with a character that is not part of the alphabet
    sym_to_code[:] = [illegal_code] * 256
    # Then fill in entries for the symbols of the alphabet
    cdef unsigned char symbol
    for i, symbol in enumerate(alphabet):
        sym_to_code[symbol] = <uint8>i

    # Encode the symbols
    code = np.empty(symbols.shape[0], dtype=np.uint8)
    cdef uint8[:] code_view = code
    cdef uint8 symbol_code
    for i in range(symbols.shape[0]):
        symbol_code = sym_to_code[symbols[i]]
        # Check if the symbol is valid
        if symbol_code == illegal_code:
            illegal_symbol = chr(symbols[i])
            # Local import to avoid circular imports
            from .alphabet import AlphabetError
            raise AlphabetError(
                f"Symbol {repr(illegal_symbol)} is not in the alphabet"
            )
        code_view[i] = symbol_code

    return code
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def decode_to_chars(const unsigned char[:] alphabet not None,
                    const uint8[:] code not None):
    """
    Decode an array of symbol codes into an array of symbols.

    Only works for symbols that are printable ASCII characters.

    Parameters
    ----------
    alphabet : ndarray, shape=(n,), dtype=np.ubyte
        The alphabet as array of ASCII values.
        The element at index *i* is the ASCII value of the symbol that
        has the symbol code *i*.
    code : ndarray, shape=(m,), dtype=np.uint8
        The code to be decoded.

    Returns
    -------
    symbols : ndarray, shape=(m,), dtype=np.ubyte
        The resulting symbols as ASCII values.

    Raises
    ------
    AlphabetError
        If a symbol code is equal to or greater than the length of the
        alphabet.
    """
    # Bounds checking is disabled for this function, hence the index
    # must be able to hold every valid array position:
    # a 32 bit 'int' would overflow for more than 2^31-1 codes
    cdef Py_ssize_t i
    cdef int alphabet_length = alphabet.shape[0]

    symbols = np.empty(code.shape[0], dtype=np.ubyte)
    cdef uint8[:] symbols_view = symbols
    cdef uint8 symbol_code
    for i in range(code.shape[0]):
        symbol_code = code[i]
        # As bounds checking is disabled, the code must be validated
        # explicitly before it is used as index into the alphabet
        if symbol_code >= alphabet_length:
            # Local import to avoid circular imports
            from .alphabet import AlphabetError
            raise AlphabetError(f"'{symbol_code:d}' is not a valid code")
        symbols_view[i] = alphabet[symbol_code]
    return symbols
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# Two separate fused types are declared, so that the integer width of
# the input code can differ from the one of the mapping/output code
ctypedef fused CodeType1:
    uint8
    uint16
    uint32
    uint64
ctypedef fused CodeType2:
    uint8
    uint16
    uint32
    uint64
def map_sequence_code(CodeType2[:] mapping,
                      CodeType1[:] in_code, CodeType2[:] out_code):
    """
    Translate a sequence code into the code of another alphabet by
    looking up each input code in a mapping array.

    Parameters
    ----------
    mapping : ndarray, dtype=int
        The output code for each input code:
        The element at index *i* is the output code for input code *i*.
    in_code : ndarray, shape=(n,), dtype=int
        The symbol codes to be mapped.
    out_code : ndarray, shape=(n,), dtype=int
        The array the mapped codes are written into.
        It is passed as parameter instead of being returned, as this
        lets the caller select the integer type of the result.

    Raises
    ------
    ValueError
        If `in_code` and `out_code` have different lengths.
    """
    cdef int64 pos
    cdef int64 length = in_code.shape[0]
    if out_code.shape[0] != length:
        raise ValueError(
            f"Input sequence code has length {in_code.shape[0]}, "
            f"but output sequence code has length {out_code.shape[0]}"
        )
    for pos in range(length):
        out_code[pos] = mapping[in_code[pos]]
|
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.sequence"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["CodonTable"]
|
|
8
|
+
|
|
9
|
+
import copy
|
|
10
|
+
from numbers import Integral
|
|
11
|
+
from os.path import dirname, join, realpath
|
|
12
|
+
import numpy as np
|
|
13
|
+
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
14
|
+
|
|
15
|
+
# Short aliases for the alphabets of codons and amino acids
_NUC_ALPH = NucleotideSequence.alphabet_unamb
_PROT_ALPH = ProteinSequence.alphabet

# A codon in code representation is converted into a unique integer
# by weighting its three positions with descending powers of the
# nucleotide alphabet size
_radix = len(_NUC_ALPH)
_radix_multiplier = np.array([_radix**2, _radix, 1], dtype=int)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CodonTable(object):
|
|
26
|
+
"""
|
|
27
|
+
A :class:`CodonTable` maps a codon (sequence of 3 nucleotides) to an
|
|
28
|
+
amino acid.
|
|
29
|
+
It also defines start codons. A :class:`CodonTable`
|
|
30
|
+
takes/outputs either the symbols or code of the codon/amino acid.
|
|
31
|
+
|
|
32
|
+
Furthermore, this class is able to give a list of codons that
|
|
33
|
+
corresponds to a given amino acid.
|
|
34
|
+
|
|
35
|
+
The :func:`load()` method allows loading of NCBI codon tables.
|
|
36
|
+
|
|
37
|
+
Objects of this class are immutable.
|
|
38
|
+
|
|
39
|
+
Parameters
|
|
40
|
+
----------
|
|
41
|
+
codon_dict : dict of (str -> str)
|
|
42
|
+
A dictionary that maps codons to amino acids. The keys must be
|
|
43
|
+
strings of length 3 and the values strings of length 1
|
|
44
|
+
(all upper case).
|
|
45
|
+
The dictionary must provide entries for all 64 possible codons.
|
|
46
|
+
starts : iterable object of str
|
|
47
|
+
The start codons. Each entry must be a string of length 3
|
|
48
|
+
(all upper case).
|
|
49
|
+
|
|
50
|
+
Examples
|
|
51
|
+
--------
|
|
52
|
+
|
|
53
|
+
Get the amino acid coded by a given codon (symbol and code):
|
|
54
|
+
|
|
55
|
+
>>> table = CodonTable.default_table()
|
|
56
|
+
>>> print(table["ATG"])
|
|
57
|
+
M
|
|
58
|
+
>>> print(table[(1,2,3)])
|
|
59
|
+
14
|
|
60
|
+
|
|
61
|
+
Get the codons coding for a given amino acid (symbol and code):
|
|
62
|
+
|
|
63
|
+
>>> table = CodonTable.default_table()
|
|
64
|
+
>>> print(table["M"])
|
|
65
|
+
('ATG',)
|
|
66
|
+
>>> print(table[14])
|
|
67
|
+
((0, 2, 0), (0, 2, 2), (1, 2, 0), (1, 2, 1), (1, 2, 2), (1, 2, 3))
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
# For efficient mapping of codon codes to amino acid codes,
|
|
71
|
+
# especially in the 'map_codon_codes()' function, the class
|
|
72
|
+
# maps each possible codon into a unique number using a radix based
|
|
73
|
+
# approach.
|
|
74
|
+
# For example the codon (3,1,2) would be represented as
|
|
75
|
+
# 3*16 + 1*4 + 2*1 = 54
|
|
76
|
+
|
|
77
|
+
# file for builtin codon tables from NCBI
|
|
78
|
+
_table_file = join(dirname(realpath(__file__)), "codon_tables.txt")
|
|
79
|
+
|
|
80
|
+
def __init__(self, codon_dict, starts):
    """
    Set up the internal number based representation of the codon
    table from a codon dictionary and the start codons.

    Raises
    ------
    ValueError
        If a start codon is not a string of length 3 or if
        `codon_dict` does not provide an entry for every codon.
    """
    # 'starts' is iterated twice (validation and encoding):
    # Materialize it, as a one-shot iterable (e.g. a generator) would
    # be exhausted by the validation, leaving no start codons to encode
    starts = list(starts)
    # Check if 'starts' is iterable object of length 3 string
    for start in starts:
        if not isinstance(start, str) or len(start) != 3:
            raise ValueError(f"Invalid codon '{start}' as start codon")
    # Internally store codons as single unique numbers
    start_codon_codes = np.array(
        [_NUC_ALPH.encode_multiple(start) for start in starts], dtype=int
    )
    self._starts = CodonTable._to_number(start_codon_codes)
    # Use -1 as error code
    # The array uses the number representation of codons as index
    # and stores the corresponding symbol codes for amino acids
    self._codons = np.full(_radix**3, -1, dtype=int)
    for key, value in codon_dict.items():
        codon_code = _NUC_ALPH.encode_multiple(key)
        codon_number = CodonTable._to_number(codon_code)
        aa_code = _PROT_ALPH.encode(value)
        self._codons[codon_number] = aa_code
    # Every entry that still holds the error code belongs to a codon
    # that is missing in the dictionary
    missing_indices = np.where(self._codons == -1)[0]
    if len(missing_indices) > 0:
        # Report the first missing codon
        codon_code = CodonTable._to_codon(missing_indices[0])
        codon = _NUC_ALPH.decode_multiple(codon_code)
        codon_str = "".join(codon)
        raise ValueError(f"Codon dictionary does not contain codon '{codon_str}'")
|
|
106
|
+
|
|
107
|
+
def __repr__(self):
|
|
108
|
+
"""Represent CodonTable as a string for debugging."""
|
|
109
|
+
return f"CodonTable({self.codon_dict()}, {self.start_codons()})"
|
|
110
|
+
|
|
111
|
+
def __eq__(self, item):
|
|
112
|
+
if not isinstance(item, CodonTable):
|
|
113
|
+
return False
|
|
114
|
+
if self.codon_dict() != item.codon_dict():
|
|
115
|
+
return False
|
|
116
|
+
if self.start_codons() != item.start_codons():
|
|
117
|
+
return False
|
|
118
|
+
return True
|
|
119
|
+
|
|
120
|
+
def __ne__(self, item):
|
|
121
|
+
return not self == item
|
|
122
|
+
|
|
123
|
+
def __getitem__(self, item):
    """
    Look up codons or amino acids, depending on the type of `item`.

    Parameters
    ----------
    item : str or int or iterable object of int
        - String of length 1 (amino acid symbol):
          The codons coding for it are returned as tuple of strings.
        - String of length 3 (codon):
          The encoded amino acid symbol is returned.
        - Integer (amino acid symbol code):
          The codons coding for it are returned as tuple of codon
          codes, each one a tuple of 3 integers.
          Any :class:`numbers.Integral` is accepted, including NumPy
          integer scalars.
        - Other object of length 3 (codon code):
          The symbol code of the encoded amino acid is returned.

    Raises
    ------
    ValueError
        If a string has neither length 1 nor length 3, or if a codon
        code does not have length 3.
    """
    if isinstance(item, str):
        if len(item) == 1:
            # Amino acid -> return possible codons
            aa_code = _PROT_ALPH.encode(item)
            codon_numbers = np.where(self._codons == aa_code)[0]
            codon_codes = CodonTable._to_codon(codon_numbers)
            codons = tuple(
                "".join(_NUC_ALPH.decode_multiple(codon_code))
                for codon_code in codon_codes
            )
            return codons
        elif len(item) == 3:
            # Codon -> return corresponding amino acid
            codon_code = _NUC_ALPH.encode_multiple(item)
            codon_number = CodonTable._to_number(codon_code)
            aa_code = self._codons[codon_number]
            aa = _PROT_ALPH.decode(aa_code)
            return aa
        else:
            raise ValueError(f"'{item}' is an invalid index")
    # 'Integral' instead of 'int', as symbol codes are usually taken
    # from NumPy arrays and NumPy integer scalars are no 'int'
    # instances: They would wrongly enter the codon code branch below
    # and fail in 'len()'
    elif isinstance(item, Integral):
        # Code for amino acid -> return possible codon codes
        codon_numbers = np.where(self._codons == item)[0]
        codon_codes = tuple(
            tuple(code.tolist()) for code in CodonTable._to_codon(codon_numbers)
        )
        return codon_codes
    else:
        # Code for codon as any iterable object
        # Code for codon -> return corresponding amino acid codes
        if len(item) != 3:
            raise ValueError(f"{item} is an invalid sequence code for a codon")
        codon_number = CodonTable._to_number(item)
        aa_code = self._codons[codon_number]
        return aa_code
|
|
161
|
+
|
|
162
|
+
def map_codon_codes(self, codon_codes):
    """
    Translate multiple codons into the corresponding amino acids
    with a single array lookup.

    Parameters
    ----------
    codon_codes : ndarray, dtype=int, shape=(n,3)
        The symbol codes of the *n* codons to be translated.

    Returns
    -------
    aa_codes : ndarray, dtype=int, shape=(n,)
        The symbol codes of the encoded amino acids.

    Raises
    ------
    ValueError
        If the last dimension of `codon_codes` does not have size 3.

    Examples
    --------
    >>> dna = NucleotideSequence("ATGGTTTAA")
    >>> sequence_code = dna.code
    >>> print(sequence_code)
    [0 3 2 2 3 3 3 0 0]
    >>> # Reshape to get codons
    >>> codon_codes = sequence_code.reshape(-1, 3)
    >>> print(codon_codes)
    [[0 3 2]
     [2 3 3]
     [3 0 0]]
    >>> # Map to amino acids
    >>> aa_codes = CodonTable.default_table().map_codon_codes(codon_codes)
    >>> print(aa_codes)
    [10 17 23]
    >>> # Put into a protein sequence
    >>> protein = ProteinSequence()
    >>> protein.code = aa_codes
    >>> print(protein)
    MV*
    """
    codon_length = codon_codes.shape[-1]
    if codon_length != 3:
        raise ValueError(
            f"Codons must be length 3, "
            f"but size of last dimension is {codon_length}"
        )
    # The number representation of a codon is its index in the
    # amino acid code array
    return self._codons[CodonTable._to_number(codon_codes)]
|
|
209
|
+
|
|
210
|
+
def codon_dict(self, code=False):
    """
    Obtain the mapping from each codon to its amino acid as
    dictionary.

    Parameters
    ----------
    code : bool
        If true, keys and values of the dictionary are symbol codes:
        each key is a tuple of three nucleotide codes and each value
        is an amino acid code.
        Otherwise, codons and amino acids are given as strings.

    Returns
    -------
    codon_dict : dict
        The dictionary mapping codons to amino acids.
    """
    # The position in the amino acid code array is the codon number
    code_mapping = {}
    for codon_number, aa_code in enumerate(self._codons):
        codon_code = tuple(CodonTable._to_codon(codon_number))
        code_mapping[codon_code] = aa_code
    if code:
        return code_mapping

    # Decode both sides of the mapping into symbols
    symbol_mapping = {}
    for codon_code, aa_code in code_mapping.items():
        codon = "".join(_NUC_ALPH.decode_multiple(codon_code))
        symbol_mapping[codon] = _PROT_ALPH.decode(aa_code)
    return symbol_mapping
|
|
238
|
+
|
|
239
|
+
def is_start_codon(self, codon_codes):
    """
    Check which of the given codons are start codons of this table.

    Parameters
    ----------
    codon_codes : ndarray or iterable of int
        The codons to check, given as symbol codes.
        Presumably the last dimension must hold the three bases of
        each codon, as in :meth:`map_codon_codes()`.

    Returns
    -------
    is_start : ndarray, dtype=bool
        The result of testing each codon number for membership in
        the start codons of this table.
    """
    return np.isin(CodonTable._to_number(codon_codes), self._starts)
|
|
242
|
+
|
|
243
|
+
def start_codons(self, code=False):
    """
    Obtain the start codons defined for this codon table.

    Parameters
    ----------
    code : bool
        If true, each start codon is returned as tuple of symbol
        codes instead of a string.

    Returns
    -------
    start_codons : tuple
        The start codons. Contains strings or tuples, depending on
        the `code` parameter.
    """
    start_codes = tuple(
        tuple(CodonTable._to_codon(codon_number))
        for codon_number in self._starts
    )
    if code:
        return start_codes
    # Decode each codon code into its three-letter string
    return tuple(
        "".join(_NUC_ALPH.decode_multiple(codon_code))
        for codon_code in start_codes
    )
|
|
272
|
+
|
|
273
|
+
def with_start_codons(self, starts):
    """
    Derive a new :class:`CodonTable` that keeps the codon mappings
    of this table, but uses a different set of start codons.

    Parameters
    ----------
    starts : iterable object of str
        The new start codons.

    Returns
    -------
    new_table : CodonTable
        The codon table with the new start codons.
    """
    encoded_starts = [_NUC_ALPH.encode_multiple(codon) for codon in starts]
    # This table stays untouched, only the copy gets the new starts
    table_copy = copy.deepcopy(self)
    table_copy._starts = CodonTable._to_number(
        np.array(encoded_starts, dtype=int)
    )
    return table_copy
|
|
295
|
+
|
|
296
|
+
def with_codon_mappings(self, codon_dict):
    """
    Derive a new :class:`CodonTable` in which some codons are mapped
    to different amino acids.

    Parameters
    ----------
    codon_dict : dict of (str -> str)
        The codon mappings to change.
        Codons that are not contained keep their mapping.

    Returns
    -------
    new_table : CodonTable
        The codon table with changed codon mappings.
    """
    # This table stays untouched, only the copy is altered
    altered_table = copy.deepcopy(self)
    for codon, amino_acid in codon_dict.items():
        codon_number = CodonTable._to_number(_NUC_ALPH.encode_multiple(codon))
        altered_table._codons[codon_number] = _PROT_ALPH.encode(amino_acid)
    return altered_table
|
|
319
|
+
|
|
320
|
+
def __str__(self):
    """
    Render the codon table as a text grid.

    Codons sharing the first two bases are placed on the same line,
    lines sharing the first base form a group and groups are
    separated by an empty line.
    Each entry consists of the codon, a space and the amino acid
    symbol, followed by ``" i "`` if the codon is a start codon.

    Returns
    -------
    string : str
        The formatted codon table.
    """
    # Every entry is followed by a marker field of fixed width, so the
    # columns stay aligned and the fixed-size trimming below is valid
    start_marker = " i "
    no_marker = " " * len(start_marker)
    separator = " " * 3

    # ['A', 'C', 'G', 'T']
    bases = _NUC_ALPH.get_symbols()
    groups = []
    for b1 in bases:
        rows = []
        for b2 in bases:
            entries = []
            for b3 in bases:
                codon = b1 + b2 + b3
                codon_code = _NUC_ALPH.encode_multiple(codon)
                if CodonTable._to_number(codon_code) in self._starts:
                    marker = start_marker
                else:
                    marker = no_marker
                entries.append(codon + " " + self[codon] + marker)
            # Drop the marker field after the last codon of the line
            rows.append(separator.join(entries)[: -len(start_marker)])
        groups.append("\n".join(rows))
    # An empty line separates the groups
    return "\n\n".join(groups)
|
|
346
|
+
|
|
347
|
+
@staticmethod
def _to_number(codons):
    """
    Collapse codons into codon numbers.

    The symbol codes along the last axis are multiplied with
    `_radix_multiplier` and summed up
    (presumably the place values of a base-`_radix` number).

    Parameters
    ----------
    codons : ndarray or iterable
        The codons as symbol codes.
        Anything that is not an :class:`ndarray` is converted into
        an integer array first.

    Returns
    -------
    numbers : ndarray or int
        The codon numbers, i.e. the input reduced by its last axis.
    """
    if isinstance(codons, np.ndarray):
        codon_array = codons
    else:
        codon_array = np.array(list(codons), dtype=int)
    return np.sum(_radix_multiplier * codon_array, axis=-1)
|
|
352
|
+
|
|
353
|
+
@staticmethod
def _to_codon(numbers):
    """
    Expand codon numbers into codons.

    Each number is split into three digits with respect to the base
    `_radix`, the most significant digit coming first.

    Parameters
    ----------
    numbers : int or ndarray or iterable of int
        The codon number(s).

    Returns
    -------
    codons : ndarray, dtype=int
        The digits of the number(s).
        Its shape is the shape of the input with an additional last
        dimension of size 3; a single integer gives shape ``(3,)``.
    """
    if isinstance(numbers, Integral):
        # Wrap the lone number into an array and unwrap the result
        return CodonTable._to_codon(np.array([numbers]))[0]
    if not isinstance(numbers, np.ndarray):
        numbers = np.array(list(numbers), dtype=int)

    codons = np.zeros(numbers.shape + (3,), dtype=int)
    remainder = numbers
    # Peel off the digits from most to least significant
    for position, exponent in enumerate((2, 1, 0)):
        place_value = _radix**exponent
        digit = remainder // place_value
        codons[..., position] = digit
        remainder = remainder - digit * place_value
    return codons
|
|
367
|
+
|
|
368
|
+
@staticmethod
def load(table_name):
    """
    Load a NCBI codon table.

    Parameters
    ----------
    table_name : str or int
        If a string is given, it is interpreted as official NCBI
        codon table name (e.g. "Vertebrate Mitochondrial").
        An integer is interpreted as NCBI codon table ID.

    Returns
    -------
    table : CodonTable
        The NCBI codon table.

    Raises
    ------
    ValueError
        If the data rows of the requested table could not be
        collected from the table file.
    """
    # The codon tables are read from the file at CodonTable._table_file
    with open(CodonTable._table_file, "r") as f:
        lines = f.read().split("\n")

    # The data rows of the requested table, keyed by row identifier
    rows = dict.fromkeys(("AA", "Init", "Base1", "Base2", "Base3"))
    search_by_id = isinstance(table_name, Integral)
    search_by_name = isinstance(table_name, str)
    in_table = False
    for line in lines:
        if not line:
            # An empty line ends the current table entry
            in_table = False
        if search_by_id and line.startswith("id"):
            # Strip the identifier 'id'
            if table_name == int(line[2:]):
                in_table = True
        elif search_by_name and line.startswith("name"):
            # Strip the identifier 'name',
            # the remainder lists the table names separated by ';'
            names = [name.strip() for name in line[4:].split(";")]
            if table_name in names:
                in_table = True
        if not in_table:
            continue
        for identifier in rows:
            if line.startswith(identifier):
                # Strip the identifier
                rows[identifier] = line[5:].strip()
                break

    if any(row is None for row in rows.values()):
        raise ValueError(f"Codon table '{table_name}' was not found")

    # Assemble the codon table from the collected rows
    aa = rows["AA"]
    init = rows["Init"]
    base1, base2, base3 = rows["Base1"], rows["Base2"], rows["Base3"]
    symbol_dict = {}
    starts = []
    # All rows are expected to have the same length
    for i, amino_acid in enumerate(aa):
        codon = base1[i] + base2[i] + base3[i]
        if init[i] == "i":
            starts.append(codon)
        symbol_dict[codon] = amino_acid
    return CodonTable(symbol_dict, starts)
|
|
442
|
+
|
|
443
|
+
@staticmethod
def table_names():
    """
    List the codon table names that are accepted by :func:`load()`.

    Returns
    -------
    names : list of str
        List of valid codon table names.
    """
    with open(CodonTable._table_file, "r") as f:
        content = f.read()
    # A 'name' line lists all names of a table, separated by ';'
    return [
        name.strip()
        for line in content.split("\n")
        if line.startswith("name")
        for name in line[4:].split(";")
    ]
|
|
460
|
+
|
|
461
|
+
@staticmethod
def default_table():
    """
    Obtain the default codon table.

    It corresponds to the NCBI "Standard" codon table, except that
    "ATG" is its only start codon.

    Returns
    -------
    table : CodonTable
        The default codon table.
        The same module-level object is returned on every call.
    """
    return _default_table
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
# Module-level table returned by `CodonTable.default_table()`:
# the table loaded under the name "Standard", with "ATG" as its only start codon.
_default_table = CodonTable.load("Standard").with_start_codons(["ATG"])
|