biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,555 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.sequence"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = [
|
|
8
|
+
"Alphabet",
|
|
9
|
+
"LetterAlphabet",
|
|
10
|
+
"AlphabetMapper",
|
|
11
|
+
"AlphabetError",
|
|
12
|
+
"common_alphabet",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
import string
|
|
16
|
+
from numbers import Integral
|
|
17
|
+
import numpy as np
|
|
18
|
+
from biotite.sequence.codec import decode_to_chars, encode_chars, map_sequence_code
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Alphabet(object):
    """
    This class defines the allowed symbols for a :class:`Sequence` and
    handles the encoding/decoding between symbols and symbol codes.

    An :class:`Alphabet` is created with the list of symbols, that can
    be used in this context.
    In most cases a symbol will be simply a letter, hence a string of
    length 1. But in principle every hashable Python object can serve
    as symbol.

    The encoding of a symbol into a symbol code is
    done in the following way: Find the first index in the symbol list,
    where the list element equals the symbol. This index is the
    symbol code. If the symbol is not found in the list, an
    :class:`AlphabetError` is raised.

    Internally, a dictionary is used for encoding, with symbols as keys
    and symbol codes as values. Therefore, every symbol must be
    hashable. For decoding the symbol list is indexed with the symbol
    code.

    If an alphabet *1* contains the same symbols and the same
    symbol-code-mappings like another alphabet *2*, but alphabet *1*
    introduces also new symbols, then alphabet *1* *extends* alphabet
    *2*.
    Per definition, every alphabet also extends itself.

    Objects of this class are immutable.

    Parameters
    ----------
    symbols : iterable object
        The symbols, that are allowed in this alphabet. The
        corresponding code for a symbol, is the index of that symbol
        in this list.
        Any iterable is accepted, including one-shot iterators such as
        generators.

    Examples
    --------
    Create an Alphabet containing DNA letters and encode/decode a
    letter/code:

    >>> alph = Alphabet(["A","C","G","T"])
    >>> print(alph.encode("G"))
    2
    >>> print(alph.decode(2))
    G
    >>> try:
    ...     alph.encode("foo")
    ... except Exception as e:
    ...     print(e)
    Symbol 'foo' is not in the alphabet

    Create an Alphabet of arbitrary objects:

    >>> alph = Alphabet(["foo", 42, (1,2,3), 5, 3.141])
    >>> print(alph.encode((1,2,3)))
    2
    >>> print(alph.decode(4))
    3.141

    On the subject of alphabet extension:
    An alphabet always extends itself.

    >>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","G","T"]))
    True

    An alphabet extends an alphabet when it contains additional symbols...

    >>> Alphabet(["A","C","G","T","U"]).extends(Alphabet(["A","C","G","T"]))
    True

    ...but not vice versa

    >>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","T","G"]))
    False

    Two alphabets with same symbols but different symbol-code-mappings

    >>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","T","G"]))
    False
    """

    def __init__(self, symbols):
        # Materialize first: a one-shot iterator has no 'len()' and can
        # only be traversed once
        self._symbols = tuple(symbols)
        if not self._symbols:
            raise ValueError("Symbol list is empty")
        self._symbol_dict = {}
        for i, symbol in enumerate(self._symbols):
            # 'setdefault()' keeps the first index of a repeated symbol,
            # as promised by the class documentation
            self._symbol_dict.setdefault(symbol, i)

    def __repr__(self):
        """Represent Alphabet as a string for debugging."""
        return f"Alphabet({self._symbols})"

    def get_symbols(self):
        """
        Get the symbols in the alphabet.

        Returns
        -------
        symbols : tuple
            The symbols.
        """
        return self._symbols

    def extends(self, alphabet):
        """
        Check, if this alphabet extends another alphabet.

        Parameters
        ----------
        alphabet : Alphabet
            The potential parent alphabet.

        Returns
        -------
        result : bool
            True, if this object extends `alphabet`, false otherwise.
        """
        if alphabet is self:
            return True
        elif len(alphabet) > len(self):
            return False
        else:
            # The other alphabet must be a prefix of this alphabet,
            # otherwise the symbol codes would not be preserved
            return alphabet.get_symbols() == self.get_symbols()[: len(alphabet)]

    def encode(self, symbol):
        """
        Use the alphabet to encode a symbol.

        Parameters
        ----------
        symbol : object
            The object to encode into a symbol code.

        Returns
        -------
        code : int
            The symbol code of `symbol`.

        Raises
        ------
        AlphabetError
            If `symbol` is not in the alphabet.
        """
        try:
            return self._symbol_dict[symbol]
        except KeyError:
            raise AlphabetError(f"Symbol {repr(symbol)} is not in the alphabet")

    def decode(self, code):
        """
        Use the alphabet to decode a symbol code.

        Parameters
        ----------
        code : int
            The symbol code to be decoded.

        Returns
        -------
        symbol : object
            The symbol corresponding to `code`.

        Raises
        ------
        AlphabetError
            If `code` is not a valid code in the alphabet.
        """
        # Negative codes are rejected explicitly, as tuple indexing
        # would otherwise silently count from the end
        if code < 0 or code >= len(self._symbols):
            raise AlphabetError(f"'{code:d}' is not a valid code")
        return self._symbols[code]

    def encode_multiple(self, symbols, dtype=np.int64):
        """
        Encode a list of symbols.

        Parameters
        ----------
        symbols : array-like
            The symbols to encode.
        dtype : dtype, optional
            The dtype of the output ndarray.

        Returns
        -------
        code : ndarray
            The sequence code.
        """
        return np.array([self.encode(e) for e in symbols], dtype=dtype)

    def decode_multiple(self, code):
        """
        Decode a sequence code into a list of symbols.

        Parameters
        ----------
        code : ndarray
            The sequence code to decode.

        Returns
        -------
        symbols : list
            The decoded list of symbols.
        """
        return [self.decode(c) for c in code]

    def is_letter_alphabet(self):
        """
        Check whether the symbols in this alphabet are single printable
        letters.
        If so, the alphabet could be expressed by a `LetterAlphabet`.

        Returns
        -------
        is_letter_alphabet : bool
            True, if all symbols in the alphabet are 'str' or 'bytes',
            have length 1 and are printable.
        """
        for symbol in self:
            # An empty string must be rejected here, since the empty
            # 'bytes' object is contained in every 'bytes' object
            if not isinstance(symbol, (str, bytes)) or len(symbol) != 1:
                return False
            if isinstance(symbol, str):
                try:
                    symbol = symbol.encode("ASCII")
                except UnicodeEncodeError:
                    # Non-ASCII characters are never printable letters
                    return False
            if symbol not in LetterAlphabet.PRINTABLES:
                return False
        return True

    def __str__(self):
        return str(self.get_symbols())

    def __len__(self):
        return len(self.get_symbols())

    def __iter__(self):
        return self.get_symbols().__iter__()

    def __contains__(self, symbol):
        return symbol in self.get_symbols()

    def __hash__(self):
        # 'get_symbols()' is used instead of '_symbols', so the hash
        # stays consistent with '__eq__()'
        symbols = self.get_symbols()
        if isinstance(symbols, tuple):
            return hash(symbols)
        else:
            return hash(tuple(symbols))

    def __eq__(self, item):
        if item is self:
            return True
        if not isinstance(item, Alphabet):
            return False
        return self.get_symbols() == item.get_symbols()
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
class LetterAlphabet(Alphabet):
|
|
278
|
+
"""
|
|
279
|
+
:class:`LetterAlphabet` is a an :class:`Alphabet` subclass
|
|
280
|
+
specialized for letter based alphabets, like DNA or protein
|
|
281
|
+
sequence alphabets.
|
|
282
|
+
The alphabet size is limited to the 94 printable, non-whitespace
|
|
283
|
+
characters.
|
|
284
|
+
Internally the symbols are saved as `bytes` objects.
|
|
285
|
+
The encoding and decoding process is a lot faster than for a
|
|
286
|
+
normal :class:`Alphabet`.
|
|
287
|
+
|
|
288
|
+
The performance gain comes through the use of *NumPy* and *Cython*
|
|
289
|
+
for encoding and decoding, without the need of a dictionary.
|
|
290
|
+
|
|
291
|
+
Parameters
|
|
292
|
+
----------
|
|
293
|
+
symbols : iterable object or str or bytes
|
|
294
|
+
The symbols, that are allowed in this alphabet. The
|
|
295
|
+
corresponding code for a symbol, is the index of that symbol
|
|
296
|
+
in this list.
|
|
297
|
+
"""
|
|
298
|
+
|
|
299
|
+
PRINTABLES = (string.digits + string.ascii_letters + string.punctuation).encode(
|
|
300
|
+
"ASCII"
|
|
301
|
+
)
|
|
302
|
+
|
|
303
|
+
def __init__(self, symbols):
|
|
304
|
+
if len(symbols) == 0:
|
|
305
|
+
raise ValueError("Symbol list is empty")
|
|
306
|
+
self._symbols = []
|
|
307
|
+
for symbol in symbols:
|
|
308
|
+
if not isinstance(symbol, (str, bytes)) or len(symbol) > 1:
|
|
309
|
+
raise ValueError(f"Symbol '{symbol}' is not a single letter")
|
|
310
|
+
if isinstance(symbol, str):
|
|
311
|
+
symbol = symbol.encode("ASCII")
|
|
312
|
+
if symbol not in LetterAlphabet.PRINTABLES:
|
|
313
|
+
raise ValueError(
|
|
314
|
+
f"Symbol {repr(symbol)} is not printable or whitespace"
|
|
315
|
+
)
|
|
316
|
+
self._symbols.append(symbol)
|
|
317
|
+
# Direct 'astype' conversion is not allowed by numpy
|
|
318
|
+
# -> frombuffer()
|
|
319
|
+
self._symbols = np.frombuffer(
|
|
320
|
+
np.array(self._symbols, dtype="|S1"), dtype=np.ubyte
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
def __repr__(self):
|
|
324
|
+
"""Represent LetterAlphabet as a string for debugging."""
|
|
325
|
+
return f"LetterAlphabet({self.get_symbols()})"
|
|
326
|
+
|
|
327
|
+
def extends(self, alphabet):
|
|
328
|
+
if alphabet is self:
|
|
329
|
+
return True
|
|
330
|
+
elif isinstance(alphabet, LetterAlphabet):
|
|
331
|
+
if len(alphabet._symbols) > len(self._symbols):
|
|
332
|
+
return False
|
|
333
|
+
return np.all(alphabet._symbols == self._symbols[: len(alphabet._symbols)])
|
|
334
|
+
else:
|
|
335
|
+
return super().extends(alphabet)
|
|
336
|
+
|
|
337
|
+
def get_symbols(self):
|
|
338
|
+
return tuple([symbol.decode("ASCII") for symbol in self._symbols_as_bytes()])
|
|
339
|
+
|
|
340
|
+
def encode(self, symbol):
|
|
341
|
+
if not isinstance(symbol, (str, bytes)) or len(symbol) > 1:
|
|
342
|
+
raise AlphabetError(f"Symbol '{symbol}' is not a single letter")
|
|
343
|
+
indices = np.where(self._symbols == ord(symbol))[0]
|
|
344
|
+
if len(indices) == 0:
|
|
345
|
+
raise AlphabetError(f"Symbol {repr(symbol)} is not in the alphabet")
|
|
346
|
+
return indices[0].item()
|
|
347
|
+
|
|
348
|
+
def decode(self, code, as_bytes=False):
|
|
349
|
+
if code < 0 or code >= len(self._symbols):
|
|
350
|
+
raise AlphabetError(f"'{code:d}' is not a valid code")
|
|
351
|
+
return chr(self._symbols[code])
|
|
352
|
+
|
|
353
|
+
def encode_multiple(self, symbols, dtype=None):
|
|
354
|
+
"""
|
|
355
|
+
Encode multiple symbols.
|
|
356
|
+
|
|
357
|
+
Parameters
|
|
358
|
+
----------
|
|
359
|
+
symbols : iterable object or str or bytes
|
|
360
|
+
The symbols to encode. The method is fastest when a
|
|
361
|
+
:class:`ndarray`, :class:`str` or :class:`bytes` object
|
|
362
|
+
containing the symbols is provided, instead of e.g. a list.
|
|
363
|
+
dtype : dtype, optional
|
|
364
|
+
For compatibility with superclass. The value is ignored.
|
|
365
|
+
|
|
366
|
+
Returns
|
|
367
|
+
-------
|
|
368
|
+
code : ndarray
|
|
369
|
+
The sequence code.
|
|
370
|
+
"""
|
|
371
|
+
if isinstance(symbols, str):
|
|
372
|
+
symbols = np.frombuffer(symbols.encode("ASCII"), dtype=np.ubyte)
|
|
373
|
+
elif isinstance(symbols, bytes):
|
|
374
|
+
symbols = np.frombuffer(symbols, dtype=np.ubyte)
|
|
375
|
+
elif isinstance(symbols, np.ndarray):
|
|
376
|
+
symbols = np.frombuffer(symbols.astype(dtype="|S1"), dtype=np.ubyte)
|
|
377
|
+
else:
|
|
378
|
+
symbols = np.frombuffer(
|
|
379
|
+
np.array(list(symbols), dtype="|S1"), dtype=np.ubyte
|
|
380
|
+
)
|
|
381
|
+
return encode_chars(alphabet=self._symbols, symbols=symbols)
|
|
382
|
+
|
|
383
|
+
def decode_multiple(self, code, as_bytes=False):
    """
    Decode a sequence code into an array of symbols.

    Parameters
    ----------
    code : ndarray, dtype=uint8
        The sequence code to decode.
        Works fastest if a :class:`ndarray` is provided.
    as_bytes : bool, optional
        If true, the output array will contain `bytes`
        (dtype 'S1').
        Otherwise, the output array will contain `str`
        (dtype 'U1').

    Returns
    -------
    symbols : ndarray, dtype='U1' or dtype='S1'
        The decoded symbols.
    """
    if isinstance(code, np.ndarray):
        # No copy is made if the array already has the right dtype
        code = code.astype(np.uint8, copy=False)
    else:
        code = np.array(code, dtype=np.uint8)
    char_values = decode_to_chars(alphabet=self._symbols, code=code)
    # The result must be reinterpreted from 'np.ubyte' as '|S1'
    byte_symbols = np.frombuffer(char_values, dtype="|S1")
    return byte_symbols if as_bytes else byte_symbols.astype("U1")
|
|
412
|
+
|
|
413
|
+
def is_letter_alphabet(self):
    """
    Report whether this alphabet is a letter alphabet.

    Returns
    -------
    is_letter_alphabet : bool
        Always ``True`` for this class.
    """
    return True
|
|
415
|
+
|
|
416
|
+
def __contains__(self, symbol):
    """
    Check whether a letter is part of this alphabet.

    Parameters
    ----------
    symbol : object
        The candidate symbol.

    Returns
    -------
    contained : bool
        ``True`` if `symbol` is a :class:`str` or :class:`bytes`
        object of length 1 whose character value occurs in the
        alphabet's symbol array, ``False`` otherwise.
    """
    # Anything that is not exactly one character cannot be a letter of
    # the alphabet; without the length check 'ord()' would raise a
    # TypeError for inputs like "" or "AB"
    if not isinstance(symbol, (str, bytes)) or len(symbol) != 1:
        return False
    return ord(symbol) in self._symbols
|
|
420
|
+
|
|
421
|
+
def __len__(self):
    """
    Get the number of symbols in this alphabet.

    Returns
    -------
    length : int
        The length of the alphabet's symbol array.
    """
    symbol_count = len(self._symbols)
    return symbol_count
|
|
423
|
+
|
|
424
|
+
def _symbols_as_bytes(self):
    """
    Reinterpret the symbol array (dtype 'np.ubyte') as an array of
    single bytes (dtype '|S1').
    """
    byte_view = np.frombuffer(self._symbols, dtype="|S1")
    return byte_view
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
class AlphabetMapper(object):
    """
    Converter for symbol codes from a source alphabet into a target
    alphabet.

    The conversion keeps the symbol itself: a code that represents a
    symbol in the source alphabet is translated into the code that
    represents the same symbol in the target alphabet.
    Both single symbol codes and entire sequence codes can be
    converted via indexing.

    Parameters
    ----------
    source_alphabet, target_alphabet : Alphabet
        The codes are converted from the source alphabet into the
        target alphabet.
        The target alphabet must contain at least all symbols of the
        source alphabet, but it is not required that the shared symbols
        are in the same order.

    Examples
    --------

    >>> source_alph = Alphabet(["A","C","G","T"])
    >>> target_alph = Alphabet(["T","U","A","G","C"])
    >>> mapper = AlphabetMapper(source_alph, target_alph)
    >>> print(mapper[0])
    2
    >>> print(mapper[1])
    4
    >>> print(mapper[[1,1,3]])
    [4 4 0]
    >>> in_sequence = GeneralSequence(source_alph, "GCCTAT")
    >>> print(in_sequence.code)
    [2 1 1 3 0 3]
    >>> print("".join(in_sequence.symbols))
    GCCTAT
    >>> out_sequence = GeneralSequence(target_alph)
    >>> out_sequence.code = mapper[in_sequence.code]
    >>> print(out_sequence.code)
    [3 4 4 0 2 0]
    >>> print("".join(out_sequence.symbols))
    GCCTAT
    """

    def __init__(self, source_alphabet, target_alphabet):
        # If the target extends the source, the source codes are used
        # unchanged and no lookup table is built
        self._necessary_mapping = not target_alphabet.extends(source_alphabet)
        if not self._necessary_mapping:
            return
        # Lookup table: position = source code, value = target code
        table_dtype = AlphabetMapper._dtype(len(target_alphabet))
        self._mapper = np.array(
            [
                target_alphabet.encode(source_alphabet.decode(source_code))
                for source_code in range(len(source_alphabet))
            ],
            dtype=table_dtype,
        )

    def __getitem__(self, code):
        remap = self._necessary_mapping

        # Single symbol code
        if isinstance(code, Integral):
            return self._mapper[code] if remap else code

        # Sequence code: anything but an unsigned integer array is
        # converted into a 'uint64' array first
        is_uint_array = isinstance(code, np.ndarray) and code.dtype in (
            np.uint8,
            np.uint16,
            np.uint32,
            np.uint64,
        )
        if not is_uint_array:
            code = np.array(code, dtype=np.uint64)
        if not remap:
            return code
        mapped_code = np.empty(len(code), dtype=self._mapper.dtype)
        map_sequence_code(self._mapper, code, mapped_code)
        return mapped_code

    @staticmethod
    def _dtype(alphabet_size):
        # Pick the smallest unsigned integer type whose number of
        # representable values is at least the alphabet size
        for candidate in (np.uint8, np.uint16, np.uint32):
            if alphabet_size <= np.iinfo(candidate).max + 1:
                return candidate
        return np.uint64
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
class AlphabetError(Exception):
    """
    Raised when a code or a symbol is not part of an
    :class:`Alphabet`.
    """
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def common_alphabet(alphabets):
    """
    Find the alphabet in a collection of alphabets that extends all
    of them.

    Parameters
    ----------
    alphabets : iterable of Alphabet
        The alphabets from which the common one should be identified.

    Returns
    -------
    common_alphabet : Alphabet or None
        The alphabet from `alphabets` that extends all alphabets.
        ``None`` if no such common alphabet exists.
    """
    candidate = None
    for current in alphabets:
        if candidate is None:
            # The first alphabet is the initial candidate
            candidate = current
            continue
        if candidate.extends(current):
            # The candidate already covers this alphabet
            continue
        if not current.extends(candidate):
            # Neither alphabet extends the other one
            return None
        candidate = current
    return candidate
|