biotite 1.6.0__cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +426 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +202 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +66 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +224 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +259 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +191 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +127 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +491 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +763 -0
- biotite/sequence/align/banded.cp314-win_amd64.pyd +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cp314-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cp314-win_amd64.pyd +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cp314-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cp314-win_amd64.pyd +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cp314-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cp314-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cp314-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cp314-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cp314-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cp314-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cp314-win_amd64.pyd +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +462 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cp314-win_amd64.pyd +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cp314-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cp314-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1596 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cp314-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cp314-win_amd64.pyd +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cp314-win_amd64.pyd +0 -0
- biotite/structure/charges.pyx +521 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +646 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +426 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cp314-win_amd64.pyd +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2122 -0
- biotite/structure/io/pdbx/encoding.cp314-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +452 -0
- biotite/structure/sasa.cp314-win_amd64.pyd +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.6.0.dist-info/METADATA +162 -0
- biotite-1.6.0.dist-info/RECORD +354 -0
- biotite-1.6.0.dist-info/WHEEL +4 -0
- biotite-1.6.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for reading and writing sequence related data.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.sequence.io"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
|
|
12
|
+
from .general import *
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This subpackage is used for reading and writing sequence objects
|
|
7
|
+
using the popular FASTA format.
|
|
8
|
+
|
|
9
|
+
This package contains the :class:`FastaFile`, which provides a
|
|
10
|
+
dictionary like interface to FASTA files, where the header lines are
|
|
11
|
+
keys and the strings containing sequence data are the corresponding
|
|
12
|
+
values.
|
|
13
|
+
|
|
14
|
+
Furthermore, the package contains convenience functions for
|
|
15
|
+
getting/setting directly :class:`Sequence` objects, rather than strings.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
__name__ = "biotite.sequence.io.fasta"
|
|
19
|
+
__author__ = "Patrick Kunzmann"
|
|
20
|
+
|
|
21
|
+
from .convert import *
|
|
22
|
+
from .file import *
|
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.sequence.io.fasta"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
|
|
8
|
+
import functools
|
|
9
|
+
import warnings
|
|
10
|
+
from collections import OrderedDict
|
|
11
|
+
import numpy as np
|
|
12
|
+
from biotite.sequence.align.alignment import Alignment, get_codes
|
|
13
|
+
from biotite.sequence.alphabet import AlphabetError, LetterAlphabet
|
|
14
|
+
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"get_sequence",
|
|
18
|
+
"get_sequences",
|
|
19
|
+
"set_sequence",
|
|
20
|
+
"set_sequences",
|
|
21
|
+
"get_alignment",
|
|
22
|
+
"set_alignment",
|
|
23
|
+
"get_a3m_alignments",
|
|
24
|
+
"set_a3m_alignments",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_sequence(fasta_file, header=None, seq_type=None):
    """
    Read a single sequence from a :class:`FastaFile` instance.

    Unless `seq_type` is given, the type of sequence is guessed from the
    sequence string:
    First, a conversion into a :class:`NucleotideSequence` and
    second a conversion into a :class:`ProteinSequence` is tried.

    Parameters
    ----------
    fasta_file : FastaFile
        The :class:`FastaFile` to be accessed.
    header : str, optional
        The header of the entry to read.
        If omitted, the first sequence of the file is returned.
    seq_type : type[Sequence], optional
        The :class:`Sequence` subclass contained in the file.
        If not set, the type is automatically inferred as
        :class:`ProteinSequence` or :class:`NucleotideSequence`.
        For large sequence data it is recommended to set this parameter.

    Returns
    -------
    sequence : NucleotideSequence or ProteinSequence
        The requested sequence in the `FastaFile`.
        :class:`NucleotideSequence` if the sequence string fits the
        corresponding alphabet, :class:`ProteinSequence` otherwise.

    Raises
    ------
    ValueError
        If the file contains no sequence at all, or if the sequence
        data can be neither converted into a
        :class:`NucleotideSequence` nor a :class:`ProteinSequence`.
    """
    if header is None:
        # No header requested -> fall back to the first
        # (and probably only) entry of the file
        seq_str = next(iter(fasta_file.values()), None)
        if seq_str is None:
            raise ValueError("File does not contain any sequences")
    else:
        seq_str = fasta_file[header]
    # The conversion helper takes care of determining the sequence type
    return _convert_to_sequence(seq_str, seq_type)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_sequences(fasta_file, seq_type=None):
    """
    Convert all entries of a :class:`FastaFile` instance into a
    dictionary, where headers are keys and sequences are values.

    Unless `seq_type` is given, the type of each sequence is guessed
    from its sequence string:
    First, a conversion into a :class:`NucleotideSequence` and
    second a conversion into a :class:`ProteinSequence` is tried.

    Parameters
    ----------
    fasta_file : FastaFile
        The :class:`FastaFile` to be accessed.
    seq_type : type[Sequence], optional
        The :class:`Sequence` subclass contained in the file.
        If not set, the type is automatically inferred as
        :class:`ProteinSequence` or :class:`NucleotideSequence`.
        For large sequence data it is recommended to set this parameter.

    Returns
    -------
    seq_dict : dict
        A dictionary that maps headers to
        :class:`NucleotideSequence` and/or :class:`ProteinSequence`
        instances as values.
        The entries appear in the order given by the file.

    Raises
    ------
    ValueError
        If at least one of the sequence strings can be neither
        converted into a :class:`NucleotideSequence` nor a
        :class:`ProteinSequence`.
    """
    return OrderedDict(
        (header, _convert_to_sequence(seq_str, seq_type))
        for header, seq_str in fasta_file.items()
    )
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def set_sequence(fasta_file, sequence, header=None, as_rna=False):
    """
    Store a single sequence in a :class:`FastaFile` instance.

    Parameters
    ----------
    fasta_file : FastaFile
        The :class:`FastaFile` to be accessed.
    sequence : Sequence
        The sequence to be set.
    header : str, optional
        The header for the sequence. Default is ``'sequence'``.
    as_rna : bool, optional
        If set to true, ``'T'`` will be replaced by ``'U'``,
        if a :class:`NucleotideSequence` was given.

    Raises
    ------
    ValueError
        If the sequence's alphabet uses symbols other than single
        characters.
    """
    # Entries without an explicit header are stored under a generic one
    label = "sequence" if header is None else header
    fasta_file[label] = _convert_to_string(sequence, as_rna)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def set_sequences(fasta_file, sequence_dict, as_rna=False):
    """
    Store multiple sequences from a dictionary in a :class:`FastaFile`
    instance.

    Parameters
    ----------
    fasta_file : FastaFile
        The :class:`FastaFile` to be accessed.
    sequence_dict : dict
        A dictionary containing the sequences to be set.
        Headers are keys, :class:`Sequence` instances are values.
    as_rna : bool, optional
        If set to true, ``'T'`` will be replaced by ``'U'``,
        if a :class:`NucleotideSequence` was given.

    Raises
    ------
    ValueError
        If the alphabet of a sequence uses symbols other than single
        characters.
    """
    # Each dictionary item becomes one entry, written in iteration order
    for label, seq in sequence_dict.items():
        fasta_file[label] = _convert_to_string(seq, as_rna)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def get_alignment(fasta_file, additional_gap_chars=("_",), seq_type=None):
    """
    Build an alignment from the gapped sequence strings of a
    :class:`FastaFile` instance.

    Parameters
    ----------
    fasta_file : FastaFile
        The :class:`FastaFile` to be accessed.
    additional_gap_chars : iterable of str, optional
        The characters to be treated as gaps, in addition to the
        default gap symbol ``'-'``.
    seq_type : type[Sequence], optional
        The :class:`Sequence` subclass contained in the file.
        If not set, the type is automatically inferred as
        :class:`ProteinSequence` or :class:`NucleotideSequence`.
        For large sequence data it is recommended to set this parameter.

    Returns
    -------
    alignment : Alignment
        The alignment from the :class:`FastaFile`.
    """
    gapped_strings = list(fasta_file.values())
    # Normalize every alternative gap symbol to the default one ('-'),
    # one symbol at a time over all sequence strings
    for gap_char in additional_gap_chars:
        gapped_strings = [
            gapped_str.replace(gap_char, "-") for gapped_str in gapped_strings
        ]
    # The ungapped strings are converted with the requested sequence type
    sequence_factory = functools.partial(_convert_to_sequence, seq_type=seq_type)
    return Alignment.from_strings(gapped_strings, sequence_factory)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def set_alignment(fasta_file, alignment, seq_names):
    """
    Fill a :class:`FastaFile` with gapped sequence strings from an alignment.

    Parameters
    ----------
    fasta_file : FastaFile
        The :class:`FastaFile` to be accessed.
    alignment : Alignment
        The alignment to be set.
    seq_names : iterable object of str
        The names for the sequences in the alignment.
        Must have the same length as the sequence count in `alignment`.
        Any iterable is accepted, including one-shot iterators.

    Raises
    ------
    ValueError
        If the number of names differs from the number of sequences
        in `alignment`.
    """
    gapped_seq_strings = alignment.get_gapped_sequences()
    # Materialize the names once: the length check below requires a
    # sized container, while callers may pass an arbitrary iterable
    seq_names = list(seq_names)
    if len(gapped_seq_strings) != len(seq_names):
        raise ValueError(
            f"Alignment has {len(gapped_seq_strings)} sequences, "
            f"but {len(seq_names)} names were given"
        )
    # Lengths are equal at this point, so plain zip() drops nothing
    for name, gapped_seq_str in zip(seq_names, gapped_seq_strings):
        fasta_file[name] = gapped_seq_str
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def get_a3m_alignments(a3m_file, seq_type=None):
    """
    Get pairwise sequence alignments from an *A3M*-formatted FASTA file.

    The *i*-th alignment is an alignment of the first sequence in the file (the query)
    to the *i+1*-th sequence in the file (the target).

    Parameters
    ----------
    a3m_file : FastaFile
        The A3M file to parse.
        The first sequence (the query) must not contain any deletions or insertions.
        In all subsequent sequences ``-`` marks a gap in the target, while
        lower case characters mark positions where the query has a gap.
    seq_type : type[Sequence], optional
        The :class:`Sequence` subclass contained in the file.
        If not set, the type is automatically inferred as
        :class:`ProteinSequence` or :class:`NucleotideSequence` from the query sequence.
        For large sequence data it is recommended to set this parameter.

    Returns
    -------
    alignments : list of Alignment
        Alignments of all sequences (excluding the query itself) to the query sequence.
        Each alignment is between the query (first element) and each target sequence
        (second element).

    Raises
    ------
    ValueError
        If the file does not contain any sequences.
    """
    sequence_iterator = iter(a3m_file.values())
    # An empty file must not leak a bare StopIteration to the caller
    query_str = next(sequence_iterator, None)
    if query_str is None:
        raise ValueError("File does not contain any sequences")
    query = _convert_to_sequence(query_str, seq_type)
    # All targets are converted into the same sequence type as the query
    if isinstance(query, NucleotideSequence):
        factory = _convert_to_nucleotide
    elif isinstance(query, ProteinSequence):
        factory = _convert_to_protein
    else:
        factory = seq_type

    # The query positions are the same for every alignment
    query_positions = np.arange(len(query))

    alignments = []
    for target_str in sequence_iterator:
        # The target sequence provides all information about the alignment
        # - matches/mismatches -> upper case
        # - gaps in query      -> lower case
        # - gaps in target     -> '-'
        target_byte_array = np.frombuffer(target_str.encode("ASCII"), dtype=np.ubyte)
        query_gaps = _is_lower(target_byte_array)
        target_gaps = _is_gap(target_byte_array)

        # Start with a trace filled with gaps (-1)
        trace = np.full((len(target_str), 2), -1, dtype=np.int64)
        # Fill the trace with the positions of the query sequence where there is no gap
        trace[~query_gaps, 0] = query_positions
        # Do the same for the target sequence: its actual length is the number
        # of characters that are not gap indicators
        trace[~target_gaps, 1] = np.arange(np.count_nonzero(~target_gaps))

        alignments.append(
            Alignment([query, factory(target_str.replace("-", ""))], trace)
        )

    return alignments
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def set_a3m_alignments(a3m_file, alignments, query_label, target_labels):
    """
    Fill a :class:`FastaFile` with *A3M*-formatted alignments.

    Parameters
    ----------
    a3m_file : FastaFile
        The A3M file to fill.
    alignments : list of Alignment, length=n
        The pairwise alignments to fill the file with.
        The first sequence of each alignment must always be the same
        and will become the first sequence (the query) in the file.
    query_label : str
        The label for the query sequence.
    target_labels : iterable object of str, length=n
        The labels for the target sequences in the alignment.

    Raises
    ------
    ValueError
        If an alignment is not pairwise, if the first sequence of an
        alignment differs from the query or if the number of alignments
        and target labels differ.
    """
    query = alignments[0].sequences[0]
    a3m_file[query_label] = _convert_to_string(query, as_rna=False)

    for alignment, name in zip(alignments, target_labels, strict=True):
        if len(alignment.sequences) != 2:
            raise ValueError("Each alignment must be pairwise")
        if alignment.sequences[0] != query:
            raise ValueError(
                "The first sequence of each alignment must be the same as the query"
            )
        # The target symbol codes refer to the alphabet of the target sequence,
        # which is not necessarily the alphabet of the query
        # (e.g. unambiguous query and ambiguous target nucleotide sequence)
        target_alphabet = alignment.sequences[1].alphabet

        # Terminal gaps must be part of the trace,
        # as A3M requires a string that covers both sequences completely
        alignment = _as_global(alignment)

        code = get_codes(alignment)
        query_code = code[0]
        target_code = code[1]
        # Gaps are represented by the symbol code -1
        query_gaps = query_code == -1
        target_gaps = target_code == -1
        match_mask = ~query_gaps & ~target_gaps

        a3m_string_array = np.zeros(len(query_code), dtype="S1")
        # Indicate gaps in the target sequence with '-'
        a3m_string_array[target_gaps] = "-"
        # Target symbols aligned to a gap in the query (insertions)
        # are written as lower case letters
        a3m_string_array[query_gaps] = np.char.lower(
            target_alphabet.decode_multiple(target_code[query_gaps], as_bytes=True)
        )
        # Matches/mismatches are indicated with upper case letters
        a3m_string_array[match_mask] = target_alphabet.decode_multiple(
            target_code[match_mask], as_bytes=True
        )
        a3m_file[name] = a3m_string_array.tobytes().decode("ASCII")
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def _convert_to_sequence(seq_str, seq_type=None):
    """
    Convert a sequence string into a sequence object.

    Parameters
    ----------
    seq_str : str
        The sequence string to convert.
    seq_type : type, optional
        The sequence class to create.
        If omitted, the conversion into a :class:`NucleotideSequence` is
        tried first and the conversion into a :class:`ProteinSequence`
        is used as fallback.

    Returns
    -------
    sequence : Sequence
        The converted sequence.

    Raises
    ------
    ValueError
        If `seq_type` is omitted and both conversion attempts fail with an
        :class:`AlphabetError`.
    """

    def warn_if_selenocysteine():
        # The protein conversion converts the string to upper case before
        # substituting 'U', so lower case occurrences are substituted as well
        # and must also be reported
        if "U" in seq_str.upper():
            warnings.warn(
                "ProteinSequence objects do not support selenocysteine (U), "
                "occurrences were substituted by cysteine (C)"
            )

    # Set manually selected sequence type
    if seq_type is not None:
        # Do preprocessing as done without manual selection
        if seq_type == NucleotideSequence:
            return _convert_to_nucleotide(seq_str)
        elif seq_type == ProteinSequence:
            warn_if_selenocysteine()
            return _convert_to_protein(seq_str)
        else:
            return seq_type(seq_str)

    # Attempt to automatically determine sequence type

    try:
        return _convert_to_nucleotide(seq_str)
    except AlphabetError:
        pass
    try:
        prot_seq = _convert_to_protein(seq_str)
        # Raise Warning after conversion into 'ProteinSequence'
        # to wait for potential 'AlphabetError'
        warn_if_selenocysteine()
        return prot_seq
    except AlphabetError:
        raise ValueError(
            "FASTA data cannot be converted either to "
            "'NucleotideSequence' nor to 'ProteinSequence'"
        )
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _convert_to_protein(seq_str):
    """
    Create a :class:`ProteinSequence` from the upper case form of the given
    string, with selenocysteine (U) substituted by cysteine (C) and
    pyrrolysine (O) substituted by lysine (K).
    """
    substitutions = str.maketrans("UO", "CK")
    canonical_str = seq_str.upper().translate(substitutions)
    return ProteinSequence(canonical_str)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def _convert_to_nucleotide(seq_str):
    """
    Create a :class:`NucleotideSequence` from the upper case form of the
    given string.

    Uracil (U) is represented by thymine (T) and the only letter for a
    completely unknown nucleotide is N, hence X is substituted by N.
    """
    substitutions = str.maketrans("UX", "TN")
    canonical_str = seq_str.upper().translate(substitutions)
    return NucleotideSequence(canonical_str)
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def _convert_to_string(sequence, as_rna):
    """
    Get the string representation of a sequence for storage in a FASTA file.

    If `as_rna` is true and the sequence is a :class:`NucleotideSequence`,
    each ``T`` in the string is replaced by ``U``.
    A :class:`ValueError` is raised, if the alphabet of the sequence is not
    a :class:`LetterAlphabet`.
    """
    if not isinstance(sequence.get_alphabet(), LetterAlphabet):
        raise ValueError(
            "Only sequences using single letter alphabets can be stored in a FASTA file"
        )

    sequence_string = str(sequence)
    if as_rna and isinstance(sequence, NucleotideSequence):
        return sequence_string.replace("T", "U")
    return sequence_string
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _as_global(alignment):
    """
    Convert a semi-global alignment into a global alignment.

    A semi-global alignment is an alignment, where alignment columns for terminal
    gaps are not included.

    Parameters
    ----------
    alignment : Alignment
        The semi-global alignment.

    Returns
    -------
    global_alignment : Alignment
        The alignment of the same sequences, whose trace is extended by the
        columns for the missing sequence start and end.

    Raises
    ------
    ValueError
        If more than one sequence does not start at its first symbol or
        more than one sequence does not end at its last symbol,
        i.e. the alignment is local.
    """
    trace = alignment.trace
    sequence_lengths = np.array([len(sequence) for sequence in alignment.sequences])
    # The trace contains sequence indices,
    # so a complete sequence ends at 'length - 1' and not at 'length'
    last_positions = sequence_lengths - 1

    start_positions = []
    end_positions = []
    for i in range(trace.shape[1]):
        trace_for_seq = trace[:, i]
        trace_wo_gaps = trace_for_seq[trace_for_seq != -1]
        start_positions.append(trace_wo_gaps[0])
        end_positions.append(trace_wo_gaps[-1])
    start_positions = np.array(start_positions)
    end_positions = np.array(end_positions)

    missing_start_mask = start_positions != 0
    missing_end_mask = end_positions != last_positions
    if (
        np.count_nonzero(missing_start_mask) > 1
        or np.count_nonzero(missing_end_mask) > 1
    ):
        # If multiple sequences do not run from beginning to end,
        # the alignment is not semi-global, but local
        raise ValueError("Alignment is local, but a semi-global alignment is required")

    trace_parts = [trace]
    if missing_start_mask.any():
        # We need to add a prefix to the alignment, which has gaps for all sequences
        # except for one
        seq_index_with_missing_start = np.where(missing_start_mask)[0][0]
        trace_prefix = np.full(
            (start_positions[seq_index_with_missing_start], trace.shape[1]),
            -1,
            dtype=int,
        )
        trace_prefix[:, seq_index_with_missing_start] = np.arange(len(trace_prefix))
        trace_parts.insert(0, trace_prefix)
    if missing_end_mask.any():
        # The same needs to be done for the end of the alignment
        seq_index_with_missing_end = np.where(missing_end_mask)[0][0]
        end_position = end_positions[seq_index_with_missing_end]
        seq_length = sequence_lengths[seq_index_with_missing_end]
        trace_suffix = np.full(
            (seq_length - end_position - 1, trace.shape[1]), -1, dtype=int
        )
        trace_suffix[:, seq_index_with_missing_end] = np.arange(
            end_position + 1, end_position + 1 + len(trace_suffix)
        )
        trace_parts.append(trace_suffix)

    trace = np.concatenate(trace_parts, axis=0)
    return Alignment(alignment.sequences, trace)
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def _is_lower(characters):
|
|
458
|
+
return (characters >= ord("a")) & (characters <= ord("z"))
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def _is_gap(characters):
|
|
462
|
+
return characters == ord("-")
|