biotite 1.5.0__cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Functions for converting an annotation from/to a GenBank file.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.sequence.io.genbank"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
__all__ = ["get_annotation", "set_annotation"]
|
|
12
|
+
|
|
13
|
+
import re
|
|
14
|
+
import warnings
|
|
15
|
+
from biotite.file import InvalidFileError
|
|
16
|
+
from biotite.sequence.annotation import Annotation, Feature, Location
|
|
17
|
+
|
|
18
|
+
# Zero-based column at which a feature key starts in a line of the
# FEATURES field; a non-space character here marks a new feature
_KEY_START = 5
|
|
19
|
+
# Zero-based column at which the location/qualifier text starts in a
# line of the FEATURES field
_QUAL_START = 21
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_annotation(gb_file, include_only=None):
    """
    Get the sequence annotation from the *FEATURES* field of a
    GenBank file.

    Features whose location identifier is missing or cannot be parsed
    are skipped and a warning is issued for each of them.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to read the *FEATURES* field from.
    include_only : iterable object of str, optional
        List of names of feature keys, which should be included
        in the annotation. By default all features are included.

    Returns
    -------
    annotation : Annotation
        Sequence annotation from the file.

    Raises
    ------
    InvalidFileError
        If the file has no or more than one *FEATURES* field.
    """
    fields = gb_file.get_fields("FEATURES")
    if len(fields) == 0:
        raise InvalidFileError("File has no 'FEATURES' field")
    if len(fields) > 1:
        raise InvalidFileError("File has multiple 'FEATURES' fields")
    lines, _ = fields[0]

    ### Parse all lines to create an index of features,
    # i.e. pairs of the feature key
    # and the text belonging to the respective feature
    feature_list = []
    feature_key = None
    feature_value = ""
    for line in lines:
        # A line introduces a new feature if its key column is occupied;
        # a line too short to reach that column cannot contain a key
        # and is treated as continuation of the current feature
        if len(line) > _KEY_START and line[_KEY_START] != " ":
            if feature_key is not None:
                # Store old feature key and value
                feature_list.append((feature_key, feature_value))
            # Track new key
            feature_key = line[_KEY_START : _QUAL_START - 1].strip()
            feature_value = ""
        feature_value += line[_QUAL_START:] + " "
    # Store last feature key and value (loop already exited);
    # there is nothing to store if the field contains no feature at all
    if feature_key is not None:
        feature_list.append((feature_key, feature_value))

    ### Process only relevant features and put them into an Annotation
    annotation = Annotation()
    # Regex to separate qualifiers from each other
    regex = re.compile(r"""(".*?"|/.*?=)""")
    for key, val in feature_list:
        if include_only is not None and key not in include_only:
            continue
        qual_dict = {}

        # Split feature definition into parts
        # e.g.
        #
        # 1..12
        # /gene="abcA"
        # /product="AbcA"
        #
        # becomes
        #
        # ['1..12', '/gene=', '"abcA"', '/product=', '"AbcA"']
        qualifier_parts = [s.strip() for s in regex.split(val)]
        # Remove empty qualifier parts
        qualifier_parts = [s for s in qualifier_parts if s]
        if not qualifier_parts:
            # A feature key without any location text cannot be placed
            # on the sequence -> handle it like an unparsable location
            warnings.warn(
                f"Feature '{key}' has no location identifier, skipping feature"
            )
            continue
        # First part is location identifier
        loc_string = qualifier_parts.pop(0).strip()
        try:
            locs = _parse_locs(loc_string)
        except Exception:
            # Deliberately broad: any parsing failure means the location
            # syntax is not supported, which should not abort the
            # parsing of the remaining features
            warnings.warn(
                f"'{loc_string}' is an unsupported location identifier, "
                f"skipping feature"
            )
            continue

        # The other parts are pairwise qualifier keys and values
        qual_key = None
        qual_val = None
        for part in qualifier_parts:
            if qual_key is None:
                # This is a qualifier key
                # When the feature contains qualifiers without
                # value, e.g. '/pseudo'
                # The part may contain multiple keys, e.g.
                #
                # '/pseudo /gene='
                #
                # -> split at whitespaces,
                # as keys do not contain whitespaces
                for subpart in part.split():
                    if "=" not in subpart:
                        # Qualifier without value, e.g. '/pseudo'
                        # -> store immediately
                        # Remove "/" -> subpart[1:]
                        qual_key = subpart[1:]
                        _set_qual(qual_dict, qual_key, None)
                        qual_key = None
                    else:
                        # Regular qualifier
                        # -> store key in variable and wait for
                        # next qualifier part to set the value
                        # Remove '/' and '=' -> subpart[1:-1]
                        qual_key = subpart[1:-1]
            else:
                # This is a qualifier value
                # -> remove potential quotes
                if part[0] == '"':
                    qual_val = part[1:-1]
                else:
                    qual_val = part
                # Store qualifier pair
                _set_qual(qual_dict, qual_key, qual_val)
                qual_key = None
                qual_val = None

        annotation.add_feature(Feature(key, locs, qual_dict))

    return annotation
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _parse_locs(loc_str):
    """
    Convert a GenBank location identifier into a list of
    :class:`Location` objects.

    ``join(...)`` and ``order(...)`` are resolved by parsing each
    comma-separated element recursively, ``complement(...)`` by parsing
    its content and putting every resulting location on the reverse
    strand. Anything else is treated as a single location.
    """
    if loc_str.startswith(("join", "order")):
        # Text between the first '(' and the last ')'
        inner = loc_str[loc_str.index("(") + 1 : loc_str.rindex(")")]
        collected = []
        for element in inner.split(","):
            collected += _parse_locs(element.strip())
        return collected

    if loc_str.startswith("complement"):
        inner = loc_str[loc_str.index("(") + 1 : loc_str.rindex(")")]
        # Same positions and defects, but on the reverse strand
        return [
            Location(
                forward.first,
                forward.last,
                Location.Strand.REVERSE,
                forward.defect,
            )
            for forward in _parse_locs(inner)
        ]

    return [_parse_single_loc(loc_str)]
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _parse_single_loc(loc_str):
    """
    Parse a location identifier without enclosing operator, i.e. a
    single position or a range, into a :class:`Location`.

    Parameters
    ----------
    loc_str : str
        The location identifier, e.g. ``'42'``, ``'<42'``, ``'1..12'``,
        ``'<1..>12'``, ``'5.9'`` or ``'3^4'``.

    Returns
    -------
    location : Location
        The parsed location. No strand is passed to the
        :class:`Location` constructor.

    Raises
    ------
    ValueError
        If a position in the identifier is not an integer.
    """
    if ".." in loc_str:
        split_char = ".."
        defect = Location.Defect.NONE
    elif "." in loc_str:
        split_char = "."
        defect = Location.Defect.UNK_LOC
    elif "^" in loc_str:
        split_char = "^"
        defect = Location.Defect.BETWEEN
    else:
        # Parse single location
        defect = Location.Defect.NONE
        # 'startswith()' instead of indexing, so that an empty string
        # fails in 'int()' like any other non-integer position
        if loc_str.startswith("<"):
            loc_str = loc_str[1:]
            defect |= Location.Defect.BEYOND_LEFT
        elif loc_str.startswith(">"):
            loc_str = loc_str[1:]
            defect |= Location.Defect.BEYOND_RIGHT
        first_and_last = int(loc_str)
        return Location(first_and_last, first_and_last, defect=defect)
    # Parse location range
    # The separator is contained in the string (checked above),
    # hence the split gives at least two elements
    loc_str_split = loc_str.split(split_char)
    first_str = loc_str_split[0]
    last_str = loc_str_split[1]
    # Parse Defects
    if first_str.startswith("<"):
        first = int(first_str[1:])
        defect |= Location.Defect.BEYOND_LEFT
    else:
        first = int(first_str)
    if last_str.startswith(">"):
        last = int(last_str[1:])
        defect |= Location.Defect.BEYOND_RIGHT
    else:
        last = int(last_str)
    return Location(first, last, defect=defect)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _set_qual(qual_dict, key, val):
|
|
202
|
+
"""
|
|
203
|
+
Set a mapping key to val in the dictionary.
|
|
204
|
+
If the key already exists in the dictionary, append the value (str)
|
|
205
|
+
to the existing value, separated by a line break.
|
|
206
|
+
"""
|
|
207
|
+
if key in qual_dict:
|
|
208
|
+
qual_dict[key] += "\n" + val
|
|
209
|
+
else:
|
|
210
|
+
qual_dict[key] = val
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def set_annotation(gb_file, annotation):
    """
    Write an annotation into the *FEATURES* field of a GenBank file.

    The features are written in sorted order. Each feature gives one
    line containing its key and location, followed by one line per
    qualifier value.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to be edited.
    annotation : Annotation
        The annotation that is put into the GenBank file.
    """
    key_indent = " " * _KEY_START
    qual_indent = " " * _QUAL_START
    key_width = _QUAL_START - _KEY_START

    field_lines = []
    for feature in sorted(annotation):
        # The key is padded, so the location starts at the qualifier column
        padded_key = feature.key.ljust(key_width)
        field_lines.append(
            key_indent + padded_key + _convert_to_loc_string(feature.locs)
        )
        for qual_name, qual_content in feature.qual.items():
            if qual_content is None:
                # Qualifier without value
                field_lines.append(qual_indent + f"/{qual_name}")
                continue
            # Multiple values of the same qualifier are separated by
            # line breaks -> write one qualifier line for each of them
            field_lines.extend(
                qual_indent + f'/{qual_name}="{entry}"'
                for entry in qual_content.split("\n")
            )
    gb_file.set_field("FEATURES", field_lines)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _convert_to_loc_string(locs):
    """
    Create a GenBank compatible location string from a list of
    :class:`Location` objects.

    Parameters
    ----------
    locs : iterable object of Location
        The locations to be converted.
        A single location is written as position or range, wrapped into
        ``complement()`` if it is on the reverse strand.
        Any other number of locations is written as ``join()`` of the
        individually converted locations.

    Returns
    -------
    loc_string : str
        The location string.
    """
    if len(locs) == 1:
        loc = next(iter(locs))
        loc_first_str = str(loc.first)
        loc_last_str = str(loc.last)
        beyond_left = loc.defect & Location.Defect.BEYOND_LEFT
        beyond_right = loc.defect & Location.Defect.BEYOND_RIGHT
        if beyond_left:
            loc_first_str = "<" + loc_first_str
        if beyond_right:
            loc_last_str = ">" + loc_last_str
        if loc.first == loc.last:
            # Single position: only one marker can be written.
            # Keep '>' if it is the only defect, so that it is not lost
            # when the string is parsed again
            if beyond_right and not beyond_left:
                loc_string = loc_last_str
            else:
                loc_string = loc_first_str
        elif loc.defect & Location.Defect.UNK_LOC:
            loc_string = loc_first_str + "." + loc_last_str
        elif loc.defect & Location.Defect.BETWEEN:
            loc_string = loc_first_str + "^" + loc_last_str
        else:
            loc_string = loc_first_str + ".." + loc_last_str
        if loc.strand == Location.Strand.REVERSE:
            loc_string = f"complement({loc_string})"
    else:
        loc_string = ",".join([_convert_to_loc_string([loc]) for loc in locs])
        loc_string = f"join({loc_string})"
    return loc_string
|