biotite 1.5.0__cp314-cp314-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-314-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-314-darwin.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-314-darwin.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-314-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-314-darwin.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-314-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-314-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-314-darwin.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-314-darwin.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-314-darwin.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-314-darwin.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-314-darwin.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-314-darwin.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-314-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-314-darwin.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,836 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.sequence"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["Location", "Feature", "Annotation", "AnnotatedSequence"]
|
|
8
|
+
|
|
9
|
+
import copy
|
|
10
|
+
import numbers
|
|
11
|
+
import sys
|
|
12
|
+
from enum import Enum, Flag, auto
|
|
13
|
+
import numpy as np
|
|
14
|
+
from biotite.copyable import Copyable
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Location:
    """
    The position of a feature (or of one of its parts) on a sequence.

    A :class:`Location` covers the bases/residues from `first` up to
    and including `last`.
    A feature that is composed of several joined parts owns one
    :class:`Location` instance per part.

    Objects of this class are immutable: all values are only exposed
    via read-only properties.
    A :class:`ValueError` is raised on construction, if `first` is
    greater than `last`.

    Parameters
    ----------
    first : int
        Starting base or residue position of the feature.
    last : int
        Inclusive ending base or residue position of the feature.
    strand : Strand
        The strand direction.
        Always :attr:`Strand.FORWARD` for peptide features.
    defect : Defect
        A possible defect of the location.

    Attributes
    ----------
    first, last, strand, defect
        Same as the parameters.
    """

    class Defect(Flag):
        """
        Flags describing the defects a location may have.

        A location is defective, when the feature itself is not
        exactly located in the range from the first to the last base.
        As this is a :class:`Flag`, multiple defects can be combined
        with the ``|`` operator.

        - **NONE** - No location defect
        - **MISS_LEFT** - A part of the feature has been truncated
          before the first base/residue of the :class:`Location`
          (probably by indexing an :class:`Annotation` object)
        - **MISS_RIGHT** - A part of the feature has been truncated
          after the last base/residue of the :class:`Location`
          (probably by indexing an :class:`Annotation` object)
        - **BEYOND_LEFT** - The feature starts at an unknown position
          before the first base/residue of the :class:`Location`
        - **BEYOND_RIGHT** - The feature ends at an unknown position
          after the last base/residue of the :class:`Location`
        - **UNK_LOC** - The exact position is unknown, but it is at a
          single base/residue between the first and last residue of
          the :class:`Location`, inclusive
        - **BETWEEN** - The position is between two consecutive
          bases/residues.
        """

        # The declaration order determines the bit assigned by 'auto()'
        NONE = 0
        MISS_LEFT = auto()
        MISS_RIGHT = auto()
        BEYOND_LEFT = auto()
        BEYOND_RIGHT = auto()
        UNK_LOC = auto()
        BETWEEN = auto()

    class Strand(Enum):
        """
        The strand the feature location lies on.

        This has no meaning for protein sequence features.
        """

        FORWARD = auto()
        REVERSE = auto()

    def __init__(self, first, last, strand=Strand.FORWARD, defect=Defect.NONE):
        # An inverted range is rejected: 'last' is an inclusive end
        if first > last:
            raise ValueError(
                "The first position cannot be higher than the last position"
            )
        self._first, self._last = first, last
        self._strand, self._defect = strand, defect

    @property
    def first(self):
        return self._first

    @property
    def last(self):
        return self._last

    @property
    def strand(self):
        return self._strand

    @property
    def defect(self):
        return self._defect

    def __repr__(self):
        """Represent Location as a string for debugging."""
        # The enums are nested classes, hence the 'Location.' prefix
        strand_repr = "Location." + str(self._strand)
        defect_repr = "Location." + str(self._defect)
        return (
            f"Location({self._first}, {self._last}, "
            f"strand={strand_repr}, defect={defect_repr})"
        )

    def __str__(self):
        span = f"{self.first:d}-{self.last:d}"
        # The arrow points into the strand direction
        return span + " >" if self.strand == Location.Strand.FORWARD else "< " + span

    def __eq__(self, item):
        if not isinstance(item, Location):
            return False
        # Two locations are equal, if all four of their values are equal
        return (
            self.first == item.first
            and self.last == item.last
            and self.strand == item.strand
            and self.defect == item.defect
        )

    def __hash__(self):
        # Built from the same four values '__eq__()' compares
        identity = (self._first, self._last, self._strand, self._defect)
        return hash(identity)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class Feature(Copyable):
    """
    This class represents a single sequence feature, for example from a
    GenBank feature table.
    A feature describes a functional part of a sequence.
    It consists of a feature key, describing the general class of the
    feature, at least one location, describing its position on the
    reference, and qualifiers, describing the feature in detail.

    Objects of this class are immutable.

    Parameters
    ----------
    key : str
        The name of the feature class, e.g. *gene*, *CDS* or
        *regulatory*.
    locs : iterable object of Location
        The feature locations. In most cases this will only
        contain one location, but multiple ones are also possible for
        example in eukaryotic CDS (due to splicing).
        Any iterable is accepted, including one-shot iterables such as
        generators.
    qual : dict, optional
        Maps feature qualifiers to their corresponding values.
        The keys are always strings. A value is either a string or
        ``None`` if the qualifier key does not have a value.
        If a key has multiple values, the values are separated by a
        line break.

    Attributes
    ----------
    key : str
        The name of the feature class, e.g. *gene*, *CDS* or
        *regulatory*.
    locs : frozenset of Location
        The feature locations. In most cases this set will only
        contain one location, but multiple ones are also possible for
        example in eukaryotic CDS (due to splicing).
    qual : dict
        Maps feature qualifiers to their corresponding values.
        The keys are always strings. A value is either a string or
        ``None`` if the qualifier key does not have a value.
        If a key has multiple values, the values are separated by a
        line break.
        Each access returns a shallow copy of the internal dictionary.
    """

    def __init__(self, key, locs, qual=None):
        self._key = key
        # Materialize the locations before checking for emptiness:
        # 'locs' may be a one-shot iterable (e.g. a generator), which
        # neither supports 'len()' nor a second iteration
        self._locs = frozenset(locs)
        if not self._locs:
            raise ValueError("A feature must have at least one location")
        # Deep copy, so later changes to the given dictionary
        # do not leak into this feature
        self._qual = copy.deepcopy(qual) if qual is not None else {}

    def __repr__(self):
        """Represent Feature as a string for debugging."""
        locs_repr = ", ".join([loc.__repr__() for loc in self.locs])
        return f'Feature("{self._key}", [{locs_repr}], qual={self._qual})'

    def get_location_range(self):
        """
        Get the minimum first base/residue and maximum last base/residue
        of all feature locations.

        This can be used to create a location, that spans all of the
        feature's locations.

        Returns
        -------
        first : int
            The minimum first base/residue of all locations.
        last : int
            The maximum last base/residue of all locations.
        """
        first = np.min([loc.first for loc in self._locs])
        last = np.max([loc.last for loc in self._locs])
        return first, last

    def __eq__(self, item):
        if not isinstance(item, Feature):
            return False
        return (
            self._key == item._key
            and self._locs == item._locs
            and self._qual == item._qual
        )

    def __lt__(self, item):
        # Comparison with a non-feature is never true, instead of
        # raising an exception
        if not isinstance(item, Feature):
            return False
        first, last = self.get_location_range()
        it_first, it_last = item.get_location_range()
        # The first base/residue is most significant,
        # if it is equal for both features, look at last base/residue
        if first < it_first:
            return True
        elif first > it_first:
            return False
        else:  # First is equal
            # The feature reaching further is ordered first
            return last > it_last

    def __gt__(self, item):
        # Comparison with a non-feature is never true, instead of
        # raising an exception
        if not isinstance(item, Feature):
            return False
        first, last = self.get_location_range()
        it_first, it_last = item.get_location_range()
        # The first base/residue is most significant,
        # if it is equal for both features, look at last base/residue
        if first > it_first:
            return True
        elif first < it_first:
            return False
        else:  # First is equal
            # Mirror of '__lt__()': the shorter reaching feature is greater
            return last < it_last

    @property
    def key(self):
        return self._key

    @property
    def locs(self):
        return copy.copy(self._locs)

    @property
    def qual(self):
        # Shallow copy, so the internal dictionary cannot be modified
        # via the returned object
        return copy.copy(self._qual)

    def __hash__(self):
        # The dictionary itself is not hashable,
        # hence its items are frozen into a set
        return hash((self._key, self._locs, frozenset(self._qual.items())))
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
class Annotation(Copyable):
|
|
270
|
+
"""
|
|
271
|
+
An :class:`Annotation` is a set of features belonging to one
|
|
272
|
+
sequence.
|
|
273
|
+
|
|
274
|
+
Its advantage over a simple list is the base/residue position based
|
|
275
|
+
indexing:
|
|
276
|
+
When using slice indices in Annotation objects, a subannotation is
|
|
277
|
+
created, containing copies of all :class:`Feature` objects whose
|
|
278
|
+
first and last base/residue are in range of the slice.
|
|
279
|
+
If the slice starts after the first base/residue or/and the slice
|
|
280
|
+
ends before the last residue, the position out of range is set to
|
|
281
|
+
the boundaries of the slice (the :class:`Feature` is truncated).
|
|
282
|
+
In this case the :class:`Feature` obtains the
|
|
283
|
+
:attr:`Location.Defect.MISS_LEFT` and/or
|
|
284
|
+
:attr:`Location.Defect.MISS_RIGHT` defect.
|
|
285
|
+
The third case occurs when a :class:`Feature` starts after the slice
|
|
286
|
+
ends or a :class:`Feature` ends before the slice starts.
|
|
287
|
+
In this case the :class:`Feature` will not appear in the
|
|
288
|
+
subannotation.
|
|
289
|
+
|
|
290
|
+
The start or stop position in the slice indices can be omitted, then
|
|
291
|
+
the subannotation will include all features from the start or up to
|
|
292
|
+
the stop, respectively. Step values are ignored.
|
|
293
|
+
The stop values are still exclusive, i.e. the subannotation will
|
|
294
|
+
contain a not truncated :class:`Feature` only if its last
|
|
295
|
+
base/residue is smaller than the stop value of the slice.
|
|
296
|
+
|
|
297
|
+
Integers or other index types are not supported. If you want to
|
|
298
|
+
obtain the :class:`Feature` instances from the :class:`Annotation`
|
|
299
|
+
you need to iterate over it.
|
|
300
|
+
The iteration has no defined order.
|
|
301
|
+
Alternatively, you can obtain a copy of the internal
|
|
302
|
+
:class:`Feature` set via :func:`get_features()`.
|
|
303
|
+
|
|
304
|
+
Multiple :class:`Annotation` objects can be concatenated to one
|
|
305
|
+
:class:`Annotation` object using the '+' operator.
|
|
306
|
+
Single :class:`Feature` instances can be added this way, too.
|
|
307
|
+
If a feature is present in both :class:`Annotation` objects, the
|
|
308
|
+
resulting :class:`Annotation` will contain this feature twice.
|
|
309
|
+
|
|
310
|
+
Parameters
|
|
311
|
+
----------
|
|
312
|
+
features : iterable object of Feature, optional
|
|
313
|
+
The features to create the :class:`Annotation` from. if not
|
|
314
|
+
provided, an empty :class:`Annotation` is created.
|
|
315
|
+
|
|
316
|
+
Examples
|
|
317
|
+
--------
|
|
318
|
+
Creating an annotation from a feature list:
|
|
319
|
+
|
|
320
|
+
>>> feature1 = Feature("CDS", [Location(-10, 30 )], qual={"gene" : "test1"})
|
|
321
|
+
>>> feature2 = Feature("CDS", [Location(20, 50 )], qual={"gene" : "test2"})
|
|
322
|
+
>>> annotation = Annotation([feature1, feature2])
|
|
323
|
+
>>> for f in sorted(list(annotation)):
|
|
324
|
+
... print(f.qual["gene"], "".join([str(loc) for loc in f.locs]))
|
|
325
|
+
test1 -10-30 >
|
|
326
|
+
test2 20-50 >
|
|
327
|
+
|
|
328
|
+
Merging two annotations and a feature:
|
|
329
|
+
|
|
330
|
+
>>> feature3 = Feature("CDS", [Location(100, 130 )], qual={"gene" : "test3"})
|
|
331
|
+
>>> feature4 = Feature("CDS", [Location(150, 250 )], qual={"gene" : "test4"})
|
|
332
|
+
>>> annotation2 = Annotation([feature3, feature4])
|
|
333
|
+
>>> feature5 = Feature("CDS", [Location(-50, 200 )], qual={"gene" : "test5"})
|
|
334
|
+
>>> annotation = annotation + annotation2 + feature5
|
|
335
|
+
>>> for f in sorted(list(annotation)):
|
|
336
|
+
... print(f.qual["gene"], "".join([str(loc) for loc in f.locs]))
|
|
337
|
+
test5 -50-200 >
|
|
338
|
+
test1 -10-30 >
|
|
339
|
+
test2 20-50 >
|
|
340
|
+
test3 100-130 >
|
|
341
|
+
test4 150-250 >
|
|
342
|
+
|
|
343
|
+
Location based indexing, note the defects:
|
|
344
|
+
|
|
345
|
+
>>> annotation = annotation[40:150]
|
|
346
|
+
>>> for f in sorted(list(annotation)):
|
|
347
|
+
... gene = f.qual["gene"]
|
|
348
|
+
... loc_str = "".join([f"{loc} {loc.defect}" for loc in f.locs])
|
|
349
|
+
... print(gene, loc_str)
|
|
350
|
+
test5 40-149 > Defect.MISS_LEFT|MISS_RIGHT
|
|
351
|
+
test2 40-50 > Defect.MISS_LEFT
|
|
352
|
+
test3 100-130 > Defect.NONE
|
|
353
|
+
"""
|
|
354
|
+
|
|
355
|
+
def __init__(self, features=None):
    # The features are kept in a set: duplicates collapse and the
    # order of the given iterable is not preserved.
    # Without an argument an empty annotation is created.
    self._features = set() if features is None else set(features)
|
|
360
|
+
|
|
361
|
+
def __repr__(self):
    """Represent Annotation as a string for debugging."""
    # One representation per stored feature, in set iteration order
    feature_reprs = [feat.__repr__() for feat in self._features]
    return "Annotation([" + ", ".join(feature_reprs) + "])"
|
|
366
|
+
|
|
367
|
+
def __copy_create__(self):
    # The constructor builds a new set from the given features,
    # so the created object does not share its set with this one
    return Annotation(features=self._features)
|
|
369
|
+
|
|
370
|
+
def get_features(self):
    """
    Get a copy of the internal feature set.

    Returns
    -------
    feature_set : set of Feature
        A shallow copy of the internal feature set.
        Changing the returned set does not change the annotation.
    """
    return self._features.copy()
|
|
380
|
+
|
|
381
|
+
def add_feature(self, feature):
    """
    Add a feature to the annotation.

    Parameters
    ----------
    feature : Feature
        Feature to be added.

    Raises
    ------
    TypeError
        If `feature` is not a :class:`Feature`.
    """
    if isinstance(feature, Feature):
        self._features.add(feature)
        return
    raise TypeError(
        f"Only 'Feature' objects are supported, not {type(feature).__name__}"
    )
|
|
395
|
+
|
|
396
|
+
def get_location_range(self):
    """
    Get the range of feature locations,
    i.e. the first and exclusive last base/residue.

    Returns
    -------
    int : start
        Start location, i.e. the smallest `first` of all locations.
    int : stop
        Exclusive stop location, i.e. the largest `last` of all
        locations plus one.

    Notes
    -----
    If the annotation contains no location at all, the sentinel
    values ``sys.maxsize`` and ``-sys.maxsize + 1`` are returned.
    """
    all_locs = [loc for feature in self._features for loc in feature.locs]
    # The sentinels come first, so they are the result if there are
    # no locations
    first = min([sys.maxsize] + [loc.first for loc in all_locs])
    last = max([-sys.maxsize] + [loc.last for loc in all_locs])
    # Exclusive stop -> +1
    return first, last + 1
|
|
418
|
+
|
|
419
|
+
def del_feature(self, feature):
    """
    Delete a feature from the annotation.

    Parameters
    ----------
    feature : Feature
        Feature to be removed.

    Raises
    ------
    KeyError
        If the feature is not in the annotation
    """
    # set.remove() raises the documented KeyError for a missing feature
    features = self._features
    features.remove(feature)
|
|
434
|
+
|
|
435
|
+
def __add__(self, item):
    """
    Create a new annotation containing the features of this
    annotation plus either all features of another annotation or a
    single feature.

    Parameters
    ----------
    item : Annotation or Feature
        The features to merge with.

    Returns
    -------
    merged : Annotation
        The new annotation, this annotation is left unchanged.

    Raises
    ------
    TypeError
        If `item` is neither an :class:`Annotation` nor a
        :class:`Feature`.
    """
    if isinstance(item, Annotation):
        merged_features = self._features | item._features
    elif isinstance(item, Feature):
        merged_features = self._features | {item}
    else:
        raise TypeError(
            f"Only 'Feature' and 'Annotation' objects are supported, "
            f"not {type(item).__name__}"
        )
    return Annotation(merged_features)
|
|
445
|
+
|
|
446
|
+
def __iadd__(self, item):
    """
    Add all features of another annotation or a single feature to
    this annotation in-place.

    Parameters
    ----------
    item : Annotation or Feature
        The features to be added.

    Returns
    -------
    self : Annotation
        This annotation.

    Raises
    ------
    TypeError
        If `item` is neither an :class:`Annotation` nor a
        :class:`Feature`.
    """
    if isinstance(item, Annotation):
        self._features.update(item._features)
        return self
    if isinstance(item, Feature):
        self._features.add(item)
        return self
    raise TypeError(
        f"Only 'Feature' and 'Annotation' objects are supported, "
        f"not {type(item).__name__}"
    )
|
|
457
|
+
|
|
458
|
+
def __getitem__(self, index):
    """
    Get the part of the annotation that lies in a location range.

    Locations that are only partly inside the range are truncated
    to the range and get the corresponding
    ``MISS_LEFT``/``MISS_RIGHT`` defect.
    Features without any location in the range are omitted.
    The step of the slice is not evaluated.

    Parameters
    ----------
    index : slice
        The location range: inclusive start and exclusive stop.
        An omitted start or stop leaves that side unbounded.

    Returns
    -------
    sub_annotation : Annotation
        The annotation restricted to the range.

    Raises
    ------
    TypeError
        If `index` is not a slice.
    """
    if not isinstance(index, slice):
        raise TypeError(f"'{type(index).__name__}' instances are invalid indices")

    # An omitted bound is replaced by an extreme value,
    # so the range checks below are always fulfilled for that side
    lower = -sys.maxsize if index.start is None else index.start
    # Slice stop is exclusive, location ends are inclusive -> -1
    upper = sys.maxsize if index.stop is None else index.stop - 1

    result = Annotation()
    for feature in self:
        clipped_locs = []
        for loc in feature.locs:
            if loc.first > upper or loc.last < lower:
                # The location is completely outside of the range
                continue
            new_first, new_last = loc.first, loc.last
            defect = loc.defect
            # Truncate overhanging ends and mark them as missing
            if new_first < lower:
                defect |= Location.Defect.MISS_LEFT
                new_first = lower
            if new_last > upper:
                defect |= Location.Defect.MISS_RIGHT
                new_last = upper
            clipped_locs.append(Location(new_first, new_last, loc.strand, defect))
        if clipped_locs:
            # The feature is kept if at least one of its locations
            # is in the range
            result.add_feature(
                Feature(key=feature.key, locs=clipped_locs, qual=feature.qual)
            )
    return result
|
|
501
|
+
|
|
502
|
+
def __delitem__(self, item):
    """
    Delete a feature from the annotation via ``del annotation[feature]``.

    Raises
    ------
    TypeError
        If `item` is not a :class:`Feature`.
    KeyError
        If the feature is not in the annotation.
    """
    if isinstance(item, Feature):
        self.del_feature(item)
        return
    raise TypeError(
        f"Only 'Feature' objects are supported, not {type(item).__name__}"
    )
|
|
508
|
+
|
|
509
|
+
def __iter__(self):
    """Iterate over the features of the annotation."""
    return iter(self._features)
|
|
511
|
+
|
|
512
|
+
def __contains__(self, item):
    """Check whether `item` is one of the features of the annotation."""
    features = self._features
    return item in features
|
|
514
|
+
|
|
515
|
+
def __eq__(self, item):
    """
    Two annotations are equal if they contain equal feature sets.
    Objects of any other type are never equal to an annotation.
    """
    return isinstance(item, Annotation) and self._features == item._features
|
|
519
|
+
|
|
520
|
+
def __len__(self):
    """Get the number of features in the annotation."""
    features = self._features
    return len(features)
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
class AnnotatedSequence(Copyable):
|
|
525
|
+
"""
|
|
526
|
+
An :class:`AnnotatedSequence` is a combination of a
|
|
527
|
+
:class:`Sequence` and an :class:`Annotation`.
|
|
528
|
+
|
|
529
|
+
Indexing an :class:`AnnotatedSequence` with a slice returns another
|
|
530
|
+
:class:`AnnotatedSequence` with the corresponding subannotation and
|
|
531
|
+
a sequence start corrected subsequence, i.e. indexing starts at 1
|
|
532
|
+
with the default sequence start 1.
|
|
533
|
+
The sequence start in the newly created :class:`AnnotatedSequence`
|
|
534
|
+
is the start of the slice.
|
|
535
|
+
Furthermore, integer indices are allowed in which case the
|
|
536
|
+
corresponding symbol of the sequence is returned (also sequence
|
|
537
|
+
start corrected).
|
|
538
|
+
In both cases the index must be in range of the sequence, e.g. if
|
|
539
|
+
sequence start is 1, index 0 is not allowed.
|
|
540
|
+
Negative indices do not mean indexing from the end of the sequence,
|
|
541
|
+
in contrast to the behavior in :class:`Sequence` objects.
|
|
542
|
+
Both index types can also be used to modify the sequence.
|
|
543
|
+
|
|
544
|
+
Another option is indexing with a :class:`Feature` (preferably from the
|
|
545
|
+
:class:`Annotation` in the same :class:`AnnotatedSequence`).
|
|
546
|
+
In this case a sequence, described by the location(s) of the
|
|
547
|
+
:class:`Feature`, is returned.
|
|
548
|
+
When using a :class:`Feature` for setting an
|
|
549
|
+
:class:`AnnotatedSequence` with a sequence, the new sequence is
|
|
550
|
+
replacing the locations of the
|
|
551
|
+
:class:`Feature`.
|
|
552
|
+
Note that the replacing sequence must have the same length as the
|
|
553
|
+
sequence of the :class:`Feature` index.
|
|
554
|
+
|
|
555
|
+
Parameters
|
|
556
|
+
----------
|
|
557
|
+
annotation : Annotation
|
|
558
|
+
The annotation corresponding to `sequence`.
|
|
559
|
+
sequence : Sequence
|
|
560
|
+
The sequence.
|
|
561
|
+
Usually a :class:`NucleotideSequence` or
|
|
562
|
+
:class:`ProteinSequence`.
|
|
563
|
+
sequence_start : int, optional
|
|
564
|
+
By default, the first symbol of the sequence is corresponding
|
|
565
|
+
to location 1 of the features in the annotation. The location
|
|
566
|
+
of the first symbol can be changed by setting this parameter.
|
|
567
|
+
Negative values are not supported yet.
|
|
568
|
+
|
|
569
|
+
Attributes
|
|
570
|
+
----------
|
|
571
|
+
annotation : Annotation
|
|
572
|
+
The annotation corresponding to `sequence`.
|
|
573
|
+
sequence : Sequence
|
|
574
|
+
The represented sequence.
|
|
575
|
+
sequence_start : int
|
|
576
|
+
The location of the first symbol in the sequence.
|
|
577
|
+
|
|
578
|
+
See Also
|
|
579
|
+
--------
|
|
580
|
+
Annotation : An annotation separated from a sequence.
|
|
581
|
+
Sequence : A sequence separated from an annotation.
|
|
582
|
+
|
|
583
|
+
Examples
|
|
584
|
+
--------
|
|
585
|
+
Creating an annotated sequence
|
|
586
|
+
|
|
587
|
+
>>> sequence = NucleotideSequence("ATGGCGTACGATTAGAAAAAAA")
|
|
588
|
+
>>> feature1 = Feature("misc_feature", [Location(1,2), Location(11,12)],
|
|
589
|
+
... {"note" : "walker"})
|
|
590
|
+
>>> feature2 = Feature("misc_feature", [Location(16,22)], {"note" : "poly-A"})
|
|
591
|
+
>>> annotation = Annotation([feature1, feature2])
|
|
592
|
+
>>> annot_seq = AnnotatedSequence(annotation, sequence)
|
|
593
|
+
>>> print(annot_seq.sequence)
|
|
594
|
+
ATGGCGTACGATTAGAAAAAAA
|
|
595
|
+
>>> for f in sorted(list(annot_seq.annotation)):
|
|
596
|
+
... print(f.qual["note"])
|
|
597
|
+
walker
|
|
598
|
+
poly-A
|
|
599
|
+
|
|
600
|
+
Indexing with integers, note the sequence start correction
|
|
601
|
+
|
|
602
|
+
>>> print(annot_seq[2])
|
|
603
|
+
T
|
|
604
|
+
>>> print(annot_seq.sequence[2])
|
|
605
|
+
G
|
|
606
|
+
|
|
607
|
+
Indexing with slices
|
|
608
|
+
|
|
609
|
+
>>> annot_seq2 = annot_seq[:16]
|
|
610
|
+
>>> print(annot_seq2.sequence)
|
|
611
|
+
ATGGCGTACGATTAG
|
|
612
|
+
>>> for f in annot_seq2.annotation:
|
|
613
|
+
... print(f.qual["note"])
|
|
614
|
+
walker
|
|
615
|
+
|
|
616
|
+
Indexing with features
|
|
617
|
+
|
|
618
|
+
>>> print(annot_seq[feature1])
|
|
619
|
+
ATAT
|
|
620
|
+
>>> print(annot_seq[feature2])
|
|
621
|
+
AAAAAAA
|
|
622
|
+
>>> print(annot_seq.sequence)
|
|
623
|
+
ATGGCGTACGATTAGAAAAAAA
|
|
624
|
+
>>> annot_seq[feature1] = NucleotideSequence("CCCC")
|
|
625
|
+
>>> print(annot_seq.sequence)
|
|
626
|
+
CCGGCGTACGCCTAGAAAAAAA
|
|
627
|
+
"""
|
|
628
|
+
|
|
629
|
+
def __init__(self, annotation, sequence, sequence_start=1):
    # The objects are stored as given, no copies are made
    self._seqstart = sequence_start
    self._sequence = sequence
    self._annotation = annotation
|
|
633
|
+
|
|
634
|
+
def __repr__(self):
    """Represent AnnotatedSequence as a string for debugging."""
    annotation_repr = self._annotation.__repr__()
    sequence_repr = self._sequence.__repr__()
    return (
        f"AnnotatedSequence({annotation_repr}, {sequence_repr}, "
        f"sequence_start={self._seqstart})"
    )
|
|
640
|
+
|
|
641
|
+
@property
def sequence_start(self):
    """The location of the first symbol in the sequence."""
    return self._seqstart
|
|
644
|
+
|
|
645
|
+
@property
def sequence(self):
    """The represented sequence (the stored object, not a copy)."""
    return self._sequence
|
|
648
|
+
|
|
649
|
+
@property
def annotation(self):
    """The annotation of the sequence (the stored object, not a copy)."""
    return self._annotation
|
|
652
|
+
|
|
653
|
+
def __copy_create__(self):
    # Create the object for a copy of this annotated sequence:
    # annotation and sequence are copied, so the new object does not
    # share them with this one.
    # `copy` must be *called* on the sequence: passing the bound
    # method itself would store a method object as the sequence of
    # the new object
    return AnnotatedSequence(
        self._annotation.copy(), self._sequence.copy(), self._seqstart
    )
|
|
657
|
+
|
|
658
|
+
def reverse_complement(self, sequence_start=1):
    """
    Create the reverse complement of the annotated sequence.

    This method accurately converts the position and the strand of
    the annotation.
    The information on the sequence start is lost.

    Parameters
    ----------
    sequence_start : int, optional
        The location of the first symbol in the reverse complement
        sequence.

    Returns
    -------
    rev_sequence : AnnotatedSequence
        The reverse complement of the annotated sequence.
    """
    # Left and right are swapped on the opposite strand, so each
    # directional defect is replaced by its mirrored counterpart;
    # the remaining handled defects are carried over unchanged
    mirrored_defects = (
        (Location.Defect.MISS_LEFT, Location.Defect.MISS_RIGHT),
        (Location.Defect.MISS_RIGHT, Location.Defect.MISS_LEFT),
        (Location.Defect.BEYOND_RIGHT, Location.Defect.BEYOND_LEFT),
        (Location.Defect.BEYOND_LEFT, Location.Defect.BEYOND_RIGHT),
        (Location.Defect.UNK_LOC, Location.Defect.UNK_LOC),
        (Location.Defect.BETWEEN, Location.Defect.BETWEEN),
    )

    rev_sequence = self._sequence.reverse().complement()
    last_index = len(self._sequence) - 1

    def mirror(position):
        # (position - self._seqstart) -> location to index
        # last_index - ...            -> index on the reversed sequence
        # ... + sequence_start        -> index to location
        return last_index - (position - self._seqstart) + sequence_start

    rev_features = []
    for feature in self._annotation:
        rev_locs = []
        for loc in feature.locs:
            # Only a forward location becomes a reverse one,
            # anything else is put onto the forward strand
            rev_strand = (
                Location.Strand.REVERSE
                if loc.strand == Location.Strand.FORWARD
                else Location.Strand.FORWARD
            )
            rev_defect = Location.Defect.NONE
            for present, mirrored in mirrored_defects:
                if loc.defect & present:
                    rev_defect |= mirrored
            # The mirrored last position is the new first position
            # and vice versa
            rev_locs.append(
                Location(mirror(loc.last), mirror(loc.first), rev_strand, rev_defect)
            )
        rev_features.append(Feature(feature.key, rev_locs, feature.qual))

    return AnnotatedSequence(Annotation(rev_features), rev_sequence, sequence_start)
|
|
724
|
+
|
|
725
|
+
def __getitem__(self, index):
    """
    Get a part of the annotated sequence.

    Parameters
    ----------
    index : Feature or slice or int
        - :class:`Feature`: The subsequences at the locations of the
          feature are concatenated. Locations on the reverse strand
          are reverse complemented.
        - slice: The range of locations (sequence start corrected).
        - int: The location of a single symbol (sequence start
          corrected).

    Returns
    -------
    item : Sequence or AnnotatedSequence or symbol
        A sequence for a :class:`Feature` index,
        an :class:`AnnotatedSequence` for a slice and
        a single symbol for an integer.

    Raises
    ------
    ValueError
        If the feature has no locations or its locations are on
        different strands.
    IndexError
        If the start of a slice is lower than the sequence start.
    TypeError
        If the index has an unsupported type.
    """
    if isinstance(index, Feature):
        # Concatenate subsequences for each location of the feature
        locs = index.locs
        if len(locs) == 0:
            raise ValueError("Feature does not contain any locations")
        # Start by creating an empty sequence
        sub_seq = self._sequence.copy(new_seq_code=np.array([]))
        # Locations need to be sorted, as otherwise the locations
        # chunks would be merged in the wrong order
        # The order depends on whether the locs are on the forward
        # or reverse strand
        strand = None
        for loc in locs:
            if loc.strand == strand:
                pass
            elif strand is None:
                strand = loc.strand
            else:  # loc.strand != strand
                raise ValueError(
                    "All locations of the feature must have the same "
                    "strand direction"
                )
        if strand == Location.Strand.FORWARD:
            sorted_locs = sorted(locs, key=lambda loc: loc.first)
        else:
            sorted_locs = sorted(locs, key=lambda loc: loc.last, reverse=True)
        # Merge the sequences corresponding to the ordered locations
        for loc in sorted_locs:
            slice_start = loc.first - self._seqstart
            # +1 due to exclusive stop
            slice_stop = loc.last - self._seqstart + 1
            add_seq = self._sequence[slice_start:slice_stop]
            if loc.strand == Location.Strand.REVERSE:
                add_seq = add_seq.reverse().complement()
            sub_seq += add_seq
        return sub_seq

    elif isinstance(index, slice):
        # Sequence start correction:
        # 'seq_start' and 'seq_stop' are indices into the sequence,
        # whereas the annotation is sliced with locations
        if index.start is None:
            seq_start = 0
        else:
            if index.start < self._seqstart:
                raise IndexError(
                    f"The start of the index ({index.start}) is lower "
                    f"than the start of the sequence ({self._seqstart})"
                )
            seq_start = index.start - self._seqstart
        if index.stop is None:
            seq_stop = len(self._sequence)
            # The exclusive stop *location* is the location after
            # the last symbol; using the sequence length itself
            # would cut off the end of the annotation
            annot_stop = self._seqstart + seq_stop
        else:
            seq_stop = index.stop - self._seqstart
            annot_stop = index.stop
        annot_index = slice(index.start, annot_stop, index.step)
        # New value for the sequence start, value is base position
        if index.start is None:
            rel_seq_start = self._seqstart
        else:
            rel_seq_start = index.start
        return AnnotatedSequence(
            self._annotation[annot_index],
            self._sequence[seq_start:seq_stop],
            rel_seq_start,
        )

    elif isinstance(index, numbers.Integral):
        return self._sequence[index - self._seqstart]

    else:
        raise TypeError(f"'{type(index).__name__}' instances are invalid indices")
|
|
795
|
+
|
|
796
|
+
def __setitem__(self, index, item):
    """
    Replace a part of the sequence.

    Parameters
    ----------
    index : Feature or slice or int
        - :class:`Feature`: The symbols at the locations of the
          feature are replaced.
        - slice: The range of locations (sequence start corrected).
        - int: The location of a single symbol (sequence start
          corrected).
    item : Sequence or symbol
        The replacement.
        For a :class:`Feature` index it is consumed chunk by chunk,
        one chunk with the length of the location per location.

    Raises
    ------
    TypeError
        If the index has an unsupported type.
    """
    offset = self._seqstart
    if isinstance(index, Feature):
        # NOTE(review): the locations are used in the iteration order
        # of `index.locs`, without the strand dependent sorting and
        # reverse complementing done in `__getitem__()` - confirm
        # that this is intended
        consumed = 0
        for loc in index.locs:
            start = loc.first - offset
            # +1 due to exclusive stop
            stop = loc.last - offset + 1
            chunk_end = consumed + (stop - start)
            self._sequence[start:stop] = item[consumed:chunk_end]
            consumed = chunk_end
    elif isinstance(index, slice):
        # Sequence start correction, open ends cover the entire side
        start = 0 if index.start is None else index.start - offset
        stop = len(self._sequence) if index.stop is None else index.stop - offset
        # Item is a Sequence
        self._sequence[start:stop] = item
    elif isinstance(index, numbers.Integral):
        # Item is a symbol
        self._sequence[index - offset] = item
    else:
        raise TypeError(f"'{type(index).__name__}' instances are invalid indices")
|
|
828
|
+
|
|
829
|
+
def __eq__(self, item):
    """
    Two annotated sequences are equal if their annotations, their
    sequences and their sequence starts are equal.
    Objects of any other type are never equal to an annotated
    sequence.
    """
    if isinstance(item, AnnotatedSequence):
        return (
            self.annotation == item.annotation
            and self.sequence == item.sequence
            and self._seqstart == item._seqstart
        )
    return False
|