biotite 1.1.0__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +6 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,830 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["Location", "Feature", "Annotation", "AnnotatedSequence"]
8
+
9
+ import copy
10
+ import numbers
11
+ import sys
12
+ from enum import Enum, Flag, auto
13
+ import numpy as np
14
+ from biotite.copyable import Copyable
15
+
16
+
17
class Location:
    """
    A :class:`Location` defines at which base(s)/residue(s) a feature is
    located.

    A feature can have multiple :class:`Location` instances if multiple
    locations are joined.

    Objects of this class are immutable.

    Parameters
    ----------
    first : int
        Starting base or residue position of the feature.
    last : int
        Inclusive ending base or residue position of the feature.
    strand : Strand, optional
        The strand direction.
        By default :attr:`Strand.FORWARD`.
    defect : Defect, optional
        A possible defect of the location.
        By default :attr:`Defect.NONE`.

    Attributes
    ----------
    first : int
        Starting base or residue position of the feature.
    last : int
        Inclusive ending base or residue position of the feature.
    strand : Strand
        The strand direction.
        Always :attr:`Strand.FORWARD` for peptide features.
    defect : Defect
        A possible defect of the location.

    Raises
    ------
    ValueError
        If `first` is greater than `last`.
    """

    class Defect(Flag):
        """
        This enum type describes location defects.

        A location has a defect, when the feature itself is not directly
        located in the range of the first to the last base.

           - **NONE** - No location defect
           - **MISS_LEFT** - A part of the feature has been truncated
             before the first base/residue of the :class:`Location`
             (probably by indexing an :class:`Annotation` object)
           - **MISS_RIGHT** - A part of the feature has been truncated
             after the last base/residue of the :class:`Location`
             (probably by indexing an :class:`Annotation` object)
           - **BEYOND_LEFT** - The feature starts at an unknown position
             before the first base/residue of the :class:`Location`
           - **BEYOND_RIGHT** - The feature ends at an unknown position
             after the last base/residue of the :class:`Location`
           - **UNK_LOC** - The exact position is unknown, but it is at a
             single base/residue between the first and last residue of
             the :class:`Location`, inclusive
           - **BETWEEN** - The position is between two consecutive
             bases/residues.
        """

        NONE = 0
        MISS_LEFT = auto()
        MISS_RIGHT = auto()
        BEYOND_LEFT = auto()
        BEYOND_RIGHT = auto()
        UNK_LOC = auto()
        BETWEEN = auto()

    class Strand(Enum):
        """
        This enum type describes the strand of the feature location.
        This is not relevant for protein sequence features.
        """

        FORWARD = auto()
        REVERSE = auto()

    def __init__(self, first, last, strand=Strand.FORWARD, defect=Defect.NONE):
        if first > last:
            raise ValueError(
                "The first position cannot be higher than the last position"
            )
        self._first = first
        self._last = last
        self._strand = strand
        self._defect = defect

    def __repr__(self):
        """Represent Location as a string for debugging."""
        # The nested enums are prefixed with 'Location.' so the output
        # names the enclosing class explicitly
        return (
            f"Location({self._first}, {self._last}, "
            f"strand=Location.{self._strand!s}, "
            f"defect=Location.{self._defect!s})"
        )

    @property
    def first(self):
        return self._first

    @property
    def last(self):
        return self._last

    @property
    def strand(self):
        return self._strand

    @property
    def defect(self):
        return self._defect

    def __str__(self):
        # Forward locations get a trailing '>',
        # all other locations a leading '<'
        string = "{:d}-{:d}".format(self.first, self.last)
        if self.strand == Location.Strand.FORWARD:
            return string + " >"
        return "< " + string

    def __eq__(self, item):
        if not isinstance(item, Location):
            # Let Python try the reflected comparison of the other
            # operand; 'location == other' still evaluates to False
            # if the other operand does not handle it either
            return NotImplemented
        return (
            self.first == item.first
            and self.last == item.last
            and self.strand == item.strand
            and self.defect == item.defect
        )

    def __hash__(self):
        # Consistent with __eq__: the same four fields are compared
        return hash((self._first, self._last, self._strand, self._defect))
135
+
136
+
137
class Feature(Copyable):
    """
    This class represents a single sequence feature, for example from a
    GenBank feature table.
    A feature describes a functional part of a sequence.
    It consists of a feature key, describing the general class of the
    feature, at least one location, describing its position on the
    reference, and qualifiers, describing the feature in detail.

    Objects of this class are immutable.

    Parameters
    ----------
    key : str
        The name of the feature class, e.g. *gene*, *CDS* or
        *regulatory*.
    locs : iterable object of Location
        The feature locations. In most cases this will only
        contain one location, but multiple ones are also possible for
        example in eukaryotic CDS (due to splicing).
        Any iterable is accepted, including generators.
    qual : dict, optional
        Maps feature qualifiers to their corresponding values.
        The keys are always strings. A value is either a string or
        ``None`` if the qualifier key does not have a value.
        If key has multiple values, the values are separated by a
        line break.

    Attributes
    ----------
    key : str
        The name of the feature class, e.g. *gene*, *CDS* or
        *regulatory*.
    locs : iterable object of Location
        The feature locations, stored internally as a ``frozenset``.
        In most cases this will only
        contain one location, but multiple ones are also possible for
        example in eukaryotic CDS (due to splicing).
    qual : dict
        Maps feature qualifiers to their corresponding values.
        The keys are always strings. A value is either a string or
        ``None`` if the qualifier key does not have a value.
        If key has multiple values, the values are separated by a
        line break.

    Raises
    ------
    ValueError
        If `locs` does not contain any location.
    """

    def __init__(self, key, locs, qual=None):
        self._key = key
        # Materialize the locations before checking for emptiness:
        # 'locs' may be an iterable without a length (e.g. a generator)
        self._locs = frozenset(locs)
        if not self._locs:
            raise ValueError("A feature must have at least one location")
        # Deep copy, so later changes to the given dictionary do not
        # affect this feature
        self._qual = copy.deepcopy(qual) if qual is not None else {}

    def __repr__(self):
        """Represent Feature as a string for debugging."""
        locs_repr = ", ".join([repr(loc) for loc in self.locs])
        return f'Feature("{self._key}", [{locs_repr}], qual={self._qual})'

    def get_location_range(self):
        """
        Get the minimum first base/residue and maximum last base/residue
        of all feature locations.

        This can be used to create a location, that spans all of the
        feature's locations.

        Returns
        -------
        first : int
            The minimum first base/residue of all locations.
        last : int
            The maximum last base/residue of all locations.
        """
        first = np.min([loc.first for loc in self._locs])
        last = np.max([loc.last for loc in self._locs])
        return first, last

    def __eq__(self, item):
        if not isinstance(item, Feature):
            # Let Python try the reflected comparison of the other
            # operand; 'feature == other' still evaluates to False
            # if the other operand does not handle it either
            return NotImplemented
        return (
            self._key == item._key
            and self._locs == item._locs
            and self._qual == item._qual
        )

    def __lt__(self, item):
        # Comparison with a foreign type deliberately yields False
        # instead of raising, as callers may rely on that
        if not isinstance(item, Feature):
            return False
        first, last = self.get_location_range()
        it_first, it_last = item.get_location_range()
        # The first base/residue is most significant,
        # if it is equal for both features, look at last base/residue
        if first != it_first:
            return first < it_first
        # First is equal: the feature with the larger last
        # base/residue is considered the smaller one
        return last > it_last

    def __gt__(self, item):
        # Comparison with a foreign type deliberately yields False
        # instead of raising, as callers may rely on that
        if not isinstance(item, Feature):
            return False
        first, last = self.get_location_range()
        it_first, it_last = item.get_location_range()
        # The first base/residue is most significant,
        # if it is equal for both features, look at last base/residue
        if first != it_first:
            return first > it_first
        # First is equal: the feature with the smaller last
        # base/residue is considered the greater one
        return last < it_last

    @property
    def key(self):
        return self._key

    @property
    def locs(self):
        return copy.copy(self._locs)

    @property
    def qual(self):
        # Shallow copy, so adding or removing entries in the returned
        # dictionary does not alter this feature
        return copy.copy(self._qual)

    def __hash__(self):
        # The qualifier dictionary itself is not hashable,
        # hence its items are hashed as a frozenset
        return hash((self._key, self._locs, frozenset(self._qual.items())))
262
+
263
+
264
+ class Annotation(Copyable):
265
+ """
266
+ An :class:`Annotation` is a set of features belonging to one
267
+ sequence.
268
+
269
+ Its advantage over a simple list is the base/residue position based
270
+ indexing:
271
+ When using slice indices in Annotation objects, a subannotation is
272
+ created, containing copies of all :class:`Feature` objects whose
273
+ first and last base/residue are in range of the slice.
274
+ If the slice starts after the first base/residue or/and the slice
275
+ ends before the last residue, the position out of range is set to
276
+ the boundaries of the slice (the :class:`Feature` is truncated).
277
+ In this case the :class:`Feature` obtains the
278
+ :attr:`Location.Defect.MISS_LEFT` and/or
279
+ :attr:`Location.Defect.MISS_RIGHT` defect.
280
+ The third case occurs when a :class:`Feature` starts after the slice
281
+ ends or a :class:`Feature` ends before the slice starts.
282
+ In this case the :class:`Feature` will not appear in the
283
+ subannotation.
284
+
285
+ The start or stop position in the slice indices can be omitted, then
286
+ the subannotation will include all features from the start or up to
287
+ the stop, respectively. Step values are ignored.
288
+ The stop values are still exclusive, i.e. the subannotation will
289
+ contain a not truncated :class:`Feature` only if its last
290
+ base/residue is smaller than the stop value of the slice.
291
+
292
+ Integers or other index types are not supported. If you want to
293
+ obtain the :class:`Feature` instances from the :class:`Annotation`
294
+ you need to iterate over it.
295
+ The iteration has no defined order.
296
+ Alternatively, you can obtain a copy of the internal
297
+ :class:`Feature` set via :func:`get_features()`.
298
+
299
+ Multiple :class:`Annotation` objects can be concatenated to one
300
+ :class:`Annotation` object using the '+' operator.
301
+ Single :class:`Feature` instances can be added this way, too.
302
+ If a feature is present in both :class:`Annotation` objects, the
303
+ resulting :class:`Annotation` will contain this feature twice.
304
+
305
+ Parameters
306
+ ----------
307
+ features : iterable object of Feature, optional
308
+ The features to create the :class:`Annotation` from. if not
309
+ provided, an empty :class:`Annotation` is created.
310
+
311
+ Examples
312
+ --------
313
+ Creating an annotation from a feature list:
314
+
315
+ >>> feature1 = Feature("CDS", [Location(-10, 30 )], qual={"gene" : "test1"})
316
+ >>> feature2 = Feature("CDS", [Location(20, 50 )], qual={"gene" : "test2"})
317
+ >>> annotation = Annotation([feature1, feature2])
318
+ >>> for f in sorted(list(annotation)):
319
+ ... print(f.qual["gene"], "".join([str(loc) for loc in f.locs]))
320
+ test1 -10-30 >
321
+ test2 20-50 >
322
+
323
+ Merging two annotations and a feature:
324
+
325
+ >>> feature3 = Feature("CDS", [Location(100, 130 )], qual={"gene" : "test3"})
326
+ >>> feature4 = Feature("CDS", [Location(150, 250 )], qual={"gene" : "test4"})
327
+ >>> annotation2 = Annotation([feature3, feature4])
328
+ >>> feature5 = Feature("CDS", [Location(-50, 200 )], qual={"gene" : "test5"})
329
+ >>> annotation = annotation + annotation2 + feature5
330
+ >>> for f in sorted(list(annotation)):
331
+ ... print(f.qual["gene"], "".join([str(loc) for loc in f.locs]))
332
+ test5 -50-200 >
333
+ test1 -10-30 >
334
+ test2 20-50 >
335
+ test3 100-130 >
336
+ test4 150-250 >
337
+
338
+ Location based indexing, note the defects:
339
+
340
+ >>> annotation = annotation[40:150]
341
+ >>> for f in sorted(list(annotation)):
342
+ ... gene = f.qual["gene"]
343
+ ... loc_str = "".join([f"{loc} {loc.defect}" for loc in f.locs])
344
+ ... print(gene, loc_str)
345
+ test5 40-149 > Defect.MISS_LEFT|MISS_RIGHT
346
+ test2 40-50 > Defect.MISS_LEFT
347
+ test3 100-130 > Defect.NONE
348
+ """
349
+
350
+ def __init__(self, features=None):
351
+ if features is None:
352
+ self._features = set()
353
+ else:
354
+ self._features = set(features)
355
+
356
+ def __repr__(self):
357
+ """Represent Annotation as a string for debugging."""
358
+ return (
359
+ f'Annotation([{", ".join([feat.__repr__() for feat in self._features])}])'
360
+ )
361
+
362
+ def __copy_create__(self):
363
+ return Annotation(self._features)
364
+
365
+ def get_features(self):
366
+ """
367
+ Get a copy of the internal feature set.
368
+
369
+ Returns
370
+ -------
371
+ feature_list : list of Feature
372
+ A copy of the internal feature set.
373
+ """
374
+ return copy.copy(self._features)
375
+
376
def add_feature(self, feature):
    """
    Add a feature to the annotation.

    Parameters
    ----------
    feature : Feature
        Feature to be added.

    Raises
    ------
    TypeError
        If `feature` is not a :class:`Feature`.
    """
    if isinstance(feature, Feature):
        self._features.add(feature)
        return
    raise TypeError(
        f"Only 'Feature' objects are supported, not {type(feature).__name__}"
    )
391
+
392
def get_location_range(self):
    """
    Get the range of feature locations,
    i.e. the first and exclusive last base/residue.

    Returns
    -------
    start : int
        Start location, i.e. the smallest `first` value of all
        locations.
    stop : int
        Exclusive stop location, i.e. the largest `last` value of all
        locations plus one.

    Notes
    -----
    If the annotation contains no location at all, the untouched
    sentinels are returned, i.e.
    ``(sys.maxsize, -sys.maxsize + 1)``.
    """
    all_locs = [loc for feature in self._features for loc in feature.locs]
    # The sentinels come first, so they win ties and are the result
    # for an annotation without locations
    first = min([sys.maxsize] + [loc.first for loc in all_locs])
    last = max([-sys.maxsize] + [loc.last for loc in all_locs])
    # Exclusive stop -> +1
    return first, last + 1
414
+
415
def del_feature(self, feature):
    """
    Delete a feature from the annotation.

    Parameters
    ----------
    feature : Feature
        Feature to be removed.

    Raises
    ------
    KeyError
        If the feature is not in the annotation.
    """
    stored = self._features
    # 'set.remove()' is the source of the documented KeyError
    stored.remove(feature)
430
+
431
def __add__(self, item):
    """
    Create a new annotation containing the features of this
    annotation and the feature(s) given by `item`.

    Parameters
    ----------
    item : Annotation or Feature
        The annotation or the single feature to merge in.

    Returns
    -------
    merged : Annotation
        The union of the features. This annotation is not modified.

    Raises
    ------
    TypeError
        If `item` is neither an :class:`Annotation` nor a
        :class:`Feature`.
    """
    if isinstance(item, Annotation):
        extra = item._features
    elif isinstance(item, Feature):
        extra = {item}
    else:
        raise TypeError(
            f"Only 'Feature' and 'Annotation' objects are supported, "
            f"not {type(item).__name__}"
        )
    return Annotation(self._features | extra)
441
+
442
def __iadd__(self, item):
    """
    Merge the feature(s) given by `item` into this annotation
    in-place.

    Parameters
    ----------
    item : Annotation or Feature
        The annotation or the single feature to merge in.

    Returns
    -------
    self : Annotation
        This annotation, as required for augmented assignment.

    Raises
    ------
    TypeError
        If `item` is neither an :class:`Annotation` nor a
        :class:`Feature`.
    """
    if not isinstance(item, (Annotation, Feature)):
        raise TypeError(
            f"Only 'Feature' and 'Annotation' objects are supported, "
            f"not {type(item).__name__}"
        )
    # The 'Annotation' check keeps priority over the 'Feature' check
    if isinstance(item, Annotation):
        self._features.update(item._features)
    else:
        self._features.add(item)
    return self
453
+
454
def __getitem__(self, index):
    """
    Get the sub-annotation covered by a location based slice.

    Locations that reach beyond the slice are truncated to it and
    are marked with the ``MISS_LEFT`` and/or ``MISS_RIGHT`` defect.
    A feature is part of the result, if at least one of its
    locations overlaps the slice.

    Parameters
    ----------
    index : slice
        The location range: inclusive start and exclusive stop.
        An omitted start or stop does not limit the range on that
        side.
        The step of the slice is not evaluated.

    Returns
    -------
    sub_annot : Annotation
        The annotation containing the (truncated) features in scope.

    Raises
    ------
    TypeError
        If `index` is not a slice.
    """
    if not isinstance(index, slice):
        raise TypeError(f"'{type(index).__name__}' instances are invalid indices")

    # If no start or stop index is given, include all
    i_first = -sys.maxsize if index.start is None else index.start
    # The slice stop is exclusive, locations are inclusive -> -1
    i_last = sys.maxsize if index.stop is None else index.stop - 1

    sub_annot = Annotation()
    for feature in self:
        clipped_locs = []
        for loc in feature.locs:
            if loc.first > i_last or loc.last < i_first:
                # The location is completely outside of the range
                continue
            # The location is at least partly in the given range:
            # truncate it where necessary and record the defect
            first, last = loc.first, loc.last
            defect = loc.defect
            if first < i_first:
                defect |= Location.Defect.MISS_LEFT
                first = i_first
            if last > i_last:
                defect |= Location.Defect.MISS_RIGHT
                last = i_last
            clipped_locs.append(Location(first, last, loc.strand, defect))
        if clipped_locs:
            # The feature is present in the new annotation,
            # as at least one of its locations is in scope
            sub_annot.add_feature(
                Feature(key=feature.key, locs=clipped_locs, qual=feature.qual)
            )
    return sub_annot
497
+
498
def __delitem__(self, item):
    """
    Delete a feature from the annotation via ``del annotation[feature]``.

    Parameters
    ----------
    item : Feature
        Feature to be removed.

    Raises
    ------
    TypeError
        If `item` is not a :class:`Feature`.
    """
    if isinstance(item, Feature):
        self.del_feature(item)
        return
    raise TypeError(
        f"Only 'Feature' objects are supported, not {type(item).__name__}"
    )
504
+
505
def __iter__(self):
    """Iterate over the stored features in set iteration order."""
    return iter(self._features)
507
+
508
def __contains__(self, item):
    """Check whether `item` is one of the stored features."""
    stored = self._features
    return item in stored
510
+
511
def __eq__(self, item):
    """
    Compare with another object.

    Two annotations are equal, if their feature sets are equal.
    Any object that is not an :class:`Annotation` is unequal.
    """
    if isinstance(item, Annotation):
        return self._features == item._features
    return False
515
+
516
def __len__(self):
    """Get the number of stored features."""
    stored = self._features
    return len(stored)
518
+
519
+
520
+ class AnnotatedSequence(Copyable):
521
+ """
522
+ An :class:`AnnotatedSequence` is a combination of a
523
+ :class:`Sequence` and an :class:`Annotation`.
524
+
525
+ Indexing an :class:`AnnotatedSequence` with a slice returns another
526
+ :class:`AnnotatedSequence` with the corresponding subannotation and
527
+ a sequence start corrected subsequence, i.e. indexing starts at 1
528
+ with the default sequence start 1.
529
+ The sequence start in the newly created :class:`AnnotatedSequence`
530
+ is the start of the slice.
531
+ Furthermore, integer indices are allowed in which case the
532
+ corresponding symbol of the sequence is returned (also sequence
533
+ start corrected).
534
+ In both cases the index must be in range of the sequence, e.g. if
535
+ sequence start is 1, index 0 is not allowed.
536
+ Negative indices do not mean indexing from the end of the sequence,
537
+ in contrast to the behavior in :class:`Sequence` objects.
538
+ Both index types can also be used to modify the sequence.
539
+
540
+ Another option is indexing with a :class:`Feature` (preferably from the
541
+ :class:`Annotation` in the same :class:`AnnotatedSequence`).
542
+ In this case a sequence, described by the location(s) of the
543
+ :class:`Feature`, is returned.
544
+ When using a :class:`Feature` for setting an
545
+ :class:`AnnotatedSequence` with a sequence, the new sequence is
546
+ replacing the locations of the
547
+ :class:`Feature`.
548
+ Note that the replacing sequence must have the same length as the
549
+ sequence of the :class:`Feature` index.
550
+
551
+ Parameters
552
+ ----------
553
+ sequence : Sequence
554
+ The sequence.
555
+ Usually a :class:`NucleotideSequence` or
556
+ :class:`ProteinSequence`.
557
+ annotation : Annotation
558
+ The annotation corresponding to `sequence`.
559
+ sequence_start : int, optional
560
+ By default, the first symbol of the sequence is corresponding
561
+ to location 1 of the features in the annotation. The location
562
+ of the first symbol can be changed by setting this parameter.
563
+ Negative values are not supported yet.
564
+
565
+ Attributes
566
+ ----------
567
+ sequence : Sequence
568
+ The represented sequence.
569
+ annotation : Annotation
570
+ The annotation corresponding to `sequence`.
571
+ sequence_start : int
572
+ The location of the first symbol in the sequence.
573
+
574
+ See also
575
+ --------
576
+ Annotation, Sequence
577
+
578
+ Examples
579
+ --------
580
+ Creating an annotated sequence
581
+
582
+ >>> sequence = NucleotideSequence("ATGGCGTACGATTAGAAAAAAA")
583
+ >>> feature1 = Feature("misc_feature", [Location(1,2), Location(11,12)],
584
+ ... {"note" : "walker"})
585
+ >>> feature2 = Feature("misc_feature", [Location(16,22)], {"note" : "poly-A"})
586
+ >>> annotation = Annotation([feature1, feature2])
587
+ >>> annot_seq = AnnotatedSequence(annotation, sequence)
588
+ >>> print(annot_seq.sequence)
589
+ ATGGCGTACGATTAGAAAAAAA
590
+ >>> for f in sorted(list(annot_seq.annotation)):
591
+ ... print(f.qual["note"])
592
+ walker
593
+ poly-A
594
+
595
+ Indexing with integers, note the sequence start correction
596
+
597
+ >>> print(annot_seq[2])
598
+ T
599
+ >>> print(annot_seq.sequence[2])
600
+ G
601
+
602
+ Indexing with slices
603
+
604
+ >>> annot_seq2 = annot_seq[:16]
605
+ >>> print(annot_seq2.sequence)
606
+ ATGGCGTACGATTAG
607
+ >>> for f in annot_seq2.annotation:
608
+ ... print(f.qual["note"])
609
+ walker
610
+
611
+ Indexing with features
612
+
613
+ >>> print(annot_seq[feature1])
614
+ ATAT
615
+ >>> print(annot_seq[feature2])
616
+ AAAAAAA
617
+ >>> print(annot_seq.sequence)
618
+ ATGGCGTACGATTAGAAAAAAA
619
+ >>> annot_seq[feature1] = NucleotideSequence("CCCC")
620
+ >>> print(annot_seq.sequence)
621
+ CCGGCGTACGCCTAGAAAAAAA
622
+ """
623
+
624
def __init__(self, annotation, sequence, sequence_start=1):
    """
    Combine an annotation and a sequence.

    Parameters
    ----------
    annotation : Annotation
        The annotation corresponding to `sequence`.
    sequence : Sequence
        The sequence.
    sequence_start : int, optional
        The location of the first symbol of `sequence`.
    """
    # The objects are stored as given, no copies are made
    self._seqstart = sequence_start
    self._sequence = sequence
    self._annotation = annotation
628
+
629
def __repr__(self):
    """Represent AnnotatedSequence as a string for debugging."""
    annotation_repr = self._annotation.__repr__()
    sequence_repr = self._sequence.__repr__()
    return (
        f"AnnotatedSequence({annotation_repr}, {sequence_repr}, "
        f"sequence_start={self._seqstart})"
    )
635
+
636
@property
def sequence_start(self):
    """
    int : The location of the first symbol in the sequence.
    """
    start = self._seqstart
    return start
639
+
640
@property
def sequence(self):
    """
    Sequence : The represented sequence (the stored object, not a copy).
    """
    stored_sequence = self._sequence
    return stored_sequence
643
+
644
@property
def annotation(self):
    """
    Annotation : The annotation corresponding to the sequence
    (the stored object, not a copy).
    """
    stored_annotation = self._annotation
    return stored_annotation
647
+
648
def __copy_create__(self):
    """
    Create a new :class:`AnnotatedSequence` from copies of the
    annotation and the sequence of this object.

    Returns
    -------
    new_annot_seq : AnnotatedSequence
        The new object with the same sequence start.

    Notes
    -----
    Presumably this is the hook used by ``Copyable.copy()``;
    confirm against the base class.
    """
    # 'copy' must be called: passing the bound method itself would
    # leave the new object with a method in place of its sequence
    return AnnotatedSequence(
        self._annotation.copy(), self._sequence.copy(), self._seqstart
    )
652
+
653
def reverse_complement(self, sequence_start=1):
    """
    Create the reverse complement of the annotated sequence.

    This method accurately converts the position and the strand of
    the annotation.
    The information on the sequence start is lost.

    Parameters
    ----------
    sequence_start : int, optional
        The location of the first symbol in the reverse complement
        sequence.

    Returns
    -------
    rev_annot_seq : AnnotatedSequence
        The reverse complement of the annotated sequence.
    """
    defects = Location.Defect
    # Defects that refer to one side of a location switch sides,
    # the remaining ones are carried over unchanged
    mirrored_defects = (
        (defects.MISS_LEFT, defects.MISS_RIGHT),
        (defects.MISS_RIGHT, defects.MISS_LEFT),
        (defects.BEYOND_RIGHT, defects.BEYOND_LEFT),
        (defects.BEYOND_LEFT, defects.BEYOND_RIGHT),
        (defects.UNK_LOC, defects.UNK_LOC),
        (defects.BETWEEN, defects.BETWEEN),
    )

    mirrored_sequence = self._sequence.reverse().complement()
    last_index = len(self._sequence) - 1

    mirrored_features = []
    for feature in self._annotation:
        mirrored_locs = []
        for loc in feature.locs:
            # Transform location to the reverse complement strand:
            # (loc.xxx - self._seqstart) -> location to index
            # last_index - ...           -> mirrored index
            # ... + sequence_start       -> index to location
            # The mirrored 'last' becomes the new 'first' and vice versa
            new_first = last_index - (loc.last - self._seqstart) + sequence_start
            new_last = last_index - (loc.first - self._seqstart) + sequence_start

            new_strand = (
                Location.Strand.REVERSE
                if loc.strand == Location.Strand.FORWARD
                else Location.Strand.FORWARD
            )

            new_defect = defects.NONE
            for original_flag, mirrored_flag in mirrored_defects:
                if loc.defect & original_flag:
                    new_defect |= mirrored_flag

            mirrored_locs.append(
                Location(new_first, new_last, new_strand, new_defect)
            )
        mirrored_features.append(
            Feature(feature.key, mirrored_locs, feature.qual)
        )

    return AnnotatedSequence(
        Annotation(mirrored_features), mirrored_sequence, sequence_start
    )
718
+
719
def __getitem__(self, index):
    """
    Get a part of the annotated sequence.

    Parameters
    ----------
    index : Feature or slice or int
        - :class:`Feature`: The subsequences at the locations of the
          feature are concatenated. Locations on the reverse strand
          are reverse complemented.
        - slice: Location based range with inclusive start and
          exclusive stop.
        - int: A single location.

    Returns
    -------
    item : Sequence or AnnotatedSequence or object
        For a :class:`Feature` the concatenated sequence,
        for a slice an :class:`AnnotatedSequence` whose sequence
        start is the slice start (or the current sequence start, if
        the slice start is omitted),
        for an integer the result of indexing the sequence at that
        location.

    Raises
    ------
    ValueError
        If the feature has no locations or its locations are on
        different strands.
    IndexError
        If the slice start is lower than the sequence start.
    TypeError
        If `index` has an unsupported type.
    """
    if isinstance(index, Feature):
        # Concatenate subsequences for each location of the feature
        locs = index.locs
        if len(locs) == 0:
            raise ValueError("Feature does not contain any locations")
        # Start by creating an empty sequence
        sub_seq = self._sequence.copy(new_seq_code=np.array([]))
        # All locations must share one strand, as the strand
        # determines the order in which the chunks are merged
        strand = locs[0].strand
        if any(loc.strand != strand for loc in locs):
            raise ValueError(
                "All locations of the feature must have the same "
                "strand direction"
            )
        if strand == Location.Strand.FORWARD:
            sorted_locs = sorted(locs, key=lambda loc: loc.first)
        else:
            sorted_locs = sorted(locs, key=lambda loc: loc.last, reverse=True)
        # Merge the sequences corresponding to the ordered locations
        for loc in sorted_locs:
            slice_start = loc.first - self._seqstart
            # +1 due to exclusive stop
            slice_stop = loc.last - self._seqstart + 1
            add_seq = self._sequence[slice_start:slice_stop]
            if loc.strand == Location.Strand.REVERSE:
                add_seq = add_seq.reverse().complement()
            sub_seq += add_seq
        return sub_seq

    elif isinstance(index, slice):
        # Sequence start correction
        if index.start is None:
            seq_start = 0
        else:
            if index.start < self._seqstart:
                raise IndexError(
                    f"The start of the index ({index.start}) is lower "
                    f"than the start of the sequence ({self._seqstart})"
                )
            seq_start = index.start - self._seqstart
        if index.stop is None:
            seq_stop = len(self._sequence)
            # The annotation is sliced by location, not by sequence
            # index: the exclusive stop location of the complete
            # sequence is its length shifted by the sequence start.
            # Without the shift, features at the sequence end would
            # be truncated or dropped.
            index = slice(index.start, seq_stop + self._seqstart, index.step)
        else:
            seq_stop = index.stop - self._seqstart
        # New value for the sequence start, value is base position
        if index.start is None:
            rel_seq_start = self._seqstart
        else:
            rel_seq_start = index.start
        return AnnotatedSequence(
            self._annotation[index],
            self._sequence[seq_start:seq_stop],
            rel_seq_start,
        )

    elif isinstance(index, numbers.Integral):
        # NOTE(review): a location below the sequence start yields a
        # negative sequence index, which is not rejected here
        return self._sequence[index - self._seqstart]

    else:
        raise TypeError(f"'{type(index).__name__}' instances are invalid indices")
789
+
790
def __setitem__(self, index, item):
    """
    Replace a part of the sequence in-place.

    Parameters
    ----------
    index : Feature or slice or int
        - :class:`Feature`: The locations of the feature are replaced
          one after another, in the order they are stored in the
          feature.
        - slice: Location based range with inclusive start and
          exclusive stop.
        - int: A single location.
    item : Sequence or object
        For a :class:`Feature` or slice the replacing sequence,
        for an integer the replacing symbol.
        For a :class:`Feature` its length must equal the summed
        length of the locations.

    Raises
    ------
    TypeError
        If `index` has an unsupported type.
    """
    if isinstance(index, Feature):
        # 'item' is consumed chunk by chunk,
        # each chunk as long as the location it replaces
        consumed = 0
        for loc in index.locs:
            start = loc.first - self._seqstart
            # +1 due to exclusive stop
            stop = loc.last - self._seqstart + 1
            chunk_end = consumed + (stop - start)
            self._sequence[start:stop] = item[consumed:chunk_end]
            consumed = chunk_end
    elif isinstance(index, slice):
        # Sequence start correction
        seq_start = 0 if index.start is None else index.start - self._seqstart
        seq_stop = (
            len(self._sequence)
            if index.stop is None
            else index.stop - self._seqstart
        )
        # Item is a Sequence
        self._sequence[seq_start:seq_stop] = item
    elif isinstance(index, numbers.Integral):
        # Item is a symbol
        self._sequence[index - self._seqstart] = item
    else:
        raise TypeError(f"'{type(index).__name__}' instances are invalid indices")
822
+
823
def __eq__(self, item):
    """
    Compare with another object.

    Two annotated sequences are equal, if annotation, sequence and
    sequence start are equal.
    Any object that is not an :class:`AnnotatedSequence` is unequal.
    """
    if isinstance(item, AnnotatedSequence):
        # Short-circuits on the first differing component
        return (
            self.annotation == item.annotation
            and self.sequence == item.sequence
            and self._seqstart == item._seqstart
        )
    return False