biotite 0.41.1__cp311-cp311-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-311-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,829 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["Location", "Feature", "Annotation", "AnnotatedSequence"]
8
+
9
+ import numbers
10
+ import copy
11
+ import sys
12
+ from enum import Flag, Enum, auto
13
+ import numpy as np
14
+ from .sequence import Sequence
15
+ from ..copyable import Copyable
16
+ from .seqtypes import NucleotideSequence
17
+
18
+
class Location():
    """
    A :class:`Location` defines at which base(s)/residue(s) a feature is
    located.

    A feature can have multiple :class:`Location` instances if multiple
    locations are joined.

    Objects of this class are meant to be immutable: the attributes are
    only exposed via read-only properties.

    Parameters
    ----------
    first : int
        Starting base or residue position of the feature.
    last : int
        Inclusive ending base or residue position of the feature.
    strand : Strand, optional
        The strand direction.
        By default :attr:`Strand.FORWARD`.
    defect : Defect, optional
        A possible defect of the location.
        By default :attr:`Defect.NONE`.

    Attributes
    ----------
    first : int
        Starting base or residue position of the feature.
    last : int
        Inclusive ending base or residue position of the feature.
    strand : Strand
        The strand direction.
        Always :attr:`Strand.FORWARD` for peptide features.
    defect : Defect
        A possible defect of the location.

    Raises
    ------
    ValueError
        If `first` is greater than `last`.
    """

    class Defect(Flag):
        """
        This enum type describes location defects.

        A location has a defect, when the feature itself is not directly
        located in the range of the first to the last base.

           - **NONE** - No location defect
           - **MISS_LEFT** - A part of the feature has been truncated
             before the first base/residue of the :class:`Location`
             (probably by indexing an :class:`Annotation` object)
           - **MISS_RIGHT** - A part of the feature has been truncated
             after the last base/residue of the :class:`Location`
             (probably by indexing an :class:`Annotation` object)
           - **BEYOND_LEFT** - The feature starts at an unknown position
             before the first base/residue of the :class:`Location`
           - **BEYOND_RIGHT** - The feature ends at an unknown position
             after the last base/residue of the :class:`Location`
           - **UNK_LOC** - The exact position is unknown, but it is at a
             single base/residue between the first and last residue of
             the :class:`Location`, inclusive
           - **BETWEEN** - The position is between two consecutive
             bases/residues.
        """
        NONE         = 0
        MISS_LEFT    = auto()
        MISS_RIGHT   = auto()
        BEYOND_LEFT  = auto()
        BEYOND_RIGHT = auto()
        UNK_LOC      = auto()
        BETWEEN      = auto()

    class Strand(Enum):
        """
        This enum type describes the strand of the feature location.
        This is not relevant for protein sequence features.
        """
        FORWARD = auto()
        REVERSE = auto()

    def __init__(self, first, last, strand=Strand.FORWARD,
                 defect=Defect.NONE):
        if first > last:
            raise ValueError(
                "The first position cannot be higher than the last position"
            )
        self._first = first
        self._last = last
        self._strand = strand
        self._defect = defect

    def __repr__(self):
        """Represent Location as a string for debugging."""
        defect = self._defect
        if isinstance(defect, Location.Defect):
            # Spell out every set flag with its full qualified name,
            # so that combined defects give a valid Python expression
            # ('A|B' instead of 'Location.Defect.A|B').
            # The zero-valued NONE is skipped explicitly, as it is
            # formally contained in every flag combination.
            set_flags = [
                f"Location.Defect.{flag.name}"
                for flag in Location.Defect
                if flag.value != 0 and flag in defect
            ]
            defect_str = "|".join(set_flags) or "Location.Defect.NONE"
        else:
            # Unexpected type: keep the plain string conversion
            defect_str = "Location." + str(defect)
        return (
            f'Location({self._first}, {self._last}, '
            f'strand={"Location." + str(self._strand)}, '
            f'defect={defect_str})'
        )

    @property
    def first(self):
        return self._first

    @property
    def last(self):
        return self._last

    @property
    def strand(self):
        return self._strand

    @property
    def defect(self):
        return self._defect

    def __str__(self):
        # The arrow indicates the strand direction:
        # trailing '>' for forward, leading '<' for reverse
        string = "{:d}-{:d}".format(self.first, self.last)
        if self.strand == Location.Strand.FORWARD:
            string = string + " >"
        else:
            string = "< " + string
        return string

    def __eq__(self, item):
        if not isinstance(item, Location):
            return False
        return (    self.first  == item.first
                and self.last   == item.last
                and self.strand == item.strand
                and self.defect == item.defect)

    def __hash__(self):
        # Consistent with __eq__: the same four fields are compared
        return hash((self._first, self._last, self._strand, self._defect))
132
+
133
+
class Feature(Copyable):
    """
    This class represents a single sequence feature, for example from a
    GenBank feature table.
    A feature describes a functional part of a sequence.
    It consists of a feature key, describing the general class of the
    feature, at least one location, describing its position on the
    reference, and qualifiers, describing the feature in detail.

    Objects of this class are immutable.

    Parameters
    ----------
    key : str
        The name of the feature class, e.g. *gene*, *CDS* or
        *regulatory*.
    locs : iterable object of Location
        A list of feature locations. In most cases this list will only
        contain one location, but multiple ones are also possible for
        example in eukaryotic CDS (due to splicing).
    qual : dict, optional
        Maps feature qualifiers to their corresponding values.
        The keys are always strings. A value is either a string or
        ``None`` if the qualifier key does not have a value.
        If key has multiple values, the values are separated by a
        line break.

    Attributes
    ----------
    key : str
        The name of the feature class, e.g. *gene*, *CDS* or
        *regulatory*.
    locs : iterable object of Location
        A list of feature locations. In most cases this list will only
        contain one location, but multiple ones are also possible for
        example in eukaryotic CDS (due to splicing).
    qual : dict
        Maps feature qualifiers to their corresponding values.
        The keys are always strings. A value is either a string or
        ``None`` if the qualifier key does not have a value.
        If key has multiple values, the values are separated by a
        line break.

    Raises
    ------
    ValueError
        If `locs` contains no location.
    """

    def __init__(self, key, locs, qual=None):
        self._key = key
        # Materialize the locations before checking for emptiness:
        # 'locs' may be any iterable (e.g. a generator),
        # which does not necessarily support 'len()'
        self._locs = frozenset(locs)
        if not self._locs:
            raise ValueError("A feature must have at least one location")
        # Deep copy, so later changes to the given dictionary
        # do not affect this feature
        self._qual = copy.deepcopy(qual) if qual is not None else {}

    def __repr__(self):
        """Represent Feature as a string for debugging."""
        locs_repr = ", ".join([repr(loc) for loc in self._locs])
        return f'Feature("{self._key}", [{locs_repr}], qual={self._qual})'

    def get_location_range(self):
        """
        Get the minimum first base/residue and maximum last base/residue
        of all feature locations.

        This can be used to create a location, that spans all of the
        feature's locations.

        Returns
        -------
        first : int
            The minimum first base/residue of all locations.
        last : int
            The maximum last base/residue of all locations.
        """
        first = np.min([loc.first for loc in self._locs])
        last = np.max([loc.last for loc in self._locs])
        return first, last

    def __eq__(self, item):
        if not isinstance(item, Feature):
            return False
        return (    self._key  == item._key
                and self._locs == item._locs
                and self._qual == item._qual)

    def __lt__(self, item):
        if not isinstance(item, Feature):
            return False
        first, last = self.get_location_range()
        it_first, it_last = item.get_location_range()
        # The first base/residue is most significant,
        # if it is equal for both features, look at last base/residue
        if first < it_first:
            return True
        elif first > it_first:
            return False
        else: # First is equal
            # The feature reaching further to the right is the
            # 'smaller' one, i.e. it comes first when sorting
            return last > it_last

    def __gt__(self, item):
        if not isinstance(item, Feature):
            return False
        first, last = self.get_location_range()
        it_first, it_last = item.get_location_range()
        # The first base/residue is most significant,
        # if it is equal for both features, look at last base/residue
        if first > it_first:
            return True
        elif first < it_first:
            return False
        else: # First is equal
            # Mirror of '__lt__()': the feature ending earlier
            # is the 'greater' one
            return last < it_last

    @property
    def key(self):
        return self._key

    @property
    def locs(self):
        return copy.copy(self._locs)

    @property
    def qual(self):
        # Shallow copy: the caller cannot add or remove qualifiers
        # of this feature via the returned dictionary
        return copy.copy(self._qual)

    def __hash__(self):
        # A dictionary is not hashable
        # -> hash a frozen set of its items instead
        return hash(( self._key, self._locs, frozenset(self._qual.items()) ))
257
+
258
+
259
+ class Annotation(Copyable):
260
+ """
261
+ An :class:`Annotation` is a set of features belonging to one
262
+ sequence.
263
+
264
+ Its advantage over a simple list is the base/residue position based
265
+ indexing:
266
+ When using slice indices in Annotation objects, a subannotation is
267
+ created, containing copies of all :class:`Feature` objects whose
268
+ first and last base/residue are in range of the slice.
269
+ If the slice starts after the first base/residue or/and the slice
270
+ ends before the last residue, the position out of range is set to
271
+ the boundaries of the slice (the :class:`Feature` is truncated).
272
+ In this case the :class:`Feature` obtains the
273
+ :attr:`Location.Defect.MISS_LEFT` and/or
274
+ :attr:`Location.Defect.MISS_RIGHT` defect.
275
+ The third case occurs when a :class:`Feature` starts after the slice
276
+ ends or a :class:`Feature` ends before the slice starts.
277
+ In this case the :class:`Feature` will not appear in the
278
+ subannotation.
279
+
280
+ The start or stop position in the slice indices can be omitted, then
281
+ the subannotation will include all features from the start or up to
282
+ the stop, respectively. Step values are ignored.
283
+ The stop values are still exclusive, i.e. the subannotation will
284
+ contain a not truncated :class:`Feature` only if its last
285
+ base/residue is smaller than the stop value of the slice.
286
+
287
+ Integers or other index types are not supported. If you want to
288
+ obtain the :class:`Feature` instances from the :class:`Annotation`
289
+ you need to iterate over it.
290
+ The iteration has no defined order.
291
+ Alternatively, you can obtain a copy of the internal
292
+ :class:`Feature` set via :func:`get_features()`.
293
+
294
+ Multiple :class:`Annotation` objects can be concatenated to one
295
+ :class:`Annotation` object using the '+' operator.
296
+ Single :class:`Feature` instances can be added this way, too.
297
+ If a feature is present in both :class:`Annotation` objects, the
298
+ resulting :class:`Annotation` will contain this feature twice.
299
+
300
+ Parameters
301
+ ----------
302
+ features : iterable object of Feature, optional
303
+ The features to create the :class:`Annotation` from. if not
304
+ provided, an empty :class:`Annotation` is created.
305
+
306
+ Examples
307
+ --------
308
+ Creating an annotation from a feature list:
309
+
310
+ >>> feature1 = Feature("CDS", [Location(-10, 30 )], qual={"gene" : "test1"})
311
+ >>> feature2 = Feature("CDS", [Location(20, 50 )], qual={"gene" : "test2"})
312
+ >>> annotation = Annotation([feature1, feature2])
313
+ >>> for f in sorted(list(annotation)):
314
+ ... print(f.qual["gene"], "".join([str(loc) for loc in f.locs]))
315
+ test1 -10-30 >
316
+ test2 20-50 >
317
+
318
+ Merging two annotations and a feature:
319
+
320
+ >>> feature3 = Feature("CDS", [Location(100, 130 )], qual={"gene" : "test3"})
321
+ >>> feature4 = Feature("CDS", [Location(150, 250 )], qual={"gene" : "test4"})
322
+ >>> annotation2 = Annotation([feature3, feature4])
323
+ >>> feature5 = Feature("CDS", [Location(-50, 200 )], qual={"gene" : "test5"})
324
+ >>> annotation = annotation + annotation2 + feature5
325
+ >>> for f in sorted(list(annotation)):
326
+ ... print(f.qual["gene"], "".join([str(loc) for loc in f.locs]))
327
+ test5 -50-200 >
328
+ test1 -10-30 >
329
+ test2 20-50 >
330
+ test3 100-130 >
331
+ test4 150-250 >
332
+
333
+ Location based indexing, note the defects:
334
+
335
+ >>> annotation = annotation[40:150]
336
+ >>> for f in sorted(list(annotation)):
337
+ ... gene = f.qual["gene"]
338
+ ... loc_str = "".join([f"{loc} {loc.defect}" for loc in f.locs])
339
+ ... print(gene, loc_str)
340
+ test5 40-149 > Defect.MISS_RIGHT|MISS_LEFT
341
+ test2 40-50 > Defect.MISS_LEFT
342
+ test3 100-130 > Defect.NONE
343
+ """
344
+
345
+ def __init__(self, features=None):
346
+ if features is None:
347
+ self._features = set()
348
+ else:
349
+ self._features = set(features)
350
+
351
+ def __repr__(self):
352
+ """Represent Annotation as a string for debugging."""
353
+ return f'Annotation([{", ".join([feat.__repr__() for feat in self._features])}])'
354
+
355
+ def __copy_create__(self):
356
+ return Annotation(self._features)
357
+
358
+ def get_features(self):
359
+ """
360
+ Get a copy of the internal feature set.
361
+
362
+ Returns
363
+ -------
364
+ feature_list : list of Feature
365
+ A copy of the internal feature set.
366
+ """
367
+ return copy.copy(self._features)
368
+
369
def add_feature(self, feature):
    """
    Add a feature to the annotation.

    Parameters
    ----------
    feature : Feature
        Feature to be added.

    Raises
    ------
    TypeError
        If `feature` is not a :class:`Feature`.
    """
    if isinstance(feature, Feature):
        self._features.add(feature)
        return
    # Anything else must not end up in the feature set
    raise TypeError(
        f"Only 'Feature' objects are supported, "
        f"not {type(feature).__name__}"
    )
384
+
385
def get_location_range(self):
    """
    Get the range of feature locations,
    i.e. the first and exclusive last base/residue.

    If the annotation contains no location at all, the sentinel
    values ``sys.maxsize`` and ``-sys.maxsize + 1`` are returned.

    Returns
    -------
    start : int
        Start location.
    stop : int
        Exclusive stop location.
    """
    # The sentinels come first, so they are the result
    # if no location undercuts/exceeds them
    first_candidates = [sys.maxsize]
    last_candidates = [-sys.maxsize]
    for feature in self._features:
        for loc in feature.locs:
            first_candidates.append(loc.first)
            last_candidates.append(loc.last)
    # Exclusive stop -> +1
    return min(first_candidates), max(last_candidates) + 1
407
+
408
def del_feature(self, feature):
    """
    Delete a feature from the annotation.

    Parameters
    ----------
    feature : Feature
        Feature to be removed.

    Raises
    ------
    KeyError
        If the feature is not in the annotation.
    """
    features = self._features
    # set.remove() raises the documented KeyError for a missing feature
    features.remove(feature)
423
+
424
def __add__(self, item):
    """
    Create a new annotation that additionally contains the
    feature(s) of `item`.

    Parameters
    ----------
    item : Annotation or Feature
        The annotation or single feature to merge with this one.

    Returns
    -------
    merged : Annotation
        A new annotation; neither operand is modified.

    Raises
    ------
    TypeError
        If `item` is neither an :class:`Annotation` nor a
        :class:`Feature`.
    """
    if isinstance(item, Annotation):
        additional = item._features
    elif isinstance(item, Feature):
        additional = set([item])
    else:
        raise TypeError(
            f"Only 'Feature' and 'Annotation' objects are supported, "
            f"not {type(item).__name__}"
        )
    return Annotation(self._features | additional)
434
+
435
def __iadd__(self, item):
    """
    Add the feature(s) of `item` to this annotation in-place.

    Parameters
    ----------
    item : Annotation or Feature
        The annotation or single feature to be merged into this one.

    Returns
    -------
    self : Annotation
        This annotation, as required by the in-place operator
        protocol.

    Raises
    ------
    TypeError
        If `item` is neither an :class:`Annotation` nor a
        :class:`Feature`.
    """
    if not isinstance(item, (Annotation, Feature)):
        raise TypeError(
            f"Only 'Feature' and 'Annotation' objects are supported, "
            f"not {type(item).__name__}"
        )
    if isinstance(item, Annotation):
        self._features |= item._features
    else:
        self._features.add(item)
    return self
446
+
447
def __getitem__(self, index):
    """
    Get the sub-annotation covering a location range.

    Locations that only partly overlap the range are truncated to it
    and marked with the corresponding ``MISS_LEFT``/``MISS_RIGHT``
    defect.
    Features without any location in the range are omitted.

    Parameters
    ----------
    index : slice
        The location range; the stop is exclusive.
        An omitted start or stop leaves that side unbounded.
        The step of the slice is not evaluated.

    Returns
    -------
    sub_annotation : Annotation
        A new annotation containing the (truncated) features.

    Raises
    ------
    TypeError
        If `index` is not a slice.
    """
    if not isinstance(index, slice):
        raise TypeError(
            f"'{type(index).__name__}' instances are invalid indices"
        )

    # Inclusive boundaries of the requested range;
    # the maxsize values make an omitted boundary match every location
    lower = -sys.maxsize if index.start is None else index.start
    upper = sys.maxsize if index.stop is None else index.stop - 1

    result = Annotation()
    for feature in self:
        clipped_locs = []
        for loc in feature.locs:
            if loc.first > upper or loc.last < lower:
                # The location is completely outside of the range
                continue
            first, last = loc.first, loc.last
            defect = loc.defect
            # Truncate overhanging ends and record the truncation
            if first < lower:
                defect |= Location.Defect.MISS_LEFT
                first = lower
            if last > upper:
                defect |= Location.Defect.MISS_RIGHT
                last = upper
            clipped_locs.append(Location(first, last, loc.strand, defect))
        if clipped_locs:
            # The feature is kept as soon as one of its locations
            # reaches into the range
            result.add_feature(Feature(
                key=feature.key, locs=clipped_locs, qual=feature.qual
            ))
    return result
494
+
495
def __delitem__(self, item):
    """
    Delete a feature from the annotation via ``del annotation[feature]``.

    Parameters
    ----------
    item : Feature
        Feature to be removed.

    Raises
    ------
    TypeError
        If `item` is not a :class:`Feature`.
    KeyError
        If the feature is not in the annotation.
    """
    if isinstance(item, Feature):
        self.del_feature(item)
        return
    raise TypeError(
        f"Only 'Feature' objects are supported, "
        f"not {type(item).__name__}"
    )
502
+
503
+ def __iter__(self):
504
+ return self._features.__iter__()
505
+
506
+ def __contains__(self, item):
507
+ return item in self._features
508
+
509
def __eq__(self, item):
    """
    Check for equality: two annotations are equal, if they contain
    equal sets of features.
    Objects of any other type are never equal to an annotation.
    """
    return (
        isinstance(item, Annotation)
        and self._features == item._features
    )
513
+
514
+ def __len__(self):
515
+ return len(self._features)
516
+
517
+
518
+ class AnnotatedSequence(Copyable):
519
+ """
520
+ An :class:`AnnotatedSequence` is a combination of a
521
+ :class:`Sequence` and an :class:`Annotation`.
522
+
523
+ Indexing an :class:`AnnotatedSequence` with a slice returns another
524
+ :class:`AnnotatedSequence` with the corresponding subannotation and
525
+ a sequence start corrected subsequence, i.e. indexing starts at 1
526
+ with the default sequence start 1.
527
+ The sequence start in the newly created :class:`AnnotatedSequence`
528
+ is the start of the slice.
529
+ Furthermore, integer indices are allowed in which case the
530
+ corresponding symbol of the sequence is returned (also sequence
531
+ start corrected).
532
+ In both cases the index must be in range of the sequence, e.g. if
533
+ sequence start is 1, index 0 is not allowed.
534
+ Negative indices do not mean indexing from the end of the sequence,
535
+ in contrast to the behavior in :class:`Sequence` objects.
536
+ Both index types can also be used to modify the sequence.
537
+
538
+ Another option is indexing with a :class:`Feature` (preferably from the
539
+ :class:`Annotation` in the same :class:`AnnotatedSequence`).
540
+ In this case a sequence, described by the location(s) of the
541
+ :class:`Feature`, is returned.
542
+ When using a :class:`Feature` for setting an
543
+ :class:`AnnotatedSequence` with a sequence, the new sequence is
544
+ replacing the locations of the
545
+ :class:`Feature`.
546
+ Note that the replacing sequence must have the same length as the
547
+ sequence of the :class:`Feature` index.
548
+
549
+ Parameters
550
+ ----------
551
+ sequence : Sequence
552
+ The sequence.
553
+ Usually a :class:`NucleotideSequence` or
554
+ :class:`ProteinSequence`.
555
+ annotation : Annotation
556
+ The annotation corresponding to `sequence`.
557
+ sequence_start : int, optional
558
+ By default, the first symbol of the sequence is corresponding
559
+ to location 1 of the features in the annotation. The location
560
+ of the first symbol can be changed by setting this parameter.
561
+ Negative values are not supported yet.
562
+
563
+ Attributes
564
+ ----------
565
+ sequence : Sequence
566
+ The represented sequence.
567
+ annotation : Annotation
568
+ The annotation corresponding to `sequence`.
569
+ sequence_start : int
570
+ The location of the first symbol in the sequence.
571
+
572
+ See also
573
+ --------
574
+ Annotation, Sequence
575
+
576
+ Examples
577
+ --------
578
+ Creating an annotated sequence
579
+
580
+ >>> sequence = NucleotideSequence("ATGGCGTACGATTAGAAAAAAA")
581
+ >>> feature1 = Feature("misc_feature", [Location(1,2), Location(11,12)],
582
+ ... {"note" : "walker"})
583
+ >>> feature2 = Feature("misc_feature", [Location(16,22)], {"note" : "poly-A"})
584
+ >>> annotation = Annotation([feature1, feature2])
585
+ >>> annot_seq = AnnotatedSequence(annotation, sequence)
586
+ >>> print(annot_seq.sequence)
587
+ ATGGCGTACGATTAGAAAAAAA
588
+ >>> for f in sorted(list(annot_seq.annotation)):
589
+ ... print(f.qual["note"])
590
+ walker
591
+ poly-A
592
+
593
+ Indexing with integers, note the sequence start correction
594
+
595
+ >>> print(annot_seq[2])
596
+ T
597
+ >>> print(annot_seq.sequence[2])
598
+ G
599
+
600
+ indexing with slices
601
+
602
+ >>> annot_seq2 = annot_seq[:16]
603
+ >>> print(annot_seq2.sequence)
604
+ ATGGCGTACGATTAG
605
+ >>> for f in annot_seq2.annotation:
606
+ ... print(f.qual["note"])
607
+ walker
608
+
609
+ Indexing with features
610
+
611
+ >>> print(annot_seq[feature1])
612
+ ATAT
613
+ >>> print(annot_seq[feature2])
614
+ AAAAAAA
615
+ >>> print(annot_seq.sequence)
616
+ ATGGCGTACGATTAGAAAAAAA
617
+ >>> annot_seq[feature1] = NucleotideSequence("CCCC")
618
+ >>> print(annot_seq.sequence)
619
+ CCGGCGTACGCCTAGAAAAAAA
620
+ """
621
+
622
+ def __init__(self, annotation, sequence, sequence_start=1):
623
+ self._annotation = annotation
624
+ self._sequence = sequence
625
+ self._seqstart = sequence_start
626
+
627
+ def __repr__(self):
628
+ """Represent AnnotatedSequence as a string for debugging."""
629
+ return f'AnnotatedSequence({self._annotation.__repr__()}, {self._sequence.__repr__()}, ' \
630
+ f'sequence_start={self._seqstart})'
631
+
632
@property
def sequence_start(self):
    """
    int : The location of the first symbol in the sequence
    (read-only).
    """
    start = self._seqstart
    return start
635
+
636
@property
def sequence(self):
    """
    Sequence : The represented sequence (read-only attribute, the
    stored object itself is returned, not a copy).
    """
    stored_sequence = self._sequence
    return stored_sequence
639
+
640
@property
def annotation(self):
    """
    Annotation : The annotation corresponding to the sequence
    (read-only attribute, the stored object itself is returned,
    not a copy).
    """
    stored_annotation = self._annotation
    return stored_annotation
643
+
644
def __copy_create__(self):
    """
    Create a new :class:`AnnotatedSequence` from copies of the
    annotation and the sequence.

    Returns
    -------
    duplicate : AnnotatedSequence
        A new object with a copied annotation, a copied sequence and
        the same sequence start.
    """
    # copy() must be *called* on the sequence: passing the bound method
    # itself would store a method object instead of a sequence
    return AnnotatedSequence(
        self._annotation.copy(), self._sequence.copy(), self._seqstart
    )
647
+
648
def reverse_complement(self, sequence_start=1):
    """
    Create the reverse complement of the annotated sequence.

    This method accurately converts the position and the strand of
    the annotation.
    The information on the sequence start is lost.

    Parameters
    ----------
    sequence_start : int, optional
        The location of the first symbol in the reverse complement
        sequence.

    Returns
    -------
    rev_annot_seq : AnnotatedSequence
        The reverse complement of the annotated sequence.
    """
    Defect = Location.Defect
    Strand = Location.Strand
    # Each defect flag and the flag it turns into on the opposite
    # strand: the side-specific flags swap sides, the others persist
    mirrored_defects = (
        (Defect.MISS_LEFT, Defect.MISS_RIGHT),
        (Defect.MISS_RIGHT, Defect.MISS_LEFT),
        (Defect.BEYOND_RIGHT, Defect.BEYOND_LEFT),
        (Defect.BEYOND_LEFT, Defect.BEYOND_RIGHT),
        (Defect.UNK_LOC, Defect.UNK_LOC),
        (Defect.BETWEEN, Defect.BETWEEN),
    )

    flipped_sequence = self._sequence.reverse().complement()

    # A location 'p' is converted into a sequence index
    # (p - self._seqstart), mirrored at the last sequence index
    # (len - 1) and converted back into a location (+ sequence_start).
    # In sum, the mirrored location is 'mirror_offset - p'
    mirror_offset = (
        (len(self._sequence) - 1) + self._seqstart + sequence_start
    )

    flipped_features = []
    for feature in self._annotation:
        flipped_locs = []
        for loc in feature.locs:
            # The former last position becomes the new first position
            # and vice versa
            flipped_first = mirror_offset - loc.last
            flipped_last = mirror_offset - loc.first

            flipped_strand = (
                Strand.REVERSE if loc.strand == Strand.FORWARD
                else Strand.FORWARD
            )

            flipped_defect = Defect.NONE
            for flag, counterpart in mirrored_defects:
                if loc.defect & flag:
                    flipped_defect |= counterpart

            flipped_locs.append(Location(
                flipped_first, flipped_last, flipped_strand, flipped_defect
            ))
        flipped_features.append(
            Feature(feature.key, flipped_locs, feature.qual)
        )

    return AnnotatedSequence(
        Annotation(flipped_features), flipped_sequence, sequence_start
    )
714
+
715
def __getitem__(self, index):
    """
    Get a part of the annotated sequence.

    All indices are locations, i.e. they are corrected by the
    sequence start before the underlying sequence is accessed.

    Parameters
    ----------
    index : Feature or slice or int
        - :class:`Feature`: The subsequences at the locations of the
          feature are concatenated in strand order.
          Subsequences on the reverse strand are reverse
          complemented.
        - slice: The part of the sequence in the given location range
          (exclusive stop) together with the corresponding
          sub-annotation.
        - int: The symbol at the given location.

    Returns
    -------
    item : Sequence or AnnotatedSequence or symbol
        The sequence covered by the feature, the sliced annotated
        sequence or the single symbol, respectively.

    Raises
    ------
    ValueError
        If the feature has no locations or its locations are on
        different strands.
    IndexError
        If the slice start or the integer index is lower than the
        sequence start.
    TypeError
        If `index` is of an unsupported type.
    """
    if isinstance(index, Feature):
        # Concatenate subsequences for each location of the feature
        locs = index.locs
        if len(locs) == 0:
            raise ValueError("Feature does not contain any locations")
        # Start by creating an empty sequence
        sub_seq = self._sequence.copy(new_seq_code=np.array([]))
        # Locations need to be sorted, as otherwise the locations
        # chunks would be merged in the wrong order
        # The order depends on whether the locs are on the forward
        # or reverse strand
        strand = None
        for loc in locs:
            if loc.strand == strand:
                pass
            elif strand is None:
                strand = loc.strand
            else:  # loc.strand != strand
                raise ValueError(
                    "All locations of the feature must have the same "
                    "strand direction"
                )
        if strand == Location.Strand.FORWARD:
            sorted_locs = sorted(
                locs, key=lambda loc: loc.first
            )
        else:
            sorted_locs = sorted(
                locs, key=lambda loc: loc.last, reverse=True
            )
        # Merge the sequences corresponding to the ordered locations
        for loc in sorted_locs:
            slice_start = loc.first - self._seqstart
            # +1 due to exclusive stop
            slice_stop = loc.last - self._seqstart + 1
            add_seq = self._sequence[slice_start:slice_stop]
            if loc.strand == Location.Strand.REVERSE:
                add_seq = add_seq.reverse().complement()
            sub_seq += add_seq
        return sub_seq

    elif isinstance(index, slice):
        # Sequence start correction
        if index.start is None:
            seq_start = 0
            # New value for the sequence start, value is base position
            rel_seq_start = self._seqstart
        else:
            if index.start < self._seqstart:
                raise IndexError(
                    f"The start of the index ({index.start}) is lower "
                    f"than the start of the sequence ({self._seqstart})"
                )
            seq_start = index.start - self._seqstart
            rel_seq_start = index.start
        if index.stop is None:
            seq_stop = len(self._sequence)
            # The annotation is indexed with locations, not with
            # sequence indices: the exclusive stop *location* is the
            # sequence length shifted by the sequence start.
            # Without the shift, the annotation of the last symbol(s)
            # would be cut off and marked as defective
            annot_index = slice(
                index.start, seq_stop + self._seqstart, index.step
            )
        else:
            seq_stop = index.stop - self._seqstart
            annot_index = index
        return AnnotatedSequence(self._annotation[annot_index],
                                 self._sequence[seq_start:seq_stop],
                                 rel_seq_start)

    elif isinstance(index, numbers.Integral):
        # A location before the sequence start would become a negative
        # sequence index and silently address a symbol from the end
        if index < self._seqstart:
            raise IndexError(
                f"The index ({index}) is lower "
                f"than the start of the sequence ({self._seqstart})"
            )
        return self._sequence[index - self._seqstart]

    else:
        raise TypeError(
            f"'{type(index).__name__}' instances are invalid indices"
        )
789
+
790
def __setitem__(self, index, item):
    """
    Replace a part of the underlying sequence.

    All indices are locations, i.e. they are corrected by the
    sequence start before the underlying sequence is accessed.
    The annotation is not changed.

    Parameters
    ----------
    index : Feature or slice or int
        - :class:`Feature`: The symbols at the locations of the
          feature are replaced by consecutive chunks of `item`.
        - slice: The symbols in the given location range
          (exclusive stop) are replaced.
        - int: The symbol at the given location is replaced.
    item : Sequence or symbol
        The replacement: a sequence for a :class:`Feature` or slice
        index, a single symbol for an integer index.

    Raises
    ------
    IndexError
        If the slice start or the integer index is lower than the
        sequence start.
    TypeError
        If `index` is of an unsupported type.
    """
    if isinstance(index, Feature):
        # Item must be sequence
        # with length equal to sum of location lengths
        sub_seq = item
        sub_seq_i = 0
        # NOTE(review): the locations are consumed in the iteration
        # order of 'index.locs' and chunks are written as given, while
        # __getitem__() sorts the locations and reverse complements
        # reverse strand chunks - confirm that this asymmetry is
        # intended
        for loc in index.locs:
            slice_start = loc.first - self._seqstart
            # +1 due to exclusive stop
            slice_stop = loc.last - self._seqstart + 1
            interval_size = slice_stop - slice_start
            self._sequence[slice_start:slice_stop] \
                = sub_seq[sub_seq_i : sub_seq_i + interval_size]
            sub_seq_i += interval_size
    elif isinstance(index, slice):
        # Sequence start correction
        if index.start is None:
            seq_start = 0
        else:
            # A start before the sequence start would become a negative
            # sequence index and silently overwrite symbols at the end
            if index.start < self._seqstart:
                raise IndexError(
                    f"The start of the index ({index.start}) is lower "
                    f"than the start of the sequence ({self._seqstart})"
                )
            seq_start = index.start - self._seqstart
        if index.stop is None:
            seq_stop = len(self._sequence)
        else:
            seq_stop = index.stop - self._seqstart
        # Item is a Sequence
        self._sequence[seq_start:seq_stop] = item
    elif isinstance(index, numbers.Integral):
        # Same wrap-around protection as for slices
        if index < self._seqstart:
            raise IndexError(
                f"The index ({index}) is lower "
                f"than the start of the sequence ({self._seqstart})"
            )
        # Item is a symbol
        self._sequence[index - self._seqstart] = item
    else:
        raise TypeError(
            f"'{type(index).__name__}' instances are invalid indices"
        )
823
+
824
def __eq__(self, item):
    """
    Check for equality: two annotated sequences are equal, if their
    annotations, their sequences and their sequence starts are equal.
    Objects of any other type are never equal to an annotated
    sequence.
    """
    if isinstance(item, AnnotatedSequence):
        return (
            self.annotation == item.annotation
            and self.sequence == item.sequence
            and self._seqstart == item._seqstart
        )
    return False