biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,836 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["Location", "Feature", "Annotation", "AnnotatedSequence"]
8
+
9
+ import copy
10
+ import numbers
11
+ import sys
12
+ from enum import Enum, Flag, auto
13
+ import numpy as np
14
+ from biotite.copyable import Copyable
15
+
16
+
17
class Location:
    """
    The position of a feature on a sequence, expressed as an inclusive
    range of bases/residues.

    Several :class:`Location` instances can belong to the same feature,
    if the feature is a join of multiple locations.

    Objects of this class are immutable.

    Parameters
    ----------
    first : int
        Starting base or residue position of the feature.
    last : int
        Inclusive ending base or residue position of the feature.
    strand : Strand
        The strand direction.
        Always :attr:`Strand.FORWARD` for peptide features.
    defect : Defect
        A possible defect of the location.

    Attributes
    ----------
    first, last, strand, defect
        Same as the parameters.

    Raises
    ------
    ValueError
        If `first` is greater than `last`.
    """

    class Defect(Flag):
        """
        Flags describing location defects.

        A location has a defect, when the feature itself is not directly
        located in the range of the first to the last base.

        - **NONE** - No location defect
        - **MISS_LEFT** - A part of the feature has been truncated
          before the first base/residue of the :class:`Location`
          (probably by indexing an :class:`Annotation` object)
        - **MISS_RIGHT** - A part of the feature has been truncated
          after the last base/residue of the :class:`Location`
          (probably by indexing an :class:`Annotation` object)
        - **BEYOND_LEFT** - The feature starts at an unknown position
          before the first base/residue of the :class:`Location`
        - **BEYOND_RIGHT** - The feature ends at an unknown position
          after the last base/residue of the :class:`Location`
        - **UNK_LOC** - The exact position is unknown, but it is at a
          single base/residue between the first and last residue of
          the :class:`Location`, inclusive
        - **BETWEEN** - The position is between two consecutive
          bases/residues.
        """

        NONE = 0
        MISS_LEFT = auto()
        MISS_RIGHT = auto()
        BEYOND_LEFT = auto()
        BEYOND_RIGHT = auto()
        UNK_LOC = auto()
        BETWEEN = auto()

    class Strand(Enum):
        """
        The strand a feature location lies on.
        This is not relevant for protein sequence features.
        """

        FORWARD = auto()
        REVERSE = auto()

    def __init__(self, first, last, strand=Strand.FORWARD, defect=Defect.NONE):
        # An inverted range is rejected before any state is stored
        if first > last:
            raise ValueError(
                "The first position cannot be higher than the last position"
            )
        self._first, self._last = first, last
        self._strand, self._defect = strand, defect

    # Read-only accessors: the underlying attributes are only set in __init__

    @property
    def first(self):
        return self._first

    @property
    def last(self):
        return self._last

    @property
    def strand(self):
        return self._strand

    @property
    def defect(self):
        return self._defect

    def __repr__(self):
        """Represent Location as a string for debugging."""
        # The enums are nested in this class, hence the 'Location.' prefix
        strand_repr = "Location." + str(self._strand)
        defect_repr = "Location." + str(self._defect)
        return (
            f"Location({self._first}, {self._last}, "
            f"strand={strand_repr}, defect={defect_repr})"
        )

    def __str__(self):
        """
        Render the range as ``first-last``, with an arrow marking the
        strand: a trailing ``>`` for the forward strand, otherwise a
        leading ``<``.
        """
        span = f"{self.first:d}-{self.last:d}"
        if self.strand == Location.Strand.FORWARD:
            return span + " >"
        return "< " + span

    def __eq__(self, item):
        """
        Two locations are equal, if range, strand and defect are equal.
        Any object that is not a :class:`Location` compares unequal.
        """
        if isinstance(item, Location):
            return (
                self.first == item.first
                and self.last == item.last
                and self.strand == item.strand
                and self.defect == item.defect
            )
        return False

    def __hash__(self):
        # Hash over the same four fields that __eq__ compares
        identity = (self._first, self._last, self._strand, self._defect)
        return hash(identity)
140
+
141
+
142
class Feature(Copyable):
    """
    This class represents a single sequence feature, for example from a
    GenBank feature table.
    A feature describes a functional part of a sequence.
    It consists of a feature key, describing the general class of the
    feature, at least one location, describing its position on the
    reference, and qualifiers, describing the feature in detail.

    Objects of this class are immutable.

    Parameters
    ----------
    key : str
        The name of the feature class, e.g. *gene*, *CDS* or
        *regulatory*.
    locs : iterable object of Location
        The feature locations. In most cases this will only
        contain one location, but multiple ones are also possible for
        example in eukaryotic CDS (due to splicing).
        Any iterable is accepted, including one-shot iterables such as
        generators.
    qual : dict, optional
        Maps feature qualifiers to their corresponding values.
        The keys are always strings. A value is either a string or
        ``None`` if the qualifier key does not have a value.
        If a key has multiple values, the values are separated by a
        line break.

    Attributes
    ----------
    key : str
        The name of the feature class, e.g. *gene*, *CDS* or
        *regulatory*.
    locs : frozenset of Location
        The feature locations.
    qual : dict
        A shallow copy of the qualifier mapping.
        The keys are always strings. A value is either a string or
        ``None`` if the qualifier key does not have a value.
        If a key has multiple values, the values are separated by a
        line break.

    Raises
    ------
    ValueError
        If `locs` does not contain any location.
    """

    def __init__(self, key, locs, qual=None):
        self._key = key
        # Materialize before checking for emptiness:
        # `locs` may be an iterable without a length (e.g. a generator),
        # on which `len()` would raise a TypeError
        self._locs = frozenset(locs)
        if not self._locs:
            raise ValueError("A feature must have at least one location")
        # Deep copy, so later changes to the caller's dict do not leak in
        self._qual = copy.deepcopy(qual) if qual is not None else {}

    def __repr__(self):
        """Represent Feature as a string for debugging."""
        loc_reprs = ", ".join([repr(loc) for loc in self.locs])
        return f'Feature("{self._key}", [{loc_reprs}], qual={self._qual})'

    def get_location_range(self):
        """
        Get the minimum first base/residue and maximum last base/residue
        of all feature locations.

        This can be used to create a location, that spans all of the
        feature's locations.

        Returns
        -------
        first : int
            The minimum first base/residue of all locations.
        last : int
            The maximum last base/residue of all locations.
        """
        first = np.min([loc.first for loc in self._locs])
        last = np.max([loc.last for loc in self._locs])
        return first, last

    def __eq__(self, item):
        """
        Two features are equal, if key, locations and qualifiers are
        equal. Any object that is not a :class:`Feature` compares
        unequal.
        """
        if not isinstance(item, Feature):
            return False
        return (
            self._key == item._key
            and self._locs == item._locs
            and self._qual == item._qual
        )

    def __lt__(self, item):
        """
        Order features by their location range.

        Returns ``False`` for any object that is not a :class:`Feature`.
        """
        if not isinstance(item, Feature):
            return False
        first, last = self.get_location_range()
        it_first, it_last = item.get_location_range()
        # The first base/residue is most significant,
        # if it is equal for both features, look at last base/residue
        if first < it_first:
            return True
        elif first > it_first:
            return False
        else:  # First is equal
            # On a tie the feature reaching further right sorts first
            return last > it_last

    def __gt__(self, item):
        """
        Order features by their location range (mirror of ``__lt__``).

        Returns ``False`` for any object that is not a :class:`Feature`.
        """
        if not isinstance(item, Feature):
            return False
        first, last = self.get_location_range()
        it_first, it_last = item.get_location_range()
        # The first base/residue is most significant,
        # if it is equal for both features, look at last base/residue
        if first > it_first:
            return True
        elif first < it_first:
            return False
        else:  # First is equal
            # On a tie the feature reaching further right sorts first
            return last < it_last

    @property
    def key(self):
        return self._key

    @property
    def locs(self):
        return copy.copy(self._locs)

    @property
    def qual(self):
        # Shallow copy: the caller cannot add or remove qualifiers
        # of this feature through the returned dict
        return copy.copy(self._qual)

    def __hash__(self):
        # Built from the same fields as __eq__;
        # requires the qualifier values to be hashable
        return hash((self._key, self._locs, frozenset(self._qual.items())))
267
+
268
+
269
+ class Annotation(Copyable):
270
+ """
271
+ An :class:`Annotation` is a set of features belonging to one
272
+ sequence.
273
+
274
+ Its advantage over a simple list is the base/residue position based
275
+ indexing:
276
+ When using slice indices in Annotation objects, a subannotation is
277
+ created, containing copies of all :class:`Feature` objects whose
278
+ first and last base/residue are in range of the slice.
279
+ If the slice starts after the first base/residue or/and the slice
280
+ ends before the last residue, the position out of range is set to
281
+ the boundaries of the slice (the :class:`Feature` is truncated).
282
+ In this case the :class:`Feature` obtains the
283
+ :attr:`Location.Defect.MISS_LEFT` and/or
284
+ :attr:`Location.Defect.MISS_RIGHT` defect.
285
+ The third case occurs when a :class:`Feature` starts after the slice
286
+ ends or a :class:`Feature` ends before the slice starts.
287
+ In this case the :class:`Feature` will not appear in the
288
+ subannotation.
289
+
290
+ The start or stop position in the slice indices can be omitted, then
291
+ the subannotation will include all features from the start or up to
292
+ the stop, respectively. Step values are ignored.
293
+ The stop values are still exclusive, i.e. the subannotation will
294
+ contain a not truncated :class:`Feature` only if its last
295
+ base/residue is smaller than the stop value of the slice.
296
+
297
+ Integers or other index types are not supported. If you want to
298
+ obtain the :class:`Feature` instances from the :class:`Annotation`
299
+ you need to iterate over it.
300
+ The iteration has no defined order.
301
+ Alternatively, you can obtain a copy of the internal
302
+ :class:`Feature` set via :func:`get_features()`.
303
+
304
+ Multiple :class:`Annotation` objects can be concatenated to one
305
+ :class:`Annotation` object using the '+' operator.
306
+ Single :class:`Feature` instances can be added this way, too.
307
+ If a feature is present in both :class:`Annotation` objects, the
308
+ resulting :class:`Annotation` will contain this feature twice.
309
+
310
+ Parameters
311
+ ----------
312
+ features : iterable object of Feature, optional
313
+ The features to create the :class:`Annotation` from. If not
314
+ provided, an empty :class:`Annotation` is created.
315
+
316
+ Examples
317
+ --------
318
+ Creating an annotation from a feature list:
319
+
320
+ >>> feature1 = Feature("CDS", [Location(-10, 30 )], qual={"gene" : "test1"})
321
+ >>> feature2 = Feature("CDS", [Location(20, 50 )], qual={"gene" : "test2"})
322
+ >>> annotation = Annotation([feature1, feature2])
323
+ >>> for f in sorted(list(annotation)):
324
+ ... print(f.qual["gene"], "".join([str(loc) for loc in f.locs]))
325
+ test1 -10-30 >
326
+ test2 20-50 >
327
+
328
+ Merging two annotations and a feature:
329
+
330
+ >>> feature3 = Feature("CDS", [Location(100, 130 )], qual={"gene" : "test3"})
331
+ >>> feature4 = Feature("CDS", [Location(150, 250 )], qual={"gene" : "test4"})
332
+ >>> annotation2 = Annotation([feature3, feature4])
333
+ >>> feature5 = Feature("CDS", [Location(-50, 200 )], qual={"gene" : "test5"})
334
+ >>> annotation = annotation + annotation2 + feature5
335
+ >>> for f in sorted(list(annotation)):
336
+ ... print(f.qual["gene"], "".join([str(loc) for loc in f.locs]))
337
+ test5 -50-200 >
338
+ test1 -10-30 >
339
+ test2 20-50 >
340
+ test3 100-130 >
341
+ test4 150-250 >
342
+
343
+ Location based indexing, note the defects:
344
+
345
+ >>> annotation = annotation[40:150]
346
+ >>> for f in sorted(list(annotation)):
347
+ ... gene = f.qual["gene"]
348
+ ... loc_str = "".join([f"{loc} {loc.defect}" for loc in f.locs])
349
+ ... print(gene, loc_str)
350
+ test5 40-149 > Defect.MISS_LEFT|MISS_RIGHT
351
+ test2 40-50 > Defect.MISS_LEFT
352
+ test3 100-130 > Defect.NONE
353
+ """
354
+
355
def __init__(self, features=None):
    """
    Store the given features in an internal set.

    Parameters
    ----------
    features : iterable object of Feature, optional
        The initial features.
        If omitted, the annotation starts empty.
    """
    # Always build a new set, so the caller's container is never shared
    self._features = set() if features is None else set(features)
360
+
361
def __repr__(self):
    """Represent Annotation as a string for debugging."""
    # The features live in a set, so their order in the output is arbitrary
    feature_reprs = ", ".join(feat.__repr__() for feat in self._features)
    return f"Annotation([{feature_reprs}])"
366
+
367
def __copy_create__(self):
    """
    Create a new :class:`Annotation` holding the same features.

    The constructor builds a fresh set from the given features,
    hence the feature container is not shared with this object.
    """
    duplicate = Annotation(self._features)
    return duplicate
369
+
370
def get_features(self):
    """
    Get a copy of the internal feature set.

    Returns
    -------
    feature_set : set of Feature
        A shallow copy of the internal feature set.
        Adding to or removing from the returned set does not affect
        this annotation.
    """
    return set(self._features)
380
+
381
def add_feature(self, feature):
    """
    Add a feature to the annotation.

    Parameters
    ----------
    feature : Feature
        Feature to be added.

    Raises
    ------
    TypeError
        If `feature` is not a :class:`Feature` instance.
    """
    if isinstance(feature, Feature):
        self._features.add(feature)
        return
    raise TypeError(
        f"Only 'Feature' objects are supported, not {type(feature).__name__}"
    )
395
+
396
def get_location_range(self):
    """
    Get the range of feature locations,
    i.e. the first and exclusive last base/residue.

    Returns
    -------
    int : start
        Start location, i.e. the smallest ``first`` value of all
        locations.
    int : stop
        Exclusive stop location, i.e. the largest ``last`` value of
        all locations plus one.

    Notes
    -----
    Without any location the sentinel values are returned unchanged,
    i.e. ``(sys.maxsize, -sys.maxsize + 1)``.
    """
    all_locs = [loc for feature in self._features for loc in feature.locs]
    # The sentinels take part in the comparison,
    # so they are the result if there is no location at all
    start = min([sys.maxsize] + [loc.first for loc in all_locs])
    inclusive_stop = max([-sys.maxsize] + [loc.last for loc in all_locs])
    # Exclusive stop -> +1
    return start, inclusive_stop + 1
418
+
419
def del_feature(self, feature):
    """
    Delete a feature from the annotation.

    Parameters
    ----------
    feature : Feature
        Feature to be removed.

    Raises
    ------
    KeyError
        If the feature is not in the annotation
    """
    # 'set.remove()' raises the documented KeyError for a missing feature
    stored_features = self._features
    stored_features.remove(feature)
434
+
435
def __add__(self, item):
    """
    Create a new :class:`Annotation` containing the features of this
    annotation plus the features of `item`.

    Parameters
    ----------
    item : Annotation or Feature
        The annotation or single feature to be merged in.

    Returns
    -------
    merged : Annotation
        The new annotation. This object is left unchanged.

    Raises
    ------
    TypeError
        If `item` is neither an :class:`Annotation` nor a
        :class:`Feature`.
    """
    if isinstance(item, Annotation):
        extra_features = item._features
    elif isinstance(item, Feature):
        extra_features = {item}
    else:
        raise TypeError(
            f"Only 'Feature' and 'Annotation' objects are supported, "
            f"not {type(item).__name__}"
        )
    return Annotation(self._features | extra_features)
445
+
446
def __iadd__(self, item):
    """
    Merge the features of `item` into this annotation in-place.

    Parameters
    ----------
    item : Annotation or Feature
        The annotation or single feature to be merged in.

    Returns
    -------
    self : Annotation
        This annotation, as required for augmented assignment.

    Raises
    ------
    TypeError
        If `item` is neither an :class:`Annotation` nor a
        :class:`Feature`.
    """
    if isinstance(item, Annotation):
        # In-place union, the internal set object stays the same
        self._features.update(item._features)
        return self
    if isinstance(item, Feature):
        self._features.add(item)
        return self
    raise TypeError(
        f"Only 'Feature' and 'Annotation' objects are supported, "
        f"not {type(item).__name__}"
    )
457
+
458
def __getitem__(self, index):
    """
    Get the sub-annotation covering the location range of a slice.

    Locations that only partly overlap the range are clipped to it
    and flagged with the corresponding ``MISS_LEFT``/``MISS_RIGHT``
    defect.
    A feature is kept if at least one of its locations overlaps the
    range.
    The slice step is not evaluated.

    Parameters
    ----------
    index : slice
        The location range: inclusive start and exclusive stop.
        An omitted start or stop leaves that side unbounded.

    Returns
    -------
    sub_annot : Annotation
        The annotation restricted to the location range.

    Raises
    ------
    TypeError
        If `index` is not a slice.
    """
    if not isinstance(index, slice):
        raise TypeError(f"'{type(index).__name__}' instances are invalid indices")

    # If no start or stop index is given, include all
    lower = -sys.maxsize if index.start is None else index.start
    # The slice stop is exclusive, while locations are inclusive -> -1
    upper = sys.maxsize if index.stop is None else index.stop - 1

    sub_annot = Annotation()
    for feature in self:
        clipped_locs = []
        for loc in feature.locs:
            if loc.first > upper or loc.last < lower:
                # The location is completely outside of the range
                continue
            new_first, new_last = loc.first, loc.last
            defect = loc.defect
            # Cut off the overhanging parts and record the cut as defect
            if new_first < lower:
                defect |= Location.Defect.MISS_LEFT
                new_first = lower
            if new_last > upper:
                defect |= Location.Defect.MISS_RIGHT
                new_last = upper
            clipped_locs.append(Location(new_first, new_last, loc.strand, defect))
        if clipped_locs:
            # The feature is part of the new annotation,
            # as at least one of its locations is in scope
            sub_annot.add_feature(
                Feature(key=feature.key, locs=clipped_locs, qual=feature.qual)
            )
    return sub_annot
501
+
502
def __delitem__(self, item):
    """
    Delete a feature via ``del annotation[feature]``.

    Parameters
    ----------
    item : Feature
        Feature to be removed.

    Raises
    ------
    TypeError
        If `item` is not a :class:`Feature` instance.
    KeyError
        If the feature is not in the annotation.
    """
    if isinstance(item, Feature):
        self.del_feature(item)
        return
    raise TypeError(
        f"Only 'Feature' objects are supported, not {type(item).__name__}"
    )
508
+
509
def __iter__(self):
    """Iterate over the features; the order is that of the internal set."""
    return iter(self._features)
511
+
512
def __contains__(self, item):
    """Check whether `item` is one of the features of this annotation."""
    stored_features = self._features
    return item in stored_features
514
+
515
def __eq__(self, item):
    """
    Two annotations are equal, if their feature sets are equal.
    Objects of any other type never compare equal.
    """
    return isinstance(item, Annotation) and self._features == item._features
519
+
520
def __len__(self):
    """Get the number of features in this annotation."""
    feature_count = len(self._features)
    return feature_count
522
+
523
+
524
+ class AnnotatedSequence(Copyable):
525
+ """
526
+ An :class:`AnnotatedSequence` is a combination of a
527
+ :class:`Sequence` and an :class:`Annotation`.
528
+
529
+ Indexing an :class:`AnnotatedSequence` with a slice returns another
530
+ :class:`AnnotatedSequence` with the corresponding subannotation and
531
+ a sequence start corrected subsequence, i.e. indexing starts at 1
532
+ with the default sequence start 1.
533
+ The sequence start in the newly created :class:`AnnotatedSequence`
534
+ is the start of the slice.
535
+ Furthermore, integer indices are allowed in which case the
536
+ corresponding symbol of the sequence is returned (also sequence
537
+ start corrected).
538
+ In both cases the index must be in range of the sequence, e.g. if
539
+ sequence start is 1, index 0 is not allowed.
540
+ Negative indices do not mean indexing from the end of the sequence,
541
+ in contrast to the behavior in :class:`Sequence` objects.
542
+ Both index types can also be used to modify the sequence.
543
+
544
+ Another option is indexing with a :class:`Feature` (preferably from the
545
+ :class:`Annotation` in the same :class:`AnnotatedSequence`).
546
+ In this case a sequence, described by the location(s) of the
547
+ :class:`Feature`, is returned.
548
+ When using a :class:`Feature` for setting an
549
+ :class:`AnnotatedSequence` with a sequence, the new sequence is
550
+ replacing the locations of the
551
+ :class:`Feature`.
552
+ Note that the replacing sequence must have the same length as the
553
+ sequence of the :class:`Feature` index.
554
+
555
+ Parameters
556
+ ----------
557
+ annotation : Annotation
558
+ The annotation corresponding to `sequence`.
559
+ sequence : Sequence
560
+ The sequence.
561
+ Usually a :class:`NucleotideSequence` or
562
+ :class:`ProteinSequence`.
563
+ sequence_start : int, optional
564
+ By default, the first symbol of the sequence is corresponding
565
+ to location 1 of the features in the annotation. The location
566
+ of the first symbol can be changed by setting this parameter.
567
+ Negative values are not supported yet.
568
+
569
+ Attributes
570
+ ----------
571
+ annotation : Annotation
572
+ The annotation corresponding to `sequence`.
573
+ sequence : Sequence
574
+ The represented sequence.
575
+ sequence_start : int
576
+ The location of the first symbol in the sequence.
577
+
578
+ See Also
579
+ --------
580
+ Annotation : An annotation separated from a sequence.
581
+ Sequence : A sequence separated from an annotation.
582
+
583
+ Examples
584
+ --------
585
+ Creating an annotated sequence
586
+
587
+ >>> sequence = NucleotideSequence("ATGGCGTACGATTAGAAAAAAA")
588
+ >>> feature1 = Feature("misc_feature", [Location(1,2), Location(11,12)],
589
+ ... {"note" : "walker"})
590
+ >>> feature2 = Feature("misc_feature", [Location(16,22)], {"note" : "poly-A"})
591
+ >>> annotation = Annotation([feature1, feature2])
592
+ >>> annot_seq = AnnotatedSequence(annotation, sequence)
593
+ >>> print(annot_seq.sequence)
594
+ ATGGCGTACGATTAGAAAAAAA
595
+ >>> for f in sorted(list(annot_seq.annotation)):
596
+ ... print(f.qual["note"])
597
+ walker
598
+ poly-A
599
+
600
+ Indexing with integers, note the sequence start correction
601
+
602
+ >>> print(annot_seq[2])
603
+ T
604
+ >>> print(annot_seq.sequence[2])
605
+ G
606
+
607
+ indexing with slices
608
+
609
+ >>> annot_seq2 = annot_seq[:16]
610
+ >>> print(annot_seq2.sequence)
611
+ ATGGCGTACGATTAG
612
+ >>> for f in annot_seq2.annotation:
613
+ ... print(f.qual["note"])
614
+ walker
615
+
616
+ Indexing with features
617
+
618
+ >>> print(annot_seq[feature1])
619
+ ATAT
620
+ >>> print(annot_seq[feature2])
621
+ AAAAAAA
622
+ >>> print(annot_seq.sequence)
623
+ ATGGCGTACGATTAGAAAAAAA
624
+ >>> annot_seq[feature1] = NucleotideSequence("CCCC")
625
+ >>> print(annot_seq.sequence)
626
+ CCGGCGTACGCCTAGAAAAAAA
627
+ """
628
+
629
def __init__(self, annotation, sequence, sequence_start=1):
    """
    Store the given objects; neither of them is copied.

    Parameters
    ----------
    annotation : Annotation
        The annotation corresponding to `sequence`.
    sequence : Sequence
        The sequence.
    sequence_start : int, optional
        The location of the first symbol in the sequence.
    """
    self._seqstart = sequence_start
    self._sequence = sequence
    self._annotation = annotation
633
+
634
def __repr__(self):
    """Represent AnnotatedSequence as a string for debugging."""
    annotation_repr = self._annotation.__repr__()
    sequence_repr = self._sequence.__repr__()
    return (
        f"AnnotatedSequence({annotation_repr}, {sequence_repr}, "
        f"sequence_start={self._seqstart})"
    )
640
+
641
@property
def sequence_start(self):
    """The location of the first symbol in the sequence (read-only)."""
    start = self._seqstart
    return start
644
+
645
@property
def sequence(self):
    """The represented sequence (read-only); the stored object itself, not a copy."""
    stored_sequence = self._sequence
    return stored_sequence
648
+
649
@property
def annotation(self):
    """The annotation of the sequence (read-only); the stored object itself, not a copy."""
    stored_annotation = self._annotation
    return stored_annotation
652
+
653
def __copy_create__(self):
    """
    Create a new :class:`AnnotatedSequence` from copies of the
    annotation and the sequence.

    Returns
    -------
    copy : AnnotatedSequence
        The new object with the same sequence start.
    """
    # 'copy' must be called: passing the bound method itself would store
    # a method object instead of a sequence in the new instance
    return AnnotatedSequence(
        self._annotation.copy(), self._sequence.copy(), self._seqstart
    )
657
+
658
def reverse_complement(self, sequence_start=1):
    """
    Create the reverse complement of the annotated sequence.

    This method accurately converts the position and the strand of
    the annotation.
    The information on the sequence start is lost.

    Parameters
    ----------
    sequence_start : int, optional
        The location of the first symbol in the reverse complement
        sequence.

    Returns
    -------
    rev_annot_seq : AnnotatedSequence
        The reverse complement of the annotated sequence.
    """
    rev_seqstart = sequence_start
    rev_sequence = self._sequence.reverse().complement()

    # A location is mirrored in three steps:
    # (location - self._seqstart) -> location to index
    # (len - 1) - index           -> index on the reversed sequence
    # ... + rev_seqstart          -> index to location
    # All constant terms are gathered into a single offset
    mirror_offset = (len(self._sequence) - 1) + self._seqstart + rev_seqstart

    # Left and right trade places on the opposite strand,
    # direction-independent defects are taken over unchanged
    mirrored_defects = (
        (Location.Defect.MISS_LEFT, Location.Defect.MISS_RIGHT),
        (Location.Defect.MISS_RIGHT, Location.Defect.MISS_LEFT),
        (Location.Defect.BEYOND_RIGHT, Location.Defect.BEYOND_LEFT),
        (Location.Defect.BEYOND_LEFT, Location.Defect.BEYOND_RIGHT),
        (Location.Defect.UNK_LOC, Location.Defect.UNK_LOC),
        (Location.Defect.BETWEEN, Location.Defect.BETWEEN),
    )

    rev_features = []
    for feature in self._annotation:
        rev_locs = []
        for loc in feature.locs:
            # The old last position becomes the new first one and vice versa
            new_first = mirror_offset - loc.last
            new_last = mirror_offset - loc.first

            new_strand = (
                Location.Strand.REVERSE
                if loc.strand == Location.Strand.FORWARD
                else Location.Strand.FORWARD
            )

            new_defect = Location.Defect.NONE
            for old_flag, new_flag in mirrored_defects:
                if loc.defect & old_flag:
                    new_defect |= new_flag

            rev_locs.append(Location(new_first, new_last, new_strand, new_defect))
        rev_features.append(Feature(feature.key, rev_locs, feature.qual))

    return AnnotatedSequence(Annotation(rev_features), rev_sequence, rev_seqstart)
724
+
725
def __getitem__(self, index):
    """
    Get the sequence of a feature, a sub-:class:`AnnotatedSequence`
    or a single symbol.

    All positions are locations, i.e. they are corrected by the
    sequence start before the underlying sequence is accessed.

    Parameters
    ----------
    index : Feature or slice or int
        - :class:`Feature`: The subsequences at the locations of the
          feature are concatenated in strand order.
          Locations on the reverse strand are reverse complemented.
        - slice: The location range (inclusive start, exclusive stop).
          The slice step is not evaluated.
        - int: The location of a single symbol.

    Returns
    -------
    item : Sequence or AnnotatedSequence or object
        The feature sequence, the sub-annotated sequence starting at
        the slice start, or the symbol, respectively.

    Raises
    ------
    ValueError
        If the feature has no locations or its locations are on
        different strands.
    IndexError
        If the slice start or the integer index is lower than the
        sequence start.
    TypeError
        If `index` has an unsupported type.
    """
    if isinstance(index, Feature):
        locs = index.locs
        if len(locs) == 0:
            raise ValueError("Feature does not contain any locations")
        strands = {loc.strand for loc in locs}
        if len(strands) > 1:
            raise ValueError(
                "All locations of the feature must have the same "
                "strand direction"
            )
        strand = next(iter(strands))
        # Locations need to be sorted, as otherwise the sequence chunks
        # would be merged in the wrong order.
        # The reading direction depends on the strand
        if strand == Location.Strand.FORWARD:
            sorted_locs = sorted(locs, key=lambda loc: loc.first)
        else:
            sorted_locs = sorted(locs, key=lambda loc: loc.last, reverse=True)
        # Start with an empty sequence and append chunk by chunk
        sub_seq = self._sequence.copy(new_seq_code=np.array([]))
        for loc in sorted_locs:
            slice_start = loc.first - self._seqstart
            # +1 due to exclusive stop
            slice_stop = loc.last - self._seqstart + 1
            add_seq = self._sequence[slice_start:slice_stop]
            if loc.strand == Location.Strand.REVERSE:
                add_seq = add_seq.reverse().complement()
            sub_seq += add_seq
        return sub_seq

    elif isinstance(index, slice):
        # Sequence start correction
        if index.start is None:
            seq_start = 0
            # The new sequence keeps the current sequence start
            rel_seq_start = self._seqstart
        else:
            if index.start < self._seqstart:
                raise IndexError(
                    f"The start of the index ({index.start}) is lower "
                    f"than the start of the sequence ({self._seqstart})"
                )
            seq_start = index.start - self._seqstart
            # The new sequence starts at the slice start
            rel_seq_start = index.start
        if index.stop is None:
            seq_stop = len(self._sequence)
            # The annotation is indexed with locations, not with sequence
            # indices: the exclusive stop is the location directly after
            # the last symbol. Using the bare length would cut features
            # ending at the last symbol and flag them as MISS_RIGHT
            index = slice(index.start, seq_stop + self._seqstart, index.step)
        else:
            seq_stop = index.stop - self._seqstart
        return AnnotatedSequence(
            self._annotation[index],
            self._sequence[seq_start:seq_stop],
            rel_seq_start,
        )

    elif isinstance(index, numbers.Integral):
        if index < self._seqstart:
            # Without this check the corrected index would become
            # negative and silently select a symbol from the sequence end
            raise IndexError(
                f"The index ({index}) is lower "
                f"than the start of the sequence ({self._seqstart})"
            )
        return self._sequence[index - self._seqstart]

    else:
        raise TypeError(f"'{type(index).__name__}' instances are invalid indices")
795
+
796
def __setitem__(self, index, item):
    """
    Replace the symbols at the locations of a feature, in a location
    range or at a single location.

    All positions are locations, i.e. they are corrected by the
    sequence start before the underlying sequence is modified.

    Parameters
    ----------
    index : Feature or slice or int
        The positions to be replaced.
    item : Sequence or object
        For a :class:`Feature` or slice index the replacing sequence,
        for an integer index the replacing symbol.
        For a :class:`Feature` index consecutive chunks of `item` are
        assigned to the locations in the order they appear in
        ``index.locs``.

    Raises
    ------
    TypeError
        If `index` has an unsupported type.
    """
    if isinstance(index, Feature):
        # NOTE(review): locations are used in their stored order and the
        # strand is not considered, unlike in '__getitem__()' - confirm
        # that this is intended
        consumed = 0
        for loc in index.locs:
            begin = loc.first - self._seqstart
            # +1 due to exclusive stop
            end = loc.last - self._seqstart + 1
            chunk_len = end - begin
            self._sequence[begin:end] = item[consumed : consumed + chunk_len]
            consumed += chunk_len
    elif isinstance(index, slice):
        # Sequence start correction, open ends reach to the sequence ends
        begin = 0 if index.start is None else index.start - self._seqstart
        end = (
            len(self._sequence)
            if index.stop is None
            else index.stop - self._seqstart
        )
        self._sequence[begin:end] = item
    elif isinstance(index, numbers.Integral):
        self._sequence[index - self._seqstart] = item
    else:
        raise TypeError(f"'{type(index).__name__}' instances are invalid indices")
828
+
829
def __eq__(self, item):
    """
    Two annotated sequences are equal, if their annotations, their
    sequences and their sequence starts are equal.
    Objects of any other type never compare equal.
    """
    if isinstance(item, AnnotatedSequence):
        return (
            self.annotation == item.annotation
            and self.sequence == item.sequence
            and self._seqstart == item._seqstart
        )
    return False