biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,264 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.align"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["EValueEstimator"]
8
+
9
+ import numpy as np
10
+ from biotite.sequence.align.pairwise import align_optimal
11
+ from biotite.sequence.seqtypes import GeneralSequence
12
+
13
+
14
class EValueEstimator:
    r"""
    This class is used to calculate *expect values* (E-values) for local
    pairwise sequence alignments.

    The E-value is a measure to quantify the significance of a found
    homology.
    It is the number of alignments, that would result from aligning
    random sequences of a given length, with a score at least as high as
    the score from an alignment of interest.

    The calculation of the E-value from score and sequence lengths
    depends on the two parameters :math:`\lambda` and :math:`K`
    :footcite:`Altschul1996`.
    These parameters are estimated from sampling a large number
    of random sequence alignments in :meth:`from_samples()`
    :footcite:`Altschul1986`, which may be time consuming.
    If these parameters are known, the constructor can be used instead.

    Based on the sampled parameters, the decadic logarithm of the
    E-value can be quickly calculated via :meth:`log_evalue()`.

    Parameters
    ----------
    lam : float
        The :math:`\lambda` parameter.
    k : float
        The :math:`K` parameter.

    Notes
    -----
    The calculated E-value is a rough estimation that gets more
    accurate the more sequences are used in the sampling process.
    Note that the accuracy may be reduced for alignments of short
    sequences, where the average length of a sampled alignment makes up
    a significant part of the complete sampled sequence
    :footcite:`Altschul1996`.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    Create an alignment, whose significance should be evaluated.

    >>> query = NucleotideSequence("CGACGGCGTCTACGAGTCAACATCATTC")
    >>> hit = NucleotideSequence("GCTTTATTACGGGTTTACGAGTTCAACATCACGAAAACAA")
    >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
    >>> gap_penalty = (-12, -2)
    >>> alignment = align_optimal(query, hit, matrix, gap_penalty, local=True)[0]
    >>> print(alignment)
    ACGGCGTCTACGAGT-CAACATCA
    ACGG-GTTTACGAGTTCAACATCA
    >>> print(alignment.score)
    77

    Create an estimator based on the same scoring scheme as the
    alignment.
    Use background symbol frequencies from the hypothetical reference
    database.

    >>> # Ensure deterministic results
    >>> np.random.seed(0)
    >>> # Sequences in database have a GC content of 0.6
    >>> background = np.array([0.2, 0.3, 0.3, 0.2])
    >>> estimator = EValueEstimator.from_samples(
    ...     query.alphabet, matrix, gap_penalty, background, sample_length=100
    ... )

    Approach 1: Calculate E-value based on number of sequences in the
    hypothetical database (*100*).

    >>> log_e = estimator.log_evalue(alignment.score, len(query), 100 * len(hit))
    >>> print(f"E-value = {10**log_e:.2e}")
    E-value = 3.36e-01

    Approach 2: Calculate E-value based on total length of all sequences
    in the hypothetical database combined (*10000*).

    >>> log_e = estimator.log_evalue(alignment.score, len(query), 10000)
    >>> print(f"E-value = {10**log_e:.2e}")
    E-value = 8.41e-01
    """

    def __init__(self, lam, k):
        self._lam = lam
        self._k = k

    @staticmethod
    def from_samples(
        alphabet, matrix, gap_penalty, frequencies, sample_length=1000, sample_size=1000
    ):
        r"""
        Create an :class:`EValueEstimator` with :math:`\lambda` and
        :math:`K` estimated via sampling alignments of random sequences
        based on a given scoring scheme.

        The parameters are estimated from the sampled alignment scores
        using the method of moments :footcite:`Altschul1986`.

        Parameters
        ----------
        alphabet : Alphabet, length=k
            The alphabet for the sampled sequences.
        matrix : SubstitutionMatrix
            The substitution matrix.
            It must be compatible with the given `alphabet` and the
            expected similarity score between two random symbols must be
            negative.
        gap_penalty : int or tuple(int,int)
            Either a linear (``int``) or affine (``tuple``) gap penalty.
            Integers must be negative.
        frequencies : ndarray or array-like, shape=k, dtype=float
            The background frequencies for each symbol in the
            `alphabet`.
            The random sequences are created based on these frequencies.
            The values are normalized to a sum of 1 internally.
        sample_length : int
            The length of the sampled sequences.
            It should be much larger than the average length of a local
            alignment of two sequences.
            The runtime scales quadratically with this parameter.
        sample_size : int
            The number of sampled sequences.
            The accuracy of the estimated parameters and E-values,
            but also the runtime increases with the sample size.

        Returns
        -------
        estimator : EValueEstimator
            A :class:`EValueEstimator` with sampled :math:`\lambda` and
            :math:`K` parameters.

        Raises
        ------
        IndexError
            If the number of `frequencies` does not match the number of
            symbols in the `alphabet`.
        ValueError
            If any frequency is negative, if the frequencies do not sum
            up to a positive value, if the `matrix` is not symmetric or
            not compatible with the `alphabet`, or if the expected
            score between two random symbols is not negative.

        Notes
        -----
        The sampling process generates random sequences based on
        ``numpy.random``.
        To ensure reproducible results you could call
        :func:`numpy.random.seed()` before running
        :meth:`from_samples()`.
        """
        # Accept any array-like (e.g. a plain list), not only ndarrays:
        # the elementwise comparison below requires an array
        frequencies = np.asarray(frequencies, dtype=float)
        if len(frequencies) != len(alphabet):
            raise IndexError(
                f"Background frequencies for {len(frequencies)} symbols were "
                f"given, but the alphabet has {len(alphabet)} symbols"
            )
        if np.any(frequencies < 0):
            raise ValueError("Background frequencies must be positive")
        frequency_sum = np.sum(frequencies)
        # Fail early instead of silently creating NaN probabilities
        # by dividing by zero (or propagating NaN input)
        if not frequency_sum > 0:
            raise ValueError("Background frequencies must sum up to a positive value")
        # Normalize background frequencies
        frequencies = frequencies / frequency_sum

        # Check matrix
        if not matrix.is_symmetric():
            raise ValueError("A symmetric substitution matrix is required")
        if not matrix.get_alphabet1().extends(alphabet):
            raise ValueError(
                "The substitution matrix is not compatible with the given alphabet"
            )
        score_matrix = matrix.score_matrix()[: len(alphabet), : len(alphabet)]
        # Expected score of aligning two symbols drawn independently
        # from the background distribution
        if (
            np.sum(
                score_matrix * frequencies[np.newaxis, :] * frequencies[:, np.newaxis]
            )
            >= 0
        ):
            raise ValueError(
                "Invalid substitution matrix, the expected similarity "
                "score between two random symbols is not negative"
            )

        # Generate the sequence code for the random sequences:
        # one pair of sequences for each sample
        random_sequence_code = np.random.choice(
            len(alphabet), size=(sample_size, 2, sample_length), p=frequencies
        )

        # Sample the alignments of random sequences
        sample_scores = np.zeros(sample_size, dtype=int)
        for i in range(sample_size):
            seq1 = GeneralSequence(alphabet)
            seq2 = GeneralSequence(alphabet)
            seq1.code = random_sequence_code[i, 0]
            seq2.code = random_sequence_code[i, 1]
            sample_scores[i] = align_optimal(
                seq1, seq2, matrix, local=True, gap_penalty=gap_penalty, max_number=1
            )[0].score

        # Use method of moments to estimate parameters:
        # the variance of the sampled scores gives lambda,
        # the mean shifted by euler_gamma / lambda gives the location u,
        # and K follows from u and the size of the sampled search space
        lam = np.pi / np.sqrt(6 * np.var(sample_scores))
        u = np.mean(sample_scores) - np.euler_gamma / lam
        k = np.exp(lam * u) / sample_length**2

        return EValueEstimator(lam, k)

    @property
    def lam(self):
        r"""The :math:`\lambda` parameter of this estimator."""
        return self._lam

    @property
    def k(self):
        r"""The :math:`K` parameter of this estimator."""
        return self._k

    def log_evalue(self, score, seq1_length, seq2_length):
        r"""
        Calculate the decadic logarithm of the E-value for a given
        score.

        The E-value and the logarithm of the E-value are calculated as

        .. math::

            E = Kmn e^{-\lambda s}

            \log_{10} E = (\log_{10} Kmn) - \frac{\lambda s}{\ln 10},

        where :math:`s` is the similarity score and :math:`m` and
        :math:`n` are the lengths of the aligned sequences.

        Parameters
        ----------
        score : int or ndarray, dtype=int
            The score to evaluate.
        seq1_length : int or ndarray, dtype=int
            The length of the first sequence.
            In the context of a homology search in a sequence database,
            this is usually the length of the query sequence.
        seq2_length : int or ndarray, dtype=int
            The length of the second sequence.
            In the context of a homology search in a sequence database,
            this is usually either the combined length of all sequences
            in the database or the length of the hit sequence multiplied
            by the number of sequences in the database.

        Returns
        -------
        log_e : float or ndarray, dtype=float
            The decadic logarithm of the E-value.
            If arrays are given as input, the inputs are combined
            elementwise via NumPy broadcasting.

        Notes
        -----
        This method returns the logarithm of the E-value instead of
        the E-value, as low E-values indicating a highly significant
        homology cannot be accurately represented by a ``float``.
        """
        score = np.asarray(score)
        seq1_length = np.asarray(seq1_length)
        seq2_length = np.asarray(seq2_length)

        return np.log10(
            self._k * seq1_length * seq2_length
        ) - self._lam * score / np.log(10)
@@ -0,0 +1,64 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ cimport cython
6
+ cimport numpy as np
7
+
8
+
9
+ # A trace table saves the directions a cell came from
10
+ # A "1" in the corresponding bit in the trace table means
11
+ # the cell came from this direction
12
+
13
+ cdef enum TraceDirectionLinear:
14
+ # Values for linear gap penalty (one score table)
15
+ MATCH = 1 # bit 1 -> diagonal -> alignment of symbols
16
+ GAP_LEFT = 2 # bit 2 -> left -> gap in first sequence
17
+ GAP_TOP = 4 # bit 3 -> top -> gap in second sequence
18
+
19
+ cdef enum TraceDirectionAffine:
20
+ # Values for affine gap penalty (three score tables)
21
+ MATCH_TO_MATCH = 1 # bit 1 -> match - match transition
22
+ GAP_LEFT_TO_MATCH = 2 # bit 2 -> seq 1 gap - match transition
23
+ GAP_TOP_TO_MATCH = 4 # bit 3 -> seq 2 gap - match transition
24
+ MATCH_TO_GAP_LEFT = 8 # bit 4 -> match - seq 1 gap transition
25
+ GAP_LEFT_TO_GAP_LEFT = 16 # bit 5 -> seq 1 gap - seq 1 gap transition
26
+ MATCH_TO_GAP_TOP = 32 # bit 6 -> match - seq 2 gap transition
27
+ GAP_TOP_TO_GAP_TOP = 64 # bit 7 -> seq 2 gap - seq 2 gap transition
28
+
29
+
30
+ cdef enum TraceState:
31
+ # The state specifies the table the traceback is currently in
32
+ # For linear gap penalty (only one table/state exists):
33
+ NO_STATE = 0
34
+ # For affine gap penalty (three tables/states exist):
35
+ MATCH_STATE = 1
36
+ GAP_LEFT_STATE = 2
37
+ GAP_TOP_STATE = 3
38
+
39
+
40
+ cdef np.uint8_t get_trace_linear(np.int32_t match_score,
41
+ np.int32_t gap_left_score,
42
+ np.int32_t gap_top_score,
43
+ np.int32_t *max_score)
44
+
45
+ cdef np.uint8_t get_trace_affine(np.int32_t match_to_match_score,
46
+ np.int32_t gap_left_to_match_score,
47
+ np.int32_t gap_top_to_match_score,
48
+ np.int32_t match_to_gap_left_score,
49
+ np.int32_t gap_left_to_gap_left_score,
50
+ np.int32_t match_to_gap_top_score,
51
+ np.int32_t gap_top_to_gap_top_score,
52
+ np.int32_t *max_match_score,
53
+ np.int32_t *max_gap_left_score,
54
+ np.int32_t *max_gap_top_score)
55
+
56
+ cdef int follow_trace(np.uint8_t[:,:] trace_table,
57
+ bint banded,
58
+ int i, int j, int pos,
59
+ np.int64_t[:,:] trace,
60
+ list trace_list,
61
+ int state,
62
+ int* curr_trace_count,
63
+ int max_trace_count,
64
+ int lower_diag, int upper_diag) except -1
@@ -0,0 +1,370 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A module for Biotite's internal use only.
7
+ Contains C-functions for handling trace tables in a reusable way for
8
+ different alignment functions.
9
+ """
10
+
11
+ __name__ = "biotite.sequence.align"  # module reports the package name instead of its own file name
12
+ __author__ = "Patrick Kunzmann"
13
+ __all__ = []  # empty export list: a star-import of this module exports no names
14
+
15
+ cimport cython
16
+ cimport numpy as np
17
+
18
+ import numpy as np
19
+
20
+
21
cdef inline np.uint8_t get_trace_linear(np.int32_t match_score,
                                        np.int32_t gap_left_score,
                                        np.int32_t gap_top_score,
                                        np.int32_t *max_score):
    """
    Determine the maximum of the three input scores and the trace
    direction belonging to it for a linear gap penalty.

    The maximum is written to ``max_score[0]``.
    The returned value has the flag of every score set that is equal
    to the maximum, i.e. ties set multiple
    :class:`TraceDirectionLinear` flags.
    """
    cdef np.int32_t best = match_score
    cdef np.uint8_t trace = 0

    # Step 1: Find the maximum score
    if gap_left_score > best:
        best = gap_left_score
    if gap_top_score > best:
        best = gap_top_score

    # Step 2: Every direction reaching the maximum is part of the trace
    if match_score == best:
        trace |= TraceDirectionLinear.MATCH
    if gap_left_score == best:
        trace |= TraceDirectionLinear.GAP_LEFT
    if gap_top_score == best:
        trace |= TraceDirectionLinear.GAP_TOP

    max_score[0] = best
    return trace
74
+
75
+
76
cdef inline np.uint8_t get_trace_affine(np.int32_t match_to_match_score,
                                        np.int32_t gap_left_to_match_score,
                                        np.int32_t gap_top_to_match_score,
                                        np.int32_t match_to_gap_left_score,
                                        np.int32_t gap_left_to_gap_left_score,
                                        np.int32_t match_to_gap_top_score,
                                        np.int32_t gap_top_to_gap_top_score,
                                        np.int32_t *max_match_score,
                                        np.int32_t *max_gap_left_score,
                                        np.int32_t *max_gap_top_score):
    """
    Determine the maximum score for each of the three score tables and
    the trace direction belonging to them for an affine gap penalty.

    The maxima are written to ``max_match_score[0]``,
    ``max_gap_left_score[0]`` and ``max_gap_top_score[0]``.
    The returned value has the flag of every transition set, whose
    score is equal to the maximum of its table, i.e. ties set multiple
    :class:`TraceDirectionAffine` flags.
    """
    cdef np.uint8_t trace = 0
    cdef np.int32_t best

    # Match table: three transitions are possible
    best = match_to_match_score
    if gap_left_to_match_score > best:
        best = gap_left_to_match_score
    if gap_top_to_match_score > best:
        best = gap_top_to_match_score
    if match_to_match_score == best:
        trace |= TraceDirectionAffine.MATCH_TO_MATCH
    if gap_left_to_match_score == best:
        trace |= TraceDirectionAffine.GAP_LEFT_TO_MATCH
    if gap_top_to_match_score == best:
        trace |= TraceDirectionAffine.GAP_TOP_TO_MATCH
    max_match_score[0] = best

    # 'Gap left' table: two transitions are possible
    best = match_to_gap_left_score
    if gap_left_to_gap_left_score > best:
        best = gap_left_to_gap_left_score
    if match_to_gap_left_score == best:
        trace |= TraceDirectionAffine.MATCH_TO_GAP_LEFT
    if gap_left_to_gap_left_score == best:
        trace |= TraceDirectionAffine.GAP_LEFT_TO_GAP_LEFT
    max_gap_left_score[0] = best

    # 'Gap top' table: two transitions are possible
    best = match_to_gap_top_score
    if gap_top_to_gap_top_score > best:
        best = gap_top_to_gap_top_score
    if match_to_gap_top_score == best:
        trace |= TraceDirectionAffine.MATCH_TO_GAP_TOP
    if gap_top_to_gap_top_score == best:
        trace |= TraceDirectionAffine.GAP_TOP_TO_GAP_TOP
    max_gap_top_score[0] = best

    return trace
164
+
165
+
166
cdef int follow_trace(np.uint8_t[:,:] trace_table,
                      bint banded,
                      int i, int j, int pos,
                      np.int64_t[:,:] trace,
                      list trace_list,
                      int state,
                      int* curr_trace_count,
                      int max_trace_count,
                      int lower_diag, int upper_diag) except -1:
    """
    Follow and return traces from a trace table.

    Parameters
    ----------
    trace_table
        A matrix containing values indicating the direction for the
        traceback.
    banded
        Whether the trace table belongs to a banded alignment.
    i, j
        The current position in the trace table.
        For the first branch, this is the start of the traceback.
        For additional branches this is the start of the respective
        branch.
    pos
        The current position in the trace array to be created.
        For the first branch, this is 0.
        For additional branches the value of the parent branch is taken.
    trace
        The alignment trace array to be filled.
    trace_list
        When a trace is finished, it is appended to this list.
    state
        The current score table (*match*, *gap left*, *gap top*)
        the traceback is in, taken from parent branch.
        Always 0 when a linear gap penalty is used.
    curr_trace_count
        The current number of branches. The value is a pointer, so that
        updating this value propagates the value to all other branches.
    max_trace_count
        The maximum number of branches created. When the number of
        branches reaches this value, no new branches are created.
    lower_diag, upper_diag
        The lower and upper diagonal for a banded alignment.
        Unused, if `banded` is false.
        `upper_diag` is only passed on to the branch calls.

    Returns
    -------
    int
        ``0`` if no exception is raised, otherwise ``-1``.
    """
    cdef list next_indices
    cdef list next_states
    cdef int trace_value
    cdef int k
    cdef int seq_i, seq_j
    cdef int new_i, new_j, new_state
    cdef int i_match, i_gap_left, i_gap_top
    cdef int j_match, j_gap_left, j_gap_top

    # For affine gap penalty each score table uses its own subset of
    # the trace direction flags
    cdef int match_mask = (
        TraceDirectionAffine.MATCH_TO_MATCH |
        TraceDirectionAffine.GAP_LEFT_TO_MATCH |
        TraceDirectionAffine.GAP_TOP_TO_MATCH
    )
    cdef int gap_left_mask = (
        TraceDirectionAffine.MATCH_TO_GAP_LEFT |
        TraceDirectionAffine.GAP_LEFT_TO_GAP_LEFT
    )
    cdef int gap_top_mask = (
        TraceDirectionAffine.MATCH_TO_GAP_TOP |
        TraceDirectionAffine.GAP_TOP_TO_GAP_TOP
    )

    while True:
        # Get value of trace corresponding to current state
        # = table the trace is currently in
        if state == TraceState.NO_STATE:
            # Linear gap penalty -> the entire cell value is relevant
            trace_value = trace_table[i,j]
        elif state == TraceState.MATCH_STATE:
            trace_value = trace_table[i,j] & match_mask
        elif state == TraceState.GAP_LEFT_STATE:
            trace_value = trace_table[i,j] & gap_left_mask
        elif state == TraceState.GAP_TOP_STATE:
            trace_value = trace_table[i,j] & gap_top_mask
        else:
            # Unknown state -> there is nothing to follow
            break
        # No trace direction for the current state -> the trace ends
        # The '0'-cell itself is also not included in the traceback
        if trace_value == 0:
            break

        # -1 is necessary due to the shift of the sequences
        # to the bottom/right in the table
        seq_i = i - 1
        i_match, i_gap_left, i_gap_top = i-1, i, i-1
        if banded:
            seq_j = j + seq_i + lower_diag - 1
            j_match, j_gap_left, j_gap_top = j  , j-1, j+1
        else:
            seq_j = j - 1
            j_match, j_gap_left, j_gap_top = j-1, j-1, j
        trace[pos, 0] = seq_i
        trace[pos, 1] = seq_j
        pos += 1

        # Determine indices and state of next trace step
        # Traces may split -> multiple next steps are possible
        next_indices = []
        next_states = []
        if state == TraceState.NO_STATE:
            if trace_value & TraceDirectionLinear.MATCH:
                next_indices.append((i_match, j_match))
                next_states.append(TraceState.NO_STATE)
            if trace_value & TraceDirectionLinear.GAP_LEFT:
                next_indices.append((i_gap_left, j_gap_left))
                next_states.append(TraceState.NO_STATE)
            if trace_value & TraceDirectionLinear.GAP_TOP:
                next_indices.append((i_gap_top, j_gap_top))
                next_states.append(TraceState.NO_STATE)
        else:
            if trace_value & TraceDirectionAffine.MATCH_TO_MATCH:
                next_indices.append((i_match, j_match))
                next_states.append(TraceState.MATCH_STATE)
            if trace_value & TraceDirectionAffine.GAP_LEFT_TO_MATCH:
                next_indices.append((i_match, j_match))
                next_states.append(TraceState.GAP_LEFT_STATE)
            if trace_value & TraceDirectionAffine.GAP_TOP_TO_MATCH:
                next_indices.append((i_match, j_match))
                next_states.append(TraceState.GAP_TOP_STATE)
            if trace_value & TraceDirectionAffine.MATCH_TO_GAP_LEFT:
                next_indices.append((i_gap_left, j_gap_left))
                next_states.append(TraceState.MATCH_STATE)
            if trace_value & TraceDirectionAffine.GAP_LEFT_TO_GAP_LEFT:
                next_indices.append((i_gap_left, j_gap_left))
                next_states.append(TraceState.GAP_LEFT_STATE)
            if trace_value & TraceDirectionAffine.MATCH_TO_GAP_TOP:
                next_indices.append((i_gap_top, j_gap_top))
                next_states.append(TraceState.MATCH_STATE)
            if trace_value & TraceDirectionAffine.GAP_TOP_TO_GAP_TOP:
                next_indices.append((i_gap_top, j_gap_top))
                next_states.append(TraceState.GAP_TOP_STATE)

        # Trace branching
        # -> Recursive call of follow_trace() for indices[1:]
        # Each branch continues on its own copy of the trace
        for k in range(1, len(next_indices)):
            if curr_trace_count[0] < max_trace_count:
                curr_trace_count[0] += 1
                new_i, new_j = next_indices[k]
                new_state = next_states[k]
                follow_trace(
                    trace_table, banded, new_i, new_j, pos,
                    np.copy(trace), trace_list, new_state,
                    curr_trace_count, max_trace_count,
                    lower_diag, upper_diag
                )
        # Continue in this function with indices[0] and states[0]
        i, j = next_indices[0]
        state = next_states[0]

    # Trim trace to correct size (delete all pure -1 entries)
    # and append to trace_list
    tr_arr = np.asarray(trace)
    trace_list.append(tr_arr[(tr_arr[:,0] != -1) | (tr_arr[:,1] != -1)])
    return 0