biotite 1.6.0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +426 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +202 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +66 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +224 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +259 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +191 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +127 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +491 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +763 -0
  73. biotite/sequence/align/banded.cp314-win_amd64.pyd +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cp314-win_amd64.pyd +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cp314-win_amd64.pyd +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cp314-win_amd64.pyd +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cp314-win_amd64.pyd +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cp314-win_amd64.pyd +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cp314-win_amd64.pyd +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cp314-win_amd64.pyd +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cp314-win_amd64.pyd +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cp314-win_amd64.pyd +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cp314-win_amd64.pyd +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cp314-win_amd64.pyd +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +462 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cp314-win_amd64.pyd +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cp314-win_amd64.pyd +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cp314-win_amd64.pyd +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1596 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cp314-win_amd64.pyd +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cp314-win_amd64.pyd +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cp314-win_amd64.pyd +0 -0
  272. biotite/structure/charges.pyx +521 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +646 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +426 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cp314-win_amd64.pyd +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2122 -0
  323. biotite/structure/io/pdbx/encoding.cp314-win_amd64.pyd +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +452 -0
  338. biotite/structure/sasa.cp314-win_amd64.pyd +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.6.0.dist-info/METADATA +162 -0
  352. biotite-1.6.0.dist-info/RECORD +354 -0
  353. biotite-1.6.0.dist-info/WHEEL +4 -0
  354. biotite-1.6.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,892 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.align"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["align_local_gapped"]
8
+
9
+ cimport cython
10
+ cimport numpy as np
11
+ from .tracetable cimport follow_trace, get_trace_linear, get_trace_affine
12
+
13
+ import itertools
14
+ import numpy as np
15
+ from .alignment import Alignment
16
+
17
+
18
+ ctypedef np.int32_t int32  # used below for the scores and the X-drop threshold
19
+ ctypedef np.int64_t int64
20
+ ctypedef np.uint8_t uint8
21
+ ctypedef np.uint16_t uint16
22
+ ctypedef np.uint32_t uint32
23
+ ctypedef np.uint64_t uint64
24
+
25
+ ctypedef fused CodeType1:  # any unsigned width; presumably the dtype of the first sequence code -- TODO confirm
26
+ uint8
27
+ uint16
28
+ uint32
29
+ uint64
30
+ ctypedef fused CodeType2:  # same set of widths; presumably for the second sequence code -- TODO confirm
31
+ uint8
32
+ uint16
33
+ uint32
34
+ uint64
35
+
36
+
37
+ cdef int INIT_SIZE = 100  # upper bound per dimension for the initially allocated tables in _align_region()
38
+
39
+
40
+ def align_local_gapped(seq1, seq2, matrix, seed, int32 threshold,
41
+ gap_penalty=-10, max_number=1,
42
+ direction="both", score_only=False,
43
+ max_table_size=None):
44
+ """
45
+ align_local_gapped(seq1, seq2, matrix, seed, threshold,
46
+ gap_penalty=-10, max_number=1,
47
+ direction="both", score_only=False,
48
+ max_table_size=None)
49
+
50
+ Perform a local gapped alignment extending from a given `seed`
51
+ position.
52
+
53
+ The alignment extends into one or both directions (controlled by
54
+ `direction`) until the total alignment score falls more than
55
+ `threshold` below the maximum score found (*X-Drop*).
56
+ :footcite:`Zhang2000`
57
+ The returned alignment contains the range that yielded the maximum
58
+ score.
59
+
60
+ Parameters
61
+ ----------
62
+ seq1, seq2 : Sequence
63
+ The sequences to be aligned.
64
+ matrix : SubstitutionMatrix
65
+ The substitution matrix used for scoring.
66
+ seed : tuple(int, int)
67
+ The indices in `seq1` and `seq2` where the local alignment
68
+ starts.
69
+ The indices must be non-negative.
70
+ threshold : int
71
+ If the current score falls this value below the maximum score
72
+ found, the alignment terminates.
73
+ gap_penalty : int or tuple(int, int), optional
74
+ If an integer is provided, the value will be interpreted as
75
+ linear gap penalty.
76
+ If a tuple is provided, an affine gap penalty is used
77
+ :footcite:`Gotoh1982`.
78
+ The first integer in the tuple is the gap opening penalty,
79
+ the second integer is the gap extension penalty.
83
+ max_number : int, optional
84
+ The maximum number of alignments returned.
85
+ When the number of branches exceeds this value in the traceback
86
+ step, no further branches are created.
87
+ By default, only a single alignment is returned.
88
+ direction : {'both', 'upstream', 'downstream'}, optional
89
+ Controls in which direction the alignment extends starting
90
+ from the seed.
91
+ If ``'upstream'``, the alignment starts before the `seed` and
92
+ ends at the `seed`.
93
+ If ``'downstream'``, the alignment starts at the `seed` and
94
+ ends behind the `seed`.
95
+ If ``'both'`` (default) the alignment starts before the `seed`
96
+ and ends behind the `seed`.
97
+ The `seed` position itself is always included in the alignment.
98
+ score_only : bool, optional
99
+ If set to ``True``, only the similarity score is returned
100
+ instead of the :class:`Alignment`, decreasing the runtime
101
+ substantially.
102
+ max_table_size : int, optional
103
+ A :class:`MemoryError` is raised, if the number of cells
104
+ in the internal dynamic programming table, i.e. approximately
105
+ the product of the lengths of the aligned regions, would exceed
106
+ the given value.
107
+
108
+ Returns
109
+ -------
110
+ alignments : list of Alignment
111
+ A list of found alignments.
112
+ Each alignment in the list has the same similarity
113
+ score.
114
+ Only returned, if `score_only` is ``False``.
115
+ score : int
116
+ The alignment similarity score.
117
+ Only returned, if `score_only` is ``True``.
118
+
119
+ See Also
120
+ --------
121
+ align_ungapped
122
+ For ungapped local alignments with the same *X-Drop* technique.
123
+
124
+ Notes
125
+ -----
126
+ Unlike :func:`align_optimal()`, this function does not allocate
127
+ memory proportional to the length of both sequences, but only
128
+ approximately proportional to lengths of the aligned regions.
129
+ In principle, this makes this function viable for local alignments
130
+ of sequences of any length.
131
+ However, if the product of the lengths of the homologous regions
132
+ is too large to fit into memory, a :class:`MemoryError` or even a
133
+ crash may occur.
134
+ This may also happen in spurious long alignments due to poor choice
135
+ of substitution matrix or gap penalty.
136
+ You may set `max_table_size` to avoid excessive memory use and
137
+ crashes.
138
+
139
+ References
140
+ ----------
141
+
142
+ .. footbibliography::
143
+
144
+ Examples
145
+ --------
146
+
147
+ >>> seq1 = NucleotideSequence("CGTAGCTATCGCCTGTACGGTT")
148
+ >>> seq2 = NucleotideSequence("TATATGCCTTACGGAATTGCTTTTT")
149
+ >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
150
+ >>> alignment = align_local_gapped(
151
+ ... seq1, seq2, matrix, seed=(16, 10), threshold=20
152
+ ... )[0]
153
+ >>> print(alignment)
154
+ TATCGCCTGTACGG
155
+ TAT-GCCT-TACGG
156
+ >>> alignment = align_local_gapped(
157
+ ... seq1, seq2, matrix, seed=(16, 10), threshold=20, direction="upstream"
158
+ ... )[0]
159
+ >>> print(alignment)
160
+ TATCGCCTGTA
161
+ TAT-GCCT-TA
162
+ >>> alignment = align_local_gapped(
163
+ ... seq1, seq2, matrix, seed=(16, 10), threshold=20, direction="downstream"
164
+ ... )[0]
165
+ >>> print(alignment)
166
+ ACGG
167
+ ACGG
168
+ >>> score = align_local_gapped(
169
+ ... seq1, seq2, matrix, seed=(16, 10), threshold=20, score_only=True
170
+ ... )
171
+ >>> print(score)
172
+ 40
173
+ """
174
+ # Check matrix alphabets
175
+ if not matrix.get_alphabet1().extends(seq1.get_alphabet()) \
176
+ or not matrix.get_alphabet2().extends(seq2.get_alphabet()):
177
+ raise ValueError("The sequences' alphabets do not fit the matrix")
178
+ score_matrix = matrix.score_matrix()
179
+
180
+ # Check if gap penalty is linear or affine
181
+ if type(gap_penalty) == int:  # exact type match: bool and NumPy integers fall through to the TypeError below
182
+ if gap_penalty >= 0:
183
+ raise ValueError("Gap penalty must be negative")
184
+ elif type(gap_penalty) == tuple:
185
+ if gap_penalty[0] >= 0 or gap_penalty[1] >= 0:
186
+ raise ValueError("Gap penalty must be negative")
187
+ else:
188
+ raise TypeError("Gap penalty must be either integer or tuple")
189
+
190
+ # Check if max_number is reasonable
191
+ if max_number < 1:
192
+ raise ValueError(
193
+ "Maximum number of returned alignments must be at least 1"
194
+ )
195
+
196
+ # Check maximum table size
197
+ if max_table_size is None:
198
+ max_table_size = np.iinfo(np.int64).max
199
+ elif max_table_size <= 0:
200
+ raise ValueError("Maximum table size must be a positve value")  # NOTE(review): message typo, should read 'positive'
201
+
202
+
203
+ code1 = seq1.code
204
+ code2 = seq2.code
205
+
206
+ cdef int seq1_start, seq2_start
207
+ seq1_start, seq2_start = seed
208
+ if seq1_start < 0 or seq2_start < 0:
209
+ raise IndexError("Seed must contain positive indices")  # NOTE(review): 0 is accepted, 'non-negative' would be accurate
210
+ if seq1_start >= len(code1) or seq2_start >= len(code2):
211
+ raise IndexError(
212
+ f"Seed {(seq1_start, seq2_start)} is out of bounds "
213
+ f"for the sequences of length {len(code1)} and {len(code2)}"
214
+ )
215
+
216
+
217
+ cdef bint upstream
218
+ cdef bint downstream
219
+ if direction == "both":
220
+ upstream = True
221
+ downstream = True
222
+ elif direction == "upstream":
223
+ upstream = True
224
+ downstream = False
225
+ elif direction == "downstream":
226
+ upstream = False
227
+ downstream = True
228
+ else:
229
+ raise ValueError(f"Direction '{direction}' is invalid")
230
+ # No upstream region exists for a seed at index 0 (the slice start below would wrap around to -1)
231
+ if seq1_start == 0 or seq2_start == 0:
232
+ upstream = False
233
+
234
+ if threshold < 0:
235
+ raise ValueError("The threshold value must be a non-negative integer")
236
+
237
+
238
+ cdef int32 score
239
+ cdef int32 total_score = 0
240
+ # Separate alignment into two parts:
241
+ # the regions upstream and downstream from the seed position
242
+ if upstream:
243
+ # For the upstream region the respective part of the sequence
244
+ # must be reversed
245
+ score, upstream_traces = _align_region(
246
+ code1[seq1_start-1::-1], code2[seq2_start-1::-1],
247
+ score_matrix, threshold, gap_penalty,
248
+ max_number, score_only, max_table_size
249
+ )
250
+ total_score += score
251
+ if upstream_traces is not None:
252
+ # Undo the sequence reversing
253
+ upstream_traces = [trace[::-1] for trace in upstream_traces]
254
+ offset = np.array(seed) - 1  # position directly upstream of the seed
255
+ for trace in upstream_traces:
256
+ # Gap values (-1) are not transformed,
257
+ # as gaps are not indices
258
+ non_gap_mask = (trace != -1)
259
+ # Index k in the reversed region is 'offset - k' in the original sequence
260
+ trace[non_gap_mask] *= -1
261
+ # Add seed offset to trace indices
262
+ trace[non_gap_mask[:, 0], 0] += offset[0]
263
+ trace[non_gap_mask[:, 1], 1] += offset[1]
264
+
265
+ if downstream:
266
+ score, downstream_traces = _align_region(
267
+ code1[seq1_start+1:], code2[seq2_start+1:],
268
+ score_matrix, threshold, gap_penalty,
269
+ max_number, score_only, max_table_size
270
+ )
271
+ total_score += score
272
+ if downstream_traces is not None:
273
+ offset = np.array(seed) + 1  # the downstream region starts directly behind the seed
274
+ for trace in downstream_traces:
275
+ trace[trace[:, 0] != -1, 0] += offset[0]
276
+ trace[trace[:, 1] != -1, 1] += offset[1]
277
+
278
+ total_score += score_matrix[code1[seq1_start], code2[seq2_start]]  # the seed pair itself is always part of the alignment
279
+
280
+
281
+ if score_only:
282
+ return total_score
283
+ else:
284
+ if upstream and downstream:
285
+ # Create cartesian product of upstream and downstream traces
286
+ # Only consider max_number alignments
287
+ traces = [
288
+ np.concatenate([upstream_trace, [seed], downstream_trace])
289
+ for _, (upstream_trace, downstream_trace) in zip(
290
+ range(max_number),
291
+ itertools.product(upstream_traces, downstream_traces)
292
+ )
293
+ ]
294
+ elif upstream:
295
+ traces = [
296
+ np.concatenate([trace, [seed]]) for trace in upstream_traces
297
+ ]
298
+ elif downstream:
299
+ traces = [
300
+ np.concatenate([[seed], trace]) for trace in downstream_traces
301
+ ]
302
+ else:
303
+ # 'direction == "upstream"', but the start index is 0 so no
304
+ # upstream alignment is performed
305
+ # -> the trace includes only the seed
306
+ traces = [np.array(seed)[np.newaxis, :]]
307
+
308
+ return [Alignment([seq1, seq2], trace, total_score)
309
+ for trace in traces]
310
+
311
+
312
+ def _align_region(code1, code2, matrix, threshold, gap_penalty,
313
+ max_number, score_only, max_table_size):
314
+ """
315
+ Perform a local *X-Drop* alignment extending from the start of the
316
+ given sequences
317
+
318
+ Parameters
319
+ ----------
320
+ code1, code2 : ndarray, dtype={np.uint8, np.uint16, np.uint32, np.uint64}
321
+ The code of the sequences to be aligned.
322
+ matrix : ndarray, shape(k, k), dtype=np.int32
323
+ The score matrix.
324
+ threshold : int
325
+ If the current score falls this value below the maximum score
326
+ found, the alignment terminates.
327
+ gap_penalty : int or tuple(int, int)
328
+ If an integer is provided, the value will be interpreted as
329
+ linear gap penalty.
330
+ If a tuple is provided, an affine gap penalty is used [2]_.
331
+ The first integer in the tuple is the gap opening penalty,
332
+ the second integer is the gap extension penalty.
333
+ threshold : int
334
+ If the current score falls this value below the maximum score
335
+ found, the alignment terminates.
336
+ max_number : int
337
+ The maximum number of alignments returned.
338
+ When the number of branches exceeds this value in the traceback
339
+ step, no further branches are created.
340
+ score_only : bool
341
+ If set to ``True``, only the similarity score is calculated and
342
+ the traceback is not conducted.
343
+ max_table_size : int
344
+ Raise a :class:`MemoryError`, if a dynamic programming table
345
+ exceeds this size.
346
+
347
+ Returns
348
+ -------
349
+ score : int or None
350
+ The alignment similarity score.
351
+ trace : list of (ndarray, shape=(n,2), dtype=int) or None
352
+ A list of alignment traces, where each trace corresponds to an
353
+ alignment with the maximum similarity score found.
354
+ This list has only multiple elements if there are multiple
355
+ traces, that correspond to the same maximum similarity score.
356
+ ``None``, if `score_only` is ``False``.
357
+ """
358
+ if type(gap_penalty) == int:
359
+ affine_penalty = False
360
+ else:
361
+ affine_penalty = True
362
+
363
+
364
+
365
+ init_size = (
366
+ _min(len(code1)+1, INIT_SIZE),
367
+ _min(len(code2)+1, INIT_SIZE)
368
+ )
369
+ trace_table = np.zeros(init_size, dtype=np.uint8)
370
+
371
+
372
+ # Table filling
373
+ ###############
374
+ # Set the initial (upper left) score value to 'threshold + 1',
375
+ # to be able to use '0' as minus infinity value
376
+ init_score = threshold + 1
377
+ if affine_penalty:
378
+ m_table = np.zeros(init_size, dtype=np.int32)
379
+ g1_table = np.zeros(init_size, dtype=np.int32)
380
+ g2_table = np.zeros(init_size, dtype=np.int32)
381
+ # This implementation does not initialize the entire first
382
+ # row/column to avoid issues with premature pruning in the table
383
+ # filling process
384
+ m_table[0,0] = init_score
385
+ trace_table, m_table, g1_table, g2_table = _fill_align_table_affine(
386
+ code1, code2, matrix, trace_table, m_table, g1_table, g2_table,
387
+ threshold, gap_penalty[0], gap_penalty[1], score_only,
388
+ max_table_size
389
+ )
390
+ else:
391
+ score_table = np.zeros(init_size, dtype=np.int32)
392
+ score_table[0,0] = init_score
393
+ trace_table, score_table = _fill_align_table(
394
+ code1, code2, matrix, trace_table, score_table, threshold,
395
+ gap_penalty, score_only, max_table_size
396
+ )
397
+
398
+ # If only the score is desired, the traceback is not necessary
399
+ if score_only:
400
+ if affine_penalty:
401
+ # The maximum score in the gap score tables do not need to
402
+ # be considered, as these starting positions would indicate
403
+ # that the alignment starts with a gap
404
+ # Hence the maximum score value in these tables is always
405
+ # less than in the match table
406
+ max_score = np.max(m_table)
407
+ else:
408
+ max_score = np.max(score_table)
409
+ # The initial score needs to be subtracted again,
410
+ # since it was artificially added for convenience resaons
411
+ return max_score - init_score, None
412
+
413
+
414
+ # Traceback
415
+ ###########
416
+ # Stores all possible traces (= possible alignments)
417
+ # A trace stores the indices of the aligned symbols
418
+ # in both sequences
419
+ trace_list = []
420
+ # Lists of trace starting indices
421
+ i_list = np.zeros(0, dtype=int)
422
+ j_list = np.zeros(0, dtype=int)
423
+ # List of start states
424
+ # State specifies the table the trace starts in
425
+ state_list = np.zeros(0, dtype=int)
426
+ # The start point is the maximal score in the table
427
+ # Multiple starting points possible,
428
+ # when duplicates of maximal score exist
429
+ if affine_penalty:
430
+ # Only consicder match table (see reason above)
431
+ max_score = np.max(m_table)
432
+ i_list, j_list = np.where((m_table == max_score))
433
+ state_list = np.append(state_list, np.full(len(i_list), 1))
434
+ else:
435
+ max_score = np.max(score_table)
436
+ i_list, j_list = np.where((score_table == max_score))
437
+ # State is always 0 for linear gap penalty
438
+ # since there is only one table
439
+ state_list = np.zeros(len(i_list), dtype=int)
440
+
441
+ # Follow the traces specified in state and indices lists
442
+ cdef int curr_trace_count
443
+ for k in range(len(i_list)):
444
+ i_start = i_list[k]
445
+ j_start = j_list[k]
446
+ state_start = state_list[k]
447
+ # Pessimistic array allocation:
448
+ # The maximum trace length arises from an alignment, where each
449
+ # symbol is aligned to a gap
450
+ trace = np.full(( i_start+1 + j_start+1, 2 ), -1, dtype=np.int64)
451
+ curr_trace_count = 1
452
+ follow_trace(
453
+ trace_table, False, i_start, j_start, 0, trace, trace_list,
454
+ state=state_start, curr_trace_count=&curr_trace_count,
455
+ max_trace_count=max_number,
456
+ # Diagonals are only needed for banded alignments
457
+ lower_diag=0, upper_diag=0
458
+ )
459
+
460
+ # Replace gap entries in trace with -1
461
+ for i, trace in enumerate(trace_list):
462
+ trace = np.flip(trace, axis=0)
463
+ gap_filter = np.zeros(trace.shape, dtype=bool)
464
+ gap_filter[np.unique(trace[:,0], return_index=True)[1], 0] = True
465
+ gap_filter[np.unique(trace[:,1], return_index=True)[1], 1] = True
466
+ trace[~gap_filter] = -1
467
+ trace_list[i] = trace
468
+
469
+ # Limit the number of generated alignments to `max_number`:
470
+ # In most cases this is achieved by discarding branches in
471
+ # 'follow_trace()', however, if multiple local alignment starts
472
+ # are used, the number of created traces are the number of
473
+ # starts times `max_number`
474
+ trace_list = trace_list[:max_number]
475
+
476
+ return max_score - init_score, trace_list
477
+
478
+
479
@cython.boundscheck(False)
@cython.wraparound(False)
def _fill_align_table(CodeType1[:] code1 not None,
                      CodeType2[:] code2 not None,
                      const int32[:,:] matrix not None,
                      uint8[:,:] trace_table not None,
                      int32[:,:] score_table not None,
                      int32 threshold,
                      int32 gap_penalty,
                      bint score_only,
                      int64 max_table_size):
    """
    Fill an alignment table with linear gap penalty using dynamic
    programming.

    The table is filled antidiagonal by antidiagonal.
    A cell is only written, if its score is at most `threshold` below
    the maximum score found so far; all other cells keep their initial
    value.
    A score of ``0`` is treated as marker for such an invalid cell.
    The iteration stops as soon as an antidiagonal contains no range
    of candidate cells anymore.

    Parameters
    ----------
    code1, code2
        The sequence code of each sequence to be aligned.
    matrix
        The score matrix obtained from the :class:`SubstitutionMatrix`
        object.
    trace_table
        The initial matrix containing values indicating the direction
        for the traceback step.
    score_table
        The initial score table.
        The value at position ``[0, 0]`` is used as initial maximum
        score.
    threshold
        An alignment cell is invalidated if the total similarity score
        is this threshold below the maximum similarity score found so
        far.
    gap_penalty
        The linear gap penalty.
    score_only
        If true, the trace table is not filled.
    max_table_size : int64
        Raise a :class:`MemoryError`, if a dynamic programming table
        exceeds this size.

    Returns
    -------
    trace_table
        The filled trace table.
    score_table
        The filled score table.

    Notes
    -----
    Both returned tables are trimmed to the largest row and column
    index that was part of a candidate range during the iteration.
    """
    cdef int i, j, k=0
    # The ranges for i in the current (k=0)
    # and previous (k=1, k=2) antidiagonals, that point to valid cells
    cdef int i_min_k_0=0, i_max_k_0=0
    cdef int i_min_k_1=0, i_max_k_1=0
    cdef int i_min_k_2=0, i_max_k_2=0
    # The pruned range for i and j in the current antidiagonal,
    # calculated from the previous antidiagonals
    cdef int i_min, i_max
    cdef int j_max
    # The maximum values for i and j ever encountered while iterating
    # over the antidiagonals -> used for final trimming of tables
    cdef int i_max_total=0, j_max_total=0

    cdef int32 from_diag, from_left, from_top
    cdef uint8 trace = 0
    cdef int32 score = 0
    # The caller-provided start value is the first maximum score
    cdef int32 max_score = score_table[0, 0]
    # The minimum score a cell needs to be considered valid
    cdef int32 req_score = max_score - threshold

    # Instead of iteration over row and column,
    # iterate over antidiagonals and diagonals to achieve symmetric
    # treatment of both sequences
    # k is the index of the antidiagonal, i.e. 'k = i + j'
    for k in range(1, code1.shape[0] + code2.shape[0] + 1):
        # Prepare values for iteration
        # -> shift the valid ranges by one antidiagonal
        i_min_k_2 = i_min_k_1
        i_max_k_2 = i_max_k_1
        i_min_k_1 = i_min_k_0
        i_max_k_1 = i_max_k_0
        # Reset values for iteration to most 'restrictive' values
        # These restrictive values are overwritten in the next iteration
        # if valid cells are present
        i_min_k_0 = k
        i_max_k_0 = 0

        # Prune index range for antidiagonal
        # to range where valid cells exist:
        # A cell (i, j) reads from (i, j-1) and (i-1, j) in the
        # previous antidiagonal and from (i-1, j-1) in the
        # antidiagonal before that
        i_min = _min(i_min_k_1, i_min_k_2 + 1)
        i_max = _max(i_max_k_1 + 1, i_max_k_2 + 1)
        # The index must also not be out of sequence range
        i_min = _max(i_min, k - code2.shape[0])
        i_max = _min(i_max, code1.shape[0])
        # The algorithm has finished,
        # if the calculated antidiagonal has no range of valid cells
        if i_min > i_max:
            break

        # The largest column index appears at the smallest row index
        j_max = k - i_min
        # Expand ndarrays
        # if their size would be exceeded in the following iteration
        # The trace table is not used, if only the score is requested,
        # hence it is not extended in this case
        if i_max >= score_table.shape[0]:
            score_table = _extend_table(
                np.asarray(score_table), 0, max_table_size
            )
            if not score_only:
                trace_table = _extend_table(
                    np.asarray(trace_table), 0, max_table_size
                )
        if j_max >= score_table.shape[1]:
            score_table = _extend_table(
                np.asarray(score_table), 1, max_table_size
            )
            if not score_only:
                trace_table = _extend_table(
                    np.asarray(trace_table), 1, max_table_size
                )
        i_max_total = _max(i_max_total, i_max)
        j_max_total = _max(j_max_total, j_max)

        for i in range(i_min, i_max+1):
            j = k - i

            # Evaluate score from diagonal direction
            if i != 0 and j != 0:
                from_diag = score_table[i-1, j-1]
                # Check if score stems from cells that are valid
                if from_diag != 0:
                    # -1 in sequence index is necessary
                    # due to the shift of the sequences
                    # to the bottom/right in the table
                    from_diag += matrix[code1[i-1], code2[j-1]]
                else:
                    from_diag = 0
            else:
                from_diag = 0
            # Evaluate score through gap insertion
            # NOTE(review): no explicit validity check here -
            # presumably an invalid (0) predecessor plus a negative
            # gap penalty always falls below 'req_score';
            # assumes 'gap_penalty' is negative - TODO confirm
            if i != 0:
                from_top = score_table[i-1, j] + gap_penalty
            else:
                from_top = 0
            if j != 0:
                from_left = score_table[i, j-1] + gap_penalty
            else:
                from_left = 0

            if score_only:
                score = _max(from_diag, _max(from_left, from_top))
            else:
                # 'score' is passed by pointer - presumably it receives
                # the maximum of the three given scores
                trace = get_trace_linear(
                    from_diag, from_left, from_top, &score
                )

            # Check if the obtained score reaches the required threshold
            # and if they even exceed the maximum score
            if score >= req_score:
                if i_min_k_0 == k:
                    # 'i_min_k_0 == k'
                    # -> i_min_k_0 has not been set in this iteration, yet
                    i_min_k_0 = i
                # i increases monotonically
                # -> the last valid i is the maximum
                i_max_k_0 = i
                score_table[i,j] = score
                if not score_only:
                    trace_table[i,j] = trace
                if score > max_score:
                    max_score = score
                    req_score = max_score - threshold


    # Trim the tables to the region covered by the iteration
    return np.asarray(trace_table)[:i_max_total+1, :j_max_total+1], \
           np.asarray(score_table)[:i_max_total+1, :j_max_total+1]
645
+
646
+
647
@cython.boundscheck(False)
@cython.wraparound(False)
def _fill_align_table_affine(CodeType1[:] code1 not None,
                             CodeType2[:] code2 not None,
                             const int32[:,:] matrix not None,
                             uint8[:,:] trace_table not None,
                             int32[:,:] m_table not None,
                             int32[:,:] g1_table not None,
                             int32[:,:] g2_table not None,
                             int32 threshold,
                             int32 gap_open,
                             int32 gap_ext,
                             bint score_only,
                             int64 max_table_size):
    """
    Fill an alignment table with affine gap penalty using dynamic
    programming.

    The tables are filled antidiagonal by antidiagonal.
    A score is only written into its table, if it is at most
    `threshold` below the maximum score found so far; all other
    entries keep their initial value.
    A score of ``0`` is treated as marker for such an invalid entry.
    The iteration stops as soon as an antidiagonal contains no range
    of candidate cells anymore.

    Parameters
    ----------
    code1, code2
        The sequence code of each sequence to be aligned.
    matrix
        The score matrix obtained from the :class:`SubstitutionMatrix`
        object.
    trace_table
        The initial matrix containing values indicating the direction
        for the traceback step.
    m_table, g1_table, g2_table
        The alignment tables containing the scores.
        `m_table` contains values for matches.
        `g1_table` contains values for gaps in the first sequence.
        `g2_table` contains values for gaps in the second sequence.
        The value of `m_table` at position ``[0, 0]`` is used as
        initial maximum score.
    threshold
        An alignment cell is invalidated if the total similarity score
        is this threshold below the maximum similarity score found so
        far.
    gap_open
        The gap opening penalty.
    gap_ext
        The gap extension penalty.
    score_only
        If true, the trace table is not filled.
    max_table_size : int64
        Raise a :class:`MemoryError`, if a dynamic programming table
        exceeds this size.

    Returns
    -------
    trace_table
        The filled trace table.
    m_table, g1_table, g2_table
        The filled score tables.

    Notes
    -----
    All returned tables are trimmed to the largest row and column
    index that was part of a candidate range during the iteration.
    """
    cdef int i, j, k=0
    # The ranges for i in the current (k=0)
    # and previous (k=1, k=2) antidiagonals, that point to valid cells
    cdef int i_min_k_0=0, i_max_k_0=0
    cdef int i_min_k_1=0, i_max_k_1=0
    cdef int i_min_k_2=0, i_max_k_2=0
    # The pruned range for i and j in the current antidiagonal,
    # calculated from the previous antidiagonals
    cdef int i_min, i_max
    cdef int j_max
    # The maximum values for i and j ever encountered while iterating
    # over the antidiagonals -> used for final trimming of tables
    cdef int i_max_total=0, j_max_total=0

    cdef uint8 trace = 0
    # Whether at least one of the three scores of a cell is valid
    cdef bint is_valid_cell

    # Naming scheme: '<source table><target table>_score'
    cdef int32 mm_score, g1m_score, g2m_score
    cdef int32 mg1_score, g1g1_score
    cdef int32 mg2_score, g2g2_score
    # The best score for each target table
    cdef int32 m_score, g1_score, g2_score
    cdef int32 similarity_score
    # The caller-provided start value is the first maximum score
    cdef int32 max_score = m_table[0, 0]
    # The minimum score a table entry needs to be considered valid
    cdef int32 req_score = max_score - threshold

    # Instead of iteration over row and column,
    # iterate over antidiagonals and diagonals to achieve symmetric
    # treatment of both sequences
    # k is the index of the antidiagonal, i.e. 'k = i + j'
    for k in range(1, code1.shape[0] + code2.shape[0] + 1):
        # Prepare values for iteration
        # -> shift the valid ranges by one antidiagonal
        i_min_k_2 = i_min_k_1
        i_max_k_2 = i_max_k_1
        i_min_k_1 = i_min_k_0
        i_max_k_1 = i_max_k_0
        # Reset values for iteration to most 'restrictive' values
        # These restrictive values are overwritten in the next iteration
        # if valid cells are present
        i_min_k_0 = k
        i_max_k_0 = 0

        # Prune index range for antidiagonal
        # to range where valid cells exist:
        # A cell (i, j) reads from (i, j-1) and (i-1, j) in the
        # previous antidiagonal and from (i-1, j-1) in the
        # antidiagonal before that
        i_min = _min(i_min_k_1, i_min_k_2 + 1)
        i_max = _max(i_max_k_1 + 1, i_max_k_2 + 1)
        # The index must also not be out of sequence range
        i_min = _max(i_min, k - code2.shape[0])
        i_max = _min(i_max, code1.shape[0])
        # The algorithm has finished,
        # if the calculated antidiagonal has no range of valid cells
        if i_min > i_max:
            break

        # The largest column index appears at the smallest row index
        j_max = k - i_min
        # Expand ndarrays
        # if their size would be exceeded in the following iteration
        # The trace table is not used, if only the score is requested,
        # hence it is not extended in this case
        if i_max >= m_table.shape[0]:
            m_table = _extend_table(np.asarray(m_table), 0, max_table_size)
            g1_table = _extend_table(np.asarray(g1_table), 0, max_table_size)
            g2_table = _extend_table(np.asarray(g2_table), 0, max_table_size)
            if not score_only:
                trace_table = _extend_table(
                    np.asarray(trace_table), 0, max_table_size
                )
        if j_max >= m_table.shape[1]:
            m_table = _extend_table(np.asarray(m_table), 1, max_table_size)
            g1_table = _extend_table(np.asarray(g1_table), 1, max_table_size)
            g2_table = _extend_table(np.asarray(g2_table), 1, max_table_size)
            if not score_only:
                trace_table = _extend_table(
                    np.asarray(trace_table), 1, max_table_size
                )
        i_max_total = _max(i_max_total, i_max)
        j_max_total = _max(j_max_total, j_max)

        for i in range(i_min, i_max+1):
            j = k - i

            # Evaluate score from diagonal direction
            if i != 0 and j != 0:
                # -1 in sequence index is necessary
                # due to the shift of the sequences
                # to the bottom/right in the table
                similarity_score = matrix[code1[i-1], code2[j-1]]
                mm_score = m_table[i-1,j-1]
                g1m_score = g1_table[i-1,j-1]
                g2m_score = g2_table[i-1,j-1]
                # Check if scores stem from cells that are valid
                # Invalid (0) scores are left at 0
                if mm_score != 0:
                    mm_score += similarity_score
                if g1m_score != 0:
                    g1m_score += similarity_score
                if g2m_score != 0:
                    g2m_score += similarity_score
            else:
                mm_score = 0
                g1m_score = 0
                g2m_score = 0
            # Evaluate score through gap insertion
            # No transition from g1_table to g2_table and vice versa,
            # since this would mean adjacent gaps in both sequences;
            # a substitution makes more sense in this case
            # NOTE(review): no explicit validity check here -
            # presumably an invalid (0) predecessor plus a negative
            # penalty always falls below 'req_score';
            # assumes both penalties are negative - TODO confirm
            if j != 0:
                mg1_score = m_table[i,j-1] + gap_open
                g1g1_score = g1_table[i,j-1] + gap_ext
            else:
                mg1_score = 0
                g1g1_score = 0
            if i != 0:
                mg2_score = m_table[i-1,j] + gap_open
                g2g2_score = g2_table[i-1,j] + gap_ext
            else:
                mg2_score = 0
                g2g2_score = 0



            if score_only:
                m_score = _max(mm_score, _max(g1m_score, g2m_score))
                g1_score = _max(mg1_score, g1g1_score)
                g2_score = _max(mg2_score, g2g2_score)
            else:
                trace = get_trace_affine(
                    mm_score, g1m_score, g2m_score,
                    mg1_score, g1g1_score,
                    mg2_score, g2g2_score,
                    # The max score values to be written
                    &m_score, &g1_score, &g2_score
                )


            # Check if the obtained scores reach the required threshold
            # and if they even exceed the maximum score
            # Each of the three scores is checked independently
            # and only written to its table, if it is valid
            is_valid_cell = False

            if m_score >= req_score:
                if i_min_k_0 == k:
                    # 'i_min_k_0 == k'
                    # -> i_min_k_0 has not been set in this iteration, yet
                    i_min_k_0 = i
                i_max_k_0 = i
                m_table[i,j] = m_score
                is_valid_cell = True
                if m_score > max_score:
                    max_score = m_score
                    req_score = max_score - threshold

            if g1_score >= req_score:
                if i_min_k_0 == k:
                    i_min_k_0 = i
                i_max_k_0 = i
                g1_table[i,j] = g1_score
                is_valid_cell = True
                if g1_score > max_score:
                    max_score = g1_score
                    req_score = max_score - threshold

            if g2_score >= req_score:
                if i_min_k_0 == k:
                    i_min_k_0 = i
                i_max_k_0 = i
                g2_table[i,j] = g2_score
                is_valid_cell = True
                if g2_score > max_score:
                    max_score = g2_score
                    req_score = max_score - threshold

            # The trace is only stored,
            # if at least one score of this cell is valid
            if is_valid_cell and not score_only:
                trace_table[i,j] = trace


    # Trim the tables to the region covered by the iteration
    return np.asarray(trace_table)[:i_max_total+1, :j_max_total+1], \
           np.asarray(m_table )[:i_max_total+1, :j_max_total+1], \
           np.asarray(g1_table )[:i_max_total+1, :j_max_total+1], \
           np.asarray(g2_table )[:i_max_total+1, :j_max_total+1]
873
+
874
+
875
def _extend_table(table, int dimension, int64 max_size):
    """
    Create a copy of a two-dimensional table with doubled size along
    one dimension.

    Parameters
    ----------
    table : ndarray
        The table to be enlarged.
    dimension : int
        If ``0``, the number of rows is doubled, otherwise the number
        of columns is doubled.
    max_size : int64
        Raise a :class:`MemoryError`, if the number of elements in the
        enlarged table would exceed this value.

    Returns
    -------
    new_table : ndarray
        The enlarged table.
        The upper left region contains the values from `table`,
        the remaining elements are zero.
    """
    n_rows, n_cols = table.shape[0], table.shape[1]
    if dimension == 0:
        grown_rows, grown_cols = n_rows * 2, n_cols
    else:
        grown_rows, grown_cols = n_rows, n_cols * 2
    if grown_rows * grown_cols > max_size:
        raise MemoryError("Maximum table size exceeded")
    enlarged = np.zeros((grown_rows, grown_cols), dtype=table.dtype)
    # Copy the existing data into the upper left corner
    enlarged[:n_rows, :n_cols] = table
    return enlarged
886
+
887
+
888
cdef inline int _min(int32 a, int32 b):
    # Return the smaller one of the two given values
    if b < a:
        return b
    return a
890
+
891
cdef inline int _max(int32 a, int32 b):
    # Return the larger one of the two given values
    if b > a:
        return b
    return a