biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,652 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.align"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["align_banded"]
8
+
9
+ cimport cython
10
+ cimport numpy as np
11
+ from .tracetable cimport follow_trace, get_trace_linear, get_trace_affine, \
12
+ TraceDirectionAffine, TraceState
13
+
14
+ from .matrix import SubstitutionMatrix
15
+ from ..sequence import Sequence
16
+ from .alignment import Alignment
17
+ import numpy as np
18
+
19
+
20
+ ctypedef np.int32_t int32
21
+ ctypedef np.int64_t int64
22
+ ctypedef np.uint8_t uint8
23
+ ctypedef np.uint16_t uint16
24
+ ctypedef np.uint32_t uint32
25
+ ctypedef np.uint64_t uint64
26
+
27
+ ctypedef fused CodeType1:
28
+ uint8
29
+ uint16
30
+ uint32
31
+ uint64
32
+ ctypedef fused CodeType2:
33
+ uint8
34
+ uint16
35
+ uint32
36
+ uint64
37
+
38
+
39
+ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
40
+ max_number=1000):
41
+ """
42
+ align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
43
+ max_number=1000)
44
+
45
+ Perform a local or semi-global alignment within a defined diagonal
46
+ band. :footcite:`Pearson1988`
47
+
48
+ The function requires two diagonals that define the lower
49
+ and upper limit of the alignment band.
50
+ A diagonal is an integer defined as :math:`D = j - i`, where *i* and
51
+ *j* are sequence positions in the first and second sequence,
52
+ respectively.
53
+ This means that two symbols at position *i* and *j* can only be
54
+ aligned to each other, if :math:`D_L \leq j - i \leq D_U`.
55
+ With increasing width of the diagonal band, the probability to find
56
+ the optimal alignment, but also the computation time increases.
57
+
58
+ Parameters
59
+ ----------
60
+ seq1, seq2 : Sequence
61
+ The sequences to be aligned.
62
+ matrix : SubstitutionMatrix
63
+ The substitution matrix used for scoring.
64
+ band : tuple(int, int)
65
+ The diagonals that represent the lower and upper limit of the
66
+ search space.
67
+ A diagonal :math:`D` is defined as :math:`D = j-i`, where
68
+ :math:`i` and :math:`j` are positions in `seq1` and `seq2`,
69
+ respectively.
70
+ An alignment of sequence positions where :math:`D` is lower than
71
+ the lower limit or greater than the upper limit is not explored
72
+ by the algorithm.
73
+ gap_penalty : int or tuple(int, int), optional
74
+ If an integer is provided, the value will be interpreted as
75
+ linear gap penalty.
76
+ If a tuple is provided, an affine gap penalty is used.
77
+ The first integer in the tuple is the gap opening penalty,
78
+ the second integer is the gap extension penalty.
79
+ The values need to be negative.
80
+ local : bool, optional
81
+ If set to true, a local alignment is performed.
82
+ Otherwise (default) a semi-global alignment is performed.
83
+ max_number : int, optional
84
+ The maximum number of alignments returned.
85
+ When the number of branches exceeds this value in the traceback
86
+ step, no further branches are created.
87
+
88
+ Returns
89
+ -------
90
+ alignments : list of Alignment
91
+ The generated alignments.
92
+ Each alignment in the list has the same similarity score,
93
+ which is the maximum score possible within the defined band.
94
+
95
+ See Also
96
+ --------
97
+ align_optimal
98
+ Guarantees to find the optimal alignment at the cost of greater
99
+ computation time and memory requirements.
100
+
101
+ Notes
102
+ -----
103
+ The diagonals give the maximum difference between the
104
+ number of inserted gaps.
105
+ This means for any position in the alignment, the algorithm
106
+ will not consider inserting a gap into a sequence, if the first
107
+ sequence has already ``-band[0]`` more gaps than the second
108
+ sequence or if the second sequence has already ``band[1]`` more gaps
109
+ than the first sequence, even if inserting additional gaps would
110
+ yield a more optimal alignment.
111
+ Considerations on how to find a suitable band width are discussed in
112
+ :footcite:`Gibrat2018`.
113
+
114
+ The restriction to a limited band is the central difference between
115
+ the banded alignment heuristic and the optimal alignment
116
+ algorithms :footcite:`Needleman1970, Smith1981`.
117
+ Those classical algorithms require :math:`O(m \cdot n)`
118
+ memory space and computation time for aligning two sequences with
119
+ lengths :math:`m` and :math:`n`, respectively.
120
+ The banded alignment algorithm reduces both requirements to
121
+ :math:`O(\min(m,n) \cdot (D_U - D_L))`.
122
+
123
+ *Implementation details*
124
+
125
+ The implementation is very similar to :func:`align_optimal()`.
126
+ The most significant difference is that not the complete alignment
127
+ table is filled, but only the cells that lie within the diagonal
128
+ band.
129
+ Furthermore, to also reduce the space requirements, the diagonal band
130
+ is 'straightened', i.e. the table's rows are indented to the left.
131
+ Hence, this table
132
+
133
+ = = = = = = = = = =
134
+ . . x x x . . . . .
135
+ . . . x x x . . . .
136
+ . . . . x x x . . .
137
+ . . . . . x x x . .
138
+ . . . . . . x x x .
139
+ = = = = = = = = = =
140
+
141
+ is transformed into this table:
142
+
143
+ = = =
144
+ x x x
145
+ x x x
146
+ x x x
147
+ x x x
148
+ x x x
149
+ = = =
150
+
151
+ Filled cells, i.e. cells within the band, are indicated by ``x``.
152
+ The shorter sequence is always represented by the first dimension
153
+ of the table in this implementation.
154
+
155
+ References
156
+ ----------
157
+
158
+ .. footbibliography::
159
+
160
+ Examples
161
+ --------
162
+
163
+ Find a matching diagonal for two sequences:
164
+
165
+ >>> sequence1 = NucleotideSequence("GCGCGCTATATTATGCGCGC")
166
+ >>> sequence2 = NucleotideSequence("TATAAT")
167
+ >>> table = KmerTable.from_sequences(k=4, sequences=[sequence1])
168
+ >>> match = table.match(sequence2)[0]
169
+ >>> diagonal = match[0] - match[2]
170
+ >>> print(diagonal)
171
+ -6
172
+
173
+ Align the sequences centered on the diagonal with buffer in both
174
+ directions:
175
+
176
+ >>> BUFFER = 5
177
+ >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
178
+ >>> alignment = align_banded(
179
+ ... sequence1, sequence2, matrix,
180
+ ... band=(diagonal - BUFFER, diagonal + BUFFER), gap_penalty=(-6, -1)
181
+ ... )[0]
182
+ >>> print(alignment)
183
+ TATATTAT
184
+ TATA--AT
185
+ """
186
+ # Check matrix alphabets
187
+ if not matrix.get_alphabet1().extends(seq1.get_alphabet()) \
188
+ or not matrix.get_alphabet2().extends(seq2.get_alphabet()):
189
+ raise ValueError("The sequences' alphabets do not fit the matrix")
190
+ # Check if gap penalty is linear or affine
191
+ if type(gap_penalty) == int:
192
+ if gap_penalty > 0:
193
+ raise ValueError("Gap penalty must be negative")
194
+ affine_penalty = False
195
+ elif type(gap_penalty) == tuple:
196
+ if gap_penalty[0] > 0 or gap_penalty[1] > 0:
197
+ raise ValueError("Gap penalty must be negative")
198
+ affine_penalty = True
199
+ else:
200
+ raise TypeError("Gap penalty must be either integer or tuple")
201
+ # Check if max_number is reasonable
202
+ if max_number < 1:
203
+ raise ValueError(
204
+ "Maximum number of returned alignments must be at least 1"
205
+ )
206
+
207
+ # The shorter sequence is the one on the left of the matrix
208
+ # -> shorter sequence is 'seq1'
209
+ if len(seq2) < len(seq1):
210
+ seq1, seq2 = seq2, seq1
211
+ band = [-diag for diag in band]
212
+ matrix = matrix.transpose()
213
+ is_swapped = True
214
+ else:
215
+ is_swapped = False
216
+ lower_diag, upper_diag = min(band), max(band)
217
+ if len(seq1) + upper_diag <= 0 or lower_diag >= len(seq2):
218
+ raise ValueError(
219
+ "Alignment band is out of range, the band allows no overlap "
220
+ "between both sequences"
221
+ )
222
+ # Crop band diagonals to reasonable size, so that it at maximum
223
+ # covers the search space of an unbanded alignment
224
+ lower_diag = max(lower_diag, -len(seq1)+1)
225
+ upper_diag = min(upper_diag, len(seq2)-1)
226
+ band_width = upper_diag - lower_diag + 1
227
+ if band_width < 1:
228
+ raise ValueError("The width of the band is 0")
229
+
230
+ # This implementation uses transposed tables in comparison
231
+ # to the common visualization
232
+ # This means, the first sequence is on the left
233
+ # and the second sequence is at the top
234
+
235
+ # Terminal gap column on the left can be omitted in this algorithm,
236
+ # as terminal gaps are not part of the alignment
237
+ # This is not possible for the top row, as the dynamic programming
238
+ # algorithm requires these initial values
239
+ # On the left and right side an additional column is inserted
240
+ # representing the invalid boundaries of the band
241
+ # This prevents unnecessary bound checks when filling the dynamic
242
+ # programming matrix (score and trace)
243
+ trace_table = np.zeros((len(seq1)+1, band_width+2), dtype=np.uint8)
244
+ code1 = seq1.code
245
+ code2 = seq2.code
246
+
247
+
248
+ # Table filling
249
+ ###############
250
+
251
+ # A score value that signals that the respective direction in the
252
+ # dynamic programming matrix should not be used, since it would be
253
+ # outside the band
254
+ # It is the 'worst' score available, so the trace table will never
255
+ # include such a direction
256
+ neg_inf = np.iinfo(np.int32).min
257
+ # Correct the 'negative infinity' integer, by making it more positive
258
+ # This prevents an integer underflow when the gap penalty or
259
+ # match score is added to this value
260
+ neg_inf -= min(gap_penalty) if affine_penalty else gap_penalty
261
+ min_score = np.min(matrix.score_matrix())
262
+ if min_score < 0:
263
+ neg_inf -= min_score
264
+
265
+ if affine_penalty:
266
+ # Affine gap penalty
267
+ gap_open = gap_penalty[0]
268
+ gap_ext = gap_penalty[1]
269
+ # m_table, g1_table and g2_table are the 3 score tables
270
+ m_table = np.zeros((len(seq1)+1, band_width+2), dtype=np.int32)
271
+ # Fill with negative infinity values to prevent that an
272
+ # alignment trace starts with a gap extension
273
+ # instead of a gap opening
274
+ g1_table = np.full((len(seq1)+1, band_width+2), neg_inf, np.int32)
275
+ g2_table = np.full((len(seq1)+1, band_width+2), neg_inf, np.int32)
276
+ # As explained for the trace table (see above),
277
+ # the score table is filled with negative infinity values
278
+ # on the left and right column to prevent the trace leaving the
279
+ # alignment band
280
+ m_table[:, 0] = neg_inf
281
+ m_table[:, -1] = neg_inf
282
+ # Initialize first row and column for global alignments
283
+ _fill_align_table_affine(code1, code2,
284
+ matrix.score_matrix(), trace_table,
285
+ m_table, g1_table, g2_table,
286
+ lower_diag, upper_diag,
287
+ gap_open, gap_ext, local)
288
+ else:
289
+ # Linear gap penalty
290
+ score_table = np.zeros((len(seq1)+1, band_width+2), dtype=np.int32)
291
+ score_table[:, 0] = neg_inf
292
+ score_table[:, -1] = neg_inf
293
+ _fill_align_table(
294
+ code1, code2, matrix.score_matrix(), trace_table, score_table,
295
+ lower_diag, upper_diag, gap_penalty, local
296
+ )
297
+
298
+
299
+ # Traceback
300
+ ###########
301
+
302
+ # Stores all possible traces (= possible alignments)
303
+ # A trace stores the indices of the aligned symbols
304
+ # in both sequences
305
+ trace_list = []
306
+ # Lists of trace starting indices
307
+ i_list = np.zeros(0, dtype=int)
308
+ j_list = np.zeros(0, dtype=int)
309
+ # `state_list` lists of start states
310
+ # State specifies the table the trace starts in
311
+ if local:
312
+ # The start point is the maximal score in the table
313
+ # Multiple starting points possible,
314
+ # when duplicates of maximum score exist
315
+ if affine_penalty:
316
+ # The maximum score in the gap score tables does not need to
317
+ # be considered, as these starting positions would indicate
318
+ # that the local alignment starts with a gap
319
+ # Hence the maximum score value in these tables is always
320
+ # less than in the match table
321
+ max_score = np.max(m_table)
322
+ i_list, j_list = np.where((m_table == max_score))
323
+ state_list = np.full(
324
+ len(i_list), TraceState.MATCH_STATE, dtype=int
325
+ )
326
+ else:
327
+ max_score = np.max(score_table)
328
+ i_list, j_list = np.where((score_table == max_score))
329
+ # State is always 0 for linear gap penalty
330
+ # since there is only one table
331
+ state_list = np.full(
332
+ len(i_list), TraceState.NO_STATE, dtype=int
333
+ )
334
+ else:
335
+ # Get all allowed trace start indices
336
+ possible_i_start, possible_j_start = get_global_trace_starts(
337
+ len(seq1), len(seq2), lower_diag, upper_diag
338
+ )
339
+ if affine_penalty:
340
+ state_list = np.zeros(0, dtype=int)
341
+ m_scores = m_table[possible_i_start, possible_j_start]
342
+ g1_scores = g1_table[possible_i_start, possible_j_start]
343
+ g2_scores = g2_table[possible_i_start, possible_j_start]
344
+ m_max_score = np.max(m_scores)
345
+ g1_max_score = np.max(g1_scores)
346
+ g2_max_score = np.max(g2_scores)
347
+ max_score = max(m_max_score, g1_max_score, g2_max_score)
348
+ if m_max_score == max_score:
349
+ best_indices = np.where(m_scores == max_score)[0]
350
+ i_list = np.append(i_list, possible_i_start[best_indices])
351
+ j_list = np.append(j_list, possible_j_start[best_indices])
352
+ state_list = np.append(
353
+ state_list,
354
+ np.full(len(best_indices),
355
+ TraceState.MATCH_STATE, dtype=int)
356
+ )
357
+ if g1_max_score == max_score:
358
+ best_indices = np.where(g1_scores == max_score)[0]
359
+ i_list = np.append(i_list, possible_i_start[best_indices])
360
+ j_list = np.append(j_list, possible_j_start[best_indices])
361
+ state_list = np.append(
362
+ state_list,
363
+ np.full(len(best_indices),
364
+ TraceState.GAP_LEFT_STATE, dtype=int)
365
+ )
366
+ if g2_max_score == max_score:
367
+ best_indices = np.where(g2_scores == max_score)[0]
368
+ i_list = np.append(i_list, possible_i_start[best_indices])
369
+ j_list = np.append(j_list, possible_j_start[best_indices])
370
+ state_list = np.append(
371
+ state_list,
372
+ np.full(len(best_indices),
373
+ TraceState.GAP_TOP_STATE, dtype=int)
374
+ )
375
+ else:
376
+ # Choose the trace start index with the highest score
377
+ # in the score table
378
+ scores = score_table[possible_i_start, possible_j_start]
379
+ max_score = np.max(scores)
380
+ best_indices = np.where(scores == max_score)
381
+ i_list = possible_i_start[best_indices]
382
+ j_list = possible_j_start[best_indices]
383
+ state_list = np.full(
384
+ len(i_list), TraceState.NO_STATE, dtype=int
385
+ )
386
+
387
+ # Follow the traces specified in state and indices lists
388
+ cdef int curr_trace_count
389
+ for k in range(len(i_list)):
390
+ i_start = i_list[k]
391
+ j_start = j_list[k]
392
+ state_start = state_list[k]
393
+ # Pessimistic array allocation:
394
+ # The maximum trace length arises from an alignment, where each
395
+ # symbol is aligned to a gap
396
+ trace = np.full((len(seq1) + len(seq2), 2), -1, dtype=np.int64)
397
+ curr_trace_count = 1
398
+ follow_trace(
399
+ trace_table, True, i_start, j_start, 0,
400
+ trace, trace_list, state=state_start,
401
+ curr_trace_count=&curr_trace_count, max_trace_count=max_number,
402
+ lower_diag=lower_diag, upper_diag=upper_diag
403
+ )
404
+
405
+ # Replace gap entries in trace with -1
406
+ for i, trace in enumerate(trace_list):
407
+ trace = np.flip(trace, axis=0)
408
+ gap_filter = np.zeros(trace.shape, dtype=bool)
409
+ gap_filter[np.unique(trace[:,0], return_index=True)[1], 0] = True
410
+ gap_filter[np.unique(trace[:,1], return_index=True)[1], 1] = True
411
+ trace[~gap_filter] = -1
412
+ trace_list[i] = trace
413
+
414
+ # Limit the number of generated alignments to `max_number`:
415
+ # In most cases this is achieved by discarding branches in
416
+ # 'follow_trace()', however, if multiple alignment starts
417
+ # are used, the number of created traces are the number of
418
+ # starts times `max_number`
419
+ trace_list = trace_list[:max_number]
420
+ if is_swapped:
421
+ return [Alignment([seq2, seq1], np.flip(trace, axis=1), max_score)
422
+ for trace in trace_list]
423
+ else:
424
+ return [Alignment([seq1, seq2], trace, max_score)
425
+ for trace in trace_list]
426
+
427
+
428
@cython.boundscheck(False)
@cython.wraparound(False)
def _fill_align_table(CodeType1[:] code1 not None,
                      CodeType2[:] code2 not None,
                      const int32[:,:] mat not None,
                      uint8[:,:] trace_table not None,
                      int32[:,:] score_table not None,
                      int lower_diag,
                      int upper_diag,
                      int gap_penalty,
                      bint local):
    """
    Compute the banded dynamic programming table for a linear gap
    penalty.

    Parameters
    ----------
    code1, code2
        The sequence code of each sequence to be aligned.
    mat
        The score matrix obtained from the :class:`SubstitutionMatrix`
        object.
    trace_table
        A matrix containing values indicating the direction for the
        traceback step.
        The matrix is filled in this function.
    score_table
        The alignment table.
        The matrix is filled in this function.
    lower_diag, upper_diag
        The diagonals limiting the band:
        Only cells whose position in the second sequence minus the
        position in the first sequence lies within
        ``lower_diag`` and ``upper_diag`` (both inclusive) are filled.
    gap_penalty
        The linear gap penalty.
    local
        Indicates, whether a local alignment should be performed.
    """

    # Table indices
    cdef int row, col
    # Sequence positions
    cdef int pos1, pos2
    # Range of positions in the second sequence covered by the band
    cdef int first_pos2, stop_pos2
    cdef int32 diag_score, left_score, top_score
    cdef int32 best_score
    cdef uint8 direction

    for pos1 in range(code1.shape[0]):
        # The table has one leading row in front of the sequence
        row = pos1 + 1
        # Clip the band to the bounds of the second sequence
        first_pos2 = max(0, pos1 + lower_diag)
        stop_pos2 = min(code2.shape[0], pos1 + upper_diag + 1)
        for pos2 in range(first_pos2, stop_pos2):
            # The diagonal band is stored 'straightened':
            # The column is the offset from the lower diagonal,
            # shifted by the one leading column of the table
            col = pos2 - pos1 - lower_diag + 1

            # As a consequence of the 'straightening' the
            # 'upper left' and 'upper' cell of the classic matrix are
            # found in the 'upper' and 'upper right' cell, respectively
            diag_score = (
                score_table[row-1, col] + mat[code1[pos1], code2[pos2]]
            )
            left_score = score_table[row, col-1] + gap_penalty
            top_score = score_table[row-1, col+1] + gap_penalty

            direction = get_trace_linear(
                diag_score, left_score, top_score, &best_score
            )

            if local and best_score <= 0:
                # Scores of a local alignment never drop below 0
                score_table[row, col] = 0
            else:
                score_table[row, col] = best_score
            trace_table[row, col] = direction
500
+
501
+
502
@cython.boundscheck(False)
@cython.wraparound(False)
def _fill_align_table_affine(CodeType1[:] code1 not None,
                             CodeType2[:] code2 not None,
                             const int32[:,:] mat not None,
                             uint8[:,:] trace_table not None,
                             int32[:,:] m_table not None,
                             int32[:,:] g1_table not None,
                             int32[:,:] g2_table not None,
                             int lower_diag,
                             int upper_diag,
                             int gap_open,
                             int gap_ext,
                             bint local):
    """
    Compute the banded dynamic programming tables for an affine gap
    penalty.

    Parameters
    ----------
    code1, code2
        The sequence code of each sequence to be aligned.
    mat
        The score matrix obtained from the :class:`SubstitutionMatrix`
        object.
    trace_table
        A matrix containing values indicating the direction for the
        traceback step.
        The matrix is filled in this function.
    m_table, g1_table, g2_table
        The alignment tables containing the scores.
        `m_table` contains values for matches.
        `g1_table` contains values for gaps in the first sequence.
        `g2_table` contains values for gaps in the second sequence.
        The matrices are filled in this function.
    lower_diag, upper_diag
        The diagonals limiting the band:
        Only cells whose position in the second sequence minus the
        position in the first sequence lies within
        ``lower_diag`` and ``upper_diag`` (both inclusive) are filled.
    gap_open
        The gap opening penalty.
    gap_ext
        The gap extension penalty.
    local
        Indicates, whether a local alignment should be performed.
    """

    # Table indices
    cdef int row, col
    # Sequence positions
    cdef int pos1, pos2
    # Range of positions in the second sequence covered by the band
    cdef int first_pos2, stop_pos2
    cdef int32 substitution
    # Candidate scores, named '<target table>_from_<source table>'
    cdef int32 m_from_m, m_from_g1, m_from_g2
    cdef int32 g1_from_m, g1_from_g1
    cdef int32 g2_from_m, g2_from_g2
    # Best score for each of the three tables
    cdef int32 best_m, best_g1, best_g2
    cdef uint8 directions

    for pos1 in range(code1.shape[0]):
        # The tables have one leading row in front of the sequence
        row = pos1 + 1
        # Clip the band to the bounds of the second sequence
        first_pos2 = max(0, pos1 + lower_diag)
        stop_pos2 = min(code2.shape[0], pos1 + upper_diag + 1)
        for pos2 in range(first_pos2, stop_pos2):
            # Column in the 'straightened' band
            col = pos2 - pos1 - lower_diag + 1

            # Transitions into the match table
            substitution = mat[code1[pos1], code2[pos2]]
            m_from_m = m_table[row-1, col] + substitution
            m_from_g1 = g1_table[row-1, col] + substitution
            m_from_g2 = g2_table[row-1, col] + substitution
            # Transitions into the gap tables:
            # There is no transition between `g1_table` and `g2_table`,
            # as this would mean adjacent gaps in both sequences,
            # where a substitution makes more sense
            g1_from_m = m_table[row, col-1] + gap_open
            g1_from_g1 = g1_table[row, col-1] + gap_ext
            g2_from_m = m_table[row-1, col+1] + gap_open
            g2_from_g2 = g2_table[row-1, col+1] + gap_ext

            directions = get_trace_affine(
                m_from_m, m_from_g1, m_from_g2,
                g1_from_m, g1_from_g1,
                g2_from_m, g2_from_g2,
                # Receive the maximum score for each table
                &best_m, &best_g1, &best_g2
            )

            if not local:
                m_table[row, col] = best_m
                g1_table[row, col] = best_g1
                g2_table[row, col] = best_g2
            else:
                # Local alignment specialty:
                # A table cell is only overwritten with a positive
                # score, otherwise the cell keeps its initial value
                # and the trace bits leading into the respective table
                # are removed, so the trace ends here
                if best_m > 0:
                    m_table[row, col] = best_m
                else:
                    directions &= ~(
                        TraceDirectionAffine.MATCH_TO_MATCH |
                        TraceDirectionAffine.GAP_LEFT_TO_MATCH |
                        TraceDirectionAffine.GAP_TOP_TO_MATCH
                    )
                if best_g1 > 0:
                    g1_table[row, col] = best_g1
                else:
                    directions &= ~(
                        TraceDirectionAffine.MATCH_TO_GAP_LEFT |
                        TraceDirectionAffine.GAP_LEFT_TO_GAP_LEFT
                    )
                if best_g2 > 0:
                    g2_table[row, col] = best_g2
                else:
                    directions &= ~(
                        TraceDirectionAffine.MATCH_TO_GAP_TOP |
                        TraceDirectionAffine.GAP_TOP_TO_GAP_TOP
                    )
            trace_table[row, col] = directions
622
+
623
+
624
def get_global_trace_starts(seq1_len, seq2_len, lower_diag, upper_diag):
    """
    Get the table cells where the trace of a global banded alignment
    may start.

    Parameters
    ----------
    seq1_len, seq2_len
        The lengths of the first and second sequence.
    lower_diag, upper_diag
        The lower and upper diagonal limiting the alignment band.

    Returns
    -------
    i, j : ndarray
        The row and column indices of the start cells in the
        'straightened' band table, one cell for each band column.
    """
    n_band_columns = upper_diag - lower_diag + 1
    # Table column 0 is left out: the band columns start at index 1
    j = np.arange(1, n_band_columns + 1)

    # Position in the second sequence each band column corresponds to
    # in the row of the last symbol of the first sequence
    seq_j_in_last_row = j + (seq1_len - 1) + lower_diag - 1

    # Table row belonging to the last symbol of the first sequence
    last_row = np.full(len(j), seq1_len, dtype=int)
    # Table row where the band column reaches the last symbol of the
    # second sequence, obtained by resolving
    #
    #    (seq2_len-1) = j + (i - 1) + lower_diag - 1
    #
    # to i
    row_at_seq2_end = (seq2_len - 1) - j - lower_diag + 2

    # Start from the end of the first sequence, if the cell is within
    # the bounds of the second sequence,
    # otherwise start from the end of the second sequence
    i = np.where(seq_j_in_last_row < seq2_len, last_row, row_at_seq2_end)
    return i, j