biotite 0.41.1__cp312-cp312-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-312-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,587 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.align"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["align_ungapped", "align_optimal"]
8
+
9
+ cimport cython
10
+ cimport numpy as np
11
+ from .tracetable cimport follow_trace, get_trace_linear, get_trace_affine, \
12
+ TraceDirectionLinear, TraceDirectionAffine
13
+
14
+ from .alignment import Alignment
15
+ import numpy as np
16
+
17
+
18
+ ctypedef np.int32_t int32
19
+ ctypedef np.int64_t int64
20
+ ctypedef np.uint8_t uint8
21
+ ctypedef np.uint16_t uint16
22
+ ctypedef np.uint32_t uint32
23
+ ctypedef np.uint64_t uint64
24
+
25
+ ctypedef fused CodeType1:
26
+ uint8
27
+ uint16
28
+ uint32
29
+ uint64
30
+ ctypedef fused CodeType2:
31
+ uint8
32
+ uint16
33
+ uint32
34
+ uint64
35
+
36
+
37
def align_ungapped(seq1, seq2, matrix, score_only=False):
    """
    align_ungapped(seq1, seq2, matrix, score_only=False)

    Score two equally long sequences position by position, i.e.
    align them without inserting any gap.

    Parameters
    ----------
    seq1, seq2 : Sequence
        The sequences to be compared.
        Both must have the same length.
    matrix : SubstitutionMatrix
        The substitution matrix the symbol pairs are scored with.
        Its alphabets must extend the alphabets of the sequences.
    score_only : bool, optional
        If true, only the similarity score is returned and no
        alignment object is created.

    Returns
    -------
    score : Alignment or int
        The trivial (gap-free) alignment including its score.
        If `score_only` is true, merely the score is returned.

    Raises
    ------
    ValueError
        If the sequences differ in length or if their alphabets are
        not covered by the alphabets of `matrix`.
    """
    # Guard clauses: reject unusable input before any scoring is done
    if len(seq1) != len(seq2):
        raise ValueError(
            f"Different sequence lengths ({len(seq1):d} and {len(seq2):d})"
        )
    alphabets_fit = (
        matrix.get_alphabet1().extends(seq1.get_alphabet())
        and matrix.get_alphabet2().extends(seq2.get_alphabet())
    )
    if not alphabets_fit:
        raise ValueError("The sequences' alphabets do not fit the matrix")

    total_score = _add_scores(seq1.code, seq2.code, matrix.score_matrix())
    if score_only:
        return total_score

    # No actual alignment procedure is required:
    # Position i of the first sequence is paired with position i of
    # the second one, giving the trivial trace
    # [[0 0]
    #  [1 1]
    #  [2 2]
    #  ... ]
    positions = np.arange(len(seq1))
    trivial_trace = np.vstack((positions, positions)).T
    return Alignment(
        sequences=[seq1, seq2],
        trace=trivial_trace,
        score=total_score,
    )
85
+
86
+
87
@cython.boundscheck(False)
@cython.wraparound(False)
def _add_scores(CodeType1[:] code1 not None,
                CodeType2[:] code2 not None,
                const int32[:,:] matrix not None):
    """
    Sum up the substitution scores of the symbol pairs located at
    equal positions in two sequence codes.

    Parameters
    ----------
    code1, code2
        The sequence codes.
        The number of scored positions is taken from `code1`.
    matrix
        The score matrix, indexed by the symbol code of the first
        and the second sequence, respectively.

    Returns
    -------
    score : int
        The sum of the scores of all positions.

    Notes
    -----
    Bounds checking and negative index wraparound are disabled for
    this function.
    Hence, the caller must ensure that `code2` is at least as long
    as `code1` and that all symbol codes are valid indices for
    `matrix`.
    """
    # A 64 bit accumulator is used, as the sum of many 32 bit scores
    # could otherwise silently wrap around for very long sequences
    cdef int64 score = 0
    # Index type matches the type of the memoryview extents
    cdef Py_ssize_t i
    for i in range(code1.shape[0]):
        score += matrix[code1[i], code2[i]]
    return score
97
+
98
+
99
def align_optimal(seq1, seq2, matrix, gap_penalty=-10,
                  terminal_penalty=True, local=False,
                  max_number=1000):
    """
    align_optimal(seq1, seq2, matrix, gap_penalty=-10,
                  terminal_penalty=True, local=False, max_number=1000)

    Perform an optimal alignment of two sequences based on a
    dynamic programming algorithm.

    This algorithm yields an optimal alignment, i.e. the sequences
    are aligned in the way that results in the highest similarity
    score. This operation can be very time and space consuming,
    because both scale with the product of the two sequence lengths.

    The aligned sequences do not need to be instances from the same
    :class:`Sequence` subclass, since they do not need to have the same
    alphabet. The only requirement is that the
    :class:`SubstitutionMatrix`' alphabets extend the alphabets of the
    two sequences.

    This function can either perform a global alignment, based on the
    Needleman-Wunsch algorithm :footcite:`Needleman1970` or a local
    alignment, based on the Smith–Waterman algorithm
    :footcite:`Smith1981`.

    Furthermore this function supports affine gap penalties using the
    Gotoh algorithm :footcite:`Gotoh1982`, however, this requires
    approximately 4 times the RAM space and execution time.

    Parameters
    ----------
    seq1, seq2 : Sequence
        The sequences to be aligned.
    matrix : SubstitutionMatrix
        The substitution matrix used for scoring.
    gap_penalty : int or tuple(int, int), optional
        If an integer is provided, the value will be interpreted as
        linear gap penalty.
        If a tuple is provided, an affine gap penalty is used.
        The first integer in the tuple is the gap opening penalty,
        the second integer is the gap extension penalty.
        The values need to be negative. (Default: *-10*)
    terminal_penalty : bool, optional
        If true, gap penalties are applied to terminal gaps.
        If `local` is true, this parameter has no effect.
        (Default: True)
    local : bool, optional
        If false, a global alignment is performed, otherwise a local
        alignment is performed. (Default: False)
    max_number : int, optional
        The maximum number of alignments returned.
        When the number of branches exceeds this value in the traceback
        step, no further branches are created.
        (Default: 1000)

    Returns
    -------
    alignments : list, type=Alignment
        A list of alignments.
        Each alignment in the list has the same maximum similarity
        score.

    Raises
    ------
    ValueError
        If the alphabets of the sequences are not covered by the
        alphabets of `matrix`, if a gap penalty is positive or if
        `max_number` is less than 1.
    TypeError
        If `gap_penalty` is neither an ``int`` nor a ``tuple``.

    See also
    --------
    align_banded

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> seq1 = NucleotideSequence("ATACGCTTGCT")
    >>> seq2 = NucleotideSequence("AGGCGCAGCT")
    >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
    >>> ali = align_optimal(seq1, seq2, matrix, gap_penalty=-6)
    >>> for a in ali:
    ...     print(a, "\\n")
    ATACGCTTGCT
    AGGCGCA-GCT
    <BLANKLINE>
    ATACGCTTGCT
    AGGCGC-AGCT
    <BLANKLINE>
    """
    # Check matrix alphabets
    if     not matrix.get_alphabet1().extends(seq1.get_alphabet()) \
        or not matrix.get_alphabet2().extends(seq2.get_alphabet()):
            raise ValueError("The sequences' alphabets do not fit the matrix")
    # Check if gap penalty is linear or affine
    if type(gap_penalty) == int:
        if gap_penalty > 0:
            raise ValueError("Gap penalty must be negative")
        affine_penalty = False
    elif type(gap_penalty) == tuple:
        if gap_penalty[0] > 0 or gap_penalty[1] > 0:
                raise ValueError("Gap penalty must be negative")
        affine_penalty = True
    else:
        raise TypeError("Gap penalty must be either integer or tuple")
    # Check if max_number is reasonable
    if max_number < 1:
        raise ValueError(
            "Maximum number of returned alignments must be at least 1"
        )


    # This implementation uses transposed tables in comparison
    # to the common visualization
    # This means the first sequence is on the left
    # and the second sequence is at the top
    trace_table = np.zeros(( len(seq1)+1, len(seq2)+1 ), dtype=np.uint8)
    code1 = seq1.code
    code2 = seq2.code

    # Table filling
    ###############
    if affine_penalty:
        # Affine gap penalty
        gap_open = gap_penalty[0]
        gap_ext = gap_penalty[1]
        # Value for negative infinity
        # Used to prevent unallowed state transitions
        # Subtraction of gap_open, gap_ext and lowest score value
        # to prevent integer overflow
        neg_inf = np.iinfo(np.int32).min - gap_open - gap_ext
        min_score = np.min(matrix.score_matrix())
        if min_score < 0:
            neg_inf -= min_score
        # m_table, g1_table and g2_table are the 3 score tables
        m_table = np.zeros((len(seq1)+1, len(seq2)+1), dtype=np.int32)
        # Fill with negative infinity values to prevent that an
        # alignment trace starts with a gap extension
        # instead of a gap opening
        g1_table = np.full((len(seq1)+1, len(seq2)+1), neg_inf, dtype=np.int32)
        g2_table = np.full((len(seq1)+1, len(seq2)+1), neg_inf, dtype=np.int32)
        # Disallow trace coming from the match table on the
        # left column/top row, as these represent terminal gaps
        m_table [0, 1:] = neg_inf
        m_table [1:, 0] = neg_inf
        # Initialize first row and column for global alignments
        if not local:
            if terminal_penalty:
                # Terminal gaps are penalized
                # -> Penalties in first row/column
                g1_table[0, 1:] = (np.arange(len(seq2)) * gap_ext) + gap_open
                g2_table[1:, 0] = (np.arange(len(seq1)) * gap_ext) + gap_open
            else:
                g1_table[0, 1:] = np.zeros(len(seq2))
                g2_table[1:, 0] = np.zeros(len(seq1))
            # One-element slices are used instead of scalar indices,
            # so that an empty sequence (table with a single
            # row/column) does not raise an IndexError here
            trace_table[0, 1:2] = TraceDirectionAffine.MATCH_TO_GAP_LEFT
            trace_table[0, 2:] = TraceDirectionAffine.GAP_LEFT_TO_GAP_LEFT
            trace_table[1:2, 0] = TraceDirectionAffine.MATCH_TO_GAP_TOP
            trace_table[2: ,0] = TraceDirectionAffine.GAP_TOP_TO_GAP_TOP
        else:
            g1_table[0, 1:] = np.zeros(len(seq2))
            g2_table[1:, 0] = np.zeros(len(seq1))
        _fill_align_table_affine(code1, code2,
                                 matrix.score_matrix(), trace_table,
                                 m_table, g1_table, g2_table,
                                 gap_open, gap_ext, terminal_penalty, local)
    else:
        # Linear gap penalty
        # The table for saving the scores
        score_table = np.zeros(( len(seq1)+1, len(seq2)+1 ), dtype=np.int32)
        # Initialize first row and column for global alignments
        if not local:
            if terminal_penalty:
                # Terminal gaps are penalized
                # -> Penalties in first row/column
                score_table[:,0] = np.arange(len(seq1)+1) * gap_penalty
                score_table[0,:] = np.arange(len(seq2)+1) * gap_penalty
            trace_table[1:,0] = TraceDirectionLinear.GAP_TOP
            trace_table[0,1:] = TraceDirectionLinear.GAP_LEFT
        _fill_align_table(code1, code2, matrix.score_matrix(), trace_table,
                          score_table, gap_penalty, terminal_penalty, local)


    # Traceback
    ###########
    # Stores all possible traces (= possible alignments)
    # A trace stores the indices of the aligned symbols
    # in both sequences
    trace_list = []
    # Lists of trace starting indices
    i_list = np.zeros(0, dtype=int)
    j_list = np.zeros(0, dtype=int)
    # List of start states
    # State specifies the table the trace starts in
    state_list = np.zeros(0, dtype=int)
    if local:
        # The start point is the maximal score in the table
        # Multiple starting points possible,
        # when duplicates of maximal score exist
        if affine_penalty:
            # The maximum score in the gap score tables do not need to
            # be considered, as these starting positions would indicate
            # that the local alignment starts with a gap
            # Hence the maximum score value in these tables is always
            # less than in the match table
            max_score = np.max(m_table)
            i_list, j_list = np.where((m_table == max_score))
            state_list = np.append(state_list, np.full(len(i_list), 1))
        else:
            max_score = np.max(score_table)
            i_list, j_list = np.where((score_table == max_score))
            # State is always 0 for linear gap penalty
            # since there is only one table
            state_list = np.zeros(len(i_list), dtype=int)
    else:
        # The start point is the last element in the table
        # -1 in start indices due to sequence offset mentioned before
        i_start = trace_table.shape[0] -1
        j_start = trace_table.shape[1] -1
        if affine_penalty:
            max_score = max(m_table[i_start,j_start],
                            g1_table[i_start,j_start],
                            g2_table[i_start,j_start])
            # Each table holding the maximum score is a valid start
            if m_table[i_start,j_start] == max_score:
                i_list = np.append(i_list, i_start)
                j_list = np.append(j_list, j_start)
                state_list = np.append(state_list, 1)
            if g1_table[i_start,j_start] == max_score:
                i_list = np.append(i_list, i_start)
                j_list = np.append(j_list, j_start)
                state_list = np.append(state_list, 2)
            if g2_table[i_start,j_start] == max_score:
                i_list = np.append(i_list, i_start)
                j_list = np.append(j_list, j_start)
                state_list = np.append(state_list, 3)
        else:
            i_list = np.append(i_list, i_start)
            j_list = np.append(j_list, j_start)
            state_list = np.append(state_list, 0)
            max_score = score_table[i_start,j_start]
    # Follow the traces specified in state and indices lists
    cdef int curr_trace_count
    for k in range(len(i_list)):
        # Traces beyond `max_number` would be discarded below anyway
        # -> Stop as soon as enough traces have been collected
        # This matters for local alignments with many start positions
        if len(trace_list) >= max_number:
            break
        i_start = i_list[k]
        j_start = j_list[k]
        state_start = state_list[k]
        # Pessimistic array allocation:
        # The maximum trace length arises from an alignment, where each
        # symbol is aligned to a gap
        trace = np.full(( i_start+1 + j_start+1, 2 ), -1, dtype=np.int64)
        curr_trace_count = 1
        follow_trace(
            trace_table, False, i_start, j_start, 0, trace, trace_list,
            state=state_start, curr_trace_count=&curr_trace_count,
            max_trace_count=max_number,
            # Diagonals are only needed for banded alignments
            lower_diag=0, upper_diag=0
        )

    # Limit the number of generated alignments to `max_number`:
    # In most cases this is achieved by discarding branches in
    # 'follow_trace()', however, if multiple local alignment starts
    # are used, a single start may still push the number of traces
    # beyond `max_number`
    # The list is truncated before the post-processing below, so no
    # work is spent on traces that are not returned
    trace_list = trace_list[:max_number]

    # Replace gap entries in trace with -1
    for i, trace in enumerate(trace_list):
        trace = np.flip(trace, axis=0)
        gap_filter = np.zeros(trace.shape, dtype=bool)
        # Only the first occurrence of each index in a column is kept,
        # all repeated occurrences are marked as gap
        gap_filter[np.unique(trace[:,0], return_index=True)[1], 0] = True
        gap_filter[np.unique(trace[:,1], return_index=True)[1], 1] = True
        trace[~gap_filter] = -1
        trace_list[i] = trace

    return [Alignment([seq1, seq2], trace, max_score) for trace in trace_list]
372
+
373
+
374
@cython.boundscheck(False)
@cython.wraparound(False)
def _fill_align_table(CodeType1[:] code1 not None,
                      CodeType2[:] code2 not None,
                      const int32[:,:] matrix not None,
                      uint8[:,:] trace_table not None,
                      int32[:,:] score_table not None,
                      int gap_penalty,
                      bint term_penalty,
                      bint local):
    """
    Fill an alignment table with linear gap penalty using dynamic
    programming.

    Parameters
    ----------
    code1, code2
        The sequence code of each sequence to be aligned.
    matrix
        The score matrix obtained from the :class:`SubstitutionMatrix`
        object.
    trace_table
        A matrix containing values indicating the direction for the
        traceback step.
        The matrix is filled in this function.
    score_table
        The alignment table.
        The matrix is filled in this function.
    gap_penalty
        The linear gap penalty.
    term_penalty
        Indicates, whether terminal gaps should be penalized.
    local
        Indicates, whether a local alignment should be performed.

    Notes
    -----
    Only the cells with ``i >= 1`` and ``j >= 1`` are written:
    the first row and column are expected to be filled already.
    For local alignments, cells whose best score is not positive are
    left untouched in both tables.
    """

    cdef int i, j
    # The table bounds are declared as C integers, so the comparisons
    # in the inner loop are plain C comparisons instead of
    # Python object comparisons
    cdef int i_max, j_max
    cdef int32 from_diag, from_left, from_top
    cdef uint8 trace
    cdef int32 score

    # For local alignments terminal gaps on the right side are ignored
    # anyway, as the alignment should stop before
    if local:
        term_penalty = True
    # Used in case terminal gaps are not penalized
    i_max = score_table.shape[0] - 1
    j_max = score_table.shape[1] - 1

    # Starts at 1 since the first row and column are already filled
    for i in range(1, score_table.shape[0]):
        for j in range(1, score_table.shape[1]):
            # Evaluate score from diagonal direction
            # -1 in sequence index is necessary
            # due to the shift of the sequences
            # to the bottom/right in the table
            from_diag = score_table[i-1, j-1] + matrix[code1[i-1], code2[j-1]]
            # Evaluate score from left direction:
            # In the last row a step to the right is a terminal gap,
            # which is free if terminal gaps are not penalized
            if not term_penalty and i == i_max:
                from_left = score_table[i, j-1]
            else:
                from_left = score_table[i, j-1] + gap_penalty
            # Evaluate score from top direction:
            # In the last column a step downwards is a terminal gap
            if not term_penalty and j == j_max:
                from_top = score_table[i-1, j]
            else:
                from_top = score_table[i-1, j] + gap_penalty

            # 'score' receives the score belonging to the returned trace
            trace = get_trace_linear(from_diag, from_left, from_top, &score)

            # Local alignment specialty:
            # If score is less than or equal to 0,
            # then the score of the cell remains 0
            # and the trace ends here
            if local and score <= 0:
                continue

            score_table[i,j] = score
            trace_table[i,j] = trace
454
+
455
+
456
@cython.boundscheck(False)
@cython.wraparound(False)
def _fill_align_table_affine(CodeType1[:] code1 not None,
                             CodeType2[:] code2 not None,
                             const int32[:,:] matrix not None,
                             uint8[:,:] trace_table not None,
                             int32[:,:] m_table not None,
                             int32[:,:] g1_table not None,
                             int32[:,:] g2_table not None,
                             int gap_open,
                             int gap_ext,
                             bint term_penalty,
                             bint local):
    """
    Fill an alignment table with affine gap penalty using dynamic
    programming.

    Parameters
    ----------
    code1, code2
        The sequence code of each sequence to be aligned.
    matrix
        The score matrix obtained from the :class:`SubstitutionMatrix`
        object.
    trace_table
        A matrix containing values indicating the direction for the
        traceback step.
        The matrix is filled in this function.
    m_table, g1_table, g2_table
        The alignment tables containing the scores.
        `m_table` contains values for matches.
        `g1_table` contains values for gaps in the first sequence.
        `g2_table` contains values for gaps in the second sequence.
        The matrix is filled in this function.
    gap_open
        The gap opening penalty.
    gap_ext
        The gap extension penalty.
    term_penalty
        Indicates, whether terminal gaps should be penalized.
    local
        Indicates, whether a local alignment should be performed.

    Notes
    -----
    Only the cells with ``i >= 1`` and ``j >= 1`` are written:
    the first row and column are expected to be filled already.
    For local alignments, a score table cell whose best score is not
    positive keeps its previous value and the corresponding
    directions are removed from the trace.
    """

    cdef int i, j
    # The table bounds are declared as C integers, so the comparisons
    # in the inner loop are plain C comparisons instead of
    # Python object comparisons
    cdef int i_max, j_max
    cdef int32 mm_score, g1m_score, g2m_score
    cdef int32 mg1_score, g1g1_score
    cdef int32 mg2_score, g2g2_score
    cdef int32 m_score, g1_score, g2_score
    cdef int32 similarity_score
    cdef uint8 trace

    # For local alignments terminal gaps on the right and the bottom are
    # ignored anyway, as the alignment should stop before
    if local:
        term_penalty = True
    # Used in case terminal gaps are not penalized
    i_max = trace_table.shape[0] - 1
    j_max = trace_table.shape[1] - 1

    # Starts at 1 since the first row and column are already filled
    for i in range(1, trace_table.shape[0]):
        for j in range(1, trace_table.shape[1]):
            # Calculate the scores for possible transitions
            # into the current cell
            similarity_score = matrix[code1[i-1], code2[j-1]]
            mm_score = m_table[i-1,j-1] + similarity_score
            g1m_score = g1_table[i-1,j-1] + similarity_score
            g2m_score = g2_table[i-1,j-1] + similarity_score
            # No transition from g1_table to g2_table and vice versa
            # Since this would mean adjacent gaps in both sequences
            # A substitution makes more sense in this case
            if not term_penalty and i == i_max:
                # Terminal gap in the last row is not penalized
                mg1_score = m_table[i,j-1]
                g1g1_score = g1_table[i,j-1]
            else:
                mg1_score = m_table[i,j-1] + gap_open
                g1g1_score = g1_table[i,j-1] + gap_ext
            if not term_penalty and j == j_max:
                # Terminal gap in the last column is not penalized
                mg2_score = m_table[i-1,j]
                g2g2_score = g2_table[i-1,j]
            else:
                mg2_score = m_table[i-1,j] + gap_open
                g2g2_score = g2_table[i-1,j] + gap_ext

            trace = get_trace_affine(
                mm_score, g1m_score, g2m_score,
                mg1_score, g1g1_score,
                mg2_score, g2g2_score,
                # The max score values to be written
                &m_score, &g1_score, &g2_score
            )

            # Fill values into tables
            # Local alignment specialty:
            # If score is less than or equal to 0,
            # then the score of the cell remains 0
            # and the trace ends here
            if local:
                if m_score <= 0:
                    # End trace in specific table
                    # by filtering out the respective bits
                    trace &= ~(
                        TraceDirectionAffine.MATCH_TO_MATCH |
                        TraceDirectionAffine.GAP_LEFT_TO_MATCH |
                        TraceDirectionAffine.GAP_TOP_TO_MATCH
                    )
                    # m_table[i,j] remains 0
                else:
                    m_table[i,j] = m_score
                if g1_score <= 0:
                    trace &= ~(
                        TraceDirectionAffine.MATCH_TO_GAP_LEFT |
                        TraceDirectionAffine.GAP_LEFT_TO_GAP_LEFT
                    )
                    # g1_table[i,j] remains negative infinity
                else:
                    g1_table[i,j] = g1_score
                if g2_score <= 0:
                    trace &= ~(
                        TraceDirectionAffine.MATCH_TO_GAP_TOP |
                        TraceDirectionAffine.GAP_TOP_TO_GAP_TOP
                    )
                    # g2_table[i,j] remains negative infinity
                else:
                    g2_table[i,j] = g2_score
            else:
                m_table[i,j] = m_score
                g1_table[i,j] = g1_score
                g2_table[i,j] = g2_score
            trace_table[i,j] = trace