biotite 1.5.0__cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-314-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-314-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-314-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-314-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-314-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-314-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-314-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-314-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-314-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-314-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-314-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-314-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-314-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-314-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-314-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-314-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-314-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-314-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-314-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-314-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-314-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,585 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.align"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["align_ungapped", "align_optimal"]
8
+
9
+ cimport cython
10
+ cimport numpy as np
11
+ from .tracetable cimport follow_trace, get_trace_linear, get_trace_affine, \
12
+ TraceDirectionLinear, TraceDirectionAffine
13
+
14
+ from .alignment import Alignment
15
+ import numpy as np
16
+
17
+
18
# Short aliases for the fixed-width NumPy integer C types that are used
# in the typed function signatures of this module
ctypedef np.int32_t int32
ctypedef np.int64_t int64
ctypedef np.uint8_t uint8
ctypedef np.uint16_t uint16
ctypedef np.uint32_t uint32
ctypedef np.uint64_t uint64

# Fused types for the symbol code arrays of the first and second
# sequence, respectively
# Two separate fused types with the same members are declared, so that
# a function taking one argument of each type is specialized for every
# combination of unsigned integer widths, i.e. the two code arrays are
# not required to share the same dtype
ctypedef fused CodeType1:
    uint8
    uint16
    uint32
    uint64
ctypedef fused CodeType2:
    uint8
    uint16
    uint32
    uint64
35
+
36
+
37
def align_ungapped(seq1, seq2, matrix, score_only=False):
    """
    align_ungapped(seq1, seq2, matrix, score_only=False)

    Score two equally long sequences position by position, without
    inserting any gaps.

    Parameters
    ----------
    seq1, seq2 : Sequence
        The sequences to be compared.
        Both must have the same length.
    matrix : SubstitutionMatrix
        The substitution matrix the scores are taken from.
    score_only : bool, optional
        If true, only the similarity score is returned and no
        alignment object is created.

    Returns
    -------
    score : Alignment or int
        The trivial alignment, where the symbols at equal positions are
        aligned to each other.
        If `score_only` is true, the score is returned instead.

    Raises
    ------
    ValueError
        If the sequences differ in length or if the alphabets of the
        matrix do not extend the alphabets of the sequences.
    """
    if len(seq1) != len(seq2):
        raise ValueError(
            f"Different sequence lengths ({len(seq1):d} and {len(seq2):d})"
        )

    alphabets_fit = (
        matrix.get_alphabet1().extends(seq1.get_alphabet())
        and matrix.get_alphabet2().extends(seq2.get_alphabet())
    )
    if not alphabets_fit:
        raise ValueError("The sequences' alphabets do not fit the matrix")

    total_score = _add_scores(seq1.code, seq2.code, matrix.score_matrix())
    if score_only:
        return total_score

    # No actual alignment step is necessary:
    # Position i of the first sequence is paired with position i of the
    # second one, i.e. the trace is
    # [[0 0]
    #  [1 1]
    #  [2 2]
    #  ... ]
    positions = np.arange(len(seq1))
    trivial_trace = np.vstack((positions, positions)).T
    return Alignment(
        sequences = [seq1, seq2],
        trace = trivial_trace,
        score = total_score
    )
85
+
86
+
87
@cython.boundscheck(False)
@cython.wraparound(False)
def _add_scores(CodeType1[:] code1 not None,
                CodeType2[:] code2 not None,
                const int32[:,:] matrix not None):
    """
    Sum up the substitution scores of the symbol codes that are paired
    at equal positions of `code1` and `code2`.

    The iteration runs over the length of `code1`.
    As bounds checking and negative index wraparound are switched off
    for this function, the caller must ensure that `code2` is at least
    as long as `code1` and that all codes are valid indices of
    `matrix`.
    """
    cdef int pos
    cdef int32 total = 0
    for pos in range(code1.shape[0]):
        total = total + matrix[code1[pos], code2[pos]]
    return total
97
+
98
+
99
def align_optimal(seq1, seq2, matrix, gap_penalty=-10,
                  terminal_penalty=True, local=False,
                  max_number=1000):
    """
    align_optimal(seq1, seq2, matrix, gap_penalty=-10,
                  terminal_penalty=True, local=False, max_number=1000)

    Perform an optimal alignment of two sequences based on a
    dynamic programming algorithm.

    This algorithm yields an optimal alignment, i.e. the sequences
    are aligned in the way that results in the highest similarity
    score. This operation can be very time and space consuming,
    because both scale linearly with each sequence length:
    The tables allocated by this function have
    ``(len(seq1) + 1) * (len(seq2) + 1)`` cells.

    The aligned sequences do not need to be instances from the same
    :class:`Sequence` subclass, since they do not need to have the same
    alphabet. The only requirement is that the
    :class:`SubstitutionMatrix`' alphabets extend the alphabets of the
    two sequences.

    This function can either perform a global alignment, based on the
    Needleman-Wunsch algorithm :footcite:`Needleman1970` or a local
    alignment, based on the Smith–Waterman algorithm
    :footcite:`Smith1981`.

    Furthermore this function supports affine gap penalties using the
    Gotoh algorithm :footcite:`Gotoh1982`, however, this requires
    approximately 4 times the RAM space and execution time.

    Parameters
    ----------
    seq1, seq2 : Sequence
        The sequences to be aligned.
    matrix : SubstitutionMatrix
        The substitution matrix used for scoring.
    gap_penalty : int or tuple(int, int), optional
        If an integer is provided, the value will be interpreted as
        linear gap penalty.
        If a tuple is provided, an affine gap penalty is used.
        The first integer in the tuple is the gap opening penalty,
        the second integer is the gap extension penalty.
        The values need to be negative.
    terminal_penalty : bool, optional
        If true, gap penalties are applied to terminal gaps.
        If `local` is true, this parameter has no effect.
    local : bool, optional
        If false, a global alignment is performed, otherwise a local
        alignment is performed.
    max_number : int, optional
        The maximum number of alignments returned.
        When the number of branches exceeds this value in the traceback
        step, no further branches are created.

    Returns
    -------
    alignments : list, type=Alignment
        A list of alignments.
        Each alignment in the list has the same maximum similarity
        score.

    Raises
    ------
    ValueError
        If the alphabets of `matrix` do not extend the alphabets of the
        sequences, if a gap penalty is positive or if `max_number` is
        smaller than 1.
    TypeError
        If `gap_penalty` is neither an ``int`` nor a ``tuple``.

    See Also
    --------
    align_banded

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> seq1 = NucleotideSequence("ATACGCTTGCT")
    >>> seq2 = NucleotideSequence("AGGCGCAGCT")
    >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
    >>> ali = align_optimal(seq1, seq2, matrix, gap_penalty=-6)
    >>> for a in ali:
    ...     print(a, "\\n")
    ATACGCTTGCT
    AGGCGCA-GCT
    <BLANKLINE>
    ATACGCTTGCT
    AGGCGC-AGCT
    <BLANKLINE>
    """
    # Check matrix alphabets
    if not matrix.get_alphabet1().extends(seq1.get_alphabet()) \
       or not matrix.get_alphabet2().extends(seq2.get_alphabet()):
        raise ValueError("The sequences' alphabets do not fit the matrix")
    # Check if gap penalty is linear or affine
    # The type is compared exactly, i.e. subclasses of int or tuple
    # end up in the TypeError branch
    # NOTE(review): only positive penalties are rejected, a penalty of 0
    # passes although the error message asks for a negative value
    if type(gap_penalty) == int:
        if gap_penalty > 0:
            raise ValueError("Gap penalty must be negative")
        affine_penalty = False
    elif type(gap_penalty) == tuple:
        if gap_penalty[0] > 0 or gap_penalty[1] > 0:
            raise ValueError("Gap penalty must be negative")
        affine_penalty = True
    else:
        raise TypeError("Gap penalty must be either integer or tuple")
    # Check if max_number is reasonable
    if max_number < 1:
        raise ValueError(
            "Maximum number of returned alignments must be at least 1"
        )


    # This implementation uses transposed tables in comparison
    # to the common visualization
    # This means the first sequence is on the left
    # and the second sequence is at the top
    # The tables have one additional leading row and column
    trace_table = np.zeros(( len(seq1)+1, len(seq2)+1 ), dtype=np.uint8)
    code1 = seq1.code
    code2 = seq2.code

    # Table filling
    ###############
    if affine_penalty:
        # Affine gap penalty
        gap_open = gap_penalty[0]
        gap_ext = gap_penalty[1]
        # Value for negative infinity
        # Used to prevent unallowed state transitions
        # Subtraction of gap_open, gap_ext and lowest score value
        # to prevent integer overflow:
        # As the penalties are not positive at this point, the
        # subtraction moves the value away from the int32 minimum
        neg_inf = np.iinfo(np.int32).min - gap_open - gap_ext
        min_score = np.min(matrix.score_matrix())
        if min_score < 0:
            neg_inf -= min_score
        # m_table, g1_table and g2_table are the 3 score tables
        m_table = np.zeros((len(seq1)+1, len(seq2)+1), dtype=np.int32)
        # Fill with negative infinity values to prevent that an
        # alignment trace starts with a gap extension
        # instead of a gap opening
        g1_table = np.full((len(seq1)+1, len(seq2)+1), neg_inf, dtype=np.int32)
        g2_table = np.full((len(seq1)+1, len(seq2)+1), neg_inf, dtype=np.int32)
        # Disallow trace coming from the match table on the
        # left column/top row, as these represent terminal gaps
        m_table [0, 1:] = neg_inf
        m_table [1:, 0] = neg_inf
        # Initialize first row and column for global alignments
        if not local:
            if terminal_penalty:
                # Terminal gaps are penalized
                # -> Penalties in first row/column:
                # One gap opening plus an increasing number of
                # gap extensions
                g1_table[0, 1:] = (np.arange(len(seq2)) * gap_ext) + gap_open
                g2_table[1:, 0] = (np.arange(len(seq1)) * gap_ext) + gap_open
            else:
                # Terminal gaps are free of charge
                g1_table[0, 1:] = np.zeros(len(seq2))
                g2_table[1:, 0] = np.zeros(len(seq1))
            # The first row/column can only be reached via gaps:
            # The first step opens the gap, all further steps extend it
            trace_table[0, 1] = TraceDirectionAffine.MATCH_TO_GAP_LEFT
            trace_table[0, 2:] = TraceDirectionAffine.GAP_LEFT_TO_GAP_LEFT
            trace_table[1, 0] = TraceDirectionAffine.MATCH_TO_GAP_TOP
            trace_table[2: ,0] = TraceDirectionAffine.GAP_TOP_TO_GAP_TOP
        else:
            # Local alignment: Zero scores in the first row/column of
            # the gap tables, the trace table remains zero there
            g1_table[0, 1:] = np.zeros(len(seq2))
            g2_table[1:, 0] = np.zeros(len(seq1))
        _fill_align_table_affine(code1, code2,
                                 matrix.score_matrix(), trace_table,
                                 m_table, g1_table, g2_table,
                                 gap_open, gap_ext, terminal_penalty, local)
    else:
        # Linear gap penalty
        # The table for saving the scores
        score_table = np.zeros(( len(seq1)+1, len(seq2)+1 ), dtype=np.int32)
        # Initialize first row and column for global alignments
        if not local:
            if terminal_penalty:
                # Terminal gaps are penalized
                # -> Penalties in first row/column
                score_table[:,0] = np.arange(len(seq1)+1) * gap_penalty
                score_table[0,:] = np.arange(len(seq2)+1) * gap_penalty
            # The first row/column can only be reached via gaps
            trace_table[1:,0] = TraceDirectionLinear.GAP_TOP
            trace_table[0,1:] = TraceDirectionLinear.GAP_LEFT
        _fill_align_table(code1, code2, matrix.score_matrix(), trace_table,
                          score_table, gap_penalty, terminal_penalty, local)


    # Traceback
    ###########
    # Stores all possible traces (= possible alignments)
    # A trace stores the indices of the aligned symbols
    # in both sequences
    trace_list = []
    # Lists of trace starting indices
    i_list = np.zeros(0, dtype=int)
    j_list = np.zeros(0, dtype=int)
    # List of start states
    # State specifies the table the trace starts in:
    # 0 is the single table of the linear gap penalty,
    # 1, 2 and 3 are m_table, g1_table and g2_table, respectively
    state_list = np.zeros(0, dtype=int)
    if local:
        # The start point is the maximal score in the table
        # Multiple starting points possible,
        # when duplicates of maximal score exist
        if affine_penalty:
            # The maximum scores in the gap score tables do not need to
            # be considered, as these starting positions would indicate
            # that the local alignment starts with a gap
            # Hence the maximum score value in these tables is always
            # less than in the match table
            max_score = np.max(m_table)
            i_list, j_list = np.where((m_table == max_score))
            state_list = np.append(state_list, np.full(len(i_list), 1))
        else:
            max_score = np.max(score_table)
            i_list, j_list = np.where((score_table == max_score))
            # State is always 0 for linear gap penalty
            # since there is only one table
            state_list = np.zeros(len(i_list), dtype=int)
    else:
        # The start point is the last element in the table
        # -1 in start indices due to sequence offset mentioned before
        i_start = trace_table.shape[0] -1
        j_start = trace_table.shape[1] -1
        if affine_penalty:
            # Each of the three tables that holds the maximum score in
            # its last cell contributes a separate start state
            max_score = max(m_table[i_start,j_start],
                            g1_table[i_start,j_start],
                            g2_table[i_start,j_start])
            if m_table[i_start,j_start] == max_score:
                i_list = np.append(i_list, i_start)
                j_list = np.append(j_list, j_start)
                state_list = np.append(state_list, 1)
            if g1_table[i_start,j_start] == max_score:
                i_list = np.append(i_list, i_start)
                j_list = np.append(j_list, j_start)
                state_list = np.append(state_list, 2)
            if g2_table[i_start,j_start] == max_score:
                i_list = np.append(i_list, i_start)
                j_list = np.append(j_list, j_start)
                state_list = np.append(state_list, 3)
        else:
            i_list = np.append(i_list, i_start)
            j_list = np.append(j_list, j_start)
            state_list = np.append(state_list, 0)
            max_score = score_table[i_start,j_start]
    # Follow the traces specified in state and indices lists
    # The counter is a C integer, as it is handed to 'follow_trace()'
    # by pointer
    cdef int curr_trace_count
    for k in range(len(i_list)):
        i_start = i_list[k]
        j_start = j_list[k]
        state_start = state_list[k]
        # Pessimistic array allocation:
        # The maximum trace length arises from an alignment, where each
        # symbol is aligned to a gap
        trace = np.full(( i_start+1 + j_start+1, 2 ), -1, dtype=np.int64)
        # The counter is reset for each starting point
        curr_trace_count = 1
        follow_trace(
            trace_table, False, i_start, j_start, 0, trace, trace_list,
            state=state_start, curr_trace_count=&curr_trace_count,
            max_trace_count=max_number,
            # Diagonals are only needed for banded alignments
            lower_diag=0, upper_diag=0
        )

    # Replace gap entries in trace with -1
    for i, trace in enumerate(trace_list):
        # Reverse the row order of the trace
        # (presumably the traceback records it from end to start)
        trace = np.flip(trace, axis=0)
        # For each column only the first occurrence of each distinct
        # index is kept, every repeated occurrence is set to -1
        gap_filter = np.zeros(trace.shape, dtype=bool)
        gap_filter[np.unique(trace[:,0], return_index=True)[1], 0] = True
        gap_filter[np.unique(trace[:,1], return_index=True)[1], 1] = True
        trace[~gap_filter] = -1
        trace_list[i] = trace

    # Limit the number of generated alignments to `max_number`:
    # In most cases this is achieved by discarding branches in
    # 'follow_trace()', however, if multiple local alignment starts
    # are used, the number of created traces are the number of
    # starts times `max_number`
    trace_list = trace_list[:max_number]
    return [Alignment([seq1, seq2], trace, max_score) for trace in trace_list]
370
+
371
+
372
@cython.boundscheck(False)
@cython.wraparound(False)
def _fill_align_table(CodeType1[:] code1 not None,
                      CodeType2[:] code2 not None,
                      const int32[:,:] matrix not None,
                      uint8[:,:] trace_table not None,
                      int32[:,:] score_table not None,
                      int gap_penalty,
                      bint term_penalty,
                      bint local):
    """
    Fill an alignment table with linear gap penalty using dynamic
    programming.

    Only the cells with ``i >= 1`` and ``j >= 1`` are written; the first
    row and column must already be filled by the caller.

    Parameters
    ----------
    code1, code2
        The sequence code of each sequence to be aligned.
    matrix
        The score matrix obtained from the :class:`SubstitutionMatrix`
        object.
    trace_table
        A matrix containing values indicating the direction for the
        traceback step.
        The matrix is filled in this function.
    score_table
        The alignment table.
        The matrix is filled in this function.
    gap_penalty
        The linear gap penalty.
    term_penalty
        Indicates, whether terminal gaps should be penalized.
        Ignored (treated as true) if `local` is true.
    local
        Indicates, whether a local alignment should be performed.
    """

    cdef int i, j
    # Explicitly typed, as these indices are compared against `i` and `j`
    # in the innermost loop: untyped names would be handled as Python
    # objects there
    cdef int i_max, j_max
    cdef int32 from_diag, from_left, from_top
    cdef uint8 trace
    cdef int32 score

    # For local alignments terminal gaps on the right and the bottom are
    # ignored anyway, as the alignment should stop before
    if local:
        term_penalty = True
    # Used in case terminal gaps are not penalized
    i_max = score_table.shape[0] - 1
    j_max = score_table.shape[1] - 1

    # Starts at 1 since the first row and column are already filled
    for i in range(1, score_table.shape[0]):
        for j in range(1, score_table.shape[1]):
            # Evaluate score from diagonal direction
            # -1 in sequence index is necessary
            # due to the shift of the sequences
            # to the bottom/right in the table
            from_diag = score_table[i-1, j-1] + matrix[code1[i-1], code2[j-1]]
            # Evaluate score from left direction
            # Moving along the last row is a terminal gap -> no penalty
            # if terminal gaps are not penalized
            if not term_penalty and i == i_max:
                from_left = score_table[i, j-1]
            else:
                from_left = score_table[i, j-1] + gap_penalty
            # Evaluate score from top direction
            # Moving along the last column is a terminal gap as well
            if not term_penalty and j == j_max:
                from_top = score_table[i-1, j]
            else:
                from_top = score_table[i-1, j] + gap_penalty

            # The resulting cell score is written into `score`
            trace = get_trace_linear(from_diag, from_left, from_top, &score)

            # Local alignment specialty:
            # If score is less than or equal to 0,
            # then the cell is left untouched
            # (i.e. it keeps its initial value, expected to be 0)
            # and the trace ends here
            if local and score <= 0:
                continue

            score_table[i,j] = score
            trace_table[i,j] = trace
452
+
453
+
454
@cython.boundscheck(False)
@cython.wraparound(False)
def _fill_align_table_affine(CodeType1[:] code1 not None,
                             CodeType2[:] code2 not None,
                             const int32[:,:] matrix not None,
                             uint8[:,:] trace_table not None,
                             int32[:,:] m_table not None,
                             int32[:,:] g1_table not None,
                             int32[:,:] g2_table not None,
                             int gap_open,
                             int gap_ext,
                             bint term_penalty,
                             bint local):
    """
    Fill an alignment table with affine gap penalty using dynamic
    programming.

    Only the cells with ``i >= 1`` and ``j >= 1`` are written; the first
    row and column must already be filled by the caller.

    Parameters
    ----------
    code1, code2
        The sequence code of each sequence to be aligned.
    matrix
        The score matrix obtained from the :class:`SubstitutionMatrix`
        object.
    trace_table
        A matrix containing values indicating the direction for the
        traceback step.
        The matrix is filled in this function.
    m_table, g1_table, g2_table
        The alignment tables containing the scores.
        `m_table` contains values for matches.
        `g1_table` contains values for gaps in the first sequence.
        `g2_table` contains values for gaps in the second sequence.
        The matrices are filled in this function.
    gap_open
        The gap opening penalty.
    gap_ext
        The gap extension penalty.
    term_penalty
        Indicates, whether terminal gaps should be penalized.
        Ignored (treated as true) if `local` is true.
    local
        Indicates, whether a local alignment should be performed.
    """

    cdef int i, j
    # Explicitly typed, as these indices are compared against `i` and `j`
    # in the innermost loop: untyped names would be handled as Python
    # objects there
    cdef int i_max, j_max
    cdef int32 mm_score, g1m_score, g2m_score
    cdef int32 mg1_score, g1g1_score
    cdef int32 mg2_score, g2g2_score
    cdef int32 m_score, g1_score, g2_score
    cdef int32 similarity_score
    cdef uint8 trace

    # For local alignments terminal gaps on the right and the bottom are
    # ignored anyway, as the alignment should stop before
    if local:
        term_penalty = True
    # Used in case terminal gaps are not penalized
    i_max = trace_table.shape[0] - 1
    j_max = trace_table.shape[1] - 1

    # Starts at 1 since the first row and column are already filled
    for i in range(1, trace_table.shape[0]):
        for j in range(1, trace_table.shape[1]):
            # Calculate the scores for possible transitions
            # into the current cell
            similarity_score = matrix[code1[i-1], code2[j-1]]
            mm_score = m_table[i-1,j-1] + similarity_score
            g1m_score = g1_table[i-1,j-1] + similarity_score
            g2m_score = g2_table[i-1,j-1] + similarity_score
            # No transition from g1_table to g2_table and vice versa
            # Since this would mean adjacent gaps in both sequences
            # A substitution makes more sense in this case
            if not term_penalty and i == i_max:
                # Terminal gap along the last row -> no penalty
                mg1_score = m_table[i,j-1]
                g1g1_score = g1_table[i,j-1]
            else:
                mg1_score = m_table[i,j-1] + gap_open
                g1g1_score = g1_table[i,j-1] + gap_ext
            if not term_penalty and j == j_max:
                # Terminal gap along the last column -> no penalty
                mg2_score = m_table[i-1,j]
                g2g2_score = g2_table[i-1,j]
            else:
                mg2_score = m_table[i-1,j] + gap_open
                g2g2_score = g2_table[i-1,j] + gap_ext

            trace = get_trace_affine(
                mm_score, g1m_score, g2m_score,
                mg1_score, g1g1_score,
                mg2_score, g2g2_score,
                # The max score values to be written
                &m_score, &g1_score, &g2_score
            )

            # Fill values into tables
            # Local alignment specialty:
            # If score is less than or equal to 0,
            # then the respective table cell is left untouched
            # and the trace ends here
            if local:
                if m_score <= 0:
                    # End trace in specific table
                    # by filtering out the respective bits
                    trace &= ~(
                        TraceDirectionAffine.MATCH_TO_MATCH |
                        TraceDirectionAffine.GAP_LEFT_TO_MATCH |
                        TraceDirectionAffine.GAP_TOP_TO_MATCH
                    )
                    # m_table[i,j] remains 0
                else:
                    m_table[i,j] = m_score
                if g1_score <= 0:
                    trace &= ~(
                        TraceDirectionAffine.MATCH_TO_GAP_LEFT |
                        TraceDirectionAffine.GAP_LEFT_TO_GAP_LEFT
                    )
                    # g1_table[i,j] remains negative infinity
                else:
                    g1_table[i,j] = g1_score
                if g2_score <= 0:
                    trace &= ~(
                        TraceDirectionAffine.MATCH_TO_GAP_TOP |
                        TraceDirectionAffine.GAP_TOP_TO_GAP_TOP
                    )
                    # g2_table[i,j] remains negative infinity
                else:
                    g2_table[i,j] = g2_score
            else:
                m_table[i,j] = m_score
                g1_table[i,j] = g1_score
                g2_table[i,j] = g2_score
            # The trace is always stored, in local mode with the bits
            # of ended traces removed
            trace_table[i,j] = trace