biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,313 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.align"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["Permutation", "RandomPermutation", "FrequencyPermutation"]
8
+
9
+ cimport cython
10
+ cimport numpy as np
11
+
12
+ import abc
13
+ import numpy as np
14
+
15
+
16
+ ctypedef np.int64_t int64
17
+
18
+
19
class Permutation(metaclass=abc.ABCMeta):
    """
    Abstract base class for objects that assign a sort order to
    *k-mers*.
    Such an order is usually consumed by *k-mer* subset selectors,
    for example :class:`MinimizerSelector`.
    How the order is derived depends on the concrete subclass.

    If no :class:`Permutation` is supplied, subset selectors usually
    fall back to the symbol order of the :class:`KmerAlphabet`.
    That order is often the lexicographical one, which is known to
    yield suboptimal *k-mer* selection in many cases
    :footcite:`Roberts2004`.

    Attributes
    ----------
    min, max: int
        The minimum and maximum value the permuted value
        (i.e. the return value of :meth:`permute()`) can take.
        Must be overridden by subclasses.
    """

    @property
    @abc.abstractmethod
    def min(self):
        # Abstract placeholder: concrete subclasses supply the value
        return None

    @property
    @abc.abstractmethod
    def max(self):
        # Abstract placeholder: concrete subclasses supply the value
        return None

    @abc.abstractmethod
    def permute(self, kmers):
        """
        permute(kmers)

        Assign a new order to the given *k-mers*.

        Parameters
        ----------
        kmers : ndarray, dtype=np.int64
            The *k-mers* to reorder, given as *k-mer* codes.

        Returns
        -------
        order : ndarray, dtype=np.int64
            The sort key for the new order, i.e. a *k-mer* ``A`` is
            smaller than a *k-mer* ``B``, if ``order[A] < order[B]``.
            The order values may be negative as well as positive
            integers.
            The order is unambiguous:
            If ``A != B``, then ``order[A] != order[B]``.
        """
76
+
77
+
78
class RandomPermutation(Permutation):
    r"""
    Provide a pseudo-randomized order for *k-mers*.

    Notes
    -----

    The pseudo-randomized values are produced by a simple full-period
    *linear congruential generator* (LCG):

    .. math:: \text{order} = (a \, c_\text{k-mer} + 1) \mod 2^{64}.

    The factor :math:`a` is taken from :footcite:`Steele2021` to ensure
    full periodicity and good random behavior.
    Note, however, that LCGs in general do not provide perfect random
    behavior, but only *good-enough* values for this purpose.

    Attributes
    ----------
    min, max: int
        The minimum and maximum value the permuted value
        (i.e. the return value of :meth:`permute()`) can take.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> kmer_alph = KmerAlphabet(NucleotideSequence.alphabet_unamb, k=2)
    >>> permutation = RandomPermutation()
    >>> # k-mer codes representing the k-mers from 'AA' to 'TT'
    >>> # in lexicographic order
    >>> kmer_codes = np.arange(len(kmer_alph))
    >>> print(kmer_codes)
    [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
    >>> print(["".join(kmer_alph.decode(c)) for c in kmer_codes])
    ['AA', 'AC', 'AG', 'AT', 'CA', 'CC', 'CG', 'CT', 'GA', 'GC', 'GG', 'GT', 'TA', 'TC', 'TG', 'TT']
    >>> # Shuffle order of these k-mer codes using the permutation
    >>> order = permutation.permute(kmer_codes)
    >>> print(order)
    [                   1 -3372029247567499370 -6744058495134998741
      8330656331007053504  4958627083439554133  1586597835872054762
     -1785431411695444609 -5157460659262943980 -8529489906830443351
      6545224919311608894  3173195671744109523  -198833575823389848
     -3570862823390889219 -6942892070958388590  8131822755183663655
      4759793507616164284]
    >>> # The order is not lexicographic anymore
    >>> kmer_codes = kmer_codes[np.argsort(order)]
    >>> print(["".join(kmer_alph.decode(c)) for c in kmer_codes])
    ['GA', 'TC', 'AG', 'CT', 'TA', 'AC', 'CG', 'GT', 'AA', 'CC', 'GG', 'TT', 'CA', 'GC', 'TG', 'AT']
    """

    # Multiplier and increment of the LCG
    LCG_A = 0xd1342543de82ef95
    LCG_C = 1

    @property
    def min(self):
        int64_limits = np.iinfo(np.int64)
        return int64_limits.min

    @property
    def max(self):
        int64_limits = np.iinfo(np.int64)
        return int64_limits.max

    def permute(self, kmers):
        # Reinterpret the signed k-mer codes as unsigned integers
        # to harness the m=2^64 LCG
        unsigned_codes = kmers.astype(np.int64, copy=False).view(np.uint64)
        # An explicit modulo operation is not necessary,
        # as the corresponding bits are truncated automatically
        scrambled = (
            RandomPermutation.LCG_A * unsigned_codes + RandomPermutation.LCG_C
        )
        # Reinterpret as the required signed int64:
        # The resulting integer overflow changes the order, but this is
        # no problem since the order is pseudo-random anyway
        return scrambled.view(np.int64)
159
+
160
+
161
class FrequencyPermutation(Permutation):
    """
    __init__(kmer_alphabet, counts)

    Provide an order for *k-mers* from a given
    :class:`KmerAlphabet`, such that less frequent *k-mers* are smaller
    than more frequent *k-mers*.
    The frequency of each *k-mer* can either be given directly via the
    constructor or can be computed from a :class:`KmerTable` via
    :meth:`from_table()`.

    Parameters
    ----------
    kmer_alphabet : KmerAlphabet, length=n
        The *k-mer* alphabet that defines the range of possible *k-mers*
        that should be permuted.
    counts : ndarray, shape=(n,), dtype=np.int64
        The absolute frequency, i.e. the number of occurrences, of each
        *k-mer* in `kmer_alphabet` in the sequence database of interest.
        ``counts[c] = f``, where ``c`` is the *k-mer* code and ``f`` is
        the corresponding frequency.

    Raises
    ------
    IndexError
        If the number of `counts` does not match the number of *k-mers*
        in `kmer_alphabet`.

    Attributes
    ----------
    min, max: int
        The minimum and maximum value the permuted value
        (i.e. the return value of :meth:`permute()`) can take.
    kmer_alphabet : KmerAlphabet
        The *k-mer* alphabet that defines the range of possible *k-mers*
        that should be permuted.

    Notes
    -----

    In actual sequences some sequence patterns appear in high quantity.
    When selecting a subset of *k-mers*, e.g. via
    :class:`MinimizerSelector`, it is desirable to select the
    low-frequency *informative* *k-mers* to avoid spurious matches.
    To achieve such selection this class can be used.

    This class uses a table to look up the order.
    Hence, the memory consumption is :math:`8 n^k` bytes,
    where :math:`n` is the size of the base alphabet and :math:`k` is
    the *k-mer* size.

    Examples
    --------

    >>> alphabet = LetterAlphabet("abcdr")
    >>> sequence = GeneralSequence(alphabet, "abracadabra")
    >>> kmer_table = KmerTable.from_sequences(k=2, sequences=[sequence])
    >>> print(kmer_table)
    ab: (0, 0), (0, 7)
    ac: (0, 3)
    ad: (0, 5)
    br: (0, 1), (0, 8)
    ca: (0, 4)
    da: (0, 6)
    ra: (0, 2), (0, 9)
    >>> # Create all k-mers in lexicographic order
    >>> kmer_alph = kmer_table.kmer_alphabet
    >>> kmer_codes = np.arange(0, len(kmer_alph))
    >>> print(["..."] + ["".join(kmer_alph.decode(c)) for c in kmer_codes[-10:]])
    ['...', 'da', 'db', 'dc', 'dd', 'dr', 'ra', 'rb', 'rc', 'rd', 'rr']
    >>> # After applying the permutation the k-mers are ordered
    >>> # by their frequency in the table
    >>> # -> the most frequent k-mers are placed last
    >>> permutation = FrequencyPermutation.from_table(kmer_table)
    >>> order = permutation.permute(kmer_codes)
    >>> print(order)
    [ 0 22 18 19  1  2  3  4  5 23 20  6  7  8  9 21 10 11 12 13 24 14 15 16
     17]
    >>> kmer_codes = kmer_codes[np.argsort(order)]
    >>> print(["..."] + ["".join(kmer_alph.decode(c)) for c in kmer_codes[-10:]])
    ['...', 'rc', 'rd', 'rr', 'ac', 'ad', 'ca', 'da', 'ab', 'br', 'ra']
    """

    def __init__(self, kmer_alphabet, counts):
        if len(kmer_alphabet) != len(counts):
            raise IndexError(
                f"The k-mer alphabet has {len(kmer_alphabet)} k-mers, "
                f"but {len(counts)} counts were given"
            )
        # 'order' maps a permutation to a k-mer
        # Stability is important to get the same k-mer subset selection
        # on different architectures
        order = np.argsort(counts, kind="stable")
        # 'argsort()' returns platform-sized indices (np.intp), whereas
        # '_invert_mapping()' accepts only an int64 buffer
        # -> cast explicitly (no copy if the dtype is already int64)
        order = order.astype(np.int64, copy=False)
        # '_permutation_table' should perform the reverse mapping
        self._permutation_table = _invert_mapping(order)
        self._kmer_alph = kmer_alphabet

    @property
    def min(self):
        return 0

    @property
    def max(self):
        return len(self._permutation_table) - 1

    @property
    def kmer_alphabet(self):
        return self._kmer_alph

    @classmethod
    def from_table(cls, kmer_table):
        """
        from_table(kmer_table)

        Create a :class:`FrequencyPermutation` from the *k-mer* counts
        of a :class:`KmerTable`.

        Parameters
        ----------
        kmer_table : KmerTable
            The *k-mer* counts are taken from this table.

        Returns
        -------
        permutation : FrequencyPermutation
            The permutation is based on the counts.
            If called on a subclass, an instance of that subclass is
            returned.
        """
        # Instantiate via 'cls' so that subclasses get their own type
        return cls(kmer_table.kmer_alphabet, kmer_table.count())

    def permute(self, kmers):
        # Plain table lookup: the k-mer code is the index into the table
        return self._permutation_table[kmers]
292
+
293
+
294
@cython.boundscheck(False)
@cython.wraparound(False)
def _invert_mapping(int64[:] mapping):
    """
    Invert a mapping between unique integers.

    If `mapping` maps a unique integer ``A`` to a unique integer
    ``B``, i.e. ``B = mapping[A]``, the returned array fulfills
    ``A = inverted[B]``.

    The mapping must be bijective and its values must be in the
    range ``0..n``.
    Bounds checking and negative index wraparound are disabled for
    this function, so values violating this requirement are not
    detected.
    """
    cdef int64 source
    cdef int64 target
    cdef int64 size = mapping.shape[0]

    cdef int64[:] inverse = np.empty(size, dtype=np.int64)
    for source in range(size):
        target = mapping[source]
        inverse[target] = source

    return np.asarray(inverse)