biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,555 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = [
8
+ "Alphabet",
9
+ "LetterAlphabet",
10
+ "AlphabetMapper",
11
+ "AlphabetError",
12
+ "common_alphabet",
13
+ ]
14
+
15
+ import string
16
+ from numbers import Integral
17
+ import numpy as np
18
+ from biotite.sequence.codec import decode_to_chars, encode_chars, map_sequence_code
19
+
20
+
21
class Alphabet(object):
    """
    This class defines the allowed symbols for a :class:`Sequence` and
    handles the encoding/decoding between symbols and symbol codes.

    An :class:`Alphabet` is created with the list of symbols, that can
    be used in this context.
    In most cases a symbol will be simply a letter, hence a string of
    length 1. But in principle every hashable Python object can serve
    as symbol.

    The encoding of a symbol into a symbol code is
    done in the following way: Find the first index in the symbol list,
    where the list element equals the symbol. This index is the
    symbol code. If the symbol is not found in the list, an
    :class:`AlphabetError` is raised.

    Internally, a dictionary is used for encoding, with symbols as keys
    and symbol codes as values. Therefore, every symbol must be
    hashable. For decoding the symbol list is indexed with the symbol
    code.

    If an alphabet *1* contains the same symbols and the same
    symbol-code-mappings like another alphabet *2*, but alphabet *1*
    introduces also new symbols, then alphabet *1* *extends* alphabet
    *2*.
    Per definition, every alphabet also extends itself.

    Objects of this class are immutable.

    Parameters
    ----------
    symbols : iterable object
        The symbols, that are allowed in this alphabet. The
        corresponding code for a symbol, is the index of that symbol
        in this list.
        Any iterable is accepted, including ones without a length
        (e.g. generators).

    Raises
    ------
    ValueError
        If `symbols` is empty.

    Examples
    --------
    Create an Alphabet containing DNA letters and encode/decode a
    letter/code:

    >>> alph = Alphabet(["A","C","G","T"])
    >>> print(alph.encode("G"))
    2
    >>> print(alph.decode(2))
    G
    >>> try:
    ...    alph.encode("foo")
    ... except Exception as e:
    ...    print(e)
    Symbol 'foo' is not in the alphabet

    Create an Alphabet of arbitrary objects:

    >>> alph = Alphabet(["foo", 42, (1,2,3), 5, 3.141])
    >>> print(alph.encode((1,2,3)))
    2
    >>> print(alph.decode(4))
    3.141

    On the subject of alphabet extension:
    An alphabet always extends itself.

    >>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","G","T"]))
    True

    An alphabet extends an alphabet when it contains additional symbols...

    >>> Alphabet(["A","C","G","T","U"]).extends(Alphabet(["A","C","G","T"]))
    True

    ...but not vice versa

    >>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","G","T","U"]))
    False

    Two alphabets with same symbols but different symbol-code-mappings

    >>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","T","G"]))
    False
    """

    def __init__(self, symbols):
        # Materialize exactly once: this supports iterables without a
        # length (e.g. generators) and guarantees that the symbol tuple
        # and the encoding dictionary are built from the same elements
        symbols = tuple(symbols)
        if not symbols:
            raise ValueError("Symbol list is empty")
        self._symbols = symbols
        self._symbol_dict = {}
        for i, symbol in enumerate(symbols):
            # 'setdefault()' keeps the first index of a repeated symbol,
            # as promised by the class documentation
            self._symbol_dict.setdefault(symbol, i)

    def __repr__(self):
        """Represent Alphabet as a string for debugging."""
        return f"Alphabet({self._symbols})"

    def get_symbols(self):
        """
        Get the symbols in the alphabet.

        Returns
        -------
        symbols : tuple
            The symbols.
        """
        return self._symbols

    def extends(self, alphabet):
        """
        Check, if this alphabet extends another alphabet.

        Parameters
        ----------
        alphabet : Alphabet
            The potential parent alphabet.

        Returns
        -------
        result : bool
            True, if this object extends `alphabet`, false otherwise.
        """
        if alphabet is self:
            return True
        elif len(alphabet) > len(self):
            # A shorter alphabet cannot contain all symbols of a longer one
            return False
        else:
            # The other alphabet must be a prefix of this alphabet,
            # so that shared symbols have identical codes
            return alphabet.get_symbols() == self.get_symbols()[: len(alphabet)]

    def encode(self, symbol):
        """
        Use the alphabet to encode a symbol.

        Parameters
        ----------
        symbol : object
            The object to encode into a symbol code.

        Returns
        -------
        code : int
            The symbol code of `symbol`.

        Raises
        ------
        AlphabetError
            If `symbol` is not in the alphabet.
        """
        try:
            return self._symbol_dict[symbol]
        except KeyError:
            raise AlphabetError(f"Symbol {repr(symbol)} is not in the alphabet")

    def decode(self, code):
        """
        Use the alphabet to decode a symbol code.

        Parameters
        ----------
        code : int
            The symbol code to be decoded.

        Returns
        -------
        symbol : object
            The symbol corresponding to `code`.

        Raises
        ------
        AlphabetError
            If `code` is not a valid code in the alphabet.
        """
        # Negative codes are rejected explicitly, as they would
        # otherwise index the symbol tuple from its end
        if code < 0 or code >= len(self._symbols):
            raise AlphabetError(f"'{code:d}' is not a valid code")
        return self._symbols[code]

    def encode_multiple(self, symbols, dtype=np.int64):
        """
        Encode a list of symbols.

        Parameters
        ----------
        symbols : array-like
            The symbols to encode.
        dtype : dtype, optional
            The dtype of the output ndarray.

        Returns
        -------
        code : ndarray
            The sequence code.

        Raises
        ------
        AlphabetError
            If any of the `symbols` is not in the alphabet.
        """
        return np.array([self.encode(e) for e in symbols], dtype=dtype)

    def decode_multiple(self, code):
        """
        Decode a sequence code into a list of symbols.

        Parameters
        ----------
        code : ndarray
            The sequence code to decode.

        Returns
        -------
        symbols : list
            The decoded list of symbols.

        Raises
        ------
        AlphabetError
            If any element of `code` is not a valid code in the
            alphabet.
        """
        return [self.decode(c) for c in code]

    def is_letter_alphabet(self):
        """
        Check whether the symbols in this alphabet are single printable
        letters.
        If so, the alphabet could be expressed by a `LetterAlphabet`.

        Returns
        -------
        is_letter_alphabet : bool
            True, if all symbols in the alphabet are 'str' or 'bytes',
            have length 1 and are printable.
        """
        for symbol in self:
            # The length must be exactly 1: an empty string would
            # otherwise pass the containment check below
            if not isinstance(symbol, (str, bytes)) or len(symbol) != 1:
                return False
            if isinstance(symbol, str):
                try:
                    symbol = symbol.encode("ASCII")
                except UnicodeEncodeError:
                    # Non-ASCII characters are never in the printables
                    return False
            if symbol not in LetterAlphabet.PRINTABLES:
                return False
        return True

    def __str__(self):
        return str(self.get_symbols())

    def __len__(self):
        return len(self.get_symbols())

    def __iter__(self):
        return self.get_symbols().__iter__()

    def __contains__(self, symbol):
        return symbol in self.get_symbols()

    def __hash__(self):
        # Hash and equality are both based on the symbol tuple;
        # 'get_symbols()' may be overridden to return a non-tuple
        symbols = self.get_symbols()
        if isinstance(symbols, tuple):
            return hash(symbols)
        else:
            return hash(tuple(symbols))

    def __eq__(self, item):
        if item is self:
            return True
        if not isinstance(item, Alphabet):
            return False
        return self.get_symbols() == item.get_symbols()
275
+
276
+
277
+ class LetterAlphabet(Alphabet):
278
+ """
279
+ :class:`LetterAlphabet` is a an :class:`Alphabet` subclass
280
+ specialized for letter based alphabets, like DNA or protein
281
+ sequence alphabets.
282
+ The alphabet size is limited to the 94 printable, non-whitespace
283
+ characters.
284
+ Internally the symbols are saved as `bytes` objects.
285
+ The encoding and decoding process is a lot faster than for a
286
+ normal :class:`Alphabet`.
287
+
288
+ The performance gain comes through the use of *NumPy* and *Cython*
289
+ for encoding and decoding, without the need of a dictionary.
290
+
291
+ Parameters
292
+ ----------
293
+ symbols : iterable object or str or bytes
294
+ The symbols, that are allowed in this alphabet. The
295
+ corresponding code for a symbol, is the index of that symbol
296
+ in this list.
297
+ """
298
+
299
+ PRINTABLES = (string.digits + string.ascii_letters + string.punctuation).encode(
300
+ "ASCII"
301
+ )
302
+
303
+ def __init__(self, symbols):
304
+ if len(symbols) == 0:
305
+ raise ValueError("Symbol list is empty")
306
+ self._symbols = []
307
+ for symbol in symbols:
308
+ if not isinstance(symbol, (str, bytes)) or len(symbol) > 1:
309
+ raise ValueError(f"Symbol '{symbol}' is not a single letter")
310
+ if isinstance(symbol, str):
311
+ symbol = symbol.encode("ASCII")
312
+ if symbol not in LetterAlphabet.PRINTABLES:
313
+ raise ValueError(
314
+ f"Symbol {repr(symbol)} is not printable or whitespace"
315
+ )
316
+ self._symbols.append(symbol)
317
+ # Direct 'astype' conversion is not allowed by numpy
318
+ # -> frombuffer()
319
+ self._symbols = np.frombuffer(
320
+ np.array(self._symbols, dtype="|S1"), dtype=np.ubyte
321
+ )
322
+
323
+ def __repr__(self):
324
+ """Represent LetterAlphabet as a string for debugging."""
325
+ return f"LetterAlphabet({self.get_symbols()})"
326
+
327
+ def extends(self, alphabet):
328
+ if alphabet is self:
329
+ return True
330
+ elif isinstance(alphabet, LetterAlphabet):
331
+ if len(alphabet._symbols) > len(self._symbols):
332
+ return False
333
+ return np.all(alphabet._symbols == self._symbols[: len(alphabet._symbols)])
334
+ else:
335
+ return super().extends(alphabet)
336
+
337
+ def get_symbols(self):
338
+ return tuple([symbol.decode("ASCII") for symbol in self._symbols_as_bytes()])
339
+
340
+ def encode(self, symbol):
341
+ if not isinstance(symbol, (str, bytes)) or len(symbol) > 1:
342
+ raise AlphabetError(f"Symbol '{symbol}' is not a single letter")
343
+ indices = np.where(self._symbols == ord(symbol))[0]
344
+ if len(indices) == 0:
345
+ raise AlphabetError(f"Symbol {repr(symbol)} is not in the alphabet")
346
+ return indices[0].item()
347
+
348
+ def decode(self, code, as_bytes=False):
349
+ if code < 0 or code >= len(self._symbols):
350
+ raise AlphabetError(f"'{code:d}' is not a valid code")
351
+ return chr(self._symbols[code])
352
+
353
+ def encode_multiple(self, symbols, dtype=None):
354
+ """
355
+ Encode multiple symbols.
356
+
357
+ Parameters
358
+ ----------
359
+ symbols : iterable object or str or bytes
360
+ The symbols to encode. The method is fastest when a
361
+ :class:`ndarray`, :class:`str` or :class:`bytes` object
362
+ containing the symbols is provided, instead of e.g. a list.
363
+ dtype : dtype, optional
364
+ For compatibility with superclass. The value is ignored.
365
+
366
+ Returns
367
+ -------
368
+ code : ndarray
369
+ The sequence code.
370
+ """
371
+ if isinstance(symbols, str):
372
+ symbols = np.frombuffer(symbols.encode("ASCII"), dtype=np.ubyte)
373
+ elif isinstance(symbols, bytes):
374
+ symbols = np.frombuffer(symbols, dtype=np.ubyte)
375
+ elif isinstance(symbols, np.ndarray):
376
+ symbols = np.frombuffer(symbols.astype(dtype="|S1"), dtype=np.ubyte)
377
+ else:
378
+ symbols = np.frombuffer(
379
+ np.array(list(symbols), dtype="|S1"), dtype=np.ubyte
380
+ )
381
+ return encode_chars(alphabet=self._symbols, symbols=symbols)
382
+
383
def decode_multiple(self, code, as_bytes=False):
    """
    Decode a sequence code into an array of letters.

    Parameters
    ----------
    code : ndarray, dtype=uint8
        The sequence code to decode.
        Any other input is converted into an :class:`ndarray` with
        dtype ``uint8`` first.
    as_bytes : bool, optional
        If true, the output array contains `bytes` (dtype 'S1').
        Otherwise, the output array contains `str` (dtype 'U1').

    Returns
    -------
    symbols : ndarray, dtype='U1' or dtype='S1'
        The decoded letters.
    """
    if isinstance(code, np.ndarray):
        # No copy is made if the array already has the right dtype
        code = code.astype(np.uint8, copy=False)
    else:
        code = np.array(code, dtype=np.uint8)
    char_codes = decode_to_chars(alphabet=self._symbols, code=code)
    # The decoder yields 'np.ubyte' values,
    # which are reinterpreted as single bytes ('|S1')
    letters = np.frombuffer(char_codes, dtype="|S1")
    return letters if as_bytes else letters.astype("U1")
412
+
413
def is_letter_alphabet(self):
    """
    Tell that this alphabet consists of single letters.

    Returns
    -------
    is_letter_alphabet : bool
        Always ``True`` for this class.
    """
    return True
415
+
416
def __contains__(self, symbol):
    """
    Check whether a letter is part of this alphabet.

    Parameters
    ----------
    symbol : object
        The candidate symbol.

    Returns
    -------
    contained : bool
        True, if `symbol` is a :class:`str` or :class:`bytes` object
        of length 1 whose character code is stored in the alphabet.
        False for any other object, including empty and
        multi-character strings.
    """
    if not isinstance(symbol, (str, bytes)):
        return False
    # 'ord()' raises a TypeError for anything but a single character,
    # but a membership test should simply answer 'no' in that case
    if len(symbol) != 1:
        return False
    return ord(symbol) in self._symbols
420
+
421
def __len__(self):
    """
    Get the number of symbols stored in this alphabet.
    """
    symbol_count = len(self._symbols)
    return symbol_count
423
+
424
def _symbols_as_bytes(self):
    """
    Reinterpret the stored symbols from dtype 'np.ubyte' as
    single bytes (dtype '|S1').
    """
    byte_symbols = np.frombuffer(self._symbols, dtype="|S1")
    return byte_symbols
427
+
428
+
429
class AlphabetMapper(object):
    """
    Translate symbol codes of a source alphabet into the codes the
    same symbols have in a target alphabet.

    The symbol itself is preserved by the translation, only its code
    changes.
    Indexing a mapper works with a single symbol code as well as with
    an entire sequence code.

    Parameters
    ----------
    source_alphabet, target_alphabet : Alphabet
        The codes are converted from the source alphabet into the
        target alphabet.
        The target alphabet must contain at least all symbols of the
        source alphabet, but the shared symbols are not required to
        be in the same order.

    Examples
    --------

    >>> source_alph = Alphabet(["A","C","G","T"])
    >>> target_alph = Alphabet(["T","U","A","G","C"])
    >>> mapper = AlphabetMapper(source_alph, target_alph)
    >>> print(mapper[0])
    2
    >>> print(mapper[1])
    4
    >>> print(mapper[[1,1,3]])
    [4 4 0]
    >>> in_sequence = GeneralSequence(source_alph, "GCCTAT")
    >>> print(in_sequence.code)
    [2 1 1 3 0 3]
    >>> print("".join(in_sequence.symbols))
    GCCTAT
    >>> out_sequence = GeneralSequence(target_alph)
    >>> out_sequence.code = mapper[in_sequence.code]
    >>> print(out_sequence.code)
    [3 4 4 0 2 0]
    >>> print("".join(out_sequence.symbols))
    GCCTAT
    """

    def __init__(self, source_alphabet, target_alphabet):
        # If the target alphabet extends the source alphabet,
        # the codes are passed through unchanged and no table is built
        self._necessary_mapping = not target_alphabet.extends(source_alphabet)
        if not self._necessary_mapping:
            return
        # Lookup table: index is the source code, value is the target code
        table_dtype = AlphabetMapper._dtype(len(target_alphabet))
        self._mapper = np.zeros(len(source_alphabet), dtype=table_dtype)
        for source_code in range(len(source_alphabet)):
            letter = source_alphabet.decode(source_code)
            self._mapper[source_code] = target_alphabet.encode(letter)

    def __getitem__(self, code):
        # Single symbol code
        if isinstance(code, Integral):
            return self._mapper[code] if self._necessary_mapping else code
        # Sequence code: anything but an unsigned integer array
        # is converted into one
        unsigned_dtypes = (np.uint8, np.uint16, np.uint32, np.uint64)
        is_unsigned_array = (
            isinstance(code, np.ndarray) and code.dtype in unsigned_dtypes
        )
        if not is_unsigned_array:
            code = np.array(code, dtype=np.uint64)
        if not self._necessary_mapping:
            return code
        translated = np.empty(len(code), dtype=self._mapper.dtype)
        map_sequence_code(self._mapper, code, translated)
        return translated

    @staticmethod
    def _dtype(alphabet_size):
        """
        Get the smallest unsigned integer dtype, that is able to
        represent every code of an alphabet with the given size.
        """
        for candidate in (np.uint8, np.uint16, np.uint32):
            # A dtype with maximum value 'm' offers 'm + 1' codes
            if alphabet_size <= np.iinfo(candidate).max + 1:
                return candidate
        return np.uint64
519
+
520
+
521
class AlphabetError(Exception):
    """
    Raised when a symbol or a symbol code is not part of an
    :class:`Alphabet`.
    """
528
+
529
+
530
def common_alphabet(alphabets):
    """
    Pick the alphabet from the given alphabets, that extends all of
    them.

    Parameters
    ----------
    alphabets : iterable of Alphabet
        The alphabets from which the common one should be identified.

    Returns
    -------
    common_alphabet : Alphabet or None
        The alphabet from `alphabets` that extends all alphabets.
        ``None`` if no such common alphabet exists.
    """
    candidate = None
    for current in alphabets:
        if candidate is None:
            # The first alphabet is the initial candidate
            candidate = current
            continue
        if candidate.extends(current):
            # The candidate already covers this alphabet
            continue
        if not current.extends(candidate):
            # Neither alphabet extends the other one
            return None
        candidate = current
    return candidate