biotite 1.5.0__cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-314-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-314-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-314-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-314-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-314-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-314-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-314-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-314-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-314-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-314-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-314-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-314-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-314-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-314-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-314-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-314-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-314-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-314-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-314-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-314-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-314-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,155 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["encode_chars", "decode_to_chars", "map_sequence_code"]
8
+
9
+ cimport cython
10
+ cimport numpy as np
11
+
12
+ import numpy as np
13
+
14
+
15
+ ctypedef np.int64_t int64
16
+ ctypedef np.uint8_t uint8
17
+ ctypedef np.uint16_t uint16
18
+ ctypedef np.uint32_t uint32
19
+ ctypedef np.uint64_t uint64
20
+
21
+
22
@cython.boundscheck(False)
@cython.wraparound(False)
def encode_chars(const unsigned char[:] alphabet not None,
                 const unsigned char[:] symbols not None):
    """
    Encode an array of symbols into an array of symbol codes.

    Only works for symbols that are printable ASCII characters.

    Parameters
    ----------
    alphabet : ndarray, shape=(k,), dtype=ubyte
        The alphabet as array of ASCII values.
        It is indexed via symbol codes and the corresponding values are
        the symbols, i.e. the symbol at position *i* is encoded as
        code *i*.
    symbols : ndarray, shape=(n,), dtype=ubyte
        The symbols (ASCII characters) to be encoded.

    Returns
    -------
    code : ndarray, shape=(n,), dtype=uint8
        The encoded symbols.

    Raises
    ------
    AlphabetError
        If one of the `symbols` is not part of the `alphabet`.
    """
    # Memoryview extents are 'Py_ssize_t':
    # A C 'int' index would overflow for arrays with more than
    # 2**31 - 1 elements
    cdef Py_ssize_t i
    # The last symbol code of the alphabet + 1 is always illegal
    # Since this code cannot occur from symbol encoding
    # it can be later used to check for illegal symbols
    cdef uint8 illegal_code = alphabet.shape[0]
    # An array based map that maps from symbol to code
    # Since an unsigned char can take 256 different values (0-255)
    # the size of the map is known at compile time
    cdef uint8 sym_to_code[256]
    # Initially fill the map with the illegal symbol
    # Consequently, the map will later return the illegal symbol
    # when indexed with a character that is not part of the alphabet
    sym_to_code[:] = [illegal_code] * 256
    # Then fill in entries for the symbols of the alphabet
    cdef unsigned char symbol
    for i, symbol in enumerate(alphabet):
        sym_to_code[symbol] = <uint8>i

    # Encode the symbols
    code = np.empty(symbols.shape[0], dtype=np.uint8)
    cdef uint8[:] code_view = code
    cdef uint8 symbol_code
    for i in range(symbols.shape[0]):
        symbol_code = sym_to_code[symbols[i]]
        # Check if the symbol is valid
        if symbol_code == illegal_code:
            illegal_symbol = chr(symbols[i])
            # Local import to avoid circular imports
            from .alphabet import AlphabetError
            raise AlphabetError(
                f"Symbol {repr(illegal_symbol)} is not in the alphabet"
            )
        code_view[i] = symbol_code

    return code
80
+
81
+
82
@cython.boundscheck(False)
@cython.wraparound(False)
def decode_to_chars(const unsigned char[:] alphabet not None,
                    const uint8[:] code not None):
    """
    Decode an array of symbol codes into an array of symbols.

    Only works for symbols that are printable ASCII characters.

    Parameters
    ----------
    alphabet : ndarray, shape=(k,), dtype=ubyte
        The alphabet as array of ASCII values.
        It is indexed via symbol codes and the corresponding values are
        the symbols, i.e. code *i* is decoded into the symbol at
        position *i*.
    code : ndarray, shape=(n,), dtype=uint8
        The code to be decoded.

    Returns
    -------
    symbols : ndarray, shape=(n,), dtype=ubyte
        The resulting symbols (ASCII characters).

    Raises
    ------
    AlphabetError
        If one of the codes is not smaller than the length of the
        `alphabet`.
    """
    # Memoryview extents are 'Py_ssize_t':
    # A C 'int' index would overflow for arrays with more than
    # 2**31 - 1 elements
    cdef Py_ssize_t i
    cdef Py_ssize_t alphabet_length = alphabet.shape[0]

    symbols = np.empty(code.shape[0], dtype=np.ubyte)
    cdef uint8[:] symbols_view = symbols
    cdef uint8 symbol_code
    for i in range(code.shape[0]):
        symbol_code = code[i]
        # Bounds checking is disabled for this function,
        # hence the code must be validated before it is used as index
        if symbol_code >= alphabet_length:
            # Local import to avoid circular imports
            from .alphabet import AlphabetError
            raise AlphabetError(f"'{symbol_code:d}' is not a valid code")
        symbols_view[i] = alphabet[symbol_code]
    return symbols
119
+
120
+
121
# Two separate fused types with the same members are declared,
# so that the integer type of the input code can be chosen
# independently of the integer type of the mapping/output code
ctypedef fused CodeType1:
    uint8
    uint16
    uint32
    uint64
ctypedef fused CodeType2:
    uint8
    uint16
    uint32
    uint64
def map_sequence_code(CodeType2[:] mapping,
                      CodeType1[:] in_code, CodeType2[:] out_code):
    """
    Translate a sequence code into the code of another alphabet by
    looking up each input code in a mapping array.

    Parameters
    ----------
    mapping : ndarray, dtype=uint8 or uint16 or uint32 or uint64
        The lookup array: The element at index *c* is the output code
        for the input code *c*.
        Must have the same dtype as `out_code`.
    in_code : ndarray, shape=(n,), dtype=uint8 or uint16 or uint32 or uint64
        The symbol codes to be mapped.
    out_code : ndarray, shape=(n,), dtype=uint8 or uint16 or uint32 or uint64
        The array the mapped codes are written into.
        It is passed as parameter instead of being returned, so that
        the caller decides on the integer type of the result.

    Raises
    ------
    ValueError
        If `in_code` and `out_code` differ in length.
    """
    cdef int64 pos
    cdef int64 n_codes = in_code.shape[0]
    if n_codes != out_code.shape[0]:
        raise ValueError(
            f"Input sequence code has length {in_code.shape[0]}, "
            f"but output sequence code has length {out_code.shape[0]}"
        )
    for pos in range(n_codes):
        out_code[pos] = mapping[in_code[pos]]
@@ -0,0 +1,476 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["CodonTable"]
8
+
9
+ import copy
10
+ from numbers import Integral
11
+ from os.path import dirname, join, realpath
12
+ import numpy as np
13
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
14
+
15
# Short aliases for the two alphabets this module works with
_NUC_ALPH = NucleotideSequence.alphabet_unamb
_PROT_ALPH = ProteinSequence.alphabet

# The number of different nucleotide symbols is the radix of the
# positional number system, in which a codon in code representation
# is read as a unique integer:
# The multiplier array holds the place value of each codon position
_radix = len(_NUC_ALPH)
_radix_multiplier = np.array([_radix**2, _radix**1, _radix**0], dtype=int)
23
+
24
+
25
+ class CodonTable(object):
26
+ """
27
+ A :class:`CodonTable` maps a codon (sequence of 3 nucleotides) to an
28
+ amino acid.
29
+ It also defines start codons. A :class:`CodonTable`
30
+ takes/outputs either the symbols or code of the codon/amino acid.
31
+
32
+ Furthermore, this class is able to give a list of codons that
33
+ corresponds to a given amino acid.
34
+
35
+ The :func:`load()` method allows loading of NCBI codon tables.
36
+
37
+ Objects of this class are immutable.
38
+
39
+ Parameters
40
+ ----------
41
+ codon_dict : dict of (str -> str)
42
+ A dictionary that maps codons to amino acids. The keys must be
43
+ strings of length 3 and the values strings of length 1
44
+ (all upper case).
45
+ The dictionary must provide entries for all 64 possible codons.
46
+ starts : iterable object of str
47
+ The start codons. Each entry must be a string of length 3
48
+ (all upper case).
49
+
50
+ Examples
51
+ --------
52
+
53
+ Get the amino acid coded by a given codon (symbol and code):
54
+
55
+ >>> table = CodonTable.default_table()
56
+ >>> print(table["ATG"])
57
+ M
58
+ >>> print(table[(1,2,3)])
59
+ 14
60
+
61
+ Get the codons coding for a given amino acid (symbol and code):
62
+
63
+ >>> table = CodonTable.default_table()
64
+ >>> print(table["M"])
65
+ ('ATG',)
66
+ >>> print(table[14])
67
+ ((0, 2, 0), (0, 2, 2), (1, 2, 0), (1, 2, 1), (1, 2, 2), (1, 2, 3))
68
+ """
69
+
70
+ # For efficient mapping of codon codes to amino acid codes,
71
+ # especially in the 'map_codon_codes()' function, the class
72
+ # maps each possible codon into a unique number using a radix based
73
+ # approach.
74
+ # For example the codon (3,1,2) would be represented as
75
+ # 3*16 + 1*4 + 2*1 = 54
76
+
77
+ # file for builtin codon tables from NCBI
78
+ _table_file = join(dirname(realpath(__file__)), "codon_tables.txt")
79
+
80
def __init__(self, codon_dict, starts):
    """
    Build the internal lookup arrays from the codon dictionary and
    the start codons.

    Raises
    ------
    ValueError
        If an entry of `starts` is not a string of length 3 or if
        `codon_dict` lacks an entry for one of the possible codons.
    """
    # 'starts' is traversed twice below (validation and encoding):
    # Materialize it once, so that a one-shot iterable
    # (e.g. a generator) is not already exhausted in the second pass
    starts = tuple(starts)
    # Check if 'starts' is iterable object of length 3 string
    for start in starts:
        if not isinstance(start, str) or len(start) != 3:
            raise ValueError(f"Invalid codon '{start}' as start codon")
    # Internally store codons as single unique numbers
    start_codon_codes = np.array(
        [_NUC_ALPH.encode_multiple(start) for start in starts], dtype=int
    )
    self._starts = CodonTable._to_number(start_codon_codes)
    # Use -1 as error code
    # The array uses the number representation of codons as index
    # and stores the corresponding symbol codes for amino acids
    self._codons = np.full(_radix**3, -1, dtype=int)
    for key, value in codon_dict.items():
        codon_code = _NUC_ALPH.encode_multiple(key)
        codon_number = CodonTable._to_number(codon_code)
        aa_code = _PROT_ALPH.encode(value)
        self._codons[codon_number] = aa_code
    # Every entry that still holds the error code
    # was not covered by the dictionary
    if (self._codons == -1).any():
        # Find the missing codon
        missing_index = np.where(self._codons == -1)[0][0]
        codon_code = CodonTable._to_codon(missing_index)
        codon = _NUC_ALPH.decode_multiple(codon_code)
        codon_str = "".join(codon)
        raise ValueError(f"Codon dictionary does not contain codon '{codon_str}'")
106
+
107
+ def __repr__(self):
108
+ """Represent CodonTable as a string for debugging."""
109
+ return f"CodonTable({self.codon_dict()}, {self.start_codons()})"
110
+
111
+ def __eq__(self, item):
112
+ if not isinstance(item, CodonTable):
113
+ return False
114
+ if self.codon_dict() != item.codon_dict():
115
+ return False
116
+ if self.start_codons() != item.start_codons():
117
+ return False
118
+ return True
119
+
120
+ def __ne__(self, item):
121
+ return not self == item
122
+
123
+ def __getitem__(self, item):
124
+ if isinstance(item, str):
125
+ if len(item) == 1:
126
+ # Amino acid -> return possible codons
127
+ aa_code = _PROT_ALPH.encode(item)
128
+ codon_numbers = np.where(self._codons == aa_code)[0]
129
+ codon_codes = CodonTable._to_codon(codon_numbers)
130
+ codons = tuple(
131
+ [
132
+ "".join(_NUC_ALPH.decode_multiple(codon_code))
133
+ for codon_code in codon_codes
134
+ ]
135
+ )
136
+ return codons
137
+ elif len(item) == 3:
138
+ # Codon -> return corresponding amino acid
139
+ codon_code = _NUC_ALPH.encode_multiple(item)
140
+ codon_number = CodonTable._to_number(codon_code)
141
+ aa_code = self._codons[codon_number]
142
+ aa = _PROT_ALPH.decode(aa_code)
143
+ return aa
144
+ else:
145
+ raise ValueError(f"'{item}' is an invalid index")
146
+ elif isinstance(item, int):
147
+ # Code for amino acid -> return possible codon codes
148
+ codon_numbers = np.where(self._codons == item)[0]
149
+ codon_codes = tuple(
150
+ [tuple(code.tolist()) for code in CodonTable._to_codon(codon_numbers)]
151
+ )
152
+ return codon_codes
153
+ else:
154
+ # Code for codon as any iterable object
155
+ # Code for codon -> return corresponding amino acid codes
156
+ if len(item) != 3:
157
+ raise ValueError(f"{item} is an invalid sequence code for a codon")
158
+ codon_number = CodonTable._to_number(item)
159
+ aa_code = self._codons[codon_number]
160
+ return aa_code
161
+
162
def map_codon_codes(self, codon_codes):
    """
    Translate an array of codons into the corresponding amino acids
    in a single vectorized lookup.

    Parameters
    ----------
    codon_codes : ndarray, dtype=int, shape=(n,3)
        The codons to be translated into amino acids, given as
        symbol codes.
        *n* is the amount of codons.

    Returns
    -------
    aa_codes : ndarray, dtype=int, shape=(n,)
        The symbol codes of the amino acids.

    Raises
    ------
    ValueError
        If the last dimension of `codon_codes` has not the size 3.

    Examples
    --------
    >>> dna = NucleotideSequence("ATGGTTTAA")
    >>> sequence_code = dna.code
    >>> print(sequence_code)
    [0 3 2 2 3 3 3 0 0]
    >>> # Reshape to get codons
    >>> codon_codes = sequence_code.reshape(-1, 3)
    >>> print(codon_codes)
    [[0 3 2]
     [2 3 3]
     [3 0 0]]
    >>> # Map to amino acids
    >>> aa_codes = CodonTable.default_table().map_codon_codes(codon_codes)
    >>> print(aa_codes)
    [10 17 23]
    >>> # Put into a protein sequence
    >>> protein = ProteinSequence()
    >>> protein.code = aa_codes
    >>> print(protein)
    MV*
    """
    has_codon_shape = codon_codes.shape[-1] == 3
    if not has_codon_shape:
        raise ValueError(
            f"Codons must be length 3, "
            f"but size of last dimension is {codon_codes.shape[-1]}"
        )
    # The codon numbers serve as index for the amino acid code array
    return self._codons[CodonTable._to_number(codon_codes)]
209
+
210
def codon_dict(self, code=False):
    """
    Return the complete codon -> amino acid mapping of this table.

    Parameters
    ----------
    code : bool
        If true, keys are codons as tuples of symbol codes and values
        are amino acid symbol codes.
        Otherwise, keys are codon strings and values are amino acid
        symbols.

    Returns
    -------
    codon_dict : dict
        The dictionary mapping codons to amino acids.
    """
    # The position in `self._codons` is the codon number
    code_mapping = {
        tuple(CodonTable._to_codon(number)): amino_acid_code
        for number, amino_acid_code in enumerate(self._codons)
    }
    if code:
        return code_mapping

    symbol_mapping = {}
    for codon_code, amino_acid_code in code_mapping.items():
        codon = "".join(_NUC_ALPH.decode_multiple(codon_code))
        symbol_mapping[codon] = _PROT_ALPH.decode(amino_acid_code)
    return symbol_mapping
238
+
239
def is_start_codon(self, codon_codes):
    """
    Check which of the given codons are start codons of this table.

    Parameters
    ----------
    codon_codes : ndarray or iterable of int
        The codons as symbol codes, with the three codon positions
        along the last dimension.

    Returns
    -------
    is_start : ndarray, dtype=bool
        The result of :func:`numpy.isin()`: true for each codon whose
        number is contained in the start codons of this table.
    """
    return np.isin(CodonTable._to_number(codon_codes), self._starts)
242
+
243
def start_codons(self, code=False):
    """
    Return the start codons defined by this codon table.

    Parameters
    ----------
    code : bool
        If true, each codon is returned as a tuple of symbol codes
        instead of a string.

    Returns
    -------
    start_codons : tuple
        The start codons. Contains strings or tuples, depending on
        the `code` parameter.
    """
    # Start codons are stored as codon numbers -> expand to symbol codes
    codon_codes = tuple(
        tuple(CodonTable._to_codon(number)) for number in self._starts
    )
    if code:
        return codon_codes
    return tuple(
        "".join(_NUC_ALPH.decode_multiple(codon_code))
        for codon_code in codon_codes
    )
272
+
273
def with_start_codons(self, starts):
    """
    Create a new :class:`CodonTable` with the same codon mappings,
    but changed start codons.

    Parameters
    ----------
    starts : iterable object of str
        The new start codons.
        Each codon must consist of exactly three nucleotide symbols.
        An empty iterable yields a table without start codons.

    Returns
    -------
    new_table : CodonTable
        The codon table with the new start codons.

    Raises
    ------
    ValueError
        If a start codon does not consist of exactly three symbols.
    """
    # Validate every codon up front: a codon of the wrong length would
    # otherwise either be broadcast silently into a wrong codon number
    # (e.g. when a single string such as "ATG" is passed instead of a
    # list) or fail with an unrelated array shape error
    start_codon_codes = []
    for start in starts:
        start_code = _NUC_ALPH.encode_multiple(start)
        if len(start_code) != 3:
            raise ValueError(
                f"'{start}' is an invalid start codon, "
                f"expected exactly 3 symbols"
            )
        start_codon_codes.append(start_code)

    # Copy this table and replace the start codons
    new_table = copy.deepcopy(self)
    # The reshape keeps the codon axis even if there are no start codons
    start_codon_codes = np.array(start_codon_codes, dtype=int).reshape(-1, 3)
    new_table._starts = CodonTable._to_number(start_codon_codes)
    return new_table
295
+
296
def with_codon_mappings(self, codon_dict):
    """
    Create a new :class:`CodonTable` with partially changed codon
    mappings.

    Parameters
    ----------
    codon_dict : dict of (str -> str)
        The changed codon mappings.
        Each key is a codon of exactly three nucleotide symbols,
        each value is the amino acid symbol it should translate to.

    Returns
    -------
    new_table : CodonTable
        The codon table with changed codon mappings.

    Raises
    ------
    ValueError
        If a key does not consist of exactly three symbols.
    """
    # Copy this table and replace the codons; this table stays untouched
    new_table = copy.deepcopy(self)
    for codon, amino_acid in codon_dict.items():
        codon_code = _NUC_ALPH.encode_multiple(codon)
        # A codon of the wrong length must not reach `_to_number()`:
        # a single symbol would be broadcast to a valid looking number
        # and silently overwrite an unrelated codon
        if len(codon_code) != 3:
            raise ValueError(
                f"'{codon}' is an invalid codon, expected exactly 3 symbols"
            )
        codon_number = CodonTable._to_number(codon_code)
        new_table._codons[codon_number] = _PROT_ALPH.encode(amino_acid)
    return new_table
319
+
320
def __str__(self):
    """
    Render the codon table as aligned text.

    Codons sharing the first base form one paragraph, codons sharing
    the first two bases form one line.
    Each entry shows the codon and its amino acid symbol, followed by
    an ``i`` if the codon is a start codon.
    Paragraphs are separated by an empty line.
    """
    # Width of the start codon indicator and of the gap between entries
    blank = " " * 3
    # ['A', 'C', 'G', 'T']
    bases = _NUC_ALPH.get_symbols()
    paragraphs = []
    for b1 in bases:
        lines = []
        for b2 in bases:
            entries = []
            for b3 in bases:
                codon = b1 + b2 + b3
                codon_code = _NUC_ALPH.encode_multiple(codon)
                if CodonTable._to_number(codon_code) in self._starts:
                    indicator = " i "
                else:
                    # Same width as the indicator to keep columns aligned
                    indicator = blank
                entries.append(codon + " " + self[codon] + indicator)
            # Strip only trailing whitespace: cutting a fixed number of
            # characters would also remove the start codon indicator
            # of the last entry in the line
            lines.append(blank.join(entries).rstrip())
        paragraphs.append("\n".join(lines))
    return "\n\n".join(paragraphs)
346
+
347
@staticmethod
def _to_number(codons):
    """
    Convert codons given as symbol codes into codon numbers.

    Each codon is collapsed into a single integer by weighting its
    symbol codes with `_radix_multiplier` and summing over the last
    axis.
    Input that is not an :class:`ndarray` is converted into an integer
    array first.
    """
    if isinstance(codons, np.ndarray):
        codes = codons
    else:
        codes = np.array(list(codons), dtype=int)
    weighted_codes = _radix_multiplier * codes
    return np.sum(weighted_codes, axis=-1)
352
+
353
@staticmethod
def _to_codon(numbers):
    """
    Convert codon numbers back into codons given as symbol codes.

    A single integer gives an array with the three symbol codes of
    that codon.
    An array or iterable of numbers gives an array with an additional
    last dimension of size 3.
    """
    if isinstance(numbers, Integral):
        # Only a single number: wrap it, convert and unwrap again
        return CodonTable._to_codon(np.array([numbers]))[0]
    if not isinstance(numbers, np.ndarray):
        numbers = np.array(list(numbers), dtype=int)

    codons = np.zeros(numbers.shape + (3,), dtype=int)
    remainder = numbers
    # Extract the digits from the most to the least significant one
    for position, exponent in enumerate((2, 1, 0)):
        place_value = _radix**exponent
        digit = remainder // place_value
        codons[..., position] = digit
        remainder = remainder - digit * place_value
    return codons
367
+
368
@staticmethod
def load(table_name):
    """
    Load a NCBI codon table.

    Parameters
    ----------
    table_name : str or int
        If a string is given, it is interpreted as official NCBI
        codon table name (e.g. "Vertebrate Mitochondrial").
        An integer is interpreted as NCBI codon table ID.

    Returns
    -------
    table : CodonTable
        The NCBI codon table.

    Raises
    ------
    ValueError
        If no table with the given name or ID is found in the table
        file.
    """
    # The codon tables are stored in a single text file
    with open(CodonTable._table_file, "r") as f:
        lines = f.read().split("\n")

    # Data lines of the requested table, keyed by their line identifier
    fields = {"AA": None, "Init": None, "Base1": None, "Base2": None, "Base3": None}
    in_requested_table = False
    for line in lines:
        if not line:
            # An empty line ends the current table entry
            in_requested_table = False
        if isinstance(table_name, Integral) and line.startswith("id"):
            # Remove identifier 'id'
            if table_name == int(line[2:]):
                in_requested_table = True
        elif isinstance(table_name, str) and line.startswith("name"):
            # Remove identifier 'name',
            # the table names are separated with ';'
            names = [name.strip() for name in line[4:].split(";")]
            if table_name in names:
                in_requested_table = True
        if not in_requested_table:
            continue
        for identifier in fields:
            if line.startswith(identifier):
                # Remove identifier
                fields[identifier] = line[5:].strip()
                break

    if any(value is None for value in fields.values()):
        raise ValueError(f"Codon table '{table_name}' was not found")

    # Create codon table from data
    aa = fields["AA"]
    init = fields["Init"]
    base1 = fields["Base1"]
    base2 = fields["Base2"]
    base3 = fields["Base3"]
    symbol_dict = {}
    starts = []
    # aa, init and baseX all have the same length
    for i, amino_acid in enumerate(aa):
        codon = base1[i] + base2[i] + base3[i]
        if init[i] == "i":
            starts.append(codon)
        symbol_dict[codon] = amino_acid
    return CodonTable(symbol_dict, starts)
442
+
443
@staticmethod
def table_names():
    """
    List the codon table names that are accepted by :func:`load()`.

    Returns
    -------
    names : list of str
        List of valid codon table names.
    """
    with open(CodonTable._table_file, "r") as f:
        content = f.read()

    names = []
    for line in content.split("\n"):
        if not line.startswith("name"):
            continue
        # Remove identifier 'name', alternative names are separated with ';'
        for name in line[4:].split(";"):
            names.append(name.strip())
    return names
460
+
461
@staticmethod
def default_table():
    """
    Get the default codon table.

    The table corresponds to the NCBI "Standard" codon table, except
    that "ATG" is its only start codon.

    Returns
    -------
    table : CodonTable
        The default codon table.
        The same module-level instance is returned on every call.
    """
    return _default_table
474
+
475
+
476
# Instance returned by `CodonTable.default_table()`:
# the NCBI "Standard" table restricted to "ATG" as the only start codon
_default_table = CodonTable.load(table_name="Standard").with_start_codons(
    starts=["ATG"]
)