biotite 0.41.1__cp310-cp310-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-310-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-310-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-310-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-310-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-310-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,155 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["encode_chars", "decode_to_chars", "map_sequence_code"]
8
+
9
+ cimport cython
10
+ cimport numpy as np
11
+
12
+ import numpy as np
13
+
14
+
15
+ ctypedef np.int64_t int64
16
+ ctypedef np.uint8_t uint8
17
+ ctypedef np.uint16_t uint16
18
+ ctypedef np.uint32_t uint32
19
+ ctypedef np.uint64_t uint64
20
+
21
+
22
+ @cython.boundscheck(False)
23
+ @cython.wraparound(False)
24
+ def encode_chars(const unsigned char[:] alphabet not None,
25
+ const unsigned char[:] symbols not None):
26
+ """
27
+ Encode an array of symbols into an array of symbol codes.
28
+
29
+ Only works for symbols that are printable ASCII characters.
30
+
31
+ Parameters
32
+ ----------
33
+ alphabet : ndarray, shape=(n,), dtype="|S1"
34
+ The alphabet as array.
35
+ It is indexed via symbol codes and the corresponding values are
36
+ the ASCII values of the symbols.
37
+ symbols : ndarray, dtype="|S1"
38
+ The symbols (ASCII characters) to be encoded.
39
+
40
+ Returns
41
+ -------
42
+ code : ndarray, shape=(n,), dtype=uint8
43
+ The encoded symbols.
44
+ """
45
+ cdef int i
46
+ # The last symbol code of the alphabet + 1 is always illegal
47
+ # Since this code cannot occur from symbol encoding
48
+ # it can be later used to check for illegal symbols
49
+ cdef uint8 illegal_code = alphabet.shape[0]
50
+ # An array based map that maps from symbol to code
51
+ # Since an unsigned char can take only 256 distinct values (0-255)
52
+ # the size of the map is known at compile time
53
+ cdef uint8 sym_to_code[256]
54
+ # Initially fill the map with the illegal symbol
55
+ # Consequently, the map will later return the illegal symbol
56
+ # when indexed with a character that is not part of the alphabet
57
+ sym_to_code[:] = [illegal_code] * 256
58
+ # Then fill in entries for the symbols of the alphabet
59
+ cdef unsigned char symbol
60
+ for i, symbol in enumerate(alphabet):
61
+ sym_to_code[symbol] = i
62
+
63
+ # Encode the symbols
64
+ code = np.empty(symbols.shape[0], dtype=np.uint8)
65
+ cdef uint8[:] code_view = code
66
+ cdef uint8 symbol_code
67
+ for i in range(symbols.shape[0]):
68
+ symbol_code = sym_to_code[symbols[i]]
69
+ # Check if the symbol is valid
70
+ if symbol_code == illegal_code:
71
+ illegal_symbol = chr(symbols[i])
72
+ # Local import to avoid circular imports
73
+ from .alphabet import AlphabetError
74
+ raise AlphabetError(
75
+ f"Symbol {repr(illegal_symbol)} is not in the alphabet"
76
+ )
77
+ code_view[i] = symbol_code
78
+
79
+ return code
80
+
81
+
82
+ @cython.boundscheck(False)
83
+ @cython.wraparound(False)
84
+ def decode_to_chars(const unsigned char[:] alphabet not None,
85
+ const uint8[:] code not None):
86
+ """
87
+ Decode an array of symbol codes into an array of symbols.
88
+
89
+ Only works for symbols that are printable ASCII characters.
90
+
91
+ Parameters
92
+ ----------
93
+ alphabet : ndarray, shape=(n,), dtype="|S1"
94
+ The alphabet as array.
95
+ It is indexed via symbol codes and the corresponding values are
96
+ the ASCII values of the symbols.
97
+ code : ndarray, shape=(n,), dtype=uint8
98
+ The code to be decoded.
99
+
100
+ Returns
101
+ -------
102
+ symbols : ndarray, dtype="|S1"
103
+ The resulting symbols (ASCII characters).
104
+ """
105
+ cdef int i
106
+ cdef int alphabet_length = alphabet.shape[0]
107
+
108
+ symbols = np.empty(code.shape[0], dtype=np.ubyte)
109
+ cdef uint8[:] symbols_view = symbols
110
+ cdef uint8 symbol_code
111
+ for i in range(code.shape[0]):
112
+ symbol_code = code[i]
113
+ if symbol_code >= alphabet_length:
114
+ # Local import to avoid circular imports
115
+ from .alphabet import AlphabetError
116
+ raise AlphabetError(f"'{symbol_code:d}' is not a valid code")
117
+ symbols_view[i] = alphabet[symbol_code]
118
+ return symbols
119
+
120
+
121
+ ctypedef fused CodeType1:
122
+ uint8
123
+ uint16
124
+ uint32
125
+ uint64
126
+ ctypedef fused CodeType2:
127
+ uint8
128
+ uint16
129
+ uint32
130
+ uint64
131
+ def map_sequence_code(CodeType2[:] mapping,
132
+ CodeType1[:] in_code, CodeType2[:] out_code):
133
+ """
134
+ Efficiently maps a sequence code into another alphabet using a
135
+ mapping.
136
+
137
+ Parameters
138
+ ----------
139
+ mapping : ndarray, dtype=int
140
+ Maps the input codes to output codes.
141
+ in_code : ndarray, shape=(n,), dtype=int
142
+ The symbol codes to be mapped.
143
+ out_code : ndarray, shape=(n,), dtype=int
144
+ An empty array, where the mapped symbols are stored.
145
+ This is a parameter instead of the return value in order to
146
+ choose the correct integer type.
147
+ """
148
+ cdef int64 i
149
+ if in_code.shape[0] != out_code.shape[0]:
150
+ raise ValueError(
151
+ f"Input sequence code has length {in_code.shape[0]}, "
152
+ f"but output sequence code has length {out_code.shape[0]}"
153
+ )
154
+ for i in range(in_code.shape[0]):
155
+ out_code[i] = mapping[in_code[i]]
@@ -0,0 +1,466 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["CodonTable"]
8
+
9
+ import copy
10
+ from os.path import join, dirname, realpath
11
+ import numpy as np
12
+ from numbers import Integral
13
+ from .seqtypes import NucleotideSequence, ProteinSequence
14
+
15
+
16
+ # Abbreviations
17
+ _NUC_ALPH = NucleotideSequence.alphabet_unamb
18
+ _PROT_ALPH = ProteinSequence.alphabet
19
+
20
+ # Multiplier array that converts a codon in code representation
21
+ # into a unique integer
22
+ _radix = len(_NUC_ALPH)
23
+ _radix_multiplier = np.array([_radix**n for n in (2,1,0)], dtype=int)
24
+
25
+
26
+ class CodonTable(object):
27
+ """
28
+ A :class:`CodonTable` maps a codon (sequence of 3 nucleotides) to an
29
+ amino acid.
30
+ It also defines start codons. A :class:`CodonTable`
31
+ takes/outputs either the symbols or code of the codon/amino acid.
32
+
33
+ Furthermore, this class is able to give a list of codons that
34
+ corresponds to a given amino acid.
35
+
36
+ The :func:`load()` method allows loading of NCBI codon tables.
37
+
38
+ Objects of this class are immutable.
39
+
40
+ Parameters
41
+ ----------
42
+ codon_dict : dict of (str -> str)
43
+ A dictionary that maps codons to amino acids. The keys must be
44
+ strings of length 3 and the values strings of length 1
45
+ (all upper case).
46
+ The dictionary must provide entries for all 64 possible codons.
47
+ starts : iterable object of str
48
+ The start codons. Each entry must be a string of length 3
49
+ (all upper case).
50
+
51
+ Examples
52
+ --------
53
+
54
+ Get the amino acid coded by a given codon (symbol and code):
55
+
56
+ >>> table = CodonTable.default_table()
57
+ >>> print(table["ATG"])
58
+ M
59
+ >>> print(table[(1,2,3)])
60
+ 14
61
+
62
+ Get the codons coding for a given amino acid (symbol and code):
63
+
64
+ >>> table = CodonTable.default_table()
65
+ >>> print(table["M"])
66
+ ('ATG',)
67
+ >>> print(table[14])
68
+ ((0, 2, 0), (0, 2, 2), (1, 2, 0), (1, 2, 1), (1, 2, 2), (1, 2, 3))
69
+ """
70
+
71
+ # For efficient mapping of codon codes to amino acid codes,
72
+ # especially in the 'map_codon_codes()' function, the class
73
+ # maps each possible codon into a unique number using a radix based
74
+ # approach.
75
+ # For example the codon (3,1,2) would be represented as
76
+ # 3*16 + 1*4 + 2*1 = 54
77
+
78
+ # file for builtin codon tables from NCBI
79
+ _table_file = join(dirname(realpath(__file__)), "codon_tables.txt")
80
+
81
+ def __init__(self, codon_dict, starts):
82
+ # Check that 'starts' is an iterable of strings of length 3
83
+ for start in starts:
84
+ if not isinstance(start, str) or len(start) != 3:
85
+ raise ValueError(f"Invalid codon '{start}' as start codon")
86
+ # Internally store codons as single unique numbers
87
+ start_codon_codes = np.array(
88
+ [_NUC_ALPH.encode_multiple(start) for start in starts], dtype=int
89
+ )
90
+ self._starts = CodonTable._to_number(start_codon_codes)
91
+ # Use -1 as error code
92
+ # The array uses the number representation of codons as index
93
+ # and stores the corresponding symbol codes for amino acids
94
+ self._codons = np.full(_radix**3, -1, dtype=int)
95
+ for key, value in codon_dict.items():
96
+ codon_code = _NUC_ALPH.encode_multiple(key)
97
+ codon_number = CodonTable._to_number(codon_code)
98
+ aa_code = _PROT_ALPH.encode(value)
99
+ self._codons[codon_number] = aa_code
100
+ if (self._codons == -1).any():
101
+ # Find the missing codon
102
+ missing_index = np.where(self._codons == -1)[0][0]
103
+ codon_code = CodonTable._to_codon(missing_index)
104
+ codon = _NUC_ALPH.decode_multiple(codon_code)
105
+ codon_str = "".join(codon)
106
+ raise ValueError(
107
+ f"Codon dictionary does not contain codon '{codon_str}'"
108
+ )
109
+
110
+ def __repr__(self):
111
+ """Represent CodonTable as a string for debugging."""
112
+ return f"CodonTable({self.codon_dict()}, {self.start_codons()})"
113
+
114
+ def __eq__(self, item):
115
+ if not isinstance(item, CodonTable):
116
+ return False
117
+ if self.codon_dict() != item.codon_dict():
118
+ return False
119
+ if self.start_codons() != item.start_codons():
120
+ return False
121
+ return True
122
+
123
+ def __ne__(self, item):
124
+ return not self == item
125
+
126
+ def __getitem__(self, item):
127
+ if isinstance(item, str):
128
+ if len(item) == 1:
129
+ # Amino acid -> return possible codons
130
+ aa_code = _PROT_ALPH.encode(item)
131
+ codon_numbers = np.where(self._codons == aa_code)[0]
132
+ codon_codes = CodonTable._to_codon(codon_numbers)
133
+ codons = tuple(
134
+ ["".join(_NUC_ALPH.decode_multiple(codon_code))
135
+ for codon_code in codon_codes]
136
+ )
137
+ return codons
138
+ elif len(item) == 3:
139
+ # Codon -> return corresponding amino acid
140
+ codon_code = _NUC_ALPH.encode_multiple(item)
141
+ codon_number = CodonTable._to_number(codon_code)
142
+ aa_code = self._codons[codon_number]
143
+ aa = _PROT_ALPH.decode(aa_code)
144
+ return aa
145
+ else:
146
+ raise ValueError(f"'{item}' is an invalid index")
147
+ elif isinstance(item, int):
148
+ # Code for amino acid -> return possible codon codes
149
+ codon_numbers = np.where(self._codons == item)[0]
150
+ codon_codes = tuple(CodonTable._to_codon(codon_numbers))
151
+ codon_codes = tuple([tuple(code) for code in codon_codes])
152
+ return codon_codes
153
+ else:
154
+ # Code for codon as any iterable object
155
+ # Code for codon -> return corresponding amino acid codes
156
+ if len(item) != 3:
157
+ raise ValueError(
158
+ f"{item} is an invalid sequence code for a codon"
159
+ )
160
+ codon_number = CodonTable._to_number(item)
161
+ aa_code = self._codons[codon_number]
162
+ return aa_code
163
+
164
+ def map_codon_codes(self, codon_codes):
165
+ """
166
+ Efficiently map multiple codons to the corresponding amino
167
+ acids.
168
+
169
+ Parameters
170
+ ----------
171
+ codon_codes : ndarray, dtype=int, shape=(n,3)
172
+ The codons to be translated into amino acids.
173
+ The codons are given as symbol codes.
174
+ *n* is the amount of codons.
175
+
176
+ Returns
177
+ -------
178
+ aa_codes : ndarray, dtype=int, shape=(n,)
179
+ The amino acids as symbol codes.
180
+
181
+ Examples
182
+ --------
183
+ >>> dna = NucleotideSequence("ATGGTTTAA")
184
+ >>> sequence_code = dna.code
185
+ >>> print(sequence_code)
186
+ [0 3 2 2 3 3 3 0 0]
187
+ >>> # Reshape to get codons
188
+ >>> codon_codes = sequence_code.reshape(-1, 3)
189
+ >>> print(codon_codes)
190
+ [[0 3 2]
191
+ [2 3 3]
192
+ [3 0 0]]
193
+ >>> # Map to amino acids
194
+ >>> aa_codes = CodonTable.default_table().map_codon_codes(codon_codes)
195
+ >>> print(aa_codes)
196
+ [10 17 23]
197
+ >>> # Put into a protein sequence
198
+ >>> protein = ProteinSequence()
199
+ >>> protein.code = aa_codes
200
+ >>> print(protein)
201
+ MV*
202
+ """
203
+ if codon_codes.shape[-1] != 3:
204
+ raise ValueError(
205
+ f"Codons must be length 3, "
206
+ f"but size of last dimension is {codon_codes.shape[-1]}"
207
+ )
208
+ codon_numbers = CodonTable._to_number(codon_codes)
209
+ aa_codes = self._codons[codon_numbers]
210
+ return aa_codes
211
+
212
def codon_dict(self, code=False):
    """
    Get the mapping from codons to amino acids as dictionary.

    Parameters
    ----------
    code : bool
        If true, the keys are tuples of nucleotide symbol codes and
        the values are amino acid symbol codes.
        Otherwise, codon strings are mapped to amino acid symbols.
        (Default: False)

    Returns
    -------
    codon_dict : dict
        The dictionary mapping codons to amino acids.
    """
    # The position in the lookup array is the codon number
    code_mapping = {
        tuple(CodonTable._to_codon(number)): amino_acid_code
        for number, amino_acid_code in enumerate(self._codons)
    }
    if code:
        return code_mapping
    # Decode keys and values of the code based mapping into symbols
    return {
        "".join(_NUC_ALPH.decode_multiple(codon)):
            _PROT_ALPH.decode(amino_acid_code)
        for codon, amino_acid_code in code_mapping.items()
    }
236
+
237
def is_start_codon(self, codon_codes):
    """
    Check whether the given codons are start codons of this table.

    Parameters
    ----------
    codon_codes : ndarray or iterable of int
        The codon(s) to check, given as symbol codes.
        The input is converted via ``CodonTable._to_number()``, which
        reduces along the last axis.

    Returns
    -------
    is_start : ndarray, dtype=bool
        The result of ``numpy.isin()``: True for each codon whose
        number is among the start codons of this table.
    """
    return np.isin(CodonTable._to_number(codon_codes), self._starts)
240
+
241
def start_codons(self, code=False):
    """
    Get the start codons of this codon table.

    Parameters
    ----------
    code : bool
        If true, each start codon is returned as tuple of symbol
        codes instead of a string.
        (Default: False)

    Returns
    -------
    start_codons : tuple
        The start codons.
        Contains strings or tuples, depending on the `code`
        parameter.
    """
    code_tuples = tuple(
        tuple(CodonTable._to_codon(number)) for number in self._starts
    )
    if code:
        return code_tuples
    # Decode each tuple of symbol codes into a codon string
    return tuple(
        "".join(_NUC_ALPH.decode_multiple(codon)) for codon in code_tuples
    )
267
+
268
def with_start_codons(self, starts):
    """
    Create a new :class:`CodonTable` with the same codon mappings,
    but a different set of start codons.

    Parameters
    ----------
    starts : iterable object of str
        The new start codons.

    Returns
    -------
    new_table : CodonTable
        A deep copy of this table using the new start codons.
        This table is left unchanged.
    """
    encoded_starts = [_NUC_ALPH.encode_multiple(codon) for codon in starts]
    # Work on a copy, so this table keeps its own start codons
    table_copy = copy.deepcopy(self)
    table_copy._starts = CodonTable._to_number(
        np.array(encoded_starts, dtype=int)
    )
    return table_copy
290
+
291
def with_codon_mappings(self, codon_dict):
    """
    Create a new :class:`CodonTable` in which some codons are mapped
    to different amino acids.

    Parameters
    ----------
    codon_dict : dict of (str -> str)
        The changed codon mappings.
        Codons that are not contained keep their current mapping.

    Returns
    -------
    new_table : CodonTable
        A deep copy of this table with the changed codon mappings.
        This table is left unchanged.
    """
    # Work on a copy, so this table keeps its own mappings
    table_copy = copy.deepcopy(self)
    for codon, amino_acid in codon_dict.items():
        number = CodonTable._to_number(_NUC_ALPH.encode_multiple(codon))
        # Overwrite the entry of this codon in the lookup array
        table_copy._codons[number] = _PROT_ALPH.encode(amino_acid)
    return table_copy
314
+
315
def __str__(self):
    """
    Render the codon table as human-readable text.

    Each line lists the codons sharing the first two bases, each
    followed by its amino acid symbol.
    Start codons are marked with an ``i``.
    Groups of lines sharing the first base are separated by an empty
    line.

    Returns
    -------
    string : str
        The text representation, without a trailing line break.
    """
    # ['A', 'C', 'G', 'T']
    bases = _NUC_ALPH.get_symbols()
    blocks = []
    for b1 in bases:
        rows = []
        for b2 in bases:
            cells = []
            for b3 in bases:
                codon = b1 + b2 + b3
                codon_code = _NUC_ALPH.encode_multiple(codon)
                is_start = CodonTable._to_number(codon_code) in self._starts
                # Non-start codons are padded to the width of the
                # start indicator, so the columns stay aligned
                marker = " i " if is_start else "   "
                cells.append(codon + " " + self[codon] + marker)
            # Only strip whitespace at the line end:
            # Cutting a fixed number of characters would also remove
            # the start indicator of the last codon in the line
            rows.append((" " * 3).join(cells).rstrip())
        blocks.append("\n".join(rows))
    # An empty line separates the groups
    return "\n\n".join(blocks)
341
+
342
@staticmethod
def _to_number(codons):
    """
    Collapse codons, given as symbol codes, into a single number per
    codon.

    The symbol codes are weighted with `_radix_multiplier` and summed
    along the last axis.
    Input that is not an :class:`ndarray` is converted into an integer
    array first.
    """
    if isinstance(codons, np.ndarray):
        code_array = codons
    else:
        code_array = np.array(list(codons), dtype=int)
    weighted = _radix_multiplier * code_array
    return np.sum(weighted, axis=-1)
347
+
348
@staticmethod
def _to_codon(numbers):
    """
    Split codon numbers into their three symbol codes.

    Each number is decomposed into three digits with respect to the
    base `_radix`, the most significant digit coming first.
    A single integer gives a single codon, otherwise the result has
    an additional last dimension of size 3.
    """
    # NOTE(review): presumably the inverse of `_to_number()` - confirm
    # against the definition of `_radix_multiplier`
    if isinstance(numbers, Integral):
        # Only a single number:
        # Wrap it into an array and unwrap the single result
        wrapped = np.array([numbers])
        return CodonTable._to_codon(wrapped)[0]
    remaining = (
        numbers if isinstance(numbers, np.ndarray)
        else np.array(list(numbers), dtype=int)
    )
    codons = np.zeros(remaining.shape + (3,), dtype=int)
    # Extract the digits from most to least significant
    for position, power in enumerate((2, 1, 0)):
        weight = _radix**power
        digit = remaining // weight
        codons[..., position] = digit
        # A new array is created here, the input stays untouched
        remaining = remaining - digit * weight
    return codons
362
+
363
@staticmethod
def load(table_name):
    """
    Load a NCBI codon table.

    Parameters
    ----------
    table_name : str or int
        If a string is given, it is interpreted as official NCBI
        codon table name (e.g. "Vertebrate Mitochondrial").
        An integer (including NumPy integer types) is interpreted as
        NCBI codon table ID.

    Returns
    -------
    table : CodonTable
        The NCBI codon table.

    Raises
    ------
    ValueError
        If no table with the given name or ID is contained in the
        codon table file.
        This is also the case, if `table_name` is neither a string
        nor an integer.
    """
    # 'isinstance()' instead of an exact type comparison also accepts
    # NumPy integers and string subclasses
    # Booleans are integers as well, but they are not valid table IDs
    search_by_id = (
        isinstance(table_name, Integral)
        and not isinstance(table_name, bool)
    )
    search_by_name = isinstance(table_name, str)

    # Loads codon tables from codon_tables.txt
    with open(CodonTable._table_file, "r") as f:
        lines = f.read().split("\n")

    # Extract data for codon table from file
    table_found = False
    aa = None
    init = None
    base1 = None
    base2 = None
    base3 = None
    for line in lines:
        if not line:
            # An empty line ends the entry of the current table
            table_found = False
        if search_by_id and line.startswith("id"):
            # remove identifier 'id'
            if table_name == int(line[2:]):
                table_found = True
        elif search_by_name and line.startswith("name"):
            # Get list of table names from lines
            # (separated with ';')
            # remove identifier 'name'
            names = [name.strip() for name in line[4:].split(";")]
            if table_name in names:
                table_found = True
        if table_found:
            # Remove identifier at the start of each data line
            if line.startswith("AA"):
                aa = line[5:].strip()
            elif line.startswith("Init"):
                init = line[5:].strip()
            elif line.startswith("Base1"):
                base1 = line[5:].strip()
            elif line.startswith("Base2"):
                base2 = line[5:].strip()
            elif line.startswith("Base3"):
                base3 = line[5:].strip()

    if any(data is None for data in (aa, init, base1, base2, base3)):
        raise ValueError(f"Codon table '{table_name}' was not found")

    # Create codon table from data
    symbol_dict = {}
    starts = []
    # aa, init and baseX all have the same length:
    # The same position in each string describes the same codon
    for b1, b2, b3, amino_acid, init_flag in zip(
        base1, base2, base3, aa, init
    ):
        codon = b1 + b2 + b3
        if init_flag == "i":
            starts.append(codon)
        symbol_dict[codon] = amino_acid
    return CodonTable(symbol_dict, starts)
432
+
433
@staticmethod
def table_names():
    """
    The possible codon table names for :func:`load()`.

    The names are read from the lines starting with ``name`` in the
    codon table file, where multiple names for the same table are
    separated by ``;``.

    Returns
    -------
    names : list of str
        List of valid codon table names.
    """
    with open(CodonTable._table_file, "r") as f:
        # The line break at the end of each line is removed together
        # with the other whitespace surrounding each name
        return [
            name.strip()
            for line in f
            if line.startswith("name")
            for name in line[4:].split(";")
        ]
450
+
451
@staticmethod
def default_table():
    """
    Get the default codon table.

    The table is equal to the NCBI "Standard" codon table,
    with the difference that only "ATG" is a start codon.

    Returns
    -------
    table : CodonTable
        The default codon table.
        Each call returns the same module-level object, no copy is
        created.
    """
    return _default_table
464
+
465
+
466
# Created once at import time and returned by 'CodonTable.default_table()':
# The NCBI "Standard" table with "ATG" as the only start codon
_default_table = CodonTable.load(table_name="Standard").with_start_codons(
    starts=["ATG"]
)