biotite 1.1.0__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +6 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,713 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
# Module metadata.
# NOTE(review): ``__name__`` is rebound to the parent package name;
# presumably so that the classes defined in this file report
# ``biotite.sequence`` as their module - confirm against the package
# ``__init__``.
__name__ = "biotite.sequence"
__author__ = "Patrick Kunzmann", "Thomas Nevolianis"
# Public names of this module, i.e. what a star-import exposes.
__all__ = [
    "GeneralSequence",
    "NucleotideSequence",
    "ProteinSequence",
    "PositionalSequence",
    "PurePositionalSequence",
]
14
+
15
+ from dataclasses import dataclass, field
16
+ import numpy as np
17
+ from biotite.sequence.alphabet import (
18
+ Alphabet,
19
+ AlphabetError,
20
+ AlphabetMapper,
21
+ LetterAlphabet,
22
+ )
23
+ from biotite.sequence.sequence import Sequence
24
+
25
+
26
class GeneralSequence(Sequence):
    """
    A :class:`Sequence` over an arbitrary, user-supplied
    :class:`Alphabet`.

    Use this class when a custom alphabet is required, but writing a
    dedicated :class:`Sequence` subclass is not worth the effort.

    Parameters
    ----------
    alphabet : Alphabet
        The alphabet of this sequence.
    sequence : iterable object, optional
        The symbols the sequence is initialized with.
        If the alphabet consists of single letter strings, a
        :class:`str` may be given as well.
        By default the sequence is empty.
    """

    def __init__(self, alphabet, sequence=()):
        # The alphabet is stored before delegating to the base class
        # NOTE(review): presumably the base initializer needs
        # 'get_alphabet()' to encode the symbols - confirm
        self._alphabet = alphabet
        super().__init__(sequence)

    def __repr__(self):
        """Represent GeneralSequence as a string for debugging."""
        symbol_reprs = ", ".join(repr(symbol) for symbol in self.symbols)
        return f"GeneralSequence(Alphabet({self._alphabet}), [{symbol_reprs}])"

    def __copy_create__(self):
        # A fresh, empty sequence sharing the alphabet of this object
        return GeneralSequence(self._alphabet)

    def get_alphabet(self):
        """
        Get the alphabet this sequence was created with.

        Returns
        -------
        alphabet : Alphabet
            The alphabet given at construction.
        """
        return self._alphabet

    def as_type(self, sequence):
        """
        Transfer the sequence code of this :class:`GeneralSequence`
        into a sequence of another :class:`Sequence` type.

        The given `sequence` object is modified in-place:
        its sequence code is overwritten with the code of this object.

        Parameters
        ----------
        sequence : Sequence
            The `Sequence` that receives the sequence code of this
            object.
            Its alphabet must equal or extend the alphabet of this
            object.

        Returns
        -------
        sequence : Sequence
            The input `sequence`, now carrying the sequence code of
            this object.

        Raises
        ------
        AlphabetError
            If the :class:`Alphabet` of the input `sequence` does
            not extend the :class:`Alphabet` of this sequence.
        """
        target_alphabet = sequence.get_alphabet()
        if target_alphabet.extends(self._alphabet):
            sequence.code = self.code
            return sequence
        raise AlphabetError(
            f"The alphabet of '{type(sequence).__name__}' "
            "is not compatible with the alphabet of this sequence"
        )
93
+
94
+
95
class NucleotideSequence(Sequence):
    """
    Representation of a nucleotide sequence (DNA or RNA).

    This class may have one of two different alphabets:
    :attr:`unambiguous_alphabet()` contains only the unambiguous DNA
    letters 'A', 'C', 'G' and 'T'.
    :attr:`ambiguous_alphabet()` uses an extended alphabet for ambiguous
    letters.

    Parameters
    ----------
    sequence : iterable object, optional
        The initial DNA sequence. This may either be a list or a string.
        May take upper or lower case letters.
        By default the sequence is empty.
    ambiguous : bool, optional
        If true, the ambiguous alphabet is used. By default the
        object tries to use the unambiguous alphabet. If this fails due
        to ambiguous letters in the sequence, the ambiguous alphabet
        is used.
    """

    alphabet_unamb = LetterAlphabet(["A", "C", "G", "T"])
    alphabet_amb = LetterAlphabet(
        ["A", "C", "G", "T", "R", "Y", "W", "S", "M", "K", "H", "B", "V", "D", "N"]
    )

    # Maps each (ambiguous) nucleotide symbol to its complement
    compl_symbol_dict = {
        "A": "T",
        "C": "G",
        "G": "C",
        "T": "A",
        "M": "K",
        "R": "Y",
        "W": "W",
        "S": "S",
        "Y": "R",
        "K": "M",
        "V": "B",
        "H": "D",
        "D": "H",
        "B": "V",
        "N": "N",
    }
    # Complement symbols in the order of 'alphabet_amb'
    # List comprehension does not work in this scope
    _compl_symbols = []
    for _symbol in alphabet_amb.get_symbols():
        _compl_symbols.append(compl_symbol_dict[_symbol])
    _compl_alphabet_unamb = LetterAlphabet(_compl_symbols)
    _compl_mapper = AlphabetMapper(_compl_alphabet_unamb, alphabet_amb)

    # The default for 'sequence' is an immutable empty tuple instead of
    # a shared mutable list; both encode to an empty sequence
    def __init__(self, sequence=(), ambiguous=None):
        # Symbols are case-insensitive: normalize to upper case
        if isinstance(sequence, str):
            sequence = sequence.upper()
        else:
            sequence = [symbol.upper() for symbol in sequence]

        if ambiguous is None:
            # Prefer the unambiguous alphabet and fall back to the
            # ambiguous one, if the sequence contains other symbols
            try:
                alphabet = NucleotideSequence.alphabet_unamb
                seq_code = alphabet.encode_multiple(sequence)
            except AlphabetError:
                alphabet = NucleotideSequence.alphabet_amb
                seq_code = alphabet.encode_multiple(sequence)
        else:
            if ambiguous:
                alphabet = NucleotideSequence.alphabet_amb
            else:
                alphabet = NucleotideSequence.alphabet_unamb
            seq_code = alphabet.encode_multiple(sequence)

        # As in the original order of statements, the alphabet is set
        # before the base class initializer is called
        self._alphabet = alphabet
        super().__init__()
        self.code = seq_code

    def __repr__(self):
        """Represent NucleotideSequence as a string for debugging."""
        ambiguous = bool(self._alphabet == NucleotideSequence.alphabet_amb)
        return f'NucleotideSequence("{"".join(self.symbols)}", ambiguous={ambiguous})'

    def __copy_create__(self):
        # Create an empty sequence that uses the same alphabet
        # (ambiguous or unambiguous) as this object
        ambiguous = bool(self._alphabet == NucleotideSequence.alphabet_amb)
        return NucleotideSequence(ambiguous=ambiguous)

    def get_alphabet(self):
        """
        Get the alphabet used by this sequence.

        Returns
        -------
        alphabet : LetterAlphabet
            Either the unambiguous or the ambiguous nucleotide alphabet,
            depending on how this object was created.
        """
        return self._alphabet

    def complement(self):
        """
        Get the complement nucleotide sequence.

        Returns
        -------
        complement : NucleotideSequence
            The complement sequence.

        Examples
        --------

        >>> dna_seq = NucleotideSequence("ACGCTT")
        >>> print(dna_seq.complement())
        TGCGAA
        >>> print(dna_seq.reverse().complement())
        AAGCGT

        """
        # Interpreting the sequence code of this object in the
        # complementary alphabet gives the complementary symbols
        # In order to get the complementary symbols in the original
        # alphabet, the sequence code is mapped from the complementary
        # alphabet into the original alphabet
        compl_code = NucleotideSequence._compl_mapper[self.code]
        return self.copy(compl_code)

    def translate(self, complete=False, codon_table=None, met_start=False):
        """
        Translate the nucleotide sequence into a protein sequence.

        If `complete` is true, the entire sequence is translated,
        beginning with the first codon and ending with the last codon,
        even if stop codons occur during the translation.

        Otherwise this method returns possible ORFs in the
        sequence, even if no stop codon occurs in an ORF.

        Parameters
        ----------
        complete : bool, optional
            If true, the complete sequence is translated. In this case
            the sequence length must be a multiple of 3.
            Otherwise all ORFs are translated. (Default: False)
        codon_table : CodonTable, optional
            The codon table to be used. By default the default table
            will be used
            (NCBI "Standard" table with "ATG" as single start codon).
        met_start : bool, optional
            If true, the translation starts always with a 'methionine',
            even if the start codon codes for another amino acid.
            Otherwise the translation starts with the amino acid
            the codon codes for. Only applies, if `complete` is false.
            (Default: False)

        Returns
        -------
        protein : ProteinSequence or list of ProteinSequence
            The translated protein sequence. If `complete` is true,
            only a single :class:`ProteinSequence` is returned. Otherwise
            a list of :class:`ProteinSequence` is returned, which contains
            every ORF.
        pos : list of tuple (int, int)
            Is only returned if `complete` is false. The list contains
            a tuple for each ORF.
            The first element of the tuple is the index of the
            :class:`NucleotideSequence`, where the translation starts.
            The second element is the exclusive stop index, it
            represents the first nucleotide in the
            :class:`NucleotideSequence` after a stop codon.

        Raises
        ------
        AlphabetError
            If this sequence does not use the unambiguous alphabet.
        ValueError
            If `complete` is true and the sequence length is not a
            multiple of 3.

        Examples
        --------

        >>> dna_seq = NucleotideSequence("AATGATGCTATAGAT")
        >>> prot_seq = dna_seq.translate(complete=True)
        >>> print(prot_seq)
        NDAID
        >>> prot_seqs, pos = dna_seq.translate(complete=False)
        >>> for seq in prot_seqs:
        ...    print(seq)
        MML*
        ML*

        """
        if self._alphabet != NucleotideSequence.alphabet_unamb:
            raise AlphabetError("Translation requires unambiguous alphabet")
        # Determine codon_table
        if codon_table is None:
            # Import at this position to avoid circular import
            from biotite.sequence.codon import CodonTable

            codon_table = CodonTable.default_table()

        if complete:
            if len(self) % 3 != 0:
                raise ValueError(
                    "Sequence length needs to be a multiple of 3 "
                    "for complete translation"
                )
            # Reshape code into (n,3), with n being the amount of codons
            codons = self.code.reshape(-1, 3)
            protein_seq = ProteinSequence()
            protein_seq.code = codon_table.map_codon_codes(codons)
            return protein_seq

        stop_code = ProteinSequence.alphabet.encode("*")
        met_code = ProteinSequence.alphabet.encode("M")
        protein_seqs = []
        pos = []
        code = self.code
        # Create all three frames
        for shift in range(3):
            # The frame length is always a multiple of 3
            # If there is a trailing partial codon, remove it
            frame_length = ((len(code) - shift) // 3) * 3
            frame = code[shift : shift + frame_length]
            # Reshape frame into (n,3), with n being the amount of codons
            frame_codons = frame.reshape(-1, 3)
            # At first, translate frame completely
            protein_code = codon_table.map_codon_codes(frame_codons)
            # Iterate over all start codons in this frame
            starts = np.where(codon_table.is_start_codon(frame_codons))[0]
            for start_i in starts:
                # Protein sequence beginning from start codon
                code_from_start = protein_code[start_i:]
                # Get all stop codon positions
                # relative to 'code_from_start'
                stops = np.where(code_from_start == stop_code)[0]
                # Find first stop codon after start codon
                # Include stop -> stops[0] + 1
                stop_i = stops[0] + 1 if len(stops) > 0 else len(code_from_start)
                code_from_start_to_stop = code_from_start[:stop_i]
                prot_seq = ProteinSequence()
                if met_start:
                    # Copy as the slice is edited
                    prot_seq.code = code_from_start_to_stop.copy()
                    prot_seq.code[0] = met_code
                else:
                    prot_seq.code = code_from_start_to_stop
                protein_seqs.append(prot_seq)
                # Codon indices are transformed
                # to nucleotide sequence indices
                pos.append((shift + start_i * 3, shift + (start_i + stop_i) * 3))
        # Sort by start position
        order = np.argsort([start for start, _ in pos])
        pos = [pos[i] for i in order]
        protein_seqs = [protein_seqs[i] for i in order]
        return protein_seqs, pos

    @staticmethod
    def unambiguous_alphabet():
        """
        Get the unambiguous nucleotide alphabet containing the symbols
        ``A``, ``C``, ``G`` and ``T``.

        Returns
        -------
        alphabet : LetterAlphabet
            The unambiguous nucleotide alphabet.
        """
        return NucleotideSequence.alphabet_unamb

    @staticmethod
    def ambiguous_alphabet():
        """
        Get the ambiguous nucleotide alphabet containing the symbols
        ``A``, ``C``, ``G`` and ``T`` and symbols describing
        ambiguous combinations of these.

        Returns
        -------
        alphabet : LetterAlphabet
            The ambiguous nucleotide alphabet.
        """
        return NucleotideSequence.alphabet_amb
363
+
364
+
365
class ProteinSequence(Sequence):
    """
    Representation of a protein sequence.

    Furthermore this class offers a conversion of amino acids from
    3-letter code into 1-letter code and vice versa.

    Parameters
    ----------
    sequence : iterable object, optional
        The initial protein sequence. This may either be a list or a
        string. May take upper or lower case letters. If a list is
        given, the list elements can be 1-letter or 3-letter amino acid
        representations. By default the sequence is empty.

    Notes
    -----
    The :class:`Alphabet` of this :class:`Sequence` class does not
    support selenocysteine.
    Please convert selenocysteine (``U``) into cysteine (``C``)
    or use a custom :class:`Sequence` class, if the differentiation is
    necessary.
    """

    _codon_table = None

    alphabet = LetterAlphabet(
        [
            "A",
            "C",
            "D",
            "E",
            "F",
            "G",
            "H",
            "I",
            "K",
            "L",
            "M",
            "N",
            "P",
            "Q",
            "R",
            "S",
            "T",
            "V",
            "W",
            "Y",
            "B",
            "Z",
            "X",
            "*",
        ]
    )

    # Masses are taken from
    # https://web.expasy.org/findmod/findmod_masses.html#AA
    #
    # Both tables are indexed by symbol code, i.e. they follow the order
    # of `alphabet`. Symbols without a defined mass are NaN, so that any
    # sum including them becomes NaN as well.

    _mol_weight_average = np.array(
        [
            71.0788,  # A
            103.1388,  # C
            115.0886,  # D
            129.1155,  # E
            147.1766,  # F
            57.0519,  # G
            137.1411,  # H
            113.1594,  # I
            128.1741,  # K
            113.1594,  # L
            131.1926,  # M
            114.1038,  # N
            97.1167,  # P
            128.1307,  # Q
            156.1875,  # R
            87.0782,  # S
            101.1051,  # T
            99.1326,  # V
            186.2132,  # W
            163.1760,  # Y
            np.nan,  # B
            np.nan,  # Z
            np.nan,  # X
            np.nan,  # *
        ]
    )

    _mol_weight_monoisotopic = np.array(
        [
            71.03711,  # A
            103.00919,  # C
            115.02694,  # D
            129.04259,  # E
            147.06841,  # F
            57.02146,  # G
            137.05891,  # H
            113.08406,  # I
            128.09496,  # K
            113.08406,  # L
            131.04049,  # M
            114.04293,  # N
            97.05276,  # P
            128.05858,  # Q
            156.10111,  # R
            87.03203,  # S
            101.04768,  # T
            99.06841,  # V
            186.07931,  # W
            163.06333,  # Y
            np.nan,  # B
            np.nan,  # Z
            np.nan,  # X
            np.nan,  # *
        ]
    )

    # Mass of the single water molecule that is added to the sum of the
    # residue masses. The monoisotopic value (1H2 16O) differs from the
    # average one, so each mass table gets its matching water mass.
    _water_weight_average = 18.015
    _water_weight_monoisotopic = 18.01056

    _dict_1to3 = {
        "A": "ALA",
        "C": "CYS",
        "D": "ASP",
        "E": "GLU",
        "F": "PHE",
        "G": "GLY",
        "H": "HIS",
        "I": "ILE",
        "K": "LYS",
        "L": "LEU",
        "M": "MET",
        "N": "ASN",
        "P": "PRO",
        "Q": "GLN",
        "R": "ARG",
        "S": "SER",
        "T": "THR",
        "V": "VAL",
        "W": "TRP",
        "Y": "TYR",
        "B": "ASX",
        "Z": "GLX",
        "X": "UNK",
        "*": " * ",
    }

    # Inverse mapping of `_dict_1to3`; built with a comprehension so that
    # no loop variables are left behind as class attributes
    _dict_3to1 = {three: one for one, three in _dict_1to3.items()}
    # Residue names without an own symbol in the alphabet are mapped to
    # an existing symbol
    _dict_3to1["SEC"] = "C"
    _dict_3to1["MSE"] = "M"

    def __init__(self, sequence=()):
        dict_3to1 = ProteinSequence._dict_3to1
        # Convert 3-letter codes to single letter codes,
        # if list contains 3-letter codes
        sequence = [
            dict_3to1[symbol.upper()] if len(symbol) == 3 else symbol.upper()
            for symbol in sequence
        ]
        super().__init__(sequence)

    def __repr__(self):
        """Represent ProteinSequence as a string for debugging."""
        return f'ProteinSequence("{"".join(self.symbols)}")'

    def get_alphabet(self):
        return ProteinSequence.alphabet

    def remove_stops(self):
        """
        Remove *stop signals* from the sequence.

        Returns
        -------
        no_stop : ProteinSequence
            A copy of this sequence without stop signals.
        """
        stop_code = ProteinSequence.alphabet.encode("*")
        no_stop = self.copy()
        seq_code = no_stop.code
        # Keep every position whose code is not the one of the stop symbol
        no_stop.code = seq_code[seq_code != stop_code]
        return no_stop

    @staticmethod
    def convert_letter_3to1(symbol):
        """
        Convert a 3-letter to a 1-letter amino acid representation.

        Parameters
        ----------
        symbol : string
            3-letter amino acid representation.
            Upper and lower case letters are accepted.

        Returns
        -------
        convert : string
            1-letter amino acid representation.

        Raises
        ------
        KeyError
            If the given 3-letter representation is unknown.
        """
        return ProteinSequence._dict_3to1[symbol.upper()]

    @staticmethod
    def convert_letter_1to3(symbol):
        """
        Convert a 1-letter to a 3-letter amino acid representation.

        Parameters
        ----------
        symbol : string
            1-letter amino acid representation.
            Upper and lower case letters are accepted.

        Returns
        -------
        convert : string
            3-letter amino acid representation.

        Raises
        ------
        KeyError
            If the given 1-letter representation is unknown.
        """
        return ProteinSequence._dict_1to3[symbol.upper()]

    def get_molecular_weight(self, monoisotopic=False):
        """
        Calculate the molecular weight of this protein.

        The molecular weight is calculated by the addition of the
        residue masses of the amino acids in the protein and the mass
        of one water molecule.

        Parameters
        ----------
        monoisotopic : bool, optional
            If true, the monoisotopic masses of the amino acids and of
            the water molecule are used.
            By default, the average isotopic masses are used.

        Returns
        -------
        weight : float
            Molecular weight of the protein represented by the sequence.
            Molecular weight values are given in Dalton (Da).

        Raises
        ------
        ValueError
            If the sequence contains a symbol without a defined mass
            (``B``, ``Z``, ``X`` or ``*``).
        """
        if monoisotopic:
            residue_weights = self._mol_weight_monoisotopic
            water_weight = self._water_weight_monoisotopic
        else:
            residue_weights = self._mol_weight_average
            water_weight = self._water_weight_average
        weight = np.sum(residue_weights[self.code]) + water_weight

        # Symbols without a defined mass are stored as NaN,
        # which propagates through the sum
        if np.isnan(weight):
            raise ValueError(
                "Sequence contains ambiguous amino acids, cannot calculate weight"
            )
        return weight
605
+
606
+
607
class PositionalSequence(Sequence):
    """
    A sequence whose symbols carry their own position.

    Every position of the sequence is represented by a distinct
    :class:`PositionalSequence.Symbol`, and the alphabet of this sequence
    consists of exactly these symbols.
    As a result, the code of each symbol equals its position in the sequence.
    This is useful for aligning sequences based on a position-specific
    substitution matrix.

    Parameters
    ----------
    original_sequence : seq.Sequence
        The original sequence to create the positional sequence from.
    """

    @dataclass(frozen=True)
    class Symbol:
        """
        Combination of a symbol and its position in a sequence.

        Attributes
        ----------
        original_alphabet : Alphabet
            The original alphabet, where the symbol stems from.
        original_code : int
            The code of the original symbol in the original alphabet.
        position : int
            The 0-based position of the symbol in the sequence.
        symbol : object
            The symbol from the original alphabet.
            It is not passed to the constructor, but decoded from
            `original_code` using `original_alphabet`.

        See Also
        --------
        PositionalSequence
            The sequence type containing :class:`PositionalSequence.Symbol`
            objects.
        """

        original_alphabet: ...
        original_code: ...
        position: ...
        symbol: ... = field(init=False)

        def __post_init__(self):
            decoded = self.original_alphabet.decode(self.original_code)
            # The dataclass is frozen, hence the derived field cannot be
            # set via regular attribute assignment
            object.__setattr__(self, "symbol", decoded)

        def __str__(self):
            return str(self.symbol)

    def __init__(self, original_sequence):
        orig_alphabet = original_sequence.get_alphabet()
        # One alphabet entry per sequence position
        positional_symbols = [
            PositionalSequence.Symbol(orig_alphabet, code, pos)
            for pos, code in enumerate(original_sequence.code)
        ]
        self._orig_alphabet = orig_alphabet
        self._alphabet = Alphabet(positional_symbols)
        code_dtype = Sequence.dtype(len(self._alphabet))
        self.code = np.arange(len(original_sequence), dtype=code_dtype)

    def reconstruct(self):
        """
        Rebuild the original sequence from this positional sequence.

        Returns
        -------
        original_sequence : GeneralSequence
            The original sequence.
            Although the actual type of the returned sequence is always a
            :class:`GeneralSequence`, the alphabet and the symbols of the returned
            sequence are equal to the original sequence.
        """
        # The original codes are stored in the symbols of the alphabet
        original_codes = [sym.original_code for sym in self._alphabet]
        reconstructed = GeneralSequence(self._orig_alphabet)
        reconstructed.code = np.array(original_codes)
        return reconstructed

    def get_alphabet(self):
        return self._alphabet

    def __str__(self) -> str:
        return "".join(str(sym) for sym in self.symbols)

    def __repr__(self):
        return "PositionalSequence(" + repr(self.reconstruct()) + ")"
693
+
694
+
695
class PurePositionalSequence(Sequence):
    """
    A 'placeholder' sequence, in which each symbol is simply its own position
    in the sequence.

    This class is similar to :class:`PositionalSequence`, but the symbols are not
    derived from an original sequence, but are the pure position.
    Hence, there is no meaningful string representation of the sequence and its symbols.

    Parameters
    ----------
    length : int
        The number of positions in the sequence.
    """

    def __init__(self, length):
        # Alphabet and sequence code both enumerate the positions 0..length-1
        positions = range(length)
        self._alphabet = Alphabet(positions)
        code_dtype = Sequence.dtype(length)
        self.code = np.arange(length, dtype=code_dtype)

    def get_alphabet(self):
        return self._alphabet

    def __repr__(self):
        return "PurePositionalSequence({})".format(len(self))