biotite 0.41.1__cp312-cp312-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-312-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,374 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ The module contains the :class:`Sequence` superclass.
7
+ """
8
+
9
+ __name__ = "biotite.sequence"
10
+ __author__ = "Patrick Kunzmann"
11
+ __all__ = ["Sequence"]
12
+
13
+ import numbers
14
+ import abc
15
+ import numpy as np
16
+ from .alphabet import Alphabet, LetterAlphabet
17
+ from ..copyable import Copyable
18
+
19
+
20
# Number of distinct values representable by each unsigned integer type,
# i.e. the largest alphabet size whose symbol codes still fit into it
_size_uint8, _size_uint16, _size_uint32 = (
    np.iinfo(int_type).max + 1
    for int_type in (np.uint8, np.uint16, np.uint32)
)
23
+
24
+
25
class Sequence(Copyable, metaclass=abc.ABCMeta):
    """
    The abstract base class for all sequence types.

    A :class:`Sequence` can be seen as a succession of symbols, that are
    elements in the allowed set of symbols, the :class:`Alphabet`.
    Internally, a :class:`Sequence` object uses a *NumPy*
    :class:`ndarray` of integers, where each integer represents a
    symbol.
    The :class:`Alphabet` of a :class:`Sequence` object is used to
    encode each symbol, that is used to create the
    :class:`Sequence`, into an integer. These integer values are called
    symbol code, the encoding of an entire sequence of symbols is
    called sequence code.

    The size of the symbol code type in the array is determined by the
    size of the :class:`Alphabet`:
    If the :class:`Alphabet` contains 256 symbols or less, one byte is
    used per array element; if the :class:`Alphabet` contains
    between 257 and 65536 symbols, two bytes are used, and so on.

    Two :class:`Sequence` objects are equal if they are instances of the
    same class, have the same :class:`Alphabet` and have equal sequence
    codes.
    Comparison with a string or list of symbols evaluates always to
    false.

    A :class:`Sequence` can be indexed by any 1-D index a
    :class:`ndarray` accepts.
    If the index is a single integer, the decoded symbol at that
    position is returned, otherwise a subsequence is returned.

    Individual symbols of the sequence can also be exchanged in indexed
    form: If an integer is used as index, the item is treated as a
    symbol. Any other index (slice, index list, boolean mask) expects
    multiple symbols, either as list of symbols, as :class:`ndarray`
    containing a sequence code or another :class:`Sequence` instance.
    Concatenation of two sequences is achieved with the '+' operator.

    Each subclass of :class:`Sequence` needs to overwrite the abstract
    method :func:`get_alphabet()`, which specifies the alphabet the
    :class:`Sequence` uses.

    Parameters
    ----------
    sequence : iterable object, optional
        The symbol sequence, the :class:`Sequence` is initialized with.
        For alphabets containing single letter strings, this parameter
        may also be a :class:`str` object.
        By default the sequence is empty.

    Attributes
    ----------
    code : ndarray
        The sequence code.
    symbols : list
        The list of symbols, represented by the sequence.
        The list is generated by decoding the sequence code, when
        this attribute is accessed. When this attribute is modified,
        the new list of symbols is encoded into the sequence code.
    alphabet : Alphabet
        The alphabet of this sequence. Cannot be set.
        Equal to `get_alphabet()`.

    Examples
    --------
    Creating a DNA sequence from string and print the symbols and the
    code:

    >>> dna_seq = NucleotideSequence("ACGTA")
    >>> print(dna_seq)
    ACGTA
    >>> print(dna_seq.code)
    [0 1 2 3 0]
    >>> print(dna_seq.symbols)
    ['A' 'C' 'G' 'T' 'A']
    >>> print(list(dna_seq))
    ['A', 'C', 'G', 'T', 'A']

    Sequence indexing:

    >>> print(dna_seq[1:3])
    CG
    >>> print(dna_seq[[0,2,4]])
    AGA
    >>> print(dna_seq[np.array([False,False,True,True,True])])
    GTA

    Sequence manipulation:

    >>> dna_copy = dna_seq.copy()
    >>> dna_copy[2] = "C"
    >>> print(dna_copy)
    ACCTA
    >>> dna_copy = dna_seq.copy()
    >>> dna_copy[0:2] = dna_copy[3:5]
    >>> print(dna_copy)
    TAGTA
    >>> dna_copy = dna_seq.copy()
    >>> dna_copy[np.array([True,False,False,False,True])] = "T"
    >>> print(dna_copy)
    TCGTT
    >>> dna_copy = dna_seq.copy()
    >>> dna_copy[1:4] = np.array([0,1,2])
    >>> print(dna_copy)
    AACGA

    Reverse sequence:

    >>> dna_seq_rev = dna_seq.reverse()
    >>> print(dna_seq_rev)
    ATGCA

    Concatenate the two sequences:

    >>> dna_seq_concat = dna_seq + dna_seq_rev
    >>> print(dna_seq_concat)
    ACGTAATGCA

    """

    def __init__(self, sequence=()):
        # Assigning via the property encodes the symbols
        # into the sequence code
        self.symbols = sequence

    def copy(self, new_seq_code=None):
        """
        Copy the object.

        Parameters
        ----------
        new_seq_code : ndarray, optional
            If this parameter is set, the sequence code is set to this
            value, rather than the original sequence code.

        Returns
        -------
        copy
            A copy of this object.
        """
        # Override in order to achieve better performance,
        # in case only a subsequence is needed,
        # because not the entire sequence code is copied then
        clone = self.__copy_create__()
        if new_seq_code is None:
            clone.code = np.copy(self.code)
        else:
            clone.code = new_seq_code
        self.__copy_fill__(clone)
        return clone

    @property
    def symbols(self):
        return self.get_alphabet().decode_multiple(self.code)

    @symbols.setter
    def symbols(self, value):
        alph = self.get_alphabet()
        # Use the smallest unsigned integer type
        # that can hold every symbol code of the alphabet
        dtype = Sequence.dtype(len(alph))
        self._seq_code = alph.encode_multiple(value, dtype)

    @property
    def code(self):
        return self._seq_code

    @code.setter
    def code(self, value):
        # Validate the input before doing any other work
        if not isinstance(value, np.ndarray):
            raise TypeError("Sequence code must be an integer ndarray")
        dtype = Sequence.dtype(len(self.get_alphabet()))
        # 'copy=False' avoids a copy, if the array already has the
        # required dtype
        self._seq_code = value.astype(dtype, copy=False)

    @property
    def alphabet(self):
        return self.get_alphabet()

    @abc.abstractmethod
    def get_alphabet(self):
        """
        Get the :class:`Alphabet` of the :class:`Sequence`.

        This method must be overwritten, when subclassing
        :class:`Sequence`.

        Returns
        -------
        alphabet : Alphabet
            :class:`Sequence` alphabet.
        """
        pass

    def reverse(self, copy=True):
        """
        Reverse the :class:`Sequence`.

        Parameters
        ----------
        copy : bool, optional
            If set to False, the code :class:`ndarray` of the returned
            sequence is an array view to the sequence code of this
            object.
            In this case, manipulations on the returned sequence would
            also affect this object.
            Otherwise, the sequence code is copied.

        Returns
        -------
        reversed : Sequence
            The reversed :class:`Sequence`.

        Examples
        --------

        >>> dna_seq = NucleotideSequence("ACGTA")
        >>> dna_seq_rev = dna_seq.reverse()
        >>> print(dna_seq_rev)
        ATGCA
        """
        reversed_code = np.flip(self._seq_code, axis=0)
        if copy:
            reversed_code = np.copy(reversed_code)
        return self.copy(reversed_code)

    def is_valid(self):
        """
        Check, if the sequence contains a valid sequence code.

        A sequence code is valid, if at each sequence position the
        code is smaller than the size of the alphabet.

        Invalid code means that the code cannot be decoded into
        symbols. Furthermore invalid code can lead to serious
        errors in alignments, since the substitution matrix
        is indexed with an invalid index.

        Returns
        -------
        valid : bool
            True, if the sequence is valid, false otherwise.
        """
        return (self.code < len(self.get_alphabet())).all()

    def get_symbol_frequency(self):
        """
        Get the number of occurrences of each symbol in the sequence.

        If a symbol does not occur in the sequence, but it is in the
        alphabet, its number of occurrences is 0.

        Returns
        -------
        frequency : dict
            A dictionary containing the symbols as keys and the
            corresponding number of occurrences in the sequence as
            values.
        """
        alph = self.get_alphabet()
        # 'minlength' ensures a count for every symbol of the alphabet,
        # even if the symbol does not appear in the sequence
        counts = np.bincount(self._seq_code, minlength=len(alph))
        return dict(zip(alph.get_symbols(), counts))

    def __getitem__(self, index):
        sub_seq = self._seq_code.__getitem__(index)
        if isinstance(sub_seq, np.ndarray):
            # Multiple positions selected -> subsequence
            return self.copy(sub_seq)
        # Single position selected -> decoded symbol
        return self.get_alphabet().decode(sub_seq)

    def __setitem__(self, index, item):
        alph = self.get_alphabet()
        if isinstance(index, numbers.Integral):
            # Expect a single symbol
            code = alph.encode(item)
        else:
            # Expect multiple symbols
            if isinstance(item, Sequence):
                code = item.code
            elif isinstance(item, np.ndarray):
                # The array is taken as sequence code without encoding
                code = item
            else:
                # Default: item is iterable object of symbols
                code = alph.encode_multiple(item)
        self._seq_code.__setitem__(index, code)

    def __len__(self):
        return len(self._seq_code)

    def __iter__(self):
        alph = self.get_alphabet()
        # Yield the decoded symbol for each symbol code
        for symbol_code in self._seq_code:
            yield alph.decode(symbol_code)

    def __eq__(self, item):
        # Sequences are only equal, if class, alphabet
        # and sequence code match
        if not isinstance(item, type(self)):
            return False
        if self.get_alphabet() != item.get_alphabet():
            return False
        return np.array_equal(self._seq_code, item._seq_code)

    def __str__(self):
        alph = self.get_alphabet()
        if isinstance(alph, LetterAlphabet):
            decoded = alph.decode_multiple(self._seq_code, as_bytes=True)
            return decoded.tobytes().decode("ASCII")
        else:
            return "".join(alph.decode_multiple(self._seq_code))

    def __add__(self, sequence):
        own_alph = self.get_alphabet()
        other_alph = sequence.get_alphabet()
        # The concatenated sequence is created from the operand,
        # whose alphabet extends the alphabet of the other operand
        if own_alph.extends(other_alph):
            template = self
        elif other_alph.extends(own_alph):
            template = sequence
        else:
            raise ValueError("The sequences alphabets are not compatible")
        new_code = np.concatenate((self._seq_code, sequence._seq_code))
        return template.copy(new_code)

    @staticmethod
    def dtype(alphabet_size):
        """
        Get the sequence code dtype required for the given size of the
        alphabet.

        Parameters
        ----------
        alphabet_size : int
            The size of the alphabet.

        Returns
        -------
        dtype
            The :class:`dtype`, that is large enough to store symbol
            codes, that are encoded by an :class:`Alphabet` of the given
            size.
        """
        if alphabet_size <= _size_uint8:
            return np.uint8
        elif alphabet_size <= _size_uint16:
            return np.uint16
        elif alphabet_size <= _size_uint32:
            return np.uint32
        else:
            return np.uint64
@@ -0,0 +1,132 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for handling molecular structures.
7
+
8
+ In this context an atom is described by two kinds of attributes: the
9
+ coordinates and the annotations. The annotations include information
10
+ about polypeptide chain id, residue id, residue name, hetero atom
11
+ information, atom name and optionally more. The coordinates are a
12
+ `NumPy` float :class:`ndarray` of length 3, containing the x, y and z
13
+ coordinates.
14
+
15
+ An :class:`Atom` contains data for a single atom, it stores the
16
+ annotations as scalar values and the coordinates as length 3
17
+ :class:`ndarray`.
18
+
19
+ An :class:`AtomArray` stores data for an entire structure model
20
+ containing *n* atoms.
21
+ Therefore the annotations are represented as :class:`ndarray` objects of
22
+ length *n*, the so-called annotation arrays.
23
+ The coordinates are a *(n x 3)* :class:`ndarray`.
24
+
25
+ An :class:`AtomArrayStack` stores data for *m* models, where each model
26
+ contains the same atoms at different positions.
27
+ Hence, the annotation arrays are represented as :class:`ndarray` objects
28
+ of length *n* like the :class:`AtomArray`, while the coordinates are a
29
+ *(m x n x 3)* :class:`ndarray`.
30
+
31
+ Just as an :class:`AtomArray` can be iterated to obtain :class:`Atom`
32
+ objects, an :class:`AtomArrayStack` yields :class:`AtomArray` objects.
33
+ All three types must not be subclassed.
34
+
35
+ The following annotation categories are mandatory:
36
+
37
+ ========= =========== ================= =======================================
38
+ Category Type Examples Description
39
+ ========= =========== ================= =======================================
40
+ chain_id string (U4) 'A','S','AB', ... Polypeptide chain
41
+ res_id int 1,2,3, ... Sequence position of residue
42
+ ins_code string (U1) '', 'A','B',.. PDB insertion code (iCode)
43
+ res_name string (U5) 'GLY','ALA', ... Residue name
44
+ hetero bool True, False False for ``ATOM``, true for ``HETATM``
45
+ atom_name string (U6) 'CA','N', ... Atom name
46
+ element string (U2) 'C','O','SE', ... Chemical Element
47
+ ========= =========== ================= =======================================
48
+
49
+ For all :class:`Atom`, :class:`AtomArray` and :class:`AtomArrayStack`
50
+ objects these annotations are initially set with default values.
51
+ In addition to these annotations, an arbitrary number of annotation
52
+ categories can be added via :func:`add_annotation()` or
53
+ :func:`set_annotation()`.
54
+ The annotation arrays can be accessed either via the method
55
+ :func:`get_annotation()` or directly (e.g. ``array.res_id``).
56
+
57
+ The following annotation categories are optionally used by some
58
+ functions:
59
+
60
+ ========= =========== ================= ============================
61
+ Category Type Examples Description
62
+ ========= =========== ================= ============================
63
+ atom_id int 1,2,3, ... Atom serial number
64
+ b_factor float 0.9, 12.3, ... Temperature factor
65
+ occupancy float .1, .3, .9, ... Occupancy
66
+ charge int -2,-1,0,1,2, ... Electric charge of the atom
67
+ ========= =========== ================= ============================
68
+
69
+ For each type, the attributes can be accessed directly.
70
+ Both :class:`AtomArray` and :class:`AtomArrayStack` support
71
+ *NumPy* style indexing.
72
+ The index is propagated to each attribute.
73
+ If a single integer is used as index,
74
+ an object with one dimension less is returned
75
+ (:class:`AtomArrayStack` -> :class:`AtomArray`,
76
+ :class:`AtomArray` -> :class:`Atom`).
77
+ If a slice, index array or a boolean mask is given, a substructure is
78
+ returned
79
+ (:class:`AtomArrayStack` -> :class:`AtomArrayStack`,
80
+ :class:`AtomArray` -> :class:`AtomArray`).
81
+ As in *NumPy*, these are not necessarily deep copies of the originals:
82
+ The attributes of the sliced object may still point to the original
83
+ :class:`ndarray`.
84
+ Use the :func:`copy()` method if a deep copy is required.
85
+
86
+ Bond information can be associated to an :class:`AtomArray` or
87
+ :class:`AtomArrayStack` by setting the ``bonds`` attribute with a
88
+ :class:`BondList`.
89
+ A :class:`BondList` specifies the indices of atoms that form chemical
90
+ bonds.
91
+ Some functionalities require that the input structure has an associated
92
+ :class:`BondList`.
93
+ If no :class:`BondList` is associated, the ``bonds`` attribute is
94
+ ``None``.
95
+
96
+ Based on the implementation in *NumPy* arrays, this package furthermore
97
+ contains a comprehensive set of functions for structure analysis,
98
+ manipulation and visualization.
99
+
100
+ The universal length unit in this package is Å.
101
+ """
102
+
103
+ __name__ = "biotite.structure"
104
+ __author__ = "Patrick Kunzmann"
105
+
106
+ from .atoms import *
107
+ from .bonds import *
108
+ from .box import *
109
+ from .celllist import *
110
+ from .charges import *
111
+ from .compare import *
112
+ from .density import *
113
+ from .dotbracket import *
114
+ from .error import *
115
+ from .filter import *
116
+ from .geometry import *
117
+ from .hbond import *
118
+ from .integrity import *
119
+ from .mechanics import *
120
+ from .molecules import *
121
+ from .pseudoknots import *
122
+ from .rdf import *
123
+ from .repair import *
124
+ from .residues import *
125
+ from .chains import *
126
+ from .sasa import *
127
+ from .sequence import *
128
+ from .sse import *
129
+ from .superimpose import *
130
+ from .transform import *
131
+ from .basepairs import *
132
+ # util and resutil are used internally