biotite 0.41.1__cp310-cp310-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-310-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-310-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-310-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-310-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-310-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,374 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["MSAApp"]
8
+
9
+ import abc
10
+ from tempfile import NamedTemporaryFile
11
+ from collections import OrderedDict
12
+ import numpy as np
13
+ from .localapp import LocalApp, cleanup_tempfile
14
+ from .application import AppState, requires_state
15
+ from ..sequence.seqtypes import NucleotideSequence, ProteinSequence
16
+ from ..sequence.io.fasta.file import FastaFile
17
+ from ..sequence.align.alignment import Alignment
18
+ from .util import map_sequence, map_matrix
19
+
20
+
21
+ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
22
+ """
23
+ This is an abstract base class for multiple sequence alignment
24
+ software.
25
+
26
+ It handles conversion of :class:`Sequence` objects to FASTA input
27
+ and FASTA output to an :class:`Alignment` object.
28
+ Inheriting subclasses only need to incorporate the file path
29
+ of these FASTA files into the program arguments.
30
+
31
+ Furthermore, this class can handle custom substitution matrices,
32
+ if the underlying program supports these.
33
+
34
+ MSA software that supports alignment of protein sequences and custom
35
+ substitution matrices, can be used to align exotic, normally
36
+ unsupported sequence types:
37
+ At first the exotic sequences are mapped into protein sequences and
38
+ the custom substitution matrix is converted into a protein sequence
39
+ substitution matrix.
40
+ Then the protein sequences are aligned and finally the protein
41
+ sequences are mapped back into the original sequence types.
42
+ The mapping does not work, when the alphabet of the exotic
43
+ sequences is larger than the amino acid alphabet.
44
+
45
+ Internally this creates a :class:`Popen` instance, which handles
46
+ the execution.
47
+
48
+ Parameters
49
+ ----------
50
+ sequences : iterable object of Sequence
51
+ The sequences to be aligned.
52
+ bin_path : str, optional
53
+ Path of the MSA software binary.
54
+ matrix : SubstitutionMatrix, optional
55
+ A custom substitution matrix.
56
+ """
57
+
58
+ def __init__(self, sequences, bin_path, matrix=None):
59
+ super().__init__(bin_path)
60
+
61
+ if len(sequences) < 2:
62
+ raise ValueError("At least two sequences are required")
63
+ # Check if all sequences share the same alphabet
64
+ alphabet = sequences[0].get_alphabet()
65
+ for seq in sequences:
66
+ if seq.get_alphabet() != alphabet:
67
+ raise ValueError("Alphabets of the sequences are not equal")
68
+ # Check matrix symmetry
69
+ if matrix is not None and not matrix.is_symmetric():
70
+ raise ValueError(
71
+ "A symmetric matrix is required for "
72
+ "multiple sequence alignments"
73
+ )
74
+
75
+
76
+ # Check whether the program supports the alignment for the given
77
+ # sequence type
78
+ if ProteinSequence.alphabet.extends(alphabet) \
79
+ and self.supports_protein():
80
+ self._is_mapped = False
81
+ self._seqtype = "protein"
82
+ if matrix is not None:
83
+ if not self.supports_custom_protein_matrix():
84
+ raise TypeError(
85
+ "The software does not support custom "
86
+ "substitution matrices for protein sequences"
87
+ )
88
+ self._matrix = matrix
89
+ else:
90
+ self._matrix = None
91
+
92
+ elif NucleotideSequence.alphabet_amb.extends(alphabet) \
93
+ and self.supports_nucleotide():
94
+ self._is_mapped = False
95
+ self._seqtype = "nucleotide"
96
+ if matrix is not None:
97
+ if not self.supports_custom_nucleotide_matrix():
98
+ raise TypeError(
99
+ "The software does not support custom "
100
+ "substitution matrices for nucleotide sequences"
101
+ )
102
+ self._matrix = matrix
103
+ else:
104
+ self._matrix = None
105
+
106
+ else:
107
+ # For all other sequence types, try to map the sequence into
108
+ # a protein sequence
109
+ if not self.supports_protein():
110
+ # Alignment of a custom sequence type requires mapping
111
+ # into a protein sequence
112
+ raise TypeError(
113
+ f"The software cannot align sequences of type "
114
+ f"{type(sequences[0]).__name__}: "
115
+ f"No support for alignment of the mapped sequences"
116
+ )
117
+ if not self.supports_custom_protein_matrix():
118
+ # Alignment of a custom sequence type requires a custom
119
+ # substitution matrix
120
+ raise TypeError(
121
+ f"The software cannot align sequences of type "
122
+ f"{type(sequences[0]).__name__}: "
123
+ f"No support for custom substitution matrices"
124
+ )
125
+ self._is_mapped = True
126
+ self._sequences = sequences
127
+ # Sequence masquerades as protein
128
+ self._seqtype = "protein"
129
+ self._mapped_sequences = [
130
+ map_sequence(sequence) for sequence in sequences
131
+ ]
132
+ self._matrix = map_matrix(matrix)
133
+
134
+
135
+ self._sequences = sequences
136
+ self._in_file = NamedTemporaryFile(
137
+ "w", suffix=".fa", delete=False
138
+ )
139
+ self._out_file = NamedTemporaryFile(
140
+ "r", suffix=".fa", delete=False
141
+ )
142
+ self._matrix_file = NamedTemporaryFile(
143
+ "w", suffix=".mat", delete=False
144
+ )
145
+
146
+ def run(self):
147
+ sequences = self._sequences if not self._is_mapped \
148
+ else self._mapped_sequences
149
+ sequences_file = FastaFile()
150
+ for i, seq in enumerate(sequences):
151
+ sequences_file[str(i)] = str(seq)
152
+ sequences_file.write(self._in_file)
153
+ self._in_file.flush()
154
+ if self._matrix is not None:
155
+ self._matrix_file.write(str(self._matrix))
156
+ self._matrix_file.flush()
157
+ super().run()
158
+
159
+ def evaluate(self):
160
+ super().evaluate()
161
+ alignment_file = FastaFile.read(self._out_file)
162
+ seq_dict = OrderedDict(alignment_file)
163
+ # Get alignment
164
+ out_seq_str = [None] * len(seq_dict)
165
+ for i in range(len(self._sequences)):
166
+ out_seq_str[i] = seq_dict[str(i)]
167
+ trace = Alignment.trace_from_strings(out_seq_str)
168
+ self._alignment = Alignment(self._sequences, trace, None)
169
+ # Also obtain original order
170
+ self._order = np.zeros(len(seq_dict), dtype=int)
171
+ for i, seq_index in enumerate(seq_dict):
172
+ self._order[i] = int(seq_index)
173
+
174
+ def clean_up(self):
175
+ super().clean_up()
176
+ cleanup_tempfile(self._in_file)
177
+ cleanup_tempfile(self._out_file)
178
+ cleanup_tempfile(self._matrix_file)
179
+
180
+ @requires_state(AppState.JOINED)
181
+ def get_alignment(self):
182
+ """
183
+ Get the resulting multiple sequence alignment.
184
+
185
+ Returns
186
+ -------
187
+ alignment : Alignment
188
+ The global multiple sequence alignment.
189
+ """
190
+ return self._alignment
191
+
192
+ @requires_state(AppState.JOINED)
193
+ def get_alignment_order(self):
194
+ """
195
+ Get the order of the resulting multiple sequence alignment.
196
+
197
+ Usually the order of sequences in the output file is
198
+ different from the input file, e.g. the sequences are ordered
199
+ according to the guide tree.
200
+ After running an MSA software, the output sequence order of
201
+ the alignment is rearranged so that it is the same as the input
202
+ order.
203
+ This method returns the order of the sequences intended by the
204
+ MSA software.
205
+
206
+ Returns
207
+ -------
208
+ order : ndarray, dtype=int
209
+ The sequence order intended by the MSA software.
210
+
211
+ Examples
212
+ --------
213
+ Align sequences and restore the order intended by the MSA software:
214
+
215
+ app = ClustalOmegaApp(sequences)
216
+ app.start()
217
+ app.join()
218
+ alignment = app.get_alignment()
219
+ order = app.get_alignment_order()
220
+ alignment = alignment[:, order]
221
+ """
222
+ return self._order
223
+
224
+ def get_input_file_path(self):
225
+ """
226
+ Get input file path (FASTA format).
227
+
228
+ PROTECTED: Do not call from outside.
229
+
230
+ Returns
231
+ -------
232
+ path : str
233
+ Path of input file.
234
+ """
235
+ return self._in_file.name
236
+
237
+ def get_output_file_path(self):
238
+ """
239
+ Get output file path (FASTA format).
240
+
241
+ PROTECTED: Do not call from outside.
242
+
243
+ Returns
244
+ -------
245
+ path : str
246
+ Path of output file.
247
+ """
248
+ return self._out_file.name
249
+
250
+ def get_matrix_file_path(self):
251
+ """
252
+ Get file path for custom substitution matrix.
253
+
254
+ PROTECTED: Do not call from outside.
255
+
256
+ Returns
257
+ -------
258
+ path : str or None
259
+ Path of substitution matrix.
260
+ None if no matrix was given.
261
+ """
262
+ return self._matrix_file.name if self._matrix is not None else None
263
+
264
+ def get_seqtype(self):
265
+ """
266
+ Get the type of aligned sequences.
267
+
268
+ When a custom sequence type (neither nucleotide nor protein)
269
+ is mapped onto a protein sequence, the return value is also
270
+ ``'protein'``.
271
+
272
+ PROTECTED: Do not call from outside.
273
+
274
+ Returns
275
+ -------
276
+ seqtype : {'nucleotide', 'protein'}
277
+ Type of sequences to be aligned.
278
+ """
279
+ return self._seqtype
280
+
281
+ @staticmethod
282
+ @abc.abstractmethod
283
+ def supports_nucleotide():
284
+ """
285
+ Check whether this class supports nucleotide sequences for
286
+ alignment.
287
+
288
+ Returns
289
+ -------
290
+ support : bool
291
+ True, if the class has support, false otherwise.
292
+
293
+ PROTECTED: Override when inheriting.
294
+ """
295
+ pass
296
+
297
+ @staticmethod
298
+ @abc.abstractmethod
299
+ def supports_protein():
300
+ """
301
+ Check whether this class supports protein sequences for
302
+ alignment.
303
+
304
+ Returns
305
+ -------
306
+ support : bool
307
+ True, if the class has support, false otherwise.
308
+
309
+ PROTECTED: Override when inheriting.
310
+ """
311
+ pass
312
+
313
+ @staticmethod
314
+ @abc.abstractmethod
315
+ def supports_custom_nucleotide_matrix():
316
+ """
317
+ Check whether this class supports custom substitution matrices
318
+ for nucleotide sequence alignment.
319
+
320
+ Returns
321
+ -------
322
+ support : bool
323
+ True, if the class has support, false otherwise.
324
+
325
+ PROTECTED: Override when inheriting.
326
+ """
327
+ pass
328
+
329
+ @staticmethod
330
+ @abc.abstractmethod
331
+ def supports_custom_protein_matrix():
332
+ """
333
+ Check whether this class supports custom substitution matrices
334
+ for protein sequence alignment.
335
+
336
+ Returns
337
+ -------
338
+ support : bool
339
+ True, if the class has support, false otherwise.
340
+
341
+ PROTECTED: Override when inheriting.
342
+ """
343
+ pass
344
+
345
+ @classmethod
346
+ def align(cls, sequences, bin_path=None, matrix=None):
347
+ """
348
+ Perform a multiple sequence alignment.
349
+
350
+ This is a convenience function, that wraps the :class:`MSAApp`
351
+ execution.
352
+
353
+ Parameters
354
+ ----------
355
+ sequences : iterable object of Sequence
356
+ The sequences to be aligned
357
+ bin_path : str, optional
358
+ Path of the MSA software binary. By default, the default
359
+ path will be used.
360
+ matrix : SubstitutionMatrix, optional
361
+ A custom substitution matrix.
362
+
363
+ Returns
364
+ -------
365
+ alignment : Alignment
366
+ The global multiple sequence alignment.
367
+ """
368
+ if bin_path is None:
369
+ app = cls(sequences, matrix=matrix)
370
+ else:
371
+ app = cls(sequences, bin_path, matrix=matrix)
372
+ app.start()
373
+ app.join()
374
+ return app.get_alignment()
@@ -0,0 +1,13 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for multiple sequence alignments using MUSCLE.
7
+ """
8
+
9
+ __name__ = "biotite.application.muscle"
10
+ __author__ = "Patrick Kunzmann"
11
+
12
+ from .app3 import *
13
+ from .app5 import *
@@ -0,0 +1,254 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.muscle"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["MuscleApp"]
8
+
9
+ import re
10
+ import numbers
11
+ import warnings
12
+ import subprocess
13
+ from tempfile import NamedTemporaryFile
14
+ from ..localapp import cleanup_tempfile
15
+ from ..msaapp import MSAApp
16
+ from ..application import AppState, VersionError, requires_state
17
+ from ...sequence.sequence import Sequence
18
+ from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
19
+ from ...sequence.align.matrix import SubstitutionMatrix
20
+ from ...sequence.align.alignment import Alignment
21
+ from ...sequence.phylo.tree import Tree
22
+
23
+
24
class MuscleApp(MSAApp):
    """
    Perform a multiple sequence alignment using MUSCLE version 3.

    Parameters
    ----------
    sequences : list of Sequence
        The sequences to be aligned.
    bin_path : str, optional
        Path of the MUSCLE binary.
    matrix : SubstitutionMatrix, optional
        A custom substitution matrix.

    See also
    --------
    Muscle5App

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> app = MuscleApp([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> app.join()
    >>> alignment = app.get_alignment()
    >>> print(alignment)
    BIQT-ITE
    TITANITE
    BISM-ITE
    -IQL-ITE
    """

    def __init__(self, sequences, bin_path="muscle", matrix=None):
        # Refuse any binary whose major version is not 3 before any
        # other setup is done
        major_version = get_version(bin_path)[0]
        if major_version != 3:
            raise VersionError(
                f"Muscle 3 is required, got version {major_version}"
            )

        super().__init__(sequences, bin_path, matrix)
        self._gap_open = None
        self._gap_ext = None
        self._terminal_penalty = None
        # The guide trees stay None until evaluate() has parsed them
        self._tree1 = None
        self._tree2 = None
        # delete=False: the files are removed explicitly in clean_up()
        self._out_tree1_file = NamedTemporaryFile(
            "r", suffix=".tree", delete=False
        )
        self._out_tree2_file = NamedTemporaryFile(
            "r", suffix=".tree", delete=False
        )

    def run(self):
        """
        Assemble the MUSCLE command line arguments and start the run.
        """
        args = [
            "-quiet",
            "-in", self.get_input_file_path(),
            "-out", self.get_output_file_path(),
            "-tree1", self._out_tree1_file.name,
            "-tree2", self._out_tree2_file.name,
        ]
        if self.get_seqtype() == "protein":
            args += ["-seqtype", "protein"]
        else:
            args += ["-seqtype", "dna"]
        if self.get_matrix_file_path() is not None:
            args += ["-matrix", self.get_matrix_file_path()]
        if self._gap_open is not None and self._gap_ext is not None:
            args += ["-gapopen", f"{self._gap_open:.1f}"]
            args += ["-gapextend", f"{self._gap_ext:.1f}"]
            # When the gap penalty is set,
            # use the penalty also for hydrophobic regions
            args += ["-hydrofactor", "1.0"]
            # Use the recommendation of the documentation
            args += ["-center", "0.0"]
        self.set_arguments(args)
        super().run()

    def evaluate(self):
        """
        Read the guide trees from the tree output files.

        A warning is issued for each tree file that turns out to be
        empty; the corresponding guide tree stays ``None`` in that
        case.
        """
        super().evaluate()

        newick = self._out_tree1_file.read().replace("\n", "")
        if len(newick) > 0:
            self._tree1 = Tree.from_newick(newick)
        else:
            warnings.warn(
                "MUSCLE did not write a tree file from the first iteration"
            )

        newick = self._out_tree2_file.read().replace("\n", "")
        if len(newick) > 0:
            self._tree2 = Tree.from_newick(newick)
        else:
            warnings.warn(
                "MUSCLE did not write a tree file from the second iteration"
            )

    def clean_up(self):
        """
        Remove the temporary guide tree files in addition to the
        cleanup performed by the parent class.
        """
        super().clean_up()
        cleanup_tempfile(self._out_tree1_file)
        cleanup_tempfile(self._out_tree2_file)

    @requires_state(AppState.CREATED)
    def set_gap_penalty(self, gap_penalty):
        """
        Set the gap penalty for the alignment.

        Parameters
        ----------
        gap_penalty : float or tuple
            If a single real number is provided, the value will be
            interpreted as general gap penalty, i.e. it is used as
            both, gap opening and gap extension penalty.
            If a tuple is provided, an affine gap penalty is used.
            The first value in the tuple is the gap opening penalty,
            the second value is the gap extension penalty.
            The values need to be negative.

        Raises
        ------
        ValueError
            If a penalty value is greater than zero.
        TypeError
            If `gap_penalty` is neither a real number nor a tuple.
        """
        # Check if gap penalty is general or affine
        if isinstance(gap_penalty, numbers.Real):
            if gap_penalty > 0:
                raise ValueError("Gap penalty must be negative")
            self._gap_open = gap_penalty
            self._gap_ext = gap_penalty
        # isinstance() instead of an exact type comparison,
        # so that tuple subclasses (e.g. namedtuples) are accepted too
        elif isinstance(gap_penalty, tuple):
            if gap_penalty[0] > 0 or gap_penalty[1] > 0:
                raise ValueError("Gap penalty must be negative")
            self._gap_open = gap_penalty[0]
            self._gap_ext = gap_penalty[1]
        else:
            raise TypeError("Gap penalty must be either float or tuple")

    @requires_state(AppState.JOINED)
    def get_guide_tree(self, iteration="identity"):
        """
        Get the guide tree created for the progressive alignment.

        Parameters
        ----------
        iteration : {'kmer', 'identity'}
            If 'kmer', the first iteration tree is returned.
            This tree uses the sequences common *k*-mers as distance
            measure.
            If 'identity' the second iteration tree is returned.
            This tree uses distances based on the pairwise sequence
            identity after the first progressive alignment iteration.

        Returns
        -------
        tree : Tree
            The guide tree.
            ``None``, if no tree was read for the requested iteration.

        Raises
        ------
        ValueError
            If `iteration` is neither 'kmer' nor 'identity'.
        """
        if iteration == "kmer":
            return self._tree1
        elif iteration == "identity":
            return self._tree2
        else:
            raise ValueError("Iteration must be 'kmer' or 'identity'")

    @staticmethod
    def supports_nucleotide():
        return True

    @staticmethod
    def supports_protein():
        return True

    @staticmethod
    def supports_custom_nucleotide_matrix():
        return False

    @staticmethod
    def supports_custom_protein_matrix():
        return True

    @classmethod
    def align(cls, sequences, bin_path=None, matrix=None,
              gap_penalty=None):
        """
        Perform a multiple sequence alignment.

        This is a convenience function, that wraps the :class:`MuscleApp`
        execution.

        Parameters
        ----------
        sequences : iterable object of Sequence
            The sequences to be aligned
        bin_path : str, optional
            Path of the MSA software binary. By default, the default path
            will be used.
        matrix : SubstitutionMatrix, optional
            A custom substitution matrix.
        gap_penalty : float or tuple, optional
            If a single real number is provided, the value will be
            interpreted as general gap penalty.
            If a tuple is provided, an affine gap penalty is used.
            The first value in the tuple is the gap opening penalty,
            the second value is the gap extension penalty.
            The values need to be negative.

        Returns
        -------
        alignment : Alignment
            The global multiple sequence alignment.
        """
        # Omit the binary path unless given, so that the default of
        # the constructor applies
        if bin_path is None:
            app = cls(sequences, matrix=matrix)
        else:
            app = cls(sequences, bin_path, matrix=matrix)
        if gap_penalty is not None:
            app.set_gap_penalty(gap_penalty)
        app.start()
        app.join()
        return app.get_alignment()
240
+
241
+
242
def get_version(bin_path="muscle"):
    """
    Determine the version of a MUSCLE binary.

    The binary is called with the ``-version`` option and the first
    ``<major>.<minor>`` pattern found in its standard output is
    taken as version.

    Parameters
    ----------
    bin_path : str, optional
        Path of the MUSCLE binary.

    Returns
    -------
    major, minor : int
        The major and minor version number.

    Raises
    ------
    subprocess.SubprocessError
        If the standard output contains no version string.
    """
    output = subprocess.run(
        [bin_path, "-version"], capture_output=True, text=True
    )
    # Find matches for version string containing major and minor version
    # Raw string: '\d' and '\.' are regex escapes, but invalid string
    # escapes that trigger a warning in a regular string literal
    match = re.search(r"\d+\.\d+", output.stdout)
    if match is None:
        raise subprocess.SubprocessError(
            "Could not determine Muscle version"
        )
    # The pattern guarantees exactly one '.' in the matched string
    major, minor = match.group(0).split(".")
    return int(major), int(minor)