biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354)
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,363 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["MSAApp"]
8
+
9
+ import abc
10
+ from collections import OrderedDict
11
+ from tempfile import NamedTemporaryFile
12
+ import numpy as np
13
+ from biotite.application.application import AppState, requires_state
14
+ from biotite.application.localapp import LocalApp, cleanup_tempfile
15
+ from biotite.application.util import map_matrix, map_sequence
16
+ from biotite.sequence.align.alignment import Alignment
17
+ from biotite.sequence.io.fasta.file import FastaFile
18
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
19
+
20
+
21
class MSAApp(LocalApp, metaclass=abc.ABCMeta):
    """
    This is an abstract base class for multiple sequence alignment
    software.

    It handles conversion of :class:`Sequence` objects to FASTA input
    and FASTA output to an :class:`Alignment` object.
    Inheriting subclasses only need to incorporate the file path
    of these FASTA files into the program arguments.

    Furthermore, this class can handle custom substitution matrices,
    if the underlying program supports these.

    MSA software that supports alignment of protein sequences and custom
    substitution matrices, can be used to align exotic, normally
    unsupported sequence types:
    At first the exotic sequences are mapped into protein sequences and
    the custom substitution matrix is converted into a protein sequence
    substitution matrix.
    Then the protein sequences are aligned and finally the protein
    sequences are mapped back into the original sequence types.
    The mapping does not work, when the alphabet of the exotic
    sequences is larger than the amino acid alphabet.

    Internally this creates a :class:`Popen` instance, which handles
    the execution.

    Parameters
    ----------
    sequences : iterable object of Sequence
        The sequences to be aligned.
        At least two sequences sharing the same alphabet are required.
    bin_path : str
        Path of the MSA software binary.
        This base class defines no default value for it.
    matrix : SubstitutionMatrix, optional
        A custom substitution matrix.
        It must be symmetric.
    """

    def __init__(self, sequences, bin_path, matrix=None):
        super().__init__(bin_path)

        # Materialize the input once: the checks below need ``len()`` and
        # indexing, which arbitrary iterables (e.g. generators) do not offer
        sequences = list(sequences)
        if len(sequences) < 2:
            raise ValueError("At least two sequences are required")
        # Check if all sequences share the same alphabet
        alphabet = sequences[0].get_alphabet()
        if any(seq.get_alphabet() != alphabet for seq in sequences):
            raise ValueError("Alphabets of the sequences are not equal")
        # Check matrix symmetry
        if matrix is not None and not matrix.is_symmetric():
            raise ValueError(
                "A symmetric matrix is required for multiple sequence alignments"
            )

        # Check whether the program supports the alignment for the given
        # sequence type
        if ProteinSequence.alphabet.extends(alphabet) and self.supports_protein():
            self._is_mapped = False
            self._seqtype = "protein"
            if matrix is not None and not self.supports_custom_protein_matrix():
                raise TypeError(
                    "The software does not support custom "
                    "substitution matrices for protein sequences"
                )
            # ``None`` if no custom matrix was given
            self._matrix = matrix

        elif (
            NucleotideSequence.alphabet_amb.extends(alphabet)
            and self.supports_nucleotide()
        ):
            self._is_mapped = False
            self._seqtype = "nucleotide"
            if matrix is not None and not self.supports_custom_nucleotide_matrix():
                raise TypeError(
                    "The software does not support custom "
                    "substitution matrices for nucleotide sequences"
                )
            # ``None`` if no custom matrix was given
            self._matrix = matrix

        else:
            # For all other sequence types, try to map the sequence into
            # a protein sequence
            if not self.supports_protein():
                # Alignment of a custom sequence type requires mapping
                # into a protein sequence
                raise TypeError(
                    f"The software cannot align sequences of type "
                    f"{type(sequences[0]).__name__}: "
                    f"No support for alignment of the mapped sequences"
                )
            if not self.supports_custom_protein_matrix():
                # Alignment of a custom sequence type requires a custom
                # substitution matrix
                raise TypeError(
                    f"The software cannot align sequences of type "
                    f"{type(sequences[0]).__name__}: "
                    f"No support for custom substitution matrices"
                )
            self._is_mapped = True
            # Sequence masquerades as protein
            self._seqtype = "protein"
            self._mapped_sequences = [map_sequence(sequence) for sequence in sequences]
            self._matrix = map_matrix(matrix)

        self._sequences = sequences
        # The files are created with ``delete=False``;
        # they are removed explicitly in ``clean_up()``
        self._in_file = NamedTemporaryFile("w", suffix=".fa", delete=False)
        self._out_file = NamedTemporaryFile("r", suffix=".fa", delete=False)
        self._matrix_file = NamedTemporaryFile("w", suffix=".mat", delete=False)

    def run(self):
        # The (mapped) sequences are written with their input index as
        # FASTA header, so ``evaluate()`` can identify them in the output
        sequences = self._mapped_sequences if self._is_mapped else self._sequences
        sequences_file = FastaFile()
        for i, seq in enumerate(sequences):
            sequences_file[str(i)] = str(seq)
        sequences_file.write(self._in_file)
        self._in_file.flush()
        if self._matrix is not None:
            self._matrix_file.write(str(self._matrix))
            self._matrix_file.flush()
        super().run()

    def evaluate(self):
        super().evaluate()
        alignment_file = FastaFile.read(self._out_file)
        seq_dict = OrderedDict(alignment_file)
        # Get alignment:
        # The headers are the input indices, so looking them up in
        # ascending order yields the aligned strings in input order
        out_seq_str = [seq_dict[str(i)] for i in range(len(self._sequences))]
        trace = Alignment.trace_from_strings(out_seq_str)
        self._alignment = Alignment(self._sequences, trace, None)
        # Also obtain original order, i.e. the order of the headers
        # as they appear in the output file
        self._order = np.array([int(seq_index) for seq_index in seq_dict], dtype=int)

    def clean_up(self):
        super().clean_up()
        cleanup_tempfile(self._in_file)
        cleanup_tempfile(self._out_file)
        cleanup_tempfile(self._matrix_file)

    @requires_state(AppState.JOINED)
    def get_alignment(self):
        """
        Get the resulting multiple sequence alignment.

        Returns
        -------
        alignment : Alignment
            The global multiple sequence alignment.
        """
        return self._alignment

    @requires_state(AppState.JOINED)
    def get_alignment_order(self):
        """
        Get the order of the resulting multiple sequence alignment.

        Usually the order of sequences in the output file is
        different from the input file, e.g. the sequences are ordered
        according to the guide tree.
        After running an MSA software, the output sequence order of
        the alignment is rearranged so that it is the same as the input
        order.
        This method returns the order of the sequences intended by the
        MSA software.

        Returns
        -------
        order : ndarray, dtype=int
            The sequence order intended by the MSA software.

        Examples
        --------
        Align sequences and restore the original order:

        app = ClustalOmegaApp(sequences)
        app.start()
        app.join()
        alignment = app.get_alignment()
        order = app.get_alignment_order()
        alignment = alignment[:, order]
        """
        return self._order

    def get_input_file_path(self):
        """
        Get input file path (FASTA format).

        PROTECTED: Do not call from outside.

        Returns
        -------
        path : str
            Path of input file.
        """
        return self._in_file.name

    def get_output_file_path(self):
        """
        Get output file path (FASTA format).

        PROTECTED: Do not call from outside.

        Returns
        -------
        path : str
            Path of output file.
        """
        return self._out_file.name

    def get_matrix_file_path(self):
        """
        Get file path for custom substitution matrix.

        PROTECTED: Do not call from outside.

        Returns
        -------
        path : str or None
            Path of substitution matrix.
            None if no matrix was given.
        """
        return self._matrix_file.name if self._matrix is not None else None

    def get_seqtype(self):
        """
        Get the type of aligned sequences.

        When a custom sequence type (neither nucleotide nor protein)
        is mapped onto a protein sequence, the return value is also
        ``'protein'``.

        PROTECTED: Do not call from outside.

        Returns
        -------
        seqtype : {'nucleotide', 'protein'}
            Type of sequences to be aligned.
        """
        return self._seqtype

    @staticmethod
    @abc.abstractmethod
    def supports_nucleotide():
        """
        Check whether this class supports nucleotide sequences for
        alignment.

        PROTECTED: Override when inheriting.

        Returns
        -------
        support : bool
            True, if the class has support, false otherwise.
        """
        pass

    @staticmethod
    @abc.abstractmethod
    def supports_protein():
        """
        Check whether this class supports protein sequences for
        alignment.

        PROTECTED: Override when inheriting.

        Returns
        -------
        support : bool
            True, if the class has support, false otherwise.
        """
        pass

    @staticmethod
    @abc.abstractmethod
    def supports_custom_nucleotide_matrix():
        """
        Check whether this class supports custom substitution matrices
        for nucleotide sequence alignment.

        PROTECTED: Override when inheriting.

        Returns
        -------
        support : bool
            True, if the class has support, false otherwise.
        """
        pass

    @staticmethod
    @abc.abstractmethod
    def supports_custom_protein_matrix():
        """
        Check whether this class supports custom substitution matrices
        for protein sequence alignment.

        PROTECTED: Override when inheriting.

        Returns
        -------
        support : bool
            True, if the class has support, false otherwise.
        """
        pass

    @classmethod
    def align(cls, sequences, bin_path=None, matrix=None):
        """
        Perform a multiple sequence alignment.

        This is a convenience function, that wraps the :class:`MSAApp`
        execution.

        Parameters
        ----------
        sequences : iterable object of Sequence
            The sequences to be aligned.
        bin_path : str, optional
            Path of the MSA software binary. By default, the default
            path will be used.
        matrix : SubstitutionMatrix, optional
            A custom substitution matrix.

        Returns
        -------
        alignment : Alignment
            The global multiple sequence alignment.
        """
        if bin_path is None:
            # Omit the argument, so the default of the constructor of
            # the concrete class applies
            app = cls(sequences, matrix=matrix)
        else:
            app = cls(sequences, bin_path, matrix=matrix)
        app.start()
        app.join()
        return app.get_alignment()
@@ -0,0 +1,13 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for multiple sequence alignments using MUSCLE.
7
+ """
8
+
9
+ __name__ = "biotite.application.muscle"
10
+ __author__ = "Patrick Kunzmann"
11
+
12
+ from .app3 import *
13
+ from .app5 import *
@@ -0,0 +1,227 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.muscle"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["MuscleApp"]
8
+
9
+ import numbers
10
+ import warnings
11
+ from collections.abc import Sequence
12
+ from tempfile import NamedTemporaryFile
13
+ from biotite.application.application import AppState, VersionError, requires_state
14
+ from biotite.application.localapp import cleanup_tempfile, get_version
15
+ from biotite.application.msaapp import MSAApp
16
+ from biotite.sequence.phylo.tree import Tree
17
+
18
+
19
class MuscleApp(MSAApp):
    """
    Compute a multiple sequence alignment with MUSCLE version 3.

    In addition to the alignment, the guide trees MUSCLE writes for its
    first and second iteration are read and made available via
    :meth:`get_guide_tree()`.

    Parameters
    ----------
    sequences : list of Sequence
        The sequences to be aligned.
    bin_path : str, optional
        Path of the MUSCLE binary.
    matrix : SubstitutionMatrix, optional
        A custom substitution matrix.

    See Also
    --------
    Muscle5App : Interface to MUSCLE version ``>=5``.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> app = MuscleApp([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> app.join()
    >>> alignment = app.get_alignment()
    >>> print(alignment)
    BIQT-ITE
    TITANITE
    BISM-ITE
    -IQL-ITE
    """

    def __init__(self, sequences, bin_path="muscle", matrix=None):
        # Reject an incompatible binary before the temporary tree files
        # are created
        version = get_version(bin_path, "-version")
        major_version = version[0]
        if major_version != 3:
            raise VersionError(f"Muscle 3 is required, got version {major_version}")

        super().__init__(sequences, bin_path, matrix)

        # Gap penalties stay unset until `set_gap_penalty()` is called
        self._gap_open = None
        self._gap_ext = None
        self._terminal_penalty = None

        # Guide trees are filled in by `evaluate()`
        self._tree1 = None
        self._tree2 = None

        # MUSCLE writes its guide trees into these files;
        # they are removed again in `clean_up()`
        self._out_tree1_file = NamedTemporaryFile(
            "r", suffix=".tree", delete=False
        )
        self._out_tree2_file = NamedTemporaryFile(
            "r", suffix=".tree", delete=False
        )

    def run(self):
        # Mandatory options: input, output and both guide tree files
        args = ["-quiet"]
        args += ["-in", self.get_input_file_path()]
        args += ["-out", self.get_output_file_path()]
        args += ["-tree1", self._out_tree1_file.name]
        args += ["-tree2", self._out_tree2_file.name]

        # Everything that is not a protein alignment is passed as DNA
        seqtype = "protein" if self.get_seqtype() == "protein" else "dna"
        args += ["-seqtype", seqtype]

        matrix_path = self.get_matrix_file_path()
        if matrix_path is not None:
            args += ["-matrix", matrix_path]

        penalty_is_set = not (self._gap_open is None or self._gap_ext is None)
        if penalty_is_set:
            args += ["-gapopen", f"{self._gap_open:.1f}"]
            args += ["-gapextend", f"{self._gap_ext:.1f}"]
            # When the gap penalty is set,
            # use the penalty also for hydrophobic regions
            args += ["-hydrofactor", "1.0"]
            # Use the recommendation of the documentation
            args += ["-center", "0.0"]

        self.set_arguments(args)
        super().run()

    def evaluate(self):
        super().evaluate()

        # Tree from the first iteration; an empty file means that MUSCLE
        # did not write one
        first_newick = self._out_tree1_file.read().replace("\n", "")
        if not first_newick:
            warnings.warn("MUSCLE did not write a tree file from the first iteration")
        else:
            self._tree1 = Tree.from_newick(first_newick)

        # Tree from the second iteration
        second_newick = self._out_tree2_file.read().replace("\n", "")
        if not second_newick:
            warnings.warn("MUSCLE did not write a tree file from the second iteration")
        else:
            self._tree2 = Tree.from_newick(second_newick)

    def clean_up(self):
        super().clean_up()
        # Remove the guide tree files created in the constructor
        cleanup_tempfile(self._out_tree1_file)
        cleanup_tempfile(self._out_tree2_file)

    @requires_state(AppState.CREATED)
    def set_gap_penalty(self, gap_penalty):
        """
        Define the gap penalty used in the alignment.

        Parameters
        ----------
        gap_penalty : float or (tuple, dtype=int)
            A single number is used as general gap penalty, i.e. for
            both, gap opening and gap extension.
            A tuple selects an affine gap penalty:
            Its first value is the gap opening penalty and
            its second value is the gap extension penalty.
            Positive values are rejected.

        Raises
        ------
        ValueError
            If a given penalty value is positive.
        TypeError
            If `gap_penalty` is neither a number nor a sequence.
        """
        if isinstance(gap_penalty, numbers.Real):
            # General penalty: same value for opening and extension
            if gap_penalty > 0:
                raise ValueError("Gap penalty must be negative")
            opening = extension = gap_penalty
        elif isinstance(gap_penalty, Sequence):
            # Affine penalty: (opening, extension)
            if gap_penalty[0] > 0 or gap_penalty[1] > 0:
                raise ValueError("Gap penalty must be negative")
            opening, extension = gap_penalty[0], gap_penalty[1]
        else:
            raise TypeError("Gap penalty must be either float or tuple")

        self._gap_open = opening
        self._gap_ext = extension

    @requires_state(AppState.JOINED)
    def get_guide_tree(self, iteration="identity"):
        """
        Obtain one of the guide trees of the progressive alignment.

        Parameters
        ----------
        iteration : {'kmer', 'identity'}
            If 'kmer', the first iteration tree is returned.
            This tree uses the sequences common *k*-mers as distance
            measure.
            If 'identity' the second iteration tree is returned.
            This tree uses distances based on the pairwise sequence
            identity after the first progressive alignment iteration.

        Returns
        -------
        tree : Tree
            The guide tree.

        Raises
        ------
        ValueError
            If `iteration` is neither 'kmer' nor 'identity'.
        """
        if iteration not in ("kmer", "identity"):
            raise ValueError("Iteration must be 'kmer' or 'identity'")
        return self._tree1 if iteration == "kmer" else self._tree2

    @staticmethod
    def supports_nucleotide():
        """Report that nucleotide sequences are supported."""
        return True

    @staticmethod
    def supports_protein():
        """Report that protein sequences are supported."""
        return True

    @staticmethod
    def supports_custom_nucleotide_matrix():
        """Report that custom nucleotide matrices are not supported."""
        return False

    @staticmethod
    def supports_custom_protein_matrix():
        """Report that custom protein matrices are supported."""
        return True

    @classmethod
    def align(cls, sequences, bin_path=None, matrix=None, gap_penalty=None):
        """
        Align multiple sequences in a single call.

        Convenience wrapper around the :class:`MuscleApp` life cycle:
        the application is created, optionally configured with a gap
        penalty, started and joined, and the resulting alignment is
        returned.

        Parameters
        ----------
        sequences : iterable object of Sequence
            The sequences to be aligned.
        bin_path : str, optional
            Path of the MSA software binary. By default, the default path
            will be used.
        matrix : SubstitutionMatrix, optional
            A custom substitution matrix.
        gap_penalty : float or (tuple, dtype=int), optional
            A single number is used as general gap penalty.
            A tuple selects an affine gap penalty:
            Its first value is the gap opening penalty and
            its second value is the gap extension penalty.
            Positive values are rejected.

        Returns
        -------
        alignment : Alignment
            The global multiple sequence alignment.
        """
        # Leave out `bin_path` entirely when it is not given, so the
        # constructor falls back to its own default binary path
        positional = (sequences,) if bin_path is None else (sequences, bin_path)
        app = cls(*positional, matrix=matrix)
        if gap_penalty is not None:
            app.set_gap_penalty(gap_penalty)
        app.start()
        app.join()
        return app.get_alignment()