biotite 1.6.0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +426 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +202 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +66 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +224 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +259 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +191 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +127 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +491 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +763 -0
  73. biotite/sequence/align/banded.cp314-win_amd64.pyd +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cp314-win_amd64.pyd +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cp314-win_amd64.pyd +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cp314-win_amd64.pyd +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cp314-win_amd64.pyd +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cp314-win_amd64.pyd +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cp314-win_amd64.pyd +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cp314-win_amd64.pyd +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cp314-win_amd64.pyd +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cp314-win_amd64.pyd +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cp314-win_amd64.pyd +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cp314-win_amd64.pyd +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +462 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cp314-win_amd64.pyd +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cp314-win_amd64.pyd +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cp314-win_amd64.pyd +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1596 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cp314-win_amd64.pyd +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cp314-win_amd64.pyd +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cp314-win_amd64.pyd +0 -0
  272. biotite/structure/charges.pyx +521 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +646 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +426 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cp314-win_amd64.pyd +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2122 -0
  323. biotite/structure/io/pdbx/encoding.cp314-win_amd64.pyd +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +452 -0
  338. biotite/structure/sasa.cp314-win_amd64.pyd +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.6.0.dist-info/METADATA +162 -0
  352. biotite-1.6.0.dist-info/RECORD +354 -0
  353. biotite-1.6.0.dist-info/WHEEL +4 -0
  354. biotite-1.6.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,763 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.align"
6
+ __author__ = "Patrick Kunzmann"
7
+
8
+ import numbers
9
+ import textwrap
10
+ from collections.abc import Sequence
11
+ import numpy as np
12
+
13
+ __all__ = [
14
+ "Alignment",
15
+ "get_codes",
16
+ "get_symbols",
17
+ "get_sequence_identity",
18
+ "get_pairwise_sequence_identity",
19
+ "score",
20
+ "find_terminal_gaps",
21
+ "remove_terminal_gaps",
22
+ "remove_gaps",
23
+ ]
24
+
25
+
26
class Alignment(object):
    """
    An :class:`Alignment` object stores information about which symbols
    of *n* sequences are aligned to each other and it stores the
    corresponding alignment score.

    Instead of saving a list of aligned symbols, this class saves the
    original *n* sequences, that were aligned, and a so called *trace*,
    which indicates the aligned symbols of these sequences.
    The trace is a *(m x n)* :class:`ndarray` with alignment length
    *m* and sequence count *n*.
    Each element of the trace is the index in the corresponding
    sequence.
    A gap is represented by the value -1.

    Furthermore this class provides multiple utility functions for
    conversion into strings in order to make the alignment human
    readable.

    Unless an :class:`Alignment` object is the result of a multiple
    sequence alignment, the object will contain only two sequences.

    All attributes of this class are publicly accessible.

    Parameters
    ----------
    sequences : list
        A list of aligned sequences.
    trace : ndarray, dtype=int, shape=(m,n)
        The alignment trace.
    score : int, optional
        Alignment score.

    Attributes
    ----------
    sequences : list
        A list of aligned sequences.
    trace : ndarray, dtype=int, shape=(m,n)
        The alignment trace.
    score : int
        Alignment score.

    Examples
    --------

    >>> seq1 = NucleotideSequence("CGTCAT")
    >>> seq2 = NucleotideSequence("TCATGC")
    >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
    >>> ali = align_optimal(seq1, seq2, matrix)[0]
    >>> print(ali)
    CGTCAT--
    --TCATGC
    >>> print(ali.trace)
    [[ 0 -1]
     [ 1 -1]
     [ 2  0]
     [ 3  1]
     [ 4  2]
     [ 5  3]
     [-1  4]
     [-1  5]]
    >>> print(ali[1:4].trace)
    [[ 1 -1]
     [ 2  0]
     [ 3  1]]
    >>> print(ali[1:4, 0:1].trace)
    [[1]
     [2]
     [3]]
    """

    def __init__(self, sequences, trace, score=None):
        # Shallow copy: the list is decoupled from the caller's list,
        # the sequence objects and the trace are stored by reference
        self.sequences = sequences.copy()
        self.trace = trace
        self.score = score

    @staticmethod
    def from_strings(sequence_strings, sequence_factory, gap_character="-"):
        """
        Create an :class:`Alignment` from strings that represent aligned sequences.

        Parameters
        ----------
        sequence_strings : list of str
            The strings, where each one represents a sequence
            (with gaps) in an alignment.
            All strings must have the same length.
        sequence_factory : Callable (str -> Sequence)
            Callable that takes a sequence string (with gaps already removed) and
            produces a :class:`Sequence` object.
        gap_character : str, optional
            This character is interpreted as gap.

        Returns
        -------
        alignment : Alignment
            The created alignment.
            Its score is ``None``.

        Raises
        ------
        ValueError
            If less than two sequence strings are given.
        IndexError
            If the sequence strings have different lengths.

        Examples
        --------

        >>> alignment = Alignment.from_strings(
        ...     [
        ...         "BIQTITE",
        ...         "-IQLITE"
        ...     ],
        ...     ProteinSequence,
        ... )
        >>> print(alignment)
        BIQTITE
        -IQLITE
        >>> print(alignment.sequences[0])
        BIQTITE
        >>> print(alignment.sequences[1])
        IQLITE
        """
        sequences = [
            sequence_factory(seq_str.replace(gap_character, ""))
            for seq_str in sequence_strings
        ]
        return Alignment(
            sequences, Alignment.trace_from_strings(sequence_strings, gap_character)
        )

    @staticmethod
    def trace_from_strings(sequence_strings, gap_character="-"):
        """
        Create a trace from strings that represent aligned sequences.

        Parameters
        ----------
        sequence_strings : list of str
            The strings, where each one represents a sequence
            (with gaps) in an alignment.
            All strings must have the same length.
        gap_character : str, optional
            This character is interpreted as gap.

        Returns
        -------
        trace : ndarray, dtype=int, shape=(m,n)
            The created trace for *n* sequence strings of length *m*.

        Raises
        ------
        ValueError
            If less than two sequence strings are given.
        IndexError
            If the sequence strings have different lengths.

        See Also
        --------
        from_strings:
            Creates directly an :class:`Alignment` object.
        """
        if len(sequence_strings) < 2:
            raise ValueError("An alignment must contain at least two sequences")
        if any(
            len(seq_str) != len(sequence_strings[0]) for seq_str in sequence_strings
        ):
            raise IndexError("All sequence strings must have the same length")

        # Start with a trace filled with gaps (-1)
        trace = np.full(
            (len(sequence_strings[0]), len(sequence_strings)), -1, dtype=int
        )
        gap_code = ord(gap_character)
        for i, seq_str in enumerate(sequence_strings):
            # Convert into NumPy byte array to use vectorized operations
            byte_array = np.frombuffer(seq_str.encode("ASCII"), dtype=np.ubyte)
            # Fill the trace with the positions of each sequence where there is no gap
            not_gap_mask = byte_array != gap_code
            trace[not_gap_mask, i] = np.arange(np.count_nonzero(not_gap_mask))

        return trace

    def __repr__(self):
        """Represent the Alignment as a string for debugging."""
        return (
            f"Alignment([{', '.join([repr(seq) for seq in self.sequences])}], "
            f"np.{np.array_repr(self.trace)}, score={self.score})"
        )

    def _gapped_str(self, seq_index):
        """Build the gapped string of the sequence at `seq_index`, using '-' for gaps."""
        sequence = self.sequences[seq_index]
        return "".join(
            str(sequence[j]) if j != -1 else "-" for j in self.trace[:, seq_index]
        )

    def get_gapped_sequences(self):
        """
        Get the string representation of the gapped sequences.

        Returns
        -------
        sequences : list of str
            The list of gapped sequence strings. The order is the same
            as in `Alignment.sequences`.
        """
        return [self._gapped_str(i) for i in range(len(self.sequences))]

    def __str__(self):
        # The compact text representation is only available if every
        # sequence uses a single letter alphabet
        if not all(_is_single_letter(seq.alphabet) for seq in self.sequences):
            return super().__str__()

        # First dimension: sequence number,
        # second dimension: line number
        wrapper = textwrap.TextWrapper(break_on_hyphens=False)
        wrapped_lines = [
            wrapper.wrap(self._gapped_str(i)) for i in range(len(self.sequences))
        ]
        # Each text block contains the same line of all sequences,
        # the blocks are separated by an empty line
        blocks = [
            "\n".join(lines[row_i] for lines in wrapped_lines)
            for row_i in range(len(wrapped_lines[0]))
        ]
        return "\n\n".join(blocks)

    def __getitem__(self, index):
        """
        Select a sub-alignment.

        The first index dimension selects alignment columns (rows of the
        trace), the optional second dimension selects sequences.
        Integers are rejected in both dimensions, as they would collapse
        a dimension of the trace.
        """
        invalid_int_message = (
            "Integers are invalid indices for alignments, "
            "a single sequence or alignment column cannot be "
            "selected"
        )
        if isinstance(index, tuple):
            if len(index) > 2:
                raise IndexError("Only 1D or 2D indices are allowed")
            # Check both dimensions
            if any(isinstance(i, numbers.Integral) for i in index):
                raise IndexError(invalid_int_message)
            return Alignment(
                Alignment._index_sequences(self.sequences, index[1]),
                self.trace[index],
                self.score,
            )
        if isinstance(index, numbers.Integral):
            # A single integer would produce a 1D trace
            raise IndexError(invalid_int_message)
        return Alignment(self.sequences, self.trace[index], self.score)

    def __iter__(self):
        raise TypeError("'Alignment' object is not iterable")

    def __len__(self):
        return len(self.trace)

    def __eq__(self, item):
        if not isinstance(item, Alignment):
            return False
        if self.sequences != item.sequences:
            return False
        if not np.array_equal(self.trace, item.trace):
            return False
        if self.score != item.score:
            return False
        return True

    @staticmethod
    def _index_sequences(sequences, index):
        """Apply the sequence dimension of an alignment index to the sequence list."""
        if isinstance(index, np.ndarray) and index.dtype == bool:
            return [seq for seq, mask in zip(sequences, index) if mask]
        elif isinstance(index, (list, tuple, np.ndarray)):
            return [sequences[i] for i in index]
        elif isinstance(index, slice):
            return sequences[index]
        else:
            raise IndexError(f"Invalid alignment index type '{type(index).__name__}'")
297
+
298
+
299
def get_codes(alignment):
    """
    Get the sequence codes of the sequences in the alignment.

    The codes are built from the trace:
    Instead of the indices of the aligned symbols (trace), the return
    value contains the corresponding symbol codes for each index.
    Gaps are still represented by *-1*.

    Parameters
    ----------
    alignment : Alignment
        The alignment to get the sequence codes for.

    Returns
    -------
    codes : ndarray, dtype=int, shape=(n,m)
        The sequence codes for the alignment.
        The shape is *(n,m)* for *n* sequences and *m* alignment columns.
        The array uses *-1* values for gaps.

    Examples
    --------

    >>> seq1 = NucleotideSequence("CGTCAT")
    >>> seq2 = NucleotideSequence("TCATGC")
    >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
    >>> ali = align_optimal(seq1, seq2, matrix)[0]
    >>> print(ali)
    CGTCAT--
    --TCATGC
    >>> print(get_codes(ali))
    [[ 1  2  3  1  0  3 -1 -1]
     [-1 -1  3  1  0  3  2  1]]
    """
    trace = alignment.trace

    # The number of sequences is the first dimension
    # The array is initialized with the gap value in a signed dtype,
    # so only the aligned positions need to be filled in
    codes = np.full((trace.shape[1], trace.shape[0]), -1, dtype=np.int64)
    for i, sequence in enumerate(alignment.sequences):
        positions = trace[:, i]
        not_gap_mask = positions != -1
        # Only index the sequence code with actual positions:
        # Using the gap value (-1) as index would fail for an empty sequence
        codes[i, not_gap_mask] = sequence.code[positions[not_gap_mask]]

    return codes
348
+
349
+
350
def get_symbols(alignment):
    """
    Get the symbols of the sequences in the alignment.

    This works like :func:`get_codes()`, but the returned values are the
    decoded symbols instead of the symbol codes.
    Gaps are represented by *None* values.

    Parameters
    ----------
    alignment : Alignment
        The alignment to get the symbols for.

    Returns
    -------
    symbols : list of list
        The nested list of symbols.
        The outer list contains one list for each sequence, each inner
        list contains one element for each alignment column.

    See Also
    --------
    get_codes : Get the sequence codes of the sequences in the alignment.

    Examples
    --------

    >>> seq1 = NucleotideSequence("CGTCAT")
    >>> seq2 = NucleotideSequence("TCATGC")
    >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
    >>> ali = align_optimal(seq1, seq2, matrix)[0]
    >>> print(ali)
    CGTCAT--
    --TCATGC
    >>> print(get_symbols(ali))
    [['C', 'G', 'T', 'C', 'A', 'T', None, None], [None, None, 'T', 'C', 'A', 'T', 'G', 'C']]
    """
    all_codes = get_codes(alignment)
    symbols = []
    for seq_index, seq_codes in enumerate(all_codes):
        is_symbol = seq_codes != -1
        # Only actual symbol codes can be decoded by the alphabet
        alphabet = alignment.sequences[seq_index].get_alphabet()
        decoded = alphabet.decode_multiple(seq_codes[is_symbol])
        if isinstance(decoded, np.ndarray):
            decoded = decoded.tolist()
        # Positions that are not overwritten remain None, i.e. gaps
        row = np.full(len(seq_codes), None, dtype=object)
        row[is_symbol] = decoded
        symbols.append(row.tolist())
    return symbols
395
+
396
+
397
def get_sequence_identity(alignment, mode="not_terminal"):
    """
    Calculate the sequence identity for an alignment.

    The identity is the number of matching alignment columns divided by
    a measure for the alignment length, which is chosen via the `mode`
    parameter.
    A column counts as match, if all sequences have the same symbol in
    it and this symbol is not a gap.

    Parameters
    ----------
    alignment : Alignment
        The alignment to calculate the identity for.
    mode : {'all', 'not_terminal', 'shortest'}, optional
        The calculation mode for alignment length.

        - **all** - The number of matches divided by the number of
          all alignment columns.
        - **not_terminal** - The number of matches divided by the
          number of alignment columns that are not terminal gaps in
          any of the sequences.
        - **shortest** - The number of matches divided by the
          length of the shortest sequence.

        Default is *not_terminal*.

    Returns
    -------
    identity : float
        The sequence identity, ranging between 0 and 1.

    Raises
    ------
    ValueError
        If `mode` is not one of the supported values or if, in
        *not_terminal* mode, the region without terminal gaps is empty.

    See Also
    --------
    get_pairwise_sequence_identity : Get sequence identity for each pair of alignment rows.
    """

    def _is_match(column):
        # A single unique non-gap value means that all symbols match
        distinct = np.unique(column)
        return len(distinct) == 1 and distinct[0] != -1

    codes = get_codes(alignment)
    # Transposing the codes allows iteration over the alignment columns
    matches = sum(1 for column in codes.T if _is_match(column))

    if mode == "all":
        return matches / len(alignment)

    if mode == "not_terminal":
        start, stop = find_terminal_gaps(alignment)
        if stop <= start:
            raise ValueError(
                "Cannot calculate non-terminal identity, "
                "at least two sequences have no overlap"
            )
        return matches / (stop - start)

    if mode == "shortest":
        return matches / min(len(seq) for seq in alignment.sequences)

    raise ValueError(f"'{mode}' is an invalid calculation mode")
458
+
459
+
460
def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
    """
    Calculate the sequence identity for each pair of sequences in an
    alignment.

    For each pair the identity is the number of matching non-gap
    positions divided by a measure for the length of the alignment
    that depends on the `mode` parameter.

    Parameters
    ----------
    alignment : Alignment, length=n
        The alignment to calculate the pairwise sequence identity for.
    mode : {'all', 'not_terminal', 'shortest'}, optional
        The calculation mode for alignment length.

        - **all** - The number of matches divided by the number of
          all alignment columns.
        - **not_terminal** - The number of matches divided by the
          number of alignment columns that are not terminal gaps in
          any of the two considered sequences.
        - **shortest** - The number of matches divided by the
          length of the shortest one of the two sequences.

        Default is *not_terminal*.

    Returns
    -------
    identity : ndarray, dtype=float, shape=(n,n)
        The pairwise sequence identity, ranging between 0 and 1.

    Raises
    ------
    ValueError
        If `mode` is not one of the supported modes or if, in
        *not_terminal* mode, any pair of sequences has no overlap.

    See Also
    --------
    get_sequence_identity : Get sequence identity over all alignment rows.
    """
    codes = get_codes(alignment)
    n_seq = len(codes)

    # Broadcast the codes against each other, so that every sequence is
    # compared with every other sequence column by column
    as_rows = codes[:, np.newaxis, :]
    as_cols = codes[np.newaxis, :, :]
    # A position only counts if the codes agree and neither one is a gap
    is_identical = (as_rows == as_cols) & (as_rows != -1) & (as_cols != -1)
    matches = np.count_nonzero(is_identical, axis=-1)

    if mode == "all":
        length = len(alignment)
    elif mode == "not_terminal":
        length = np.zeros((n_seq, n_seq))
        for first in range(n_seq):
            for second in range(n_seq):
                # Restrict the alignment to the two sequences in question
                # to find the columns where both of them are present
                pair = alignment[:, [first, second]]
                start, stop = find_terminal_gaps(pair)
                if stop <= start:
                    raise ValueError(
                        "Cannot calculate non-terminal identity, "
                        "as the two sequences have no overlap"
                    )
                length[first, second] = stop - start
    elif mode == "shortest":
        seq_lengths = np.array(
            [len(alignment.sequences[k]) for k in range(n_seq)], dtype=float
        )
        # Pairwise minimum of the sequence lengths via broadcasting
        length = np.minimum(
            seq_lengths[:, np.newaxis], seq_lengths[np.newaxis, :]
        )
    else:
        raise ValueError(f"'{mode}' is an invalid calculation mode")

    return matches / length
533
+
534
+
535
def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
    """
    Calculate the similarity score of an alignment.

    If the alignment contains more than two sequences,
    all pairwise scores are counted.

    Parameters
    ----------
    alignment : Alignment
        The alignment to calculate the score for.
    matrix : SubstitutionMatrix
        The substitution matrix used for scoring.
    gap_penalty : int or (tuple, dtype=int), optional
        If an integer is provided, the value will be interpreted as
        general gap penalty. If a tuple is provided, an affine gap
        penalty is used. The first integer in the tuple is the gap
        opening penalty, the second integer is the gap extension
        penalty.
        The values need to be negative.
    terminal_penalty : bool, optional
        If true, gap penalties are applied to terminal gaps.
        Otherwise only gaps within the column range given by
        :func:`find_terminal_gaps()` are penalized.

    Returns
    -------
    score : int
        The similarity score.

    Raises
    ------
    TypeError
        If `gap_penalty` is neither a number nor a sequence.
    """
    codes = get_codes(alignment)
    matrix = matrix.score_matrix()
    n_seq = codes.shape[0]

    # Sum similarity scores (without gaps)
    total = 0
    # Iterate over all positions
    for pos in range(codes.shape[1]):
        column = codes[:, pos]
        # Iterate over all possible pairs
        # Do not count self-similarity
        # and do not count similarity twice (not S(i,j) and S(j,i))
        for i in range(n_seq):
            for j in range(i + 1, n_seq):
                code_i = column[i]
                code_j = column[j]
                # Ignore gaps
                if code_i != -1 and code_j != -1:
                    total += matrix[code_i, code_j]

    # Sum gap penalties
    if isinstance(gap_penalty, numbers.Real):
        gap_open = gap_penalty
        gap_ext = gap_penalty
    elif isinstance(gap_penalty, Sequence):
        gap_open = gap_penalty[0]
        gap_ext = gap_penalty[1]
    else:
        raise TypeError("Gap penalty must be either integer or tuple")

    # The penalized column range is the same for all sequences,
    # hence it is determined only once
    if terminal_penalty:
        start_index = 0
        stop_index = codes.shape[1]
    else:
        # Find a start and stop index excluding terminal gaps
        start_index, stop_index = find_terminal_gaps(alignment)

    # Iterate over all sequences
    for seq_code in codes:
        in_gap = False
        for code in seq_code[start_index:stop_index]:
            if code == -1:
                # The first gap position opens the gap,
                # each directly following one extends it
                total += gap_ext if in_gap else gap_open
                in_gap = True
            else:
                in_gap = False
    return total
610
+
611
+
612
def find_terminal_gaps(alignment):
    """
    Find the slice indices that would remove terminal gaps from an
    alignment.

    Terminal gaps are gaps that appear before all sequences start and
    after any sequence ends.

    Parameters
    ----------
    alignment : Alignment
        The alignment, where the slice indices should be found in.

    Returns
    -------
    start, stop : int
        Indices that point to the start and exclusive stop of the
        alignment columns without terminal gaps.
        When these indices are used as slice index for an alignment or
        trace, the index would remove terminal gaps.

    See Also
    --------
    remove_terminal_gaps : Remove terminal gap columns directly.

    Examples
    --------

    >>> sequences = [
    ...     NucleotideSequence(seq_string) for seq_string in (
    ...         "AAAAACTGATTC",
    ...         "AAACTGTTCA",
    ...         "CTGATTCAAA"
    ...     )
    ... ]
    >>> trace = np.transpose([
    ...     ( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, -1, -1, -1),
    ...     (-1, -1,  0,  1,  2,  3,  4,  5, -1,  6,  7,  8,  9, -1, -1),
    ...     (-1, -1, -1, -1, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9),
    ... ])
    >>> alignment = Alignment(sequences, trace)
    >>> print(alignment)
    AAAAACTGATTC---
    --AAACTG-TTCA--
    -----CTGATTCAAA
    >>> print(find_terminal_gaps(alignment))
    (5, 12)
    """
    first_columns = []
    last_columns = []
    # Each column of the trace belongs to one sequence
    for seq_trace in alignment.trace.T:
        # Alignment columns in which this sequence has a symbol
        occupied = np.flatnonzero(seq_trace != -1)
        first_columns.append(occupied[0])
        last_columns.append(occupied[-1])
    # The region without terminal gaps begins where the last sequence
    # starts and ends where the first sequence ends
    start = np.max(first_columns).item()
    # The stop is exclusive -> one past the last occupied column
    stop = np.min(last_columns).item() + 1
    return start, stop
671
+
672
+
673
def remove_terminal_gaps(alignment):
    """
    Remove terminal gaps from an alignment.

    Terminal gaps are gaps that appear before all sequences start and
    after any sequence ends.

    Parameters
    ----------
    alignment : Alignment
        The alignment, where the terminal gaps should be removed from.

    Returns
    -------
    truncated_alignment : Alignment
        A shallow copy of the input `alignment` with a truncated trace,
        that does not contain alignment columns with terminal gaps.

    Raises
    ------
    ValueError
        If at least two sequences have no overlap, i.e. no alignment
        column would remain after removing the terminal gaps.

    See Also
    --------
    find_terminal_gaps : Only find terminal gap columns.

    Examples
    --------

    >>> sequences = [
    ...     NucleotideSequence(seq_string) for seq_string in (
    ...         "AAAAACTGATTC",
    ...         "AAACTGTTCA",
    ...         "CTGATTCAAA"
    ...     )
    ... ]
    >>> trace = np.transpose([
    ...     ( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, -1, -1, -1),
    ...     (-1, -1,  0,  1,  2,  3,  4,  5, -1,  6,  7,  8,  9, -1, -1),
    ...     (-1, -1, -1, -1, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9),
    ... ])
    >>> alignment = Alignment(sequences, trace)
    >>> print(alignment)
    AAAAACTGATTC---
    --AAACTG-TTCA--
    -----CTGATTCAAA
    >>> truncated_alignment = remove_terminal_gaps(alignment)
    >>> print(truncated_alignment)
    CTGATTC
    CTG-TTC
    CTGATTC
    """
    start, stop = find_terminal_gaps(alignment)
    # 'stop' is exclusive: 'stop == start' already means that no column
    # is left, e.g. if one sequence ends directly before another starts
    if stop <= start:
        raise ValueError(
            "Cannot remove terminal gaps, since at least two sequences have "
            "no overlap and the resulting alignment would be empty"
        )
    return alignment[start:stop]
728
+
729
+
730
def remove_gaps(alignment):
    """
    Remove all alignment columns that contain at least one gap.

    Parameters
    ----------
    alignment : Alignment
        The alignment to remove the gap columns from.

    Returns
    -------
    truncated_alignment : Alignment
        The alignment without gap columns.

    See Also
    --------
    remove_terminal_gaps : Remove only terminal gap columns.
    """
    # Each row of the trace is an alignment column, -1 marks a gap
    contains_gap = (alignment.trace == -1).any(axis=1)
    return alignment[~contains_gap]
750
+
751
+
752
def _is_single_letter(alphabet):
    """
    More relaxed version of :func:`biotite.sequence.alphabet.is_letter_alphabet()`:
    It is sufficient that the string representation of each symbol is
    a single character.
    """
    if alphabet.is_letter_alphabet():
        return True
    # Otherwise every symbol must be exactly one character long when
    # converted into a string
    return all(len(str(symbol)) == 1 for symbol in alphabet)