biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,702 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.align"
6
+ __author__ = "Patrick Kunzmann"
7
+
8
+ import numbers
9
+ import textwrap
10
+ from collections.abc import Sequence
11
+ import numpy as np
12
+
13
+ __all__ = [
14
+ "Alignment",
15
+ "get_codes",
16
+ "get_symbols",
17
+ "get_sequence_identity",
18
+ "get_pairwise_sequence_identity",
19
+ "score",
20
+ "find_terminal_gaps",
21
+ "remove_terminal_gaps",
22
+ "remove_gaps",
23
+ ]
24
+
25
+
26
class Alignment(object):
    """
    An :class:`Alignment` object stores information about which symbols
    of *n* sequences are aligned to each other and it stores the
    corresponding alignment score.

    Instead of saving a list of aligned symbols, this class saves the
    original *n* sequences, that were aligned, and a so called *trace*,
    which indicates the aligned symbols of these sequences.
    The trace is a *(m x n)* :class:`ndarray` with alignment length
    *m* and sequence count *n*.
    Each element of the trace is the index in the corresponding
    sequence.
    A gap is represented by the value -1.

    Furthermore this class provides multiple utility functions for
    conversion into strings in order to make the alignment human
    readable.

    Unless an :class:`Alignment` object is the result of a multiple
    sequence alignment, the object will contain only two sequences.

    All attributes of this class are publicly accessible.

    Parameters
    ----------
    sequences : list
        A list of aligned sequences.
    trace : ndarray, dtype=int, shape=(m,n)
        The alignment trace.
    score : int, optional
        Alignment score.

    Attributes
    ----------
    sequences : list
        A list of aligned sequences.
    trace : ndarray, dtype=int, shape=(m,n)
        The alignment trace.
    score : int
        Alignment score.

    Examples
    --------

    >>> seq1 = NucleotideSequence("CGTCAT")
    >>> seq2 = NucleotideSequence("TCATGC")
    >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
    >>> ali = align_optimal(seq1, seq2, matrix)[0]
    >>> print(ali)
    CGTCAT--
    --TCATGC
    >>> print(ali.trace)
    [[ 0 -1]
     [ 1 -1]
     [ 2  0]
     [ 3  1]
     [ 4  2]
     [ 5  3]
     [-1  4]
     [-1  5]]
    >>> print(ali[1:4].trace)
    [[ 1 -1]
     [ 2  0]
     [ 3  1]]
    >>> print(ali[1:4, 0:1].trace)
    [[1]
     [2]
     [3]]
    """

    def __init__(self, sequences, trace, score=None):
        # Shallow copy: later changes to the caller's list do not
        # affect this alignment, the sequence objects are shared
        self.sequences = sequences.copy()
        self.trace = trace
        self.score = score

    def __repr__(self):
        """Represent Alignment as a string for debugging."""
        return (
            f"Alignment([{', '.join([seq.__repr__() for seq in self.sequences])}], "
            f"np.{np.array_repr(self.trace)}, score={self.score})"
        )

    def _gapped_str(self, seq_index):
        """
        Get the string of a single sequence, where each trace value of
        -1 is rendered as ``-``.
        """
        return "".join(
            "-" if row[seq_index] == -1
            else str(self.sequences[seq_index][row[seq_index]])
            for row in self.trace
        )

    def get_gapped_sequences(self):
        """
        Get the string representation of the gapped sequences.

        Returns
        -------
        sequences : list of str
            The list of gapped sequence strings. The order is the same
            as in `Alignment.sequences`.
        """
        return [self._gapped_str(i) for i in range(len(self.sequences))]

    def __str__(self):
        # The column-wise text layout is only used if every sequence
        # has a single letter alphabet
        if not all(_is_single_letter(seq.alphabet) for seq in self.sequences):
            return super().__str__()

        # First dimension: sequence number,
        # second dimension: line number
        wrapper = textwrap.TextWrapper(break_on_hyphens=False)
        seq_str_lines_list = [
            wrapper.wrap(self._gapped_str(i)) for i in range(len(self.sequences))
        ]
        # Each block contains the same line of every sequence;
        # blocks are separated by an empty line
        return "\n\n".join(
            "\n".join(seq_lines[row_i] for seq_lines in seq_str_lines_list)
            for row_i in range(len(seq_str_lines_list[0]))
        )

    def __getitem__(self, index):
        """
        Select alignment columns and optionally sequences.

        A non-tuple index is applied to the first trace dimension only.
        A 2-tuple index selects alignment columns with its first element
        and sequences with its second element.

        Raises
        ------
        IndexError
            If the tuple has more than two elements, if one of the tuple
            elements is an integer or if the sequence index has an
            unsupported type.
        """
        if isinstance(index, tuple):
            if len(index) > 2:
                raise IndexError("Only 1D or 2D indices are allowed")
            # Both tuple elements must be checked:
            # an integer would remove a dimension from the trace
            if isinstance(index[0], numbers.Integral) or isinstance(
                index[1], numbers.Integral
            ):
                raise IndexError(
                    "Integers are invalid indices for alignments, "
                    "a single sequence or alignment column cannot be "
                    "selected"
                )
            return Alignment(
                Alignment._index_sequences(self.sequences, index[1]),
                self.trace[index],
                self.score,
            )
        else:
            return Alignment(self.sequences, self.trace[index], self.score)

    def __iter__(self):
        raise TypeError("'Alignment' object is not iterable")

    def __len__(self):
        return len(self.trace)

    def __eq__(self, item):
        if not isinstance(item, Alignment):
            return False
        if self.sequences != item.sequences:
            return False
        if not np.array_equal(self.trace, item.trace):
            return False
        if self.score != item.score:
            return False
        return True

    @staticmethod
    def _index_sequences(sequences, index):
        """
        Apply the sequence part of an alignment index to the list of
        sequences, so that it selects the same entries as the index
        selects columns in the trace.
        """
        # NumPy interprets a list of booleans as a mask when indexing the
        # trace, so the sequences must be masked in the same way instead
        # of using ``True``/``False`` as the positions 1 and 0
        if (
            isinstance(index, (list, tuple))
            and len(index) > 0
            and all(isinstance(i, (bool, np.bool_)) for i in index)
        ):
            index = np.asarray(index, dtype=bool)

        if isinstance(index, np.ndarray) and index.dtype == bool:
            return [seq for seq, mask in zip(sequences, index) if mask]
        if isinstance(index, (list, tuple, np.ndarray)):
            return [sequences[i] for i in index]
        if isinstance(index, slice):
            return sequences[index]
        raise IndexError(f"Invalid alignment index type '{type(index).__name__}'")

    @staticmethod
    def trace_from_strings(seq_str_list):
        """
        Create a trace from strings that represent aligned sequences.

        Parameters
        ----------
        seq_str_list : list of str
            The strings, where each one represents a sequence
            (with gaps) in an alignment.
            A ``-`` is interpreted as gap.

        Returns
        -------
        trace : ndarray, dtype=int, shape=(m,n)
            The created trace for *n* strings of length *m*.

        Raises
        ------
        ValueError
            If less than two strings are given.
        """
        if len(seq_str_list) < 2:
            raise ValueError("An alignment must contain at least two sequences")
        # The index of the next non-gap symbol for each sequence
        seq_i = np.zeros(len(seq_str_list), dtype=int)
        # Get length of string (same length for all strings)
        # rather than length of list
        trace = np.full((len(seq_str_list[0]), len(seq_str_list)), -1, dtype=int)
        for pos_i in range(len(seq_str_list[0])):
            for str_j, seq_str in enumerate(seq_str_list):
                # Gap positions keep the initial -1
                if seq_str[pos_i] != "-":
                    trace[pos_i, str_j] = seq_i[str_j]
                    seq_i[str_j] += 1
        return trace
236
+
237
+
238
def get_codes(alignment):
    """
    Get the sequence codes of the sequences in the alignment.

    The codes are built from the trace:
    Instead of the indices of the aligned symbols (trace), the return
    value contains the corresponding symbol codes for each index.
    Gaps are still represented by *-1*.

    Parameters
    ----------
    alignment : Alignment
        The alignment to get the sequence codes for.

    Returns
    -------
    codes : ndarray, dtype=int, shape=(n,m)
        The sequence codes for the alignment.
        The shape is *(n,m)* for *n* sequences and *m* alignment columns.
        The array uses *-1* values for gaps.

    Examples
    --------

    >>> seq1 = NucleotideSequence("CGTCAT")
    >>> seq2 = NucleotideSequence("TCATGC")
    >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
    >>> ali = align_optimal(seq1, seq2, matrix)[0]
    >>> print(ali)
    CGTCAT--
    --TCATGC
    >>> print(get_codes(ali))
    [[ 1  2  3  1  0  3 -1 -1]
     [-1 -1  3  1  0  3  2  1]]
    """
    trace = alignment.trace
    sequences = alignment.sequences

    # The number of sequences is the first dimension
    codes = np.zeros((trace.shape[1], trace.shape[0]), dtype=np.int64)
    for i, sequence in enumerate(sequences):
        column = trace[:, i]
        is_aligned = column != -1
        # Look up the codes only at non-gap positions:
        # indexing with the gap value -1 would fetch the last symbol
        # and fails for a sequence without any symbols
        codes[i, is_aligned] = sequence.code[column[is_aligned]]
        codes[i, ~is_aligned] = -1

    return codes
287
+
288
+
289
def get_symbols(alignment):
    """
    Get the decoded symbols of the sequences in the alignment.

    This works like :func:`get_codes()`, but each code is replaced by
    the symbol it represents in the alphabet of the respective sequence.
    Gaps are represented by *None* values.

    Parameters
    ----------
    alignment : Alignment
        The alignment to get the symbols for.

    Returns
    -------
    symbols : list of list
        The nested list of symbols.

    See Also
    --------
    get_codes : Get the sequence codes of the sequences in the alignment.

    Examples
    --------

    >>> seq1 = NucleotideSequence("CGTCAT")
    >>> seq2 = NucleotideSequence("TCATGC")
    >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
    >>> ali = align_optimal(seq1, seq2, matrix)[0]
    >>> print(ali)
    CGTCAT--
    --TCATGC
    >>> print(get_symbols(ali))
    [['C', 'G', 'T', 'C', 'A', 'T', None, None], [None, None, 'T', 'C', 'A', 'T', 'G', 'C']]
    """
    symbols = []
    for seq_index, row in enumerate(get_codes(alignment)):
        not_gap = row != -1
        # Decode only the actual symbol codes, as -1 is not a valid code
        decoded = (
            alignment.sequences[seq_index]
            .get_alphabet()
            .decode_multiple(row[not_gap])
        )
        if isinstance(decoded, np.ndarray):
            decoded = decoded.tolist()
        # Positions that are not overwritten by a symbol remain None
        row_symbols = np.full(len(row), None, dtype=object)
        row_symbols[not_gap] = decoded
        symbols.append(row_symbols.tolist())
    return symbols
334
+
335
+
336
def get_sequence_identity(alignment, mode="not_terminal"):
    """
    Calculate the sequence identity for an alignment.

    The identity is equal to the matches divided by a measure for the
    length of the alignment that depends on the `mode` parameter.
    An alignment column counts as match, if all sequences have the same
    symbol code in this column and this column contains no gap.

    Parameters
    ----------
    alignment : Alignment
        The alignment to calculate the identity for.
    mode : {'all', 'not_terminal', 'shortest'}, optional
        The calculation mode for alignment length.

        - **all** - The number of matches divided by the number of
          all alignment columns.
        - **not_terminal** - The number of matches divided by the
          number of alignment columns that are not terminal gaps in
          any of the sequences.
        - **shortest** - The number of matches divided by the
          length of the shortest sequence.

        Default is *not_terminal*.

    Returns
    -------
    identity : float
        The sequence identity, ranging between 0 and 1.

    Raises
    ------
    ValueError
        If `mode` is not one of the supported values or if `mode` is
        *not_terminal* and the range without terminal gaps is empty.

    See Also
    --------
    get_pairwise_sequence_identity : Get sequence identity for each pair of alignment rows.
    """
    codes = get_codes(alignment)

    # Count matches
    if codes.shape[0] == 0:
        # Without any sequence there is nothing that could match
        matches = 0
    else:
        # All codes in a column are equal, if each one equals the code of
        # the first sequence; columns where this code is a gap are
        # no matches
        reference = codes[0]
        is_match = np.all(codes == reference, axis=0) & (reference != -1)
        matches = int(np.count_nonzero(is_match))

    # Calculate length
    if mode == "all":
        length = len(alignment)
    elif mode == "not_terminal":
        start, stop = find_terminal_gaps(alignment)
        if stop <= start:
            raise ValueError(
                "Cannot calculate non-terminal identity, "
                "at least two sequences have no overlap"
            )
        length = stop - start
    elif mode == "shortest":
        length = min(len(seq) for seq in alignment.sequences)
    else:
        raise ValueError(f"'{mode}' is an invalid calculation mode")

    return matches / length
397
+
398
+
399
def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
    """
    Compute the sequence identity for every pair of alignment rows.

    For each pair of sequences the number of columns, where both
    sequences have the same non-gap symbol, is divided by an alignment
    length, whose definition is chosen via the `mode` parameter.

    Parameters
    ----------
    alignment : Alignment
        The alignment of *n* sequences to compute the pairwise
        sequence identity for.
    mode : {'all', 'not_terminal', 'shortest'}, optional
        Selects the length the number of matches is divided by.

        - **all** - The number of all alignment columns.
        - **not_terminal** - The number of alignment columns that
          are not terminal gaps in any of the two considered
          sequences.
        - **shortest** - The length of the shortest one of the two
          sequences.

        Default is *not_terminal*.

    Returns
    -------
    identity : ndarray, dtype=float, shape=(n,n)
        The pairwise sequence identity, ranging between 0 and 1.

    Raises
    ------
    ValueError
        If `mode` is not one of the supported values or if, in
        *not_terminal* mode, a pair of sequences has no overlap.

    See Also
    --------
    get_sequence_identity : Get sequence identity over all alignment rows.
    """
    codes = get_codes(alignment)
    n_seq = len(codes)

    # Broadcast the codes against each other, so that every sequence is
    # compared with every other sequence in each column
    codes_a = codes[:, np.newaxis, :]
    codes_b = codes[np.newaxis, :, :]
    # If both codes are equal and one of them is not a gap,
    # the other one is not a gap either
    is_match = (codes_a == codes_b) & (codes_a != -1)
    matches = np.count_nonzero(is_match, axis=-1)

    if mode == "all":
        length = len(alignment)
    elif mode == "not_terminal":
        length = np.zeros((n_seq, n_seq))
        for i, j in np.ndindex(n_seq, n_seq):
            # Latest start and earliest stop of the two sequences
            first_column, end_column = find_terminal_gaps(alignment[:, [i, j]])
            if end_column <= first_column:
                raise ValueError(
                    "Cannot calculate non-terminal identity, "
                    "as the two sequences have no overlap"
                )
            length[i, j] = end_column - first_column
    elif mode == "shortest":
        seq_lengths = [len(alignment.sequences[i]) for i in range(n_seq)]
        # Element (i, j) is the length of the shorter one of sequence i and j
        length = np.minimum.outer(seq_lengths, seq_lengths).astype(float)
    else:
        raise ValueError(f"'{mode}' is an invalid calculation mode")

    return matches / length
472
+
473
+
474
def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
    """
    Calculate the similarity score of an alignment.

    The score is the sum of the substitution scores of all aligned
    symbol pairs and the penalties for the gaps in each sequence.
    If the alignment contains more than two sequences,
    all pairwise scores are counted.

    Parameters
    ----------
    alignment : Alignment
        The alignment to calculate the score for.
    matrix : SubstitutionMatrix
        The substitution matrix used for scoring.
    gap_penalty : int or (tuple, dtype=int), optional
        If an integer is provided, the value will be interpreted as
        general gap penalty. If a tuple is provided, an affine gap
        penalty is used. The first integer in the tuple is the gap
        opening penalty, the second integer is the gap extension
        penalty.
        The values need to be negative.
    terminal_penalty : bool, optional
        If true, gap penalties are applied to terminal gaps.

    Returns
    -------
    score : int
        The similarity score.

    Raises
    ------
    TypeError
        If `gap_penalty` is neither a number nor a sequence.
    """
    codes = get_codes(alignment)
    matrix = matrix.score_matrix()
    n_seq = codes.shape[0]
    n_col = codes.shape[1]

    # Sum similarity scores (without gaps)
    total = 0
    for pos in range(n_col):
        column = codes[:, pos]
        # Iterate over all unordered pairs of different sequences:
        # neither self-similarity is counted,
        # nor is a pair counted twice (not S(i,j) and S(j,i))
        for i in range(n_seq):
            for j in range(i + 1, n_seq):
                code_i = column[i]
                code_j = column[j]
                # Pairs involving a gap get no substitution score
                if code_i != -1 and code_j != -1:
                    total += matrix[code_i, code_j]

    # Sum gap penalties
    if isinstance(gap_penalty, numbers.Real):
        # Linear gap penalty: opening and extension cost the same
        gap_open = gap_penalty
        gap_ext = gap_penalty
    elif isinstance(gap_penalty, Sequence):
        gap_open = gap_penalty[0]
        gap_ext = gap_penalty[1]
    else:
        raise TypeError("Gap penalty must be either integer or tuple")

    # The range of penalized columns is the same for every sequence,
    # hence it is determined only once instead of once per sequence
    if terminal_penalty:
        start_index = 0
        stop_index = n_col
    else:
        # Exclude the terminal gaps from the penalized columns
        start_index, stop_index = find_terminal_gaps(alignment)

    for seq_code in codes:
        in_gap = False
        for i in range(start_index, stop_index):
            if seq_code[i] == -1:
                # The first gap column opens the gap,
                # each directly following gap column extends it
                if in_gap:
                    total += gap_ext
                else:
                    total += gap_open
                in_gap = True
            else:
                in_gap = False
    return total
549
+
550
+
551
def find_terminal_gaps(alignment):
    """
    Find the slice indices that would remove terminal gaps from an
    alignment.

    The returned range begins at the column where the last of the
    sequences starts and ends after the column where the first of the
    sequences ends.
    All columns outside of this range contain a terminal gap in at
    least one sequence.

    Parameters
    ----------
    alignment : Alignment
        The alignment, where the slice indices should be found in.

    Returns
    -------
    start, stop : int
        Indices that point to the start and exclusive stop of the
        alignment columns without terminal gaps.
        When these indices are used as slice index for an alignment or
        trace, the index would remove terminal gaps.

    See Also
    --------
    remove_terminal_gaps : Remove terminal gap columns directly.

    Examples
    --------

    >>> sequences = [
    ...     NucleotideSequence(seq_string) for seq_string in (
    ...         "AAAAACTGATTC",
    ...         "AAACTGTTCA",
    ...         "CTGATTCAAA"
    ...     )
    ... ]
    >>> trace = np.transpose([
    ...     ( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, -1, -1, -1),
    ...     (-1, -1,  0,  1,  2,  3,  4,  5, -1,  6,  7,  8,  9, -1, -1),
    ...     (-1, -1, -1, -1, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9),
    ... ])
    >>> alignment = Alignment(sequences, trace)
    >>> print(alignment)
    AAAAACTGATTC---
    --AAACTG-TTCA--
    -----CTGATTCAAA
    >>> print(find_terminal_gaps(alignment))
    (5, 12)
    """
    first_columns = []
    last_columns = []
    # Each column of the trace belongs to one sequence
    for seq_trace in alignment.trace.T:
        # Alignment columns where this sequence has a symbol instead of a gap
        symbol_columns = np.flatnonzero(seq_trace != -1)
        first_columns.append(symbol_columns[0])
        last_columns.append(symbol_columns[-1])
    # The gap-free range starts where the last sequence starts
    # and ends where the first sequence ends
    start = np.max(first_columns).item()
    # The stop index is exclusive -> +1
    stop = np.min(last_columns).item() + 1
    return start, stop
610
+
611
+
612
def remove_terminal_gaps(alignment):
    """
    Remove terminal gaps from an alignment.

    The alignment is truncated to the range of columns that begins
    where the last of the sequences starts and ends where the first of
    the sequences ends.

    Parameters
    ----------
    alignment : Alignment
        The alignment, where the terminal gaps should be removed from.

    Returns
    -------
    truncated_alignment : Alignment
        A shallow copy of the input `alignment` with a truncated trace,
        that does not contain alignment columns with terminal gaps.

    Raises
    ------
    ValueError
        If no alignment column would remain, because at least two
        sequences have no overlap.

    See Also
    --------
    find_terminal_gaps : Only find terminal gap columns.

    Examples
    --------

    >>> sequences = [
    ...     NucleotideSequence(seq_string) for seq_string in (
    ...         "AAAAACTGATTC",
    ...         "AAACTGTTCA",
    ...         "CTGATTCAAA"
    ...     )
    ... ]
    >>> trace = np.transpose([
    ...     ( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, -1, -1, -1),
    ...     (-1, -1,  0,  1,  2,  3,  4,  5, -1,  6,  7,  8,  9, -1, -1),
    ...     (-1, -1, -1, -1, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9),
    ... ])
    >>> alignment = Alignment(sequences, trace)
    >>> print(alignment)
    AAAAACTGATTC---
    --AAACTG-TTCA--
    -----CTGATTCAAA
    >>> truncated_alignment = remove_terminal_gaps(alignment)
    >>> print(truncated_alignment)
    CTGATTC
    CTG-TTC
    CTGATTC
    """
    start, stop = find_terminal_gaps(alignment)
    # 'stop' is exclusive: 'stop == start' already means that no column
    # is left, which happens if one sequence ends directly before
    # another one starts
    if stop <= start:
        raise ValueError(
            "Cannot remove terminal gaps, since at least two sequences have "
            "no overlap and the resulting alignment would be empty"
        )
    return alignment[start:stop]
667
+
668
+
669
def remove_gaps(alignment):
    """
    Remove all alignment columns that contain a gap.

    A column is removed, as soon as at least one of the sequences has
    a gap in it.

    Parameters
    ----------
    alignment : Alignment
        The alignment to remove the gap columns from.

    Returns
    -------
    truncated_alignment : Alignment
        The alignment without gap columns.

    See Also
    --------
    remove_terminal_gaps : Remove only terminal gap columns.
    """
    # The first trace axis runs over the alignment columns,
    # a trace value of -1 marks a gap
    contains_gap = (alignment.trace == -1).any(axis=1)
    return alignment[~contains_gap]
689
+
690
+
691
def _is_single_letter(alphabet):
    """
    More relaxed version of the ``is_letter_alphabet()`` method of the alphabet:
    It is sufficient that the string representation of each symbol is
    a single character.
    """
    if alphabet.is_letter_alphabet():
        return True
    return all(len(str(symbol)) == 1 for symbol in alphabet)