biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,619 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.align"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["align_multiple"]
8
+
9
+ cimport cython
10
+ cimport numpy as np
11
+ from libc.math cimport log
12
+
13
+ import numpy as np
14
+ from .matrix import SubstitutionMatrix
15
+ from .alignment import Alignment
16
+ from .pairwise import align_optimal
17
+ from ..sequence import Sequence
18
+ from ..alphabet import Alphabet
19
+ from ..phylo.upgma import upgma
20
+ from ..phylo.tree import Tree, TreeNode, as_binary
21
+
22
+
23
# Short aliases for NumPy's fixed-width C scalar types
ctypedef np.int32_t int32
ctypedef np.int64_t int64
ctypedef np.uint8_t uint8
ctypedef np.uint16_t uint16
ctypedef np.uint32_t uint32
ctypedef np.uint64_t uint64
ctypedef np.float32_t float32

# Fused type listing the unsigned integer widths.
# Functions in this module take a ``CodeType[:]`` argument (named
# ``_T``), so that the dtype of the sequence code is known statically.
ctypedef fused CodeType:
    uint8
    uint16
    uint32
    uint64


# Largest finite value representable as float32
cdef float32 MAX_FLOAT = np.finfo(np.float32).max
39
+
40
+
41
class GapSymbol:
    """
    Singleton symbol that is printed as ``-``.

    The shared object is obtained via :meth:`instance()`.
    Calling the constructor while an object already exists raises a
    :class:`ValueError`.
    """

    # The single object of this class, ``None`` until it is created
    _instance = None

    def __init__(self):
        # Guard clause: a second object must never come into existence
        if GapSymbol._instance is not None:
            raise ValueError(
                "Cannot instantiate this singleton more than one time"
            )
        GapSymbol._instance = self

    @staticmethod
    def instance():
        """
        Get the singleton object, creating it on the first call.

        Returns
        -------
        gap_symbol : GapSymbol
            The one and only object of this class.
        """
        existing = GapSymbol._instance
        if existing is not None:
            return existing
        GapSymbol._instance = GapSymbol()
        return GapSymbol._instance

    def __str__(self):
        return "-"

    def __hash__(self):
        # Constant hash: there is only a single object of this class
        return 0
64
+
65
+
66
def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
                   distances=None, guide_tree=None):
    r"""
    align_multiple(sequences, matrix, gap_penalty=-10,
                   terminal_penalty=True, distances=None,
                   guide_tree=None)

    Perform a multiple sequence alignment using a progressive
    alignment algorithm. :footcite:`Feng1987`

    Based on pairwise sequence distances a guide tree is constructed.
    The sequences are progressively aligned according to the tree,
    following the rule 'Once a gap, always a gap'.

    Parameters
    ----------
    sequences : list of Sequence
        The sequences to be aligned.
        The alphabet of the substitution matrix must be equal or
        extend the alphabet of each sequence.
    matrix : SubstitutionMatrix
        The substitution matrix used for scoring.
        Must be symmetric.
    gap_penalty : int or tuple(int, int), optional
        If an integer is provided, the value will be interpreted as
        general gap penalty. If a tuple is provided, an affine gap
        penalty is used. The first integer in the tuple is the gap
        opening penalty, the second integer is the gap extension
        penalty.
        The values need to be negative.
    terminal_penalty : bool, optional
        If true, gap penalties are applied to terminal gaps.
    distances : ndarray, shape=(n,n)
        Pairwise distances of the sequences.
        The matrix must be symmetric and all entries must be larger
        than 0.
        By default the pairwise distances are calculated from
        similarities obtained from optimal global pairwise alignments
        (:func:`align_optimal()`).
        The similarities are converted into distances using the method
        proposed by Feng & Doolittle :footcite:`Feng1996`.
    guide_tree : Tree
        The guide tree to be used for the progressive alignment.
        By default the guide tree is constructed from `distances`
        via the UPGMA clustering method.

    Returns
    -------
    alignment : Alignment
        The global multiple sequence alignment of the input sequences.
    order : ndarray, dtype=int
        The sequence order represented by the guide tree.
        When this order is applied to alignment sequence order,
        similar sequences are adjacent to each other.
    tree : Tree
        The guide tree used for progressive alignment.
        Equal to `guide_tree` if provided.
    distance_matrix : ndarray, shape=(n,n), dtype=float32
        The pairwise distance matrix used to construct the guide tree.
        Equal to `distances` if provided.

    Raises
    ------
    ValueError
        If the substitution matrix is not symmetric, if the code of a
        sequence is ``None`` or if the alphabet of the substitution
        matrix does not extend the alphabet of a sequence.

    Notes
    -----
    The similarity to distance conversion is performed according to the
    following formula:

    .. math:: D_{a,b} = -\ln\left(
        \frac
            { S_{a,b} - S_{a,b}^{rand} }
            { S_{a,b}^{max} - S_{a,b}^{rand} }
        \right)

    .. math:: S_{a,b}^{max} = \frac{ S_{a,a} + S_{b,b} }{ 2 }

    .. math:: S_{a,b}^{rand} = \frac{1}{L_{a,b}}
        \left(
            \sum_{x \in \Omega} \sum_{y \in \Omega}
            s_{x,y} \cdot N_a(x) \cdot N_b(y)
        \right)
        + N_{a,b}^{open} \cdot p^{open} + N_{a,b}^{ext} \cdot p^{ext}

    :math:`D_{a,b}` - The distance between the sequences *a* and *b*.

    :math:`S_{a,b}` - The similarity score between the sequences *a* and *b*.

    :math:`s_{x,y}` - The similarity score between the symbols *x* and *y*.

    :math:`\Omega` - The sequence alphabet.

    :math:`N_a(x)` - Number of occurrences of symbol *x* in sequence *a*.

    :math:`N_{a,b}^{open}, N_{a,b}^{ext}` - Number of gap openings/
    extensions, in the alignment of *a* and *b*.

    :math:`p^{open}, p^{ext}` - The penalty for a gap opening/extension.

    :math:`L_{a,b}` - Number of columns in the alignment of *a* and *b*.

    In rare cases of extremely unrelated sequences, :math:`S_{a,b}`
    can be lower than :math:`S_{a,b}^{rand}`.
    In this case the logarithm cannot be calculated and a
    :class:`ValueError` is raised.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> matrix = SubstitutionMatrix.std_protein_matrix()
    >>>
    >>> alignment, order, tree, distances = align_multiple(
    ...     [seq1, seq2, seq3, seq4], matrix
    ... )
    >>>
    >>> print(alignment)
    BIQT-ITE
    TITANITE
    BISM-ITE
    -IQL-ITE
    >>> print(alignment[:, order.tolist()])
    -IQL-ITE
    BISM-ITE
    BIQT-ITE
    TITANITE
    >>> print(distances)
    [[0.000 1.034 0.382 0.560]
     [1.034 0.000 0.923 1.132]
     [0.382 0.923 0.000 0.632]
     [0.560 1.132 0.632 0.000]]
    >>>
    >>> print(tree.to_newick(
    ...     labels=["seq1", "seq2", "seq3", "seq4"], include_distance=False
    ... ))
    ((seq4,(seq3,seq1)),seq2);
    """
    if not matrix.is_symmetric():
        raise ValueError("A symmetric substitution matrix is required")
    alphabet = matrix.get_alphabet1()
    for i, seq in enumerate(sequences):
        if seq.code is None:
            raise ValueError(f"Code of sequence {i} is 'None'")
        if not alphabet.extends(seq.get_alphabet()):
            raise ValueError(
                f"The substitution matrix and sequence {i} have "
                f"incompatible alphabets"
            )

    # Create guide tree
    # Template parameter workaround
    _T = sequences[0].code
    if distances is None:
        distances = _get_distance_matrix(
            _T, sequences, matrix, gap_penalty, terminal_penalty
        )
    else:
        # Work on a copy, so the array of the caller is never altered
        distances = distances.astype(np.float32, copy=True)
    if guide_tree is None:
        guide_tree = upgma(distances)
    else:
        # Assure that every node in the guide tree is binary
        guide_tree = as_binary(guide_tree)

    # Create new matrix with neutral gap symbol
    gap_symbol = GapSymbol.instance()
    new_alphabet = Alphabet(
        matrix.get_alphabet1().get_symbols() + (gap_symbol,)
    )
    new_score_matrix = np.zeros(
        (len(new_alphabet), len(new_alphabet)), dtype=np.int32
    )
    # New substitution matrix is the same as the old one,
    # except the neutral gap symbol,
    # that scores 0 with all other symbols
    new_score_matrix[:-1,:-1] = matrix.score_matrix()
    new_matrix = SubstitutionMatrix(
        new_alphabet, new_alphabet, new_score_matrix
    )

    # Progressive alignment
    gap_symbol_code = new_alphabet.encode(gap_symbol)
    order, aligned_seqs = _progressive_align(
        _T, sequences, guide_tree.root, distances, new_matrix,
        gap_symbol_code, gap_penalty, terminal_penalty
    )

    # Remove neutral gap symbols and create actual trace
    # The trace is initialized with -1, i.e. every position is a gap
    # until a symbol is found there
    trace = np.full(
        (len(aligned_seqs[0]), len(aligned_seqs)), -1, dtype=np.int64
    )
    for j, aligned_seq in enumerate(aligned_seqs):
        seq_code = aligned_seq.code
        is_symbol = seq_code != gap_symbol_code
        # Non-gap positions get the running index into the gap-free
        # sequence
        trace[is_symbol, j] = np.arange(np.count_nonzero(is_symbol))
        aligned_seq.code = seq_code[is_symbol]

    # Reorder alignment into the original sequence order
    new_order = np.argsort(order)
    aligned_seqs = [aligned_seqs[pos] for pos in new_order]
    trace = trace[:, new_order]

    return Alignment(aligned_seqs, trace), order, guide_tree, distances
284
+
285
+
286
def _get_distance_matrix(CodeType[:] _T, sequences, matrix,
                         gap_penalty, terminal_penalty):
    """
    Create all pairwise alignments for the given sequences and use the
    method proposed by Feng & Doolittle to calculate the pairwise
    distance matrix

    Parameters
    ----------
    _T : ndarray, dtype=VARIABLE
        A little bit hacky workaround to get the correct dtype for the
        sequence code of the sequences in a static way
        (important for Cython).
    sequences : list of Sequence, length=n
        The sequences to get the distance matrix for.
    matrix : SubstitutionMatrix
        The substitution matrix used for the alignments.
    gap_penalty : int or tuple(int, int)
        A linear or affine gap penalty for the alignments.
    terminal_penalty : bool
        Whether or not to count terminal gap penalties for the
        alignments.

    Returns
    -------
    distances : ndarray, shape=(n,n), dtype=float32
        The pairwise distance matrix.

    Raises
    ------
    TypeError
        If `gap_penalty` is neither an integer nor a tuple.
    ValueError
        If the randomized score of a sequence pair is higher than the
        score of its pairwise alignment.
    """
    cdef int i, j

    cdef np.ndarray scores = np.zeros(
        (len(sequences), len(sequences)), dtype=np.int32
    )
    cdef np.ndarray alignments = np.full(
        (len(sequences), len(sequences)), None, dtype=object
    )
    for i in range(len(sequences)):
        # Inclusive range:
        # The self-alignments on the diagonal are required for the
        # maximum score
        for j in range(i+1):
            # For this method we only consider one alignment:
            # Score is equal for all alignments
            # Alignment length is equal for most alignments
            alignment = align_optimal(
                sequences[i], sequences[j], matrix,
                gap_penalty, terminal_penalty, max_number=1
            )[0]
            scores[i,j] = alignment.score
            alignments[i,j] = alignment

    ### Distance calculation from similarity scores ###
    # Calculate the occurrences of each symbol code in each sequence
    # This is used later for the random score
    # Both alphabets are the same
    # A plain 'int' is used instead of the fused code type:
    # The alphabet of the matrix may be larger than the alphabet of the
    # sequences and hence its size may exceed the code type
    cdef int alphabet_size = len(matrix.get_alphabet1())
    cdef np.ndarray code_count = np.zeros(
        (len(sequences), alphabet_size), dtype=np.int32
    )
    # The view shares its memory with 'code_count'
    cdef int32[:,:] code_count_v = code_count
    for i in range(len(sequences)):
        code_count[i] = np.bincount(sequences[i].code, minlength=alphabet_size)

    cdef int gap_open=0, gap_ext=0
    if type(gap_penalty) == int:
        gap_open = gap_penalty
        gap_ext = gap_penalty
    elif type(gap_penalty) == tuple:
        gap_open = gap_penalty[0]
        gap_ext = gap_penalty[1]
    else:
        raise TypeError("Gap penalty must be either integer or tuple")

    cdef const int32[:,:] score_matrix = matrix.score_matrix()
    cdef int32[:,:] scores_v = scores
    cdef np.ndarray distances = np.zeros(
        (scores.shape[0], scores.shape[1]), dtype=np.float32
    )
    cdef float32[:,:] distances_v = distances
    cdef int code1, code2
    cdef float32 score_rand, score_max

    # Calculate distance
    # i and j are indicating the alignment between the sequences i and j
    for i in range(scores_v.shape[0]):
        for j in range(i):
            score_max = (scores_v[i,i] + scores_v[j,j]) / 2.0
            score_rand = 0
            for code1 in range(alphabet_size):
                for code2 in range(alphabet_size):
                    # Read the counts via the typed memoryview to keep
                    # the innermost loop free of Python objects
                    # The product is formed in double precision, so it
                    # cannot exceed the 32 bit integer range
                    score_rand += <double>score_matrix[code1,code2] \
                                  * code_count_v[i,code1] \
                                  * code_count_v[j,code2]
            trace = alignments[i,j].trace
            score_rand /= trace.shape[0]
            gap_open_count, gap_ext_count = _count_gaps(
                trace.astype(np.int64, copy=False),
                terminal_penalty
            )
            score_rand += gap_open_count * gap_open
            score_rand += gap_ext_count * gap_ext
            if scores_v[i,j] < score_rand:
                # Randomized alignment is better than actual alignment
                # -> the logarithm argument would become negative
                # resulting in an NaN distance
                raise ValueError(
                    f"The randomized alignment of sequences {j} and {i} "
                    f"scores better than the real pairwise alignment, "
                    f"cannot calculate proper pairwise distance"
                )
            else:
                distances_v[i,j] = -log(
                    (scores_v[i,j] - score_rand) / (score_max - score_rand)
                )
                # Pairwise distance matrix is symmetric
                distances_v[j,i] = distances_v[i,j]
    return distances
401
+
402
+
403
def _count_gaps(int64[:,:] trace_v, bint terminal_penalty):
    """
    Count the number of gap openings and gap extensions in an alignment
    trace.

    Parameters
    ----------
    trace_v : ndarray, shape=(n,2), dtype=int
        The alignment trace.
        A value of ``-1`` marks a gap in the respective sequence.
    terminal_penalty : bool
        Whether or not to count terminal gap penalties.
        If false, all columns before the first and after the last
        gap-free column are ignored.

    Returns
    -------
    gap_open_count, gap_ext_count : int
        The number of gap opening and gap extension columns.
        Both are ``0`` for an empty trace or, if `terminal_penalty` is
        false, for a trace without any gap-free column.
    """
    cdef int i, j
    cdef int gap_open_count=0, gap_ext_count=0
    cdef int start_index=-1, stop_index=-1

    if trace_v.shape[0] == 0:
        # An empty trace cannot contain gaps
        # -> return early instead of reading a nonexistent first column
        return 0, 0

    if not terminal_penalty:
        # Ignore terminal gaps
        # -> get start and exclusive stop column of the trace
        #    excluding terminal gaps
        for i in range(trace_v.shape[0]):
            # Check if all sequences have no gap at the given position
            if trace_v[i,0] != -1 and trace_v[i,1] != -1:
                start_index = i
                break
        # Reverse iteration
        for i in range(trace_v.shape[0]-1, -1, -1):
            # Check if all sequences have no gap at the given position
            if trace_v[i,0] != -1 and trace_v[i,1] != -1:
                stop_index = i+1
                break
        if start_index == -1 or stop_index == -1:
            # Every column contains a gap -> all gaps are terminal
            return 0, 0
        trace_v = trace_v[start_index : stop_index]

    # A gap in the first column has no predecessor
    # -> it is always a gap opening
    if trace_v[0,0] == -1:
        gap_open_count += 1
    if trace_v[0,1] == -1:
        gap_open_count += 1
    for i in range(1, trace_v.shape[0]):
        # trace_v.shape[1] = 2 due to pairwise alignment
        for j in range(trace_v.shape[1]):
            if trace_v[i,j] == -1:
                if trace_v[i-1,j] == -1:
                    # The gap continues from the previous column
                    gap_ext_count += 1
                else:
                    gap_open_count += 1
    return gap_open_count, gap_ext_count
456
+
457
+
458
def _progressive_align(CodeType[:] _T, sequences, tree_node,
                       float32[:,:]distances_v, matrix,
                       int gap_symbol_code, gap_penalty, terminal_penalty):
    """
    Recursively build the MSA of all sequences below a guide tree node.

    For an inner node, the sub-MSAs of both child nodes are computed
    first.
    Then the pair of sequences (one from each sub-MSA) with the lowest
    value in the pairwise distance matrix is aligned.
    Each gap introduced by this pairwise alignment is inserted as
    gap symbol at the same position into every sequence of the
    respective sub-MSA, which merges both sub-MSAs into one MSA.

    Parameters
    ----------
    _T : ndarray, dtype=VARIABLE
        Only used to select the dtype of the sequence code statically
        (workaround required by Cython).
    sequences : list of Sequence, length=n
        All sequences that should be aligned in the MSA.
    tree_node : TreeNode
        The guide tree node that selects which of the `sequences`
        are aligned in this call.
        This is the only argument that differs between the recursive
        calls of this function.
    distances_v : ndarray, shape=(n,n)
        The pairwise distance matrix.
    matrix : SubstitutionMatrix
        The substitution matrix used for the alignments.
    gap_symbol_code : int
        The symbol code for the gap symbol.
    gap_penalty : int or tuple(int, int)
        A linear or affine gap penalty for the alignments.
    terminal_penalty : bool
        Whether or not to count terminal gap penalties for the
        alignments.

    Returns
    -------
    order : ndarray, shape=(m,), dtype=int
        The index of each element in `aligned_sequences` in the
        original `sequences` parameter.
    aligned_sequences : list of Sequence, length=m
        The sequences that were aligned.
        Gaps are not represented as ``-1`` in a trace, as in an
        :class:`Alignment`, but as dedicated gap symbols within the
        sequences.
        This allows the pairwise alignment of gapped sequences.
    """
    cdef int k=0, l=0
    cdef int best_k=0, best_l=0
    cdef float32 lowest_dist, current_dist
    cdef int32[:] left_order_v, right_order_v
    cdef np.ndarray left_order, right_order
    cdef list left_seqs, right_seqs

    if tree_node.is_leaf():
        # A leaf refers to a single sequence -> nothing to align
        # The sequence is copied, as the callers overwrite its code
        # when gap symbols are inserted
        return np.array([tree_node.index], dtype=np.int32), \
               [sequences[tree_node.index].copy()]

    # Inner node: first obtain the sub-MSA of each child
    left_child, right_child = tree_node.children
    left_order, left_seqs = _progressive_align(
        _T, sequences, left_child, distances_v, matrix,
        gap_symbol_code, gap_penalty, terminal_penalty
    )
    left_order_v = left_order
    right_order, right_seqs = _progressive_align(
        _T, sequences, right_child, distances_v, matrix,
        gap_symbol_code, gap_penalty, terminal_penalty
    )
    right_order_v = right_order

    # Search the closest pair of sequences between both sub-MSAs
    # (the first pair found wins in case of equal distances)
    lowest_dist = MAX_FLOAT
    for k in range(left_order_v.shape[0]):
        for l in range(right_order_v.shape[0]):
            current_dist = distances_v[left_order_v[k], right_order_v[l]]
            if current_dist < lowest_dist:
                lowest_dist = current_dist
                best_k = k
                best_l = l

    # Only a single optimal alignment of the closest pair is considered
    alignment = align_optimal(
        left_seqs[best_k], right_seqs[best_l], matrix,
        gap_penalty, terminal_penalty, max_number=1
    )[0]

    # Propagate the new gaps to all members of each sub-MSA
    # by placing the neutral gap symbol at the gap positions
    left_trace = alignment.trace[:,0]
    right_trace = alignment.trace[:,1]
    for seq in left_seqs:
        seq.code = _replace_gaps(_T, left_trace, seq.code, gap_symbol_code)
    for seq in right_seqs:
        seq.code = _replace_gaps(_T, right_trace, seq.code, gap_symbol_code)

    return np.append(left_order, right_order), \
           left_seqs + right_seqs
569
+
570
+
571
+
572
def _replace_gaps(CodeType[:] _T,
                  int64[:] partial_trace_v,
                  np.ndarray seq_code,
                  int gap_symbol_code):
    """
    Replace gaps in a sequence in an :class:`Alignment` with a dedicated
    gap symbol.

    The replacement is required by the progressive alignment algorithm
    to be able to align gapped sequences with each other.

    Parameters
    ----------
    _T : ndarray, dtype=VARIABLE
        A little bit hacky workaround to get the correct dtype for the
        sequence code of the sequences in a static way
        (important for Cython).
    partial_trace_v : ndarray, shape=(m,), dtype=int
        The column of the alignment trace referring to the given
        sequence.
        A value of ``-1`` marks a gap, every other value is used as
        index into `seq_code`.
    seq_code : ndarray, shape=(n,)
        The sequence code representing the given sequence.
    gap_symbol_code : int
        The symbol code for the gap symbol.

    Returns
    -------
    new_seq_code : ndarray, shape=(m,)
        The sequence code representing a new sequence, that is the given
        sequence with inserted gap symbols.
        It has the same dtype as `seq_code`.
    """
    cdef int i
    cdef int64 index

    # Typed views allow element access without Python overhead
    cdef CodeType[:] seq_code_v = seq_code
    cdef np.ndarray new_seq_code = np.zeros(
        partial_trace_v.shape[0], dtype=seq_code.dtype
    )
    cdef CodeType[:] new_seq_code_v = new_seq_code

    for i in range(partial_trace_v.shape[0]):
        index = partial_trace_v[i]
        if index == -1:
            new_seq_code_v[i] = gap_symbol_code
        else:
            # Read via the typed view instead of the untyped ndarray
            # NOTE(review): assumes all non-gap trace values are valid
            # indices into `seq_code` - confirm against the callers
            new_seq_code_v[i] = seq_code_v[index]

    return new_seq_code