biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,1169 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.phylo"
6
+ __author__ = "Patrick Kunzmann, Tom David Müller"
7
+ __all__ = ["Tree", "TreeNode", "as_binary", "TreeError"]
8
+
9
+ cimport cython
10
+ cimport numpy as np
11
+
12
+ import copy
13
+ import numpy as np
14
+ import networkx as nx
15
+ from ...file import InvalidFileError
16
+ from ...copyable import Copyable
17
+
18
+
19
class Tree(Copyable):
    """
    __init__(root)

    A :class:`Tree` represents a rooted tree
    (e.g. alignment guide tree or phylogenetic tree).

    The tree itself wraps a *root* :class:`TreeNode` object,
    accessible via the :attr:`root` property.

    A :class:`Tree` is not a container itself:
    Objects, e.g. species names or sequences, that are represented by
    the nodes, cannot be stored directly in a :class:`Tree` or its
    nodes.
    Instead, each leaf :class:`TreeNode` has a reference index:
    These indices refer to a separate list or array, containing the
    actual reference objects.

    The property :attr:`leaves` contains a list of the leaf nodes,
    where the index of the leaf node in this list is equal to the
    reference index of the leaf node (``leaf.index``).

    The amount of leaves in a tree can be determined via the
    :func:`len()` function.

    Objects of this class are immutable.

    Parameters
    ----------
    root: TreeNode
        The root of the tree.
        The constructor calls the node's :func:`as_root()` method,
        in order to make it immutable.

    Attributes
    ----------
    root : TreeNode
        The root node of the tree.
    leaves : list of TreeNode
        The leaf nodes of the tree.
        The index of the leaf node in this list is equal to the
        reference index of the leaf node.
        This attribute is a shallow copy of the respective internal
        object.

    Examples
    --------

    >>> objects = ["An object", "Another object", "Yet another one"]
    >>> leaf1 = TreeNode(index=0)
    >>> leaf2 = TreeNode(index=1)
    >>> leaf3 = TreeNode(index=2)
    >>> inter = TreeNode([leaf1, leaf2], [5.0, 7.0])
    >>> root = TreeNode([inter, leaf3], [3.0, 10.0])
    >>> tree = Tree(root)
    >>> print(tree)
    ((0:5.0,1:7.0):3.0,2:10.0):0.0;
    >>> print([objects[node.index] for node in tree.leaves])
    ['An object', 'Another object', 'Yet another one']
    """

    def __init__(self, TreeNode root not None):
        # Finalize the given node as root, as described in the class
        # docstring
        root.as_root()
        self._root = root

        cdef list leaves_unsorted = self._root.get_leaves()
        cdef int leaf_count = len(leaves_unsorted)
        cdef np.ndarray indices = np.array(
            [leaf.index for leaf in leaves_unsorted]
        )
        # Put each leaf at the list position given by its reference
        # index, so that 'self._leaves[i].index == i'
        # NOTE(review): repeated reference indices are not detected
        # here; positions that are never assigned keep their 'None'
        # placeholder
        self._leaves = [None] * leaf_count
        cdef int i
        cdef int index
        for i in range(len(indices)):
            index = indices[i]
            if index >= leaf_count or index < 0:
                raise TreeError("The tree's indices are out of range")
            self._leaves[index] = leaves_unsorted[i]

    def __copy_create__(self):
        # The new tree wraps a copy of the root node
        # (presumably invoked by 'Copyable.copy()' - confirm)
        return Tree(self._root.copy())

    @property
    def root(self):
        return self._root

    @property
    def leaves(self):
        # Shallow copy: callers cannot alter the internal list
        return copy.copy(self._leaves)

    def as_graph(self):
        """
        as_graph()

        Obtain a graph representation of the :class:`Tree`.

        Returns
        -------
        graph : DiGraph
            A *NetworkX* directed graph.
            For a leaf node the graph node is its reference index.
            For an intermediate and root node the graph node is a tuple
            containing the graph nodes of its children.
            Each edge has a ``"distance"`` attribute depicting the
            distance between the nodes.
            Each edge points from the parent to its child.

        Examples
        --------

        >>> leaves = [TreeNode(index=i) for i in range(3)]
        >>> intermediate = TreeNode([leaves[0], leaves[1]], [2.0, 3.0])
        >>> root = TreeNode([intermediate, leaves[2]], [1.0, 5.0])
        >>> tree = Tree(root)
        >>> graph = tree.as_graph()
        >>> for node_i, node_j in graph.edges:
        ...     print(f"{str(node_i):12} -> {str(node_j):12}")
        (0, 1)       -> 0
        (0, 1)       -> 1
        ((0, 1), 2)  -> (0, 1)
        ((0, 1), 2)  -> 2
        """
        cdef tuple children
        cdef bint children_handled
        cdef TreeNode node, child, parent

        # Function-scope import: a deque removes its first element in
        # constant time, whereas 'list.pop(0)' shifts all other elements
        from collections import deque

        graph = nx.DiGraph()

        # This dict maps a TreeNode to its corresponding int or tuple
        cdef dict node_repr = {}

        # A First-In-First-Out queue for iterative handling of each node
        # Starting with all leaf nodes
        queue = deque(self._leaves)
        # A set representation of the same queue for efficient
        # '__contains__()' operation
        cdef set queue_set = set(self._leaves)
        while queue:
            node = queue.popleft()

            if node.is_leaf():
                node_repr[node] = node.index
            else:
                children = node.children
                children_handled = True
                for child in children:
                    if child not in node_repr:
                        children_handled = False
                        break
                # If the node representation of any child of this node
                # is not calculated yet, put this node to the end of the
                # queue and handle it later
                if not children_handled:
                    queue.append(node)
                    continue
                node_key = tuple(node_repr[child] for child in children)
                node_repr[node] = node_key
                # Add edges to children in graph
                for child in children:
                    graph.add_edge(
                        node_key, node_repr[child], distance=child.distance
                    )

            # This leads finally to termination of the loop:
            # When the root node is handled the last element in the
            # queue is handled and no new node is added to the queue
            if not node.is_root():
                parent = node.parent
                # The parent node might be already in the queue from
                # handling another child node
                if parent not in queue_set:
                    queue.append(parent)
                    queue_set.add(parent)

            # Node is handled
            # -> not in 'queue' anymore
            # -> remove also from 'queue_set'
            queue_set.remove(node)

        return graph

    def get_distance(self, index1, index2, bint topological=False):
        """
        get_distance(index1, index2, topological=False)

        Get the distance between two leaf nodes.

        The distance is the sum of all distances from each of the
        two nodes to their lowest common ancestor.

        Parameters
        ----------
        index1, index2 : int
            The reference indices of the two leaf nodes, to calculate
            the distance for.
        topological : bool, optional
            If True the topological distance is measured, i.e. every
            child-parent distance is 1.
            Otherwise, the distances from the `distance` attribute are
            used.

        Returns
        -------
        distance : float
            The distance between the nodes.

        Examples
        --------

        >>> leaf1 = TreeNode(index=0)
        >>> leaf2 = TreeNode(index=1)
        >>> leaf3 = TreeNode(index=2)
        >>> inter = TreeNode([leaf1, leaf2], [5.0, 7.0])
        >>> root = TreeNode([inter, leaf3], [3.0, 10.0])
        >>> tree = Tree(root)
        >>> print(tree.get_distance(0,1))
        12.0
        >>> print(tree.get_distance(0,2))
        18.0
        >>> print(tree.get_distance(1,2))
        20.0
        """
        # The leaf list is ordered by reference index, so the indices
        # can be used directly to look up the nodes
        return self._leaves[index1].distance_to(
            self._leaves[index2], topological
        )

    def to_newick(self, labels=None, bint include_distance=True,
                  round_distance=None):
        """
        to_newick(labels=None, include_distance=True, round_distance=None)

        Obtain the Newick notation of the tree.

        Parameters
        ----------
        labels : iterable object of str, optional
            The labels the indices in the leaf nodes refer to.
        include_distance : bool, optional
            If true, the distances are displayed in the newick notation,
            otherwise they are omitted.
        round_distance : int, optional
            If set, the distances are rounded to the given number of
            digits.

        Returns
        -------
        newick : str
            The Newick notation of the tree.

        Examples
        --------

        >>> leaf1 = TreeNode(index=0)
        >>> leaf2 = TreeNode(index=1)
        >>> leaf3 = TreeNode(index=2)
        >>> inter = TreeNode([leaf1, leaf2], [5.0, 7.0])
        >>> root = TreeNode([inter, leaf3], [3.0, 10.0])
        >>> tree = Tree(root)
        >>> print(tree.to_newick())
        ((0:5.0,1:7.0):3.0,2:10.0):0.0;
        >>> print(tree.to_newick(include_distance=False))
        ((0,1),2);
        >>> labels = ["foo", "bar", "foobar"]
        >>> print(tree.to_newick(labels=labels, include_distance=False))
        ((foo,bar),foobar);
        """
        # The node-level notation lacks the terminal semicolon
        return self._root.to_newick(
            labels, include_distance, round_distance
        ) + ";"

    @staticmethod
    def from_newick(str newick, list labels=None):
        """
        from_newick(newick, labels=None)

        Create a tree from a Newick notation.

        Parameters
        ----------
        newick : str
            The Newick notation to create the tree from.
        labels : list of str, optional
            If the Newick notation contains labels, that are not
            parseable into reference indices,
            i.e. they are not integers, this parameter can be provided
            to convert these labels into reference indices.
            The corresponding index is the position of the label in the
            provided list.

        Returns
        -------
        tree : Tree
            A tree created from the Newick notation.

        Raises
        ------
        InvalidFileError
            If the Newick string is empty after stripping whitespace.

        Notes
        -----
        This function does accept but does not require the Newick string
        to have the terminal semicolon.

        Keep in mind that the :class:`Tree` class does not support any
        labels on intermediate nodes.
        If the string contains such labels, they are discarded.
        """
        newick = newick.strip()
        if len(newick) == 0:
            raise InvalidFileError("Newick string is empty")
        # Remove terminal semicolon as required by
        # 'TreeNode.from_newick()'
        if newick[-1] == ";":
            newick = newick[:-1]
        # Only the node is needed; the accompanying distance value is
        # discarded
        root, _ = TreeNode.from_newick(newick, labels)
        return Tree(root)

    def __str__(self):
        return self.to_newick()

    def __len__(self):
        # The length of a tree is its number of leaf nodes
        return len(self._leaves)

    def __eq__(self, item):
        # Trees are equal if their root nodes compare equal;
        # an object of any other type is never equal to a tree
        if not isinstance(item, Tree):
            return False
        return self._root == item._root

    def __hash__(self):
        # Based on the root node, in line with '__eq__()'
        return hash(self._root)
345
+
346
+
347
+ cdef class TreeNode:
348
+ """
349
+ __init__(children=None, distances=None, index=None)
350
+
351
+ :class:`TreeNode` objects are part of a rooted tree
352
+ (e.g. alignment guide tree).
353
+ There are two :class:`TreeNode` subtypes:
354
+
355
+ - Leaf node - Cannot have child nodes but has an index referring
356
+ to an array-like reference object.
357
+ - Intermediate node - Has child nodes but no reference index
358
+
359
+ This subtype is determined based on whether child nodes were given
360
+ to the constructor.
361
+
362
+ Every :class:`TreeNode` has a reference to its parent node.
363
+ A root node is a node without a parent node, that is finalized
364
+ using `as_root()`.
365
+ Calling this function prevents the node from being used as a
366
+ child.
367
+
368
+ :class:`TreeNode` objects are semi-immutable:
369
+ The child nodes or the reference index are fixed at the time of
370
+ creation.
371
+ Only the parent can be set once, when the parent node is created.
372
+ :class:`TreeNode` objects that are finalized using `as_root()` are
373
+ completely immutable.
374
+
375
+ All object properties are read-only.
376
+
377
+ Parameters
378
+ ----------
379
+ children: array-like object of TreeNode, length=n, optional
380
+ The children of this node.
381
+ As this causes the creation of an intermediate node,
382
+ this parameter cannot be used in combination with `index`.
383
+ distances: array-like object of float, length=n, optional
384
+ The distances of the child nodes to this node.
385
+ Must be set if `children` is set.
386
+ index: int, optional
387
+ Index to a reference array-like object
388
+ (e.g. list of sequences or labels).
389
+ Must be a positive integer.
390
+ As this causes the creation of a leaf node, this parameter
391
+ cannot be used in combination with the other parameters.
392
+
393
+ Attributes
394
+ ----------
395
+ parent : TreeNode
396
+ The parent node.
397
+ `None` if node has no parent.
398
+ children : tuple of TreeNode
399
+ The child nodes.
400
+ `None` if node is a leaf node.
401
+ index : int
402
+ The index to a reference array-like object.
403
+ `None` if node is not a leaf node.
404
+ distance : float
405
+ Distance to parent node.
406
+ `None` if `parent` is `Ǹone`.
407
+
408
+ Examples
409
+ --------
410
+ Creating leaf nodes:
411
+
412
+ >>> leaf1 = TreeNode(index=0)
413
+ >>> leaf2 = TreeNode(index=1)
414
+ >>> leaf3 = TreeNode(index=2)
415
+
416
+ Creating intermediate nodes as parent of those leaf nodes:
417
+
418
+ >>> inter = TreeNode([leaf1, leaf2], [5.0, 7.0])
419
+ >>> root = TreeNode([inter, leaf3], [3.0, 10.0])
420
+ >>> print(root)
421
+ ((0:5.0,1:7.0):3.0,2:10.0):0.0
422
+ """
423
+
424
+ cdef int _index
425
+ cdef float _distance
426
+ cdef bint _is_root
427
+ cdef TreeNode _parent
428
+ cdef tuple _children
429
+
430
+ def __cinit__(self, children=None, distances=None, index=None):
431
+ self._is_root = False
432
+ self._distance = 0
433
+ self._parent = None
434
+ cdef TreeNode child
435
+ cdef float distance
436
+ if index is None:
437
+ # Node is intermediate -> has children
438
+ if children is None or distances is None:
439
+ raise TypeError(
440
+ "Either reference index (for terminal node) or "
441
+ "child nodes including the distance "
442
+ "(for intermediate node) must be set"
443
+ )
444
+ for item in children:
445
+ if not isinstance(item, TreeNode):
446
+ raise TypeError(
447
+ f"Expected 'TreeNode', but got '{type(item).__name__}'"
448
+ )
449
+ for item in distances:
450
+ if not isinstance(item, float) and not isinstance(item, int):
451
+ raise TypeError(
452
+ f"Expected 'float' or 'int', "
453
+ f"but got '{type(item).__name__}'"
454
+ )
455
+ if len(children) == 0:
456
+ raise TreeError(
457
+ "Intermediate nodes must at least contain one child node"
458
+ )
459
+ if len(children) != len(distances):
460
+ raise ValueError(
461
+ "The number of children must equal the number of distances"
462
+ )
463
+ for i in range(len(children)):
464
+ for j in range(len(children)):
465
+ if i != j and children[i] is children[j]:
466
+ raise TreeError(
467
+ "Two child nodes cannot be the same object"
468
+ )
469
+ self._index = -1
470
+ self._children = tuple(children)
471
+ for child, distance in zip(children, distances):
472
+ child._set_parent(self, distance)
473
+ elif index < 0:
474
+ raise ValueError("Index cannot be negative")
475
+ else:
476
+ # Node is terminal -> has no children
477
+ if children is not None or distances is not None:
478
+ raise TypeError(
479
+ "Reference index and child nodes are mutually exclusive"
480
+ )
481
+ self._index = index
482
+ self._children = None
483
+
484
+ def _set_parent(self, TreeNode parent not None, float distance):
485
+ if self._parent is not None or self._is_root:
486
+ raise TreeError("Node already has a parent")
487
+ self._parent = parent
488
+ self._distance = distance
489
+
490
+ def copy(self):
491
+ """
492
+ copy()
493
+
494
+ Create a deep copy of this :class:`TreeNode`.
495
+
496
+ The copy includes this node, its reference index and deep copies
497
+ of its child nodes.
498
+ The parent node and the distance to it is not included.
499
+ """
500
+ if self.is_leaf():
501
+ return TreeNode(index=self._index)
502
+ else:
503
+ distances = [child.distance for child in self._children]
504
+ children_clones = [child.copy() for child in self._children]
505
+ return TreeNode(children_clones, distances)
506
+
507
+ @property
508
+ def index(self):
509
+ return None if self._index == -1 else self._index
510
+
511
+ @property
512
+ def children(self):
513
+ return self._children
514
+
515
+ @property
516
+ def parent(self):
517
+ return self._parent
518
+
519
+ @property
520
+ def distance(self):
521
+ return None if self._parent is None else self._distance
522
+
523
+ def is_leaf(self):
524
+ """
525
+ is_leaf()
526
+
527
+ Check if the node is a leaf node.
528
+
529
+ Returns
530
+ -------
531
+ is_leaf : bool
532
+ True if the node is a leaf node, false otherwise.
533
+ """
534
+ return False if self._index == -1 else True
535
+
536
+ def is_root(self):
537
+ """
538
+ is_root()
539
+
540
+ Check if the node is a root node.
541
+
542
+ Returns
543
+ -------
544
+ is_root : bool
545
+ True if the node is a root node, false otherwise.
546
+ """
547
+ return bool(self._is_root)
548
+
549
+ def as_root(self):
550
+ """
551
+ as_root()
552
+
553
+ Convert the node into a root node.
554
+
555
+ When a root node is used as `child` parameter in the
556
+ construction of a potential parent node, a :class:`TreeError` is
557
+ raised.
558
+ """
559
+ if self._parent is not None:
560
+ raise TreeError("Node has parent, cannot be a root node")
561
+ self._is_root = True
562
+
563
+ def distance_to(self, TreeNode node, bint topological=False):
564
+ """
565
+ distance_to(node, topological=False)
566
+
567
+ Get the distance of this node to another node.
568
+
569
+ The distance is the sum of all distances from this and the other
570
+ node to the lowest common ancestor.
571
+
572
+ Parameters
573
+ ----------
574
+ node : TreeNode
575
+ The second node for distance calculation.
576
+
577
+
578
+ Returns
579
+ -------
580
+ distance : float
581
+ The distance of this node to `node`.
582
+
583
+ Raises
584
+ ------
585
+ TreeError
586
+ If the nodes have no common ancestor.
587
+
588
+ Examples
589
+ --------
590
+
591
+ >>> leaf1 = TreeNode(index=0)
592
+ >>> leaf2 = TreeNode(index=1)
593
+ >>> leaf3 = TreeNode(index=2)
594
+ >>> inter = TreeNode([leaf1, leaf2], [5.0, 7.0])
595
+ >>> root = TreeNode([inter, leaf3], [3.0, 10.0])
596
+ >>> print(leaf1.distance_to(leaf2))
597
+ 12.0
598
+ >>> print(leaf1.distance_to(leaf3))
599
+ 18.0
600
+ """
601
+ # Sum distances until LCA has been reached
602
+ cdef float distance = 0
603
+ cdef TreeNode current_node = None
604
+ cdef TreeNode lca = self.lowest_common_ancestor(node)
605
+ if lca is None:
606
+ raise TreeError("The nodes do not have a common ancestor")
607
+ current_node = self
608
+ while current_node is not lca:
609
+ if topological:
610
+ distance += 1
611
+ else:
612
+ distance += current_node._distance
613
+ current_node = current_node._parent
614
+ current_node = node
615
+ while current_node is not lca:
616
+ if topological:
617
+ distance += 1
618
+ else:
619
+ distance += current_node._distance
620
+ current_node = current_node._parent
621
+ return distance
622
+
623
+ def lowest_common_ancestor(self, TreeNode node):
624
+ """
625
+ lowest_common_ancestor(node)
626
+
627
+ Get the lowest common ancestor of this node and another node.
628
+
629
+ Parameters
630
+ ----------
631
+ node : TreeNode
632
+ The node to get the lowest common ancestor with.
633
+
634
+ Returns
635
+ -------
636
+ ancestor : TreeNode or None
637
+ The lowest common ancestor. `None` if the nodes have no
638
+ common ancestor, i.e. they are not in the same tree
639
+ """
640
+ cdef int i
641
+ cdef TreeNode lca = None
642
+ # Create two paths from the leaves to root
643
+ cdef list self_path = _create_path_to_root(self)
644
+ cdef list other_path = _create_path_to_root(node)
645
+ # Reverse Iteration through path (beginning from root)
646
+ # until the paths diverge
647
+ for i in range(-1, -min(len(self_path), len(other_path))-1, -1):
648
+ if self_path[i] is other_path[i]:
649
+ # Same node -> common ancestor
650
+ lca = self_path[i]
651
+ else:
652
+ # Different node -> Not common ancestor
653
+ # -> return last common ancewstor found
654
+ break
655
+ return lca
656
+
657
+ def get_indices(self):
658
+ """
659
+ get_indices()
660
+
661
+ Get an array of reference indices that leaf nodes of this node
662
+ contain.
663
+
664
+ This method identifies all leaf nodes, which have this node as
665
+ ancestor and puts the contained indices into an array.
666
+ If this node is a leaf node itself, the array contains the
667
+ reference index of this node as single element.
668
+
669
+ Returns
670
+ -------
671
+ indices : ndarray, dtype=int32
672
+ The reference indices of direct and indirect child leaf
673
+ nodes of this node.
674
+
675
+ Examples
676
+ --------
677
+
678
+ >>> leaf0 = TreeNode(index=0)
679
+ >>> leaf1 = TreeNode(index=1)
680
+ >>> leaf2 = TreeNode(index=2)
681
+ >>> leaf3 = TreeNode(index=3)
682
+ >>> intr0 = TreeNode([leaf0, leaf2], [0, 0])
683
+ >>> intr1 = TreeNode([leaf1, leaf3], [0, 0])
684
+ >>> root = TreeNode([intr0, intr1], [0, 0])
685
+ >>> print(leaf0.get_indices())
686
+ [0]
687
+ >>> print(intr0.get_indices())
688
+ [0 2]
689
+ >>> print(intr1.get_indices())
690
+ [1 3]
691
+ >>> print(root.get_indices())
692
+ [0 2 1 3]
693
+ """
694
+ cdef TreeNode leaf
695
+ return np.array(
696
+ [leaf._index for leaf in self.get_leaves()], dtype=np.int32
697
+ )
698
+
699
+ def get_leaves(self):
700
+ """
701
+ get_leaves()
702
+
703
+ Get a list of leaf nodes that are direct or indirect child nodes
704
+ of this node.
705
+
706
+ This method identifies all leaf nodes, which have this node as
707
+ ancestor.
708
+ If this node is a leaf node itself, the list contains this node
709
+ as single element.
710
+
711
+ Returns
712
+ -------
713
+ leaf_nodes : list
714
+ The leaf nodes, that are direct or indirect child nodes
715
+ of this node.
716
+ """
717
+ cdef list leaf_list = []
718
+ # delegate to 'cdef' method
719
+ # to reduce overhead of recursive function calling
720
+ _get_leaves(self, leaf_list)
721
+ return leaf_list
722
+
723
+ def get_leaf_count(self):
724
+ """"
725
+ get_leaf_count()
726
+
727
+ Get the number of direct or indirect leaves of this ńode.
728
+
729
+ This method identifies all leaf nodes, which have this node as
730
+ ancestor.
731
+ If this node is a leaf node itself, 1 is returned.
732
+ """
733
+ return _get_leaf_count(self)
734
+
735
+ def to_newick(self, labels=None, bint include_distance=True,
736
+ round_distance=None):
737
+ """
738
+ to_newick(labels=None, include_distance=True)
739
+
740
+ Obtain the node represented in Newick notation.
741
+
742
+ The terminal semicolon is not included.
743
+
744
+ Parameters
745
+ ----------
746
+ labels : iterable object of str
747
+ The labels the indices in the leaf nodes refer to
748
+ include_distance : bool
749
+ If true, the distances are displayed in the newick notation,
750
+ otherwise they are omitted.
751
+ round_distance : int, optional
752
+ If set, the distances are rounded to the given number of
753
+ digits.
754
+
755
+ Returns
756
+ -------
757
+ newick : str
758
+ The Newick notation of the node.
759
+
760
+ Examples
761
+ --------
762
+
763
+ >>> leaf1 = TreeNode(index=0)
764
+ >>> leaf2 = TreeNode(index=1)
765
+ >>> leaf3 = TreeNode(index=2)
766
+ >>> inter = TreeNode([leaf1, leaf2], [5.0, 7.0])
767
+ >>> root = TreeNode([inter, leaf3], [3.0, 10.0])
768
+ >>> print(root.to_newick())
769
+ ((0:5.0,1:7.0):3.0,2:10.0):0.0
770
+ >>> print(root.to_newick(include_distance=False))
771
+ ((0,1),2)
772
+ >>> labels = ["foo", "bar", "foobar"]
773
+ >>> print(root.to_newick(labels=labels, include_distance=False))
774
+ ((foo,bar),foobar)
775
+ """
776
+ if self.is_leaf():
777
+ if labels is not None:
778
+ for label in labels:
779
+ label = labels[self._index]
780
+ # Characters that are part of the Newick syntax
781
+ # are illegal
782
+ illegal_chars = [",",":",";","(",")"]
783
+ for char in illegal_chars:
784
+ if char in label:
785
+ raise ValueError(
786
+ f"Label '{label}' contains "
787
+ f"illegal character '{char}'"
788
+ )
789
+ else:
790
+ label = str(self._index)
791
+ if include_distance:
792
+ if round_distance is None:
793
+ return f"{label}:{self._distance}"
794
+ else:
795
+ return f"{label}:{self._distance:.{round_distance}f}"
796
+ else:
797
+ return f"{label}"
798
+ else:
799
+ # Build string in a recursive way
800
+ child_strings = [child.to_newick(
801
+ labels, include_distance, round_distance
802
+ ) for child in self._children]
803
+ if include_distance:
804
+ if round_distance is None:
805
+ return f"({','.join(child_strings)}):{self._distance}"
806
+ else:
807
+ return (
808
+ f"({','.join(child_strings)}):"
809
+ f"{self._distance:.{round_distance}f}"
810
+ )
811
+ else:
812
+ return f"({','.join(child_strings)})"
813
+
814
+ @staticmethod
815
+ def from_newick(str newick, list labels=None):
816
+ """
817
+ from_newick(newick, labels=None)
818
+
819
+ Create a node and all its child nodes from a Newick notation.
820
+
821
+ Parameters
822
+ ----------
823
+ newick : str
824
+ The Newick notation to create the node from.
825
+ labels : list of str, optional
826
+ If the Newick notation contains labels, that are not
827
+ parseable into reference indices,
828
+ i.e. they are not integers, this parameter can be provided
829
+ to convert these labels into reference indices.
830
+ The corresponding index is the position of the label in the
831
+ provided list.
832
+
833
+ Returns
834
+ -------
835
+ node : TreeNode
836
+ The tree node parsed from the Newick notation.
837
+ distance : float
838
+ Distance of the node to its parent. If the newick notation
839
+ does not provide a distance, it is set to 0 by default.
840
+
841
+ Notes
842
+ -----
843
+ The provided Newick notation must not have a terminal semicolon.
844
+ If you have a Newick notation that covers an entire tree, you
845
+ may use the same method in the :class:`Tree` class instead.
846
+ Keep in mind that the :class:`TreeNode` class does support any
847
+ labels on intermediate nodes.
848
+ If the string contains such labels, they are discarded.
849
+ """
850
+ cdef int i
851
+ cdef int subnewick_start_i = -1
852
+ cdef int subnewick_stop_i = -1
853
+ cdef int level = 0
854
+ cdef list comma_pos
855
+ cdef list children
856
+ cdef list distances
857
+ cdef int pos
858
+ cdef int next_pos
859
+
860
+ # Ignore any whitespace
861
+ newick = "".join(newick.split())
862
+
863
+ # Find brackets belonging to sub-newick
864
+ # e.g. (A:0.1,B:0.2):0.5
865
+ # ^ ^
866
+ for i in range(len(newick)):
867
+ char = newick[i]
868
+ if char == "(":
869
+ subnewick_start_i = i
870
+ break
871
+ if char == ")":
872
+ raise InvalidFileError("Bracket closed before it was opened")
873
+ for i in reversed(range(len(newick))):
874
+ char = newick[i]
875
+ if char == ")":
876
+ subnewick_stop_i = i+1
877
+ break
878
+ if char == "(":
879
+ raise InvalidFileError("Bracket was opened but not closed")
880
+
881
+ if subnewick_start_i == -1 and subnewick_stop_i == -1:
882
+ # No brackets -> no sub-newwick -> Leaf node
883
+ label_and_distance = newick
884
+ try:
885
+ label, distance = label_and_distance.split(":")
886
+ distance = float(distance)
887
+ except ValueError:
888
+ # No colon -> No distance is provided
889
+ distance = 0
890
+ label = label_and_distance
891
+ index = int(label) if labels is None else labels.index(label)
892
+ return TreeNode(index=index), distance
893
+
894
+ else:
895
+ # Intermediate node
896
+ if subnewick_stop_i == len(newick):
897
+ # Node with neither distance nor label
898
+ label = None
899
+ distance = 0
900
+ else:
901
+ label_and_distance = newick[subnewick_stop_i:]
902
+ try:
903
+ label, distance = label_and_distance.split(":")
904
+ distance = float(distance)
905
+ except ValueError:
906
+ # No colon -> No distance is provided
907
+ distance = 0
908
+ label = label_and_distance
909
+ # Label of intermediate nodes is discarded
910
+ distance = float(distance)
911
+
912
+ subnewick = newick[subnewick_start_i+1 : subnewick_stop_i-1]
913
+ if len(subnewick) == 0:
914
+ raise InvalidFileError(
915
+ "Intermediate node must at least have one child"
916
+ )
917
+ # Parse childs
918
+ # Split subnewick at ',' if ',' is at current level
919
+ # (not in a subsubnewick)
920
+ comma_pos = []
921
+ for i, char in enumerate(subnewick):
922
+ if char == "(":
923
+ level += 1
924
+ elif char == ")":
925
+ level -= 1
926
+ elif char == ",":
927
+ if level == 0:
928
+ comma_pos.append(i)
929
+ if level < 0:
930
+ raise InvalidFileError(
931
+ "Bracket closed before it was opened"
932
+ )
933
+
934
+ children = []
935
+ distances = []
936
+ # Recursive tree construction
937
+ for i, pos in enumerate(comma_pos):
938
+ if i == 0:
939
+ # (A,B),(C,D),(E,F)
940
+ # -----
941
+ child, dist = TreeNode.from_newick(
942
+ subnewick[:pos], labels=labels
943
+ )
944
+ else:
945
+ # (A,B),(C,D),(E,F)
946
+ # -----
947
+ prev_pos = comma_pos[i-1]
948
+ child, dist = TreeNode.from_newick(
949
+ subnewick[prev_pos+1 : pos], labels=labels
950
+ )
951
+ children.append(child)
952
+ distances.append(dist)
953
+ # Node after last comma
954
+ # (A,B),(C,D),(E,F)
955
+ # -----
956
+ if len(comma_pos) != 0:
957
+ child, dist = TreeNode.from_newick(
958
+ subnewick[comma_pos[-1]+1:], labels=labels
959
+ )
960
+ else:
961
+ # Single child node:
962
+ child, dist = TreeNode.from_newick(
963
+ subnewick, labels=labels
964
+ )
965
+ children.append(child)
966
+ distances.append(dist)
967
+ return TreeNode(children, distances), distance
968
+
969
+ def __str__(self):
970
+ return self.to_newick()
971
+
972
+ def __eq__(self, item):
973
+ if not isinstance(item, TreeNode):
974
+ return False
975
+ cdef TreeNode node = item
976
+ if self._distance != node._distance:
977
+ return False
978
+ if self._index !=-1:
979
+ if self._index != node._index:
980
+ return False
981
+ else:
982
+ if frozenset(self._children) != frozenset(node._children):
983
+ return False
984
+ return True
985
+
986
+ def __hash__(self):
987
+ # Order of children is not important -> set
988
+ children_set = frozenset(self._children) \
989
+ if self._children is not None else None
990
+ return hash((self._index, children_set, self._distance))
991
+
992
+
993
cdef _get_leaves(TreeNode node, list leaf_list):
    """
    Append `node`, if it is a leaf node, or otherwise all leaf nodes
    below it, to `leaf_list`.
    """
    cdef TreeNode descendant
    if node._index != -1:
        # A leaf node ends the recursion and is collected itself
        leaf_list.append(node)
        return
    # Intermediate node -> descend into each child
    for descendant in node._children:
        _get_leaves(descendant, leaf_list)
1002
+
1003
+
1004
cdef int _get_leaf_count(TreeNode node):
    """
    Count the leaf nodes below `node`; a leaf node counts as 1.
    """
    cdef TreeNode descendant
    cdef int total = 0
    if node._index != -1:
        # Leaf node -> counts itself
        return 1
    # Intermediate node -> sum up the counts of all children
    for descendant in node._children:
        total += _get_leaf_count(descendant)
    return total
1015
+
1016
+
1017
cdef list _create_path_to_root(TreeNode node):
    """
    Create a list of nodes representing the path from the given node
    upwards to the topmost ancestor, i.e. the node without a parent.

    The list starts with `node` itself and is empty if `node` is `None`.
    """
    cdef list ancestors = []
    cdef TreeNode cursor = node
    # Follow the parent references until there is no parent anymore
    while cursor is not None:
        ancestors.append(cursor)
        cursor = cursor._parent
    return ancestors
1028
+
1029
+
1030
+
1031
def as_binary(tree_or_node):
    """
    as_binary(tree_or_node)

    Convert a tree into a binary tree.

    In general a :class:`TreeNode` can have more or less than two
    children.
    However guide trees usually expect each intermediate node to have
    exactly two child nodes.
    This function creates a binary :class:`Tree` (or :class:`TreeNode`)
    for the given :class:`Tree` (or :class:`TreeNode`):
    Intermediate nodes that have only a single child are deleted and its
    parent node is directly connected to its child node.
    Intermediate nodes that have more than two children are divided into
    multiple nodes (distances are preserved).

    Parameters
    ----------
    tree_or_node : Tree or TreeNode
        The tree or node to be converted into a binary tree or node.

    Returns
    -------
    binary_tree_or_node : Tree or TreeNode
        The converted tree or node.

    Raises
    ------
    TypeError
        If `tree_or_node` is neither a :class:`Tree` nor a
        :class:`TreeNode`.
    """
    if isinstance(tree_or_node, Tree):
        node, _ = _as_binary(tree_or_node.root)
        return Tree(node)
    elif isinstance(tree_or_node, TreeNode):
        # '_as_binary()' returns a '(node, distance)' tuple:
        # only the converted node itself is returned to the caller
        node, _ = _as_binary(tree_or_node)
        return node
    else:
        raise TypeError(
            f"Expected 'Tree' or 'TreeNode', not {type(tree_or_node).__name__}"
        )
1068
+
1069
cdef _as_binary(TreeNode node):
    """
    The actual logic wrapped by :func:`as_binary()`.

    Parameters
    ----------
    node : TreeNode
        The node to be converted.

    Returns
    -------
    binary_node: TreeNode
        The converted node.
    distance : float or None
        The distance of the converted node to its parent.
        `None`, if the converted node has no parent.
    """
    cdef TreeNode child
    cdef TreeNode current_div_node
    cdef tuple children
    cdef list rem_children
    cdef list distances
    cdef float distance

    children = node.children
    if children is None:
        # Leaf node
        return TreeNode(index=node.index), node.distance

    if len(children) == 1:
        # Intermediate node with one child
        # -> Omit node and directly connect its child to its parent
        # The distances are added
        #
        #            |--              |--
        #            |                |
        #   --|------|--    ->  ------|--
        #            |                |
        #            |--              |--
        #
        child, distance = _as_binary(children[0])
        if node.parent is None:
            # Child becomes the new topmost node -> No distance to parent
            # This covers root nodes as well as nodes that were never
            # attached to a parent, whose 'distance' is None
            return child, None
        else:
            return child, node.distance + distance

    # Intermediate node with two or more children
    # -> The first two (converted) children are put into the
    #    bottom-most binary node
    # -> Each remaining child is joined with the node created in the
    #    previous step (distance = 0) into a new binary node
    # For exactly two children this gives the unchanged node
    #
    #     |--                  |--
    #     |                 |--|
    #   --|--      ->     --|  |--
    #     |                 |
    #     |--               |--
    #
    rem_children, distances = [list(tup) for tup in zip(
        *[_as_binary(child) for child in children]
    )]
    current_div_node = TreeNode(rem_children[:2], distances[:2])
    for rem_child, rem_distance in zip(rem_children[2:], distances[2:]):
        current_div_node = TreeNode(
            (current_div_node, rem_child),
            (0, rem_distance)
        )
    return current_div_node, node.distance
1162
+
1163
+
1164
+
1165
class TreeError(Exception):
    """
    Exception signalling a problem in the context of tree topology,
    e.g. a node that already has a parent being used as child.
    """