phylo2vec 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/Cargo.lock +1 -1
  2. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/PKG-INFO +46 -4
  3. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/README.md +39 -2
  4. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/base/ancestry.py +3 -3
  5. {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.0}/phylo2vec/base/edges.py +2 -2
  6. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/base/newick.py +1 -1
  7. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/base/pairs.py +4 -5
  8. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/io/writer.py +1 -1
  9. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/metrics/pairwise.py +14 -6
  10. {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.0}/phylo2vec/opt/__init__.py +2 -1
  11. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/opt/_base.py +55 -12
  12. phylo2vec-1.3.0/phylo2vec/opt/_gradme.py +200 -0
  13. phylo2vec-1.3.0/phylo2vec/opt/_gradme_losses.py +117 -0
  14. {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.0}/phylo2vec/opt/_hc.py +29 -25
  15. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/opt/_hc_losses.py +4 -4
  16. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/ops/mod.rs +63 -1
  17. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/ops/vector.rs +199 -23
  18. {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.0}/phylo2vec/utils/newick.py +4 -4
  19. phylo2vec-1.3.0/phylo2vec/utils/vector.py +258 -0
  20. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/Cargo.toml +1 -1
  21. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/README.md +39 -2
  22. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/base/ancestry.py +3 -3
  23. {phylo2vec-1.2.0 → phylo2vec-1.3.0/py-phylo2vec}/phylo2vec/base/edges.py +2 -2
  24. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/base/newick.py +1 -1
  25. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/base/pairs.py +4 -5
  26. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/io/writer.py +1 -1
  27. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/metrics/pairwise.py +14 -6
  28. {phylo2vec-1.2.0 → phylo2vec-1.3.0/py-phylo2vec}/phylo2vec/opt/__init__.py +2 -1
  29. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/opt/_base.py +55 -12
  30. phylo2vec-1.3.0/py-phylo2vec/phylo2vec/opt/_gradme.py +200 -0
  31. phylo2vec-1.3.0/py-phylo2vec/phylo2vec/opt/_gradme_losses.py +117 -0
  32. {phylo2vec-1.2.0 → phylo2vec-1.3.0/py-phylo2vec}/phylo2vec/opt/_hc.py +29 -25
  33. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/opt/_hc_losses.py +4 -4
  34. {phylo2vec-1.2.0 → phylo2vec-1.3.0/py-phylo2vec}/phylo2vec/utils/newick.py +4 -4
  35. phylo2vec-1.3.0/py-phylo2vec/phylo2vec/utils/vector.py +258 -0
  36. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/src/lib.rs +12 -0
  37. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/tests/test_metrics.py +23 -1
  38. phylo2vec-1.3.0/py-phylo2vec/tests/test_reorder.py +154 -0
  39. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/pyproject.toml +9 -2
  40. phylo2vec-1.2.0/phylo2vec/utils/vector.py +0 -365
  41. phylo2vec-1.2.0/py-phylo2vec/phylo2vec/utils/vector.py +0 -365
  42. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/Cargo.toml +0 -0
  43. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/LICENSE +0 -0
  44. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/Cargo.toml +0 -0
  45. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/README.md +0 -0
  46. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/__init__.py +0 -0
  47. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/base/__init__.py +0 -0
  48. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/benches/bench_main.rs +0 -0
  49. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/benches/benchmarks/core.rs +0 -0
  50. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/benches/benchmarks/mod.rs +0 -0
  51. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/__init__.py +0 -0
  52. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/_base.py +0 -0
  53. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/data/__init__.py +0 -0
  54. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/descr/__init__.py +0 -0
  55. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/descr/fluA.md +0 -0
  56. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/descr/h3n2.md +0 -0
  57. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/descr/m501.md +0 -0
  58. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/descr/yeast.md +0 -0
  59. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/descr/zika.md +0 -0
  60. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/io/__init__.py +0 -0
  61. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/io/_validation.py +0 -0
  62. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/io/reader.py +0 -0
  63. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/metrics/__init__.py +0 -0
  64. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/opt/README.md +0 -0
  65. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/lib.rs +0 -0
  66. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/main.rs +0 -0
  67. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/profile_main.rs +0 -0
  68. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/mod.rs +0 -0
  69. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/ops/avl.rs +0 -0
  70. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/ops/matrix/mod.rs +0 -0
  71. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/ops/newick/mod.rs +0 -0
  72. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/ops/newick/newick_patterns.rs +0 -0
  73. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/types.rs +0 -0
  74. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/utils.rs +0 -0
  75. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/utils/__init__.py +0 -0
  76. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/utils/matrix.py +0 -0
  77. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/LICENSE +0 -0
  78. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/benchmarks/test_bench.py +0 -0
  79. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/__init__.py +0 -0
  80. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/base/__init__.py +0 -0
  81. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/__init__.py +0 -0
  82. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/_base.py +0 -0
  83. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/data/__init__.py +0 -0
  84. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/descr/__init__.py +0 -0
  85. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/descr/fluA.md +0 -0
  86. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/descr/h3n2.md +0 -0
  87. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/descr/m501.md +0 -0
  88. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/descr/yeast.md +0 -0
  89. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/descr/zika.md +0 -0
  90. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/io/__init__.py +0 -0
  91. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/io/_validation.py +0 -0
  92. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/io/reader.py +0 -0
  93. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/metrics/__init__.py +0 -0
  94. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/opt/README.md +0 -0
  95. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/utils/__init__.py +0 -0
  96. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/utils/matrix.py +0 -0
  97. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/tests/__init__.py +0 -0
  98. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/tests/config.py +0 -0
  99. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/tests/test_base.py +0 -0
  100. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/tests/test_io.py +0 -0
  101. {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/tests/test_utils.py +0 -0
@@ -544,7 +544,7 @@ dependencies = [
544
544
 
545
545
  [[package]]
546
546
  name = "py-phylo2vec"
547
- version = "1.2.0"
547
+ version = "1.3.0"
548
548
  dependencies = [
549
549
  "phylo2vec",
550
550
  "pyo3",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: phylo2vec
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
6
6
  Classifier: Programming Language :: Python
@@ -11,12 +11,17 @@ Classifier: Programming Language :: Python :: 3.12
11
11
  Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
12
12
  Classifier: Operating System :: Unix
13
13
  Classifier: Operating System :: Microsoft :: Windows
14
- Requires-Dist: numba>=0.56.4
15
14
  Requires-Dist: numpy>=1.22,<2.1
16
15
  Requires-Dist: biopython>=1.84.0
17
16
  Requires-Dist: joblib>=1.2.0
18
17
  Requires-Dist: ete3==3.1.3
19
18
  Requires-Dist: six>=1.16.0
19
+ Requires-Dist: jax ; extra == 'opt'
20
+ Requires-Dist: joblib ; extra == 'opt'
21
+ Requires-Dist: optax ; extra == 'opt'
22
+ Requires-Dist: rpy2 ; extra == 'opt'
23
+ Requires-Dist: tqdm ; extra == 'opt'
24
+ Provides-Extra: opt
20
25
  License-File: LICENSE
21
26
  Summary: Phylo2Vec: integer vector representation of binary (phylogenetic) trees
22
27
  Author: Neil Scheidwasser <neil.clow@sund.ku.dk>
@@ -85,8 +90,10 @@ in Rust.
85
90
 
86
91
  ### Installing R package
87
92
 
88
- To install the R package, first you need to retrieve one of the compiled file
89
- within the package [releases](https://github.com/sbhattlab/phylo2vec/releases).
93
+ #### Option 1: from a release (Windows, Mac, Ubuntu >= 22.04)
94
+
95
+ Retrieve one of the compiled binaries from the
96
+ [releases](https://github.com/sbhattlab/phylo2vec/releases) that fits your OS.
90
97
  Once the file is downloaded, simply run `install.packages` in your R command
91
98
  line.
92
99
 
@@ -94,6 +101,41 @@ line.
94
101
  install.packages("/path/to/package_file", repos = NULL, type = 'source')
95
102
  ```
96
103
 
104
+ #### Option 2: using `devtools`
105
+
106
+ ⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
107
+ build the core package.
108
+
109
+ ```R
110
+ devtools::install_github("sbhattlab/phylo2vec", subdir="./r-phylo2vec", build = FALSE)
111
+ ```
112
+
113
+ Note: to download a specific version, use:
114
+
115
+ ```R
116
+ devtools::install_github("sbhattlab/phylo2vec@vX.Y.Z", subdir="./r-phylo2vec", build = FALSE)
117
+ ```
118
+
119
+ #### Option 3: manual installation
120
+
121
+ ⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
122
+ build the core package.
123
+
124
+ Clone the repository and run the following `install.packages` in your R command
125
+ line.
126
+
127
+ Note: to download a specific version, you can use `git checkout` to a desired
128
+ tag.
129
+
130
+ ```bash
131
+ git clone https://github.com/sbhattlab/phylo2vec
132
+ cd phylo2vec
133
+ ```
134
+
135
+ ```R
136
+ install.packages("./r-phylo2vec", repos = NULL, type = 'source')
137
+ ```
138
+
97
139
  ## Basic Usage
98
140
 
99
141
  ### Python
@@ -58,8 +58,10 @@ in Rust.
58
58
 
59
59
  ### Installing R package
60
60
 
61
- To install the R package, first you need to retrieve one of the compiled file
62
- within the package [releases](https://github.com/sbhattlab/phylo2vec/releases).
61
+ #### Option 1: from a release (Windows, Mac, Ubuntu >= 22.04)
62
+
63
+ Retrieve one of the compiled binaries from the
64
+ [releases](https://github.com/sbhattlab/phylo2vec/releases) that fits your OS.
63
65
  Once the file is downloaded, simply run `install.packages` in your R command
64
66
  line.
65
67
 
@@ -67,6 +69,41 @@ line.
67
69
  install.packages("/path/to/package_file", repos = NULL, type = 'source')
68
70
  ```
69
71
 
72
+ #### Option 2: using `devtools`
73
+
74
+ ⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
75
+ build the core package.
76
+
77
+ ```R
78
+ devtools::install_github("sbhattlab/phylo2vec", subdir="./r-phylo2vec", build = FALSE)
79
+ ```
80
+
81
+ Note: to download a specific version, use:
82
+
83
+ ```R
84
+ devtools::install_github("sbhattlab/phylo2vec@vX.Y.Z", subdir="./r-phylo2vec", build = FALSE)
85
+ ```
86
+
87
+ #### Option 3: manual installation
88
+
89
+ ⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
90
+ build the core package.
91
+
92
+ Clone the repository and run the following `install.packages` in your R command
93
+ line.
94
+
95
+ Note: to download a specific version, you can use `git checkout` to a desired
96
+ tag.
97
+
98
+ ```bash
99
+ git clone https://github.com/sbhattlab/phylo2vec
100
+ cd phylo2vec
101
+ ```
102
+
103
+ ```R
104
+ install.packages("./r-phylo2vec", repos = NULL, type = 'source')
105
+ ```
106
+
70
107
  ## Basic Usage
71
108
 
72
109
  ### Python
@@ -20,7 +20,7 @@ def from_ancestry(ancestry: np.ndarray) -> np.ndarray:
20
20
 
21
21
  Returns
22
22
  -------
23
- numpy.array
23
+ numpy.ndarray
24
24
  Phylo2Vec vector
25
25
  """
26
26
  v_list = core.from_ancestry(ancestry)
@@ -50,12 +50,12 @@ def to_ancestry(v: np.ndarray) -> np.ndarray:
50
50
 
51
51
  Parameters
52
52
  ----------
53
- v : numpy.array
53
+ v : numpy.ndarray
54
54
  Phylo2Vec vector
55
55
 
56
56
  Returns
57
57
  -------
58
- ancestry : numpy.array
58
+ ancestry : numpy.ndarray
59
59
  Ancestry matrix
60
60
  1st column: child 1
61
61
  2nd column: child 2
@@ -21,7 +21,7 @@ def from_edges(edges: List[Tuple[int, int]]) -> np.ndarray:
21
21
 
22
22
  Returns
23
23
  -------
24
- v : numpy.array
24
+ v : numpy.ndarray
25
25
  Phylo2Vec vector
26
26
  """
27
27
  v = core.from_edges(edges)
@@ -35,7 +35,7 @@ def to_edges(v: np.ndarray) -> List[Tuple[int, int]]:
35
35
 
36
36
  Parameters
37
37
  ----------
38
- v : numpy.array
38
+ v : numpy.ndarray
39
39
  Phylo2Vec vector
40
40
 
41
41
  Returns
@@ -31,7 +31,7 @@ def to_newick(vector_or_matrix: np.ndarray) -> str:
31
31
 
32
32
  Parameters
33
33
  ----------
34
- vector_or_matrix : numpy.array
34
+ vector_or_matrix : numpy.ndarray
35
35
  Phylo2Vec vector (ndim == 1)/matrix (ndim == 2)
36
36
 
37
37
  Returns
@@ -11,10 +11,9 @@ from typing import List, Tuple
11
11
  def from_pairs(pairs: List[Tuple[int, int]]) -> np.ndarray:
12
12
  """Convert a list of pairs to a Phylo2Vec vector
13
13
 
14
- Each pair is represented as a list of pairs (B, L)
14
+ Each pair is represented as a tuple (B, L)
15
15
  indicating that leaf L descends from branch B.
16
16
 
17
-
18
17
  Parameters
19
18
  ----------
20
19
  pairs : List[Tuple[int, int]]
@@ -22,7 +21,7 @@ def from_pairs(pairs: List[Tuple[int, int]]) -> np.ndarray:
22
21
 
23
22
  Returns
24
23
  -------
25
- v : numpy.array
24
+ v : numpy.ndarray
26
25
  Phylo2Vec vector
27
26
  """
28
27
  v = core.from_pairs(pairs)
@@ -32,12 +31,12 @@ def from_pairs(pairs: List[Tuple[int, int]]) -> np.ndarray:
32
31
  def to_pairs(v: np.ndarray) -> List[Tuple[int, int]]:
33
32
  """Convert a Phylo2Vec vector to a list of pairs
34
33
 
35
- Each pair is represented as a list of pairs (B, L)
34
+ Each pair is represented as a tuple (B, L)
36
35
  indicating that leaf L descends from branch B.
37
36
 
38
37
  Parameters
39
38
  ----------
40
- v : numpy.array
39
+ v : numpy.ndarray
41
40
  Phylo2Vec vector
42
41
 
43
42
  Returns
@@ -44,7 +44,7 @@ def save_newick(
44
44
 
45
45
  Parameters
46
46
  ----------
47
- vector_or_matrix : numpy.array
47
+ vector_or_matrix : numpy.ndarray
48
48
  Phylo2Vec vector (ndim == 1)/matrix (ndim == 2)
49
49
  filepath : str
50
50
  Path to the output file
@@ -4,6 +4,7 @@ import warnings
4
4
  import numpy as np
5
5
 
6
6
  from phylo2vec import _phylo2vec_core as core
7
+ from phylo2vec.utils.matrix import check_matrix
7
8
  from phylo2vec.utils.vector import check_vector
8
9
 
9
10
 
@@ -14,7 +15,7 @@ def cophenetic_distances(vector_or_matrix, unrooted=False):
14
15
 
15
16
  Parameters
16
17
  ----------
17
- vector_or_matrix : numpy.array
18
+ vector_or_matrix : numpy.ndarray
18
19
  Phylo2Vec vector (ndim == 1)/matrix (ndim == 2)
19
20
 
20
21
  Returns
@@ -46,7 +47,7 @@ def cophenetic_distances(vector_or_matrix, unrooted=False):
46
47
  PAIRWISE_DISTANCES = {"cophenetic": cophenetic_distances}
47
48
 
48
49
 
49
- def pairwise_distances(v, metric="cophenetic"):
50
+ def pairwise_distances(vector_or_matrix, metric="cophenetic"):
50
51
  """
51
52
  Compute a pairwise distance matrix
52
53
  for tree nodes from a Phylo2Vec vector.
@@ -55,8 +56,8 @@ def pairwise_distances(v, metric="cophenetic"):
55
56
 
56
57
  Parameters
57
58
  ----------
58
- v : numpy.ndarray
59
- Phylo2Vec vector
59
+ vector_or_matrix : numpy.ndarray
60
+ Phylo2Vec vector (ndim == 1)/matrix (ndim == 2)
60
61
  metric : str, optional
61
62
  Pairwise distance metric, by default "cophenetic"
62
63
 
@@ -65,8 +66,15 @@ def pairwise_distances(v, metric="cophenetic"):
65
66
  numpy.ndarray
66
67
  Distance matrix
67
68
  """
68
- check_vector(v)
69
+ if vector_or_matrix.ndim == 2:
70
+ check_matrix(vector_or_matrix)
71
+ elif vector_or_matrix.ndim == 1:
72
+ check_vector(vector_or_matrix)
73
+ else:
74
+ raise ValueError(
75
+ "vector_or_matrix should either be a vector (ndim == 1) or matrix (ndim == 2)"
76
+ )
69
77
 
70
78
  func = PAIRWISE_DISTANCES[metric]
71
79
 
72
- return func(v)
80
+ return func(vector_or_matrix)
@@ -1,6 +1,7 @@
1
1
  """Phylo2Vec-based optimisation methods."""
2
2
 
3
3
  from ._base import BaseOptimizer
4
+ from ._gradme import GradMEOptimizer
4
5
  from ._hc import HillClimbingOptimizer
5
6
 
6
- __all__ = ["BaseOptimizer", "HillClimbingOptimizer"]
7
+ __all__ = ["BaseOptimizer", "GradMEOptimizer", "HillClimbingOptimizer"]
@@ -1,16 +1,46 @@
1
1
  """Base class for all optimisation methods in Phylo2Vec."""
2
2
 
3
+ import multiprocessing
3
4
  import random
5
+ import time
6
+
7
+ from dataclasses import dataclass
8
+ from typing import Dict, List, Final
4
9
 
5
- import numba as nb
6
10
  import numpy as np
7
11
 
8
12
  from phylo2vec.datasets import read_fasta
9
13
  from phylo2vec.utils.vector import sample_vector
10
14
 
15
+ # Multiprocessing
16
+ DEFAULT_N_JOBS: Final = multiprocessing.cpu_count() // 4
17
+ MIN_N_JOBS: Final = 4
18
+ # Seeding
11
19
  MAX_SEED = 42
12
20
 
13
21
 
22
+ @dataclass
23
+ class BaseResult:
24
+ """Result of the optimization process.
25
+
26
+ Attributes
27
+ ----------
28
+ v : numpy.ndarray
29
+ Optimized phylo2vec vector.
30
+ label_mapping : Dict[int, str]
31
+ Mapping of leaf labels (integer) to taxa.
32
+ best_score : float
33
+ The best score achieved during optimization.
34
+ scores : List[float]
35
+ List of scores obtained during optimization.
36
+ """
37
+
38
+ v: np.ndarray
39
+ label_mapping: Dict[int, str]
40
+ best_score: float
41
+ scores: List[float]
42
+
43
+
14
44
  class BaseOptimizer:
15
45
  """
16
46
  Base class for all phylo2vec-based optimizers
@@ -21,25 +51,28 @@ class BaseOptimizer:
21
51
  Random seed, by default None
22
52
  """
23
53
 
24
- def __init__(self, random_seed=None):
54
+ def __init__(self, random_seed=None, verbose=False, n_jobs=None):
25
55
  self.random_seed = (
26
56
  random.randint(0, MAX_SEED) if random_seed is None else random_seed
27
57
  )
28
58
  random.seed(self.random_seed)
29
59
  np.random.seed(self.random_seed)
30
60
 
61
+ self.verbose = verbose
62
+
63
+ self.n_jobs = self._infer_n_jobs(n_jobs)
64
+
31
65
  @staticmethod
32
- def _make_label_mapping(records):
33
- label_mapping = nb.typed.Dict.empty(
34
- key_type=nb.types.int64, value_type=nb.types.unicode_type
35
- )
66
+ def _infer_n_jobs(n_jobs=None):
67
+ return n_jobs or max(MIN_N_JOBS, DEFAULT_N_JOBS)
36
68
 
37
- for i, r in enumerate(records):
38
- label_mapping[i] = r.id.replace(" ", ".")
69
+ @staticmethod
70
+ def _make_label_mapping(records):
71
+ label_mapping = dict(enumerate(r.id.replace(" ", ".") for r in records))
39
72
 
40
73
  return label_mapping
41
74
 
42
- def fit(self, fasta_path):
75
+ def fit(self, fasta_path) -> BaseResult:
43
76
  """Fit an optimizer to a fasta file
44
77
 
45
78
  Parameters
@@ -51,7 +84,7 @@ class BaseOptimizer:
51
84
  -------
52
85
  v_opt : numpy.ndarray
53
86
  Optimized phylo2vec vector
54
- label_mapping : dict[int, str]
87
+ label_mapping : Dict[int, str]
55
88
  Mapping of leaf labels (integer) to taxa
56
89
  losses : array-like
57
90
  List/Array of collected losses
@@ -67,9 +100,19 @@ class BaseOptimizer:
67
100
 
68
101
  v_init = sample_vector(n_leaves)
69
102
 
70
- v_opt, label_mapping, losses = self._optimise(fasta_path, v_init, label_mapping)
103
+ start_time = time.time()
104
+
105
+ result = self._optimise(fasta_path, v_init, label_mapping)
106
+
107
+ end_time = time.time()
108
+
109
+ if self.verbose:
110
+ print(
111
+ f"Optimisation finished in {end_time - start_time:.2f} seconds "
112
+ f"with {len(result.scores)} loss evaluations."
113
+ )
71
114
 
72
- return v_opt, label_mapping, losses
115
+ return result
73
116
 
74
117
  def _optimise(self, fasta_path, v, label_mapping):
75
118
  raise NotImplementedError
@@ -0,0 +1,200 @@
1
+ """Methods for GradME optimisation."""
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import jax.numpy as jnp
6
+ import optax
7
+ import rpy2
8
+ import rpy2.robjects as ro
9
+
10
+ from jax import jit, value_and_grad
11
+ from rpy2.robjects import pandas2ri
12
+ from rpy2.robjects.conversion import localconverter
13
+ from rpy2.robjects.packages import importr
14
+ from tqdm import tqdm
15
+
16
+ from phylo2vec.opt._base import BaseOptimizer, BaseResult
17
+ from phylo2vec.opt._gradme_losses import gradme_loss
18
+ from phylo2vec.utils.vector import queue_shuffle, reroot_at_random
19
+
20
+
21
+ # Disable rpy2 warning
22
+ rpy2.rinterface_lib.callbacks.consolewrite_warnerror = lambda *args: None
23
+
24
+
25
+ @dataclass
26
+ class GradMEResult(BaseResult):
27
+ """Result of the GradME optimization.
28
+
29
+ See BaseResult for more details.
30
+
31
+ Attributes
32
+ ----------
33
+ W : jax.numpy.ndarray
34
+ The optimized weight matrix representing the phylogenetic tree.
35
+ """
36
+
37
+ W: jnp.ndarray
38
+
39
+
40
+ class GradMEOptimizer(BaseOptimizer):
41
+ """GradME Optimizer for phylogenetic trees.
42
+
43
+ This optimizer uses the GradME algorithm to optimize phylogenetic trees.
44
+ It computes the loss using the GradME loss function and updates the tree
45
+ representation accordingly.
46
+
47
+ Parameters
48
+ ----------
49
+ random_seed : int, optional
50
+ Random seed for reproducibility, by default None
51
+ n_jobs : int, optional
52
+ Number of parallel jobs, by default None
53
+ verbose : bool, optional
54
+ Verbosity level, by default False
55
+ """
56
+
57
+ def __init__(
58
+ self,
59
+ model,
60
+ solver="adafactor",
61
+ learning_rate=1.5,
62
+ rooted=False,
63
+ n_shuffles=100,
64
+ n_iter_per_step=5000,
65
+ tol=1e-8,
66
+ random_seed=None,
67
+ n_jobs=None,
68
+ verbose=False,
69
+ ):
70
+ super().__init__(random_seed=random_seed, n_jobs=n_jobs, verbose=verbose)
71
+
72
+ self.model = model
73
+
74
+ self.optimizer = getattr(optax, solver)(learning_rate=learning_rate)
75
+ self.learning_rate = learning_rate
76
+ self.rooted = rooted
77
+ self.n_shuffles = n_shuffles
78
+ self.n_iter_per_step = n_iter_per_step
79
+ self.tol = tol
80
+
81
+ def _optimise(
82
+ self,
83
+ fasta_path,
84
+ v,
85
+ label_mapping,
86
+ ):
87
+ data = self.pdist(fasta_path, self.model)
88
+ dm = jnp.asarray(data)
89
+ k = dm.shape[0] - 1
90
+
91
+ # Forward and backward pass function
92
+ value_and_grad_fun = jit(value_and_grad(gradme_loss))
93
+
94
+ # Initial "best" score, set as an arbitrarily high value
95
+ best_score = 1e8
96
+
97
+ # List of scores obtained during optimization
98
+ scores = []
99
+
100
+ iterator = range(self.n_shuffles)
101
+
102
+ if self.verbose:
103
+ iterator = tqdm(iterator)
104
+
105
+ for _ in iterator:
106
+ w_in = self._init_W(k)
107
+
108
+ w_out = self._step(
109
+ w_in,
110
+ dm,
111
+ value_and_grad_fun,
112
+ )
113
+
114
+ v = w_out.argmax(1)
115
+
116
+ w_discrete = jnp.eye(w_out.shape[0])[v]
117
+
118
+ score = gradme_loss(w_discrete, dm, rooted=True)
119
+
120
+ best_score = min(best_score, score)
121
+
122
+ scores.append(best_score)
123
+
124
+ if not self.rooted:
125
+ v = reroot_at_random(v)
126
+
127
+ # Queue shuffle
128
+ _, vec_mapping = queue_shuffle(v, shuffle_cherries=True)
129
+
130
+ # Re-arrange the label mapping and the distance matrix
131
+ col_order = []
132
+ for i, idx in enumerate(vec_mapping):
133
+ label_mapping[i] = label_mapping[idx]
134
+ col_order.append(label_mapping[i])
135
+
136
+ dm = jnp.asarray(data.loc[col_order, col_order])
137
+
138
+ if self.verbose:
139
+ iterator.set_postfix({"\033[95m Best score ": best_score})
140
+
141
+ v = jnp.eye(w_out.shape[0])[w_out.argmax(1)]
142
+
143
+ best_params = GradMEResult(
144
+ v=v,
145
+ best_score=best_score,
146
+ scores=scores,
147
+ W=w_out,
148
+ label_mapping=label_mapping,
149
+ )
150
+
151
+ return best_params
152
+
153
+ @staticmethod
154
+ def _init_W(k, eps=1e-8):
155
+ x = jnp.tril(jnp.ones((k, k)))
156
+
157
+ w_init = x / (x.sum(1)[:, jnp.newaxis] + eps)
158
+
159
+ return w_init
160
+
161
+ def _step(self, w, dm, value_and_grad_fun):
162
+ state = self.optimizer.init(w)
163
+
164
+ prev_loss = 1e8
165
+
166
+ for _ in range(self.n_iter_per_step):
167
+ loss, gradients = value_and_grad_fun(w, dm, self.rooted)
168
+
169
+ if jnp.abs(loss - prev_loss) < self.tol:
170
+ break
171
+
172
+ prev_loss = loss
173
+
174
+ updates, state = self.optimizer.update(gradients, state, w)
175
+
176
+ w = optax.apply_updates(w, updates)
177
+
178
+ return w
179
+
180
+ @staticmethod
181
+ def pdist(fasta_path, model):
182
+ with localconverter(ro.default_converter + pandas2ri.converter):
183
+ importr("ape")
184
+
185
+ ro.globalenv["fasta_path"] = fasta_path
186
+ ro.globalenv["model"] = model
187
+
188
+ # DNA evolution model: chosen by the caller via `model` (forwarded to dist.dna)
189
+ dm = ro.r(
190
+ """
191
+ aln <- read.FASTA(fasta_path, type = "DNA")
192
+
193
+ dm <- dist.dna(aln, model = model)
194
+
195
+ D <- as.data.frame(as.matrix(dm))
196
+ D
197
+ """
198
+ )
199
+
200
+ return dm