phylo2vec 1.2.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/Cargo.lock +1 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/PKG-INFO +46 -4
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/README.md +39 -2
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/base/ancestry.py +3 -3
- {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.0}/phylo2vec/base/edges.py +2 -2
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/base/newick.py +1 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/base/pairs.py +4 -5
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/io/writer.py +1 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/metrics/pairwise.py +14 -6
- {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.0}/phylo2vec/opt/__init__.py +2 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/opt/_base.py +55 -12
- phylo2vec-1.3.0/phylo2vec/opt/_gradme.py +200 -0
- phylo2vec-1.3.0/phylo2vec/opt/_gradme_losses.py +117 -0
- {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.0}/phylo2vec/opt/_hc.py +29 -25
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/opt/_hc_losses.py +4 -4
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/ops/mod.rs +63 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/ops/vector.rs +199 -23
- {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.0}/phylo2vec/utils/newick.py +4 -4
- phylo2vec-1.3.0/phylo2vec/utils/vector.py +258 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/Cargo.toml +1 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/README.md +39 -2
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/base/ancestry.py +3 -3
- {phylo2vec-1.2.0 → phylo2vec-1.3.0/py-phylo2vec}/phylo2vec/base/edges.py +2 -2
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/base/newick.py +1 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/base/pairs.py +4 -5
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/io/writer.py +1 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/metrics/pairwise.py +14 -6
- {phylo2vec-1.2.0 → phylo2vec-1.3.0/py-phylo2vec}/phylo2vec/opt/__init__.py +2 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/opt/_base.py +55 -12
- phylo2vec-1.3.0/py-phylo2vec/phylo2vec/opt/_gradme.py +200 -0
- phylo2vec-1.3.0/py-phylo2vec/phylo2vec/opt/_gradme_losses.py +117 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0/py-phylo2vec}/phylo2vec/opt/_hc.py +29 -25
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/opt/_hc_losses.py +4 -4
- {phylo2vec-1.2.0 → phylo2vec-1.3.0/py-phylo2vec}/phylo2vec/utils/newick.py +4 -4
- phylo2vec-1.3.0/py-phylo2vec/phylo2vec/utils/vector.py +258 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/src/lib.rs +12 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/tests/test_metrics.py +23 -1
- phylo2vec-1.3.0/py-phylo2vec/tests/test_reorder.py +154 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/pyproject.toml +9 -2
- phylo2vec-1.2.0/phylo2vec/utils/vector.py +0 -365
- phylo2vec-1.2.0/py-phylo2vec/phylo2vec/utils/vector.py +0 -365
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/Cargo.toml +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/LICENSE +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/Cargo.toml +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/README.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/base/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/benches/bench_main.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/benches/benchmarks/core.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/benches/benchmarks/mod.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/_base.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/data/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/descr/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/descr/fluA.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/descr/h3n2.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/descr/m501.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/descr/yeast.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/datasets/descr/zika.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/io/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/io/_validation.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/io/reader.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/metrics/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/opt/README.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/lib.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/main.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/profile_main.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/mod.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/ops/avl.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/ops/matrix/mod.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/ops/newick/mod.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/ops/newick/newick_patterns.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/tree_vec/types.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/src/utils.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/utils/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/phylo2vec/utils/matrix.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/LICENSE +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/benchmarks/test_bench.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/base/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/_base.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/data/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/descr/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/descr/fluA.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/descr/h3n2.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/descr/m501.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/descr/yeast.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/datasets/descr/zika.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/io/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/io/_validation.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/io/reader.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/metrics/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/opt/README.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/utils/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/phylo2vec/utils/matrix.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/tests/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/tests/config.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/tests/test_base.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/tests/test_io.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.0}/py-phylo2vec/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: phylo2vec
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Classifier: Development Status :: 3 - Alpha
|
|
5
5
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
6
6
|
Classifier: Programming Language :: Python
|
|
@@ -11,12 +11,17 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
11
11
|
Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
|
|
12
12
|
Classifier: Operating System :: Unix
|
|
13
13
|
Classifier: Operating System :: Microsoft :: Windows
|
|
14
|
-
Requires-Dist: numba>=0.56.4
|
|
15
14
|
Requires-Dist: numpy>=1.22,<2.1
|
|
16
15
|
Requires-Dist: biopython>=1.84.0
|
|
17
16
|
Requires-Dist: joblib>=1.2.0
|
|
18
17
|
Requires-Dist: ete3==3.1.3
|
|
19
18
|
Requires-Dist: six>=1.16.0
|
|
19
|
+
Requires-Dist: jax ; extra == 'opt'
|
|
20
|
+
Requires-Dist: joblib ; extra == 'opt'
|
|
21
|
+
Requires-Dist: optax ; extra == 'opt'
|
|
22
|
+
Requires-Dist: rpy2 ; extra == 'opt'
|
|
23
|
+
Requires-Dist: tqdm ; extra == 'opt'
|
|
24
|
+
Provides-Extra: opt
|
|
20
25
|
License-File: LICENSE
|
|
21
26
|
Summary: Phylo2Vec: integer vector representation of binary (phylogenetic) trees
|
|
22
27
|
Author: Neil Scheidwasser <neil.clow@sund.ku.dk>
|
|
@@ -85,8 +90,10 @@ in Rust.
|
|
|
85
90
|
|
|
86
91
|
### Installing R package
|
|
87
92
|
|
|
88
|
-
|
|
89
|
-
|
|
93
|
+
#### Option 1: from a release (Windows, Mac, Ubuntu >= 22.04)
|
|
94
|
+
|
|
95
|
+
Retrieve one of the compiled binaries from the
|
|
96
|
+
[releases](https://github.com/sbhattlab/phylo2vec/releases) that fits your OS.
|
|
90
97
|
Once the file is downloaded, simply run `install.packages` in your R command
|
|
91
98
|
line.
|
|
92
99
|
|
|
@@ -94,6 +101,41 @@ line.
|
|
|
94
101
|
install.packages("/path/to/package_file", repos = NULL, type = 'source')
|
|
95
102
|
```
|
|
96
103
|
|
|
104
|
+
#### Option 2: using `devtools`
|
|
105
|
+
|
|
106
|
+
⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
|
|
107
|
+
build the core package.
|
|
108
|
+
|
|
109
|
+
```R
|
|
110
|
+
devtools::install_github("sbhattlab/phylo2vec", subdir="./r-phylo2vec", build = FALSE)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Note: to download a specific version, use:
|
|
114
|
+
|
|
115
|
+
```R
|
|
116
|
+
devtools::install_github("sbhattlab/phylo2vec@vX.Y.Z", subdir="./r-phylo2vec", build = FALSE)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
#### Option 3: manual installation
|
|
120
|
+
|
|
121
|
+
⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
|
|
122
|
+
build the core package.
|
|
123
|
+
|
|
124
|
+
Clone the repository and run the following `install.packages` call in your R command
|
|
125
|
+
line.
|
|
126
|
+
|
|
127
|
+
Note: to download a specific version, you can use `git checkout` to a desired
|
|
128
|
+
tag.
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
git clone https://github.com/sbhattlab/phylo2vec
|
|
132
|
+
cd phylo2vec
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
```R
|
|
136
|
+
install.packages("./r-phylo2vec", repos = NULL, type = 'source')
|
|
137
|
+
```
|
|
138
|
+
|
|
97
139
|
## Basic Usage
|
|
98
140
|
|
|
99
141
|
### Python
|
|
@@ -58,8 +58,10 @@ in Rust.
|
|
|
58
58
|
|
|
59
59
|
### Installing R package
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
|
|
61
|
+
#### Option 1: from a release (Windows, Mac, Ubuntu >= 22.04)
|
|
62
|
+
|
|
63
|
+
Retrieve one of the compiled binaries from the
|
|
64
|
+
[releases](https://github.com/sbhattlab/phylo2vec/releases) that fits your OS.
|
|
63
65
|
Once the file is downloaded, simply run `install.packages` in your R command
|
|
64
66
|
line.
|
|
65
67
|
|
|
@@ -67,6 +69,41 @@ line.
|
|
|
67
69
|
install.packages("/path/to/package_file", repos = NULL, type = 'source')
|
|
68
70
|
```
|
|
69
71
|
|
|
72
|
+
#### Option 2: using `devtools`
|
|
73
|
+
|
|
74
|
+
⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
|
|
75
|
+
build the core package.
|
|
76
|
+
|
|
77
|
+
```R
|
|
78
|
+
devtools::install_github("sbhattlab/phylo2vec", subdir="./r-phylo2vec", build = FALSE)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Note: to download a specific version, use:
|
|
82
|
+
|
|
83
|
+
```R
|
|
84
|
+
devtools::install_github("sbhattlab/phylo2vec@vX.Y.Z", subdir="./r-phylo2vec", build = FALSE)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
#### Option 3: manual installation
|
|
88
|
+
|
|
89
|
+
⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
|
|
90
|
+
build the core package.
|
|
91
|
+
|
|
92
|
+
Clone the repository and run the following `install.packages` call in your R command
|
|
93
|
+
line.
|
|
94
|
+
|
|
95
|
+
Note: to download a specific version, you can use `git checkout` to a desired
|
|
96
|
+
tag.
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
git clone https://github.com/sbhattlab/phylo2vec
|
|
100
|
+
cd phylo2vec
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
```R
|
|
104
|
+
install.packages("./r-phylo2vec", repos = NULL, type = 'source')
|
|
105
|
+
```
|
|
106
|
+
|
|
70
107
|
## Basic Usage
|
|
71
108
|
|
|
72
109
|
### Python
|
|
@@ -20,7 +20,7 @@ def from_ancestry(ancestry: np.ndarray) -> np.ndarray:
|
|
|
20
20
|
|
|
21
21
|
Returns
|
|
22
22
|
-------
|
|
23
|
-
numpy.
|
|
23
|
+
numpy.ndarray
|
|
24
24
|
Phylo2Vec vector
|
|
25
25
|
"""
|
|
26
26
|
v_list = core.from_ancestry(ancestry)
|
|
@@ -50,12 +50,12 @@ def to_ancestry(v: np.ndarray) -> np.ndarray:
|
|
|
50
50
|
|
|
51
51
|
Parameters
|
|
52
52
|
----------
|
|
53
|
-
v : numpy.
|
|
53
|
+
v : numpy.ndarray
|
|
54
54
|
Phylo2Vec vector
|
|
55
55
|
|
|
56
56
|
Returns
|
|
57
57
|
-------
|
|
58
|
-
ancestry : numpy.
|
|
58
|
+
ancestry : numpy.ndarray
|
|
59
59
|
Ancestry matrix
|
|
60
60
|
1st column: child 1
|
|
61
61
|
2nd column: child 2
|
|
@@ -21,7 +21,7 @@ def from_edges(edges: List[Tuple[int, int]]) -> np.ndarray:
|
|
|
21
21
|
|
|
22
22
|
Returns
|
|
23
23
|
-------
|
|
24
|
-
v : numpy.
|
|
24
|
+
v : numpy.ndarray
|
|
25
25
|
Phylo2Vec vector
|
|
26
26
|
"""
|
|
27
27
|
v = core.from_edges(edges)
|
|
@@ -35,7 +35,7 @@ def to_edges(v: np.ndarray) -> List[Tuple[int, int]]:
|
|
|
35
35
|
|
|
36
36
|
Parameters
|
|
37
37
|
----------
|
|
38
|
-
v : numpy.
|
|
38
|
+
v : numpy.ndarray
|
|
39
39
|
Phylo2Vec vector
|
|
40
40
|
|
|
41
41
|
Returns
|
|
@@ -11,10 +11,9 @@ from typing import List, Tuple
|
|
|
11
11
|
def from_pairs(pairs: List[Tuple[int, int]]) -> np.ndarray:
|
|
12
12
|
"""Convert a list of pairs to a Phylo2Vec vector
|
|
13
13
|
|
|
14
|
-
Each pair is represented as a
|
|
14
|
+
Each pair is represented as a tuple (B, L)
|
|
15
15
|
indicating that leaf L descends from branch B.
|
|
16
16
|
|
|
17
|
-
|
|
18
17
|
Parameters
|
|
19
18
|
----------
|
|
20
19
|
pairs : List[Tuple[int, int]]
|
|
@@ -22,7 +21,7 @@ def from_pairs(pairs: List[Tuple[int, int]]) -> np.ndarray:
|
|
|
22
21
|
|
|
23
22
|
Returns
|
|
24
23
|
-------
|
|
25
|
-
v : numpy.
|
|
24
|
+
v : numpy.ndarray
|
|
26
25
|
Phylo2Vec vector
|
|
27
26
|
"""
|
|
28
27
|
v = core.from_pairs(pairs)
|
|
@@ -32,12 +31,12 @@ def from_pairs(pairs: List[Tuple[int, int]]) -> np.ndarray:
|
|
|
32
31
|
def to_pairs(v: np.ndarray) -> List[Tuple[int, int]]:
|
|
33
32
|
"""Convert a Phylo2Vec vector to a list of pairs
|
|
34
33
|
|
|
35
|
-
Each pair is represented as a
|
|
34
|
+
Each pair is represented as a tuple (B, L)
|
|
36
35
|
indicating that leaf L descends from branch B.
|
|
37
36
|
|
|
38
37
|
Parameters
|
|
39
38
|
----------
|
|
40
|
-
v : numpy.
|
|
39
|
+
v : numpy.ndarray
|
|
41
40
|
Phylo2Vec vector
|
|
42
41
|
|
|
43
42
|
Returns
|
|
@@ -4,6 +4,7 @@ import warnings
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
|
|
6
6
|
from phylo2vec import _phylo2vec_core as core
|
|
7
|
+
from phylo2vec.utils.matrix import check_matrix
|
|
7
8
|
from phylo2vec.utils.vector import check_vector
|
|
8
9
|
|
|
9
10
|
|
|
@@ -14,7 +15,7 @@ def cophenetic_distances(vector_or_matrix, unrooted=False):
|
|
|
14
15
|
|
|
15
16
|
Parameters
|
|
16
17
|
----------
|
|
17
|
-
vector_or_matrix : numpy.
|
|
18
|
+
vector_or_matrix : numpy.ndarray
|
|
18
19
|
Phylo2Vec vector (ndim == 1)/matrix (ndim == 2)
|
|
19
20
|
|
|
20
21
|
Returns
|
|
@@ -46,7 +47,7 @@ def cophenetic_distances(vector_or_matrix, unrooted=False):
|
|
|
46
47
|
PAIRWISE_DISTANCES = {"cophenetic": cophenetic_distances}
|
|
47
48
|
|
|
48
49
|
|
|
49
|
-
def pairwise_distances(
|
|
50
|
+
def pairwise_distances(vector_or_matrix, metric="cophenetic"):
|
|
50
51
|
"""
|
|
51
52
|
Compute a pairwise distance matrix
|
|
52
53
|
for tree nodes from a Phylo2Vec vector.
|
|
@@ -55,8 +56,8 @@ def pairwise_distances(v, metric="cophenetic"):
|
|
|
55
56
|
|
|
56
57
|
Parameters
|
|
57
58
|
----------
|
|
58
|
-
|
|
59
|
-
Phylo2Vec vector
|
|
59
|
+
vector_or_matrix : numpy.ndarray
|
|
60
|
+
Phylo2Vec vector (ndim == 1)/matrix (ndim == 2)
|
|
60
61
|
metric : str, optional
|
|
61
62
|
Pairwise distance metric, by default "cophenetic"
|
|
62
63
|
|
|
@@ -65,8 +66,15 @@ def pairwise_distances(v, metric="cophenetic"):
|
|
|
65
66
|
numpy.ndarray
|
|
66
67
|
Distance matrix
|
|
67
68
|
"""
|
|
68
|
-
|
|
69
|
+
if vector_or_matrix.ndim == 2:
|
|
70
|
+
check_matrix(vector_or_matrix)
|
|
71
|
+
elif vector_or_matrix.ndim == 1:
|
|
72
|
+
check_vector(vector_or_matrix)
|
|
73
|
+
else:
|
|
74
|
+
raise ValueError(
|
|
75
|
+
"vector_or_matrix should either be a vector (ndim == 1) or matrix (ndim == 2)"
|
|
76
|
+
)
|
|
69
77
|
|
|
70
78
|
func = PAIRWISE_DISTANCES[metric]
|
|
71
79
|
|
|
72
|
-
return func(
|
|
80
|
+
return func(vector_or_matrix)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Phylo2Vec-based optimisation methods."""
|
|
2
2
|
|
|
3
3
|
from ._base import BaseOptimizer
|
|
4
|
+
from ._gradme import GradMEOptimizer
|
|
4
5
|
from ._hc import HillClimbingOptimizer
|
|
5
6
|
|
|
6
|
-
__all__ = ["BaseOptimizer", "HillClimbingOptimizer"]
|
|
7
|
+
__all__ = ["BaseOptimizer", "GradMEOptimizer", "HillClimbingOptimizer"]
|
|
@@ -1,16 +1,46 @@
|
|
|
1
1
|
"""Base class for all optimisation methods in Phylo2Vec."""
|
|
2
2
|
|
|
3
|
+
import multiprocessing
|
|
3
4
|
import random
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Dict, List, Final
|
|
4
9
|
|
|
5
|
-
import numba as nb
|
|
6
10
|
import numpy as np
|
|
7
11
|
|
|
8
12
|
from phylo2vec.datasets import read_fasta
|
|
9
13
|
from phylo2vec.utils.vector import sample_vector
|
|
10
14
|
|
|
15
|
+
# Multiprocessing
|
|
16
|
+
DEFAULT_N_JOBS: Final = multiprocessing.cpu_count() // 4
|
|
17
|
+
MIN_N_JOBS: Final = 4
|
|
18
|
+
# Seeding
|
|
11
19
|
MAX_SEED = 42
|
|
12
20
|
|
|
13
21
|
|
|
22
|
+
@dataclass
|
|
23
|
+
class BaseResult:
|
|
24
|
+
"""Result of the optimization process.
|
|
25
|
+
|
|
26
|
+
Attributes
|
|
27
|
+
----------
|
|
28
|
+
v : numpy.ndarray
|
|
29
|
+
Optimized phylo2vec vector.
|
|
30
|
+
label_mapping : Dict[int, str]
|
|
31
|
+
Mapping of leaf labels (integer) to taxa.
|
|
32
|
+
best_score : float
|
|
33
|
+
The best score achieved during optimization.
|
|
34
|
+
scores : List[float]
|
|
35
|
+
List of scores obtained during optimization.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
v: np.ndarray
|
|
39
|
+
label_mapping: Dict[int, str]
|
|
40
|
+
best_score: float
|
|
41
|
+
scores: List[float]
|
|
42
|
+
|
|
43
|
+
|
|
14
44
|
class BaseOptimizer:
|
|
15
45
|
"""
|
|
16
46
|
Base class for all phylo2vec-based optimizers
|
|
@@ -21,25 +51,28 @@ class BaseOptimizer:
|
|
|
21
51
|
Random seed, by default None
|
|
22
52
|
"""
|
|
23
53
|
|
|
24
|
-
def __init__(self, random_seed=None):
|
|
54
|
+
def __init__(self, random_seed=None, verbose=False, n_jobs=None):
|
|
25
55
|
self.random_seed = (
|
|
26
56
|
random.randint(0, MAX_SEED) if random_seed is None else random_seed
|
|
27
57
|
)
|
|
28
58
|
random.seed(self.random_seed)
|
|
29
59
|
np.random.seed(self.random_seed)
|
|
30
60
|
|
|
61
|
+
self.verbose = verbose
|
|
62
|
+
|
|
63
|
+
self.n_jobs = self._infer_n_jobs(n_jobs)
|
|
64
|
+
|
|
31
65
|
@staticmethod
|
|
32
|
-
def
|
|
33
|
-
|
|
34
|
-
key_type=nb.types.int64, value_type=nb.types.unicode_type
|
|
35
|
-
)
|
|
66
|
+
def _infer_n_jobs(n_jobs=None):
|
|
67
|
+
return n_jobs or max(MIN_N_JOBS, DEFAULT_N_JOBS)
|
|
36
68
|
|
|
37
|
-
|
|
38
|
-
|
|
69
|
+
@staticmethod
|
|
70
|
+
def _make_label_mapping(records):
|
|
71
|
+
label_mapping = dict(enumerate(r.id.replace(" ", ".") for r in records))
|
|
39
72
|
|
|
40
73
|
return label_mapping
|
|
41
74
|
|
|
42
|
-
def fit(self, fasta_path):
|
|
75
|
+
def fit(self, fasta_path) -> BaseResult:
|
|
43
76
|
"""Fit an optimizer to a fasta file
|
|
44
77
|
|
|
45
78
|
Parameters
|
|
@@ -51,7 +84,7 @@ class BaseOptimizer:
|
|
|
51
84
|
-------
|
|
52
85
|
v_opt : numpy.ndarray
|
|
53
86
|
Optimized phylo2vec vector
|
|
54
|
-
label_mapping :
|
|
87
|
+
label_mapping : Dict[int, str]
|
|
55
88
|
Mapping of leaf labels (integer) to taxa
|
|
56
89
|
losses : array-like
|
|
57
90
|
List/Array of collected losses
|
|
@@ -67,9 +100,19 @@ class BaseOptimizer:
|
|
|
67
100
|
|
|
68
101
|
v_init = sample_vector(n_leaves)
|
|
69
102
|
|
|
70
|
-
|
|
103
|
+
start_time = time.time()
|
|
104
|
+
|
|
105
|
+
result = self._optimise(fasta_path, v_init, label_mapping)
|
|
106
|
+
|
|
107
|
+
end_time = time.time()
|
|
108
|
+
|
|
109
|
+
if self.verbose:
|
|
110
|
+
print(
|
|
111
|
+
f"Optimisation finished in {end_time - start_time:.2f} seconds "
|
|
112
|
+
f"with {len(result.scores)} loss evaluations."
|
|
113
|
+
)
|
|
71
114
|
|
|
72
|
-
return
|
|
115
|
+
return result
|
|
73
116
|
|
|
74
117
|
def _optimise(self, fasta_path, v, label_mapping):
|
|
75
118
|
raise NotImplementedError
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""Methods for GradME optimisation."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
import jax.numpy as jnp
|
|
6
|
+
import optax
|
|
7
|
+
import rpy2
|
|
8
|
+
import rpy2.robjects as ro
|
|
9
|
+
|
|
10
|
+
from jax import jit, value_and_grad
|
|
11
|
+
from rpy2.robjects import pandas2ri
|
|
12
|
+
from rpy2.robjects.conversion import localconverter
|
|
13
|
+
from rpy2.robjects.packages import importr
|
|
14
|
+
from tqdm import tqdm
|
|
15
|
+
|
|
16
|
+
from phylo2vec.opt._base import BaseOptimizer, BaseResult
|
|
17
|
+
from phylo2vec.opt._gradme_losses import gradme_loss
|
|
18
|
+
from phylo2vec.utils.vector import queue_shuffle, reroot_at_random
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Disable rpy2 warning
|
|
22
|
+
rpy2.rinterface_lib.callbacks.consolewrite_warnerror = lambda *args: None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
|
|
26
|
+
class GradMEResult(BaseResult):
|
|
27
|
+
"""Result of the GradME optimization.
|
|
28
|
+
|
|
29
|
+
See BaseResult for more details.
|
|
30
|
+
|
|
31
|
+
Attributes
|
|
32
|
+
----------
|
|
33
|
+
W : jax.numpy.ndarray
|
|
34
|
+
The optimized weight matrix representing the phylogenetic tree.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
W: jnp.ndarray
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class GradMEOptimizer(BaseOptimizer):
|
|
41
|
+
"""GradME Optimizer for phylogenetic trees.
|
|
42
|
+
|
|
43
|
+
This optimizer uses the GradME algorithm to optimize phylogenetic trees.
|
|
44
|
+
It computes the loss using the GradME loss function and updates the tree
|
|
45
|
+
representation accordingly.
|
|
46
|
+
|
|
47
|
+
Parameters
|
|
48
|
+
----------
|
|
49
|
+
random_seed : int, optional
|
|
50
|
+
Random seed for reproducibility, by default None
|
|
51
|
+
n_jobs : int, optional
|
|
52
|
+
Number of parallel jobs, by default None
|
|
53
|
+
verbose : bool, optional
|
|
54
|
+
Verbosity level, by default False
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
def __init__(
|
|
58
|
+
self,
|
|
59
|
+
model,
|
|
60
|
+
solver="adafactor",
|
|
61
|
+
learning_rate=1.5,
|
|
62
|
+
rooted=False,
|
|
63
|
+
n_shuffles=100,
|
|
64
|
+
n_iter_per_step=5000,
|
|
65
|
+
tol=1e-8,
|
|
66
|
+
random_seed=None,
|
|
67
|
+
n_jobs=None,
|
|
68
|
+
verbose=False,
|
|
69
|
+
):
|
|
70
|
+
super().__init__(random_seed=random_seed, n_jobs=n_jobs, verbose=verbose)
|
|
71
|
+
|
|
72
|
+
self.model = model
|
|
73
|
+
|
|
74
|
+
self.optimizer = getattr(optax, solver)(learning_rate=learning_rate)
|
|
75
|
+
self.learning_rate = learning_rate
|
|
76
|
+
self.rooted = rooted
|
|
77
|
+
self.n_shuffles = n_shuffles
|
|
78
|
+
self.n_iter_per_step = n_iter_per_step
|
|
79
|
+
self.tol = tol
|
|
80
|
+
|
|
81
|
+
def _optimise(
|
|
82
|
+
self,
|
|
83
|
+
fasta_path,
|
|
84
|
+
v,
|
|
85
|
+
label_mapping,
|
|
86
|
+
):
|
|
87
|
+
data = self.pdist(fasta_path, self.model)
|
|
88
|
+
dm = jnp.asarray(data)
|
|
89
|
+
k = dm.shape[0] - 1
|
|
90
|
+
|
|
91
|
+
# Forward and backward pass function
|
|
92
|
+
value_and_grad_fun = jit(value_and_grad(gradme_loss))
|
|
93
|
+
|
|
94
|
+
# Initial "best" score, set as an arbitrarily high value
|
|
95
|
+
best_score = 1e8
|
|
96
|
+
|
|
97
|
+
# List of scores obtained during optimization
|
|
98
|
+
scores = []
|
|
99
|
+
|
|
100
|
+
iterator = range(self.n_shuffles)
|
|
101
|
+
|
|
102
|
+
if self.verbose:
|
|
103
|
+
iterator = tqdm(iterator)
|
|
104
|
+
|
|
105
|
+
for _ in iterator:
|
|
106
|
+
w_in = self._init_W(k)
|
|
107
|
+
|
|
108
|
+
w_out = self._step(
|
|
109
|
+
w_in,
|
|
110
|
+
dm,
|
|
111
|
+
value_and_grad_fun,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
v = w_out.argmax(1)
|
|
115
|
+
|
|
116
|
+
w_discrete = jnp.eye(w_out.shape[0])[v]
|
|
117
|
+
|
|
118
|
+
score = gradme_loss(w_discrete, dm, rooted=True)
|
|
119
|
+
|
|
120
|
+
best_score = min(best_score, score)
|
|
121
|
+
|
|
122
|
+
scores.append(best_score)
|
|
123
|
+
|
|
124
|
+
if not self.rooted:
|
|
125
|
+
v = reroot_at_random(v)
|
|
126
|
+
|
|
127
|
+
# Queue shuffle
|
|
128
|
+
_, vec_mapping = queue_shuffle(v, shuffle_cherries=True)
|
|
129
|
+
|
|
130
|
+
# Re-arrange the label mapping and the distance matrix
|
|
131
|
+
col_order = []
|
|
132
|
+
for i, idx in enumerate(vec_mapping):
|
|
133
|
+
label_mapping[i] = label_mapping[idx]
|
|
134
|
+
col_order.append(label_mapping[i])
|
|
135
|
+
|
|
136
|
+
dm = jnp.asarray(data.loc[col_order, col_order])
|
|
137
|
+
|
|
138
|
+
if self.verbose:
|
|
139
|
+
iterator.set_postfix({"\033[95m Best score ": best_score})
|
|
140
|
+
|
|
141
|
+
v = jnp.eye(w_out.shape[0])[w_out.argmax(1)]
|
|
142
|
+
|
|
143
|
+
best_params = GradMEResult(
|
|
144
|
+
v=v,
|
|
145
|
+
best_score=best_score,
|
|
146
|
+
scores=scores,
|
|
147
|
+
W=w_out,
|
|
148
|
+
label_mapping=label_mapping,
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
return best_params
|
|
152
|
+
|
|
153
|
+
@staticmethod
|
|
154
|
+
def _init_W(k, eps=1e-8):
|
|
155
|
+
x = jnp.tril(jnp.ones((k, k)))
|
|
156
|
+
|
|
157
|
+
w_init = x / (x.sum(1)[:, jnp.newaxis] + eps)
|
|
158
|
+
|
|
159
|
+
return w_init
|
|
160
|
+
|
|
161
|
+
def _step(self, w, dm, value_and_grad_fun):
|
|
162
|
+
state = self.optimizer.init(w)
|
|
163
|
+
|
|
164
|
+
prev_loss = 1e8
|
|
165
|
+
|
|
166
|
+
for _ in range(self.n_iter_per_step):
|
|
167
|
+
loss, gradients = value_and_grad_fun(w, dm, self.rooted)
|
|
168
|
+
|
|
169
|
+
if jnp.abs(loss - prev_loss) < self.tol:
|
|
170
|
+
break
|
|
171
|
+
|
|
172
|
+
prev_loss = loss
|
|
173
|
+
|
|
174
|
+
updates, state = self.optimizer.update(gradients, state, w)
|
|
175
|
+
|
|
176
|
+
w = optax.apply_updates(w, updates)
|
|
177
|
+
|
|
178
|
+
return w
|
|
179
|
+
|
|
180
|
+
@staticmethod
|
|
181
|
+
def pdist(fasta_path, model):
|
|
182
|
+
with localconverter(ro.default_converter + pandas2ri.converter):
|
|
183
|
+
importr("ape")
|
|
184
|
+
|
|
185
|
+
ro.globalenv["fasta_path"] = fasta_path
|
|
186
|
+
ro.globalenv["model"] = model
|
|
187
|
+
|
|
188
|
+
# DNA evolution model: chosen by the caller via `model` and forwarded to ape's dist.dna
|
|
189
|
+
dm = ro.r(
|
|
190
|
+
"""
|
|
191
|
+
aln <- read.FASTA(fasta_path, type = "DNA")
|
|
192
|
+
|
|
193
|
+
dm <- dist.dna(aln, model = model)
|
|
194
|
+
|
|
195
|
+
D <- as.data.frame(as.matrix(dm))
|
|
196
|
+
D
|
|
197
|
+
"""
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
return dm
|