phylo2vec 1.2.0__tar.gz → 1.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/Cargo.lock +2 -2
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/PKG-INFO +46 -4
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/README.md +39 -2
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/Cargo.toml +1 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/base/ancestry.py +3 -3
- {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.1}/phylo2vec/base/edges.py +2 -2
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/base/newick.py +1 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/base/pairs.py +4 -5
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/io/writer.py +1 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/metrics/pairwise.py +14 -6
- phylo2vec-1.3.1/phylo2vec/opt/__init__.py +7 -0
- phylo2vec-1.3.1/phylo2vec/opt/_base.py +132 -0
- phylo2vec-1.3.1/phylo2vec/opt/_beagle_losses.py +803 -0
- phylo2vec-1.3.1/phylo2vec/opt/_gradme.py +321 -0
- phylo2vec-1.3.1/phylo2vec/opt/_gradme_losses.py +163 -0
- {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.1}/phylo2vec/opt/_hc.py +103 -58
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/opt/_hc_losses.py +17 -20
- phylo2vec-1.3.1/phylo2vec/opt/utils.py +31 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/ops/matrix/mod.rs +8 -20
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/ops/mod.rs +60 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/ops/vector.rs +298 -47
- {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.1}/phylo2vec/utils/newick.py +4 -4
- phylo2vec-1.3.1/phylo2vec/utils/vector.py +302 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/Cargo.toml +1 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/README.md +39 -2
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/base/ancestry.py +3 -3
- {phylo2vec-1.2.0 → phylo2vec-1.3.1/py-phylo2vec}/phylo2vec/base/edges.py +2 -2
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/base/newick.py +1 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/base/pairs.py +4 -5
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/io/writer.py +1 -1
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/metrics/pairwise.py +14 -6
- phylo2vec-1.3.1/py-phylo2vec/phylo2vec/opt/__init__.py +7 -0
- phylo2vec-1.3.1/py-phylo2vec/phylo2vec/opt/_base.py +132 -0
- phylo2vec-1.3.1/py-phylo2vec/phylo2vec/opt/_beagle_losses.py +803 -0
- phylo2vec-1.3.1/py-phylo2vec/phylo2vec/opt/_gradme.py +321 -0
- phylo2vec-1.3.1/py-phylo2vec/phylo2vec/opt/_gradme_losses.py +163 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1/py-phylo2vec}/phylo2vec/opt/_hc.py +103 -58
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/opt/_hc_losses.py +17 -20
- phylo2vec-1.3.1/py-phylo2vec/phylo2vec/opt/utils.py +31 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1/py-phylo2vec}/phylo2vec/utils/newick.py +4 -4
- phylo2vec-1.3.1/py-phylo2vec/phylo2vec/utils/vector.py +302 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/src/lib.rs +12 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/tests/test_metrics.py +23 -1
- phylo2vec-1.3.1/py-phylo2vec/tests/test_reorder.py +180 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/pyproject.toml +9 -2
- phylo2vec-1.2.0/phylo2vec/opt/__init__.py +0 -6
- phylo2vec-1.2.0/phylo2vec/opt/_base.py +0 -89
- phylo2vec-1.2.0/phylo2vec/utils/vector.py +0 -365
- phylo2vec-1.2.0/py-phylo2vec/phylo2vec/opt/__init__.py +0 -6
- phylo2vec-1.2.0/py-phylo2vec/phylo2vec/opt/_base.py +0 -89
- phylo2vec-1.2.0/py-phylo2vec/phylo2vec/utils/vector.py +0 -365
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/Cargo.toml +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/LICENSE +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/README.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/base/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/benches/bench_main.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/benches/benchmarks/core.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/benches/benchmarks/mod.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/_base.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/data/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/descr/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/descr/fluA.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/descr/h3n2.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/descr/m501.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/descr/yeast.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/descr/zika.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/io/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/io/_validation.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/io/reader.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/metrics/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/opt/README.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/lib.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/main.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/profile_main.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/mod.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/ops/avl.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/ops/newick/mod.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/ops/newick/newick_patterns.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/types.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/utils.rs +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/utils/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/utils/matrix.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/LICENSE +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/benchmarks/test_bench.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/base/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/_base.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/data/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/descr/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/descr/fluA.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/descr/h3n2.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/descr/m501.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/descr/yeast.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/descr/zika.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/io/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/io/_validation.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/io/reader.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/metrics/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/opt/README.md +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/utils/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/utils/matrix.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/tests/__init__.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/tests/config.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/tests/test_base.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/tests/test_io.py +0 -0
- {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/tests/test_utils.py +0 -0
|
@@ -460,7 +460,7 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
|
|
|
460
460
|
|
|
461
461
|
[[package]]
|
|
462
462
|
name = "phylo2vec"
|
|
463
|
-
version = "0.4.
|
|
463
|
+
version = "0.4.1"
|
|
464
464
|
dependencies = [
|
|
465
465
|
"criterion",
|
|
466
466
|
"rand",
|
|
@@ -544,7 +544,7 @@ dependencies = [
|
|
|
544
544
|
|
|
545
545
|
[[package]]
|
|
546
546
|
name = "py-phylo2vec"
|
|
547
|
-
version = "1.2.0"
|
|
547
|
+
version = "1.3.1"
|
|
548
548
|
dependencies = [
|
|
549
549
|
"phylo2vec",
|
|
550
550
|
"pyo3",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: phylo2vec
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.3.1
|
|
4
4
|
Classifier: Development Status :: 3 - Alpha
|
|
5
5
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
6
6
|
Classifier: Programming Language :: Python
|
|
@@ -11,12 +11,17 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
11
11
|
Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
|
|
12
12
|
Classifier: Operating System :: Unix
|
|
13
13
|
Classifier: Operating System :: Microsoft :: Windows
|
|
14
|
-
Requires-Dist: numba>=0.56.4
|
|
15
14
|
Requires-Dist: numpy>=1.22,<2.1
|
|
16
15
|
Requires-Dist: biopython>=1.84.0
|
|
17
16
|
Requires-Dist: joblib>=1.2.0
|
|
18
17
|
Requires-Dist: ete3==3.1.3
|
|
19
18
|
Requires-Dist: six>=1.16.0
|
|
19
|
+
Requires-Dist: jax ; extra == 'opt'
|
|
20
|
+
Requires-Dist: joblib ; extra == 'opt'
|
|
21
|
+
Requires-Dist: optax ; extra == 'opt'
|
|
22
|
+
Requires-Dist: rpy2 ; extra == 'opt'
|
|
23
|
+
Requires-Dist: tqdm ; extra == 'opt'
|
|
24
|
+
Provides-Extra: opt
|
|
20
25
|
License-File: LICENSE
|
|
21
26
|
Summary: Phylo2Vec: integer vector representation of binary (phylogenetic) trees
|
|
22
27
|
Author: Neil Scheidwasser <neil.clow@sund.ku.dk>
|
|
@@ -85,8 +90,10 @@ in Rust.
|
|
|
85
90
|
|
|
86
91
|
### Installing R package
|
|
87
92
|
|
|
88
|
-
|
|
89
|
-
|
|
93
|
+
#### Option 1: from a release (Windows, Mac, Ubuntu >= 22.04)
|
|
94
|
+
|
|
95
|
+
Retrieve one of the compiled binaries from the
|
|
96
|
+
[releases](https://github.com/sbhattlab/phylo2vec/releases) that fits your OS.
|
|
90
97
|
Once the file is downloaded, simply run `install.packages` in your R command
|
|
91
98
|
line.
|
|
92
99
|
|
|
@@ -94,6 +101,41 @@ line.
|
|
|
94
101
|
install.packages("/path/to/package_file", repos = NULL, type = 'source')
|
|
95
102
|
```
|
|
96
103
|
|
|
104
|
+
#### Option 2: using `devtools`
|
|
105
|
+
|
|
106
|
+
⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
|
|
107
|
+
build the core package.
|
|
108
|
+
|
|
109
|
+
```R
|
|
110
|
+
devtools::install_github("sbhattlab/phylo2vec", subdir="./r-phylo2vec", build = FALSE)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Note: to download a specific version, use:
|
|
114
|
+
|
|
115
|
+
```R
|
|
116
|
+
devtools::install_github("sbhattlab/phylo2vec@vX.Y.Z", subdir="./r-phylo2vec", build = FALSE)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
#### Option 3: manual installation
|
|
120
|
+
|
|
121
|
+
⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
|
|
122
|
+
build the core package.
|
|
123
|
+
|
|
124
|
+
Clone the repository and run the following `install.packages` in your R command
|
|
125
|
+
line.
|
|
126
|
+
|
|
127
|
+
Note: to download a specific version, you can use `git checkout` to a desired
|
|
128
|
+
tag.
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
git clone https://github.com/sbhattlab/phylo2vec
|
|
132
|
+
cd phylo2vec
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
```R
|
|
136
|
+
install.packages("./r-phylo2vec", repos = NULL, type = 'source')
|
|
137
|
+
```
|
|
138
|
+
|
|
97
139
|
## Basic Usage
|
|
98
140
|
|
|
99
141
|
### Python
|
|
@@ -58,8 +58,10 @@ in Rust.
|
|
|
58
58
|
|
|
59
59
|
### Installing R package
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
|
|
61
|
+
#### Option 1: from a release (Windows, Mac, Ubuntu >= 22.04)
|
|
62
|
+
|
|
63
|
+
Retrieve one of the compiled binaries from the
|
|
64
|
+
[releases](https://github.com/sbhattlab/phylo2vec/releases) that fits your OS.
|
|
63
65
|
Once the file is downloaded, simply run `install.packages` in your R command
|
|
64
66
|
line.
|
|
65
67
|
|
|
@@ -67,6 +69,41 @@ line.
|
|
|
67
69
|
install.packages("/path/to/package_file", repos = NULL, type = 'source')
|
|
68
70
|
```
|
|
69
71
|
|
|
72
|
+
#### Option 2: using `devtools`
|
|
73
|
+
|
|
74
|
+
⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
|
|
75
|
+
build the core package.
|
|
76
|
+
|
|
77
|
+
```R
|
|
78
|
+
devtools::install_github("sbhattlab/phylo2vec", subdir="./r-phylo2vec", build = FALSE)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Note: to download a specific version, use:
|
|
82
|
+
|
|
83
|
+
```R
|
|
84
|
+
devtools::install_github("sbhattlab/phylo2vec@vX.Y.Z", subdir="./r-phylo2vec", build = FALSE)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
#### Option 3: manual installation
|
|
88
|
+
|
|
89
|
+
⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
|
|
90
|
+
build the core package.
|
|
91
|
+
|
|
92
|
+
Clone the repository and run the following `install.packages` in your R command
|
|
93
|
+
line.
|
|
94
|
+
|
|
95
|
+
Note: to download a specific version, you can use `git checkout` to a desired
|
|
96
|
+
tag.
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
git clone https://github.com/sbhattlab/phylo2vec
|
|
100
|
+
cd phylo2vec
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
```R
|
|
104
|
+
install.packages("./r-phylo2vec", repos = NULL, type = 'source')
|
|
105
|
+
```
|
|
106
|
+
|
|
70
107
|
## Basic Usage
|
|
71
108
|
|
|
72
109
|
### Python
|
|
@@ -20,7 +20,7 @@ def from_ancestry(ancestry: np.ndarray) -> np.ndarray:
|
|
|
20
20
|
|
|
21
21
|
Returns
|
|
22
22
|
-------
|
|
23
|
-
numpy.
|
|
23
|
+
numpy.ndarray
|
|
24
24
|
Phylo2Vec vector
|
|
25
25
|
"""
|
|
26
26
|
v_list = core.from_ancestry(ancestry)
|
|
@@ -50,12 +50,12 @@ def to_ancestry(v: np.ndarray) -> np.ndarray:
|
|
|
50
50
|
|
|
51
51
|
Parameters
|
|
52
52
|
----------
|
|
53
|
-
v : numpy.
|
|
53
|
+
v : numpy.ndarray
|
|
54
54
|
Phylo2Vec vector
|
|
55
55
|
|
|
56
56
|
Returns
|
|
57
57
|
-------
|
|
58
|
-
ancestry : numpy.
|
|
58
|
+
ancestry : numpy.ndarray
|
|
59
59
|
Ancestry matrix
|
|
60
60
|
1st column: child 1
|
|
61
61
|
2nd column: child 2
|
|
@@ -21,7 +21,7 @@ def from_edges(edges: List[Tuple[int, int]]) -> np.ndarray:
|
|
|
21
21
|
|
|
22
22
|
Returns
|
|
23
23
|
-------
|
|
24
|
-
v : numpy.
|
|
24
|
+
v : numpy.ndarray
|
|
25
25
|
Phylo2Vec vector
|
|
26
26
|
"""
|
|
27
27
|
v = core.from_edges(edges)
|
|
@@ -35,7 +35,7 @@ def to_edges(v: np.ndarray) -> List[Tuple[int, int]]:
|
|
|
35
35
|
|
|
36
36
|
Parameters
|
|
37
37
|
----------
|
|
38
|
-
v : numpy.
|
|
38
|
+
v : numpy.ndarray
|
|
39
39
|
Phylo2Vec vector
|
|
40
40
|
|
|
41
41
|
Returns
|
|
@@ -11,10 +11,9 @@ from typing import List, Tuple
|
|
|
11
11
|
def from_pairs(pairs: List[Tuple[int, int]]) -> np.ndarray:
|
|
12
12
|
"""Convert a list of pairs to a Phylo2Vec vector
|
|
13
13
|
|
|
14
|
-
Each pair is represented as a
|
|
14
|
+
Each pair is represented as a tuple (B, L)
|
|
15
15
|
indicating that leaf L descends from branch B.
|
|
16
16
|
|
|
17
|
-
|
|
18
17
|
Parameters
|
|
19
18
|
----------
|
|
20
19
|
pairs : List[Tuple[int, int]]
|
|
@@ -22,7 +21,7 @@ def from_pairs(pairs: List[Tuple[int, int]]) -> np.ndarray:
|
|
|
22
21
|
|
|
23
22
|
Returns
|
|
24
23
|
-------
|
|
25
|
-
v : numpy.
|
|
24
|
+
v : numpy.ndarray
|
|
26
25
|
Phylo2Vec vector
|
|
27
26
|
"""
|
|
28
27
|
v = core.from_pairs(pairs)
|
|
@@ -32,12 +31,12 @@ def from_pairs(pairs: List[Tuple[int, int]]) -> np.ndarray:
|
|
|
32
31
|
def to_pairs(v: np.ndarray) -> List[Tuple[int, int]]:
|
|
33
32
|
"""Convert a Phylo2Vec vector to a list of pairs
|
|
34
33
|
|
|
35
|
-
Each pair is represented as a
|
|
34
|
+
Each pair is represented as a tuple (B, L)
|
|
36
35
|
indicating that leaf L descends from branch B.
|
|
37
36
|
|
|
38
37
|
Parameters
|
|
39
38
|
----------
|
|
40
|
-
v : numpy.
|
|
39
|
+
v : numpy.ndarray
|
|
41
40
|
Phylo2Vec vector
|
|
42
41
|
|
|
43
42
|
Returns
|
|
@@ -4,6 +4,7 @@ import warnings
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
|
|
6
6
|
from phylo2vec import _phylo2vec_core as core
|
|
7
|
+
from phylo2vec.utils.matrix import check_matrix
|
|
7
8
|
from phylo2vec.utils.vector import check_vector
|
|
8
9
|
|
|
9
10
|
|
|
@@ -14,7 +15,7 @@ def cophenetic_distances(vector_or_matrix, unrooted=False):
|
|
|
14
15
|
|
|
15
16
|
Parameters
|
|
16
17
|
----------
|
|
17
|
-
vector_or_matrix : numpy.
|
|
18
|
+
vector_or_matrix : numpy.ndarray
|
|
18
19
|
Phylo2Vec vector (ndim == 1)/matrix (ndim == 2)
|
|
19
20
|
|
|
20
21
|
Returns
|
|
@@ -46,7 +47,7 @@ def cophenetic_distances(vector_or_matrix, unrooted=False):
|
|
|
46
47
|
PAIRWISE_DISTANCES = {"cophenetic": cophenetic_distances}
|
|
47
48
|
|
|
48
49
|
|
|
49
|
-
def pairwise_distances(v, metric="cophenetic"):
|
|
50
|
+
def pairwise_distances(vector_or_matrix, metric="cophenetic"):
|
|
50
51
|
"""
|
|
51
52
|
Compute a pairwise distance matrix
|
|
52
53
|
for tree nodes from a Phylo2Vec vector.
|
|
@@ -55,8 +56,8 @@ def pairwise_distances(v, metric="cophenetic"):
|
|
|
55
56
|
|
|
56
57
|
Parameters
|
|
57
58
|
----------
|
|
58
|
-
|
|
59
|
-
Phylo2Vec vector
|
|
59
|
+
vector_or_matrix : numpy.ndarray
|
|
60
|
+
Phylo2Vec vector (ndim == 1)/matrix (ndim == 2)
|
|
60
61
|
metric : str, optional
|
|
61
62
|
Pairwise distance metric, by default "cophenetic"
|
|
62
63
|
|
|
@@ -65,8 +66,15 @@ def pairwise_distances(v, metric="cophenetic"):
|
|
|
65
66
|
numpy.ndarray
|
|
66
67
|
Distance matrix
|
|
67
68
|
"""
|
|
68
|
-
|
|
69
|
+
if vector_or_matrix.ndim == 2:
|
|
70
|
+
check_matrix(vector_or_matrix)
|
|
71
|
+
elif vector_or_matrix.ndim == 1:
|
|
72
|
+
check_vector(vector_or_matrix)
|
|
73
|
+
else:
|
|
74
|
+
raise ValueError(
|
|
75
|
+
"vector_or_matrix should either be a vector (ndim == 1) or matrix (ndim == 2)"
|
|
76
|
+
)
|
|
69
77
|
|
|
70
78
|
func = PAIRWISE_DISTANCES[metric]
|
|
71
79
|
|
|
72
|
-
return func(
|
|
80
|
+
return func(vector_or_matrix)
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""Base class for all optimisation methods in Phylo2Vec."""
|
|
2
|
+
|
|
3
|
+
import multiprocessing
|
|
4
|
+
import random
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Dict, List, Final
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
from phylo2vec.datasets import read_fasta
|
|
13
|
+
from phylo2vec.utils.matrix import sample_matrix
|
|
14
|
+
from phylo2vec.utils.vector import sample_vector
|
|
15
|
+
|
|
16
|
+
# Multiprocessing
|
|
17
|
+
DEFAULT_N_JOBS: Final = multiprocessing.cpu_count() // 4
|
|
18
|
+
MIN_N_JOBS: Final = 4
|
|
19
|
+
# Seeding
|
|
20
|
+
MAX_SEED = 42
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
|
|
24
|
+
class BaseResult:
|
|
25
|
+
"""Result of the optimization process.
|
|
26
|
+
|
|
27
|
+
Attributes
|
|
28
|
+
----------
|
|
29
|
+
best : numpy.ndarray
|
|
30
|
+
Optimized phylo2vec vector or matrix.
|
|
31
|
+
label_mapping : Dict[int, str]
|
|
32
|
+
Mapping of leaf labels (integer) to taxa.
|
|
33
|
+
best_score : float
|
|
34
|
+
The best score achieved during optimization.
|
|
35
|
+
scores : List[float]
|
|
36
|
+
List of scores obtained during optimization.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
best: np.ndarray
|
|
40
|
+
label_mapping: Dict[int, str]
|
|
41
|
+
best_score: float
|
|
42
|
+
scores: List[float]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class BaseOptimizer:
|
|
46
|
+
"""
|
|
47
|
+
Base class for all phylo2vec-based optimizers
|
|
48
|
+
|
|
49
|
+
Parameters
|
|
50
|
+
----------
|
|
51
|
+
random_seed : int, optional
|
|
52
|
+
Random seed, by default None
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
def __init__(self, mode="vector", random_seed=None, verbose=False, n_jobs=None):
|
|
56
|
+
|
|
57
|
+
assert mode in ["vector", "matrix"], "Mode must be either 'vector' or 'matrix'."
|
|
58
|
+
self.mode = mode
|
|
59
|
+
|
|
60
|
+
self.random_seed = random_seed or random.randint(0, MAX_SEED)
|
|
61
|
+
random.seed(self.random_seed)
|
|
62
|
+
np.random.seed(self.random_seed)
|
|
63
|
+
|
|
64
|
+
self.verbose = verbose
|
|
65
|
+
|
|
66
|
+
self.n_jobs = self._infer_n_jobs(n_jobs)
|
|
67
|
+
|
|
68
|
+
@staticmethod
|
|
69
|
+
def _infer_n_jobs(n_jobs=None):
|
|
70
|
+
return n_jobs or max(MIN_N_JOBS, DEFAULT_N_JOBS)
|
|
71
|
+
|
|
72
|
+
def fit(self, fasta_path) -> BaseResult:
|
|
73
|
+
"""Fit an optimizer to a fasta file
|
|
74
|
+
|
|
75
|
+
Parameters
|
|
76
|
+
----------
|
|
77
|
+
fasta_path : str
|
|
78
|
+
Path to fasta file
|
|
79
|
+
|
|
80
|
+
Returns
|
|
81
|
+
-------
|
|
82
|
+
result : BaseResult
|
|
83
|
+
Result of the optimization process, containing the optimized vector,
|
|
84
|
+
label mapping, best score, and scores during optimization.
|
|
85
|
+
"""
|
|
86
|
+
# TODO: figure out how to change this when the user selects load fasta
|
|
87
|
+
# Probably an boolean "preloaded"
|
|
88
|
+
# If True, change the fasta path to include the module name
|
|
89
|
+
records = read_fasta(fasta_path)
|
|
90
|
+
|
|
91
|
+
# Make a label mapping from the records
|
|
92
|
+
label_mapping = dict(enumerate(r.id.replace(" ", ".") for r in records))
|
|
93
|
+
|
|
94
|
+
n_leaves = len(label_mapping)
|
|
95
|
+
|
|
96
|
+
if self.mode == "vector":
|
|
97
|
+
obj_init = sample_vector(n_leaves)
|
|
98
|
+
elif self.mode == "matrix":
|
|
99
|
+
obj_init = sample_matrix(n_leaves)
|
|
100
|
+
else:
|
|
101
|
+
raise ValueError(f"Unknown mode: {self.mode}")
|
|
102
|
+
|
|
103
|
+
start_time = time.time()
|
|
104
|
+
|
|
105
|
+
result = self._optimise(fasta_path, obj_init, label_mapping)
|
|
106
|
+
|
|
107
|
+
end_time = time.time()
|
|
108
|
+
|
|
109
|
+
if self.verbose:
|
|
110
|
+
print(
|
|
111
|
+
f"Optimisation finished in {end_time - start_time:.2f} seconds "
|
|
112
|
+
f"with {len(result.scores)} loss evaluations."
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
return result
|
|
116
|
+
|
|
117
|
+
def _optimise(self, fasta_path, tree, label_mapping):
|
|
118
|
+
raise NotImplementedError
|
|
119
|
+
|
|
120
|
+
def __repr__(self):
|
|
121
|
+
# TODO: maybe something like sklearn pprint
|
|
122
|
+
# https://github.com/scikit-learn/scikit-learn/blob/093e0cf14aff026cca6097e8c42f83b735d26358/sklearn/utils/_pprint.py#L116
|
|
123
|
+
format_string = f"{self.__class__.__name__}("
|
|
124
|
+
|
|
125
|
+
for item in vars(self):
|
|
126
|
+
format_string += "\n"
|
|
127
|
+
# TODO: pprint if dict?
|
|
128
|
+
format_string += f"\t{item}={repr(self.__getattribute__(item))},"
|
|
129
|
+
|
|
130
|
+
format_string = format_string[:-1] + "\n)"
|
|
131
|
+
|
|
132
|
+
return format_string
|