phylo2vec 1.2.0__tar.gz → 1.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/Cargo.lock +2 -2
  2. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/PKG-INFO +46 -4
  3. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/README.md +39 -2
  4. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/Cargo.toml +1 -1
  5. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/base/ancestry.py +3 -3
  6. {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.1}/phylo2vec/base/edges.py +2 -2
  7. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/base/newick.py +1 -1
  8. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/base/pairs.py +4 -5
  9. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/io/writer.py +1 -1
  10. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/metrics/pairwise.py +14 -6
  11. phylo2vec-1.3.1/phylo2vec/opt/__init__.py +7 -0
  12. phylo2vec-1.3.1/phylo2vec/opt/_base.py +132 -0
  13. phylo2vec-1.3.1/phylo2vec/opt/_beagle_losses.py +803 -0
  14. phylo2vec-1.3.1/phylo2vec/opt/_gradme.py +321 -0
  15. phylo2vec-1.3.1/phylo2vec/opt/_gradme_losses.py +163 -0
  16. {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.1}/phylo2vec/opt/_hc.py +103 -58
  17. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/opt/_hc_losses.py +17 -20
  18. phylo2vec-1.3.1/phylo2vec/opt/utils.py +31 -0
  19. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/ops/matrix/mod.rs +8 -20
  20. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/ops/mod.rs +60 -1
  21. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/ops/vector.rs +298 -47
  22. {phylo2vec-1.2.0/py-phylo2vec → phylo2vec-1.3.1}/phylo2vec/utils/newick.py +4 -4
  23. phylo2vec-1.3.1/phylo2vec/utils/vector.py +302 -0
  24. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/Cargo.toml +1 -1
  25. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/README.md +39 -2
  26. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/base/ancestry.py +3 -3
  27. {phylo2vec-1.2.0 → phylo2vec-1.3.1/py-phylo2vec}/phylo2vec/base/edges.py +2 -2
  28. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/base/newick.py +1 -1
  29. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/base/pairs.py +4 -5
  30. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/io/writer.py +1 -1
  31. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/metrics/pairwise.py +14 -6
  32. phylo2vec-1.3.1/py-phylo2vec/phylo2vec/opt/__init__.py +7 -0
  33. phylo2vec-1.3.1/py-phylo2vec/phylo2vec/opt/_base.py +132 -0
  34. phylo2vec-1.3.1/py-phylo2vec/phylo2vec/opt/_beagle_losses.py +803 -0
  35. phylo2vec-1.3.1/py-phylo2vec/phylo2vec/opt/_gradme.py +321 -0
  36. phylo2vec-1.3.1/py-phylo2vec/phylo2vec/opt/_gradme_losses.py +163 -0
  37. {phylo2vec-1.2.0 → phylo2vec-1.3.1/py-phylo2vec}/phylo2vec/opt/_hc.py +103 -58
  38. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/opt/_hc_losses.py +17 -20
  39. phylo2vec-1.3.1/py-phylo2vec/phylo2vec/opt/utils.py +31 -0
  40. {phylo2vec-1.2.0 → phylo2vec-1.3.1/py-phylo2vec}/phylo2vec/utils/newick.py +4 -4
  41. phylo2vec-1.3.1/py-phylo2vec/phylo2vec/utils/vector.py +302 -0
  42. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/src/lib.rs +12 -0
  43. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/tests/test_metrics.py +23 -1
  44. phylo2vec-1.3.1/py-phylo2vec/tests/test_reorder.py +180 -0
  45. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/pyproject.toml +9 -2
  46. phylo2vec-1.2.0/phylo2vec/opt/__init__.py +0 -6
  47. phylo2vec-1.2.0/phylo2vec/opt/_base.py +0 -89
  48. phylo2vec-1.2.0/phylo2vec/utils/vector.py +0 -365
  49. phylo2vec-1.2.0/py-phylo2vec/phylo2vec/opt/__init__.py +0 -6
  50. phylo2vec-1.2.0/py-phylo2vec/phylo2vec/opt/_base.py +0 -89
  51. phylo2vec-1.2.0/py-phylo2vec/phylo2vec/utils/vector.py +0 -365
  52. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/Cargo.toml +0 -0
  53. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/LICENSE +0 -0
  54. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/README.md +0 -0
  55. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/__init__.py +0 -0
  56. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/base/__init__.py +0 -0
  57. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/benches/bench_main.rs +0 -0
  58. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/benches/benchmarks/core.rs +0 -0
  59. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/benches/benchmarks/mod.rs +0 -0
  60. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/__init__.py +0 -0
  61. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/_base.py +0 -0
  62. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/data/__init__.py +0 -0
  63. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/descr/__init__.py +0 -0
  64. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/descr/fluA.md +0 -0
  65. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/descr/h3n2.md +0 -0
  66. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/descr/m501.md +0 -0
  67. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/descr/yeast.md +0 -0
  68. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/datasets/descr/zika.md +0 -0
  69. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/io/__init__.py +0 -0
  70. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/io/_validation.py +0 -0
  71. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/io/reader.py +0 -0
  72. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/metrics/__init__.py +0 -0
  73. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/opt/README.md +0 -0
  74. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/lib.rs +0 -0
  75. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/main.rs +0 -0
  76. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/profile_main.rs +0 -0
  77. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/mod.rs +0 -0
  78. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/ops/avl.rs +0 -0
  79. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/ops/newick/mod.rs +0 -0
  80. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/ops/newick/newick_patterns.rs +0 -0
  81. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/tree_vec/types.rs +0 -0
  82. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/src/utils.rs +0 -0
  83. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/utils/__init__.py +0 -0
  84. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/phylo2vec/utils/matrix.py +0 -0
  85. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/LICENSE +0 -0
  86. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/benchmarks/test_bench.py +0 -0
  87. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/__init__.py +0 -0
  88. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/base/__init__.py +0 -0
  89. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/__init__.py +0 -0
  90. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/_base.py +0 -0
  91. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/data/__init__.py +0 -0
  92. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/descr/__init__.py +0 -0
  93. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/descr/fluA.md +0 -0
  94. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/descr/h3n2.md +0 -0
  95. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/descr/m501.md +0 -0
  96. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/descr/yeast.md +0 -0
  97. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/datasets/descr/zika.md +0 -0
  98. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/io/__init__.py +0 -0
  99. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/io/_validation.py +0 -0
  100. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/io/reader.py +0 -0
  101. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/metrics/__init__.py +0 -0
  102. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/opt/README.md +0 -0
  103. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/utils/__init__.py +0 -0
  104. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/phylo2vec/utils/matrix.py +0 -0
  105. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/tests/__init__.py +0 -0
  106. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/tests/config.py +0 -0
  107. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/tests/test_base.py +0 -0
  108. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/tests/test_io.py +0 -0
  109. {phylo2vec-1.2.0 → phylo2vec-1.3.1}/py-phylo2vec/tests/test_utils.py +0 -0
@@ -460,7 +460,7 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
460
460
 
461
461
  [[package]]
462
462
  name = "phylo2vec"
463
- version = "0.4.0"
463
+ version = "0.4.1"
464
464
  dependencies = [
465
465
  "criterion",
466
466
  "rand",
@@ -544,7 +544,7 @@ dependencies = [
544
544
 
545
545
  [[package]]
546
546
  name = "py-phylo2vec"
547
- version = "1.2.0"
547
+ version = "1.3.1"
548
548
  dependencies = [
549
549
  "phylo2vec",
550
550
  "pyo3",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: phylo2vec
3
- Version: 1.2.0
3
+ Version: 1.3.1
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
6
6
  Classifier: Programming Language :: Python
@@ -11,12 +11,17 @@ Classifier: Programming Language :: Python :: 3.12
11
11
  Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
12
12
  Classifier: Operating System :: Unix
13
13
  Classifier: Operating System :: Microsoft :: Windows
14
- Requires-Dist: numba>=0.56.4
15
14
  Requires-Dist: numpy>=1.22,<2.1
16
15
  Requires-Dist: biopython>=1.84.0
17
16
  Requires-Dist: joblib>=1.2.0
18
17
  Requires-Dist: ete3==3.1.3
19
18
  Requires-Dist: six>=1.16.0
19
+ Requires-Dist: jax ; extra == 'opt'
20
+ Requires-Dist: joblib ; extra == 'opt'
21
+ Requires-Dist: optax ; extra == 'opt'
22
+ Requires-Dist: rpy2 ; extra == 'opt'
23
+ Requires-Dist: tqdm ; extra == 'opt'
24
+ Provides-Extra: opt
20
25
  License-File: LICENSE
21
26
  Summary: Phylo2Vec: integer vector representation of binary (phylogenetic) trees
22
27
  Author: Neil Scheidwasser <neil.clow@sund.ku.dk>
@@ -85,8 +90,10 @@ in Rust.
85
90
 
86
91
  ### Installing R package
87
92
 
88
- To install the R package, first you need to retrieve one of the compiled file
89
- within the package [releases](https://github.com/sbhattlab/phylo2vec/releases).
93
+ #### Option 1: from a release (Windows, Mac, Ubuntu >= 22.04)
94
+
95
+ Retrieve one of the compiled binaries from the
96
+ [releases](https://github.com/sbhattlab/phylo2vec/releases) that fits your OS.
90
97
  Once the file is downloaded, simply run `install.packages` in your R command
91
98
  line.
92
99
 
@@ -94,6 +101,41 @@ line.
94
101
  install.packages("/path/to/package_file", repos = NULL, type = 'source')
95
102
  ```
96
103
 
104
+ #### Option 2: using `devtools`
105
+
106
+ ⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
107
+ build the core package.
108
+
109
+ ```R
110
+ devtools::install_github("sbhattlab/phylo2vec", subdir="./r-phylo2vec", build = FALSE)
111
+ ```
112
+
113
+ Note: to download a specific version, use:
114
+
115
+ ```R
116
+ devtools::install_github("sbhattlab/phylo2vec@vX.Y.Z", subdir="./r-phylo2vec", build = FALSE)
117
+ ```
118
+
119
+ #### Option 3: manual installation
120
+
121
+ ⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
122
+ build the core package.
123
+
124
+ Clone the repository, then run the following `install.packages` command in your R command
125
+ line.
126
+
127
+ Note: to download a specific version, you can use `git checkout` to a desired
128
+ tag.
129
+
130
+ ```bash
131
+ git clone https://github.com/sbhattlab/phylo2vec
132
+ cd phylo2vec
133
+ ```
134
+
135
+ ```R
136
+ install.packages("./r-phylo2vec", repos = NULL, type = 'source')
137
+ ```
138
+
97
139
  ## Basic Usage
98
140
 
99
141
  ### Python
@@ -58,8 +58,10 @@ in Rust.
58
58
 
59
59
  ### Installing R package
60
60
 
61
- To install the R package, first you need to retrieve one of the compiled file
62
- within the package [releases](https://github.com/sbhattlab/phylo2vec/releases).
61
+ #### Option 1: from a release (Windows, Mac, Ubuntu >= 22.04)
62
+
63
+ Retrieve one of the compiled binaries from the
64
+ [releases](https://github.com/sbhattlab/phylo2vec/releases) that fits your OS.
63
65
  Once the file is downloaded, simply run `install.packages` in your R command
64
66
  line.
65
67
 
@@ -67,6 +69,41 @@ line.
67
69
  install.packages("/path/to/package_file", repos = NULL, type = 'source')
68
70
  ```
69
71
 
72
+ #### Option 2: using `devtools`
73
+
74
+ ⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
75
+ build the core package.
76
+
77
+ ```R
78
+ devtools::install_github("sbhattlab/phylo2vec", subdir="./r-phylo2vec", build = FALSE)
79
+ ```
80
+
81
+ Note: to download a specific version, use:
82
+
83
+ ```R
84
+ devtools::install_github("sbhattlab/phylo2vec@vX.Y.Z", subdir="./r-phylo2vec", build = FALSE)
85
+ ```
86
+
87
+ #### Option 3: manual installation
88
+
89
+ ⚠️ This requires installing [Rust](https://www.rust-lang.org/tools/install) to
90
+ build the core package.
91
+
92
+ Clone the repository, then run the following `install.packages` command in your R command
93
+ line.
94
+
95
+ Note: to download a specific version, you can use `git checkout` to a desired
96
+ tag.
97
+
98
+ ```bash
99
+ git clone https://github.com/sbhattlab/phylo2vec
100
+ cd phylo2vec
101
+ ```
102
+
103
+ ```R
104
+ install.packages("./r-phylo2vec", repos = NULL, type = 'source')
105
+ ```
106
+
70
107
  ## Basic Usage
71
108
 
72
109
  ### Python
@@ -1,7 +1,7 @@
1
1
  [package]
2
2
  name = "phylo2vec"
3
3
  # Rust core version
4
- version = "0.4.0"
4
+ version = "0.4.1"
5
5
  edition.workspace = true
6
6
  authors.workspace = true
7
7
  description.workspace = true
@@ -20,7 +20,7 @@ def from_ancestry(ancestry: np.ndarray) -> np.ndarray:
20
20
 
21
21
  Returns
22
22
  -------
23
- numpy.array
23
+ numpy.ndarray
24
24
  Phylo2Vec vector
25
25
  """
26
26
  v_list = core.from_ancestry(ancestry)
@@ -50,12 +50,12 @@ def to_ancestry(v: np.ndarray) -> np.ndarray:
50
50
 
51
51
  Parameters
52
52
  ----------
53
- v : numpy.array
53
+ v : numpy.ndarray
54
54
  Phylo2Vec vector
55
55
 
56
56
  Returns
57
57
  -------
58
- ancestry : numpy.array
58
+ ancestry : numpy.ndarray
59
59
  Ancestry matrix
60
60
  1st column: child 1
61
61
  2nd column: child 2
@@ -21,7 +21,7 @@ def from_edges(edges: List[Tuple[int, int]]) -> np.ndarray:
21
21
 
22
22
  Returns
23
23
  -------
24
- v : numpy.array
24
+ v : numpy.ndarray
25
25
  Phylo2Vec vector
26
26
  """
27
27
  v = core.from_edges(edges)
@@ -35,7 +35,7 @@ def to_edges(v: np.ndarray) -> List[Tuple[int, int]]:
35
35
 
36
36
  Parameters
37
37
  ----------
38
- v : numpy.array
38
+ v : numpy.ndarray
39
39
  Phylo2Vec vector
40
40
 
41
41
  Returns
@@ -31,7 +31,7 @@ def to_newick(vector_or_matrix: np.ndarray) -> str:
31
31
 
32
32
  Parameters
33
33
  ----------
34
- vector_or_matrix : numpy.array
34
+ vector_or_matrix : numpy.ndarray
35
35
  Phylo2Vec vector (ndim == 1)/matrix (ndim == 2)
36
36
 
37
37
  Returns
@@ -11,10 +11,9 @@ from typing import List, Tuple
11
11
  def from_pairs(pairs: List[Tuple[int, int]]) -> np.ndarray:
12
12
  """Convert a list of pairs to a Phylo2Vec vector
13
13
 
14
- Each pair is represented as a list of pairs (B, L)
14
+ Each pair is represented as a tuple (B, L)
15
15
  indicating that leaf L descends from branch B.
16
16
 
17
-
18
17
  Parameters
19
18
  ----------
20
19
  pairs : List[Tuple[int, int]]
@@ -22,7 +21,7 @@ def from_pairs(pairs: List[Tuple[int, int]]) -> np.ndarray:
22
21
 
23
22
  Returns
24
23
  -------
25
- v : numpy.array
24
+ v : numpy.ndarray
26
25
  Phylo2Vec vector
27
26
  """
28
27
  v = core.from_pairs(pairs)
@@ -32,12 +31,12 @@ def from_pairs(pairs: List[Tuple[int, int]]) -> np.ndarray:
32
31
  def to_pairs(v: np.ndarray) -> List[Tuple[int, int]]:
33
32
  """Convert a Phylo2Vec vector to a list of pairs
34
33
 
35
- Each pair is represented as a list of pairs (B, L)
34
+ Each pair is represented as a tuple (B, L)
36
35
  indicating that leaf L descends from branch B.
37
36
 
38
37
  Parameters
39
38
  ----------
40
- v : numpy.array
39
+ v : numpy.ndarray
41
40
  Phylo2Vec vector
42
41
 
43
42
  Returns
@@ -44,7 +44,7 @@ def save_newick(
44
44
 
45
45
  Parameters
46
46
  ----------
47
- vector_or_matrix : numpy.array
47
+ vector_or_matrix : numpy.ndarray
48
48
  Phylo2Vec vector (ndim == 1)/matrix (ndim == 2)
49
49
  filepath : str
50
50
  Path to the output file
@@ -4,6 +4,7 @@ import warnings
4
4
  import numpy as np
5
5
 
6
6
  from phylo2vec import _phylo2vec_core as core
7
+ from phylo2vec.utils.matrix import check_matrix
7
8
  from phylo2vec.utils.vector import check_vector
8
9
 
9
10
 
@@ -14,7 +15,7 @@ def cophenetic_distances(vector_or_matrix, unrooted=False):
14
15
 
15
16
  Parameters
16
17
  ----------
17
- vector_or_matrix : numpy.array
18
+ vector_or_matrix : numpy.ndarray
18
19
  Phylo2Vec vector (ndim == 1)/matrix (ndim == 2)
19
20
 
20
21
  Returns
@@ -46,7 +47,7 @@ def cophenetic_distances(vector_or_matrix, unrooted=False):
46
47
  PAIRWISE_DISTANCES = {"cophenetic": cophenetic_distances}
47
48
 
48
49
 
49
- def pairwise_distances(v, metric="cophenetic"):
50
+ def pairwise_distances(vector_or_matrix, metric="cophenetic"):
50
51
  """
51
52
  Compute a pairwise distance matrix
52
53
  for tree nodes from a Phylo2Vec vector or matrix.
@@ -55,8 +56,8 @@ def pairwise_distances(v, metric="cophenetic"):
55
56
 
56
57
  Parameters
57
58
  ----------
58
- v : numpy.ndarray
59
- Phylo2Vec vector
59
+ vector_or_matrix : numpy.ndarray
60
+ Phylo2Vec vector (ndim == 1)/matrix (ndim == 2)
60
61
  metric : str, optional
61
62
  Pairwise distance metric, by default "cophenetic"
62
63
 
@@ -65,8 +66,15 @@ def pairwise_distances(v, metric="cophenetic"):
65
66
  numpy.ndarray
66
67
  Distance matrix
67
68
  """
68
- check_vector(v)
69
+ if vector_or_matrix.ndim == 2:
70
+ check_matrix(vector_or_matrix)
71
+ elif vector_or_matrix.ndim == 1:
72
+ check_vector(vector_or_matrix)
73
+ else:
74
+ raise ValueError(
75
+ "vector_or_matrix should either be a vector (ndim == 1) or matrix (ndim == 2)"
76
+ )
69
77
 
70
78
  func = PAIRWISE_DISTANCES[metric]
71
79
 
72
- return func(v)
80
+ return func(vector_or_matrix)
@@ -0,0 +1,7 @@
1
+ """Phylo2Vec-based optimisation methods."""
2
+
3
+ from ._gradme import GradME
4
+ from ._gradme_losses import gradme_loss
5
+ from ._hc import HillClimbing
6
+
7
+ __all__ = ["GradME", "gradme_loss", "HillClimbing"]
@@ -0,0 +1,132 @@
1
+ """Base class for all optimisation methods in Phylo2Vec."""
2
+
3
+ import multiprocessing
4
+ import random
5
+ import time
6
+
7
+ from dataclasses import dataclass
8
+ from typing import Dict, List, Final
9
+
10
+ import numpy as np
11
+
12
+ from phylo2vec.datasets import read_fasta
13
+ from phylo2vec.utils.matrix import sample_matrix
14
+ from phylo2vec.utils.vector import sample_vector
15
+
16
+ # Multiprocessing
17
+ DEFAULT_N_JOBS: Final = multiprocessing.cpu_count() // 4
18
+ MIN_N_JOBS: Final = 4
19
+ # Seeding
20
+ MAX_SEED = 42
21
+
22
+
23
+ @dataclass
24
+ class BaseResult:
25
+ """Result of the optimization process.
26
+
27
+ Attributes
28
+ ----------
29
+ best : numpy.ndarray
30
+ Optimized phylo2vec vector or matrix.
31
+ label_mapping : Dict[int, str]
32
+ Mapping of leaf labels (integer) to taxa.
33
+ best_score : float
34
+ The best score achieved during optimization.
35
+ scores : List[float]
36
+ List of scores obtained during optimization.
37
+ """
38
+
39
+ best: np.ndarray
40
+ label_mapping: Dict[int, str]
41
+ best_score: float
42
+ scores: List[float]
43
+
44
+
45
+ class BaseOptimizer:
46
+ """
47
+ Base class for all phylo2vec-based optimizers
48
+
49
+ Parameters
50
+ ----------
51
+ random_seed : int, optional
52
+ Random seed, by default None; if None (or 0), a seed is drawn at random from [0, MAX_SEED]
53
+ """
54
+
55
+ def __init__(self, mode="vector", random_seed=None, verbose=False, n_jobs=None):
56
+
57
+ assert mode in ["vector", "matrix"], "Mode must be either 'vector' or 'matrix'."
58
+ self.mode = mode
59
+
60
+ self.random_seed = random_seed or random.randint(0, MAX_SEED)
61
+ random.seed(self.random_seed)
62
+ np.random.seed(self.random_seed)
63
+
64
+ self.verbose = verbose
65
+
66
+ self.n_jobs = self._infer_n_jobs(n_jobs)
67
+
68
+ @staticmethod
69
+ def _infer_n_jobs(n_jobs=None):
70
+ return n_jobs or max(MIN_N_JOBS, DEFAULT_N_JOBS)
71
+
72
+ def fit(self, fasta_path) -> BaseResult:
73
+ """Fit an optimizer to a fasta file
74
+
75
+ Parameters
76
+ ----------
77
+ fasta_path : str
78
+ Path to fasta file
79
+
80
+ Returns
81
+ -------
82
+ result : BaseResult
83
+ Result of the optimization process, containing the optimized vector or matrix,
84
+ label mapping, best score, and scores during optimization.
85
+ """
86
+ # TODO: figure out how to change this when the user selects load fasta
87
+ # Probably a boolean "preloaded"
88
+ # If True, change the fasta path to include the module name
89
+ records = read_fasta(fasta_path)
90
+
91
+ # Make a label mapping from the records
92
+ label_mapping = dict(enumerate(r.id.replace(" ", ".") for r in records))
93
+
94
+ n_leaves = len(label_mapping)
95
+
96
+ if self.mode == "vector":
97
+ obj_init = sample_vector(n_leaves)
98
+ elif self.mode == "matrix":
99
+ obj_init = sample_matrix(n_leaves)
100
+ else:
101
+ raise ValueError(f"Unknown mode: {self.mode}")
102
+
103
+ start_time = time.time()
104
+
105
+ result = self._optimise(fasta_path, obj_init, label_mapping)
106
+
107
+ end_time = time.time()
108
+
109
+ if self.verbose:
110
+ print(
111
+ f"Optimisation finished in {end_time - start_time:.2f} seconds "
112
+ f"with {len(result.scores)} loss evaluations."
113
+ )
114
+
115
+ return result
116
+
117
+ def _optimise(self, fasta_path, tree, label_mapping):
118
+ raise NotImplementedError
119
+
120
+ def __repr__(self):
121
+ # TODO: maybe something like sklearn pprint
122
+ # https://github.com/scikit-learn/scikit-learn/blob/093e0cf14aff026cca6097e8c42f83b735d26358/sklearn/utils/_pprint.py#L116
123
+ format_string = f"{self.__class__.__name__}("
124
+
125
+ for item in vars(self):
126
+ format_string += "\n"
127
+ # TODO: pprint if dict?
128
+ format_string += f"\t{item}={repr(self.__getattribute__(item))},"
129
+
130
+ format_string = format_string[:-1] + "\n)"
131
+
132
+ return format_string