phylogenie 1.0.8__tar.gz → 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phylogenie-1.0.8 → phylogenie-2.0.0}/PKG-INFO +6 -18
- {phylogenie-1.0.8 → phylogenie-2.0.0}/README.md +5 -17
- phylogenie-2.0.0/phylogenie/generators/__init__.py +14 -0
- phylogenie-2.0.0/phylogenie/generators/alisim.py +71 -0
- phylogenie-2.0.0/phylogenie/generators/configs.py +41 -0
- {phylogenie-1.0.8/phylogenie/core → phylogenie-2.0.0/phylogenie/generators}/dataset.py +25 -23
- {phylogenie-1.0.8/phylogenie/core → phylogenie-2.0.0/phylogenie/generators}/factories.py +42 -52
- phylogenie-2.0.0/phylogenie/generators/trees.py +220 -0
- phylogenie-2.0.0/phylogenie/generators/typeguards.py +32 -0
- phylogenie-2.0.0/phylogenie/io.py +92 -0
- {phylogenie-1.0.8 → phylogenie-2.0.0}/phylogenie/main.py +2 -2
- phylogenie-2.0.0/phylogenie/msa.py +72 -0
- {phylogenie-1.0.8 → phylogenie-2.0.0}/phylogenie/skyline/matrix.py +62 -45
- {phylogenie-1.0.8 → phylogenie-2.0.0}/phylogenie/skyline/vector.py +8 -6
- phylogenie-2.0.0/phylogenie/tree.py +53 -0
- phylogenie-2.0.0/phylogenie/treesimulator/__init__.py +21 -0
- phylogenie-2.0.0/phylogenie/treesimulator/events.py +256 -0
- phylogenie-2.0.0/phylogenie/treesimulator/gillespie.py +66 -0
- phylogenie-2.0.0/phylogenie/treesimulator/model.py +100 -0
- {phylogenie-1.0.8 → phylogenie-2.0.0}/phylogenie/typings.py +0 -2
- {phylogenie-1.0.8 → phylogenie-2.0.0}/pyproject.toml +1 -1
- phylogenie-1.0.8/phylogenie/backend/__init__.py +0 -0
- phylogenie-1.0.8/phylogenie/backend/remaster/__init__.py +0 -21
- phylogenie-1.0.8/phylogenie/backend/remaster/generate.py +0 -187
- phylogenie-1.0.8/phylogenie/backend/remaster/reactions.py +0 -165
- phylogenie-1.0.8/phylogenie/backend/treesimulator.py +0 -163
- phylogenie-1.0.8/phylogenie/configs.py +0 -5
- phylogenie-1.0.8/phylogenie/core/__init__.py +0 -14
- phylogenie-1.0.8/phylogenie/core/configs.py +0 -37
- phylogenie-1.0.8/phylogenie/core/context/__init__.py +0 -4
- phylogenie-1.0.8/phylogenie/core/context/configs.py +0 -28
- phylogenie-1.0.8/phylogenie/core/context/distributions.py +0 -125
- phylogenie-1.0.8/phylogenie/core/context/factories.py +0 -54
- phylogenie-1.0.8/phylogenie/core/msas/__init__.py +0 -10
- phylogenie-1.0.8/phylogenie/core/msas/alisim.py +0 -35
- phylogenie-1.0.8/phylogenie/core/msas/base.py +0 -51
- phylogenie-1.0.8/phylogenie/core/trees/__init__.py +0 -11
- phylogenie-1.0.8/phylogenie/core/trees/base.py +0 -13
- phylogenie-1.0.8/phylogenie/core/trees/remaster/__init__.py +0 -3
- phylogenie-1.0.8/phylogenie/core/trees/remaster/configs.py +0 -14
- phylogenie-1.0.8/phylogenie/core/trees/remaster/factories.py +0 -26
- phylogenie-1.0.8/phylogenie/core/trees/remaster/generator.py +0 -177
- phylogenie-1.0.8/phylogenie/core/trees/treesimulator.py +0 -199
- phylogenie-1.0.8/phylogenie/core/typeguards.py +0 -32
- {phylogenie-1.0.8 → phylogenie-2.0.0}/LICENSE.txt +0 -0
- {phylogenie-1.0.8 → phylogenie-2.0.0}/phylogenie/__init__.py +0 -0
- {phylogenie-1.0.8 → phylogenie-2.0.0}/phylogenie/py.typed +0 -0
- {phylogenie-1.0.8 → phylogenie-2.0.0}/phylogenie/skyline/__init__.py +0 -0
- {phylogenie-1.0.8 → phylogenie-2.0.0}/phylogenie/skyline/parameter.py +0 -0
- {phylogenie-1.0.8 → phylogenie-2.0.0}/phylogenie/typeguards.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: phylogenie
|
|
3
|
-
Version:
|
|
3
|
+
Version: 2.0.0
|
|
4
4
|
Summary: Generate phylogenetic datasets with minimal setup effort
|
|
5
5
|
Author: Gabriele Marino
|
|
6
6
|
Author-email: gabmarino.8601@gmail.com
|
|
@@ -23,9 +23,9 @@ Description-Content-Type: text/markdown
|
|
|
23
23
|
|
|
24
24
|
---
|
|
25
25
|
|
|
26
|
-
[](https://github.com/evolbioinfo/treesimulator)
|
|
27
|
-
[](https://tgvaughan.github.io/remaster/)
|
|
28
26
|
[](https://iqtree.github.io/doc/AliSim)
|
|
27
|
+
[](https://pypi.org/project/phylogenie/)
|
|
28
|
+
[](https://pypi.org/project/phylogenie/)
|
|
29
29
|
|
|
30
30
|
Phylogenie is a [Python](https://www.python.org/) package designed to easily simulate phylogenetic datasets—such as trees and multiple sequence alignments (MSAs)—with minimal setup effort. Simply specify the distributions from which your parameters should be sampled, and Phylogenie will handle the rest!
|
|
31
31
|
|
|
@@ -46,7 +46,7 @@ Phylogenie comes packed with useful features, including:
|
|
|
46
46
|
Simply specify the number of cores to use, and Phylogenie handles multiprocessing automatically.
|
|
47
47
|
|
|
48
48
|
- **Pre-implemented parameterizations** 🎯
|
|
49
|
-
Include canonical, fossilized birth-death, epidemiological, birth-death with exposed-infectious (BDEI), birth-death with superspreading (BDSS),
|
|
49
|
+
Include canonical, fossilized birth-death, epidemiological, birth-death with exposed-infectious (BDEI), birth-death with superspreading (BDSS), and more.
|
|
50
50
|
|
|
51
51
|
- **Skyline parameter support** 🪜
|
|
52
52
|
Support for piece-wise constant parameters.
|
|
@@ -54,9 +54,6 @@ Phylogenie comes packed with useful features, including:
|
|
|
54
54
|
- **Arithmetic operations on parameters** 🧮
|
|
55
55
|
Perform flexible arithmetic operations between parameters directly within the config file.
|
|
56
56
|
|
|
57
|
-
- **Support for common phylogenetic simulation tools** 🛠️
|
|
58
|
-
Compatible backends include ReMASTER, TreeSimulator, and AliSim.
|
|
59
|
-
|
|
60
57
|
- **Modular and extendible architecture** 🧩
|
|
61
58
|
Easily add new simulation backends as needed.
|
|
62
59
|
|
|
@@ -76,18 +73,9 @@ cd phylogenie
|
|
|
76
73
|
pip install .
|
|
77
74
|
```
|
|
78
75
|
|
|
79
|
-
## 🛠 Backend
|
|
80
|
-
|
|
81
|
-
Phylogenie works with the following simulation backends:
|
|
82
|
-
|
|
83
|
-
- **[TreeSimulator](https://github.com/evolbioinfo/treesimulator)**
|
|
84
|
-
A [Python](https://www.python.org/) package for simulating phylogenetic trees. It is automatically installed with Phylogenie, so you can use it right away.
|
|
85
|
-
|
|
86
|
-
- **[ReMASTER](https://tgvaughan.github.io/remaster/)**
|
|
87
|
-
A [BEAST2](https://www.beast2.org/) package designed for tree simulation. To use ReMASTER as a backend, you need to install it separately.
|
|
76
|
+
## 🛠 Backend dependency
|
|
88
77
|
|
|
89
|
-
|
|
90
|
-
A tool for simulating multiple sequence alignments (MSAs). It is distributed with [IQ-TREE](https://iqtree.github.io/) and also requires separate installation if you wish to use it as a backend.
|
|
78
|
+
Phylogenie relies on [AliSim](https://iqtree.github.io/doc/AliSim) for simulating multiple sequence alignments (MSAs). AliSim is a powerful MSA simulation tool distributed with [IQ-TREE](https://iqtree.github.io/); it must be installed separately before it can be used as a simulation backend.
|
|
91
79
|
|
|
92
80
|
## 🚀 Quick Start
|
|
93
81
|
|
|
@@ -4,9 +4,9 @@
|
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
[](https://github.com/evolbioinfo/treesimulator)
|
|
8
|
-
[](https://tgvaughan.github.io/remaster/)
|
|
9
7
|
[](https://iqtree.github.io/doc/AliSim)
|
|
8
|
+
[](https://pypi.org/project/phylogenie/)
|
|
9
|
+
[](https://pypi.org/project/phylogenie/)
|
|
10
10
|
|
|
11
11
|
Phylogenie is a [Python](https://www.python.org/) package designed to easily simulate phylogenetic datasets—such as trees and multiple sequence alignments (MSAs)—with minimal setup effort. Simply specify the distributions from which your parameters should be sampled, and Phylogenie will handle the rest!
|
|
12
12
|
|
|
@@ -27,7 +27,7 @@ Phylogenie comes packed with useful features, including:
|
|
|
27
27
|
Simply specify the number of cores to use, and Phylogenie handles multiprocessing automatically.
|
|
28
28
|
|
|
29
29
|
- **Pre-implemented parameterizations** 🎯
|
|
30
|
-
Include canonical, fossilized birth-death, epidemiological, birth-death with exposed-infectious (BDEI), birth-death with superspreading (BDSS),
|
|
30
|
+
Include canonical, fossilized birth-death, epidemiological, birth-death with exposed-infectious (BDEI), birth-death with superspreading (BDSS), and more.
|
|
31
31
|
|
|
32
32
|
- **Skyline parameter support** 🪜
|
|
33
33
|
Support for piece-wise constant parameters.
|
|
@@ -35,9 +35,6 @@ Phylogenie comes packed with useful features, including:
|
|
|
35
35
|
- **Arithmetic operations on parameters** 🧮
|
|
36
36
|
Perform flexible arithmetic operations between parameters directly within the config file.
|
|
37
37
|
|
|
38
|
-
- **Support for common phylogenetic simulation tools** 🛠️
|
|
39
|
-
Compatible backends include ReMASTER, TreeSimulator, and AliSim.
|
|
40
|
-
|
|
41
38
|
- **Modular and extendible architecture** 🧩
|
|
42
39
|
Easily add new simulation backends as needed.
|
|
43
40
|
|
|
@@ -57,18 +54,9 @@ cd phylogenie
|
|
|
57
54
|
pip install .
|
|
58
55
|
```
|
|
59
56
|
|
|
60
|
-
## 🛠 Backend
|
|
61
|
-
|
|
62
|
-
Phylogenie works with the following simulation backends:
|
|
63
|
-
|
|
64
|
-
- **[TreeSimulator](https://github.com/evolbioinfo/treesimulator)**
|
|
65
|
-
A [Python](https://www.python.org/) package for simulating phylogenetic trees. It is automatically installed with Phylogenie, so you can use it right away.
|
|
66
|
-
|
|
67
|
-
- **[ReMASTER](https://tgvaughan.github.io/remaster/)**
|
|
68
|
-
A [BEAST2](https://www.beast2.org/) package designed for tree simulation. To use ReMASTER as a backend, you need to install it separately.
|
|
57
|
+
## 🛠 Backend dependency
|
|
69
58
|
|
|
70
|
-
|
|
71
|
-
A tool for simulating multiple sequence alignments (MSAs). It is distributed with [IQ-TREE](https://iqtree.github.io/) and also requires separate installation if you wish to use it as a backend.
|
|
59
|
+
Phylogenie relies on [AliSim](https://iqtree.github.io/doc/AliSim) for simulating multiple sequence alignments (MSAs). AliSim is a powerful MSA simulation tool distributed with [IQ-TREE](https://iqtree.github.io/); it must be installed separately before it can be used as a simulation backend.
|
|
72
60
|
|
|
73
61
|
## 🚀 Quick Start
|
|
74
62
|
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
3
|
+
from pydantic import Field
|
|
4
|
+
|
|
5
|
+
from phylogenie.generators.alisim import AliSimDatasetGenerator
|
|
6
|
+
from phylogenie.generators.dataset import DatasetGenerator
|
|
7
|
+
from phylogenie.generators.trees import TreeDatasetGeneratorConfig
|
|
8
|
+
|
|
9
|
+
DatasetGeneratorConfig = Annotated[
|
|
10
|
+
TreeDatasetGeneratorConfig | AliSimDatasetGenerator,
|
|
11
|
+
Field(discriminator="data_type"),
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
__all__ = ["DatasetGeneratorConfig", "DatasetGenerator"]
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any, Literal
|
|
5
|
+
|
|
6
|
+
from numpy.random import Generator
|
|
7
|
+
|
|
8
|
+
from phylogenie.generators.dataset import DatasetGenerator, DataType
|
|
9
|
+
from phylogenie.generators.trees import TreeDatasetGeneratorConfig
|
|
10
|
+
from phylogenie.io import dump_newick
|
|
11
|
+
|
|
12
|
+
MSAS_DIRNAME = "MSAs"
|
|
13
|
+
TREES_DIRNAME = "trees"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class AliSimDatasetGenerator(DatasetGenerator):
    """Generate MSA datasets by simulating a tree and running AliSim on it.

    For every sample a tree is drawn from ``trees``, written to a Newick file,
    and passed to the IQ-TREE executable in ``--alisim`` mode.
    """

    data_type: Literal[DataType.MSAS] = DataType.MSAS
    # Tree generator configuration used to simulate the tree of each sample.
    trees: TreeDatasetGeneratorConfig
    # When True, trees are kept under ``trees/`` and MSAs go under ``MSAs/``.
    keep_trees: bool = False
    # Executable invoked to run AliSim.
    iqtree_path: str = "iqtree2"
    # Extra command-line options; string values are formatted with the
    # per-sample context data before being passed to the executable.
    args: dict[str, str | int | float]

    def _generate_one_from_tree(
        self, filename: str, tree_file: str, rng: Generator, data: dict[str, Any]
    ) -> None:
        """Run AliSim on ``tree_file`` and write the alignment to ``filename``.

        Raises:
            subprocess.CalledProcessError: if the AliSim command fails.
        """
        command = [
            self.iqtree_path,
            "--alisim",
            filename,
            "--tree",
            tree_file,
            "--seed",
            str(rng.integers(2**32)),
        ]

        for key, value in self.args.items():
            command.extend(
                [key, value.format(**data) if isinstance(value, str) else str(value)]
            )

        command.extend(["-af", "fasta"])
        subprocess.run(command, check=True, stdout=subprocess.DEVNULL)

        # Clean up the log left next to the tree file. Done in-process rather
        # than by spawning ``rm`` so it also works where ``rm`` is unavailable;
        # a missing log is not an error because there is nothing to clean up.
        Path(f"{tree_file}.log").unlink(missing_ok=True)

    def _generate_one(
        self, filename: str, rng: Generator, data: dict[str, Any]
    ) -> None:
        """Simulate one tree and the corresponding MSA.

        Nothing is written when the tree simulation returns ``None``.
        """
        if self.keep_trees:
            base_dir = Path(filename).parent
            file_id = Path(filename).stem
            trees_dir = os.path.join(base_dir, TREES_DIRNAME)
            msas_dir = os.path.join(base_dir, MSAS_DIRNAME)
            # Make sure both output folders exist before anything is written.
            # NOTE(review): dump_newick may already create the parent folder;
            # creating it here is harmless either way.
            os.makedirs(trees_dir, exist_ok=True)
            os.makedirs(msas_dir, exist_ok=True)
            tree_filename = os.path.join(trees_dir, file_id)
            msa_filename = os.path.join(msas_dir, file_id)
        else:
            tree_filename = f"{filename}.temp-tree"
            msa_filename = filename

        tree = self.trees.simulate_one(rng, data)
        if tree is None:
            return

        # Append the sampling time to every leaf name.
        for leaf in tree.get_leaves():
            leaf.id += f"|{leaf.get_time()}"

        tree_file = f"{tree_filename}.nwk"
        dump_newick(tree, tree_file)

        try:
            self._generate_one_from_tree(
                filename=msa_filename, tree_file=tree_file, rng=rng, data=data
            )
        finally:
            # The temporary tree must not outlive the call, even when AliSim
            # fails; otherwise stray ``*.temp-tree.nwk`` files pile up.
            if not self.keep_trees:
                Path(tree_file).unlink(missing_ok=True)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from pydantic import BaseModel, ConfigDict
|
|
2
|
+
|
|
3
|
+
import phylogenie.typings as pgt
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DistributionConfig(BaseModel):
    """Description of a distribution to sample a context variable from.

    Unknown fields are accepted (``extra="allow"``) so that arbitrary
    distribution parameters can be supplied alongside ``type``.
    """

    model_config = ConfigDict(extra="allow")

    # Name of the distribution.
    type: str
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
IntegerConfig = str | int
|
|
12
|
+
ScalarConfig = str | pgt.Scalar
|
|
13
|
+
ManyScalarsConfig = str | list[ScalarConfig]
|
|
14
|
+
OneOrManyScalarsConfig = ScalarConfig | list[ScalarConfig]
|
|
15
|
+
OneOrMany2DScalarsConfig = ScalarConfig | list[list[ScalarConfig]]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class StrictBaseModel(BaseModel):
    """Pydantic base model that rejects fields it does not declare."""

    model_config = ConfigDict(extra="forbid")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SkylineParameterModel(StrictBaseModel):
    """Config of a skyline parameter given as values plus change times.

    Both fields accept either an expression string or a list of scalar configs.
    """

    value: ManyScalarsConfig
    change_times: ManyScalarsConfig
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SkylineVectorModel(StrictBaseModel):
    """Config of a skyline vector given as values plus change times.

    ``value`` is either an expression string or a list whose items are
    scalar configs or lists of scalar configs.
    """

    value: str | list[OneOrManyScalarsConfig]
    change_times: ManyScalarsConfig
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class SkylineMatrixModel(StrictBaseModel):
    """Config of a skyline matrix given as values plus change times.

    ``value`` is either an expression string or a list whose items are
    scalar configs or nested (2D) lists of scalar configs.
    """

    value: str | list[OneOrMany2DScalarsConfig]
    change_times: ManyScalarsConfig
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
SkylineParameterConfig = ScalarConfig | SkylineParameterModel
|
|
38
|
+
SkylineVectorConfig = (
|
|
39
|
+
str | pgt.Scalar | list[SkylineParameterConfig] | SkylineVectorModel
|
|
40
|
+
)
|
|
41
|
+
SkylineMatrixConfig = str | pgt.Scalar | list[SkylineVectorConfig] | SkylineMatrixModel
|
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
3
|
from enum import Enum
|
|
4
|
+
from itertools import product
|
|
5
|
+
from typing import Any
|
|
4
6
|
|
|
5
7
|
import joblib
|
|
8
|
+
import numpy as np
|
|
6
9
|
import pandas as pd
|
|
7
10
|
from numpy.random import Generator, default_rng
|
|
8
11
|
from tqdm import tqdm
|
|
9
12
|
|
|
10
|
-
|
|
11
|
-
from phylogenie.configs import StrictBaseModel
|
|
12
|
-
from phylogenie.core.context import ContextConfig, context_factory
|
|
13
|
+
from phylogenie.generators.configs import DistributionConfig, StrictBaseModel
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
class DataType(str, Enum):
|
|
@@ -17,51 +18,52 @@ class DataType(str, Enum):
|
|
|
17
18
|
MSAS = "msas"
|
|
18
19
|
|
|
19
20
|
|
|
21
|
+
DATA_DIRNAME = "data"
|
|
22
|
+
METADATA_FILENAME = "metadata.csv"
|
|
23
|
+
|
|
24
|
+
|
|
20
25
|
class DatasetGenerator(ABC, StrictBaseModel):
|
|
21
|
-
output_dir: str = "phylogenie-
|
|
22
|
-
data_dir: str = "data"
|
|
23
|
-
metadata_filename: str = "metadata.csv"
|
|
26
|
+
output_dir: str = "phylogenie-outputs"
|
|
24
27
|
n_samples: int | dict[str, int] = 1
|
|
25
28
|
n_jobs: int = -1
|
|
26
29
|
seed: int | None = None
|
|
27
|
-
context:
|
|
30
|
+
context: dict[str, DistributionConfig] | None = None
|
|
28
31
|
|
|
29
32
|
@abstractmethod
|
|
30
|
-
def _generate_one(
|
|
33
|
+
def _generate_one(
|
|
34
|
+
self, filename: str, rng: Generator, data: dict[str, Any]
|
|
35
|
+
) -> None: ...
|
|
31
36
|
|
|
32
37
|
def generate_one(
|
|
33
|
-
self, filename: str, data:
|
|
38
|
+
self, filename: str, data: dict[str, Any] | None = None, seed: int | None = None
|
|
34
39
|
) -> None:
|
|
35
40
|
data = {} if data is None else data
|
|
36
41
|
self._generate_one(filename=filename, rng=default_rng(seed), data=data)
|
|
37
42
|
|
|
38
43
|
def _generate(self, rng: Generator, n_samples: int, output_dir: str) -> None:
    """Generate ``n_samples`` samples and their metadata under ``output_dir``.

    The per-sample context values are drawn first and saved to the metadata
    CSV; the samples themselves are then generated in parallel, each with
    its own seed drawn from ``rng``. Nothing is done when the data directory
    already exists.
    """
    data_dir = os.path.join(output_dir, DATA_DIRNAME)
    if os.path.exists(data_dir):
        print(f"Output directory {data_dir} already exists. Skipping.")
        return
    os.makedirs(data_dir)

    # One independent dict per sample. ``[{}] * n_samples`` would repeat a
    # reference to a single dict, so every sample would end up sharing the
    # values drawn last and the metadata rows would all be identical.
    data: list[dict[str, Any]] = [{} for _ in range(n_samples)]
    if self.context is not None:
        for d, (k, v) in product(data, self.context.items()):
            # Extra fields of the distribution config are forwarded as
            # keyword arguments to the generator method named by ``type``.
            args = v.model_extra if v.model_extra is not None else {}
            d[k] = np.array(getattr(rng, v.type)(**args)).tolist()
    df = pd.DataFrame([{"file_id": str(i), **d} for i, d in enumerate(data)])
    df.to_csv(os.path.join(output_dir, METADATA_FILENAME), index=False)

    joblib.Parallel(n_jobs=self.n_jobs)(
        joblib.delayed(self.generate_one)(
            filename=os.path.join(data_dir, str(i)),
            data=data[i],
            seed=int(rng.integers(2**32)),
        )
        for i in tqdm(range(n_samples), desc=f"Generating {data_dir}...")
    )
|
|
61
66
|
|
|
62
|
-
df = pd.DataFrame([{"file_id": str(i), **d} for i, d in enumerate(data)])
|
|
63
|
-
df.to_csv(metadata_file, index=False)
|
|
64
|
-
|
|
65
67
|
def generate(self) -> None:
|
|
66
68
|
rng = default_rng(self.seed)
|
|
67
69
|
if isinstance(self.n_samples, dict):
|
|
@@ -2,8 +2,8 @@ from typing import Any
|
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
|
|
5
|
-
import phylogenie.
|
|
6
|
-
import phylogenie.
|
|
5
|
+
import phylogenie.generators.configs as cfg
|
|
6
|
+
import phylogenie.generators.typeguards as ctg
|
|
7
7
|
import phylogenie.typeguards as tg
|
|
8
8
|
import phylogenie.typings as pgt
|
|
9
9
|
from phylogenie.skyline import (
|
|
@@ -16,7 +16,7 @@ from phylogenie.skyline import (
|
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def _eval_expression(expression: str, data:
|
|
19
|
+
def _eval_expression(expression: str, data: dict[str, Any]) -> Any:
|
|
20
20
|
return np.array(
|
|
21
21
|
eval(
|
|
22
22
|
expression,
|
|
@@ -29,7 +29,7 @@ def _eval_expression(expression: str, data: pgt.Data) -> Any:
|
|
|
29
29
|
).tolist()
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
def
|
|
32
|
+
def integer(x: cfg.IntegerConfig, data: dict[str, Any]) -> int:
|
|
33
33
|
if isinstance(x, str):
|
|
34
34
|
e = _eval_expression(x, data)
|
|
35
35
|
if isinstance(e, int):
|
|
@@ -40,7 +40,7 @@ def int_factory(x: cfg.IntConfig, data: pgt.Data) -> int:
|
|
|
40
40
|
return x
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
def
|
|
43
|
+
def scalar(x: cfg.ScalarConfig, data: dict[str, Any]) -> pgt.Scalar:
|
|
44
44
|
if isinstance(x, str):
|
|
45
45
|
e = _eval_expression(x, data)
|
|
46
46
|
if isinstance(e, pgt.Scalar):
|
|
@@ -51,18 +51,7 @@ def scalar_factory(x: cfg.ScalarConfig, data: pgt.Data) -> pgt.Scalar:
|
|
|
51
51
|
return x
|
|
52
52
|
|
|
53
53
|
|
|
54
|
-
def
|
|
55
|
-
if isinstance(x, str):
|
|
56
|
-
e = _eval_expression(x, data)
|
|
57
|
-
if tg.is_many_ints(e):
|
|
58
|
-
return e
|
|
59
|
-
raise ValueError(
|
|
60
|
-
f"Expression '{x}' evaluated to {e} of type {type(e)}, expected a sequence of integers."
|
|
61
|
-
)
|
|
62
|
-
return [int_factory(v, data) for v in x]
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
def many_scalars_factory(x: cfg.ManyScalarsConfig, data: pgt.Data) -> pgt.ManyScalars:
|
|
54
|
+
def many_scalars(x: cfg.ManyScalarsConfig, data: dict[str, Any]) -> pgt.ManyScalars:
|
|
66
55
|
if isinstance(x, str):
|
|
67
56
|
e = _eval_expression(x, data)
|
|
68
57
|
if tg.is_many_scalars(e):
|
|
@@ -70,11 +59,11 @@ def many_scalars_factory(x: cfg.ManyScalarsConfig, data: pgt.Data) -> pgt.ManySc
|
|
|
70
59
|
raise ValueError(
|
|
71
60
|
f"Expression '{x}' evaluated to {e} of type {type(e)}, expected a sequence of scalars."
|
|
72
61
|
)
|
|
73
|
-
return [
|
|
62
|
+
return [scalar(v, data) for v in x]
|
|
74
63
|
|
|
75
64
|
|
|
76
|
-
def
|
|
77
|
-
x: cfg.OneOrManyScalarsConfig, data:
|
|
65
|
+
def one_or_many_scalars(
|
|
66
|
+
x: cfg.OneOrManyScalarsConfig, data: dict[str, Any]
|
|
78
67
|
) -> pgt.OneOrManyScalars:
|
|
79
68
|
if isinstance(x, str):
|
|
80
69
|
e = _eval_expression(x, data)
|
|
@@ -85,22 +74,22 @@ def one_or_many_scalars_factory(
|
|
|
85
74
|
)
|
|
86
75
|
if isinstance(x, pgt.Scalar):
|
|
87
76
|
return x
|
|
88
|
-
return
|
|
77
|
+
return many_scalars(x, data)
|
|
89
78
|
|
|
90
79
|
|
|
91
|
-
def
|
|
92
|
-
x: cfg.
|
|
80
|
+
def skyline_parameter(
|
|
81
|
+
x: cfg.SkylineParameterConfig, data: dict[str, Any]
|
|
93
82
|
) -> SkylineParameterLike:
|
|
94
83
|
if isinstance(x, cfg.ScalarConfig):
|
|
95
|
-
return
|
|
84
|
+
return scalar(x, data)
|
|
96
85
|
return SkylineParameter(
|
|
97
|
-
value=
|
|
98
|
-
change_times=
|
|
86
|
+
value=many_scalars(x.value, data),
|
|
87
|
+
change_times=many_scalars(x.change_times, data),
|
|
99
88
|
)
|
|
100
89
|
|
|
101
90
|
|
|
102
|
-
def
|
|
103
|
-
x: cfg.
|
|
91
|
+
def skyline_vector(
|
|
92
|
+
x: cfg.SkylineVectorConfig, data: dict[str, Any]
|
|
104
93
|
) -> SkylineVectorCoercible:
|
|
105
94
|
if isinstance(x, str):
|
|
106
95
|
e = _eval_expression(x, data)
|
|
@@ -111,12 +100,12 @@ def skyline_vector_coercible_factory(
|
|
|
111
100
|
)
|
|
112
101
|
if isinstance(x, pgt.Scalar):
|
|
113
102
|
return x
|
|
114
|
-
if ctg.
|
|
115
|
-
return [
|
|
103
|
+
if ctg.is_list_of_skyline_parameter_configs(x):
|
|
104
|
+
return [skyline_parameter(p, data) for p in x]
|
|
116
105
|
|
|
117
|
-
assert isinstance(x, cfg.
|
|
106
|
+
assert isinstance(x, cfg.SkylineVectorModel)
|
|
118
107
|
|
|
119
|
-
change_times =
|
|
108
|
+
change_times = many_scalars(x.change_times, data)
|
|
120
109
|
if isinstance(x.value, str):
|
|
121
110
|
e = _eval_expression(x.value, data)
|
|
122
111
|
if tg.is_many_one_or_many_scalars(e):
|
|
@@ -126,7 +115,7 @@ def skyline_vector_coercible_factory(
|
|
|
126
115
|
f"Expression '{x.value}' evaluated to {e} of type {type(e)}, which cannot be coerced to a valid value for a SkylineVector (expected a sequence composed of scalars and/or sequences of scalars)."
|
|
127
116
|
)
|
|
128
117
|
else:
|
|
129
|
-
value = [
|
|
118
|
+
value = [one_or_many_scalars(v, data) for v in x.value]
|
|
130
119
|
|
|
131
120
|
if tg.is_many_scalars(value):
|
|
132
121
|
return SkylineParameter(value=value, change_times=change_times)
|
|
@@ -142,8 +131,8 @@ def skyline_vector_coercible_factory(
|
|
|
142
131
|
return SkylineVector(value=value, change_times=change_times)
|
|
143
132
|
|
|
144
133
|
|
|
145
|
-
def
|
|
146
|
-
x: cfg.OneOrMany2DScalarsConfig, data:
|
|
134
|
+
def one_or_many_2D_scalars(
|
|
135
|
+
x: cfg.OneOrMany2DScalarsConfig, data: dict[str, Any]
|
|
147
136
|
) -> pgt.OneOrMany2DScalars:
|
|
148
137
|
if isinstance(x, str):
|
|
149
138
|
e = _eval_expression(x, data)
|
|
@@ -154,11 +143,11 @@ def one_or_many_2D_scalars_factory(
|
|
|
154
143
|
)
|
|
155
144
|
if isinstance(x, pgt.Scalar):
|
|
156
145
|
return x
|
|
157
|
-
return [
|
|
146
|
+
return [many_scalars(v, data) for v in x]
|
|
158
147
|
|
|
159
148
|
|
|
160
|
-
def
|
|
161
|
-
x: cfg.
|
|
149
|
+
def skyline_matrix(
|
|
150
|
+
x: cfg.SkylineMatrixConfig, data: dict[str, Any]
|
|
162
151
|
) -> SkylineMatrixCoercible:
|
|
163
152
|
if isinstance(x, str):
|
|
164
153
|
e = _eval_expression(x, data)
|
|
@@ -169,12 +158,12 @@ def skyline_matrix_coercible_factory(
|
|
|
169
158
|
)
|
|
170
159
|
if isinstance(x, pgt.Scalar):
|
|
171
160
|
return x
|
|
172
|
-
if ctg.
|
|
173
|
-
return [
|
|
161
|
+
if ctg.is_list_of_skyline_vector_configs(x):
|
|
162
|
+
return [skyline_vector(v, data) for v in x]
|
|
174
163
|
|
|
175
|
-
assert isinstance(x, cfg.
|
|
164
|
+
assert isinstance(x, cfg.SkylineMatrixModel)
|
|
176
165
|
|
|
177
|
-
change_times =
|
|
166
|
+
change_times = many_scalars(x.change_times, data)
|
|
178
167
|
if isinstance(x.value, str):
|
|
179
168
|
e = _eval_expression(x.value, data)
|
|
180
169
|
if tg.is_many_one_or_many_2D_scalars(e):
|
|
@@ -184,26 +173,27 @@ def skyline_matrix_coercible_factory(
|
|
|
184
173
|
f"Expression '{x.value}' evaluated to {e} of type {type(e)}, which cannot be coerced to a valid value for a SkylineMatrix (expected a sequence composed of scalars and/or nested (2D) sequences of scalars)."
|
|
185
174
|
)
|
|
186
175
|
else:
|
|
187
|
-
value = [
|
|
176
|
+
value = [one_or_many_2D_scalars(v, data) for v in x.value]
|
|
188
177
|
|
|
189
178
|
if tg.is_many_scalars(value):
|
|
190
179
|
return SkylineParameter(value=value, change_times=change_times)
|
|
191
180
|
|
|
192
|
-
|
|
181
|
+
shapes: set[tuple[int, int]] = set()
|
|
193
182
|
for elem in value:
|
|
194
183
|
if tg.is_many_2D_scalars(elem):
|
|
195
|
-
|
|
196
|
-
|
|
184
|
+
Ms = len(elem)
|
|
185
|
+
Ns = {len(row) for row in elem}
|
|
186
|
+
if len(Ns) > 1:
|
|
197
187
|
raise ValueError(
|
|
198
|
-
f"
|
|
188
|
+
f"The values of a SkylineMatrix config must be scalars or nested (2D) lists of them with a consistent row length (config {x.value} yielded element {elem} with row lengths {Ns})."
|
|
199
189
|
)
|
|
200
|
-
|
|
190
|
+
shapes.add((Ms, Ns.pop()))
|
|
201
191
|
|
|
202
|
-
if len(
|
|
192
|
+
if len(shapes) > 1:
|
|
203
193
|
raise ValueError(
|
|
204
|
-
f"All elements in the value of a SkylineMatrix config must be scalars or
|
|
194
|
+
f"All elements in the value of a SkylineMatrix config must be scalars or nested (2D) lists of them with the same shape (config {x.value} yielded value={value} with inconsistent shapes {shapes})."
|
|
205
195
|
)
|
|
206
|
-
(N,) =
|
|
207
|
-
value = [[[
|
|
196
|
+
((M, N),) = shapes
|
|
197
|
+
value = [[[e] * N] * M if isinstance(e, pgt.Scalar) else e for e in value]
|
|
208
198
|
|
|
209
199
|
return SkylineMatrix(value=value, change_times=change_times)
|