phylogenie 2.1.6__py3-none-any.whl → 2.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of phylogenie might be problematic. Click here for more details.
- phylogenie/generators/alisim.py +1 -1
- phylogenie/generators/configs.py +1 -1
- phylogenie/generators/dataset.py +2 -2
- phylogenie/generators/factories.py +1 -1
- phylogenie/generators/trees.py +7 -1
- phylogenie/io.py +12 -2
- phylogenie/models.py +17 -0
- phylogenie/plot.py +39 -0
- phylogenie/tree.py +32 -20
- phylogenie/treesimulator/__init__.py +7 -0
- phylogenie/treesimulator/events/__init__.py +2 -0
- phylogenie/treesimulator/events/mutations.py +11 -3
- phylogenie/treesimulator/features.py +39 -0
- phylogenie/treesimulator/gillespie.py +10 -5
- phylogenie/treesimulator/model.py +24 -10
- phylogenie/utils.py +22 -11
- {phylogenie-2.1.6.dist-info → phylogenie-2.1.8.dist-info}/METADATA +4 -1
- phylogenie-2.1.8.dist-info/RECORD +35 -0
- phylogenie-2.1.6.dist-info/RECORD +0 -32
- {phylogenie-2.1.6.dist-info → phylogenie-2.1.8.dist-info}/LICENSE.txt +0 -0
- {phylogenie-2.1.6.dist-info → phylogenie-2.1.8.dist-info}/WHEEL +0 -0
- {phylogenie-2.1.6.dist-info → phylogenie-2.1.8.dist-info}/entry_points.txt +0 -0
phylogenie/generators/alisim.py
CHANGED
|
@@ -72,7 +72,7 @@ class AliSimDatasetGenerator(DatasetGenerator):
|
|
|
72
72
|
)
|
|
73
73
|
|
|
74
74
|
for leaf in tree.get_leaves():
|
|
75
|
-
leaf.
|
|
75
|
+
leaf.name += f"|{leaf.get_time()}"
|
|
76
76
|
dump_newick(tree, f"{tree_filename}.nwk")
|
|
77
77
|
|
|
78
78
|
self._generate_one_from_tree(msa_filename, f"{tree_filename}.nwk", rng, d)
|
phylogenie/generators/configs.py
CHANGED
phylogenie/generators/dataset.py
CHANGED
|
@@ -8,7 +8,7 @@ import pandas as pd
|
|
|
8
8
|
from numpy.random import Generator, default_rng
|
|
9
9
|
from tqdm import tqdm
|
|
10
10
|
|
|
11
|
-
from phylogenie.
|
|
11
|
+
from phylogenie.models import Distribution, StrictBaseModel
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class DataType(str, Enum):
|
|
@@ -31,7 +31,7 @@ class DatasetGenerator(ABC, StrictBaseModel):
|
|
|
31
31
|
def generate_one(
|
|
32
32
|
self,
|
|
33
33
|
filename: str,
|
|
34
|
-
context: dict[str,
|
|
34
|
+
context: dict[str, Distribution] | None = None,
|
|
35
35
|
seed: int | None = None,
|
|
36
36
|
) -> dict[str, Any]: ...
|
|
37
37
|
|
|
@@ -8,6 +8,7 @@ import phylogenie.generators.configs as cfg
|
|
|
8
8
|
import phylogenie.generators.typeguards as ctg
|
|
9
9
|
import phylogenie.typeguards as tg
|
|
10
10
|
import phylogenie.typings as pgt
|
|
11
|
+
from phylogenie.models import Distribution
|
|
11
12
|
from phylogenie.skyline import (
|
|
12
13
|
SkylineMatrix,
|
|
13
14
|
SkylineMatrixCoercible,
|
|
@@ -16,7 +17,6 @@ from phylogenie.skyline import (
|
|
|
16
17
|
SkylineVector,
|
|
17
18
|
SkylineVectorCoercible,
|
|
18
19
|
)
|
|
19
|
-
from phylogenie.utils import Distribution
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def _eval_expression(expression: str, data: dict[str, Any]) -> Any:
|
phylogenie/generators/trees.py
CHANGED
|
@@ -19,9 +19,11 @@ from phylogenie.generators.factories import (
|
|
|
19
19
|
skyline_vector,
|
|
20
20
|
)
|
|
21
21
|
from phylogenie.io import dump_newick
|
|
22
|
+
from phylogenie.models import Distribution
|
|
22
23
|
from phylogenie.tree import Tree
|
|
23
24
|
from phylogenie.treesimulator import (
|
|
24
25
|
Event,
|
|
26
|
+
Feature,
|
|
25
27
|
Mutation,
|
|
26
28
|
get_BD_events,
|
|
27
29
|
get_BDEI_events,
|
|
@@ -30,6 +32,7 @@ from phylogenie.treesimulator import (
|
|
|
30
32
|
get_contact_tracing_events,
|
|
31
33
|
get_epidemiological_events,
|
|
32
34
|
get_FBD_events,
|
|
35
|
+
set_features,
|
|
33
36
|
simulate_tree,
|
|
34
37
|
)
|
|
35
38
|
|
|
@@ -51,6 +54,7 @@ class TreeDatasetGenerator(DatasetGenerator):
|
|
|
51
54
|
init_state: str | None = None
|
|
52
55
|
sampling_probability_at_present: cfg.Scalar = 0.0
|
|
53
56
|
timeout: float = np.inf
|
|
57
|
+
node_features: list[Feature] | None = None
|
|
54
58
|
|
|
55
59
|
@abstractmethod
|
|
56
60
|
def _get_events(self, data: dict[str, Any]) -> list[Event]: ...
|
|
@@ -77,7 +81,7 @@ class TreeDatasetGenerator(DatasetGenerator):
|
|
|
77
81
|
def generate_one(
|
|
78
82
|
self,
|
|
79
83
|
filename: str,
|
|
80
|
-
context: dict[str,
|
|
84
|
+
context: dict[str, Distribution] | None = None,
|
|
81
85
|
seed: int | None = None,
|
|
82
86
|
) -> dict[str, Any]:
|
|
83
87
|
d = {"file_id": Path(filename).stem}
|
|
@@ -86,6 +90,8 @@ class TreeDatasetGenerator(DatasetGenerator):
|
|
|
86
90
|
try:
|
|
87
91
|
d.update(data(context, rng))
|
|
88
92
|
tree = self.simulate_one(d, seed)
|
|
93
|
+
if self.node_features is not None:
|
|
94
|
+
set_features(tree, self.node_features)
|
|
89
95
|
dump_newick(tree, f"{filename}.nwk")
|
|
90
96
|
break
|
|
91
97
|
except TimeoutError:
|
phylogenie/io.py
CHANGED
|
@@ -44,7 +44,12 @@ def _parse_newick(newick: str) -> Tree:
|
|
|
44
44
|
raise ValueError(f"Expected '&&NHX' for node features.")
|
|
45
45
|
for feature in features[1:]:
|
|
46
46
|
key, value = feature.split("=", 1)
|
|
47
|
-
|
|
47
|
+
try:
|
|
48
|
+
current_node.set(key, eval(value))
|
|
49
|
+
except Exception as e:
|
|
50
|
+
raise ValueError(
|
|
51
|
+
f"Error setting node feature `{key}` to `{value}`: {e}"
|
|
52
|
+
)
|
|
48
53
|
|
|
49
54
|
if newick[i] == ")":
|
|
50
55
|
current_children = current_nodes
|
|
@@ -65,13 +70,18 @@ def load_newick(filepath: str) -> Tree | list[Tree]:
|
|
|
65
70
|
|
|
66
71
|
def _to_newick(tree: Tree) -> str:
|
|
67
72
|
children_newick = ",".join([_to_newick(child) for child in tree.children])
|
|
68
|
-
newick = tree.
|
|
73
|
+
newick = tree.name
|
|
69
74
|
if children_newick:
|
|
70
75
|
newick = f"({children_newick}){newick}"
|
|
71
76
|
if tree.branch_length is not None:
|
|
72
77
|
newick += f":{tree.branch_length}"
|
|
73
78
|
if tree.features:
|
|
74
79
|
reprs = {k: repr(v).replace("'", '"') for k, v in tree.features.items()}
|
|
80
|
+
for k, r in reprs.items():
|
|
81
|
+
if ":" in r or "=" in r or "]" in r:
|
|
82
|
+
raise ValueError(
|
|
83
|
+
f"Cannot serialize feature `{k}` with value `{r}`: contains reserved characters."
|
|
84
|
+
)
|
|
75
85
|
features = [f"{k}={repr}" for k, repr in reprs.items()]
|
|
76
86
|
newick += f"[&&NHX:{':'.join(features)}]"
|
|
77
87
|
return newick
|
phylogenie/models.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, ConfigDict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class StrictBaseModel(BaseModel):
|
|
7
|
+
model_config = ConfigDict(extra="forbid")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Distribution(BaseModel):
|
|
11
|
+
type: str
|
|
12
|
+
model_config = ConfigDict(extra="allow")
|
|
13
|
+
|
|
14
|
+
@property
|
|
15
|
+
def args(self) -> dict[str, Any]:
|
|
16
|
+
assert self.model_extra is not None
|
|
17
|
+
return self.model_extra
|
phylogenie/plot.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import matplotlib.colors as mcolors
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
|
|
4
|
+
from phylogenie import Tree
|
|
5
|
+
from phylogenie.tree import Tree
|
|
6
|
+
from phylogenie.utils import get_times
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def plot_tree(
|
|
10
|
+
tree: Tree,
|
|
11
|
+
ax: plt.Axes | None = None, # pyright: ignore
|
|
12
|
+
color_by: str | None = None,
|
|
13
|
+
default_color: str = "black",
|
|
14
|
+
cmap: str = "tab20",
|
|
15
|
+
) -> plt.Axes: # pyright: ignore
|
|
16
|
+
if ax is None:
|
|
17
|
+
ax = plt.gca()
|
|
18
|
+
|
|
19
|
+
xs = get_times(tree)
|
|
20
|
+
ys = {node.name: i for i, node in enumerate(tree.inorder_traversal())}
|
|
21
|
+
if color_by is not None:
|
|
22
|
+
features = set(node.get(color_by) for node in tree)
|
|
23
|
+
feature_colors = {
|
|
24
|
+
f: mcolors.to_hex(plt.get_cmap(cmap, len(features))(i))
|
|
25
|
+
for i, f in enumerate(features)
|
|
26
|
+
}
|
|
27
|
+
colors = {node.name: feature_colors[node.get(color_by)] for node in tree}
|
|
28
|
+
else:
|
|
29
|
+
colors = {node.name: default_color for node in tree}
|
|
30
|
+
|
|
31
|
+
for node in tree:
|
|
32
|
+
if node.parent is None:
|
|
33
|
+
continue
|
|
34
|
+
x0, y0 = xs[node.parent.name], ys[node.parent.name]
|
|
35
|
+
x1, y1 = xs[node.name], ys[node.name]
|
|
36
|
+
ax.plot([x0, x0], [y0, y1], color=colors[node.name]) # pyright: ignore
|
|
37
|
+
ax.plot([x0, x1], [y1, y1], color=colors[node.name]) # pyright: ignore
|
|
38
|
+
ax.set_yticks([]) # pyright: ignore
|
|
39
|
+
return ax
|
phylogenie/tree.py
CHANGED
|
@@ -3,8 +3,8 @@ from typing import Any
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class Tree:
|
|
6
|
-
def __init__(self,
|
|
7
|
-
self.
|
|
6
|
+
def __init__(self, name: str = "", branch_length: float | None = None):
|
|
7
|
+
self.name = name
|
|
8
8
|
self.branch_length = branch_length
|
|
9
9
|
self._parent: Tree | None = None
|
|
10
10
|
self._children: list[Tree] = []
|
|
@@ -20,7 +20,7 @@ class Tree:
|
|
|
20
20
|
|
|
21
21
|
@property
|
|
22
22
|
def features(self) -> dict[str, Any]:
|
|
23
|
-
return self._features
|
|
23
|
+
return self._features.copy()
|
|
24
24
|
|
|
25
25
|
def add_child(self, child: "Tree") -> "Tree":
|
|
26
26
|
child._parent = self
|
|
@@ -36,6 +36,17 @@ class Tree:
|
|
|
36
36
|
if node is not None:
|
|
37
37
|
node._children.append(self)
|
|
38
38
|
|
|
39
|
+
def inorder_traversal(self) -> Iterator["Tree"]:
|
|
40
|
+
if self.is_leaf():
|
|
41
|
+
yield self
|
|
42
|
+
return
|
|
43
|
+
if len(self.children) != 2:
|
|
44
|
+
raise ValueError("Inorder traversal is only defined for binary trees.")
|
|
45
|
+
left, right = self.children
|
|
46
|
+
yield from left.inorder_traversal()
|
|
47
|
+
yield self
|
|
48
|
+
yield from right.inorder_traversal()
|
|
49
|
+
|
|
39
50
|
def preorder_traversal(self) -> Iterator["Tree"]:
|
|
40
51
|
yield self
|
|
41
52
|
for child in self.children:
|
|
@@ -46,41 +57,42 @@ class Tree:
|
|
|
46
57
|
yield from child.postorder_traversal()
|
|
47
58
|
yield self
|
|
48
59
|
|
|
49
|
-
def get_node(self,
|
|
60
|
+
def get_node(self, name: str) -> "Tree":
|
|
50
61
|
for node in self:
|
|
51
|
-
if node.
|
|
62
|
+
if node.name == name:
|
|
52
63
|
return node
|
|
53
|
-
raise ValueError(f"Node with
|
|
64
|
+
raise ValueError(f"Node with name {name} not found.")
|
|
54
65
|
|
|
55
66
|
def is_leaf(self) -> bool:
|
|
56
67
|
return not self.children
|
|
57
68
|
|
|
58
|
-
def get_leaves(self) ->
|
|
59
|
-
return
|
|
69
|
+
def get_leaves(self) -> tuple["Tree", ...]:
|
|
70
|
+
return tuple(node for node in self if not node.children)
|
|
71
|
+
|
|
72
|
+
def parse_branch_length(self) -> float:
|
|
73
|
+
if self.branch_length is None:
|
|
74
|
+
raise ValueError(f"Branch length of node {self.name} is not set.")
|
|
75
|
+
return self.branch_length
|
|
60
76
|
|
|
61
77
|
def get_time(self) -> float:
|
|
62
78
|
parent_time = 0 if self.parent is None else self.parent.get_time()
|
|
63
|
-
|
|
64
|
-
if self.parent is not None:
|
|
65
|
-
raise ValueError(
|
|
66
|
-
f"Branch length of non-root node {self.id} is not set."
|
|
67
|
-
)
|
|
68
|
-
return 0.0
|
|
69
|
-
return self.branch_length + parent_time
|
|
79
|
+
return self.parse_branch_length() + parent_time
|
|
70
80
|
|
|
71
81
|
def set(self, key: str, value: Any) -> None:
|
|
72
82
|
self._features[key] = value
|
|
73
83
|
|
|
84
|
+
def update_features(self, features: dict[str, Any]) -> None:
|
|
85
|
+
self._features.update(features)
|
|
86
|
+
|
|
74
87
|
def get(self, key: str) -> Any:
|
|
75
|
-
return self._features
|
|
88
|
+
return self._features[key]
|
|
76
89
|
|
|
77
90
|
def delete(self, key: str) -> None:
|
|
78
91
|
del self._features[key]
|
|
79
92
|
|
|
80
93
|
def copy(self):
|
|
81
|
-
new_tree = Tree(self.
|
|
82
|
-
|
|
83
|
-
new_tree.set(key, value)
|
|
94
|
+
new_tree = Tree(self.name, self.branch_length)
|
|
95
|
+
new_tree.update_features(self._features)
|
|
84
96
|
for child in self.children:
|
|
85
97
|
new_tree.add_child(child.copy())
|
|
86
98
|
return new_tree
|
|
@@ -89,4 +101,4 @@ class Tree:
|
|
|
89
101
|
return self.preorder_traversal()
|
|
90
102
|
|
|
91
103
|
def __repr__(self) -> str:
|
|
92
|
-
return f"TreeNode(
|
|
104
|
+
return f"TreeNode(name='{self.name}', branch_length={self.branch_length}, features={self.features})"
|
|
@@ -15,8 +15,11 @@ from phylogenie.treesimulator.events import (
|
|
|
15
15
|
get_contact_tracing_events,
|
|
16
16
|
get_epidemiological_events,
|
|
17
17
|
get_FBD_events,
|
|
18
|
+
get_mutation_id,
|
|
18
19
|
)
|
|
20
|
+
from phylogenie.treesimulator.features import Feature, set_features
|
|
19
21
|
from phylogenie.treesimulator.gillespie import generate_trees, simulate_tree
|
|
22
|
+
from phylogenie.treesimulator.model import get_node_state
|
|
20
23
|
|
|
21
24
|
__all__ = [
|
|
22
25
|
"Birth",
|
|
@@ -37,4 +40,8 @@ __all__ = [
|
|
|
37
40
|
"get_FBD_events",
|
|
38
41
|
"generate_trees",
|
|
39
42
|
"simulate_tree",
|
|
43
|
+
"get_mutation_id",
|
|
44
|
+
"get_node_state",
|
|
45
|
+
"Feature",
|
|
46
|
+
"set_features",
|
|
40
47
|
]
|
|
@@ -18,6 +18,7 @@ from phylogenie.treesimulator.events.core import (
|
|
|
18
18
|
)
|
|
19
19
|
from phylogenie.treesimulator.events.mutations import Mutation
|
|
20
20
|
from phylogenie.treesimulator.events.mutations import TargetType as MutationTargetType
|
|
21
|
+
from phylogenie.treesimulator.events.mutations import get_mutation_id
|
|
21
22
|
|
|
22
23
|
__all__ = [
|
|
23
24
|
"Birth",
|
|
@@ -36,4 +37,5 @@ __all__ = [
|
|
|
36
37
|
"get_contact_tracing_events",
|
|
37
38
|
"get_epidemiological_events",
|
|
38
39
|
"get_FBD_events",
|
|
40
|
+
"get_mutation_id",
|
|
39
41
|
]
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
import re
|
|
1
2
|
from copy import deepcopy
|
|
2
3
|
from enum import Enum
|
|
3
4
|
from typing import Type
|
|
4
5
|
|
|
5
6
|
from numpy.random import Generator
|
|
6
7
|
|
|
8
|
+
from phylogenie.models import Distribution
|
|
7
9
|
from phylogenie.skyline import SkylineParameterLike
|
|
8
10
|
from phylogenie.treesimulator.events.contact_tracing import (
|
|
9
11
|
BirthWithContactTracing,
|
|
@@ -17,7 +19,6 @@ from phylogenie.treesimulator.events.core import (
|
|
|
17
19
|
Sampling,
|
|
18
20
|
)
|
|
19
21
|
from phylogenie.treesimulator.model import Model
|
|
20
|
-
from phylogenie.utils import Distribution
|
|
21
22
|
|
|
22
23
|
MUTATION_PREFIX = "MUT-"
|
|
23
24
|
MUTATIONS_KEY = "MUTATIONS"
|
|
@@ -29,10 +30,17 @@ def _get_mutation(state: str) -> str | None:
|
|
|
29
30
|
|
|
30
31
|
def _get_mutated_state(mutation_id: int, state: str) -> str:
|
|
31
32
|
if state.startswith(MUTATION_PREFIX):
|
|
32
|
-
state = state.split(".")
|
|
33
|
+
_, state = state.split(".")
|
|
33
34
|
return f"{MUTATION_PREFIX}{mutation_id}.{state}"
|
|
34
35
|
|
|
35
36
|
|
|
37
|
+
def get_mutation_id(node_name: str) -> int:
|
|
38
|
+
match = re.search(rf"{MUTATION_PREFIX}(\d+)\.", node_name)
|
|
39
|
+
if match:
|
|
40
|
+
return int(match.group(1))
|
|
41
|
+
return 0
|
|
42
|
+
|
|
43
|
+
|
|
36
44
|
class TargetType(str, Enum):
|
|
37
45
|
BIRTH = "birth"
|
|
38
46
|
DEATH = "death"
|
|
@@ -93,7 +101,7 @@ class Mutation(Event):
|
|
|
93
101
|
model.add_event(event)
|
|
94
102
|
|
|
95
103
|
def __repr__(self) -> str:
|
|
96
|
-
return f"Mutation(state={self.state}, rate={self.rate}
|
|
104
|
+
return f"Mutation(state={self.state}, rate={self.rate})"
|
|
97
105
|
|
|
98
106
|
|
|
99
107
|
TARGETS: dict[TargetType, tuple[Type[Event], ...]] = {
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from collections.abc import Iterable
|
|
2
|
+
from enum import Enum
|
|
3
|
+
|
|
4
|
+
from phylogenie.tree import Tree
|
|
5
|
+
from phylogenie.treesimulator.events import get_mutation_id
|
|
6
|
+
from phylogenie.treesimulator.model import get_node_state
|
|
7
|
+
from phylogenie.utils import get_heights, get_n_tips, get_times
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _get_states(tree: Tree) -> dict[str, str]:
|
|
11
|
+
return {node.name: get_node_state(node.name) for node in tree}
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _get_mutations(tree: Tree) -> dict[str, int]:
|
|
15
|
+
return {node.name: get_mutation_id(node.name) for node in tree}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Feature(str, Enum):
|
|
19
|
+
STATE = "state"
|
|
20
|
+
MUTATION = "mutation"
|
|
21
|
+
N_TIPS = "n_tips"
|
|
22
|
+
TIME = "time"
|
|
23
|
+
HEIGHT = "height"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
FEATURES_EXTRACTORS = {
|
|
27
|
+
Feature.STATE: _get_states,
|
|
28
|
+
Feature.MUTATION: _get_mutations,
|
|
29
|
+
Feature.N_TIPS: get_n_tips,
|
|
30
|
+
Feature.TIME: get_times,
|
|
31
|
+
Feature.HEIGHT: get_heights,
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def set_features(tree: Tree, features: Iterable[Feature]) -> None:
|
|
36
|
+
for feature in features:
|
|
37
|
+
feature_maps = FEATURES_EXTRACTORS[feature](tree)
|
|
38
|
+
for node in tree:
|
|
39
|
+
node.set(feature.value, feature_maps[node.name])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import time
|
|
3
|
-
from collections.abc import Sequence
|
|
3
|
+
from collections.abc import Iterable, Sequence
|
|
4
4
|
|
|
5
5
|
import joblib
|
|
6
6
|
import numpy as np
|
|
@@ -9,6 +9,7 @@ from tqdm import tqdm
|
|
|
9
9
|
|
|
10
10
|
from phylogenie.io import dump_newick
|
|
11
11
|
from phylogenie.tree import Tree
|
|
12
|
+
from phylogenie.treesimulator.features import Feature, set_features
|
|
12
13
|
from phylogenie.treesimulator.model import Event, Model
|
|
13
14
|
|
|
14
15
|
|
|
@@ -69,7 +70,7 @@ def simulate_tree(
|
|
|
69
70
|
if (
|
|
70
71
|
not any(rates)
|
|
71
72
|
or max_tips is not None
|
|
72
|
-
and model.n_sampled
|
|
73
|
+
and model.n_sampled >= max_tips
|
|
73
74
|
or target_n_tips is not None
|
|
74
75
|
and model.n_sampled >= target_n_tips
|
|
75
76
|
):
|
|
@@ -101,12 +102,13 @@ def simulate_tree(
|
|
|
101
102
|
def generate_trees(
|
|
102
103
|
output_dir: str,
|
|
103
104
|
n_trees: int,
|
|
104
|
-
events:
|
|
105
|
+
events: Sequence[Event],
|
|
105
106
|
min_tips: int = 1,
|
|
106
|
-
max_tips: int =
|
|
107
|
+
max_tips: int | None = None,
|
|
107
108
|
max_time: float = np.inf,
|
|
108
109
|
init_state: str | None = None,
|
|
109
110
|
sampling_probability_at_present: float = 0.0,
|
|
111
|
+
node_features: Iterable[Feature] | None = None,
|
|
110
112
|
seed: int | None = None,
|
|
111
113
|
n_jobs: int = -1,
|
|
112
114
|
timeout: float = np.inf,
|
|
@@ -114,7 +116,7 @@ def generate_trees(
|
|
|
114
116
|
def _simulate_tree(seed: int) -> Tree:
|
|
115
117
|
while True:
|
|
116
118
|
try:
|
|
117
|
-
|
|
119
|
+
tree = simulate_tree(
|
|
118
120
|
events=events,
|
|
119
121
|
min_tips=min_tips,
|
|
120
122
|
max_tips=max_tips,
|
|
@@ -124,6 +126,9 @@ def generate_trees(
|
|
|
124
126
|
seed=seed,
|
|
125
127
|
timeout=timeout,
|
|
126
128
|
)
|
|
129
|
+
if node_features is not None:
|
|
130
|
+
set_features(tree, node_features)
|
|
131
|
+
return tree
|
|
127
132
|
except TimeoutError:
|
|
128
133
|
print("Simulation timed out, retrying with a different seed...")
|
|
129
134
|
seed += 1
|
|
@@ -11,6 +11,13 @@ from phylogenie.skyline import SkylineParameterLike, skyline_parameter
|
|
|
11
11
|
from phylogenie.tree import Tree
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
@dataclass
|
|
15
|
+
class Individual:
|
|
16
|
+
id: int
|
|
17
|
+
node: Tree
|
|
18
|
+
state: str
|
|
19
|
+
|
|
20
|
+
|
|
14
21
|
class Event(ABC):
|
|
15
22
|
def __init__(self, state: str, rate: SkylineParameterLike):
|
|
16
23
|
self.state = state
|
|
@@ -30,11 +37,17 @@ class Event(ABC):
|
|
|
30
37
|
def apply(self, model: "Model", time: float, rng: Generator) -> None: ...
|
|
31
38
|
|
|
32
39
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
40
|
+
def _get_node_name(node_id: int, state: str) -> str:
|
|
41
|
+
return f"{node_id}|{state}"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_node_state(node_name: str) -> str:
|
|
45
|
+
try:
|
|
46
|
+
return node_name.split("|")[1]
|
|
47
|
+
except IndexError:
|
|
48
|
+
raise ValueError(
|
|
49
|
+
f"Invalid node name: {node_name} (expected format 'id|state')."
|
|
50
|
+
)
|
|
38
51
|
|
|
39
52
|
|
|
40
53
|
class Model:
|
|
@@ -61,7 +74,8 @@ class Model:
|
|
|
61
74
|
|
|
62
75
|
def _get_new_node(self, state: str) -> Tree:
|
|
63
76
|
self._next_node_id += 1
|
|
64
|
-
|
|
77
|
+
node = Tree(_get_node_name(self._next_node_id, state))
|
|
78
|
+
return node
|
|
65
79
|
|
|
66
80
|
def _get_new_individual(self, state: str) -> Individual:
|
|
67
81
|
self._next_individual_id += 1
|
|
@@ -74,7 +88,7 @@ class Model:
|
|
|
74
88
|
|
|
75
89
|
def _set_branch_length(self, node: Tree, time: float) -> None:
|
|
76
90
|
if node.branch_length is not None:
|
|
77
|
-
raise ValueError(f"Branch length of node {node.
|
|
91
|
+
raise ValueError(f"Branch length of node {node.name} is already set.")
|
|
78
92
|
node.branch_length = (
|
|
79
93
|
time if node.parent is None else time - node.parent.get_time()
|
|
80
94
|
)
|
|
@@ -108,12 +122,12 @@ class Model:
|
|
|
108
122
|
def sample(self, id: int, time: float, removal: bool) -> None:
|
|
109
123
|
individual = self._population[id]
|
|
110
124
|
if removal:
|
|
111
|
-
self._sampled.add(individual.node.
|
|
125
|
+
self._sampled.add(individual.node.name)
|
|
112
126
|
self.remove(id, time)
|
|
113
127
|
else:
|
|
114
128
|
sample_node = self._get_new_node(individual.state)
|
|
115
129
|
sample_node.branch_length = 0.0
|
|
116
|
-
self._sampled.add(sample_node.
|
|
130
|
+
self._sampled.add(sample_node.name)
|
|
117
131
|
individual.node.add_child(sample_node)
|
|
118
132
|
self._stem(individual, time)
|
|
119
133
|
|
|
@@ -123,7 +137,7 @@ class Model:
|
|
|
123
137
|
def get_sampled_tree(self) -> Tree:
|
|
124
138
|
tree = self._tree.copy()
|
|
125
139
|
for node in list(tree.postorder_traversal()):
|
|
126
|
-
if node.
|
|
140
|
+
if node.name not in self._sampled and not node.children:
|
|
127
141
|
if node.parent is None:
|
|
128
142
|
raise ValueError("No samples in the tree.")
|
|
129
143
|
else:
|
phylogenie/utils.py
CHANGED
|
@@ -1,17 +1,28 @@
|
|
|
1
|
-
from
|
|
1
|
+
from phylogenie.tree import Tree
|
|
2
2
|
|
|
3
|
-
from pydantic import BaseModel, ConfigDict
|
|
4
3
|
|
|
4
|
+
def get_n_tips(tree: Tree) -> dict[str, int]:
|
|
5
|
+
n_tips: dict[str, int] = {}
|
|
6
|
+
for node in tree.postorder_traversal():
|
|
7
|
+
n_tips[node.name] = (
|
|
8
|
+
1 if node.is_leaf() else sum(n_tips[child.name] for child in node.children)
|
|
9
|
+
)
|
|
10
|
+
return n_tips
|
|
5
11
|
|
|
6
|
-
class StrictBaseModel(BaseModel):
|
|
7
|
-
model_config = ConfigDict(extra="forbid")
|
|
8
12
|
|
|
13
|
+
def get_times(tree: Tree) -> dict[str, float]:
|
|
14
|
+
times: dict[str, float] = {}
|
|
15
|
+
for node in tree:
|
|
16
|
+
parent_time = 0 if node.parent is None else times[node.parent.name]
|
|
17
|
+
times[node.name] = node.parse_branch_length() + parent_time
|
|
18
|
+
return times
|
|
9
19
|
|
|
10
|
-
class Distribution(BaseModel):
|
|
11
|
-
type: str
|
|
12
|
-
model_config = ConfigDict(extra="allow")
|
|
13
20
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
21
|
+
def get_heights(tree: Tree) -> dict[str, int]:
|
|
22
|
+
heights: dict[str, int] = {}
|
|
23
|
+
for node in tree.postorder_traversal():
|
|
24
|
+
if node.is_leaf():
|
|
25
|
+
heights[node.name] = 0
|
|
26
|
+
else:
|
|
27
|
+
heights[node.name] = 1 + max(heights[child.name] for child in node.children)
|
|
28
|
+
return heights
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: phylogenie
|
|
3
|
-
Version: 2.1.
|
|
3
|
+
Version: 2.1.8
|
|
4
4
|
Summary: Generate phylogenetic datasets with minimal setup effort
|
|
5
5
|
Author: Gabriele Marino
|
|
6
6
|
Author-email: gabmarino.8601@gmail.com
|
|
@@ -10,6 +10,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3.11
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
12
12
|
Requires-Dist: joblib (>=1.4.2,<2.0.0)
|
|
13
|
+
Requires-Dist: matplotlib (>=3.10.6,<4.0.0)
|
|
13
14
|
Requires-Dist: pandas (>=2.2.2,<3.0.0)
|
|
14
15
|
Requires-Dist: pydantic (>=2.11.5,<3.0.0)
|
|
15
16
|
Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
|
|
@@ -24,6 +25,8 @@ Description-Content-Type: text/markdown
|
|
|
24
25
|
|
|
25
26
|
[AliSim](https://iqtree.github.io/doc/AliSim)
|
|
26
27
|
[PyPI](https://pypi.org/project/phylogenie/)
|
|
28
|
+

|
|
29
|
+
|
|
27
30
|
|
|
28
31
|
Phylogenie is a [Python](https://www.python.org/) package designed to easily simulate phylogenetic datasets—such as trees and multiple sequence alignments (MSAs)—with minimal setup effort. Simply specify the distributions from which your parameters should be sampled, and Phylogenie will handle the rest!
|
|
29
32
|
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
phylogenie/__init__.py,sha256=T2mRLsYtoLlWt8GlxrrUnfXJ9XVioq7hTvVq3uJpwQI,2215
|
|
2
|
+
phylogenie/generators/__init__.py,sha256=zsOxy28-9j9alOQLIgrOAFfmM58NNHO_NEtW-KXQXAY,888
|
|
3
|
+
phylogenie/generators/alisim.py,sha256=G7p6tfcDWncg4xl3NhEXxgikcZDUi_RdE33zWn_CvzY,2704
|
|
4
|
+
phylogenie/generators/configs.py,sha256=WFoeKpgj9ZQIom7BKqwpgXbriiQGg3jFBMLoD8KButk,1073
|
|
5
|
+
phylogenie/generators/dataset.py,sha256=pPwW9yxm9fkU0PPllFq8EsPlqau8tth-4OatbA_hEHo,2120
|
|
6
|
+
phylogenie/generators/factories.py,sha256=TuVFQWRjq33Hewjw_Lp8tQ0l_IPtqYDyQCNJhtiHpw8,7882
|
|
7
|
+
phylogenie/generators/trees.py,sha256=6tHS013RNRyIsObeYq2Kx9it7Yc0TgiMkeAHbskHzAM,10748
|
|
8
|
+
phylogenie/generators/typeguards.py,sha256=yj4VkhOaUXJ2OrY-6zhOeY9C4yKIQxjZtk2d-vIxttQ,828
|
|
9
|
+
phylogenie/io.py,sha256=nwy8DOknt0HqF9qMeFZHrCmSXpM5AGrU5oajwTtD6vY,3973
|
|
10
|
+
phylogenie/main.py,sha256=vtvSpQxBNlYABoFQ25czl-l3fIr4QRo3svWVd-jcArw,1170
|
|
11
|
+
phylogenie/models.py,sha256=pCg9ob0RpLUHwM49x4knKxL4FNPr3-EU_6zMXsvxtAg,370
|
|
12
|
+
phylogenie/msa.py,sha256=JDGyZUsAq6-m-SQjoCDjAkAZIxfgyl_PDIhdYn5HOow,2064
|
|
13
|
+
phylogenie/plot.py,sha256=yLJ331UNta0J3d8qdLBtLdIXsC3Qd0anA1qf_5AUwK8,1310
|
|
14
|
+
phylogenie/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
+
phylogenie/skyline/__init__.py,sha256=7pF4CUb4ZCLzNYJNhOjpuTOLTRhlK7L6ugfccNqjIGo,620
|
|
16
|
+
phylogenie/skyline/matrix.py,sha256=Gl8OgKjtieG0NwPYiPimKI36gefV8fm_OeorjdXxPTs,9146
|
|
17
|
+
phylogenie/skyline/parameter.py,sha256=EM9qlPt0JhMBy3TbztM0dj24BaGNEy8KWKdTObDKhbI,4644
|
|
18
|
+
phylogenie/skyline/vector.py,sha256=bJP7_FNX_Klt6wXqsyfj0KX3VNj6-dIhzCKSJuQcOV0,7115
|
|
19
|
+
phylogenie/tree.py,sha256=63A--s8C8K685KzZ_3hslkKM-lpqSM39-VFIuwBlIjk,3257
|
|
20
|
+
phylogenie/treesimulator/__init__.py,sha256=yqS2vtYMhdWSXc9RAnX1dd4zAqSQweMLyVKTnJLfGTU,1106
|
|
21
|
+
phylogenie/treesimulator/events/__init__.py,sha256=6zSgZ0MEUMvTK4yPlSolJnRWzCARLS-jYreTzh45mQo,1033
|
|
22
|
+
phylogenie/treesimulator/events/contact_tracing.py,sha256=_nJ85yhgGkeruQgMHvGpDYoyhheBf8M4LgZWiWdi5dY,4801
|
|
23
|
+
phylogenie/treesimulator/events/core.py,sha256=RF7oHzAjkU675PnczaVc66d9gNrHBL-IhmVHtcy7MKE,7949
|
|
24
|
+
phylogenie/treesimulator/events/mutations.py,sha256=erEvgfiv_X3G_DwK9Hqu-fAR8otupfwq66cp5tRZamM,3591
|
|
25
|
+
phylogenie/treesimulator/features.py,sha256=f1t7DE_dw8MhD-FAzQr0j8pxHdvLENy5AQeAjJyOjOQ,1082
|
|
26
|
+
phylogenie/treesimulator/gillespie.py,sha256=LZHB2Ko147E78LoUCtN_BN7NYO1xhMYRy5PUZbN93c0,5283
|
|
27
|
+
phylogenie/treesimulator/model.py,sha256=Ct0lfn6maKtjuFxivWx1MbFHvH3Y-fiJ0XXMdkN3Cak,5775
|
|
28
|
+
phylogenie/typeguards.py,sha256=JtqmbEWJZBRHbWgCvcl6nrWm3VcBfzRbklbTBYHItn0,1325
|
|
29
|
+
phylogenie/typings.py,sha256=GknvAFXyiaWeeYJ8Lk5d6E2VHT-xW6ONEojYbtJYiB8,476
|
|
30
|
+
phylogenie/utils.py,sha256=JDe6_8-9AFYf7SuTrqUXTZ7n3CA-goQGg9vTi5UcHB8,878
|
|
31
|
+
phylogenie-2.1.8.dist-info/LICENSE.txt,sha256=NUrDqElK-eD3I0WqC004CJsy6cs0JgsAoebDv_42-pw,1071
|
|
32
|
+
phylogenie-2.1.8.dist-info/METADATA,sha256=-F1xhii9rWBPtXGyjIr0UTVWbsrD7wnswCbw--LoS8I,5476
|
|
33
|
+
phylogenie-2.1.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
34
|
+
phylogenie-2.1.8.dist-info/entry_points.txt,sha256=Rt6_usN0FkBX1ZfiqCirjMN9FKOgFLG8rydcQ8kugeE,51
|
|
35
|
+
phylogenie-2.1.8.dist-info/RECORD,,
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
phylogenie/__init__.py,sha256=T2mRLsYtoLlWt8GlxrrUnfXJ9XVioq7hTvVq3uJpwQI,2215
|
|
2
|
-
phylogenie/generators/__init__.py,sha256=zsOxy28-9j9alOQLIgrOAFfmM58NNHO_NEtW-KXQXAY,888
|
|
3
|
-
phylogenie/generators/alisim.py,sha256=6IQEn8zNoVSLdHLRT1HBnrv1wfZzYaT-38SQNXE_MU4,2702
|
|
4
|
-
phylogenie/generators/configs.py,sha256=AiiFS6rpH9BPwDKCkT4SVrRzfLFFrwRCJM4CRj0Srdk,1072
|
|
5
|
-
phylogenie/generators/dataset.py,sha256=loVKC_1G7gzkPDN9W3GF-Rj9od8AeOJgIC0aJJa-4KA,2110
|
|
6
|
-
phylogenie/generators/factories.py,sha256=OKCNg9jwF2KahZCJKTdJVxhLTeLNBDMOqJs7XSVXTqY,7881
|
|
7
|
-
phylogenie/generators/trees.py,sha256=KEDvyrfMjUPWwyp1xnhWpatNnjmDLPhSSQl6Msx19Ms,10508
|
|
8
|
-
phylogenie/generators/typeguards.py,sha256=yj4VkhOaUXJ2OrY-6zhOeY9C4yKIQxjZtk2d-vIxttQ,828
|
|
9
|
-
phylogenie/io.py,sha256=y7nQIvLgCvqELsXFKfm1GgKJO_saoQ-7zQpE3Kvajzc,3509
|
|
10
|
-
phylogenie/main.py,sha256=vtvSpQxBNlYABoFQ25czl-l3fIr4QRo3svWVd-jcArw,1170
|
|
11
|
-
phylogenie/msa.py,sha256=JDGyZUsAq6-m-SQjoCDjAkAZIxfgyl_PDIhdYn5HOow,2064
|
|
12
|
-
phylogenie/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
-
phylogenie/skyline/__init__.py,sha256=7pF4CUb4ZCLzNYJNhOjpuTOLTRhlK7L6ugfccNqjIGo,620
|
|
14
|
-
phylogenie/skyline/matrix.py,sha256=Gl8OgKjtieG0NwPYiPimKI36gefV8fm_OeorjdXxPTs,9146
|
|
15
|
-
phylogenie/skyline/parameter.py,sha256=EM9qlPt0JhMBy3TbztM0dj24BaGNEy8KWKdTObDKhbI,4644
|
|
16
|
-
phylogenie/skyline/vector.py,sha256=bJP7_FNX_Klt6wXqsyfj0KX3VNj6-dIhzCKSJuQcOV0,7115
|
|
17
|
-
phylogenie/tree.py,sha256=KTx3m_tJPdeBqA5i0SA3dwCIobxsFcJwZFunMfDmVBY,2791
|
|
18
|
-
phylogenie/treesimulator/__init__.py,sha256=XG_xwETKWgDmCihqNUFCcMHtFg4WvZu5qbqWn9Dndt8,879
|
|
19
|
-
phylogenie/treesimulator/events/__init__.py,sha256=UGfvXOVJ_ZAkk_8sBPihjmxciiaEnXZEPFIY53sttWI,940
|
|
20
|
-
phylogenie/treesimulator/events/contact_tracing.py,sha256=_nJ85yhgGkeruQgMHvGpDYoyhheBf8M4LgZWiWdi5dY,4801
|
|
21
|
-
phylogenie/treesimulator/events/core.py,sha256=RF7oHzAjkU675PnczaVc66d9gNrHBL-IhmVHtcy7MKE,7949
|
|
22
|
-
phylogenie/treesimulator/events/mutations.py,sha256=xkXUIppbLIWZqKwVf-hi7d-_pS42TG2EPVfJA_grxBg,3443
|
|
23
|
-
phylogenie/treesimulator/gillespie.py,sha256=EfEbuMBQSk9izamBPaQ9rNKA4NvtrI3XVm891G2iMeM,5014
|
|
24
|
-
phylogenie/treesimulator/model.py,sha256=0Im6cFTlpMlJrSP4pTTKtvLT9qrQWV8MSTesAsBxT8g,5422
|
|
25
|
-
phylogenie/typeguards.py,sha256=JtqmbEWJZBRHbWgCvcl6nrWm3VcBfzRbklbTBYHItn0,1325
|
|
26
|
-
phylogenie/typings.py,sha256=GknvAFXyiaWeeYJ8Lk5d6E2VHT-xW6ONEojYbtJYiB8,476
|
|
27
|
-
phylogenie/utils.py,sha256=pCg9ob0RpLUHwM49x4knKxL4FNPr3-EU_6zMXsvxtAg,370
|
|
28
|
-
phylogenie-2.1.6.dist-info/LICENSE.txt,sha256=NUrDqElK-eD3I0WqC004CJsy6cs0JgsAoebDv_42-pw,1071
|
|
29
|
-
phylogenie-2.1.6.dist-info/METADATA,sha256=dkSGsLeSFma9nDy-6J_gAHlOEs2NxjUeJvo4Ey4x5WM,5375
|
|
30
|
-
phylogenie-2.1.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
31
|
-
phylogenie-2.1.6.dist-info/entry_points.txt,sha256=Rt6_usN0FkBX1ZfiqCirjMN9FKOgFLG8rydcQ8kugeE,51
|
|
32
|
-
phylogenie-2.1.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|