phylogenie 2.1.6__tar.gz → 2.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of phylogenie might be problematic.

Files changed (34)
  1. {phylogenie-2.1.6 → phylogenie-2.1.8}/PKG-INFO +4 -1
  2. {phylogenie-2.1.6 → phylogenie-2.1.8}/README.md +2 -0
  3. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/generators/alisim.py +1 -1
  4. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/generators/configs.py +1 -1
  5. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/generators/dataset.py +2 -2
  6. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/generators/factories.py +1 -1
  7. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/generators/trees.py +7 -1
  8. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/io.py +12 -2
  9. phylogenie-2.1.8/phylogenie/plot.py +39 -0
  10. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/tree.py +32 -20
  11. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/treesimulator/__init__.py +7 -0
  12. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/treesimulator/events/__init__.py +2 -0
  13. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/treesimulator/events/mutations.py +11 -3
  14. phylogenie-2.1.8/phylogenie/treesimulator/features.py +39 -0
  15. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/treesimulator/gillespie.py +10 -5
  16. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/treesimulator/model.py +24 -10
  17. phylogenie-2.1.8/phylogenie/utils.py +28 -0
  18. {phylogenie-2.1.6 → phylogenie-2.1.8}/pyproject.toml +2 -1
  19. {phylogenie-2.1.6 → phylogenie-2.1.8}/LICENSE.txt +0 -0
  20. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/__init__.py +0 -0
  21. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/generators/__init__.py +0 -0
  22. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/generators/typeguards.py +0 -0
  23. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/main.py +0 -0
  24. /phylogenie-2.1.6/phylogenie/utils.py → /phylogenie-2.1.8/phylogenie/models.py +0 -0
  25. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/msa.py +0 -0
  26. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/py.typed +0 -0
  27. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/skyline/__init__.py +0 -0
  28. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/skyline/matrix.py +0 -0
  29. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/skyline/parameter.py +0 -0
  30. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/skyline/vector.py +0 -0
  31. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/treesimulator/events/contact_tracing.py +0 -0
  32. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/treesimulator/events/core.py +0 -0
  33. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/typeguards.py +0 -0
  34. {phylogenie-2.1.6 → phylogenie-2.1.8}/phylogenie/typings.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phylogenie
3
- Version: 2.1.6
3
+ Version: 2.1.8
4
4
  Summary: Generate phylogenetic datasets with minimal setup effort
5
5
  Author: Gabriele Marino
6
6
  Author-email: gabmarino.8601@gmail.com
@@ -10,6 +10,7 @@ Classifier: Programming Language :: Python :: 3.10
10
10
  Classifier: Programming Language :: Python :: 3.11
11
11
  Classifier: Programming Language :: Python :: 3.12
12
12
  Requires-Dist: joblib (>=1.4.2,<2.0.0)
13
+ Requires-Dist: matplotlib (>=3.10.6,<4.0.0)
13
14
  Requires-Dist: pandas (>=2.2.2,<3.0.0)
14
15
  Requires-Dist: pydantic (>=2.11.5,<3.0.0)
15
16
  Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
@@ -24,6 +25,8 @@ Description-Content-Type: text/markdown
24
25
 
25
26
  [![AliSim](https://img.shields.io/badge/Powered%20by-AliSim-orange?style=flat-square)](https://iqtree.github.io/doc/AliSim)
26
27
  [![PyPI version](https://img.shields.io/pypi/v/phylogenie)](https://pypi.org/project/phylogenie/)
28
+ ![Downloads](https://img.shields.io/pypi/dm/phylogenie)
29
+
27
30
 
28
31
  Phylogenie is a [Python](https://www.python.org/) package designed to easily simulate phylogenetic datasets—such as trees and multiple sequence alignments (MSAs)—with minimal setup effort. Simply specify the distributions from which your parameters should be sampled, and Phylogenie will handle the rest!
29
32
 
@@ -6,6 +6,8 @@
6
6
 
7
7
  [![AliSim](https://img.shields.io/badge/Powered%20by-AliSim-orange?style=flat-square)](https://iqtree.github.io/doc/AliSim)
8
8
  [![PyPI version](https://img.shields.io/pypi/v/phylogenie)](https://pypi.org/project/phylogenie/)
9
+ ![Downloads](https://img.shields.io/pypi/dm/phylogenie)
10
+
9
11
 
10
12
  Phylogenie is a [Python](https://www.python.org/) package designed to easily simulate phylogenetic datasets—such as trees and multiple sequence alignments (MSAs)—with minimal setup effort. Simply specify the distributions from which your parameters should be sampled, and Phylogenie will handle the rest!
11
13
 
@@ -72,7 +72,7 @@ class AliSimDatasetGenerator(DatasetGenerator):
72
72
  )
73
73
 
74
74
  for leaf in tree.get_leaves():
75
- leaf.id += f"|{leaf.get_time()}"
75
+ leaf.name += f"|{leaf.get_time()}"
76
76
  dump_newick(tree, f"{tree_filename}.nwk")
77
77
 
78
78
  self._generate_one_from_tree(msa_filename, f"{tree_filename}.nwk", rng, d)
@@ -1,6 +1,6 @@
1
1
  import phylogenie.typings as pgt
2
+ from phylogenie.models import Distribution, StrictBaseModel
2
3
  from phylogenie.treesimulator import MutationTargetType
3
- from phylogenie.utils import Distribution, StrictBaseModel
4
4
 
5
5
  Integer = str | int
6
6
  Scalar = str | pgt.Scalar
@@ -8,7 +8,7 @@ import pandas as pd
8
8
  from numpy.random import Generator, default_rng
9
9
  from tqdm import tqdm
10
10
 
11
- from phylogenie.utils import Distribution, StrictBaseModel
11
+ from phylogenie.models import Distribution, StrictBaseModel
12
12
 
13
13
 
14
14
  class DataType(str, Enum):
@@ -31,7 +31,7 @@ class DatasetGenerator(ABC, StrictBaseModel):
31
31
  def generate_one(
32
32
  self,
33
33
  filename: str,
34
- context: dict[str, Any] | None = None,
34
+ context: dict[str, Distribution] | None = None,
35
35
  seed: int | None = None,
36
36
  ) -> dict[str, Any]: ...
37
37
 
@@ -8,6 +8,7 @@ import phylogenie.generators.configs as cfg
8
8
  import phylogenie.generators.typeguards as ctg
9
9
  import phylogenie.typeguards as tg
10
10
  import phylogenie.typings as pgt
11
+ from phylogenie.models import Distribution
11
12
  from phylogenie.skyline import (
12
13
  SkylineMatrix,
13
14
  SkylineMatrixCoercible,
@@ -16,7 +17,6 @@ from phylogenie.skyline import (
16
17
  SkylineVector,
17
18
  SkylineVectorCoercible,
18
19
  )
19
- from phylogenie.utils import Distribution
20
20
 
21
21
 
22
22
  def _eval_expression(expression: str, data: dict[str, Any]) -> Any:
@@ -19,9 +19,11 @@ from phylogenie.generators.factories import (
19
19
  skyline_vector,
20
20
  )
21
21
  from phylogenie.io import dump_newick
22
+ from phylogenie.models import Distribution
22
23
  from phylogenie.tree import Tree
23
24
  from phylogenie.treesimulator import (
24
25
  Event,
26
+ Feature,
25
27
  Mutation,
26
28
  get_BD_events,
27
29
  get_BDEI_events,
@@ -30,6 +32,7 @@ from phylogenie.treesimulator import (
30
32
  get_contact_tracing_events,
31
33
  get_epidemiological_events,
32
34
  get_FBD_events,
35
+ set_features,
33
36
  simulate_tree,
34
37
  )
35
38
 
@@ -51,6 +54,7 @@ class TreeDatasetGenerator(DatasetGenerator):
51
54
  init_state: str | None = None
52
55
  sampling_probability_at_present: cfg.Scalar = 0.0
53
56
  timeout: float = np.inf
57
+ node_features: list[Feature] | None = None
54
58
 
55
59
  @abstractmethod
56
60
  def _get_events(self, data: dict[str, Any]) -> list[Event]: ...
@@ -77,7 +81,7 @@ class TreeDatasetGenerator(DatasetGenerator):
77
81
  def generate_one(
78
82
  self,
79
83
  filename: str,
80
- context: dict[str, Any] | None = None,
84
+ context: dict[str, Distribution] | None = None,
81
85
  seed: int | None = None,
82
86
  ) -> dict[str, Any]:
83
87
  d = {"file_id": Path(filename).stem}
@@ -86,6 +90,8 @@ class TreeDatasetGenerator(DatasetGenerator):
86
90
  try:
87
91
  d.update(data(context, rng))
88
92
  tree = self.simulate_one(d, seed)
93
+ if self.node_features is not None:
94
+ set_features(tree, self.node_features)
89
95
  dump_newick(tree, f"{filename}.nwk")
90
96
  break
91
97
  except TimeoutError:
@@ -44,7 +44,12 @@ def _parse_newick(newick: str) -> Tree:
44
44
  raise ValueError(f"Expected '&&NHX' for node features.")
45
45
  for feature in features[1:]:
46
46
  key, value = feature.split("=", 1)
47
- current_node.set(key, eval(value))
47
+ try:
48
+ current_node.set(key, eval(value))
49
+ except Exception as e:
50
+ raise ValueError(
51
+ f"Error setting node feature `{key}` to `{value}`: {e}"
52
+ )
48
53
 
49
54
  if newick[i] == ")":
50
55
  current_children = current_nodes
@@ -65,13 +70,18 @@ def load_newick(filepath: str) -> Tree | list[Tree]:
65
70
 
66
71
  def _to_newick(tree: Tree) -> str:
67
72
  children_newick = ",".join([_to_newick(child) for child in tree.children])
68
- newick = tree.id
73
+ newick = tree.name
69
74
  if children_newick:
70
75
  newick = f"({children_newick}){newick}"
71
76
  if tree.branch_length is not None:
72
77
  newick += f":{tree.branch_length}"
73
78
  if tree.features:
74
79
  reprs = {k: repr(v).replace("'", '"') for k, v in tree.features.items()}
80
+ for k, r in reprs.items():
81
+ if ":" in r or "=" in r or "]" in r:
82
+ raise ValueError(
83
+ f"Cannot serialize feature `{k}` with value `{r}`: contains reserved characters."
84
+ )
75
85
  features = [f"{k}={repr}" for k, repr in reprs.items()]
76
86
  newick += f"[&&NHX:{':'.join(features)}]"
77
87
  return newick
@@ -0,0 +1,39 @@
1
+ import matplotlib.colors as mcolors
2
+ import matplotlib.pyplot as plt
3
+
4
+ from phylogenie import Tree
5
+ from phylogenie.tree import Tree
6
+ from phylogenie.utils import get_times
7
+
8
+
9
+ def plot_tree(
10
+ tree: Tree,
11
+ ax: plt.Axes | None = None, # pyright: ignore
12
+ color_by: str | None = None,
13
+ default_color: str = "black",
14
+ cmap: str = "tab20",
15
+ ) -> plt.Axes: # pyright: ignore
16
+ if ax is None:
17
+ ax = plt.gca()
18
+
19
+ xs = get_times(tree)
20
+ ys = {node.name: i for i, node in enumerate(tree.inorder_traversal())}
21
+ if color_by is not None:
22
+ features = set(node.get(color_by) for node in tree)
23
+ feature_colors = {
24
+ f: mcolors.to_hex(plt.get_cmap(cmap, len(features))(i))
25
+ for i, f in enumerate(features)
26
+ }
27
+ colors = {node.name: feature_colors[node.get(color_by)] for node in tree}
28
+ else:
29
+ colors = {node.name: default_color for node in tree}
30
+
31
+ for node in tree:
32
+ if node.parent is None:
33
+ continue
34
+ x0, y0 = xs[node.parent.name], ys[node.parent.name]
35
+ x1, y1 = xs[node.name], ys[node.name]
36
+ ax.plot([x0, x0], [y0, y1], color=colors[node.name]) # pyright: ignore
37
+ ax.plot([x0, x1], [y1, y1], color=colors[node.name]) # pyright: ignore
38
+ ax.set_yticks([]) # pyright: ignore
39
+ return ax
@@ -3,8 +3,8 @@ from typing import Any
3
3
 
4
4
 
5
5
  class Tree:
6
- def __init__(self, id: str = "", branch_length: float | None = None):
7
- self.id = id
6
+ def __init__(self, name: str = "", branch_length: float | None = None):
7
+ self.name = name
8
8
  self.branch_length = branch_length
9
9
  self._parent: Tree | None = None
10
10
  self._children: list[Tree] = []
@@ -20,7 +20,7 @@ class Tree:
20
20
 
21
21
  @property
22
22
  def features(self) -> dict[str, Any]:
23
- return self._features
23
+ return self._features.copy()
24
24
 
25
25
  def add_child(self, child: "Tree") -> "Tree":
26
26
  child._parent = self
@@ -36,6 +36,17 @@ class Tree:
36
36
  if node is not None:
37
37
  node._children.append(self)
38
38
 
39
+ def inorder_traversal(self) -> Iterator["Tree"]:
40
+ if self.is_leaf():
41
+ yield self
42
+ return
43
+ if len(self.children) != 2:
44
+ raise ValueError("Inorder traversal is only defined for binary trees.")
45
+ left, right = self.children
46
+ yield from left.inorder_traversal()
47
+ yield self
48
+ yield from right.inorder_traversal()
49
+
39
50
  def preorder_traversal(self) -> Iterator["Tree"]:
40
51
  yield self
41
52
  for child in self.children:
@@ -46,41 +57,42 @@ class Tree:
46
57
  yield from child.postorder_traversal()
47
58
  yield self
48
59
 
49
- def get_node(self, id: str) -> "Tree":
60
+ def get_node(self, name: str) -> "Tree":
50
61
  for node in self:
51
- if node.id == id:
62
+ if node.name == name:
52
63
  return node
53
- raise ValueError(f"Node with id {id} not found.")
64
+ raise ValueError(f"Node with name {name} not found.")
54
65
 
55
66
  def is_leaf(self) -> bool:
56
67
  return not self.children
57
68
 
58
- def get_leaves(self) -> list["Tree"]:
59
- return [node for node in self if not node.children]
69
+ def get_leaves(self) -> tuple["Tree", ...]:
70
+ return tuple(node for node in self if not node.children)
71
+
72
+ def parse_branch_length(self) -> float:
73
+ if self.branch_length is None:
74
+ raise ValueError(f"Branch length of node {self.name} is not set.")
75
+ return self.branch_length
60
76
 
61
77
  def get_time(self) -> float:
62
78
  parent_time = 0 if self.parent is None else self.parent.get_time()
63
- if self.branch_length is None:
64
- if self.parent is not None:
65
- raise ValueError(
66
- f"Branch length of non-root node {self.id} is not set."
67
- )
68
- return 0.0
69
- return self.branch_length + parent_time
79
+ return self.parse_branch_length() + parent_time
70
80
 
71
81
  def set(self, key: str, value: Any) -> None:
72
82
  self._features[key] = value
73
83
 
84
+ def update_features(self, features: dict[str, Any]) -> None:
85
+ self._features.update(features)
86
+
74
87
  def get(self, key: str) -> Any:
75
- return self._features.get(key)
88
+ return self._features[key]
76
89
 
77
90
  def delete(self, key: str) -> None:
78
91
  del self._features[key]
79
92
 
80
93
  def copy(self):
81
- new_tree = Tree(self.id, self.branch_length)
82
- for key, value in self._features.items():
83
- new_tree.set(key, value)
94
+ new_tree = Tree(self.name, self.branch_length)
95
+ new_tree.update_features(self._features)
84
96
  for child in self.children:
85
97
  new_tree.add_child(child.copy())
86
98
  return new_tree
@@ -89,4 +101,4 @@ class Tree:
89
101
  return self.preorder_traversal()
90
102
 
91
103
  def __repr__(self) -> str:
92
- return f"TreeNode(id='{self.id}', branch_length={self.branch_length}, features={self.features})"
104
+ return f"TreeNode(name='{self.name}', branch_length={self.branch_length}, features={self.features})"
@@ -15,8 +15,11 @@ from phylogenie.treesimulator.events import (
15
15
  get_contact_tracing_events,
16
16
  get_epidemiological_events,
17
17
  get_FBD_events,
18
+ get_mutation_id,
18
19
  )
20
+ from phylogenie.treesimulator.features import Feature, set_features
19
21
  from phylogenie.treesimulator.gillespie import generate_trees, simulate_tree
22
+ from phylogenie.treesimulator.model import get_node_state
20
23
 
21
24
  __all__ = [
22
25
  "Birth",
@@ -37,4 +40,8 @@ __all__ = [
37
40
  "get_FBD_events",
38
41
  "generate_trees",
39
42
  "simulate_tree",
43
+ "get_mutation_id",
44
+ "get_node_state",
45
+ "Feature",
46
+ "set_features",
40
47
  ]
@@ -18,6 +18,7 @@ from phylogenie.treesimulator.events.core import (
18
18
  )
19
19
  from phylogenie.treesimulator.events.mutations import Mutation
20
20
  from phylogenie.treesimulator.events.mutations import TargetType as MutationTargetType
21
+ from phylogenie.treesimulator.events.mutations import get_mutation_id
21
22
 
22
23
  __all__ = [
23
24
  "Birth",
@@ -36,4 +37,5 @@ __all__ = [
36
37
  "get_contact_tracing_events",
37
38
  "get_epidemiological_events",
38
39
  "get_FBD_events",
40
+ "get_mutation_id",
39
41
  ]
@@ -1,9 +1,11 @@
1
+ import re
1
2
  from copy import deepcopy
2
3
  from enum import Enum
3
4
  from typing import Type
4
5
 
5
6
  from numpy.random import Generator
6
7
 
8
+ from phylogenie.models import Distribution
7
9
  from phylogenie.skyline import SkylineParameterLike
8
10
  from phylogenie.treesimulator.events.contact_tracing import (
9
11
  BirthWithContactTracing,
@@ -17,7 +19,6 @@ from phylogenie.treesimulator.events.core import (
17
19
  Sampling,
18
20
  )
19
21
  from phylogenie.treesimulator.model import Model
20
- from phylogenie.utils import Distribution
21
22
 
22
23
  MUTATION_PREFIX = "MUT-"
23
24
  MUTATIONS_KEY = "MUTATIONS"
@@ -29,10 +30,17 @@ def _get_mutation(state: str) -> str | None:
29
30
 
30
31
  def _get_mutated_state(mutation_id: int, state: str) -> str:
31
32
  if state.startswith(MUTATION_PREFIX):
32
- state = state.split(".")[1]
33
+ _, state = state.split(".")
33
34
  return f"{MUTATION_PREFIX}{mutation_id}.{state}"
34
35
 
35
36
 
37
+ def get_mutation_id(node_name: str) -> int:
38
+ match = re.search(rf"{MUTATION_PREFIX}(\d+)\.", node_name)
39
+ if match:
40
+ return int(match.group(1))
41
+ return 0
42
+
43
+
36
44
  class TargetType(str, Enum):
37
45
  BIRTH = "birth"
38
46
  DEATH = "death"
@@ -93,7 +101,7 @@ class Mutation(Event):
93
101
  model.add_event(event)
94
102
 
95
103
  def __repr__(self) -> str:
96
- return f"Mutation(state={self.state}, rate={self.rate}, rate_scalers={self.rate_scalers})"
104
+ return f"Mutation(state={self.state}, rate={self.rate})"
97
105
 
98
106
 
99
107
  TARGETS: dict[TargetType, tuple[Type[Event], ...]] = {
@@ -0,0 +1,39 @@
1
+ from collections.abc import Iterable
2
+ from enum import Enum
3
+
4
+ from phylogenie.tree import Tree
5
+ from phylogenie.treesimulator.events import get_mutation_id
6
+ from phylogenie.treesimulator.model import get_node_state
7
+ from phylogenie.utils import get_heights, get_n_tips, get_times
8
+
9
+
10
+ def _get_states(tree: Tree) -> dict[str, str]:
11
+ return {node.name: get_node_state(node.name) for node in tree}
12
+
13
+
14
+ def _get_mutations(tree: Tree) -> dict[str, int]:
15
+ return {node.name: get_mutation_id(node.name) for node in tree}
16
+
17
+
18
+ class Feature(str, Enum):
19
+ STATE = "state"
20
+ MUTATION = "mutation"
21
+ N_TIPS = "n_tips"
22
+ TIME = "time"
23
+ HEIGHT = "height"
24
+
25
+
26
+ FEATURES_EXTRACTORS = {
27
+ Feature.STATE: _get_states,
28
+ Feature.MUTATION: _get_mutations,
29
+ Feature.N_TIPS: get_n_tips,
30
+ Feature.TIME: get_times,
31
+ Feature.HEIGHT: get_heights,
32
+ }
33
+
34
+
35
+ def set_features(tree: Tree, features: Iterable[Feature]) -> None:
36
+ for feature in features:
37
+ feature_maps = FEATURES_EXTRACTORS[feature](tree)
38
+ for node in tree:
39
+ node.set(feature.value, feature_maps[node.name])
@@ -1,6 +1,6 @@
1
1
  import os
2
2
  import time
3
- from collections.abc import Sequence
3
+ from collections.abc import Iterable, Sequence
4
4
 
5
5
  import joblib
6
6
  import numpy as np
@@ -9,6 +9,7 @@ from tqdm import tqdm
9
9
 
10
10
  from phylogenie.io import dump_newick
11
11
  from phylogenie.tree import Tree
12
+ from phylogenie.treesimulator.features import Feature, set_features
12
13
  from phylogenie.treesimulator.model import Event, Model
13
14
 
14
15
 
@@ -69,7 +70,7 @@ def simulate_tree(
69
70
  if (
70
71
  not any(rates)
71
72
  or max_tips is not None
72
- and model.n_sampled > max_tips
73
+ and model.n_sampled >= max_tips
73
74
  or target_n_tips is not None
74
75
  and model.n_sampled >= target_n_tips
75
76
  ):
@@ -101,12 +102,13 @@ def simulate_tree(
101
102
  def generate_trees(
102
103
  output_dir: str,
103
104
  n_trees: int,
104
- events: list[Event],
105
+ events: Sequence[Event],
105
106
  min_tips: int = 1,
106
- max_tips: int = 2**32,
107
+ max_tips: int | None = None,
107
108
  max_time: float = np.inf,
108
109
  init_state: str | None = None,
109
110
  sampling_probability_at_present: float = 0.0,
111
+ node_features: Iterable[Feature] | None = None,
110
112
  seed: int | None = None,
111
113
  n_jobs: int = -1,
112
114
  timeout: float = np.inf,
@@ -114,7 +116,7 @@ def generate_trees(
114
116
  def _simulate_tree(seed: int) -> Tree:
115
117
  while True:
116
118
  try:
117
- return simulate_tree(
119
+ tree = simulate_tree(
118
120
  events=events,
119
121
  min_tips=min_tips,
120
122
  max_tips=max_tips,
@@ -124,6 +126,9 @@ def generate_trees(
124
126
  seed=seed,
125
127
  timeout=timeout,
126
128
  )
129
+ if node_features is not None:
130
+ set_features(tree, node_features)
131
+ return tree
127
132
  except TimeoutError:
128
133
  print("Simulation timed out, retrying with a different seed...")
129
134
  seed += 1
@@ -11,6 +11,13 @@ from phylogenie.skyline import SkylineParameterLike, skyline_parameter
11
11
  from phylogenie.tree import Tree
12
12
 
13
13
 
14
+ @dataclass
15
+ class Individual:
16
+ id: int
17
+ node: Tree
18
+ state: str
19
+
20
+
14
21
  class Event(ABC):
15
22
  def __init__(self, state: str, rate: SkylineParameterLike):
16
23
  self.state = state
@@ -30,11 +37,17 @@ class Event(ABC):
30
37
  def apply(self, model: "Model", time: float, rng: Generator) -> None: ...
31
38
 
32
39
 
33
- @dataclass
34
- class Individual:
35
- id: int
36
- node: Tree
37
- state: str
40
+ def _get_node_name(node_id: int, state: str) -> str:
41
+ return f"{node_id}|{state}"
42
+
43
+
44
+ def get_node_state(node_name: str) -> str:
45
+ try:
46
+ return node_name.split("|")[1]
47
+ except IndexError:
48
+ raise ValueError(
49
+ f"Invalid node name: {node_name} (expected format 'id|state')."
50
+ )
38
51
 
39
52
 
40
53
  class Model:
@@ -61,7 +74,8 @@ class Model:
61
74
 
62
75
  def _get_new_node(self, state: str) -> Tree:
63
76
  self._next_node_id += 1
64
- return Tree(f"{self._next_node_id}|{state}")
77
+ node = Tree(_get_node_name(self._next_node_id, state))
78
+ return node
65
79
 
66
80
  def _get_new_individual(self, state: str) -> Individual:
67
81
  self._next_individual_id += 1
@@ -74,7 +88,7 @@ class Model:
74
88
 
75
89
  def _set_branch_length(self, node: Tree, time: float) -> None:
76
90
  if node.branch_length is not None:
77
- raise ValueError(f"Branch length of node {node.id} is already set.")
91
+ raise ValueError(f"Branch length of node {node.name} is already set.")
78
92
  node.branch_length = (
79
93
  time if node.parent is None else time - node.parent.get_time()
80
94
  )
@@ -108,12 +122,12 @@ class Model:
108
122
  def sample(self, id: int, time: float, removal: bool) -> None:
109
123
  individual = self._population[id]
110
124
  if removal:
111
- self._sampled.add(individual.node.id)
125
+ self._sampled.add(individual.node.name)
112
126
  self.remove(id, time)
113
127
  else:
114
128
  sample_node = self._get_new_node(individual.state)
115
129
  sample_node.branch_length = 0.0
116
- self._sampled.add(sample_node.id)
130
+ self._sampled.add(sample_node.name)
117
131
  individual.node.add_child(sample_node)
118
132
  self._stem(individual, time)
119
133
 
@@ -123,7 +137,7 @@ class Model:
123
137
  def get_sampled_tree(self) -> Tree:
124
138
  tree = self._tree.copy()
125
139
  for node in list(tree.postorder_traversal()):
126
- if node.id not in self._sampled and not node.children:
140
+ if node.name not in self._sampled and not node.children:
127
141
  if node.parent is None:
128
142
  raise ValueError("No samples in the tree.")
129
143
  else:
@@ -0,0 +1,28 @@
1
+ from phylogenie.tree import Tree
2
+
3
+
4
+ def get_n_tips(tree: Tree) -> dict[str, int]:
5
+ n_tips: dict[str, int] = {}
6
+ for node in tree.postorder_traversal():
7
+ n_tips[node.name] = (
8
+ 1 if node.is_leaf() else sum(n_tips[child.name] for child in node.children)
9
+ )
10
+ return n_tips
11
+
12
+
13
+ def get_times(tree: Tree) -> dict[str, float]:
14
+ times: dict[str, float] = {}
15
+ for node in tree:
16
+ parent_time = 0 if node.parent is None else times[node.parent.name]
17
+ times[node.name] = node.parse_branch_length() + parent_time
18
+ return times
19
+
20
+
21
+ def get_heights(tree: Tree) -> dict[str, int]:
22
+ heights: dict[str, int] = {}
23
+ for node in tree.postorder_traversal():
24
+ if node.is_leaf():
25
+ heights[node.name] = 0
26
+ else:
27
+ heights[node.name] = 1 + max(heights[child.name] for child in node.children)
28
+ return heights
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "phylogenie"
3
- version = "2.1.6"
3
+ version = "2.1.8"
4
4
  description = "Generate phylogenetic datasets with minimal setup effort"
5
5
  authors = ["Gabriele Marino <gabmarino.8601@gmail.com>"]
6
6
  readme = "README.md"
@@ -9,6 +9,7 @@ readme = "README.md"
9
9
  python = "^3.10"
10
10
 
11
11
  joblib = "^1.4.2"
12
+ matplotlib = "^3.10.6"
12
13
  pandas = "^2.2.2"
13
14
  pydantic = "^2.11.5"
14
15
  pyyaml = "^6.0.2"
File without changes
File without changes