phylogenie 1.0.8__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. phylogenie/generators/__init__.py +14 -0
  2. phylogenie/generators/alisim.py +71 -0
  3. phylogenie/generators/configs.py +41 -0
  4. phylogenie/{core → generators}/dataset.py +25 -23
  5. phylogenie/{core → generators}/factories.py +42 -52
  6. phylogenie/generators/trees.py +220 -0
  7. phylogenie/generators/typeguards.py +32 -0
  8. phylogenie/io.py +92 -0
  9. phylogenie/main.py +2 -2
  10. phylogenie/msa.py +72 -0
  11. phylogenie/skyline/matrix.py +62 -45
  12. phylogenie/skyline/vector.py +8 -6
  13. phylogenie/tree.py +53 -0
  14. phylogenie/treesimulator/__init__.py +21 -0
  15. phylogenie/treesimulator/events.py +256 -0
  16. phylogenie/treesimulator/gillespie.py +66 -0
  17. phylogenie/treesimulator/model.py +100 -0
  18. phylogenie/typings.py +0 -2
  19. {phylogenie-1.0.8.dist-info → phylogenie-2.0.0.dist-info}/METADATA +6 -18
  20. phylogenie-2.0.0.dist-info/RECORD +28 -0
  21. phylogenie/backend/__init__.py +0 -0
  22. phylogenie/backend/remaster/__init__.py +0 -21
  23. phylogenie/backend/remaster/generate.py +0 -187
  24. phylogenie/backend/remaster/reactions.py +0 -165
  25. phylogenie/backend/treesimulator.py +0 -163
  26. phylogenie/configs.py +0 -5
  27. phylogenie/core/__init__.py +0 -14
  28. phylogenie/core/configs.py +0 -37
  29. phylogenie/core/context/__init__.py +0 -4
  30. phylogenie/core/context/configs.py +0 -28
  31. phylogenie/core/context/distributions.py +0 -125
  32. phylogenie/core/context/factories.py +0 -54
  33. phylogenie/core/msas/__init__.py +0 -10
  34. phylogenie/core/msas/alisim.py +0 -35
  35. phylogenie/core/msas/base.py +0 -51
  36. phylogenie/core/trees/__init__.py +0 -11
  37. phylogenie/core/trees/base.py +0 -13
  38. phylogenie/core/trees/remaster/__init__.py +0 -3
  39. phylogenie/core/trees/remaster/configs.py +0 -14
  40. phylogenie/core/trees/remaster/factories.py +0 -26
  41. phylogenie/core/trees/remaster/generator.py +0 -177
  42. phylogenie/core/trees/treesimulator.py +0 -199
  43. phylogenie/core/typeguards.py +0 -32
  44. phylogenie-1.0.8.dist-info/RECORD +0 -39
  45. {phylogenie-1.0.8.dist-info → phylogenie-2.0.0.dist-info}/LICENSE.txt +0 -0
  46. {phylogenie-1.0.8.dist-info → phylogenie-2.0.0.dist-info}/WHEEL +0 -0
  47. {phylogenie-1.0.8.dist-info → phylogenie-2.0.0.dist-info}/entry_points.txt +0 -0
phylogenie/io.py ADDED
@@ -0,0 +1,92 @@
1
+ from typing import Callable
2
+
3
+ from phylogenie.msa import MSA, Sequence
4
+ from phylogenie.tree import Tree
5
+
6
+
7
def _parse_newick(newick: str) -> Tree:
    """Parse a single Newick string into a tree and return its root node.

    The string is scanned left to right. Every "(" opens a new group of
    sibling nodes; every label (optionally followed by ":<branch length>")
    creates a node that adopts the most recently closed group as its
    children. Parsing stops at the first ";" and anything after it is
    ignored. Quoted labels and bracketed comments get no special treatment.

    Args:
        newick: Newick representation of one tree, terminated by ";".

    Returns:
        The root node of the parsed tree.

    Raises:
        ValueError: If the string is not terminated by ";", has unbalanced
            parentheses, has more than one top-level node, or contains a
            branch length that is not a valid float.
    """
    newick = newick.strip()
    n_chars = len(newick)

    def _scan(start: int, stoppers: str) -> int:
        # Index of the first stopper at or after `start` (or the end of the
        # string), so a missing terminator can never run past the last char.
        end = start
        while end < n_chars and newick[end] not in stoppers:
            end += 1
        return end

    stack: list[list[Tree]] = []  # sibling groups of the still-open parentheses
    current_children: list[Tree] = []  # group closed by the latest ")"
    current_nodes: list[Tree] = []  # siblings collected at the current depth
    i = 0
    while i < n_chars:
        if newick[i] == "(":
            stack.append(current_nodes)
            current_nodes = []
            i += 1
            continue

        end = _scan(i, ":,);")
        node_id = newick[i:end]
        i = end
        branch_length = None
        if i < n_chars and newick[i] == ":":
            end = _scan(i + 1, ",);")
            branch_length = float(newick[i + 1 : end])
            i = end
        if i >= n_chars:
            # Ran off the end of the string while reading a node: the
            # terminating semicolon is missing.
            break

        current_node = Tree(id=node_id, branch_length=branch_length)
        for child in current_children:
            current_node.add_child(child)
        current_children = []
        current_nodes.append(current_node)

        if newick[i] == ")":
            if not stack:
                raise ValueError("Newick string has unbalanced parentheses.")
            current_children = current_nodes
            current_nodes = stack.pop()
        elif newick[i] == ";":
            if stack:
                raise ValueError("Newick string has unbalanced parentheses.")
            if len(current_nodes) != 1:
                raise ValueError("Newick string has more than one root node.")
            return current_node

        i += 1

    raise ValueError("Newick string does not end with a semicolon.")
51
+
52
+
53
def load_newick(filepath: str) -> Tree | list[Tree]:
    """Load the Newick tree(s) stored in a file, one tree per line.

    Blank lines are skipped, so an empty line between or after the trees
    does not abort the load.

    Args:
        filepath: Path of the file to read.

    Returns:
        The single tree when the file contains exactly one, otherwise the
        list of all trees (an empty list when the file has none).

    Raises:
        ValueError: If a non-blank line is not a valid Newick string.
    """
    with open(filepath, "r") as file:
        trees = [_parse_newick(line) for line in file if line.strip()]
    return trees[0] if len(trees) == 1 else trees
57
+
58
+
59
def _to_newick(tree: Tree) -> str:
    """Serialize a tree to Newick notation, without the trailing ";".

    The tree is walked iteratively in post-order with an explicit stack, so
    arbitrarily deep trees do not hit the interpreter's recursion limit.

    Args:
        tree: Root of the (sub)tree to serialize.

    Returns:
        The Newick string of `tree`: children in parentheses, then the node
        id, then ":<branch length>" when a branch length is set.
    """
    # Each entry is (node, children_done). A node with children is visited
    # twice: once to schedule its children, once to assemble its own text.
    pending = [(tree, False)]
    # Finished subtrees. When a node is assembled, its children's strings are
    # the last `len(node.children)` entries, in child order.
    rendered: list[str] = []
    while pending:
        node, children_done = pending.pop()
        if not children_done and node.children:
            pending.append((node, True))
            # Reversed so that the first child is popped (and rendered) first.
            pending.extend((child, False) for child in reversed(node.children))
            continue

        newick = node.id
        n_children = len(node.children)
        if n_children:
            children_newick = ",".join(rendered[-n_children:])
            del rendered[-n_children:]
            if children_newick:
                newick = f"({children_newick}){newick}"
        if node.branch_length is not None:
            newick += f":{node.branch_length}"
        rendered.append(newick)
    return rendered[0]
67
+
68
+
69
def dump_newick(tree: Tree, filepath: str) -> None:
    """Write `tree` to `filepath` as one semicolon-terminated Newick string.

    The file is opened in write mode, so any existing content is replaced.
    No trailing newline is written.
    """
    with open(filepath, mode="w") as handle:
        handle.write(f"{_to_newick(tree)};")
72
+
73
+
74
def load_fasta(
    fasta_file: str, extract_time_from_id: Callable[[str], float] | None = None
) -> MSA:
    """Read a FASTA file into an MSA.

    Each record is a header line starting with ">" followed by one or more
    sequence lines. Sequence lines of the same record are concatenated, so
    wrapped sequences are supported. Blank lines are ignored.

    Args:
        fasta_file: Path of the FASTA file to read.
        extract_time_from_id: Optional callable mapping a sequence id to its
            time. When omitted, the text after the last "|" of the id is
            parsed as a float, and the time is None if that fails.

    Returns:
        The alignment made of all the sequences in the file, in file order.

    Raises:
        ValueError: If sequence data appears before the first header, if a
            header has no sequence data, or if the sequences do not all have
            the same length (raised by MSA).
    """

    def _resolve_time(sequence_id: str) -> float | None:
        if extract_time_from_id is not None:
            return extract_time_from_id(sequence_id)
        try:
            return float(sequence_id.split("|")[-1])
        except ValueError:
            return None

    # One (id, sequence chunks) pair per header line, in file order.
    records: list[tuple[str, list[str]]] = []
    with open(fasta_file, "r") as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue
            if line.startswith(">"):
                records.append((line[1:].strip(), []))
            elif records:
                records[-1][1].append(stripped)
            else:
                raise ValueError(f"Invalid FASTA format: expected '>', got '{line[0]}'")

    sequences: list[Sequence] = []
    for sequence_id, chunks in records:
        if not chunks:
            # Previously a header on the last line leaked a bare StopIteration.
            raise ValueError(
                f"Invalid FASTA format: no sequence found for '{sequence_id}'"
            )
        sequences.append(
            Sequence(sequence_id, "".join(chunks), _resolve_time(sequence_id))
        )
    return MSA(sequences)
phylogenie/main.py CHANGED
@@ -5,8 +5,8 @@ from glob import glob
5
5
  from pydantic import TypeAdapter
6
6
  from yaml import safe_load
7
7
 
8
- from phylogenie.core import DatasetGeneratorConfig
9
- from phylogenie.core.dataset import DatasetGenerator
8
+ from phylogenie.generators import DatasetGeneratorConfig
9
+ from phylogenie.generators.dataset import DatasetGenerator
10
10
 
11
11
 
12
12
  def run(config_path: str) -> None:
phylogenie/msa.py ADDED
@@ -0,0 +1,72 @@
1
+ from collections.abc import Iterator
2
+ from dataclasses import dataclass
3
+
4
+ import numpy as np
5
+
6
+
7
@dataclass
class Sequence:
    """A single sequence of an alignment: an id, its characters, and an optional time."""

    id: str  # identifier of the sequence
    chars: str  # the sequence itself, one character per site
    time: float | None = None  # time attached to the sequence; None when unset
12
+
13
+
14
class MSA:
    """Multiple sequence alignment: an ordered collection of equal-length sequences.

    The object behaves like a read-only sequence of `Sequence` items: it
    supports `len()`, integer indexing and iteration.
    """

    def __init__(self, sequences: list[Sequence]):
        """Store `sequences` after checking that they all have the same length.

        Raises:
            ValueError: If the sequences have different lengths.
        """
        self.sequences = sequences
        lengths = {len(sequence.chars) for sequence in sequences}
        if len(lengths) > 1:
            raise ValueError(
                f"All sequences in the alignment must have the same length (got lengths: {lengths})"
            )

    @property
    def ids(self) -> list[str]:
        """Ids of the sequences, in alignment order."""
        return [sequence.id for sequence in self.sequences]

    @property
    def times(self) -> list[float]:
        """Times of the sequences, in alignment order.

        Raises:
            ValueError: If any sequence has no time set.
        """
        times: list[float] = []
        for sequence in self.sequences:
            if sequence.time is None:
                raise ValueError(f"Time is not set for sequence {sequence.id}.")
            times.append(sequence.time)
        return times

    @property
    def alignment(self) -> list[list[str]]:
        """The alignment as a matrix of characters (one row per sequence)."""
        return [list(sequence.chars) for sequence in self.sequences]

    @property
    def n_sequences(self) -> int:
        """Number of sequences (rows) in the alignment."""
        return len(self.sequences)

    @property
    def n_sites(self) -> int:
        """Number of sites (columns) in the alignment; 0 for an empty alignment."""
        # Read the length off the first sequence directly instead of building
        # the whole character matrix, and tolerate an empty alignment.
        return len(self.sequences[0].chars) if self.sequences else 0

    @property
    def shape(self) -> tuple[int, int]:
        """The pair (n_sequences, n_sites)."""
        return self.n_sequences, self.n_sites

    def __len__(self) -> int:
        return self.n_sequences

    def __getitem__(self, item: int) -> Sequence:
        return self.sequences[item]

    def __iter__(self) -> Iterator[Sequence]:
        return iter(self.sequences)

    def count_informative_sites(self) -> int:
        """Count the sites where at least two characters each occur at least twice."""
        n_informative_sites = 0
        for column in np.array(self.alignment).T:
            _, char_counts = np.unique(column, return_counts=True)
            if (char_counts >= 2).sum() >= 2:
                n_informative_sites += 1
        return n_informative_sites

    def count_unique_sequences(self) -> int:
        """Count the distinct sequences (by their characters) in the alignment."""
        # All rows have the same length, so two rows are equal exactly when
        # their strings are equal: a set of strings is enough.
        return len({sequence.chars for sequence in self.sequences})
phylogenie/skyline/matrix.py CHANGED
@@ -11,7 +11,6 @@ from phylogenie.skyline.vector import (
11
11
  SkylineVectorOperand,
12
12
  is_many_skyline_vectors_coercible,
13
13
  is_many_skyline_vectors_like,
14
- is_skyline_vector_coercible,
15
14
  is_skyline_vector_like,
16
15
  is_skyline_vector_operand,
17
16
  skyline_vector,
@@ -34,17 +33,25 @@ class SkylineMatrix:
34
33
  ):
35
34
  if params is not None and value is None and change_times is None:
36
35
  if is_many_skyline_vectors_like(params):
37
- self.params = [skyline_vector(p, len(params)) for p in params]
36
+ self.params = [
37
+ p if isinstance(p, SkylineVector) else SkylineVector(p)
38
+ for p in params
39
+ ]
38
40
  else:
39
41
  raise TypeError(
40
42
  f"It is impossible to create a SkylineMatrix from `params` {params} of type {type(params)}. Please provide a sequence composed of SkylineVectorLike objects (a SkylineVectorLike object can either be a SkylineVector or a sequence of scalars and/or SkylineParameters)."
41
43
  )
44
+ lengths = {len(p) for p in self.params}
45
+ if len(lengths) > 1:
46
+ raise ValueError(
47
+ f"All `params` must have the same length to create a SkylineMatrix (got params={params} with lengths {lengths})."
48
+ )
42
49
  elif params is None and value is not None and change_times is not None:
43
50
  if tg.is_many_3D_scalars(value):
44
- matrix_lengths = {len(matrix) for matrix in value}
45
- if any(ml != len(value[0]) for ml in matrix_lengths):
51
+ lengths = {len(matrix) for matrix in value}
52
+ if len(lengths) > 1:
46
53
  raise ValueError(
47
- f"All matrices in the `value` of a SkylineMatrix must have the same length (got value={value} with matrix lengths={matrix_lengths})."
54
+ f"All matrices in the `value` of a SkylineMatrix must have the same number of rows (got matrices={value} with row lengths {lengths})."
48
55
  )
49
56
  else:
50
57
  raise TypeError(
@@ -62,9 +69,17 @@ class SkylineMatrix:
62
69
  )
63
70
 
64
71
  @property
65
- def N(self) -> int:
72
+ def n_rows(self) -> int:
66
73
  return len(self.params)
67
74
 
75
+ @property
76
+ def n_cols(self) -> int:
77
+ return len(self.params[0])
78
+
79
+ @property
80
+ def shape(self) -> tuple[int, int]:
81
+ return self.n_rows, self.n_cols
82
+
68
83
  @property
69
84
  def change_times(self) -> pgt.Vector1D:
70
85
  return sorted(set([t for row in self.params for t in row.change_times]))
@@ -83,12 +98,12 @@ class SkylineMatrix:
83
98
  [SkylineVector, SkylineVector | SkylineParameter], SkylineVector
84
99
  ],
85
100
  ) -> "SkylineMatrix":
86
- if is_skyline_vector_operand(other):
87
- other = skyline_vector(other, N=self.N)
101
+ if is_skyline_matrix_operand(other):
102
+ other = skyline_matrix(other, self.n_rows, self.n_cols)
88
103
  elif isinstance(other, SkylineMatrix):
89
- if other.N != self.N:
104
+ if other.shape != self.shape:
90
105
  raise ValueError(
91
- f"Expected a SkylineMatrix with the same size as self (N={self.N}), but got {other} with N={other.N}."
106
+ f"It is impossible to operate on SkylineMatrices of different shapes (got self={self.shape} and other={other.shape})."
92
107
  )
93
108
  else:
94
109
  return NotImplemented
@@ -122,7 +137,7 @@ class SkylineMatrix:
122
137
 
123
138
  @property
124
139
  def T(self) -> "SkylineMatrix":
125
- return SkylineMatrix([[v[i] for v in self] for i in range(self.N)])
140
+ return SkylineMatrix([[v[i] for v in self] for i in range(self.n_cols)])
126
141
 
127
142
  def __bool__(self) -> bool:
128
143
  return any(self.params)
@@ -137,52 +152,57 @@ class SkylineMatrix:
137
152
  return iter(self.params)
138
153
 
139
154
  def __len__(self) -> int:
140
- return self.N
155
+ return self.n_rows
141
156
 
142
157
  @overload
143
158
  def __getitem__(self, item: int) -> SkylineVector: ...
144
159
  @overload
145
- def __getitem__(self, item: slice) -> list[SkylineVector]: ...
160
+ def __getitem__(self, item: slice) -> "SkylineMatrix": ...
161
+ @overload
146
162
  def __getitem__(
147
- self, item: int | slice
148
- ) -> Union[SkylineVector, list[SkylineVector]]:
149
- return self.params[item]
163
+ self, item: tuple[int | slice, int | slice]
164
+ ) -> Union[SkylineParameter, SkylineVector, "SkylineMatrix"]: ...
165
+ def __getitem__(
166
+ self, item: int | slice | tuple[int | slice, int | slice]
167
+ ) -> Union[SkylineParameter | SkylineVector, "SkylineMatrix"]:
168
+ if isinstance(item, int):
169
+ return self.params[item]
170
+ if isinstance(item, slice):
171
+ return SkylineMatrix(self.params[item])
172
+ row_idx, col_idx = item
173
+ if isinstance(row_idx, int):
174
+ return self.params[row_idx][col_idx]
175
+ if isinstance(col_idx, int):
176
+ return SkylineVector([row[col_idx] for row in self.params[row_idx]])
177
+ return SkylineMatrix([row[col_idx] for row in self.params[row_idx]])
150
178
 
151
179
  def __setitem__(self, item: int, value: SkylineVectorLike) -> None:
152
180
  if not is_skyline_vector_like(value):
153
181
  raise TypeError(
154
- f"It is impossible to set item {item} of SkylineMatrix with value {value} of type {type(value)}. Please provide a SkylineVectorLike object (i.e., a SkylineVector or a sequence of scalars and/or SkylineParameters)."
182
+ f"It is impossible to set item of SkylineMatrix to value {value} of type {type(value)}. Please provide a SkylineVectorLike object (i.e., a SkylineVector or a sequence of scalars and/or SkylineParameters)."
155
183
  )
156
- self.params[item] = skyline_vector(value, N=self.N)
184
+ self.params[item] = skyline_vector(value, self.n_cols)
157
185
 
158
186
 
159
187
  def skyline_matrix(
160
- x: SkylineMatrixCoercible, N: int, zero_diagonal: bool = False
188
+ x: SkylineMatrixCoercible, n_rows: int, n_cols: int
161
189
  ) -> SkylineMatrix:
162
- if N <= 0:
190
+ if n_rows <= 0 or n_cols <= 0:
163
191
  raise ValueError(
164
- f"N must be a positive integer for SkylineMatrix construction (got N={N})."
192
+ f" n_rows and n_cols must be positive integers to create a SkylineMatrix (got n_rows={n_rows} and n_cols={n_cols})."
165
193
  )
166
- if is_skyline_vector_coercible(x):
167
- x = SkylineMatrix([[p] * N for p in skyline_vector(x, N)])
168
- if zero_diagonal:
169
- for i in range(N):
170
- x[i][i] = 0
171
- return x
172
- elif is_many_skyline_vectors_coercible(x):
173
- x = SkylineMatrix(
174
- [
175
- [
176
- (
177
- 0
178
- if i == j and is_skyline_parameter_like(v) and zero_diagonal
179
- else p
180
- )
181
- for j, p in enumerate(skyline_vector(v, N))
182
- ]
183
- for i, v in enumerate(x)
184
- ]
194
+
195
+ if is_skyline_parameter_like(x):
196
+ return SkylineMatrix([[x] * n_cols] * n_rows)
197
+ if is_skyline_vector_like(x) or is_many_skyline_vectors_coercible(x):
198
+ if len(x) == n_rows:
199
+ return SkylineMatrix([skyline_vector(p, n_cols) for p in x])
200
+ elif len(x) == n_cols:
201
+ return SkylineMatrix([skyline_vector(p, n_rows) for p in x]).T
202
+ raise ValueError(
203
+ f"Expected a SkylineVectorLike of size {n_rows} or {n_cols}, got {x} of size {len(x)}."
185
204
  )
205
+
186
206
  if not isinstance(x, SkylineMatrix):
187
207
  raise TypeError(
188
208
  f"It is impossible to coerce {x} of type {type(x)} into a SkylineMatrix. Please provide either:\n"
@@ -191,12 +211,9 @@ def skyline_matrix(
191
211
  "- a sequence of SkylineVectorCoercible objects."
192
212
  )
193
213
 
194
- if x.N != N:
214
+ if x.shape != (n_rows, n_cols):
195
215
  raise ValueError(
196
- f"Expected an {N}x{N} SkylineMatrix, got {x} of shape {x.N}x{x.N}."
216
+ f"Expected an SkylineMatrix of shape ({n_rows}, {n_cols}), got {x} of shape {x.shape}."
197
217
  )
198
218
 
199
- if zero_diagonal and any(x[i][i] for i in range(x.N)):
200
- raise ValueError(f"Expected a SkylineMatrix with zero diagonal, but got {x}.")
201
-
202
219
  return x
phylogenie/skyline/vector.py CHANGED
@@ -56,10 +56,10 @@ class SkylineVector:
56
56
  )
57
57
  elif params is None and value is not None and change_times is not None:
58
58
  if tg.is_many_2D_scalars(value):
59
- vector_lengths = {len(vector) for vector in value}
60
- if any(vl != len(value[0]) for vl in vector_lengths):
59
+ lengths = {len(vector) for vector in value}
60
+ if len(lengths) > 1:
61
61
  raise ValueError(
62
- f"All rows in the `value` of a SkylineVector must have the same length (got value={value} with vector lengths={vector_lengths})."
62
+ f"All rows in the `value` of a SkylineVector must have the same length (got value={value} with lengths {lengths})."
63
63
  )
64
64
  else:
65
65
  raise TypeError(
@@ -143,10 +143,12 @@ class SkylineVector:
143
143
  @overload
144
144
  def __getitem__(self, item: int) -> SkylineParameter: ...
145
145
  @overload
146
- def __getitem__(self, item: slice) -> list[SkylineParameter]: ...
146
+ def __getitem__(self, item: slice) -> "SkylineVector": ...
147
147
  def __getitem__(
148
148
  self, item: int | slice
149
- ) -> Union[SkylineParameter, list[SkylineParameter]]:
149
+ ) -> Union[SkylineParameter, "SkylineVector"]:
150
+ if isinstance(item, slice):
151
+ return SkylineVector(self.params[item])
150
152
  return self.params[item]
151
153
 
152
154
  def __setitem__(self, item: int, value: SkylineParameterLike) -> None:
@@ -160,7 +162,7 @@ class SkylineVector:
160
162
  def skyline_vector(x: SkylineVectorCoercible, N: int) -> SkylineVector:
161
163
  if N <= 0:
162
164
  raise ValueError(
163
- f"N must be a positive integer for SkylineVector construction (got N={N})."
165
+ f"N must be a positive integer to create a SkylineVector (got N={N})."
164
166
  )
165
167
  if is_skyline_parameter_like(x):
166
168
  return SkylineVector([skyline_parameter(x)] * N)
phylogenie/tree.py ADDED
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Iterator
4
+
5
+
6
class Tree:
    """A node of a rooted tree; every node is also the root of its own subtree.

    Iterating over a node yields the node and all its descendants in
    pre-order. Traversals, `copy` and `get_time` are iterative, so they work
    on arbitrarily deep trees without hitting the recursion limit.
    """

    def __init__(self, id: str = "", branch_length: float | None = None):
        """Create a detached node with no parent and no children."""
        self.id = id
        self.branch_length = branch_length
        self.parent: Tree | None = None
        self.children: list[Tree] = []

    def add_child(self, child: Tree) -> Tree:
        """Append `child` to this node's children and return this node (for chaining)."""
        child.parent = self
        self.children.append(child)
        return self

    def preorder_traversal(self) -> Iterator[Tree]:
        """Yield this node, then each child's subtree in order (parents first)."""
        pending = [self]
        while pending:
            node = pending.pop()
            yield node
            # Reversed so that the first child is popped (and visited) first.
            pending.extend(reversed(node.children))

    def postorder_traversal(self) -> Iterator[Tree]:
        """Yield each child's subtree in order, then this node (children first)."""
        # Each entry is (node, children_done). A node with children is pushed
        # back once so that it is yielded only after its whole subtree.
        pending: list[tuple[Tree, bool]] = [(self, False)]
        while pending:
            node, children_done = pending.pop()
            if children_done or not node.children:
                yield node
            else:
                pending.append((node, True))
                pending.extend((child, False) for child in reversed(node.children))

    def get_node(self, id: str) -> Tree:
        """Return the first node (in pre-order) of this subtree whose id is `id`.

        Raises:
            ValueError: If no node of this subtree has that id.
        """
        for node in self:
            if node.id == id:
                return node
        raise ValueError(f"Node with id {id} not found.")

    def get_leaves(self) -> list[Tree]:
        """Return the nodes of this subtree that have no children, in pre-order."""
        return [node for node in self if not node.children]

    def get_time(self) -> float:
        """Return the sum of the branch lengths from the root down to this node.

        Raises:
            ValueError: If this node or any of its ancestors has no branch
                length set (this includes the root).
        """
        lineage = [self]
        while lineage[-1].parent is not None:
            lineage.append(lineage[-1].parent)
        # Accumulate from the root downwards: the same order (and therefore
        # the same floating-point result and error precedence) as summing
        # "own branch length + parent's time" recursively.
        time = 0
        for node in reversed(lineage):
            if node.branch_length is None:
                raise ValueError(f"Branch length of node {node.id} is not set.")
            time = node.branch_length + time
        return time

    def copy(self) -> Tree:
        """Return a deep copy of this subtree; the copied root has no parent."""
        new_tree = Tree(self.id, self.branch_length)
        pending = [(self, new_tree)]
        while pending:
            original, duplicate = pending.pop()
            for child in original.children:
                child_copy = Tree(child.id, child.branch_length)
                duplicate.add_child(child_copy)
                pending.append((child, child_copy))
        return new_tree

    def __iter__(self) -> Iterator[Tree]:
        return self.preorder_traversal()

    def __repr__(self) -> str:
        # Use the actual class name so the repr matches the class.
        return (
            f"{type(self).__name__}(id='{self.id}', branch_length={self.branch_length})"
        )
phylogenie/treesimulator/__init__.py ADDED
@@ -0,0 +1,21 @@
1
+ from phylogenie.treesimulator.events import (
2
+ Event,
3
+ get_BD_events,
4
+ get_BDEI_events,
5
+ get_BDSS_events,
6
+ get_canonical_events,
7
+ get_epidemiological_events,
8
+ get_FBD_events,
9
+ )
10
+ from phylogenie.treesimulator.gillespie import simulate_tree
11
+
12
+ __all__ = [
13
+ "Event",
14
+ "get_BD_events",
15
+ "get_BDEI_events",
16
+ "get_BDSS_events",
17
+ "get_canonical_events",
18
+ "get_epidemiological_events",
19
+ "get_FBD_events",
20
+ "simulate_tree",
21
+ ]