phylogenie 2.1.23__py3-none-any.whl → 2.1.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of phylogenie might be problematic. Click here for more details.
- phylogenie/__init__.py +2 -1
- phylogenie/draw.py +2 -1
- phylogenie/generators/alisim.py +2 -1
- phylogenie/generators/configs.py +2 -6
- phylogenie/generators/dataset.py +1 -1
- phylogenie/generators/factories.py +8 -0
- phylogenie/generators/trees.py +18 -31
- phylogenie/io/__init__.py +5 -0
- phylogenie/io/fasta.py +25 -0
- phylogenie/{io.py → io/newick.py} +12 -33
- phylogenie/io/nexus.py +43 -0
- phylogenie/skyline/matrix.py +11 -7
- phylogenie/skyline/parameter.py +12 -4
- phylogenie/skyline/vector.py +12 -6
- phylogenie/treesimulator/__init__.py +3 -3
- phylogenie/treesimulator/events/__init__.py +1 -7
- phylogenie/treesimulator/events/base.py +26 -0
- phylogenie/treesimulator/events/contact_tracing.py +2 -1
- phylogenie/treesimulator/events/core.py +2 -1
- phylogenie/treesimulator/features.py +1 -1
- phylogenie/treesimulator/gillespie.py +36 -18
- phylogenie/treesimulator/model.py +1 -34
- phylogenie/treesimulator/{events/mutations.py → mutations.py} +43 -44
- phylogenie/typings.py +3 -3
- {phylogenie-2.1.23.dist-info → phylogenie-2.1.25.dist-info}/METADATA +1 -1
- phylogenie-2.1.25.dist-info/RECORD +39 -0
- phylogenie-2.1.23.dist-info/RECORD +0 -35
- {phylogenie-2.1.23.dist-info → phylogenie-2.1.25.dist-info}/LICENSE.txt +0 -0
- {phylogenie-2.1.23.dist-info → phylogenie-2.1.25.dist-info}/WHEEL +0 -0
- {phylogenie-2.1.23.dist-info → phylogenie-2.1.25.dist-info}/entry_points.txt +0 -0
phylogenie/__init__.py
CHANGED
|
@@ -11,7 +11,7 @@ from phylogenie.generators import (
|
|
|
11
11
|
FBDTreeDatasetGenerator,
|
|
12
12
|
TreeDatasetGeneratorConfig,
|
|
13
13
|
)
|
|
14
|
-
from phylogenie.io import dump_newick, load_fasta, load_newick
|
|
14
|
+
from phylogenie.io import dump_newick, load_fasta, load_newick, load_nexus
|
|
15
15
|
from phylogenie.msa import MSA
|
|
16
16
|
from phylogenie.skyline import (
|
|
17
17
|
SkylineMatrix,
|
|
@@ -100,6 +100,7 @@ __all__ = [
|
|
|
100
100
|
"generate_trees",
|
|
101
101
|
"simulate_tree",
|
|
102
102
|
"dump_newick",
|
|
103
|
+
"load_nexus",
|
|
103
104
|
"load_fasta",
|
|
104
105
|
"load_newick",
|
|
105
106
|
"MSA",
|
phylogenie/draw.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
+
from itertools import islice
|
|
2
3
|
from typing import Any
|
|
3
4
|
|
|
4
5
|
import matplotlib.colors as mcolors
|
|
@@ -25,7 +26,7 @@ def _draw_colored_tree(tree: Tree, ax: Axes, colors: Color | dict[Tree, Color])
|
|
|
25
26
|
|
|
26
27
|
xs = (
|
|
27
28
|
get_node_depth_levels(tree)
|
|
28
|
-
if any(node.branch_length is None for node in tree)
|
|
29
|
+
if any(node.branch_length is None for node in islice(tree, 1, None))
|
|
29
30
|
else get_node_depths(tree)
|
|
30
31
|
)
|
|
31
32
|
ys: dict[Tree, float] = {node: i for i, node in enumerate(tree.get_leaves())}
|
phylogenie/generators/alisim.py
CHANGED
|
@@ -65,12 +65,13 @@ class AliSimDatasetGenerator(DatasetGenerator):
|
|
|
65
65
|
while True:
|
|
66
66
|
d.update(data(context, rng))
|
|
67
67
|
try:
|
|
68
|
-
tree = self.trees.simulate_one(d, seed)
|
|
68
|
+
tree, metadata = self.trees.simulate_one(d, seed)
|
|
69
69
|
break
|
|
70
70
|
except TimeoutError:
|
|
71
71
|
print(
|
|
72
72
|
"Tree simulation timed out, retrying with different parameters..."
|
|
73
73
|
)
|
|
74
|
+
d.update(metadata)
|
|
74
75
|
|
|
75
76
|
times = get_node_depths(tree)
|
|
76
77
|
for leaf in tree.get_leaves():
|
phylogenie/generators/configs.py
CHANGED
|
@@ -29,10 +29,6 @@ SkylineVector = str | pgt.Scalar | pgt.Many[SkylineParameter] | SkylineVectorMod
|
|
|
29
29
|
SkylineMatrix = str | pgt.Scalar | pgt.Many[SkylineVector] | SkylineMatrixModel | None
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
class
|
|
33
|
-
|
|
34
|
-
rate: SkylineParameter
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class Mutation(Event):
|
|
32
|
+
class Mutation(StrictBaseModel):
|
|
33
|
+
rate: Scalar
|
|
38
34
|
rate_scalers: dict[MutationTargetType, Distribution]
|
phylogenie/generators/dataset.py
CHANGED
|
@@ -56,7 +56,7 @@ class DatasetGenerator(ABC, StrictBaseModel):
|
|
|
56
56
|
for i in range(n_samples)
|
|
57
57
|
)
|
|
58
58
|
df = pd.DataFrame(
|
|
59
|
-
[
|
|
59
|
+
[j for j in tqdm(jobs, f"Generating {data_dir}...", n_samples)]
|
|
60
60
|
)
|
|
61
61
|
df.to_csv(os.path.join(output_dir, METADATA_FILENAME), index=False)
|
|
62
62
|
|
|
@@ -17,6 +17,7 @@ from phylogenie.skyline import (
|
|
|
17
17
|
SkylineVector,
|
|
18
18
|
SkylineVectorCoercible,
|
|
19
19
|
)
|
|
20
|
+
from phylogenie.treesimulator import Mutation
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
def _eval_expression(expression: str, data: dict[str, Any]) -> Any:
|
|
@@ -221,6 +222,13 @@ def distribution(x: Distribution, data: dict[str, Any]) -> Distribution:
|
|
|
221
222
|
return Distribution(type=x.type, **args)
|
|
222
223
|
|
|
223
224
|
|
|
225
|
+
def mutation(x: cfg.Mutation, data: dict[str, Any]) -> Mutation:
    """Build a simulator ``Mutation`` from its configuration model.

    The configured rate is resolved with ``scalar`` and every entry of
    ``rate_scalers`` is resolved with ``distribution``, both against
    ``data``.

    Args:
        x: Mutation configuration holding ``rate`` and ``rate_scalers``.
        data: Values used to resolve the configured fields.

    Returns:
        A ``Mutation`` built from the resolved rate and rate scalers.
    """
    # The rate is resolved before the scalers, matching argument order.
    resolved_rate = scalar(x.rate, data)
    resolved_scalers = {}
    for target_type, scaler in x.rate_scalers.items():
        resolved_scalers[target_type] = distribution(scaler, data)
    return Mutation(resolved_rate, resolved_scalers)
|
|
230
|
+
|
|
231
|
+
|
|
224
232
|
def data(context: dict[str, Distribution] | None, rng: Generator) -> dict[str, Any]:
|
|
225
233
|
if context is None:
|
|
226
234
|
return {}
|
phylogenie/generators/trees.py
CHANGED
|
@@ -11,8 +11,8 @@ import phylogenie.generators.configs as cfg
|
|
|
11
11
|
from phylogenie.generators.dataset import DatasetGenerator, DataType
|
|
12
12
|
from phylogenie.generators.factories import (
|
|
13
13
|
data,
|
|
14
|
-
distribution,
|
|
15
14
|
integer,
|
|
15
|
+
mutation,
|
|
16
16
|
scalar,
|
|
17
17
|
skyline_matrix,
|
|
18
18
|
skyline_parameter,
|
|
@@ -24,7 +24,6 @@ from phylogenie.tree import Tree
|
|
|
24
24
|
from phylogenie.treesimulator import (
|
|
25
25
|
Event,
|
|
26
26
|
Feature,
|
|
27
|
-
Mutation,
|
|
28
27
|
get_BD_events,
|
|
29
28
|
get_BDEI_events,
|
|
30
29
|
get_BDSS_events,
|
|
@@ -48,6 +47,7 @@ class ParameterizationType(str, Enum):
|
|
|
48
47
|
|
|
49
48
|
class TreeDatasetGenerator(DatasetGenerator):
|
|
50
49
|
data_type: Literal[DataType.TREES] = DataType.TREES
|
|
50
|
+
mutations: list[cfg.Mutation] | None = None
|
|
51
51
|
min_tips: cfg.Integer = 1
|
|
52
52
|
max_tips: cfg.Integer | None = None
|
|
53
53
|
max_time: cfg.Scalar = np.inf
|
|
@@ -59,14 +59,22 @@ class TreeDatasetGenerator(DatasetGenerator):
|
|
|
59
59
|
@abstractmethod
|
|
60
60
|
def _get_events(self, data: dict[str, Any]) -> list[Event]: ...
|
|
61
61
|
|
|
62
|
-
def simulate_one(
|
|
62
|
+
def simulate_one(
|
|
63
|
+
self, data: dict[str, Any], seed: int | None = None
|
|
64
|
+
) -> tuple[Tree, dict[str, Any]]:
|
|
63
65
|
init_state = (
|
|
64
66
|
self.init_state
|
|
65
67
|
if self.init_state is None
|
|
66
68
|
else self.init_state.format(**data)
|
|
67
69
|
)
|
|
70
|
+
mutations = (
|
|
71
|
+
None
|
|
72
|
+
if self.mutations is None
|
|
73
|
+
else [mutation(m, data) for m in self.mutations]
|
|
74
|
+
)
|
|
68
75
|
return simulate_tree(
|
|
69
76
|
events=self._get_events(data),
|
|
77
|
+
mutations=mutations,
|
|
70
78
|
min_tips=integer(self.min_tips, data),
|
|
71
79
|
max_tips=None if self.max_tips is None else integer(self.max_tips, data),
|
|
72
80
|
max_time=scalar(self.max_time, data),
|
|
@@ -89,14 +97,14 @@ class TreeDatasetGenerator(DatasetGenerator):
|
|
|
89
97
|
while True:
|
|
90
98
|
try:
|
|
91
99
|
d.update(data(context, rng))
|
|
92
|
-
tree = self.simulate_one(d, seed)
|
|
100
|
+
tree, metadata = self.simulate_one(d, seed)
|
|
93
101
|
if self.node_features is not None:
|
|
94
102
|
set_features(tree, self.node_features)
|
|
95
103
|
dump_newick(tree, f"{filename}.nwk")
|
|
96
104
|
break
|
|
97
105
|
except TimeoutError:
|
|
98
106
|
print("Simulation timed out, retrying with different parameters...")
|
|
99
|
-
return d
|
|
107
|
+
return d | metadata
|
|
100
108
|
|
|
101
109
|
|
|
102
110
|
class CanonicalTreeDatasetGenerator(TreeDatasetGenerator):
|
|
@@ -147,12 +155,11 @@ class FBDTreeDatasetGenerator(TreeDatasetGenerator):
|
|
|
147
155
|
)
|
|
148
156
|
|
|
149
157
|
|
|
150
|
-
class
|
|
158
|
+
class ContactTracingTreeDatasetGenerator(TreeDatasetGenerator):
|
|
151
159
|
max_notified_contacts: cfg.Integer = 1
|
|
152
160
|
notification_probability: cfg.SkylineParameter = 0.0
|
|
153
161
|
sampling_rate_after_notification: cfg.SkylineParameter = np.inf
|
|
154
162
|
samplable_states_after_notification: list[str] | None = None
|
|
155
|
-
mutations: tuple[cfg.Mutation, ...] = Field(default_factory=tuple)
|
|
156
163
|
|
|
157
164
|
@abstractmethod
|
|
158
165
|
def _get_base_events(self, data: dict[str, Any]) -> list[Event]: ...
|
|
@@ -171,30 +178,10 @@ class TreeDatasetGeneratorForEpidemiology(TreeDatasetGenerator):
|
|
|
171
178
|
),
|
|
172
179
|
samplable_states_after_notification=self.samplable_states_after_notification,
|
|
173
180
|
)
|
|
174
|
-
all_states = list({e.state for e in events})
|
|
175
|
-
for mutation in self.mutations:
|
|
176
|
-
states = mutation.states
|
|
177
|
-
if isinstance(states, str):
|
|
178
|
-
states = [states]
|
|
179
|
-
elif states is None:
|
|
180
|
-
states = all_states
|
|
181
|
-
for state in states:
|
|
182
|
-
if state not in all_states:
|
|
183
|
-
raise ValueError(
|
|
184
|
-
f"Mutation state '{state}' is not found in states {all_states}."
|
|
185
|
-
)
|
|
186
|
-
rate_scalers = {
|
|
187
|
-
t: distribution(r, data) for t, r in mutation.rate_scalers.items()
|
|
188
|
-
}
|
|
189
|
-
events.append(
|
|
190
|
-
Mutation(
|
|
191
|
-
state, skyline_parameter(mutation.rate, data), rate_scalers
|
|
192
|
-
)
|
|
193
|
-
)
|
|
194
181
|
return events
|
|
195
182
|
|
|
196
183
|
|
|
197
|
-
class EpidemiologicalTreeDatasetGenerator(
|
|
184
|
+
class EpidemiologicalTreeDatasetGenerator(ContactTracingTreeDatasetGenerator):
|
|
198
185
|
parameterization: Literal[ParameterizationType.EPIDEMIOLOGICAL] = (
|
|
199
186
|
ParameterizationType.EPIDEMIOLOGICAL
|
|
200
187
|
)
|
|
@@ -220,7 +207,7 @@ class EpidemiologicalTreeDatasetGenerator(TreeDatasetGeneratorForEpidemiology):
|
|
|
220
207
|
)
|
|
221
208
|
|
|
222
209
|
|
|
223
|
-
class BDTreeDatasetGenerator(
|
|
210
|
+
class BDTreeDatasetGenerator(ContactTracingTreeDatasetGenerator):
|
|
224
211
|
parameterization: Literal[ParameterizationType.BD] = ParameterizationType.BD
|
|
225
212
|
reproduction_number: cfg.SkylineParameter
|
|
226
213
|
infectious_period: cfg.SkylineParameter
|
|
@@ -234,7 +221,7 @@ class BDTreeDatasetGenerator(TreeDatasetGeneratorForEpidemiology):
|
|
|
234
221
|
)
|
|
235
222
|
|
|
236
223
|
|
|
237
|
-
class BDEITreeDatasetGenerator(
|
|
224
|
+
class BDEITreeDatasetGenerator(ContactTracingTreeDatasetGenerator):
|
|
238
225
|
parameterization: Literal[ParameterizationType.BDEI] = ParameterizationType.BDEI
|
|
239
226
|
reproduction_number: cfg.SkylineParameter
|
|
240
227
|
infectious_period: cfg.SkylineParameter
|
|
@@ -250,7 +237,7 @@ class BDEITreeDatasetGenerator(TreeDatasetGeneratorForEpidemiology):
|
|
|
250
237
|
)
|
|
251
238
|
|
|
252
239
|
|
|
253
|
-
class BDSSTreeDatasetGenerator(
|
|
240
|
+
class BDSSTreeDatasetGenerator(ContactTracingTreeDatasetGenerator):
|
|
254
241
|
parameterization: Literal[ParameterizationType.BDSS] = ParameterizationType.BDSS
|
|
255
242
|
reproduction_number: cfg.SkylineParameter
|
|
256
243
|
infectious_period: cfg.SkylineParameter
|
phylogenie/io/fasta.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from typing import Callable
|
|
2
|
+
|
|
3
|
+
from phylogenie.msa import MSA, Sequence
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def load_fasta(
    fasta_file: str, extract_time_from_id: Callable[[str], float] | None = None
) -> MSA:
    """Load a FASTA file into an ``MSA``.

    Each record starts with a header line beginning with ``>``; all following
    lines up to the next header are joined into that record's sequence, so
    both single-line and wrapped sequences are accepted. Blank lines are
    ignored.

    The sampling time of a record is ``extract_time_from_id(id)`` when a
    callable is given. Otherwise, if the id contains ``|``, the text after the
    last ``|`` is used when it parses as a float; in every other case the time
    is ``None``.

    Args:
        fasta_file: Path of the FASTA file to read.
        extract_time_from_id: Optional callable mapping a record id to its
            time.

    Returns:
        An ``MSA`` built from the parsed sequences, in file order.

    Raises:
        ValueError: If sequence data appears before the first header, or if a
            header has no sequence data.
    """

    def _time_for(seq_id: str) -> float | None:
        # Resolve the time from the id; an unparsable '|' suffix means "no time".
        if extract_time_from_id is not None:
            return extract_time_from_id(seq_id)
        if "|" in seq_id:
            try:
                return float(seq_id.split("|")[-1])
            except ValueError:
                return None
        return None

    def _finish(seq_id: str, time: float | None, chunks: list[str]) -> Sequence:
        # Assemble one record, rejecting headers that carry no sequence data.
        chars = "".join(chunks)
        if not chars:
            raise ValueError(
                f"Invalid FASTA format: no sequence found for record '{seq_id}'"
            )
        return Sequence(seq_id, chars, time)

    sequences: list[Sequence] = []
    current_id: str | None = None
    current_time: float | None = None
    chunks: list[str] = []
    with open(fasta_file, "r") as f:
        for raw_line in f:
            if raw_line.startswith(">"):
                # A new header closes the record that was being read.
                if current_id is not None:
                    sequences.append(_finish(current_id, current_time, chunks))
                current_id = raw_line[1:].strip()
                current_time = _time_for(current_id)
                chunks = []
                continue
            content = raw_line.strip()
            if not content:
                continue  # tolerate blank / trailing lines
            if current_id is None:
                raise ValueError(
                    f"Invalid FASTA format: expected '>', got '{raw_line[0]}'"
                )
            chunks.append(content)
    if current_id is not None:
        sequences.append(_finish(current_id, current_time, chunks))
    return MSA(sequences)
|
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from typing import Callable
|
|
3
2
|
|
|
4
|
-
from phylogenie.msa import MSA, Sequence
|
|
5
3
|
from phylogenie.tree import Tree
|
|
6
4
|
|
|
7
5
|
|
|
8
|
-
def
|
|
6
|
+
def parse_newick(newick: str, translations: dict[str, str] | None = None) -> Tree:
|
|
9
7
|
newick = newick.strip()
|
|
8
|
+
newick = re.sub(r"^\[\&[^\]]*\]", "", newick).strip()
|
|
9
|
+
|
|
10
10
|
stack: list[list[Tree]] = []
|
|
11
11
|
current_children: list[Tree] = []
|
|
12
12
|
current_nodes: list[Tree] = []
|
|
13
13
|
i = 0
|
|
14
|
-
while
|
|
14
|
+
while True:
|
|
15
15
|
|
|
16
16
|
def _read_chars(stoppers: list[str]) -> str:
|
|
17
17
|
nonlocal i
|
|
@@ -29,7 +29,10 @@ def _parse_newick(newick: str) -> Tree:
|
|
|
29
29
|
i += 1
|
|
30
30
|
continue
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
name = _read_chars([":", "[", ",", ")", ";"])
|
|
33
|
+
if translations is not None and name in translations:
|
|
34
|
+
name = translations[name]
|
|
35
|
+
current_node = Tree(name)
|
|
33
36
|
|
|
34
37
|
if newick[i] == "[":
|
|
35
38
|
i += 1
|
|
@@ -62,17 +65,15 @@ def _parse_newick(newick: str) -> Tree:
|
|
|
62
65
|
|
|
63
66
|
i += 1
|
|
64
67
|
|
|
65
|
-
raise ValueError("Newick string should end with ';'")
|
|
66
|
-
|
|
67
68
|
|
|
68
69
|
def load_newick(filepath: str) -> Tree | list[Tree]:
|
|
69
70
|
with open(filepath, "r") as file:
|
|
70
|
-
trees = [
|
|
71
|
+
trees = [parse_newick(newick) for newick in file]
|
|
71
72
|
return trees[0] if len(trees) == 1 else trees
|
|
72
73
|
|
|
73
74
|
|
|
74
|
-
def
|
|
75
|
-
children_newick = ",".join([
|
|
75
|
+
def to_newick(tree: Tree) -> str:
|
|
76
|
+
children_newick = ",".join([to_newick(child) for child in tree.children])
|
|
76
77
|
newick = tree.name
|
|
77
78
|
if tree.features:
|
|
78
79
|
reprs = {k: repr(v).replace("'", '"') for k, v in tree.features.items()}
|
|
@@ -99,26 +100,4 @@ def dump_newick(trees: Tree | list[Tree], filepath: str) -> None:
|
|
|
99
100
|
trees = [trees]
|
|
100
101
|
with open(filepath, "w") as file:
|
|
101
102
|
for t in trees:
|
|
102
|
-
file.write(
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def load_fasta(
|
|
106
|
-
fasta_file: str, extract_time_from_id: Callable[[str], float] | None = None
|
|
107
|
-
) -> MSA:
|
|
108
|
-
sequences: list[Sequence] = []
|
|
109
|
-
with open(fasta_file, "r") as f:
|
|
110
|
-
for line in f:
|
|
111
|
-
if not line.startswith(">"):
|
|
112
|
-
raise ValueError(f"Invalid FASTA format: expected '>', got '{line[0]}'")
|
|
113
|
-
id = line[1:].strip()
|
|
114
|
-
time = None
|
|
115
|
-
if extract_time_from_id is not None:
|
|
116
|
-
time = extract_time_from_id(id)
|
|
117
|
-
elif "|" in id:
|
|
118
|
-
try:
|
|
119
|
-
time = float(id.split("|")[-1])
|
|
120
|
-
except ValueError:
|
|
121
|
-
pass
|
|
122
|
-
chars = next(f).strip()
|
|
123
|
-
sequences.append(Sequence(id, chars, time))
|
|
124
|
-
return MSA(sequences)
|
|
103
|
+
file.write(to_newick(t) + ";\n")
|
phylogenie/io/nexus.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from collections.abc import Iterator
|
|
3
|
+
|
|
4
|
+
from phylogenie.io.newick import parse_newick
|
|
5
|
+
from phylogenie.tree import Tree
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _parse_translate_block(lines: Iterator[str]) -> dict[str, str]:
    """Parse the entries of a NEXUS ``TRANSLATE`` command.

    Consumes lines from ``lines`` until the terminating ``;`` is found. The
    terminator may sit on its own line or directly after the last entry
    (e.g. ``2 taxon_b;``). Blank lines are skipped.

    Args:
        lines: Iterator positioned just after the ``TRANSLATE`` keyword line.
            It is advanced up to and including the line holding the ``;``.

    Returns:
        Mapping from the numeric token (as a string) to the taxon name, with
        surrounding quotes and whitespace removed.

    Raises:
        ValueError: If a non-blank line is neither an entry nor a terminator,
            or if the input ends before a ``;`` is seen.
    """
    translations: dict[str, str] = {}
    for line in lines:
        if not line.strip():
            continue
        match = re.match(r"\s*(\d+)\s+['\"]?([^'\",;]+)['\"]?", line)
        if match is None:
            if ";" in line:
                return translations
            raise ValueError(f"Invalid translate line: {line.strip()}")
        # The name group also swallows trailing whitespace/newline of an
        # unquoted final entry, so strip it.
        translations[match.group(1)] = match.group(2).strip()
        # The terminator may share the line with the last entry.
        if ";" in line[match.end():]:
            return translations
    raise ValueError("Translate block not terminated with ';'")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _parse_trees_block(lines: Iterator[str]) -> dict[str, Tree]:
    """Parse the body of a NEXUS ``TREES`` block.

    Reads lines until ``END;`` (or ``ENDBLOCK;``) or until the input is
    exhausted. A ``TRANSLATE`` line hands the iterator to
    ``_parse_translate_block``; the resulting table is passed to
    ``parse_newick`` for every subsequent ``TREE name = newick`` line. Blank
    lines are skipped.

    Args:
        lines: Iterator positioned just after the ``BEGIN TREES;`` line.

    Returns:
        Mapping from tree name to the parsed ``Tree``.

    Raises:
        ValueError: If a non-blank line is not a ``TRANSLATE``, ``TREE`` or
            end-of-block line.
    """
    trees: dict[str, Tree] = {}
    translations: dict[str, str] = {}
    for line in lines:
        line = line.strip()
        if not line:
            continue
        keyword = line.upper()
        if keyword == "TRANSLATE":
            translations = _parse_translate_block(lines)
        elif keyword in ("END;", "ENDBLOCK;"):
            return trees
        else:
            match = re.match(r"^TREE\s*\*?\s+(\S+)\s*=\s*(.+)$", line, re.IGNORECASE)
            if match is None:
                raise ValueError(
                    f"Invalid tree line. Expected 'TREE name = newick', got: {line}"
                )
            trees[match.group(1)] = parse_newick(match.group(2), translations)
    return trees
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def load_nexus(nexus_file: str) -> dict[str, Tree]:
    """Load the trees of the first ``TREES`` block of a NEXUS file.

    The file is scanned for a line equal to ``BEGIN TREES;`` (ignoring case
    and surrounding whitespace); the rest of the file is then handed to
    ``_parse_trees_block``.

    Args:
        nexus_file: Path of the NEXUS file to read.

    Returns:
        Mapping from tree name to the parsed ``Tree``.

    Raises:
        ValueError: If no ``BEGIN TREES;`` line is found.
    """
    with open(nexus_file, "r") as handle:
        for row in handle:
            if row.strip().upper() != "BEGIN TREES;":
                continue
            # The block parser keeps reading from the same file iterator.
            return _parse_trees_block(handle)
    raise ValueError("No TREES block found in the NEXUS file.")
|
phylogenie/skyline/matrix.py
CHANGED
|
@@ -33,7 +33,7 @@ class SkylineMatrix:
|
|
|
33
33
|
):
|
|
34
34
|
if params is not None and value is None and change_times is None:
|
|
35
35
|
if is_many_skyline_vectors_like(params):
|
|
36
|
-
self.
|
|
36
|
+
self._params = [
|
|
37
37
|
p if isinstance(p, SkylineVector) else SkylineVector(p)
|
|
38
38
|
for p in params
|
|
39
39
|
]
|
|
@@ -41,7 +41,7 @@ class SkylineMatrix:
|
|
|
41
41
|
raise TypeError(
|
|
42
42
|
f"It is impossible to create a SkylineMatrix from `params` {params} of type {type(params)}. Please provide a sequence composed of SkylineVectorLike objects (a SkylineVectorLike object can either be a SkylineVector or a sequence of scalars and/or SkylineParameters)."
|
|
43
43
|
)
|
|
44
|
-
lengths = {len(p) for p in self.
|
|
44
|
+
lengths = {len(p) for p in self._params}
|
|
45
45
|
if len(lengths) > 1:
|
|
46
46
|
raise ValueError(
|
|
47
47
|
f"All `params` must have the same length to create a SkylineMatrix (got params={params} with lengths {lengths})."
|
|
@@ -57,7 +57,7 @@ class SkylineMatrix:
|
|
|
57
57
|
raise TypeError(
|
|
58
58
|
f"It is impossible to create a SkylineMatrix from `value` {value} of type {type(value)}. Please provide a nested (3D) sequence of scalar values."
|
|
59
59
|
)
|
|
60
|
-
self.
|
|
60
|
+
self._params = [
|
|
61
61
|
SkylineVector(
|
|
62
62
|
value=[matrix[i] for matrix in value], change_times=change_times
|
|
63
63
|
)
|
|
@@ -68,6 +68,10 @@ class SkylineMatrix:
|
|
|
68
68
|
"Either `params` or both `value` and `change_times` must be provided to create a SkylineMatrix."
|
|
69
69
|
)
|
|
70
70
|
|
|
71
|
+
@property
|
|
72
|
+
def params(self) -> tuple[SkylineVector, ...]:
|
|
73
|
+
return tuple(self._params)
|
|
74
|
+
|
|
71
75
|
@property
|
|
72
76
|
def n_rows(self) -> int:
|
|
73
77
|
return len(self.params)
|
|
@@ -82,14 +86,14 @@ class SkylineMatrix:
|
|
|
82
86
|
|
|
83
87
|
@property
|
|
84
88
|
def change_times(self) -> pgt.Vector1D:
|
|
85
|
-
return sorted(set([t for row in self.params for t in row.change_times]))
|
|
89
|
+
return tuple(sorted(set([t for row in self.params for t in row.change_times])))
|
|
86
90
|
|
|
87
91
|
@property
|
|
88
92
|
def value(self) -> pgt.Vector3D:
|
|
89
|
-
return
|
|
93
|
+
return tuple(self.get_value_at_time(t) for t in (0, *self.change_times))
|
|
90
94
|
|
|
91
95
|
def get_value_at_time(self, time: pgt.Scalar) -> pgt.Vector2D:
|
|
92
|
-
return
|
|
96
|
+
return tuple(param.get_value_at_time(time) for param in self.params)
|
|
93
97
|
|
|
94
98
|
def _operate(
|
|
95
99
|
self,
|
|
@@ -185,7 +189,7 @@ class SkylineMatrix:
|
|
|
185
189
|
raise TypeError(
|
|
186
190
|
f"It is impossible to set item of SkylineMatrix to value {value} of type {type(value)}. Please provide a SkylineVectorLike object (i.e., a SkylineVector or a sequence of scalars and/or SkylineParameters)."
|
|
187
191
|
)
|
|
188
|
-
self.
|
|
192
|
+
self._params[item] = skyline_vector(value, self.n_cols)
|
|
189
193
|
|
|
190
194
|
|
|
191
195
|
def skyline_matrix(
|
phylogenie/skyline/parameter.py
CHANGED
|
@@ -52,12 +52,20 @@ class SkylineParameter:
|
|
|
52
52
|
f"`change_times` must be non-negative (got change_times={change_times})."
|
|
53
53
|
)
|
|
54
54
|
|
|
55
|
-
self.
|
|
56
|
-
self.
|
|
55
|
+
self._value = [value[0]]
|
|
56
|
+
self._change_times: list[pgt.Scalar] = []
|
|
57
57
|
for i in range(1, len(value)):
|
|
58
58
|
if value[i] != value[i - 1]:
|
|
59
|
-
self.
|
|
60
|
-
self.
|
|
59
|
+
self._value.append(value[i])
|
|
60
|
+
self._change_times.append(change_times[i - 1])
|
|
61
|
+
|
|
62
|
+
@property
|
|
63
|
+
def value(self) -> pgt.Vector1D:
|
|
64
|
+
return tuple(self._value)
|
|
65
|
+
|
|
66
|
+
@property
|
|
67
|
+
def change_times(self) -> pgt.Vector1D:
|
|
68
|
+
return tuple(self._change_times)
|
|
61
69
|
|
|
62
70
|
def get_value_at_time(self, t: pgt.Scalar) -> pgt.Scalar:
|
|
63
71
|
if t < 0:
|
phylogenie/skyline/vector.py
CHANGED
|
@@ -47,7 +47,7 @@ class SkylineVector:
|
|
|
47
47
|
):
|
|
48
48
|
if params is not None and value is None and change_times is None:
|
|
49
49
|
if is_many_skyline_parameters_like(params):
|
|
50
|
-
self.
|
|
50
|
+
self._params = [skyline_parameter(param) for param in params]
|
|
51
51
|
else:
|
|
52
52
|
raise TypeError(
|
|
53
53
|
f"It is impossible to create a SkylineVector from `params` {params} of type {type(params)}. Please provide a sequence of SkylineParameterLike objects (a SkylineParameterLike object can either be a SkylineParameter or a scalar)."
|
|
@@ -63,7 +63,7 @@ class SkylineVector:
|
|
|
63
63
|
raise TypeError(
|
|
64
64
|
f"It is impossible to create a SkylineVector from `value` {value} of type {type(value)}. Please provide a nested (2D) sequence of scalar values."
|
|
65
65
|
)
|
|
66
|
-
self.
|
|
66
|
+
self._params = [
|
|
67
67
|
SkylineParameter([vector[i] for vector in value], change_times)
|
|
68
68
|
for i in range(len(value[0]))
|
|
69
69
|
]
|
|
@@ -72,20 +72,26 @@ class SkylineVector:
|
|
|
72
72
|
"Either `params` or both `value` and `change_times` must be provided to create a SkylineVector."
|
|
73
73
|
)
|
|
74
74
|
|
|
75
|
+
@property
|
|
76
|
+
def params(self) -> tuple[SkylineParameter, ...]:
|
|
77
|
+
return tuple(self._params)
|
|
78
|
+
|
|
75
79
|
@property
|
|
76
80
|
def change_times(self) -> pgt.Vector1D:
|
|
77
|
-
return
|
|
81
|
+
return tuple(
|
|
82
|
+
sorted(set(t for param in self.params for t in param.change_times))
|
|
83
|
+
)
|
|
78
84
|
|
|
79
85
|
@property
|
|
80
86
|
def value(self) -> pgt.Vector2D:
|
|
81
|
-
return
|
|
87
|
+
return tuple(self.get_value_at_time(t) for t in (0, *self.change_times))
|
|
82
88
|
|
|
83
89
|
@property
|
|
84
90
|
def N(self) -> int:
|
|
85
91
|
return len(self.params)
|
|
86
92
|
|
|
87
93
|
def get_value_at_time(self, t: pgt.Scalar) -> pgt.Vector1D:
|
|
88
|
-
return
|
|
94
|
+
return tuple(param.get_value_at_time(t) for param in self.params)
|
|
89
95
|
|
|
90
96
|
def _operate(
|
|
91
97
|
self,
|
|
@@ -154,7 +160,7 @@ class SkylineVector:
|
|
|
154
160
|
raise TypeError(
|
|
155
161
|
f"It is impossible to set item {item} of SkylineVector with value {value} of type {type(value)}. Please provide a SkylineParameterLike object (i.e., a scalar or a SkylineParameter)."
|
|
156
162
|
)
|
|
157
|
-
self.
|
|
163
|
+
self._params[item] = skyline_parameter(value)
|
|
158
164
|
|
|
159
165
|
|
|
160
166
|
def skyline_vector(x: SkylineVectorCoercible, N: int) -> SkylineVector:
|
|
@@ -4,8 +4,6 @@ from phylogenie.treesimulator.events import (
|
|
|
4
4
|
Death,
|
|
5
5
|
Event,
|
|
6
6
|
Migration,
|
|
7
|
-
Mutation,
|
|
8
|
-
MutationTargetType,
|
|
9
7
|
Sampling,
|
|
10
8
|
SamplingWithContactTracing,
|
|
11
9
|
get_BD_events,
|
|
@@ -15,11 +13,13 @@ from phylogenie.treesimulator.events import (
|
|
|
15
13
|
get_contact_tracing_events,
|
|
16
14
|
get_epidemiological_events,
|
|
17
15
|
get_FBD_events,
|
|
18
|
-
get_mutation_id,
|
|
19
16
|
)
|
|
20
17
|
from phylogenie.treesimulator.features import Feature, set_features
|
|
21
18
|
from phylogenie.treesimulator.gillespie import generate_trees, simulate_tree
|
|
22
19
|
from phylogenie.treesimulator.model import get_node_state
|
|
20
|
+
from phylogenie.treesimulator.mutations import Mutation
|
|
21
|
+
from phylogenie.treesimulator.mutations import TargetType as MutationTargetType
|
|
22
|
+
from phylogenie.treesimulator.mutations import get_mutation_id
|
|
23
23
|
|
|
24
24
|
__all__ = [
|
|
25
25
|
"Birth",
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from phylogenie.treesimulator.events.base import Event
|
|
1
2
|
from phylogenie.treesimulator.events.contact_tracing import (
|
|
2
3
|
BirthWithContactTracing,
|
|
3
4
|
SamplingWithContactTracing,
|
|
@@ -6,7 +7,6 @@ from phylogenie.treesimulator.events.contact_tracing import (
|
|
|
6
7
|
from phylogenie.treesimulator.events.core import (
|
|
7
8
|
Birth,
|
|
8
9
|
Death,
|
|
9
|
-
Event,
|
|
10
10
|
Migration,
|
|
11
11
|
Sampling,
|
|
12
12
|
get_BD_events,
|
|
@@ -16,9 +16,6 @@ from phylogenie.treesimulator.events.core import (
|
|
|
16
16
|
get_epidemiological_events,
|
|
17
17
|
get_FBD_events,
|
|
18
18
|
)
|
|
19
|
-
from phylogenie.treesimulator.events.mutations import Mutation
|
|
20
|
-
from phylogenie.treesimulator.events.mutations import TargetType as MutationTargetType
|
|
21
|
-
from phylogenie.treesimulator.events.mutations import get_mutation_id
|
|
22
19
|
|
|
23
20
|
__all__ = [
|
|
24
21
|
"Birth",
|
|
@@ -26,10 +23,8 @@ __all__ = [
|
|
|
26
23
|
"Death",
|
|
27
24
|
"Event",
|
|
28
25
|
"Migration",
|
|
29
|
-
"Mutation",
|
|
30
26
|
"Sampling",
|
|
31
27
|
"SamplingWithContactTracing",
|
|
32
|
-
"MutationTargetType",
|
|
33
28
|
"get_BD_events",
|
|
34
29
|
"get_BDEI_events",
|
|
35
30
|
"get_BDSS_events",
|
|
@@ -37,5 +32,4 @@ __all__ = [
|
|
|
37
32
|
"get_contact_tracing_events",
|
|
38
33
|
"get_epidemiological_events",
|
|
39
34
|
"get_FBD_events",
|
|
40
|
-
"get_mutation_id",
|
|
41
35
|
]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from numpy.random import Generator
|
|
5
|
+
|
|
6
|
+
from phylogenie.skyline import SkylineParameterLike, skyline_parameter
|
|
7
|
+
from phylogenie.treesimulator.model import Model
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Event(ABC):
    """Abstract base for simulator events bound to one state and one rate."""

    def __init__(self, state: str, rate: SkylineParameterLike):
        """Store the state and coerce ``rate`` with ``skyline_parameter``."""
        self.state = state
        self.rate = skyline_parameter(rate)

    def draw_individual(self, model: Model, rng: Generator) -> int:
        """Pick one individual from the model's population in this state."""
        candidates = model.get_population(self.state)
        return rng.choice(candidates)

    def get_propensity(self, model: Model, time: float) -> float:
        """Return the rate at ``time`` multiplied by the individuals in this state.

        An infinite rate with no individuals yields 0 instead of ``inf * 0``.
        """
        count = model.count_individuals(self.state)
        current_rate = self.rate.get_value_at_time(time)
        if count or current_rate != np.inf:
            return current_rate * count
        return 0

    @abstractmethod
    def apply(self, model: Model, time: float, rng: Generator) -> None:
        """Apply the event to ``model`` at ``time``; defined by subclasses."""
        ...
|
|
@@ -5,8 +5,9 @@ import numpy as np
|
|
|
5
5
|
from numpy.random import Generator
|
|
6
6
|
|
|
7
7
|
from phylogenie.skyline import SkylineParameterLike, skyline_parameter
|
|
8
|
+
from phylogenie.treesimulator.events.base import Event
|
|
8
9
|
from phylogenie.treesimulator.events.core import Birth, Death, Migration, Sampling
|
|
9
|
-
from phylogenie.treesimulator.model import
|
|
10
|
+
from phylogenie.treesimulator.model import Model
|
|
10
11
|
|
|
11
12
|
CT_POSTFIX = "-CT"
|
|
12
13
|
CONTACTS_KEY = "CONTACTS"
|
|
@@ -7,7 +7,8 @@ from phylogenie.skyline import (
|
|
|
7
7
|
skyline_matrix,
|
|
8
8
|
skyline_vector,
|
|
9
9
|
)
|
|
10
|
-
from phylogenie.treesimulator.
|
|
10
|
+
from phylogenie.treesimulator.events.base import Event
|
|
11
|
+
from phylogenie.treesimulator.model import Model
|
|
11
12
|
|
|
12
13
|
INFECTIOUS_STATE = "I"
|
|
13
14
|
EXPOSED_STATE = "E"
|
|
@@ -2,8 +2,8 @@ from collections.abc import Iterable
|
|
|
2
2
|
from enum import Enum
|
|
3
3
|
|
|
4
4
|
from phylogenie.tree import Tree
|
|
5
|
-
from phylogenie.treesimulator.events import get_mutation_id
|
|
6
5
|
from phylogenie.treesimulator.model import get_node_state
|
|
6
|
+
from phylogenie.treesimulator.mutations import get_mutation_id
|
|
7
7
|
from phylogenie.utils import (
|
|
8
8
|
get_node_depth_levels,
|
|
9
9
|
get_node_depths,
|
|
@@ -1,20 +1,25 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import time
|
|
3
3
|
from collections.abc import Iterable, Sequence
|
|
4
|
+
from typing import Any
|
|
4
5
|
|
|
5
6
|
import joblib
|
|
6
7
|
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
7
9
|
from numpy.random import default_rng
|
|
8
10
|
from tqdm import tqdm
|
|
9
11
|
|
|
10
12
|
from phylogenie.io import dump_newick
|
|
11
13
|
from phylogenie.tree import Tree
|
|
14
|
+
from phylogenie.treesimulator.events import Event
|
|
12
15
|
from phylogenie.treesimulator.features import Feature, set_features
|
|
13
|
-
from phylogenie.treesimulator.model import
|
|
16
|
+
from phylogenie.treesimulator.model import Model
|
|
17
|
+
from phylogenie.treesimulator.mutations import Mutation
|
|
14
18
|
|
|
15
19
|
|
|
16
20
|
def simulate_tree(
|
|
17
21
|
events: Sequence[Event],
|
|
22
|
+
mutations: Sequence[Mutation] | None = None,
|
|
18
23
|
min_tips: int = 1,
|
|
19
24
|
max_tips: int | None = None,
|
|
20
25
|
max_time: float = np.inf,
|
|
@@ -22,10 +27,7 @@ def simulate_tree(
|
|
|
22
27
|
sampling_probability_at_present: float = 0.0,
|
|
23
28
|
seed: int | None = None,
|
|
24
29
|
timeout: float = np.inf,
|
|
25
|
-
) -> Tree:
|
|
26
|
-
if max_time == np.inf and max_tips is None:
|
|
27
|
-
raise ValueError("Either max_time or max_tips must be specified.")
|
|
28
|
-
|
|
30
|
+
) -> tuple[Tree, dict[str, Any]]:
|
|
29
31
|
if max_time == np.inf and sampling_probability_at_present:
|
|
30
32
|
raise ValueError(
|
|
31
33
|
"sampling_probability_at_present cannot be set when max_time is infinite."
|
|
@@ -41,15 +43,22 @@ def simulate_tree(
|
|
|
41
43
|
elif init_state not in states:
|
|
42
44
|
raise ValueError(f"Init state {init_state} not found in event states: {states}")
|
|
43
45
|
|
|
46
|
+
if mutations is None:
|
|
47
|
+
mutations = []
|
|
48
|
+
|
|
44
49
|
rng = default_rng(seed)
|
|
45
50
|
start_clock = time.perf_counter()
|
|
46
51
|
while True:
|
|
47
|
-
model = Model(init_state
|
|
52
|
+
model = Model(init_state)
|
|
53
|
+
metadata: dict[str, Any] = {}
|
|
54
|
+
run_events = list(events)
|
|
48
55
|
current_time = 0.0
|
|
49
56
|
change_times = sorted(set(t for e in events for t in e.rate.change_times))
|
|
50
57
|
next_change_time = change_times.pop(0) if change_times else np.inf
|
|
58
|
+
|
|
51
59
|
if max_time == np.inf:
|
|
52
|
-
|
|
60
|
+
if max_tips is None:
|
|
61
|
+
raise ValueError("Either max_time or max_tips must be specified.")
|
|
53
62
|
target_n_tips = rng.integers(min_tips, max_tips + 1)
|
|
54
63
|
else:
|
|
55
64
|
target_n_tips = None
|
|
@@ -58,10 +67,9 @@ def simulate_tree(
|
|
|
58
67
|
if time.perf_counter() - start_clock > timeout:
|
|
59
68
|
raise TimeoutError("Simulation timed out.")
|
|
60
69
|
|
|
61
|
-
|
|
62
|
-
rates = [e.get_propensity(model, current_time) for e in events]
|
|
70
|
+
rates = [e.get_propensity(model, current_time) for e in run_events]
|
|
63
71
|
|
|
64
|
-
instantaneous_events = [e for e, r in zip(
|
|
72
|
+
instantaneous_events = [e for e, r in zip(run_events, rates) if r == np.inf]
|
|
65
73
|
if instantaneous_events:
|
|
66
74
|
event = instantaneous_events[rng.integers(len(instantaneous_events))]
|
|
67
75
|
event.apply(model, current_time, rng)
|
|
@@ -76,6 +84,7 @@ def simulate_tree(
|
|
|
76
84
|
):
|
|
77
85
|
break
|
|
78
86
|
|
|
87
|
+
rates.extend(m.rate for m in mutations)
|
|
79
88
|
time_step = rng.exponential(1 / sum(rates))
|
|
80
89
|
if current_time + time_step >= next_change_time:
|
|
81
90
|
current_time = next_change_time
|
|
@@ -86,8 +95,13 @@ def simulate_tree(
|
|
|
86
95
|
break
|
|
87
96
|
current_time += time_step
|
|
88
97
|
|
|
89
|
-
|
|
90
|
-
|
|
98
|
+
targets = run_events + list(mutations)
|
|
99
|
+
target_idx = np.searchsorted(np.cumsum(rates) / sum(rates), rng.random())
|
|
100
|
+
target = targets[int(target_idx)]
|
|
101
|
+
if isinstance(target, Event):
|
|
102
|
+
target.apply(model, current_time, rng)
|
|
103
|
+
else:
|
|
104
|
+
metadata.update(target.apply(model, run_events, current_time, rng))
|
|
91
105
|
|
|
92
106
|
for individual in model.get_population():
|
|
93
107
|
if rng.random() < sampling_probability_at_present:
|
|
@@ -96,7 +110,7 @@ def simulate_tree(
|
|
|
96
110
|
if min_tips <= model.n_sampled and (
|
|
97
111
|
max_tips is None or model.n_sampled <= max_tips
|
|
98
112
|
):
|
|
99
|
-
return model.get_sampled_tree()
|
|
113
|
+
return (model.get_sampled_tree(), metadata)
|
|
100
114
|
|
|
101
115
|
|
|
102
116
|
def generate_trees(
|
|
@@ -112,11 +126,11 @@ def generate_trees(
|
|
|
112
126
|
seed: int | None = None,
|
|
113
127
|
n_jobs: int = -1,
|
|
114
128
|
timeout: float = np.inf,
|
|
115
|
-
) ->
|
|
116
|
-
def _simulate_tree(seed: int) -> Tree:
|
|
129
|
+
) -> pd.DataFrame:
|
|
130
|
+
def _simulate_tree(seed: int) -> tuple[Tree, dict[str, Any]]:
|
|
117
131
|
while True:
|
|
118
132
|
try:
|
|
119
|
-
tree = simulate_tree(
|
|
133
|
+
tree, metadata = simulate_tree(
|
|
120
134
|
events=events,
|
|
121
135
|
min_tips=min_tips,
|
|
122
136
|
max_tips=max_tips,
|
|
@@ -128,7 +142,7 @@ def generate_trees(
|
|
|
128
142
|
)
|
|
129
143
|
if node_features is not None:
|
|
130
144
|
set_features(tree, node_features)
|
|
131
|
-
return tree
|
|
145
|
+
return (tree, metadata)
|
|
132
146
|
except TimeoutError:
|
|
133
147
|
print("Simulation timed out, retrying with a different seed...")
|
|
134
148
|
seed += 1
|
|
@@ -142,7 +156,11 @@ def generate_trees(
|
|
|
142
156
|
joblib.delayed(_simulate_tree)(seed=int(rng.integers(2**32)))
|
|
143
157
|
for _ in range(n_trees)
|
|
144
158
|
)
|
|
145
|
-
|
|
159
|
+
|
|
160
|
+
df: list[dict[str, Any]] = []
|
|
161
|
+
for i, (tree, metadata) in tqdm(
|
|
146
162
|
enumerate(jobs), total=n_trees, desc=f"Generating trees in {output_dir}..."
|
|
147
163
|
):
|
|
164
|
+
df.append({"file_id": i} | metadata)
|
|
148
165
|
dump_newick(tree, os.path.join(output_dir, f"{i}.nwk"))
|
|
166
|
+
return pd.DataFrame(df)
|
|
@@ -1,13 +1,7 @@
|
|
|
1
|
-
from abc import ABC, abstractmethod
|
|
2
1
|
from collections import defaultdict
|
|
3
|
-
from collections.abc import Sequence
|
|
4
2
|
from dataclasses import dataclass
|
|
5
3
|
from typing import Any
|
|
6
4
|
|
|
7
|
-
import numpy as np
|
|
8
|
-
from numpy.random import Generator
|
|
9
|
-
|
|
10
|
-
from phylogenie.skyline import SkylineParameterLike, skyline_parameter
|
|
11
5
|
from phylogenie.tree import Tree
|
|
12
6
|
|
|
13
7
|
|
|
@@ -18,25 +12,6 @@ class Individual:
|
|
|
18
12
|
state: str
|
|
19
13
|
|
|
20
14
|
|
|
21
|
-
class Event(ABC):
|
|
22
|
-
def __init__(self, state: str, rate: SkylineParameterLike):
|
|
23
|
-
self.state = state
|
|
24
|
-
self.rate = skyline_parameter(rate)
|
|
25
|
-
|
|
26
|
-
def draw_individual(self, model: "Model", rng: Generator) -> int:
|
|
27
|
-
return rng.choice(model.get_population(self.state))
|
|
28
|
-
|
|
29
|
-
def get_propensity(self, model: "Model", time: float) -> float:
|
|
30
|
-
n_individuals = model.count_individuals(self.state)
|
|
31
|
-
rate = self.rate.get_value_at_time(time)
|
|
32
|
-
if rate == np.inf and not n_individuals:
|
|
33
|
-
return 0
|
|
34
|
-
return rate * n_individuals
|
|
35
|
-
|
|
36
|
-
@abstractmethod
|
|
37
|
-
def apply(self, model: "Model", time: float, rng: Generator) -> None: ...
|
|
38
|
-
|
|
39
|
-
|
|
40
15
|
def _get_node_name(node_id: int, state: str) -> str:
|
|
41
16
|
return f"{node_id}|{state}"
|
|
42
17
|
|
|
@@ -51,27 +26,19 @@ def get_node_state(node_name: str) -> str:
|
|
|
51
26
|
|
|
52
27
|
|
|
53
28
|
class Model:
|
|
54
|
-
def __init__(self, init_state: str
|
|
29
|
+
def __init__(self, init_state: str):
|
|
55
30
|
self._next_node_id = 0
|
|
56
31
|
self._next_individual_id = 0
|
|
57
32
|
self._population: dict[int, Individual] = {}
|
|
58
33
|
self._states: dict[str, set[int]] = defaultdict(set)
|
|
59
34
|
self._sampled: set[str] = set()
|
|
60
35
|
self._tree = self._get_new_individual(init_state).node
|
|
61
|
-
self._events = list(events)
|
|
62
36
|
self.context: dict[str, Any] = {}
|
|
63
37
|
|
|
64
38
|
@property
|
|
65
39
|
def n_sampled(self) -> int:
|
|
66
40
|
return len(self._sampled)
|
|
67
41
|
|
|
68
|
-
@property
|
|
69
|
-
def events(self) -> tuple[Event, ...]:
|
|
70
|
-
return tuple(self._events)
|
|
71
|
-
|
|
72
|
-
def add_event(self, event: Event) -> None:
|
|
73
|
-
self._events.append(event)
|
|
74
|
-
|
|
75
42
|
def _get_new_node(self, state: str) -> Tree:
|
|
76
43
|
self._next_node_id += 1
|
|
77
44
|
node = Tree(_get_node_name(self._next_node_id, state))
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from copy import deepcopy
|
|
3
3
|
from enum import Enum
|
|
4
|
-
from typing import Type
|
|
4
|
+
from typing import Any, Type
|
|
5
5
|
|
|
6
6
|
from numpy.random import Generator
|
|
7
7
|
|
|
8
8
|
from phylogenie.models import Distribution
|
|
9
|
-
from phylogenie.skyline import SkylineParameterLike
|
|
10
9
|
from phylogenie.treesimulator.events.contact_tracing import (
|
|
11
10
|
BirthWithContactTracing,
|
|
12
11
|
SamplingWithContactTracing,
|
|
@@ -21,7 +20,7 @@ from phylogenie.treesimulator.events.core import (
|
|
|
21
20
|
from phylogenie.treesimulator.model import Model
|
|
22
21
|
|
|
23
22
|
MUTATION_PREFIX = "MUT-"
|
|
24
|
-
|
|
23
|
+
NEXT_MUTATION_ID = "NEXT_MUTATION_ID"
|
|
25
24
|
|
|
26
25
|
|
|
27
26
|
def _get_mutation(state: str) -> str | None:
|
|
@@ -46,68 +45,68 @@ class TargetType(str, Enum):
|
|
|
46
45
|
DEATH = "death"
|
|
47
46
|
MIGRATION = "migration"
|
|
48
47
|
SAMPLING = "sampling"
|
|
49
|
-
MUTATION = "mutation"
|
|
50
48
|
|
|
51
49
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
50
|
+
EVENT_TARGET_TYPES: dict[Type[Event], TargetType] = {
|
|
51
|
+
Birth: TargetType.BIRTH,
|
|
52
|
+
BirthWithContactTracing: TargetType.BIRTH,
|
|
53
|
+
Death: TargetType.DEATH,
|
|
54
|
+
Migration: TargetType.MIGRATION,
|
|
55
|
+
Sampling: TargetType.SAMPLING,
|
|
56
|
+
SamplingWithContactTracing: TargetType.SAMPLING,
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class Mutation:
|
|
61
|
+
def __init__(self, rate: float, rate_scalers: dict[TargetType, Distribution]):
|
|
62
|
+
self.rate = rate
|
|
60
63
|
self.rate_scalers = rate_scalers
|
|
61
64
|
|
|
62
|
-
def apply(
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
model.context
|
|
66
|
-
|
|
65
|
+
def apply(
|
|
66
|
+
self, model: Model, events: list[Event], time: float, rng: Generator
|
|
67
|
+
) -> dict[str, Any]:
|
|
68
|
+
if NEXT_MUTATION_ID not in model.context:
|
|
69
|
+
model.context[NEXT_MUTATION_ID] = 0
|
|
70
|
+
model.context[NEXT_MUTATION_ID] += 1
|
|
71
|
+
mutation_id = model.context[NEXT_MUTATION_ID]
|
|
67
72
|
|
|
68
|
-
individual =
|
|
69
|
-
model.
|
|
73
|
+
individual = rng.choice(model.get_population())
|
|
74
|
+
state = model.get_state(individual)
|
|
75
|
+
model.migrate(individual, _get_mutated_state(mutation_id, state), time)
|
|
70
76
|
|
|
71
|
-
rate_scalers = {
|
|
77
|
+
rate_scalers: dict[TargetType, float] = {
|
|
72
78
|
target_type: getattr(rng, rate_scaler.type)(**rate_scaler.args)
|
|
73
79
|
for target_type, rate_scaler in self.rate_scalers.items()
|
|
74
80
|
}
|
|
75
81
|
|
|
82
|
+
metadata: dict[str, Any] = {}
|
|
76
83
|
for event in [
|
|
77
84
|
deepcopy(e)
|
|
78
|
-
for e in
|
|
79
|
-
if _get_mutation(
|
|
85
|
+
for e in events
|
|
86
|
+
if _get_mutation(state) == _get_mutation(e.state)
|
|
80
87
|
]:
|
|
81
88
|
event.state = _get_mutated_state(mutation_id, event.state)
|
|
82
89
|
if isinstance(event, Birth | BirthWithContactTracing):
|
|
83
90
|
event.child_state = _get_mutated_state(mutation_id, event.child_state)
|
|
84
91
|
elif isinstance(event, Migration):
|
|
85
92
|
event.target_state = _get_mutated_state(mutation_id, event.target_state)
|
|
86
|
-
elif not isinstance(
|
|
87
|
-
event, Mutation | Death | Sampling | SamplingWithContactTracing
|
|
88
|
-
):
|
|
93
|
+
elif not isinstance(event, Death | Sampling | SamplingWithContactTracing):
|
|
89
94
|
raise ValueError(
|
|
90
|
-
f"Mutation not
|
|
95
|
+
f"Mutation not implemented for event of type {type(event)}."
|
|
91
96
|
)
|
|
92
97
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
event.rate
|
|
100
|
-
|
|
101
|
-
model.add_event(event)
|
|
98
|
+
target_type = EVENT_TARGET_TYPES[type(event)]
|
|
99
|
+
if target_type in rate_scalers:
|
|
100
|
+
event.rate *= rate_scalers[target_type]
|
|
101
|
+
metadata[f"{MUTATION_PREFIX}{mutation_id}.{target_type}.rate.value"] = (
|
|
102
|
+
event.rate.value[0]
|
|
103
|
+
if len(event.rate.value) == 1
|
|
104
|
+
else list(event.rate.value)
|
|
105
|
+
)
|
|
102
106
|
|
|
103
|
-
|
|
104
|
-
return f"Mutation(state={self.state}, rate={self.rate})"
|
|
107
|
+
events.append(event)
|
|
105
108
|
|
|
109
|
+
return metadata
|
|
106
110
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
TargetType.DEATH: (Death,),
|
|
110
|
-
TargetType.MIGRATION: (Migration,),
|
|
111
|
-
TargetType.SAMPLING: (Sampling, SamplingWithContactTracing),
|
|
112
|
-
TargetType.MUTATION: (Mutation,),
|
|
113
|
-
}
|
|
111
|
+
def __repr__(self) -> str:
|
|
112
|
+
return f"Mutation(rate={self.rate}, rate_scalers={self.rate_scalers})"
|
phylogenie/typings.py
CHANGED
|
@@ -15,6 +15,6 @@ OneOrMany2DScalars = OneOrMany2D[Scalar]
|
|
|
15
15
|
Many2DScalars = Many2D[Scalar]
|
|
16
16
|
Many3DScalars = Many3D[Scalar]
|
|
17
17
|
|
|
18
|
-
Vector1D =
|
|
19
|
-
Vector2D =
|
|
20
|
-
Vector3D =
|
|
18
|
+
Vector1D = tuple[Scalar, ...]
|
|
19
|
+
Vector2D = tuple[Vector1D, ...]
|
|
20
|
+
Vector3D = tuple[Vector2D, ...]
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
phylogenie/__init__.py,sha256=KV83hJ153WAwCIWEPWXtSsqdI_ncojxiXzt1X_lC7SU,2879
|
|
2
|
+
phylogenie/draw.py,sha256=37eQDK8TKmL8bxSzXvi7jNvrR7ulj7MByR5MA1FtyyE,5317
|
|
3
|
+
phylogenie/generators/__init__.py,sha256=zsOxy28-9j9alOQLIgrOAFfmM58NNHO_NEtW-KXQXAY,888
|
|
4
|
+
phylogenie/generators/alisim.py,sha256=1YQLpOG_Bpn9YqExQqEu-wz1MDGCbpPmTdhrBb6TbWc,2820
|
|
5
|
+
phylogenie/generators/configs.py,sha256=Xp8kk8sDy3EzKSgfdsVuMep1zEvdVX6U86yFgn6MS1I,999
|
|
6
|
+
phylogenie/generators/dataset.py,sha256=kY92diePr2IjiLejHLixJoYRc-2LpM-GBt3wkX9SYvA,2109
|
|
7
|
+
phylogenie/generators/factories.py,sha256=A-r33L5eYe2Shc-o7y6RHe8GoAcTTQw61249_lzC1BY,8123
|
|
8
|
+
phylogenie/generators/trees.py,sha256=HrrSxpejURs2DXrV5PBm3OuD9RgSbSu_YBI_ukWbEcA,10119
|
|
9
|
+
phylogenie/generators/typeguards.py,sha256=yj4VkhOaUXJ2OrY-6zhOeY9C4yKIQxjZtk2d-vIxttQ,828
|
|
10
|
+
phylogenie/io/__init__.py,sha256=gtRYtDdZSTlWCj3I4vmMJSAs93jdz5RySkCakD3sxlQ,214
|
|
11
|
+
phylogenie/io/fasta.py,sha256=IWtNb_RQLR6kvS0G826wB9SodkCGfugddoUHx78Yrec,837
|
|
12
|
+
phylogenie/io/newick.py,sha256=vw0fafh1LL0SXQifIIa1TQ7g5KgTCIAX6vzq-bUOrKE,3396
|
|
13
|
+
phylogenie/io/nexus.py,sha256=IKbV8lJ_Q053iYJ7JzVQPCUqSkSfmiRpUchFTrLHZuE,1551
|
|
14
|
+
phylogenie/main.py,sha256=vtvSpQxBNlYABoFQ25czl-l3fIr4QRo3svWVd-jcArw,1170
|
|
15
|
+
phylogenie/models.py,sha256=pCg9ob0RpLUHwM49x4knKxL4FNPr3-EU_6zMXsvxtAg,370
|
|
16
|
+
phylogenie/msa.py,sha256=JDGyZUsAq6-m-SQjoCDjAkAZIxfgyl_PDIhdYn5HOow,2064
|
|
17
|
+
phylogenie/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
+
phylogenie/skyline/__init__.py,sha256=7pF4CUb4ZCLzNYJNhOjpuTOLTRhlK7L6ugfccNqjIGo,620
|
|
19
|
+
phylogenie/skyline/matrix.py,sha256=v4SitY7VbXprqlqQckjWTzW5hwRmCyIF595R6IJMxWw,9268
|
|
20
|
+
phylogenie/skyline/parameter.py,sha256=TVqkqirGXNN-VP8hnIJACPkOxUan6LkGa5o_JcPfwbY,4834
|
|
21
|
+
phylogenie/skyline/vector.py,sha256=60jtp7PieiEaEH0Tp6zNjNKjyzpN_nT5uwBUXbgeATk,7261
|
|
22
|
+
phylogenie/tree.py,sha256=P1uM6s32TsODpvNJQIPMix9oj39vGSw_wsHYp2wmy5U,5246
|
|
23
|
+
phylogenie/treesimulator/__init__.py,sha256=5UF2S3Evos0-7GJVSQxx6SICaKxcrmU7o1T8JS_cMKk,1246
|
|
24
|
+
phylogenie/treesimulator/events/__init__.py,sha256=5ncG2JMh7RcSgG5BPR012UujxoTrczD0YCv-khSGSHM,792
|
|
25
|
+
phylogenie/treesimulator/events/base.py,sha256=ruyje6lOgBVcsxzqLcEkvjX8TetvHk2xXqTCSz4apKM,875
|
|
26
|
+
phylogenie/treesimulator/events/contact_tracing.py,sha256=bFu00rj5t1ToTuTwzFOqdttodyBAQ9VqUbZpAbvTMSg,4849
|
|
27
|
+
phylogenie/treesimulator/events/core.py,sha256=rALA4LVXsWbtRn0R-SOoAD9w9zDLxmzwzGcQetTVtWU,7997
|
|
28
|
+
phylogenie/treesimulator/features.py,sha256=wWD1aLs4qpFmw7GZpGolhfk7Ym6v9g-cIv2c717cLKU,1359
|
|
29
|
+
phylogenie/treesimulator/gillespie.py,sha256=Xj-ntrhPVH6vMuAe7-Q7x1gA5Zr6xoYoeVLnZQKfcRU,6005
|
|
30
|
+
phylogenie/treesimulator/model.py,sha256=lhDwmBFQ8Qh8qVGZPgED0vehtPC3DE7_CgCV_8rPB-A,4641
|
|
31
|
+
phylogenie/treesimulator/mutations.py,sha256=Wv0qJqjGAMNTYftiCCaSXfZQA6pj8pSrmrOLyTI86ko,3635
|
|
32
|
+
phylogenie/typeguards.py,sha256=JtqmbEWJZBRHbWgCvcl6nrWm3VcBfzRbklbTBYHItn0,1325
|
|
33
|
+
phylogenie/typings.py,sha256=p694PBe_tk25A6N8vGGWxuqoDtt3nHFUsIYJrwR_76Y,494
|
|
34
|
+
phylogenie/utils.py,sha256=ehVk_2kvjW8Q_EyM2kxBPHYiK-KlPmZQx7JeVN6Fh-E,5419
|
|
35
|
+
phylogenie-2.1.25.dist-info/LICENSE.txt,sha256=NUrDqElK-eD3I0WqC004CJsy6cs0JgsAoebDv_42-pw,1071
|
|
36
|
+
phylogenie-2.1.25.dist-info/METADATA,sha256=smzOlX4LSGx_MzMBneW1humdTaUL8FG-TekNtKS1AtE,5477
|
|
37
|
+
phylogenie-2.1.25.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
38
|
+
phylogenie-2.1.25.dist-info/entry_points.txt,sha256=Rt6_usN0FkBX1ZfiqCirjMN9FKOgFLG8rydcQ8kugeE,51
|
|
39
|
+
phylogenie-2.1.25.dist-info/RECORD,,
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
phylogenie/__init__.py,sha256=8ulA-U7-WnBLNsqRYqPOPrJG8X4ZzyEU02oaDbR_Hxs,2849
|
|
2
|
-
phylogenie/draw.py,sha256=WCjr_HCn-wCuxwkDhHA9Ijl1Sm6av6FBLkyR0MK2bVM,5271
|
|
3
|
-
phylogenie/generators/__init__.py,sha256=zsOxy28-9j9alOQLIgrOAFfmM58NNHO_NEtW-KXQXAY,888
|
|
4
|
-
phylogenie/generators/alisim.py,sha256=0aCLuGInifWgAvfh7zARWSKF4EMw3TjlPXMLSECui0k,2783
|
|
5
|
-
phylogenie/generators/configs.py,sha256=WFoeKpgj9ZQIom7BKqwpgXbriiQGg3jFBMLoD8KButk,1073
|
|
6
|
-
phylogenie/generators/dataset.py,sha256=pPwW9yxm9fkU0PPllFq8EsPlqau8tth-4OatbA_hEHo,2120
|
|
7
|
-
phylogenie/generators/factories.py,sha256=TuVFQWRjq33Hewjw_Lp8tQ0l_IPtqYDyQCNJhtiHpw8,7882
|
|
8
|
-
phylogenie/generators/trees.py,sha256=6tHS013RNRyIsObeYq2Kx9it7Yc0TgiMkeAHbskHzAM,10748
|
|
9
|
-
phylogenie/generators/typeguards.py,sha256=yj4VkhOaUXJ2OrY-6zhOeY9C4yKIQxjZtk2d-vIxttQ,828
|
|
10
|
-
phylogenie/io.py,sha256=vUG2yVtoV98tNHut46uSuB3VPj6s64VDhUp1EaSk0o0,4084
|
|
11
|
-
phylogenie/main.py,sha256=vtvSpQxBNlYABoFQ25czl-l3fIr4QRo3svWVd-jcArw,1170
|
|
12
|
-
phylogenie/models.py,sha256=pCg9ob0RpLUHwM49x4knKxL4FNPr3-EU_6zMXsvxtAg,370
|
|
13
|
-
phylogenie/msa.py,sha256=JDGyZUsAq6-m-SQjoCDjAkAZIxfgyl_PDIhdYn5HOow,2064
|
|
14
|
-
phylogenie/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
phylogenie/skyline/__init__.py,sha256=7pF4CUb4ZCLzNYJNhOjpuTOLTRhlK7L6ugfccNqjIGo,620
|
|
16
|
-
phylogenie/skyline/matrix.py,sha256=Gl8OgKjtieG0NwPYiPimKI36gefV8fm_OeorjdXxPTs,9146
|
|
17
|
-
phylogenie/skyline/parameter.py,sha256=EM9qlPt0JhMBy3TbztM0dj24BaGNEy8KWKdTObDKhbI,4644
|
|
18
|
-
phylogenie/skyline/vector.py,sha256=bJP7_FNX_Klt6wXqsyfj0KX3VNj6-dIhzCKSJuQcOV0,7115
|
|
19
|
-
phylogenie/tree.py,sha256=P1uM6s32TsODpvNJQIPMix9oj39vGSw_wsHYp2wmy5U,5246
|
|
20
|
-
phylogenie/treesimulator/__init__.py,sha256=yqS2vtYMhdWSXc9RAnX1dd4zAqSQweMLyVKTnJLfGTU,1106
|
|
21
|
-
phylogenie/treesimulator/events/__init__.py,sha256=6zSgZ0MEUMvTK4yPlSolJnRWzCARLS-jYreTzh45mQo,1033
|
|
22
|
-
phylogenie/treesimulator/events/contact_tracing.py,sha256=_nJ85yhgGkeruQgMHvGpDYoyhheBf8M4LgZWiWdi5dY,4801
|
|
23
|
-
phylogenie/treesimulator/events/core.py,sha256=RF7oHzAjkU675PnczaVc66d9gNrHBL-IhmVHtcy7MKE,7949
|
|
24
|
-
phylogenie/treesimulator/events/mutations.py,sha256=erEvgfiv_X3G_DwK9Hqu-fAR8otupfwq66cp5tRZamM,3591
|
|
25
|
-
phylogenie/treesimulator/features.py,sha256=Wj1rjbOAHHE9XqhrLND2GMgqTB_4M9MdAD5OWoUoatc,1356
|
|
26
|
-
phylogenie/treesimulator/gillespie.py,sha256=LZHB2Ko147E78LoUCtN_BN7NYO1xhMYRy5PUZbN93c0,5283
|
|
27
|
-
phylogenie/treesimulator/model.py,sha256=Nyg6R8XmMwZMSw1-dII81sU9uU7tDe-NMs8v1qKE4_M,5746
|
|
28
|
-
phylogenie/typeguards.py,sha256=JtqmbEWJZBRHbWgCvcl6nrWm3VcBfzRbklbTBYHItn0,1325
|
|
29
|
-
phylogenie/typings.py,sha256=GknvAFXyiaWeeYJ8Lk5d6E2VHT-xW6ONEojYbtJYiB8,476
|
|
30
|
-
phylogenie/utils.py,sha256=ehVk_2kvjW8Q_EyM2kxBPHYiK-KlPmZQx7JeVN6Fh-E,5419
|
|
31
|
-
phylogenie-2.1.23.dist-info/LICENSE.txt,sha256=NUrDqElK-eD3I0WqC004CJsy6cs0JgsAoebDv_42-pw,1071
|
|
32
|
-
phylogenie-2.1.23.dist-info/METADATA,sha256=ym-NHK9hkvBR4kH_KGCnLnckV_iEzvFk0fdMzrkVudQ,5477
|
|
33
|
-
phylogenie-2.1.23.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
34
|
-
phylogenie-2.1.23.dist-info/entry_points.txt,sha256=Rt6_usN0FkBX1ZfiqCirjMN9FKOgFLG8rydcQ8kugeE,51
|
|
35
|
-
phylogenie-2.1.23.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|