phylogenie 2.0.2__tar.gz → 2.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phylogenie-2.0.2 → phylogenie-2.0.4}/PKG-INFO +2 -2
- {phylogenie-2.0.2 → phylogenie-2.0.4}/README.md +1 -1
- phylogenie-2.0.4/phylogenie/__init__.py +72 -0
- phylogenie-2.0.4/phylogenie/generators/__init__.py +32 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/generators/dataset.py +2 -1
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/io.py +1 -1
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/treesimulator/events.py +38 -50
- {phylogenie-2.0.2 → phylogenie-2.0.4}/pyproject.toml +1 -1
- phylogenie-2.0.2/phylogenie/__init__.py +0 -0
- phylogenie-2.0.2/phylogenie/generators/__init__.py +0 -14
- {phylogenie-2.0.2 → phylogenie-2.0.4}/LICENSE.txt +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/generators/alisim.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/generators/configs.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/generators/factories.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/generators/trees.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/generators/typeguards.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/main.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/msa.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/py.typed +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/skyline/__init__.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/skyline/matrix.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/skyline/parameter.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/skyline/vector.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/tree.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/treesimulator/__init__.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/treesimulator/gillespie.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/treesimulator/model.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/typeguards.py +0 -0
- {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/typings.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: phylogenie
|
|
3
|
-
Version: 2.0.2
|
|
3
|
+
Version: 2.0.4
|
|
4
4
|
Summary: Generate phylogenetic datasets with minimal setup effort
|
|
5
5
|
Author: Gabriele Marino
|
|
6
6
|
Author-email: gabmarino.8601@gmail.com
|
|
@@ -23,7 +23,7 @@ Description-Content-Type: text/markdown
|
|
|
23
23
|
---
|
|
24
24
|
|
|
25
25
|
[](https://iqtree.github.io/doc/AliSim)
|
|
26
|
-
[](https://pypi.org/project/phylogenie/)
|
|
27
27
|
[](https://pypi.org/project/phylogenie/)
|
|
28
28
|
|
|
29
29
|
Phylogenie is a [Python](https://www.python.org/) package designed to easily simulate phylogenetic datasets—such as trees and multiple sequence alignments (MSAs)—with minimal setup effort. Simply specify the distributions from which your parameters should be sampled, and Phylogenie will handle the rest!
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
[](https://iqtree.github.io/doc/AliSim)
|
|
8
|
-
[](https://pypi.org/project/phylogenie/)
|
|
9
9
|
[](https://pypi.org/project/phylogenie/)
|
|
10
10
|
|
|
11
11
|
Phylogenie is a [Python](https://www.python.org/) package designed to easily simulate phylogenetic datasets—such as trees and multiple sequence alignments (MSAs)—with minimal setup effort. Simply specify the distributions from which your parameters should be sampled, and Phylogenie will handle the rest!
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from phylogenie.generators import (
|
|
2
|
+
AliSimDatasetGenerator,
|
|
3
|
+
BDEITreeDatasetGenerator,
|
|
4
|
+
BDSSTreeDatasetGenerator,
|
|
5
|
+
BDTreeDatasetGenerator,
|
|
6
|
+
CanonicalTreeDatasetGenerator,
|
|
7
|
+
DatasetGenerator,
|
|
8
|
+
DatasetGeneratorConfig,
|
|
9
|
+
EpidemiologicalTreeDatasetGenerator,
|
|
10
|
+
FBDTreeDatasetGenerator,
|
|
11
|
+
TreeDatasetGeneratorConfig,
|
|
12
|
+
)
|
|
13
|
+
from phylogenie.io import load_fasta, load_newick
|
|
14
|
+
from phylogenie.msa import MSA
|
|
15
|
+
from phylogenie.skyline import (
|
|
16
|
+
SkylineMatrix,
|
|
17
|
+
SkylineMatrixCoercible,
|
|
18
|
+
SkylineParameter,
|
|
19
|
+
SkylineParameterLike,
|
|
20
|
+
SkylineVector,
|
|
21
|
+
SkylineVectorCoercible,
|
|
22
|
+
SkylineVectorLike,
|
|
23
|
+
skyline_matrix,
|
|
24
|
+
skyline_parameter,
|
|
25
|
+
skyline_vector,
|
|
26
|
+
)
|
|
27
|
+
from phylogenie.tree import Tree
|
|
28
|
+
from phylogenie.treesimulator import (
|
|
29
|
+
Event,
|
|
30
|
+
get_BD_events,
|
|
31
|
+
get_BDEI_events,
|
|
32
|
+
get_BDSS_events,
|
|
33
|
+
get_canonical_events,
|
|
34
|
+
get_epidemiological_events,
|
|
35
|
+
get_FBD_events,
|
|
36
|
+
simulate_tree,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
__all__ = [
|
|
40
|
+
"AliSimDatasetGenerator",
|
|
41
|
+
"BDEITreeDatasetGenerator",
|
|
42
|
+
"BDSSTreeDatasetGenerator",
|
|
43
|
+
"BDTreeDatasetGenerator",
|
|
44
|
+
"CanonicalTreeDatasetGenerator",
|
|
45
|
+
"DatasetGenerator",
|
|
46
|
+
"DatasetGeneratorConfig",
|
|
47
|
+
"EpidemiologicalTreeDatasetGenerator",
|
|
48
|
+
"FBDTreeDatasetGenerator",
|
|
49
|
+
"SkylineMatrix",
|
|
50
|
+
"SkylineMatrixCoercible",
|
|
51
|
+
"skyline_matrix",
|
|
52
|
+
"SkylineParameter",
|
|
53
|
+
"SkylineParameterLike",
|
|
54
|
+
"skyline_parameter",
|
|
55
|
+
"SkylineVector",
|
|
56
|
+
"SkylineVectorCoercible",
|
|
57
|
+
"SkylineVectorLike",
|
|
58
|
+
"skyline_vector",
|
|
59
|
+
"Tree",
|
|
60
|
+
"TreeDatasetGeneratorConfig",
|
|
61
|
+
"Event",
|
|
62
|
+
"get_BD_events",
|
|
63
|
+
"get_BDEI_events",
|
|
64
|
+
"get_BDSS_events",
|
|
65
|
+
"get_canonical_events",
|
|
66
|
+
"get_epidemiological_events",
|
|
67
|
+
"get_FBD_events",
|
|
68
|
+
"simulate_tree",
|
|
69
|
+
"load_fasta",
|
|
70
|
+
"load_newick",
|
|
71
|
+
"MSA",
|
|
72
|
+
]
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
3
|
+
from pydantic import Field
|
|
4
|
+
|
|
5
|
+
from phylogenie.generators.alisim import AliSimDatasetGenerator
|
|
6
|
+
from phylogenie.generators.dataset import DatasetGenerator
|
|
7
|
+
from phylogenie.generators.trees import (
|
|
8
|
+
BDEITreeDatasetGenerator,
|
|
9
|
+
BDSSTreeDatasetGenerator,
|
|
10
|
+
BDTreeDatasetGenerator,
|
|
11
|
+
CanonicalTreeDatasetGenerator,
|
|
12
|
+
EpidemiologicalTreeDatasetGenerator,
|
|
13
|
+
FBDTreeDatasetGenerator,
|
|
14
|
+
TreeDatasetGeneratorConfig,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
DatasetGeneratorConfig = Annotated[
|
|
18
|
+
TreeDatasetGeneratorConfig | AliSimDatasetGenerator,
|
|
19
|
+
Field(discriminator="data_type"),
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"DatasetGeneratorConfig",
|
|
24
|
+
"DatasetGenerator",
|
|
25
|
+
"AliSimDatasetGenerator",
|
|
26
|
+
"CanonicalTreeDatasetGenerator",
|
|
27
|
+
"EpidemiologicalTreeDatasetGenerator",
|
|
28
|
+
"FBDTreeDatasetGenerator",
|
|
29
|
+
"BDTreeDatasetGenerator",
|
|
30
|
+
"BDEITreeDatasetGenerator",
|
|
31
|
+
"BDSSTreeDatasetGenerator",
|
|
32
|
+
]
|
|
@@ -47,11 +47,12 @@ class DatasetGenerator(ABC, StrictBaseModel):
|
|
|
47
47
|
return
|
|
48
48
|
os.makedirs(data_dir)
|
|
49
49
|
|
|
50
|
-
data: list[dict[str, Any]] = [{}
|
|
50
|
+
data: list[dict[str, Any]] = [{} for _ in range(n_samples)]
|
|
51
51
|
if self.context is not None:
|
|
52
52
|
for d, (k, v) in product(data, self.context.items()):
|
|
53
53
|
args = v.model_extra if v.model_extra is not None else {}
|
|
54
54
|
d[k] = np.array(getattr(rng, v.type)(**args)).tolist()
|
|
55
|
+
print(data)
|
|
55
56
|
df = pd.DataFrame([{"file_id": str(i), **d} for i, d in enumerate(data)])
|
|
56
57
|
df.to_csv(os.path.join(output_dir, METADATA_FILENAME), index=False)
|
|
57
58
|
|
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from collections.abc import Iterator
|
|
3
2
|
|
|
4
3
|
from numpy.random import Generator
|
|
5
4
|
|
|
6
5
|
from phylogenie.skyline import (
|
|
7
6
|
SkylineMatrixCoercible,
|
|
8
|
-
SkylineParameter,
|
|
9
7
|
SkylineParameterLike,
|
|
10
8
|
SkylineVectorCoercible,
|
|
11
9
|
skyline_matrix,
|
|
@@ -97,12 +95,6 @@ def get_canonical_events(
|
|
|
97
95
|
death_rates = skyline_vector(death_rates, N)
|
|
98
96
|
sampling_rates = skyline_vector(sampling_rates, N)
|
|
99
97
|
removal_probabilities = skyline_vector(removal_probabilities, N)
|
|
100
|
-
if N == 1 and migration_rates is not None:
|
|
101
|
-
raise ValueError(f"Migration rates cannot be provided for a single state.")
|
|
102
|
-
if N == 1 and birth_rates_among_states is not None:
|
|
103
|
-
raise ValueError(
|
|
104
|
-
f"Birth rates among states cannot be provided for a single state."
|
|
105
|
-
)
|
|
106
98
|
|
|
107
99
|
events: list[Event] = []
|
|
108
100
|
for i in range(N):
|
|
@@ -111,24 +103,23 @@ def get_canonical_events(
|
|
|
111
103
|
events.append(DeathEvent(death_rates[i], state))
|
|
112
104
|
events.append(SamplingEvent(sampling_rates[i], state, removal_probabilities[i]))
|
|
113
105
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
)
|
|
106
|
+
if states is not None and migration_rates is not None:
|
|
107
|
+
migration_rates = skyline_matrix(migration_rates, N, N - 1)
|
|
108
|
+
for i, state in enumerate(states):
|
|
109
|
+
for j, other_state in enumerate([s for s in states if s != state]):
|
|
110
|
+
events.append(MigrationEvent(migration_rates[i, j], state, other_state))
|
|
111
|
+
elif migration_rates is not None:
|
|
112
|
+
raise ValueError(f"Migration rates require states to be provided.")
|
|
113
|
+
|
|
114
|
+
if states is not None and birth_rates_among_states is not None:
|
|
115
|
+
birth_rates_among_states = skyline_matrix(birth_rates_among_states, N, N - 1)
|
|
116
|
+
for i, state in enumerate(states):
|
|
117
|
+
for j, other_state in enumerate([s for s in states if s != state]):
|
|
118
|
+
events.append(
|
|
119
|
+
BirthEvent(birth_rates_among_states[i, j], state, other_state)
|
|
120
|
+
)
|
|
121
|
+
elif birth_rates_among_states is not None:
|
|
122
|
+
raise ValueError(f"Birth rates among states require states to be provided.")
|
|
132
123
|
|
|
133
124
|
return [event for event in events if event.rate]
|
|
134
125
|
|
|
@@ -148,20 +139,20 @@ def get_epidemiological_events(
|
|
|
148
139
|
become_uninfectious_rates = skyline_vector(become_uninfectious_rates, N)
|
|
149
140
|
sampling_proportions = skyline_vector(sampling_proportions, N)
|
|
150
141
|
removal_probabilities = skyline_vector(removal_probabilities, N)
|
|
151
|
-
if N == 1 and reproduction_numbers_among_states is not None:
|
|
152
|
-
raise ValueError(
|
|
153
|
-
f"Reproduction numbers among states cannot be provided for a single state."
|
|
154
|
-
)
|
|
155
142
|
|
|
156
143
|
birth_rates = reproduction_numbers * become_uninfectious_rates
|
|
157
144
|
sampling_rates = become_uninfectious_rates * sampling_proportions
|
|
158
|
-
birth_rates_among_states = (
|
|
159
|
-
None
|
|
160
|
-
if reproduction_numbers_among_states is None
|
|
161
|
-
else skyline_matrix(reproduction_numbers_among_states, N, N - 1)
|
|
162
|
-
* become_uninfectious_rates
|
|
163
|
-
)
|
|
164
145
|
death_rates = become_uninfectious_rates - removal_probabilities * sampling_rates
|
|
146
|
+
birth_rates_among_states = None
|
|
147
|
+
if states is None and reproduction_numbers_among_states is not None:
|
|
148
|
+
raise ValueError(
|
|
149
|
+
f"Reproduction numbers among states require states to be provided."
|
|
150
|
+
)
|
|
151
|
+
elif reproduction_numbers_among_states is not None:
|
|
152
|
+
birth_rates_among_states = (
|
|
153
|
+
skyline_matrix(reproduction_numbers_among_states, N, N - 1)
|
|
154
|
+
* become_uninfectious_rates
|
|
155
|
+
)
|
|
165
156
|
|
|
166
157
|
return get_canonical_events(
|
|
167
158
|
states=states,
|
|
@@ -189,23 +180,20 @@ def get_FBD_events(
|
|
|
189
180
|
turnover = skyline_vector(turnover, N)
|
|
190
181
|
sampling_proportions = skyline_vector(sampling_proportions, N)
|
|
191
182
|
removal_probabilities = skyline_vector(removal_probabilities, N)
|
|
192
|
-
if N == 1 and diversification_between_types is not None:
|
|
193
|
-
raise ValueError(
|
|
194
|
-
f"Diversification rates among states cannot be provided for a single state."
|
|
195
|
-
)
|
|
196
183
|
|
|
197
184
|
birth_rates = diversification / (1 - turnover)
|
|
198
185
|
death_rates = turnover * birth_rates
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
186
|
+
sampling_rates_dividend = 1 - removal_probabilities * sampling_proportions
|
|
187
|
+
sampling_rates = sampling_proportions * death_rates / sampling_rates_dividend
|
|
188
|
+
birth_rates_among_states = None
|
|
189
|
+
if states is None and diversification_between_types is not None:
|
|
190
|
+
raise ValueError(
|
|
191
|
+
f"Diversification rates among states require states to be provided."
|
|
192
|
+
)
|
|
193
|
+
elif diversification_between_types is not None:
|
|
194
|
+
birth_rates_among_states = (
|
|
195
|
+
skyline_matrix(diversification_between_types, N, N - 1) + death_rates
|
|
196
|
+
)
|
|
209
197
|
|
|
210
198
|
return get_canonical_events(
|
|
211
199
|
states=states,
|
|
File without changes
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
from typing import Annotated
|
|
2
|
-
|
|
3
|
-
from pydantic import Field
|
|
4
|
-
|
|
5
|
-
from phylogenie.generators.alisim import AliSimDatasetGenerator
|
|
6
|
-
from phylogenie.generators.dataset import DatasetGenerator
|
|
7
|
-
from phylogenie.generators.trees import TreeDatasetGeneratorConfig
|
|
8
|
-
|
|
9
|
-
DatasetGeneratorConfig = Annotated[
|
|
10
|
-
TreeDatasetGeneratorConfig | AliSimDatasetGenerator,
|
|
11
|
-
Field(discriminator="data_type"),
|
|
12
|
-
]
|
|
13
|
-
|
|
14
|
-
__all__ = ["DatasetGeneratorConfig", "DatasetGenerator"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|