phylogenie 2.0.2__tar.gz → 2.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {phylogenie-2.0.2 → phylogenie-2.0.4}/PKG-INFO +2 -2
  2. {phylogenie-2.0.2 → phylogenie-2.0.4}/README.md +1 -1
  3. phylogenie-2.0.4/phylogenie/__init__.py +72 -0
  4. phylogenie-2.0.4/phylogenie/generators/__init__.py +32 -0
  5. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/generators/dataset.py +2 -1
  6. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/io.py +1 -1
  7. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/treesimulator/events.py +38 -50
  8. {phylogenie-2.0.2 → phylogenie-2.0.4}/pyproject.toml +1 -1
  9. phylogenie-2.0.2/phylogenie/__init__.py +0 -0
  10. phylogenie-2.0.2/phylogenie/generators/__init__.py +0 -14
  11. {phylogenie-2.0.2 → phylogenie-2.0.4}/LICENSE.txt +0 -0
  12. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/generators/alisim.py +0 -0
  13. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/generators/configs.py +0 -0
  14. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/generators/factories.py +0 -0
  15. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/generators/trees.py +0 -0
  16. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/generators/typeguards.py +0 -0
  17. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/main.py +0 -0
  18. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/msa.py +0 -0
  19. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/py.typed +0 -0
  20. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/skyline/__init__.py +0 -0
  21. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/skyline/matrix.py +0 -0
  22. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/skyline/parameter.py +0 -0
  23. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/skyline/vector.py +0 -0
  24. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/tree.py +0 -0
  25. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/treesimulator/__init__.py +0 -0
  26. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/treesimulator/gillespie.py +0 -0
  27. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/treesimulator/model.py +0 -0
  28. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/typeguards.py +0 -0
  29. {phylogenie-2.0.2 → phylogenie-2.0.4}/phylogenie/typings.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phylogenie
3
- Version: 2.0.2
3
+ Version: 2.0.4
4
4
  Summary: Generate phylogenetic datasets with minimal setup effort
5
5
  Author: Gabriele Marino
6
6
  Author-email: gabmarino.8601@gmail.com
@@ -23,7 +23,7 @@ Description-Content-Type: text/markdown
23
23
  ---
24
24
 
25
25
  [![AliSim](https://img.shields.io/badge/Powered%20by-AliSim-orange?style=flat-square)](https://iqtree.github.io/doc/AliSim)
26
- [![PyPI package](https://img.shields.io/pypi/v/phylogenie)](https://pypi.org/project/phylogenie/)
26
+ [![PyPI version](https://img.shields.io/pypi/v/phylogenie)](https://pypi.org/project/phylogenie/)
27
27
  [![PyPI downloads](https://shields.io/pypi/dm/phylogenie)](https://pypi.org/project/phylogenie/)
28
28
 
29
29
  Phylogenie is a [Python](https://www.python.org/) package designed to easily simulate phylogenetic datasets—such as trees and multiple sequence alignments (MSAs)—with minimal setup effort. Simply specify the distributions from which your parameters should be sampled, and Phylogenie will handle the rest!
@@ -5,7 +5,7 @@
5
5
  ---
6
6
 
7
7
  [![AliSim](https://img.shields.io/badge/Powered%20by-AliSim-orange?style=flat-square)](https://iqtree.github.io/doc/AliSim)
8
- [![PyPI package](https://img.shields.io/pypi/v/phylogenie)](https://pypi.org/project/phylogenie/)
8
+ [![PyPI version](https://img.shields.io/pypi/v/phylogenie)](https://pypi.org/project/phylogenie/)
9
9
  [![PyPI downloads](https://shields.io/pypi/dm/phylogenie)](https://pypi.org/project/phylogenie/)
10
10
 
11
11
  Phylogenie is a [Python](https://www.python.org/) package designed to easily simulate phylogenetic datasets—such as trees and multiple sequence alignments (MSAs)—with minimal setup effort. Simply specify the distributions from which your parameters should be sampled, and Phylogenie will handle the rest!
@@ -0,0 +1,72 @@
1
+ from phylogenie.generators import (
2
+ AliSimDatasetGenerator,
3
+ BDEITreeDatasetGenerator,
4
+ BDSSTreeDatasetGenerator,
5
+ BDTreeDatasetGenerator,
6
+ CanonicalTreeDatasetGenerator,
7
+ DatasetGenerator,
8
+ DatasetGeneratorConfig,
9
+ EpidemiologicalTreeDatasetGenerator,
10
+ FBDTreeDatasetGenerator,
11
+ TreeDatasetGeneratorConfig,
12
+ )
13
+ from phylogenie.io import load_fasta, load_newick
14
+ from phylogenie.msa import MSA
15
+ from phylogenie.skyline import (
16
+ SkylineMatrix,
17
+ SkylineMatrixCoercible,
18
+ SkylineParameter,
19
+ SkylineParameterLike,
20
+ SkylineVector,
21
+ SkylineVectorCoercible,
22
+ SkylineVectorLike,
23
+ skyline_matrix,
24
+ skyline_parameter,
25
+ skyline_vector,
26
+ )
27
+ from phylogenie.tree import Tree
28
+ from phylogenie.treesimulator import (
29
+ Event,
30
+ get_BD_events,
31
+ get_BDEI_events,
32
+ get_BDSS_events,
33
+ get_canonical_events,
34
+ get_epidemiological_events,
35
+ get_FBD_events,
36
+ simulate_tree,
37
+ )
38
+
39
+ __all__ = [
40
+ "AliSimDatasetGenerator",
41
+ "BDEITreeDatasetGenerator",
42
+ "BDSSTreeDatasetGenerator",
43
+ "BDTreeDatasetGenerator",
44
+ "CanonicalTreeDatasetGenerator",
45
+ "DatasetGenerator",
46
+ "DatasetGeneratorConfig",
47
+ "EpidemiologicalTreeDatasetGenerator",
48
+ "FBDTreeDatasetGenerator",
49
+ "SkylineMatrix",
50
+ "SkylineMatrixCoercible",
51
+ "skyline_matrix",
52
+ "SkylineParameter",
53
+ "SkylineParameterLike",
54
+ "skyline_parameter",
55
+ "SkylineVector",
56
+ "SkylineVectorCoercible",
57
+ "SkylineVectorLike",
58
+ "skyline_vector",
59
+ "Tree",
60
+ "TreeDatasetGeneratorConfig",
61
+ "Event",
62
+ "get_BD_events",
63
+ "get_BDEI_events",
64
+ "get_BDSS_events",
65
+ "get_canonical_events",
66
+ "get_epidemiological_events",
67
+ "get_FBD_events",
68
+ "simulate_tree",
69
+ "load_fasta",
70
+ "load_newick",
71
+ "MSA",
72
+ ]
@@ -0,0 +1,32 @@
1
+ from typing import Annotated
2
+
3
+ from pydantic import Field
4
+
5
+ from phylogenie.generators.alisim import AliSimDatasetGenerator
6
+ from phylogenie.generators.dataset import DatasetGenerator
7
+ from phylogenie.generators.trees import (
8
+ BDEITreeDatasetGenerator,
9
+ BDSSTreeDatasetGenerator,
10
+ BDTreeDatasetGenerator,
11
+ CanonicalTreeDatasetGenerator,
12
+ EpidemiologicalTreeDatasetGenerator,
13
+ FBDTreeDatasetGenerator,
14
+ TreeDatasetGeneratorConfig,
15
+ )
16
+
17
+ DatasetGeneratorConfig = Annotated[
18
+ TreeDatasetGeneratorConfig | AliSimDatasetGenerator,
19
+ Field(discriminator="data_type"),
20
+ ]
21
+
22
+ __all__ = [
23
+ "DatasetGeneratorConfig",
24
+ "DatasetGenerator",
25
+ "AliSimDatasetGenerator",
26
+ "CanonicalTreeDatasetGenerator",
27
+ "EpidemiologicalTreeDatasetGenerator",
28
+ "FBDTreeDatasetGenerator",
29
+ "BDTreeDatasetGenerator",
30
+ "BDEITreeDatasetGenerator",
31
+ "BDSSTreeDatasetGenerator",
32
+ ]
@@ -47,11 +47,12 @@ class DatasetGenerator(ABC, StrictBaseModel):
47
47
  return
48
48
  os.makedirs(data_dir)
49
49
 
50
- data: list[dict[str, Any]] = [{}] * n_samples
50
+ data: list[dict[str, Any]] = [{} for _ in range(n_samples)]
51
51
  if self.context is not None:
52
52
  for d, (k, v) in product(data, self.context.items()):
53
53
  args = v.model_extra if v.model_extra is not None else {}
54
54
  d[k] = np.array(getattr(rng, v.type)(**args)).tolist()
55
+ print(data)
55
56
  df = pd.DataFrame([{"file_id": str(i), **d} for i, d in enumerate(data)])
56
57
  df.to_csv(os.path.join(output_dir, METADATA_FILENAME), index=False)
57
58
 
@@ -85,7 +85,7 @@ def load_fasta(
85
85
  else:
86
86
  try:
87
87
  time = float(id.split("|")[-1])
88
- except ValueError:
88
+ except:
89
89
  time = None
90
90
  chars = next(f).strip()
91
91
  sequences.append(Sequence(id, chars, time))
@@ -1,11 +1,9 @@
1
1
  from abc import ABC, abstractmethod
2
- from collections.abc import Iterator
3
2
 
4
3
  from numpy.random import Generator
5
4
 
6
5
  from phylogenie.skyline import (
7
6
  SkylineMatrixCoercible,
8
- SkylineParameter,
9
7
  SkylineParameterLike,
10
8
  SkylineVectorCoercible,
11
9
  skyline_matrix,
@@ -97,12 +95,6 @@ def get_canonical_events(
97
95
  death_rates = skyline_vector(death_rates, N)
98
96
  sampling_rates = skyline_vector(sampling_rates, N)
99
97
  removal_probabilities = skyline_vector(removal_probabilities, N)
100
- if N == 1 and migration_rates is not None:
101
- raise ValueError(f"Migration rates cannot be provided for a single state.")
102
- if N == 1 and birth_rates_among_states is not None:
103
- raise ValueError(
104
- f"Birth rates among states cannot be provided for a single state."
105
- )
106
98
 
107
99
  events: list[Event] = []
108
100
  for i in range(N):
@@ -111,24 +103,23 @@ def get_canonical_events(
111
103
  events.append(DeathEvent(death_rates[i], state))
112
104
  events.append(SamplingEvent(sampling_rates[i], state, removal_probabilities[i]))
113
105
 
114
- def _parse_matrix_events(
115
- matrix: SkylineMatrixCoercible | None,
116
- ) -> Iterator[tuple[SkylineParameter, str, str]]:
117
- if matrix is not None:
118
- matrix = skyline_matrix(matrix, N, N - 1)
119
- assert states is not None
120
- for i, state in enumerate(states):
121
- for j, other_state in enumerate([s for s in states if s != state]):
122
- yield matrix[i, j], state, other_state
123
-
124
- events.extend(
125
- MigrationEvent(rate, state, other_state)
126
- for rate, state, other_state in _parse_matrix_events(migration_rates)
127
- )
128
- events.extend(
129
- BirthEvent(rate, state, other_state)
130
- for rate, state, other_state in _parse_matrix_events(birth_rates_among_states)
131
- )
106
+ if states is not None and migration_rates is not None:
107
+ migration_rates = skyline_matrix(migration_rates, N, N - 1)
108
+ for i, state in enumerate(states):
109
+ for j, other_state in enumerate([s for s in states if s != state]):
110
+ events.append(MigrationEvent(migration_rates[i, j], state, other_state))
111
+ elif migration_rates is not None:
112
+ raise ValueError(f"Migration rates require states to be provided.")
113
+
114
+ if states is not None and birth_rates_among_states is not None:
115
+ birth_rates_among_states = skyline_matrix(birth_rates_among_states, N, N - 1)
116
+ for i, state in enumerate(states):
117
+ for j, other_state in enumerate([s for s in states if s != state]):
118
+ events.append(
119
+ BirthEvent(birth_rates_among_states[i, j], state, other_state)
120
+ )
121
+ elif birth_rates_among_states is not None:
122
+ raise ValueError(f"Birth rates among states require states to be provided.")
132
123
 
133
124
  return [event for event in events if event.rate]
134
125
 
@@ -148,20 +139,20 @@ def get_epidemiological_events(
148
139
  become_uninfectious_rates = skyline_vector(become_uninfectious_rates, N)
149
140
  sampling_proportions = skyline_vector(sampling_proportions, N)
150
141
  removal_probabilities = skyline_vector(removal_probabilities, N)
151
- if N == 1 and reproduction_numbers_among_states is not None:
152
- raise ValueError(
153
- f"Reproduction numbers among states cannot be provided for a single state."
154
- )
155
142
 
156
143
  birth_rates = reproduction_numbers * become_uninfectious_rates
157
144
  sampling_rates = become_uninfectious_rates * sampling_proportions
158
- birth_rates_among_states = (
159
- None
160
- if reproduction_numbers_among_states is None
161
- else skyline_matrix(reproduction_numbers_among_states, N, N - 1)
162
- * become_uninfectious_rates
163
- )
164
145
  death_rates = become_uninfectious_rates - removal_probabilities * sampling_rates
146
+ birth_rates_among_states = None
147
+ if states is None and reproduction_numbers_among_states is not None:
148
+ raise ValueError(
149
+ f"Reproduction numbers among states require states to be provided."
150
+ )
151
+ elif reproduction_numbers_among_states is not None:
152
+ birth_rates_among_states = (
153
+ skyline_matrix(reproduction_numbers_among_states, N, N - 1)
154
+ * become_uninfectious_rates
155
+ )
165
156
 
166
157
  return get_canonical_events(
167
158
  states=states,
@@ -189,23 +180,20 @@ def get_FBD_events(
189
180
  turnover = skyline_vector(turnover, N)
190
181
  sampling_proportions = skyline_vector(sampling_proportions, N)
191
182
  removal_probabilities = skyline_vector(removal_probabilities, N)
192
- if N == 1 and diversification_between_types is not None:
193
- raise ValueError(
194
- f"Diversification rates among states cannot be provided for a single state."
195
- )
196
183
 
197
184
  birth_rates = diversification / (1 - turnover)
198
185
  death_rates = turnover * birth_rates
199
- sampling_rates = (
200
- sampling_proportions
201
- * death_rates
202
- / (1 - removal_probabilities * sampling_proportions)
203
- )
204
- birth_rates_among_states = (
205
- None
206
- if diversification_between_types is None
207
- else skyline_matrix(diversification_between_types, N, N - 1) + death_rates
208
- )
186
+ sampling_rates_dividend = 1 - removal_probabilities * sampling_proportions
187
+ sampling_rates = sampling_proportions * death_rates / sampling_rates_dividend
188
+ birth_rates_among_states = None
189
+ if states is None and diversification_between_types is not None:
190
+ raise ValueError(
191
+ f"Diversification rates among states require states to be provided."
192
+ )
193
+ elif diversification_between_types is not None:
194
+ birth_rates_among_states = (
195
+ skyline_matrix(diversification_between_types, N, N - 1) + death_rates
196
+ )
209
197
 
210
198
  return get_canonical_events(
211
199
  states=states,
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "phylogenie"
3
- version = "2.0.2"
3
+ version = "2.0.4"
4
4
  description = "Generate phylogenetic datasets with minimal setup effort"
5
5
  authors = ["Gabriele Marino <gabmarino.8601@gmail.com>"]
6
6
  readme = "README.md"
File without changes
@@ -1,14 +0,0 @@
1
- from typing import Annotated
2
-
3
- from pydantic import Field
4
-
5
- from phylogenie.generators.alisim import AliSimDatasetGenerator
6
- from phylogenie.generators.dataset import DatasetGenerator
7
- from phylogenie.generators.trees import TreeDatasetGeneratorConfig
8
-
9
- DatasetGeneratorConfig = Annotated[
10
- TreeDatasetGeneratorConfig | AliSimDatasetGenerator,
11
- Field(discriminator="data_type"),
12
- ]
13
-
14
- __all__ = ["DatasetGeneratorConfig", "DatasetGenerator"]
File without changes
File without changes