phylogenie 1.0.8__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. phylogenie/generators/__init__.py +14 -0
  2. phylogenie/generators/alisim.py +71 -0
  3. phylogenie/generators/configs.py +41 -0
  4. phylogenie/{core → generators}/dataset.py +25 -23
  5. phylogenie/{core → generators}/factories.py +42 -52
  6. phylogenie/generators/trees.py +220 -0
  7. phylogenie/generators/typeguards.py +32 -0
  8. phylogenie/io.py +92 -0
  9. phylogenie/main.py +2 -2
  10. phylogenie/msa.py +72 -0
  11. phylogenie/skyline/matrix.py +62 -45
  12. phylogenie/skyline/vector.py +8 -6
  13. phylogenie/tree.py +53 -0
  14. phylogenie/treesimulator/__init__.py +21 -0
  15. phylogenie/treesimulator/events.py +256 -0
  16. phylogenie/treesimulator/gillespie.py +66 -0
  17. phylogenie/treesimulator/model.py +100 -0
  18. phylogenie/typings.py +0 -2
  19. {phylogenie-1.0.8.dist-info → phylogenie-2.0.0.dist-info}/METADATA +6 -18
  20. phylogenie-2.0.0.dist-info/RECORD +28 -0
  21. phylogenie/backend/__init__.py +0 -0
  22. phylogenie/backend/remaster/__init__.py +0 -21
  23. phylogenie/backend/remaster/generate.py +0 -187
  24. phylogenie/backend/remaster/reactions.py +0 -165
  25. phylogenie/backend/treesimulator.py +0 -163
  26. phylogenie/configs.py +0 -5
  27. phylogenie/core/__init__.py +0 -14
  28. phylogenie/core/configs.py +0 -37
  29. phylogenie/core/context/__init__.py +0 -4
  30. phylogenie/core/context/configs.py +0 -28
  31. phylogenie/core/context/distributions.py +0 -125
  32. phylogenie/core/context/factories.py +0 -54
  33. phylogenie/core/msas/__init__.py +0 -10
  34. phylogenie/core/msas/alisim.py +0 -35
  35. phylogenie/core/msas/base.py +0 -51
  36. phylogenie/core/trees/__init__.py +0 -11
  37. phylogenie/core/trees/base.py +0 -13
  38. phylogenie/core/trees/remaster/__init__.py +0 -3
  39. phylogenie/core/trees/remaster/configs.py +0 -14
  40. phylogenie/core/trees/remaster/factories.py +0 -26
  41. phylogenie/core/trees/remaster/generator.py +0 -177
  42. phylogenie/core/trees/treesimulator.py +0 -199
  43. phylogenie/core/typeguards.py +0 -32
  44. phylogenie-1.0.8.dist-info/RECORD +0 -39
  45. {phylogenie-1.0.8.dist-info → phylogenie-2.0.0.dist-info}/LICENSE.txt +0 -0
  46. {phylogenie-1.0.8.dist-info → phylogenie-2.0.0.dist-info}/WHEEL +0 -0
  47. {phylogenie-1.0.8.dist-info → phylogenie-2.0.0.dist-info}/entry_points.txt +0 -0

phylogenie/core/context/distributions.py
@@ -1,125 +0,0 @@
- from abc import ABC, abstractmethod
- from enum import Enum
- from typing import Annotated, Generic, Literal, TypeVar
-
- from numpy.random import Generator, default_rng
- from pydantic import Field
-
- import phylogenie.typings as pgt
- from phylogenie.configs import StrictBaseModel
-
- _T = TypeVar("_T")
-
-
- class Type(str, Enum):
-     UNIFORM = "uniform"
-     NORMAL = "normal"
-     LOGNORMAL = "lognormal"
-     WEIBULL = "weibull"
-     EXPONENTIAL = "exponential"
-     GAMMA = "gamma"
-     BETA = "beta"
-     INT_UNIFORM = "int-uniform"
-     CATEGORICAL = "categorical"
-
-
- class Distribution(StrictBaseModel, ABC, Generic[_T]):
-     @abstractmethod
-     def _sample(self, rng: Generator) -> _T: ...
-
-     def sample(self, rng: int | Generator | None = None) -> _T:
-         if not isinstance(rng, Generator):
-             rng = default_rng(rng)
-         return self._sample(rng)
-
-
- class Scalar(Distribution[pgt.Scalar]): ...
-
-
- class Uniform(Scalar):
-     type: Literal[Type.UNIFORM] = Type.UNIFORM
-     low: float
-     high: float
-
-     def _sample(self, rng: Generator) -> float:
-         return rng.uniform(self.low, self.high)
-
-
- class Normal(Scalar):
-     type: Literal[Type.NORMAL] = Type.NORMAL
-     mean: float
-     std: float
-
-     def _sample(self, rng: Generator) -> float:
-         return rng.normal(self.mean, self.std)
-
-
- class LogNormal(Scalar):
-     type: Literal[Type.LOGNORMAL] = Type.LOGNORMAL
-     mean: float
-     std: float
-
-     def _sample(self, rng: Generator) -> float:
-         return rng.lognormal(self.mean, self.std)
-
-
- class Weibull(Scalar):
-     type: Literal[Type.WEIBULL] = Type.WEIBULL
-     scale: float
-     shape: float
-
-     def _sample(self, rng: Generator) -> float:
-         return rng.weibull(self.shape) * self.scale
-
-
- class Exponential(Scalar):
-     type: Literal[Type.EXPONENTIAL] = Type.EXPONENTIAL
-     scale: float
-
-     def _sample(self, rng: Generator) -> float:
-         return rng.exponential(self.scale)
-
-
- class Gamma(Scalar):
-     type: Literal[Type.GAMMA] = Type.GAMMA
-     scale: float
-     shape: float
-
-     def _sample(self, rng: Generator) -> float:
-         return rng.gamma(self.shape, self.scale)
-
-
- class Beta(Scalar):
-     type: Literal[Type.BETA] = Type.BETA
-     alpha: float
-     beta: float
-
-     def _sample(self, rng: Generator) -> float:
-         return rng.beta(self.alpha, self.beta)
-
-
- class IntUniform(Scalar):
-     type: Literal[Type.INT_UNIFORM] = Type.INT_UNIFORM
-     low: int
-     high: int
-
-     def _sample(self, rng: Generator) -> int:
-         return int(rng.integers(self.low, self.high))
-
-
- class Categorical(Distribution[str]):
-     type: Literal[Type.CATEGORICAL] = Type.CATEGORICAL
-     categories: list[str]
-     probabilities: list[float]
-
-     def _sample(self, rng: Generator) -> str:
-         return str(rng.choice(self.categories, p=self.probabilities))
-
-
- ScalarDistributionConfig = Annotated[
-     Uniform | Normal | LogNormal | Weibull | Exponential | Gamma | Beta | IntUniform,
-     Field(discriminator="type"),
- ]
- DistributionConfig = Annotated[
-     ScalarDistributionConfig | Categorical, Field(discriminator="type")
- ]
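
For reference, the removed module above defined the 1.0.8 context distributions as pydantic models sharing a single sample() entry point that accepts a seed, a numpy Generator, or nothing. A minimal usage sketch (not part of the diff; assumes phylogenie 1.0.8 is installed, and the parameter values are purely illustrative):

    from phylogenie.core.context.distributions import Categorical, LogNormal, Uniform

    # Scalar draws: sample() seeds a default_rng when given an int or None.
    rate = Uniform(low=0.1, high=2.0)
    print(rate.sample(42))        # reproducible float in [0.1, 2.0)

    origin = LogNormal(mean=0.0, std=0.5)
    print(origin.sample())        # unseeded draw

    # Categorical draws return one of the configured category strings.
    host = Categorical(categories=["human", "swine"], probabilities=[0.7, 0.3])
    print(host.sample(7))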

phylogenie/core/context/factories.py
@@ -1,54 +0,0 @@
- from numpy.random import Generator
-
- import phylogenie.core.context.configs as cfg
- import phylogenie.typings as pgt
- from phylogenie.core.context import distributions
-
-
- def _sample_vector1D(x: distributions.Scalar, N: int, rng: Generator) -> pgt.Vector1D:
-     return [x.sample(rng) for _ in range(N)]
-
-
- def _sample_vector2D(
-     x: distributions.Scalar,
-     size: tuple[int, int],
-     zero_diagonal: bool,
-     rng: Generator,
- ) -> pgt.Vector2D:
-     n_rows, n_cols = size
-     v = [_sample_vector1D(x, n_cols, rng) for _ in range(n_rows)]
-     if zero_diagonal:
-         if n_rows != n_cols:
-             raise ValueError(
-                 f"It is impossible to initialize a non-square matrix with zero the diagonal (got x={x}, size={size} and zero_diagonal=True)"
-             )
-         for i in range(n_rows):
-             v[i][i] = 0
-     return v
-
-
- def _sample_vector3D(
-     x: distributions.Scalar,
-     size: tuple[int, int, int],
-     zero_diagonal: bool,
-     rng: Generator,
- ) -> pgt.Vector3D:
-     n_matrices, n_rows, n_cols = size
-     return [
-         _sample_vector2D(x, (n_rows, n_cols), zero_diagonal, rng)
-         for _ in range(n_matrices)
-     ]
-
-
- def context_factory(x: cfg.ContextConfig, rng: Generator) -> pgt.Data:
-     data: pgt.Data = {}
-     for key, value in x.items():
-         if isinstance(value, distributions.Distribution):
-             data[key] = value.sample(rng)
-         elif isinstance(value, cfg.Vector1DModel):
-             data[key] = _sample_vector1D(value.x, value.size, rng)
-         elif isinstance(value, cfg.Vector2DModel):
-             data[key] = _sample_vector2D(value.x, value.size, value.zero_diagonal, rng)
-         else:
-             data[key] = _sample_vector3D(value.x, value.size, value.zero_diagonal, rng)
-     return data
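
The helpers above expand a single scalar distribution into nested lists; zero_diagonal exists so that square matrices (for example migration-rate matrices) get their diagonal forced to zero, and non-square sizes are rejected. A minimal sketch using the module's private helper purely for illustration (not part of the diff; assumes phylogenie 1.0.8 is installed):

    from numpy.random import default_rng

    from phylogenie.core.context.distributions import Uniform
    from phylogenie.core.context.factories import _sample_vector2D

    rng = default_rng(0)
    # 3x3 matrix of uniform draws with the diagonal zeroed out.
    m = _sample_vector2D(Uniform(low=0.0, high=1.0), (3, 3), zero_diagonal=True, rng=rng)
    # Passing a non-square size with zero_diagonal=True raises ValueError.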

phylogenie/core/msas/__init__.py
@@ -1,10 +0,0 @@
- from typing import Annotated
-
- from pydantic import Field
-
- from phylogenie.core.msas.alisim import AliSimGenerator
-
- MSAsGeneratorConfig = Annotated[
-     AliSimGenerator,
-     Field(discriminator="backend"),
- ]

phylogenie/core/msas/alisim.py
@@ -1,35 +0,0 @@
- import subprocess
- from typing import Literal
-
- from numpy.random import Generator
-
- import phylogenie.typings as pgt
- from phylogenie.core.msas.base import BackendType, MSAsGenerator
-
-
- class AliSimGenerator(MSAsGenerator):
-     backend: Literal[BackendType.ALISIM] = BackendType.ALISIM
-     iqtree_path: str = "iqtree2"
-     args: dict[str, str | int | float]
-
-     def _generate_one_from_tree(
-         self, filename: str, tree_file: str, rng: Generator, data: pgt.Data
-     ) -> None:
-         command = [
-             self.iqtree_path,
-             "--alisim",
-             filename,
-             "--tree",
-             tree_file,
-             "--seed",
-             str(rng.integers(0, 2**32 - 1)),
-         ]
-
-         for key, value in self.args.items():
-             command.extend(
-                 [key, value.format(**data) if isinstance(value, str) else str(value)]
-             )
-
-         command.extend(["-af", "fasta"])
-         subprocess.run(command, check=True, stdout=subprocess.DEVNULL)
-         subprocess.run(["rm", f"{tree_file}.log"], check=True)

phylogenie/core/msas/base.py
@@ -1,51 +0,0 @@
- import os
- from abc import abstractmethod
- from enum import Enum
- from pathlib import Path
- from typing import Literal
-
- from numpy.random import Generator
-
- import phylogenie.typings as pgt
- from phylogenie.core.dataset import DatasetGenerator, DataType
- from phylogenie.core.trees import TreesGeneratorConfig
-
-
- class BackendType(str, Enum):
-     ALISIM = "alisim"
-
-
- MSAS_DIRNAME = "MSAs"
- TREES_DIRNAME = "trees"
-
-
- class MSAsGenerator(DatasetGenerator):
-     data_type: Literal[DataType.MSAS] = DataType.MSAS
-     trees: TreesGeneratorConfig
-     keep_trees: bool = False
-
-     @abstractmethod
-     def _generate_one_from_tree(
-         self, filename: str, tree_file: str, rng: Generator, data: pgt.Data
-     ) -> None: ...
-
-     def _generate_one(self, filename: str, rng: Generator, data: pgt.Data) -> None:
-         if self.keep_trees:
-             base_dir = Path(filename).parent
-             file_id = Path(filename).stem
-             tree_filename = os.path.join(base_dir, TREES_DIRNAME, file_id)
-             msas_dir = os.path.join(base_dir, MSAS_DIRNAME)
-             os.makedirs(msas_dir, exist_ok=True)
-             msa_filename = os.path.join(msas_dir, file_id)
-         else:
-             tree_filename = f"{filename}.temp-tree"
-             msa_filename = filename
-
-         self.trees.generate_one(
-             filename=tree_filename, data=data, seed=int(rng.integers(0, 2**32 - 1))
-         )
-         self._generate_one_from_tree(
-             filename=msa_filename, tree_file=f"{tree_filename}.nwk", rng=rng, data=data
-         )
-         if not self.keep_trees:
-             os.remove(f"{tree_filename}.nwk")
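
_generate_one first simulates a tree, then passes the resulting .nwk file to the MSA backend. With keep_trees=True both artefacts are kept in sibling sub-directories; otherwise the tree is written to a temporary path and deleted once the MSA exists. A sketch of the resulting paths for an output file "out/0042" (illustrative paths, mirroring the logic above):

    import os
    from pathlib import Path

    filename = "out/0042"
    base_dir, file_id = Path(filename).parent, Path(filename).stem

    # keep_trees=True: tree and MSA live in sibling sub-directories.
    tree_kept = os.path.join(base_dir, "trees", file_id)   # out/trees/0042 (".nwk" appended later)
    msa_kept = os.path.join(base_dir, "MSAs", file_id)     # out/MSAs/0042

    # keep_trees=False: the tree goes to a temporary file that is removed afterwards.
    tree_tmp = f"{filename}.temp-tree"                     # out/0042.temp-tree (".nwk" appended later)
    msa = filename                                         # out/0042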

phylogenie/core/trees/__init__.py
@@ -1,11 +0,0 @@
- from typing import Annotated
-
- from pydantic import Field
-
- from phylogenie.core.trees.remaster import ReMASTERGeneratorConfig
- from phylogenie.core.trees.treesimulator import TreeSimulatorGeneratorConfig
-
- TreesGeneratorConfig = Annotated[
-     ReMASTERGeneratorConfig | TreeSimulatorGeneratorConfig,
-     Field(discriminator="backend"),
- ]

phylogenie/core/trees/base.py
@@ -1,13 +0,0 @@
- from enum import Enum
- from typing import Literal
-
- from phylogenie.core.dataset import DatasetGenerator, DataType
-
-
- class BackendType(str, Enum):
-     REMASTER = "remaster"
-     TREESIMULATOR = "treesimulator"
-
-
- class TreesGenerator(DatasetGenerator):
-     data_type: Literal[DataType.TREES] = DataType.TREES

phylogenie/core/trees/remaster/__init__.py
@@ -1,3 +0,0 @@
- from phylogenie.core.trees.remaster.generator import ReMASTERGeneratorConfig
-
- __all__ = ["ReMASTERGeneratorConfig"]

phylogenie/core/trees/remaster/configs.py
@@ -1,14 +0,0 @@
- import phylogenie.core.configs as cfg
- from phylogenie.configs import StrictBaseModel
-
-
- class ReactionConfig(StrictBaseModel):
-     rate: cfg.SkylineParameterLikeConfig
-     value: str
-
-
- class PunctualReactionConfig(StrictBaseModel):
-     times: cfg.ManyScalarsConfig
-     value: str
-     p: cfg.ManyScalarsConfig | None = None
-     n: cfg.ManyIntsConfig | None = None

phylogenie/core/trees/remaster/factories.py
@@ -1,26 +0,0 @@
- import phylogenie.core.trees.remaster.configs as cfg
- import phylogenie.typings as pgt
- from phylogenie.backend.remaster import PunctualReaction, Reaction
- from phylogenie.core.factories import (
-     many_ints_factory,
-     many_scalars_factory,
-     skyline_parameter_like_factory,
- )
-
-
- def reaction_factory(x: cfg.ReactionConfig, data: pgt.Data) -> Reaction:
-     return Reaction(
-         rate=skyline_parameter_like_factory(x.rate, data),
-         value=x.value,
-     )
-
-
- def punctual_reaction_factory(
-     x: cfg.PunctualReactionConfig, data: pgt.Data
- ) -> PunctualReaction:
-     return PunctualReaction(
-         times=many_scalars_factory(x.times, data),
-         value=x.value,
-         p=None if x.p is None else many_scalars_factory(x.p, data),
-         n=None if x.n is None else many_ints_factory(x.n, data),
-     )

phylogenie/core/trees/remaster/generator.py
@@ -1,177 +0,0 @@
- from collections.abc import Iterable
- from enum import Enum
- from typing import Annotated, Literal
-
- from numpy.random import Generator
- from pydantic import Field
-
- import phylogenie.core.configs as cfg
- import phylogenie.typings as pgt
- from phylogenie.backend.remaster import (
-     DEFAULT_POPULATION,
-     SAMPLE_POPULATION,
-     Reaction,
-     generate_trees,
-     get_canonical_reactions,
-     get_epidemiological_reactions,
-     get_FBD_reactions,
- )
- from phylogenie.core.factories import (
-     skyline_matrix_coercible_factory,
-     skyline_vector_coercible_factory,
- )
- from phylogenie.core.trees.base import BackendType, TreesGenerator
- from phylogenie.core.trees.remaster.configs import (
-     PunctualReactionConfig,
-     ReactionConfig,
- )
- from phylogenie.core.trees.remaster.factories import (
-     punctual_reaction_factory,
-     reaction_factory,
- )
-
-
- class ParameterizationType(str, Enum):
-     CANONICAL = "canonical"
-     EPIDEMIOLOGICAL = "epidemiological"
-     FBD = "fbd"
-
-
- class ReMASTERGenerator(TreesGenerator):
-     backend: Literal[BackendType.REMASTER] = BackendType.REMASTER
-     beast_path: str = "beast"
-     populations: str | list[str] = DEFAULT_POPULATION
-     init_population: str = DEFAULT_POPULATION
-     sample_population: str = SAMPLE_POPULATION
-     reactions: Iterable[ReactionConfig] = Field(default_factory=tuple)
-     punctual_reactions: Iterable[PunctualReactionConfig] = Field(default_factory=tuple)
-     trajectory_attrs: dict[str, str | int | float] = Field(default_factory=dict)
-
-     def _generate_one_from_extra_reactions(
-         self, filename: str, rng: Generator, data: pgt.Data, reactions: list[Reaction]
-     ) -> None:
-         generate_trees(
-             tree_filename=f"{filename}.nwk",
-             populations=self.populations,
-             init_population=self.init_population.format(**data),
-             sample_population=self.sample_population,
-             reactions=[reaction_factory(r, data) for r in self.reactions] + reactions,
-             punctual_reactions=[
-                 punctual_reaction_factory(r, data) for r in self.punctual_reactions
-             ],
-             trajectory_attrs={
-                 k: v.format(**data) if isinstance(v, str) else str(v)
-                 for k, v in self.trajectory_attrs.items()
-             },
-             seed=int(rng.integers(0, 2**31 - 1)),
-             beast_path=self.beast_path,
-         )
-
-
- class CanonicalReMASTERGenerator(ReMASTERGenerator):
-     parameterization: Literal[ParameterizationType.CANONICAL] = (
-         ParameterizationType.CANONICAL
-     )
-     birth_rates: cfg.SkylineVectorCoercibleConfig = 0
-     death_rates: cfg.SkylineVectorCoercibleConfig = 0
-     sampling_rates: cfg.SkylineVectorCoercibleConfig = 0
-     removal_probabilities: cfg.SkylineVectorCoercibleConfig = 0
-     migration_rates: cfg.SkylineMatrixCoercibleConfig = 0
-     birth_rates_among_demes: cfg.SkylineMatrixCoercibleConfig = 0
-
-     def _generate_one(self, filename: str, rng: Generator, data: pgt.Data) -> None:
-         reactions = get_canonical_reactions(
-             populations=self.populations,
-             sample_population=self.sample_population,
-             birth_rates=skyline_vector_coercible_factory(self.birth_rates, data),
-             death_rates=skyline_vector_coercible_factory(self.death_rates, data),
-             sampling_rates=skyline_vector_coercible_factory(self.sampling_rates, data),
-             removal_probabilities=skyline_vector_coercible_factory(
-                 self.removal_probabilities, data
-             ),
-             migration_rates=skyline_matrix_coercible_factory(
-                 self.migration_rates, data
-             ),
-             birth_rates_among_demes=skyline_matrix_coercible_factory(
-                 self.birth_rates_among_demes, data
-             ),
-         )
-         self._generate_one_from_extra_reactions(filename, rng, data, reactions)
-
-
- class EpidemiologicalReMASTERGenerator(ReMASTERGenerator):
-     parameterization: Literal[ParameterizationType.EPIDEMIOLOGICAL] = (
-         ParameterizationType.EPIDEMIOLOGICAL
-     )
-     reproduction_numbers: cfg.SkylineVectorCoercibleConfig = 0
-     become_uninfectious_rates: cfg.SkylineVectorCoercibleConfig = 0
-     sampling_proportions: cfg.SkylineVectorCoercibleConfig = 0
-     removal_probabilities: cfg.SkylineVectorCoercibleConfig = 0
-     migration_rates: cfg.SkylineMatrixCoercibleConfig = 0
-     reproduction_numbers_among_demes: cfg.SkylineMatrixCoercibleConfig = 0
-
-     def _generate_one(self, filename: str, rng: Generator, data: pgt.Data) -> None:
-         reactions = get_epidemiological_reactions(
-             populations=self.populations,
-             sample_population=self.sample_population,
-             reproduction_numbers=skyline_vector_coercible_factory(
-                 self.reproduction_numbers, data
-             ),
-             become_uninfectious_rates=skyline_vector_coercible_factory(
-                 self.become_uninfectious_rates, data
-             ),
-             sampling_proportions=skyline_vector_coercible_factory(
-                 self.sampling_proportions, data
-             ),
-             removal_probabilities=skyline_vector_coercible_factory(
-                 self.removal_probabilities, data
-             ),
-             migration_rates=skyline_matrix_coercible_factory(
-                 self.migration_rates, data
-             ),
-             reproduction_numbers_among_demes=skyline_matrix_coercible_factory(
-                 self.reproduction_numbers_among_demes, data
-             ),
-         )
-         self._generate_one_from_extra_reactions(filename, rng, data, reactions)
-
-
- class FBDReMASTERGenerator(ReMASTERGenerator):
-     parameterization: Literal[ParameterizationType.FBD] = ParameterizationType.FBD
-     diversification: cfg.SkylineVectorCoercibleConfig = 0
-     turnover: cfg.SkylineVectorCoercibleConfig = 0
-     sampling_proportions: cfg.SkylineVectorCoercibleConfig = 0
-     removal_probabilities: cfg.SkylineVectorCoercibleConfig = 0
-     migration_rates: cfg.SkylineMatrixCoercibleConfig = 0
-     diversification_between_types: cfg.SkylineMatrixCoercibleConfig = 0
-
-     def _generate_one(self, filename: str, rng: Generator, data: pgt.Data) -> None:
-         reactions = get_FBD_reactions(
-             populations=self.populations,
-             sample_population=self.sample_population,
-             diversification=skyline_vector_coercible_factory(
-                 self.diversification, data
-             ),
-             turnover=skyline_vector_coercible_factory(self.turnover, data),
-             sampling_proportions=skyline_vector_coercible_factory(
-                 self.sampling_proportions, data
-             ),
-             removal_probabilities=skyline_vector_coercible_factory(
-                 self.removal_probabilities, data
-             ),
-             migration_rates=skyline_matrix_coercible_factory(
-                 self.migration_rates, data
-             ),
-             diversification_between_types=skyline_matrix_coercible_factory(
-                 self.diversification_between_types, data
-             ),
-         )
-         self._generate_one_from_extra_reactions(filename, rng, data, reactions)
-
-
- ReMASTERGeneratorConfig = Annotated[
-     CanonicalReMASTERGenerator
-     | EpidemiologicalReMASTERGenerator
-     | FBDReMASTERGenerator,
-     Field(discriminator="parameterization"),
- ]
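
ReMASTERGeneratorConfig uses the same discriminated-union pattern as the other configs removed in this release: every variant carries a Literal tag (parameterization here, backend or type elsewhere) that pydantic uses to select the concrete model during validation. A self-contained sketch of the pattern with toy models (not phylogenie classes; requires pydantic v2):

    from typing import Annotated, Literal

    from pydantic import BaseModel, Field, TypeAdapter


    class Canonical(BaseModel):
        parameterization: Literal["canonical"] = "canonical"
        birth_rates: float = 0


    class Epidemiological(BaseModel):
        parameterization: Literal["epidemiological"] = "epidemiological"
        reproduction_numbers: float = 0


    Config = Annotated[Canonical | Epidemiological, Field(discriminator="parameterization")]

    # The tag value picks the concrete model during validation.
    cfg = TypeAdapter(Config).validate_python(
        {"parameterization": "epidemiological", "reproduction_numbers": 2.5}
    )
    assert isinstance(cfg, Epidemiological)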