phylogenie 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -151,6 +151,7 @@ def generate_trees(
151
151
  output_xml_file: str | None = None,
152
152
  n_simulations: int = 1,
153
153
  seed: int | None = None,
154
+ beast_path: str = "beast",
154
155
  ) -> None:
155
156
  if isinstance(populations, str):
156
157
  populations = [populations]
@@ -175,7 +176,7 @@ def generate_trees(
175
176
  n_simulations=n_simulations,
176
177
  )
177
178
 
178
- cmd = ["beast"]
179
+ cmd = [beast_path]
179
180
  if seed is not None:
180
181
  cmd.extend(["-seed", str(seed)])
181
182
  cmd.append(xml_file)
@@ -19,12 +19,12 @@ class DataType(str, Enum):
19
19
 
20
20
  class DatasetGenerator(ABC, StrictBaseModel):
21
21
  output_dir: str = "phylogenie-out"
22
+ data_dir: str = "data"
23
+ metadata_filename: str = "metadata.csv"
22
24
  n_samples: int | dict[str, int] = 1
23
- context: ContextConfig | None = None
24
25
  n_jobs: int = -1
25
26
  seed: int | None = None
26
- data_dir: str = "data"
27
- metadata_filename: str = "metadata.csv"
27
+ context: ContextConfig | None = None
28
28
 
29
29
  @abstractmethod
30
30
  def _generate_one(self, filename: str, rng: Generator, data: Data) -> None: ...
@@ -39,6 +39,7 @@ class ParameterizationType(str, Enum):
39
39
 
40
40
  class ReMASTERGenerator(TreesGenerator):
41
41
  backend: Literal[BackendType.REMASTER] = BackendType.REMASTER
42
+ beast_path: str = "beast"
42
43
  populations: str | list[str] = DEFAULT_POPULATION
43
44
  init_population: str = DEFAULT_POPULATION
44
45
  sample_population: str = SAMPLE_POPULATION
@@ -63,6 +64,7 @@ class ReMASTERGenerator(TreesGenerator):
63
64
  for k, v in self.trajectory_attrs.items()
64
65
  },
65
66
  seed=int(rng.integers(0, 2**31 - 1)),
67
+ beast_path=self.beast_path,
66
68
  )
67
69
 
68
70
  def _generate_one(self, filename: str, rng: Generator, data: Data) -> None:
@@ -35,12 +35,12 @@ class TreeSimulatorGenerator(TreesGenerator):
35
35
  min_tips: cfg.IntConfig
36
36
  max_tips: cfg.IntConfig
37
37
  T: cfg.ScalarConfig = np.inf
38
+ root_state: str | None = None
38
39
  state_frequencies: list[float] | None = None
39
40
  notification_probability: cfg.SkylineParameterLikeConfig = 0
40
41
  notification_sampling_rate: cfg.SkylineParameterLikeConfig = np.inf
41
42
  allow_irremovable_states: bool = False
42
43
  max_notified_contacts: cfg.IntConfig = 1
43
- root_state: str | None = None
44
44
 
45
45
  def _generate_one_from_params(
46
46
  self, filename: str, rng: Generator, data: Data, params: TreeParams
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 GABRIELE MARINO
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,120 @@
1
+ Metadata-Version: 2.1
2
+ Name: phylogenie
3
+ Version: 1.0.1
4
+ Summary: Generate phylogenetic datasets with minimal setup effort
5
+ Author: gabriele-marino
6
+ Author-email: gabmarino.8601@gmail.com
7
+ Requires-Python: >=3.10,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Requires-Dist: joblib (>=1.4.2,<2.0.0)
13
+ Requires-Dist: pandas (>=2.2.2,<3.0.0)
14
+ Requires-Dist: pydantic (>=2.11.5,<3.0.0)
15
+ Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
16
+ Requires-Dist: tqdm (>=4.66.4,<5.0.0)
17
+ Requires-Dist: treesimulator (>=0.2.15,<0.3.0)
18
+ Description-Content-Type: text/markdown
19
+
20
+ <p align="center">
21
+ <img src="https://raw.githubusercontent.com/gabriele-marino/phylogenie/main/logo.png" style="width:100%; height:auto;"/>
22
+ </p>
23
+
24
+ ---
25
+
26
+ [![TreeSimulator](https://img.shields.io/badge/Powered%20by-TreeSimulator-green?style=flat-square)](https://github.com/evolbioinfo/treesimulator)
27
+ [![Remaster](https://img.shields.io/badge/Powered%20by-Remaster-blue?style=flat-square)](https://tgvaughan.github.io/remaster/)
28
+ [![AliSim](https://img.shields.io/badge/Powered%20by-AliSim-orange?style=flat-square)](https://iqtree.github.io/doc/AliSim)
29
+
30
+ Phylogenie is a [Python](https://www.python.org/) package designed to easily simulate phylogenetic datasets—such as trees and multiple sequence alignments (MSAs)—with minimal setup effort. Simply specify the distributions from which your parameters should be sampled, and Phylogenie will handle the rest!
31
+
32
+ ## ✨ Features
33
+
34
+ Phylogenie comes packed with useful features, including:
35
+
36
+ - **Simulate tree and multiple sequence alignment (MSA) datasets from parameter distributions** 🌳🧬
37
+ Define distributions over your parameters and sample a different combination of parameters for each dataset sample.
38
+
39
+ - **Automatic metadata management** 🗂️
40
+ Phylogenie stores each parameter combination sampled during dataset generation in a `.csv` file.
41
+
42
+ - **Generalizable configurations** 🔄
43
+ Easily apply the same configuration across multiple dataset splits (e.g., train, validation, test).
44
+
45
+ - **Multiprocessing support** ⚙️💻
46
+ Simply specify the number of cores to use, and Phylogenie handles multiprocessing automatically.
47
+
48
+ - **Pre-implemented parameterizations** 🎯
49
+ Includes canonical, fossilized birth-death, epidemiological, birth-death with exposed-infectious (BDEI), contact-tracing (CT), and more.
50
+
51
+ - **Skyline parameter support** 🪜
52
+ Support for piece-wise constant parameters.
53
+
54
+ - **Arithmetic operations on parameters** 🧮
55
+ Perform flexible arithmetic operations between parameters directly within the config file.
56
+
57
+ - **Support for common phylogenetic simulation tools** 🛠️
58
+ Compatible backends include ReMASTER, TreeSimulator, and AliSim.
59
+
60
+ - **Modular and extendible architecture** 🧩
61
+ Easily add new simulation backends as needed.
62
+
63
+ ## 📦 Installation
64
+ Phylogenie requires [Python](https://www.python.org/) 3.10 or later to be installed on your system. There are several ways to install Python and manage different Python versions. One popular option is to use [pyenv](https://github.com/pyenv/pyenv).
65
+
66
+ Once you have Python set up, you can install Phylogenie directly from PyPI:
67
+
68
+ ```bash
69
+ pip install phylogenie
70
+ ```
71
+
72
+ Or install from source:
73
+ ```bash
74
+ git clone https://github.com/gabriele-marino/phylogenie.git
75
+ cd phylogenie
76
+ pip install .
77
+ ```
78
+
79
+ ## 🛠 Backend dependencies
80
+
81
+ Phylogenie works with the following simulation backends:
82
+
83
+ - **[TreeSimulator](https://github.com/evolbioinfo/treesimulator)**
84
+ A [Python](https://www.python.org/) package for simulating phylogenetic trees. It is automatically installed with Phylogenie, so you can use it right away.
85
+
86
+ - **[ReMASTER](https://tgvaughan.github.io/remaster/)**
87
+ A [BEAST2](https://www.beast2.org/) package designed for tree simulation. To use ReMASTER as a backend, you need to install it separately.
88
+
89
+ - **[AliSim](https://iqtree.github.io/doc/AliSim)**
90
+ A tool for simulating multiple sequence alignments (MSAs). It is distributed with [IQ-TREE](https://iqtree.github.io/) and also requires separate installation if you wish to use it as a backend.
91
+
92
+ ## 🚀 Quick Start
93
+
94
+ Once you have installed Phylogenie, check out the [examples](https://github.com/gabriele-marino/phylogenie/tree/main/examples) folder.
95
+ It includes a collection of thoroughly commented configuration files, organized as a step-by-step tutorial. These examples will help you understand how to use Phylogenie in practice and can be easily adapted to fit your own workflow.
96
+
97
+ For a quick start, pick your favorite config file and run Phylogenie with:
98
+ ```bash
99
+ phylogenie examples/<config_file>.yaml
100
+ ```
101
+ This command will create the output dataset in the folder specified inside the configuration file, including data directories and metadata files for each dataset split defined in the config.
102
+
103
+ >❗ *Tip*: Can’t choose just one config file?
104
+ You can run them all at once by pointing Phylogenie to the folder! Just use: `phylogenie examples`. In this mode, Phylogenie will automatically find all `.yaml` files in the folder you specified and run each of them!
105
+
106
+ ## 📖 Documentation
107
+
108
+ - The [examples](https://github.com/gabriele-marino/phylogenie/tree/main/examples) folder contains many ready-to-use, extensively commented configuration files that serve as a step-by-step tutorial to guide you through using Phylogenie. You can explore them to learn how it works or adapt them directly to your own workflows.
109
+ - A complete user guide and API reference are under development. In the meantime, feel free to [reach out](mailto:gabmarino.8601@gmail.com) if you have any questions about integrating Phylogenie into your workflows.
110
+
111
+ ## 📄 License
112
+
113
+ This project is licensed under [MIT License](https://raw.githubusercontent.com/gabriele-marino/phylogenie/main/LICENSE.txt).
114
+
115
+ ## 📫 Contact
116
+
117
+ For questions, bug reports, or feature requests, please consider opening an [issue on GitHub](https://github.com/gabriele-marino/phylogenie/issues), or [contact me directly](mailto:gabmarino.8601@gmail.com).
118
+
119
+ If you need help with the configuration files, feel free to reach out — I am always very available and happy to assist!
120
+
@@ -1,7 +1,7 @@
1
1
  phylogenie/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  phylogenie/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  phylogenie/backend/remaster/__init__.py,sha256=g1oMKi6SX60Geq_e2AjBlf7-pDvLfrsT3gW6AORdbMo,509
4
- phylogenie/backend/remaster/generate.py,sha256=mMYsHkSjXEd4nEYqFf5msmTPeWZqOopsKtOGt139TpA,6146
4
+ phylogenie/backend/remaster/generate.py,sha256=S4eRtdFdnIwUqWTy6lcZTnMcy6SDXJwGPr7oCKczgjc,6180
5
5
  phylogenie/backend/remaster/reactions.py,sha256=UMXW-cEeWGOqTbf9CtHf9GMgfofT5apxZUiBpr8YVBU,5819
6
6
  phylogenie/backend/treesimulator.py,sha256=ReJ1KTSKVSmAS4vGn1-XKYKqXluYX6uFSNjCs3kGISg,4557
7
7
  phylogenie/configs.py,sha256=HtRUWZ-zNq1--zTBWL3QFXX27Ybw5x1qSWcmx7Sz8YA,125
@@ -11,7 +11,7 @@ phylogenie/core/context/__init__.py,sha256=ZiCweJgf1REKbhZTfHuzz1lIgVmio9bTYW3-s
11
11
  phylogenie/core/context/configs.py,sha256=zd-ADFzJbb6KPkol-tXxSdS8LUBeQYQq8fDzXot8WM0,730
12
12
  phylogenie/core/context/distributions.py,sha256=QF14tM2ibjE7f6WK3s4hTaz_sLQBTNVr2ZBNe2refeE,3059
13
13
  phylogenie/core/context/factories.py,sha256=2gmvG5abZmmVcCfWie0L3jnwZxgZ0TtV1XElSnZgDzo,1459
14
- phylogenie/core/dataset.py,sha256=ZiMkSAWmhJU6XXxuvm9n6vTCPdEChH1_oN75qrCUGI0,2431
14
+ phylogenie/core/dataset.py,sha256=vk9TfjVmT_eSXzu8dHSsiz6G3kBsVk8nOCPg8CCmBQA,2431
15
15
  phylogenie/core/factories.py,sha256=XEijHIGCxikUhq_IGTq79MppvGXJVX6nIkf3a36ifa4,5972
16
16
  phylogenie/core/msas/__init__.py,sha256=-2XjTmiTA6zAwiLs2ksKecCrSbNLheo7KKjDyvuLipg,207
17
17
  phylogenie/core/msas/alisim.py,sha256=iF0Urq1wc83oEscZ35drAlBquinWi13MXXfFg2OqUZc,1051
@@ -21,8 +21,8 @@ phylogenie/core/trees/base.py,sha256=sNBCJRtWGYaMog4WoyAkrK4F2SXrgjXrxjuVQ6Ae5Js
21
21
  phylogenie/core/trees/remaster/__init__.py,sha256=FfgXYjkeosb22Anbp78re2NssWtNcNNaj7hFQZx8JLE,116
22
22
  phylogenie/core/trees/remaster/configs.py,sha256=Bp1-Oj3Vac1_S6VdofGxHjp_0FAACrIBDo8w0NbTS2Q,377
23
23
  phylogenie/core/trees/remaster/factories.py,sha256=F7BAWj2Y-2bmQfPGW73lyvQxUgHjRpx1eEzKZLIZ4hk,863
24
- phylogenie/core/trees/remaster/generator.py,sha256=o_eZ-7bLLs6yc2t1NNk8_ib7CPRLoYRUAN6rljr86Co,7036
25
- phylogenie/core/trees/treesimulator.py,sha256=Z8OwmFU1RgwW3omprd982Qb7n1bW0zLaPpAFzagWzhQ,5855
24
+ phylogenie/core/trees/remaster/generator.py,sha256=y6DaUmotAjUi08uuBVEIUkKXgxVEBPrTx7gnjpPHT4s,7106
25
+ phylogenie/core/trees/treesimulator.py,sha256=8L2KxwZEw_vO6mcZaUM2DoY4IavkmFoYwcjcDo8f0m0,5855
26
26
  phylogenie/core/typeguards.py,sha256=nxgN8NjiasKR2AJ3USELp4uUHEyNjJmOstYqqLpRtDg,1133
27
27
  phylogenie/core/typings.py,sha256=4b50GphFOT8fEZ9qsnDYhx7uqDUxA5yly83BiSlpCgI,171
28
28
  phylogenie/main.py,sha256=n_joau3dWJIq0ZMHe4a_1_2GigTFagkfzUFuQEMlyRI,1158
@@ -34,7 +34,8 @@ phylogenie/skyline/vector.py,sha256=U66oRwx3FN5HSiIAy-9pAKz-ft62JqTKUtp6Sb5zyEg,
34
34
  phylogenie/typeguards.py,sha256=3EwPVxQfJ8IOp33-tkbCwLFCmLh7OyKcyLyhUkJuwoU,1230
35
35
  phylogenie/typings.py,sha256=aYAbZlvtbijxz9nYwRmhE8kbuYGgZAa95pvrUGEWz2Q,562
36
36
  phylogenie/utils.py,sha256=Omj4NOlnA0iZiyAzeWtsUq7G5sv-0cqxPJTiH2phcmk,615
37
- phylogenie-1.0.0.dist-info/METADATA,sha256=3FhNhpLDJ4Qh5voXg0Xd1dORZ0I08YNVBjwVKe15sq0,1369
38
- phylogenie-1.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
39
- phylogenie-1.0.0.dist-info/entry_points.txt,sha256=Rt6_usN0FkBX1ZfiqCirjMN9FKOgFLG8rydcQ8kugeE,51
40
- phylogenie-1.0.0.dist-info/RECORD,,
37
+ phylogenie-1.0.1.dist-info/LICENSE.txt,sha256=NUrDqElK-eD3I0WqC004CJsy6cs0JgsAoebDv_42-pw,1071
38
+ phylogenie-1.0.1.dist-info/METADATA,sha256=0eg-dGzGxfdWTXdhbLrytoGnX5FExhq-ah60FO59EeQ,6251
39
+ phylogenie-1.0.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
40
+ phylogenie-1.0.1.dist-info/entry_points.txt,sha256=Rt6_usN0FkBX1ZfiqCirjMN9FKOgFLG8rydcQ8kugeE,51
41
+ phylogenie-1.0.1.dist-info/RECORD,,
@@ -1,31 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: phylogenie
3
- Version: 1.0.0
4
- Summary: Generate phylogenetic datasets with minimal set up overhead
5
- Author: gabriele-marino
6
- Author-email: gabmarino.8601@gmail.com
7
- Requires-Python: >=3.10,<4.0
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: Programming Language :: Python :: 3.10
10
- Classifier: Programming Language :: Python :: 3.11
11
- Classifier: Programming Language :: Python :: 3.12
12
- Requires-Dist: joblib (>=1.4.2,<2.0.0)
13
- Requires-Dist: pandas (>=2.2.2,<3.0.0)
14
- Requires-Dist: pydantic (>=2.11.5,<3.0.0)
15
- Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
16
- Requires-Dist: tqdm (>=4.66.4,<5.0.0)
17
- Requires-Dist: treesimulator (>=0.2.15,<0.3.0)
18
- Description-Content-Type: text/markdown
19
-
20
- <p align="center">
21
- <img src="logo.png" alt="Project Logo" style="width:100%; height:auto;" />
22
- </p>
23
-
24
- ---
25
-
26
- [![TreeSimulator](https://img.shields.io/badge/Powered%20by-TreeSimulator-green?style=flat-square)](https://github.com/evolbioinfo/treesimulator)
27
- [![Remaster](https://img.shields.io/badge/Powered%20by-Remaster-blue?style=flat-square)](https://tgvaughan.github.io/remaster/)
28
- [![AliSim](https://img.shields.io/badge/Powered%20by-AliSim-orange?style=flat-square)](https://iqtree.github.io/doc/AliSim)
29
-
30
- **Phylogenie** is a Python package designed to easily simulate phylogenetic datasets—such as trees and multiple sequence alignments—with minimal setup effort.
31
-