phylogenie 2.1.23__tar.gz → 2.1.24__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of phylogenie might be problematic. Click here for more details.
- {phylogenie-2.1.23 → phylogenie-2.1.24}/PKG-INFO +1 -1
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/__init__.py +2 -1
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/draw.py +2 -1
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/generators/dataset.py +1 -1
- phylogenie-2.1.24/phylogenie/io/__init__.py +5 -0
- phylogenie-2.1.24/phylogenie/io/fasta.py +25 -0
- phylogenie-2.1.23/phylogenie/io.py → phylogenie-2.1.24/phylogenie/io/newick.py +12 -33
- phylogenie-2.1.24/phylogenie/io/nexus.py +43 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/pyproject.toml +1 -1
- {phylogenie-2.1.23 → phylogenie-2.1.24}/LICENSE.txt +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/README.md +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/generators/__init__.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/generators/alisim.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/generators/configs.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/generators/factories.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/generators/trees.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/generators/typeguards.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/main.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/models.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/msa.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/py.typed +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/skyline/__init__.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/skyline/matrix.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/skyline/parameter.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/skyline/vector.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/tree.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/treesimulator/__init__.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/treesimulator/events/__init__.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/treesimulator/events/contact_tracing.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/treesimulator/events/core.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/treesimulator/events/mutations.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/treesimulator/features.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/treesimulator/gillespie.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/treesimulator/model.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/typeguards.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/typings.py +0 -0
- {phylogenie-2.1.23 → phylogenie-2.1.24}/phylogenie/utils.py +0 -0
|
@@ -11,7 +11,7 @@ from phylogenie.generators import (
|
|
|
11
11
|
FBDTreeDatasetGenerator,
|
|
12
12
|
TreeDatasetGeneratorConfig,
|
|
13
13
|
)
|
|
14
|
-
from phylogenie.io import dump_newick, load_fasta, load_newick
|
|
14
|
+
from phylogenie.io import dump_newick, load_fasta, load_newick, load_nexus
|
|
15
15
|
from phylogenie.msa import MSA
|
|
16
16
|
from phylogenie.skyline import (
|
|
17
17
|
SkylineMatrix,
|
|
@@ -100,6 +100,7 @@ __all__ = [
|
|
|
100
100
|
"generate_trees",
|
|
101
101
|
"simulate_tree",
|
|
102
102
|
"dump_newick",
|
|
103
|
+
"load_nexus",
|
|
103
104
|
"load_fasta",
|
|
104
105
|
"load_newick",
|
|
105
106
|
"MSA",
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
+
from itertools import islice
|
|
2
3
|
from typing import Any
|
|
3
4
|
|
|
4
5
|
import matplotlib.colors as mcolors
|
|
@@ -25,7 +26,7 @@ def _draw_colored_tree(tree: Tree, ax: Axes, colors: Color | dict[Tree, Color])
|
|
|
25
26
|
|
|
26
27
|
xs = (
|
|
27
28
|
get_node_depth_levels(tree)
|
|
28
|
-
if any(node.branch_length is None for node in tree)
|
|
29
|
+
if any(node.branch_length is None for node in islice(tree, 1, None))
|
|
29
30
|
else get_node_depths(tree)
|
|
30
31
|
)
|
|
31
32
|
ys: dict[Tree, float] = {node: i for i, node in enumerate(tree.get_leaves())}
|
|
@@ -56,7 +56,7 @@ class DatasetGenerator(ABC, StrictBaseModel):
|
|
|
56
56
|
for i in range(n_samples)
|
|
57
57
|
)
|
|
58
58
|
df = pd.DataFrame(
|
|
59
|
-
[
|
|
59
|
+
[j for j in tqdm(jobs, f"Generating {data_dir}...", n_samples)]
|
|
60
60
|
)
|
|
61
61
|
df.to_csv(os.path.join(output_dir, METADATA_FILENAME), index=False)
|
|
62
62
|
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from typing import Callable
|
|
2
|
+
|
|
3
|
+
from phylogenie.msa import MSA, Sequence
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def load_fasta(
    fasta_file: str, extract_time_from_id: Callable[[str], float] | None = None
) -> MSA:
    """Load a FASTA file into an MSA.

    Each record is a header line starting with '>' followed by one or more
    sequence lines; wrapped sequences are concatenated. Blank lines are
    ignored.

    Args:
        fasta_file: Path of the FASTA file to read.
        extract_time_from_id: Optional callable mapping a record id to its
            time. When omitted and the id contains '|', the text after the
            last '|' is used as the time if it parses as a float; otherwise
            the time is left as None.

    Returns:
        An MSA built from the sequences, in file order.

    Raises:
        ValueError: If sequence data appears before any header, or a header
            has no sequence data.
    """
    # Collect (id, sequence chunks) pairs first so wrapped sequences and
    # headers without data are handled explicitly instead of via next(f).
    records: list[tuple[str, list[str]]] = []
    with open(fasta_file, "r") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith(">"):
                records.append((line[1:].strip(), []))
            elif not records:
                raise ValueError(f"Invalid FASTA format: expected '>', got '{line[0]}'")
            else:
                records[-1][1].append(line)

    sequences: list[Sequence] = []
    for seq_id, chunks in records:
        if not chunks:
            raise ValueError(
                f"Invalid FASTA format: no sequence data for '{seq_id}'"
            )
        time = None
        if extract_time_from_id is not None:
            time = extract_time_from_id(seq_id)
        elif "|" in seq_id:
            try:
                time = float(seq_id.split("|")[-1])
            except ValueError:
                # Best effort: a non-numeric suffix just means "no time".
                pass
        sequences.append(Sequence(seq_id, "".join(chunks), time))
    return MSA(sequences)
|
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from typing import Callable
|
|
3
2
|
|
|
4
|
-
from phylogenie.msa import MSA, Sequence
|
|
5
3
|
from phylogenie.tree import Tree
|
|
6
4
|
|
|
7
5
|
|
|
8
|
-
def
|
|
6
|
+
def parse_newick(newick: str, translations: dict[str, str] | None = None) -> Tree:
|
|
9
7
|
newick = newick.strip()
|
|
8
|
+
newick = re.sub(r"^\[\&[^\]]*\]", "", newick).strip()
|
|
9
|
+
|
|
10
10
|
stack: list[list[Tree]] = []
|
|
11
11
|
current_children: list[Tree] = []
|
|
12
12
|
current_nodes: list[Tree] = []
|
|
13
13
|
i = 0
|
|
14
|
-
while
|
|
14
|
+
while True:
|
|
15
15
|
|
|
16
16
|
def _read_chars(stoppers: list[str]) -> str:
|
|
17
17
|
nonlocal i
|
|
@@ -29,7 +29,10 @@ def _parse_newick(newick: str) -> Tree:
|
|
|
29
29
|
i += 1
|
|
30
30
|
continue
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
name = _read_chars([":", "[", ",", ")", ";"])
|
|
33
|
+
if translations is not None and name in translations:
|
|
34
|
+
name = translations[name]
|
|
35
|
+
current_node = Tree(name)
|
|
33
36
|
|
|
34
37
|
if newick[i] == "[":
|
|
35
38
|
i += 1
|
|
@@ -62,17 +65,15 @@ def _parse_newick(newick: str) -> Tree:
|
|
|
62
65
|
|
|
63
66
|
i += 1
|
|
64
67
|
|
|
65
|
-
raise ValueError("Newick string should end with ';'")
|
|
66
|
-
|
|
67
68
|
|
|
68
69
|
def load_newick(filepath: str) -> Tree | list[Tree]:
    """Load the tree(s) stored one per line in a Newick file.

    Args:
        filepath: Path of the Newick file to read.

    Returns:
        The single Tree when the file holds exactly one tree, otherwise the
        list of parsed trees (empty if the file has no trees).
    """
    with open(filepath, "r") as file:
        # Blank lines (e.g. a trailing empty line at EOF) are not Newick
        # records, so they are skipped rather than handed to the parser.
        trees = [parse_newick(line) for line in file if line.strip()]
    return trees[0] if len(trees) == 1 else trees
|
|
72
73
|
|
|
73
74
|
|
|
74
|
-
def
|
|
75
|
-
children_newick = ",".join([
|
|
75
|
+
def to_newick(tree: Tree) -> str:
|
|
76
|
+
children_newick = ",".join([to_newick(child) for child in tree.children])
|
|
76
77
|
newick = tree.name
|
|
77
78
|
if tree.features:
|
|
78
79
|
reprs = {k: repr(v).replace("'", '"') for k, v in tree.features.items()}
|
|
@@ -99,26 +100,4 @@ def dump_newick(trees: Tree | list[Tree], filepath: str) -> None:
|
|
|
99
100
|
trees = [trees]
|
|
100
101
|
with open(filepath, "w") as file:
|
|
101
102
|
for t in trees:
|
|
102
|
-
file.write(
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def load_fasta(
    fasta_file: str, extract_time_from_id: Callable[[str], float] | None = None
) -> MSA:
    """Load a FASTA file into an MSA.

    Each record is a header line starting with '>' followed by one or more
    sequence lines; wrapped sequences are concatenated. Blank lines are
    ignored.

    Args:
        fasta_file: Path of the FASTA file to read.
        extract_time_from_id: Optional callable mapping a record id to its
            time. When omitted and the id contains '|', the text after the
            last '|' is used as the time if it parses as a float; otherwise
            the time is left as None.

    Returns:
        An MSA built from the sequences, in file order.

    Raises:
        ValueError: If sequence data appears before any header, or a header
            has no sequence data.
    """
    # Collect (id, sequence chunks) pairs first so wrapped sequences and
    # headers without data are handled explicitly instead of via next(f).
    records: list[tuple[str, list[str]]] = []
    with open(fasta_file, "r") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith(">"):
                records.append((line[1:].strip(), []))
            elif not records:
                raise ValueError(f"Invalid FASTA format: expected '>', got '{line[0]}'")
            else:
                records[-1][1].append(line)

    sequences: list[Sequence] = []
    for seq_id, chunks in records:
        if not chunks:
            raise ValueError(
                f"Invalid FASTA format: no sequence data for '{seq_id}'"
            )
        time = None
        if extract_time_from_id is not None:
            time = extract_time_from_id(seq_id)
        elif "|" in seq_id:
            try:
                time = float(seq_id.split("|")[-1])
            except ValueError:
                # Best effort: a non-numeric suffix just means "no time".
                pass
        sequences.append(Sequence(seq_id, "".join(chunks), time))
    return MSA(sequences)
|
|
103
|
+
file.write(to_newick(t) + ";\n")
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from collections.abc import Iterator
|
|
3
|
+
|
|
4
|
+
from phylogenie.io.newick import parse_newick
|
|
5
|
+
from phylogenie.tree import Tree
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _parse_translate_block(lines: Iterator[str]) -> dict[str, str]:
|
|
9
|
+
translations: dict[str, str] = {}
|
|
10
|
+
for line in lines:
|
|
11
|
+
match = re.match(r"\s*(\d+)\s+['\"]?([^'\",;]+)['\"]?", line)
|
|
12
|
+
if match is None:
|
|
13
|
+
if ";" in line:
|
|
14
|
+
return translations
|
|
15
|
+
else:
|
|
16
|
+
raise ValueError(f"Invalid translate line: {line.strip()}")
|
|
17
|
+
translations[match.group(1)] = match.group(2)
|
|
18
|
+
raise ValueError("Translate block not terminated with ';'")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _parse_trees_block(lines: Iterator[str]) -> dict[str, Tree]:
    """Parse the body of a NEXUS TREES block.

    Args:
        lines: Iterator positioned just after the ``BEGIN TREES;`` line.

    Returns:
        Mapping from tree name to the parsed Tree, for every
        ``TREE name = newick`` line seen before the block terminator (or
        before the input ends, if no terminator is present).

    Raises:
        ValueError: If a non-blank line is not TRANSLATE, a block
            terminator, or a tree definition.
    """
    trees: dict[str, Tree] = {}
    translations: dict[str, str] = {}
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            # Blank separator lines carry no content.
            continue
        if line.upper() == "TRANSLATE":
            # The translate parser consumes its own lines from the shared
            # iterator, so the loop resumes after its terminator.
            translations = _parse_translate_block(lines)
        elif re.fullmatch(r"END(BLOCK)?\s*;", line, re.IGNORECASE):
            # NEXUS closes a block with END; or ENDBLOCK;.
            return trees
        else:
            match = re.match(r"^TREE\s*\*?\s+(\S+)\s*=\s*(.+)$", line, re.IGNORECASE)
            if match is None:
                raise ValueError("Invalid tree line. Expected 'TREE name = newick'")
            trees[match.group(1)] = parse_newick(match.group(2), translations)
    return trees
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def load_nexus(nexus_file: str) -> dict[str, Tree]:
    """Load the trees of the first TREES block of a NEXUS file.

    Args:
        nexus_file: Path of the NEXUS file to read.

    Returns:
        Mapping from tree name to Tree, as produced by the TREES block parser.

    Raises:
        ValueError: If the file contains no ``BEGIN TREES;`` line.
    """
    with open(nexus_file, "r") as f:
        for line in f:
            # Match case-insensitively and allow any whitespace between the
            # tokens, so "begin  trees ;" is recognised as well.
            if re.fullmatch(r"BEGIN\s+TREES\s*;", line.strip(), re.IGNORECASE):
                return _parse_trees_block(f)
    raise ValueError("No TREES block found in the NEXUS file.")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|