phylogenie 2.1.23__py3-none-any.whl → 2.1.24__py3-none-any.whl

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
phylogenie/__init__.py CHANGED
@@ -11,7 +11,7 @@ from phylogenie.generators import (
11
11
  FBDTreeDatasetGenerator,
12
12
  TreeDatasetGeneratorConfig,
13
13
  )
14
- from phylogenie.io import dump_newick, load_fasta, load_newick
14
+ from phylogenie.io import dump_newick, load_fasta, load_newick, load_nexus
15
15
  from phylogenie.msa import MSA
16
16
  from phylogenie.skyline import (
17
17
  SkylineMatrix,
@@ -100,6 +100,7 @@ __all__ = [
100
100
  "generate_trees",
101
101
  "simulate_tree",
102
102
  "dump_newick",
103
+ "load_nexus",
103
104
  "load_fasta",
104
105
  "load_newick",
105
106
  "MSA",
phylogenie/draw.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from enum import Enum
2
+ from itertools import islice
2
3
  from typing import Any
3
4
 
4
5
  import matplotlib.colors as mcolors
@@ -25,7 +26,7 @@ def _draw_colored_tree(tree: Tree, ax: Axes, colors: Color | dict[Tree, Color])
25
26
 
26
27
  xs = (
27
28
  get_node_depth_levels(tree)
28
- if any(node.branch_length is None for node in tree)
29
+ if any(node.branch_length is None for node in islice(tree, 1, None))
29
30
  else get_node_depths(tree)
30
31
  )
31
32
  ys: dict[Tree, float] = {node: i for i, node in enumerate(tree.get_leaves())}
phylogenie/generators/dataset.py CHANGED
@@ -56,7 +56,7 @@ class DatasetGenerator(ABC, StrictBaseModel):
56
56
  for i in range(n_samples)
57
57
  )
58
58
  df = pd.DataFrame(
59
- [r for r in tqdm(jobs, total=n_samples, desc=f"Generating {data_dir}...")]
59
+ [j for j in tqdm(jobs, f"Generating {data_dir}...", n_samples)]
60
60
  )
61
61
  df.to_csv(os.path.join(output_dir, METADATA_FILENAME), index=False)
62
62
 
phylogenie/io/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from phylogenie.io.fasta import load_fasta
2
+ from phylogenie.io.newick import dump_newick, load_newick
3
+ from phylogenie.io.nexus import load_nexus
4
+
5
+ __all__ = ["load_fasta", "load_newick", "dump_newick", "load_nexus"]
phylogenie/io/fasta.py ADDED
@@ -0,0 +1,25 @@
1
+ from typing import Callable
2
+
3
+ from phylogenie.msa import MSA, Sequence
4
+
5
+
6
+ def load_fasta(
7
+ fasta_file: str, extract_time_from_id: Callable[[str], float] | None = None
8
+ ) -> MSA:
9
+ sequences: list[Sequence] = []
10
+ with open(fasta_file, "r") as f:
11
+ for line in f:
12
+ if not line.startswith(">"):
13
+ raise ValueError(f"Invalid FASTA format: expected '>', got '{line[0]}'")
14
+ id = line[1:].strip()
15
+ time = None
16
+ if extract_time_from_id is not None:
17
+ time = extract_time_from_id(id)
18
+ elif "|" in id:
19
+ try:
20
+ time = float(id.split("|")[-1])
21
+ except ValueError:
22
+ pass
23
+ chars = next(f).strip()
24
+ sequences.append(Sequence(id, chars, time))
25
+ return MSA(sequences)
phylogenie/io.py → phylogenie/io/newick.py CHANGED
@@ -1,17 +1,17 @@
1
1
  import re
2
- from typing import Callable
3
2
 
4
- from phylogenie.msa import MSA, Sequence
5
3
  from phylogenie.tree import Tree
6
4
 
7
5
 
8
- def _parse_newick(newick: str) -> Tree:
6
+ def parse_newick(newick: str, translations: dict[str, str] | None = None) -> Tree:
9
7
  newick = newick.strip()
8
+ newick = re.sub(r"^\[\&[^\]]*\]", "", newick).strip()
9
+
10
10
  stack: list[list[Tree]] = []
11
11
  current_children: list[Tree] = []
12
12
  current_nodes: list[Tree] = []
13
13
  i = 0
14
- while i < len(newick):
14
+ while True:
15
15
 
16
16
  def _read_chars(stoppers: list[str]) -> str:
17
17
  nonlocal i
@@ -29,7 +29,10 @@ def _parse_newick(newick: str) -> Tree:
29
29
  i += 1
30
30
  continue
31
31
 
32
- current_node = Tree(_read_chars(["[", ":", ",", ")", ";"]))
32
+ name = _read_chars([":", "[", ",", ")", ";"])
33
+ if translations is not None and name in translations:
34
+ name = translations[name]
35
+ current_node = Tree(name)
33
36
 
34
37
  if newick[i] == "[":
35
38
  i += 1
@@ -62,17 +65,15 @@ def _parse_newick(newick: str) -> Tree:
62
65
 
63
66
  i += 1
64
67
 
65
- raise ValueError("Newick string should end with ';'")
66
-
67
68
 
68
69
  def load_newick(filepath: str) -> Tree | list[Tree]:
69
70
  with open(filepath, "r") as file:
70
- trees = [_parse_newick(newick) for newick in file]
71
+ trees = [parse_newick(newick) for newick in file]
71
72
  return trees[0] if len(trees) == 1 else trees
72
73
 
73
74
 
74
- def _to_newick(tree: Tree) -> str:
75
- children_newick = ",".join([_to_newick(child) for child in tree.children])
75
+ def to_newick(tree: Tree) -> str:
76
+ children_newick = ",".join([to_newick(child) for child in tree.children])
76
77
  newick = tree.name
77
78
  if tree.features:
78
79
  reprs = {k: repr(v).replace("'", '"') for k, v in tree.features.items()}
@@ -99,26 +100,4 @@ def dump_newick(trees: Tree | list[Tree], filepath: str) -> None:
99
100
  trees = [trees]
100
101
  with open(filepath, "w") as file:
101
102
  for t in trees:
102
- file.write(_to_newick(t) + ";\n")
103
-
104
-
105
- def load_fasta(
106
- fasta_file: str, extract_time_from_id: Callable[[str], float] | None = None
107
- ) -> MSA:
108
- sequences: list[Sequence] = []
109
- with open(fasta_file, "r") as f:
110
- for line in f:
111
- if not line.startswith(">"):
112
- raise ValueError(f"Invalid FASTA format: expected '>', got '{line[0]}'")
113
- id = line[1:].strip()
114
- time = None
115
- if extract_time_from_id is not None:
116
- time = extract_time_from_id(id)
117
- elif "|" in id:
118
- try:
119
- time = float(id.split("|")[-1])
120
- except ValueError:
121
- pass
122
- chars = next(f).strip()
123
- sequences.append(Sequence(id, chars, time))
124
- return MSA(sequences)
103
+ file.write(to_newick(t) + ";\n")
phylogenie/io/nexus.py ADDED
@@ -0,0 +1,43 @@
1
+ import re
2
+ from collections.abc import Iterator
3
+
4
+ from phylogenie.io.newick import parse_newick
5
+ from phylogenie.tree import Tree
6
+
7
+
8
+ def _parse_translate_block(lines: Iterator[str]) -> dict[str, str]:
9
+ translations: dict[str, str] = {}
10
+ for line in lines:
11
+ match = re.match(r"\s*(\d+)\s+['\"]?([^'\",;]+)['\"]?", line)
12
+ if match is None:
13
+ if ";" in line:
14
+ return translations
15
+ else:
16
+ raise ValueError(f"Invalid translate line: {line.strip()}")
17
+ translations[match.group(1)] = match.group(2)
18
+ raise ValueError("Translate block not terminated with ';'")
19
+
20
+
21
+ def _parse_trees_block(lines: Iterator[str]) -> dict[str, Tree]:
22
+ trees: dict[str, Tree] = {}
23
+ translations = {}
24
+ for line in lines:
25
+ line = line.strip()
26
+ if line.upper() == "TRANSLATE":
27
+ translations = _parse_translate_block(lines)
28
+ elif line.upper() == "END;":
29
+ return trees
30
+ else:
31
+ match = re.match(r"^TREE\s*\*?\s+(\S+)\s*=\s*(.+)$", line, re.IGNORECASE)
32
+ if match is None:
33
+ raise ValueError(f"Invalid tree line. Expected 'TREE name = newick'")
34
+ trees[match.group(1)] = parse_newick(match.group(2), translations)
35
+ return trees
36
+
37
+
38
+ def load_nexus(nexus_file: str) -> dict[str, Tree]:
39
+ with open(nexus_file, "r") as f:
40
+ for line in f:
41
+ if line.strip().upper() == "BEGIN TREES;":
42
+ return _parse_trees_block(f)
43
+ raise ValueError("No TREES block found in the NEXUS file.")
phylogenie-2.1.23.dist-info/METADATA → phylogenie-2.1.24.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phylogenie
3
- Version: 2.1.23
3
+ Version: 2.1.24
4
4
  Summary: Generate phylogenetic datasets with minimal setup effort
5
5
  Author: Gabriele Marino
6
6
  Author-email: gabmarino.8601@gmail.com
phylogenie-2.1.23.dist-info/RECORD → phylogenie-2.1.24.dist-info/RECORD CHANGED
@@ -1,13 +1,16 @@
1
- phylogenie/__init__.py,sha256=8ulA-U7-WnBLNsqRYqPOPrJG8X4ZzyEU02oaDbR_Hxs,2849
2
- phylogenie/draw.py,sha256=WCjr_HCn-wCuxwkDhHA9Ijl1Sm6av6FBLkyR0MK2bVM,5271
1
+ phylogenie/__init__.py,sha256=KV83hJ153WAwCIWEPWXtSsqdI_ncojxiXzt1X_lC7SU,2879
2
+ phylogenie/draw.py,sha256=37eQDK8TKmL8bxSzXvi7jNvrR7ulj7MByR5MA1FtyyE,5317
3
3
  phylogenie/generators/__init__.py,sha256=zsOxy28-9j9alOQLIgrOAFfmM58NNHO_NEtW-KXQXAY,888
4
4
  phylogenie/generators/alisim.py,sha256=0aCLuGInifWgAvfh7zARWSKF4EMw3TjlPXMLSECui0k,2783
5
5
  phylogenie/generators/configs.py,sha256=WFoeKpgj9ZQIom7BKqwpgXbriiQGg3jFBMLoD8KButk,1073
6
- phylogenie/generators/dataset.py,sha256=pPwW9yxm9fkU0PPllFq8EsPlqau8tth-4OatbA_hEHo,2120
6
+ phylogenie/generators/dataset.py,sha256=kY92diePr2IjiLejHLixJoYRc-2LpM-GBt3wkX9SYvA,2109
7
7
  phylogenie/generators/factories.py,sha256=TuVFQWRjq33Hewjw_Lp8tQ0l_IPtqYDyQCNJhtiHpw8,7882
8
8
  phylogenie/generators/trees.py,sha256=6tHS013RNRyIsObeYq2Kx9it7Yc0TgiMkeAHbskHzAM,10748
9
9
  phylogenie/generators/typeguards.py,sha256=yj4VkhOaUXJ2OrY-6zhOeY9C4yKIQxjZtk2d-vIxttQ,828
10
- phylogenie/io.py,sha256=vUG2yVtoV98tNHut46uSuB3VPj6s64VDhUp1EaSk0o0,4084
10
+ phylogenie/io/__init__.py,sha256=gtRYtDdZSTlWCj3I4vmMJSAs93jdz5RySkCakD3sxlQ,214
11
+ phylogenie/io/fasta.py,sha256=IWtNb_RQLR6kvS0G826wB9SodkCGfugddoUHx78Yrec,837
12
+ phylogenie/io/newick.py,sha256=vw0fafh1LL0SXQifIIa1TQ7g5KgTCIAX6vzq-bUOrKE,3396
13
+ phylogenie/io/nexus.py,sha256=IKbV8lJ_Q053iYJ7JzVQPCUqSkSfmiRpUchFTrLHZuE,1551
11
14
  phylogenie/main.py,sha256=vtvSpQxBNlYABoFQ25czl-l3fIr4QRo3svWVd-jcArw,1170
12
15
  phylogenie/models.py,sha256=pCg9ob0RpLUHwM49x4knKxL4FNPr3-EU_6zMXsvxtAg,370
13
16
  phylogenie/msa.py,sha256=JDGyZUsAq6-m-SQjoCDjAkAZIxfgyl_PDIhdYn5HOow,2064
@@ -28,8 +31,8 @@ phylogenie/treesimulator/model.py,sha256=Nyg6R8XmMwZMSw1-dII81sU9uU7tDe-NMs8v1qK
28
31
  phylogenie/typeguards.py,sha256=JtqmbEWJZBRHbWgCvcl6nrWm3VcBfzRbklbTBYHItn0,1325
29
32
  phylogenie/typings.py,sha256=GknvAFXyiaWeeYJ8Lk5d6E2VHT-xW6ONEojYbtJYiB8,476
30
33
  phylogenie/utils.py,sha256=ehVk_2kvjW8Q_EyM2kxBPHYiK-KlPmZQx7JeVN6Fh-E,5419
31
- phylogenie-2.1.23.dist-info/LICENSE.txt,sha256=NUrDqElK-eD3I0WqC004CJsy6cs0JgsAoebDv_42-pw,1071
32
- phylogenie-2.1.23.dist-info/METADATA,sha256=ym-NHK9hkvBR4kH_KGCnLnckV_iEzvFk0fdMzrkVudQ,5477
33
- phylogenie-2.1.23.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
34
- phylogenie-2.1.23.dist-info/entry_points.txt,sha256=Rt6_usN0FkBX1ZfiqCirjMN9FKOgFLG8rydcQ8kugeE,51
35
- phylogenie-2.1.23.dist-info/RECORD,,
34
+ phylogenie-2.1.24.dist-info/LICENSE.txt,sha256=NUrDqElK-eD3I0WqC004CJsy6cs0JgsAoebDv_42-pw,1071
35
+ phylogenie-2.1.24.dist-info/METADATA,sha256=18OWTAKMES6VnsF454GNsr7NRDZ2AQNPprpkcxDfbZk,5477
36
+ phylogenie-2.1.24.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
37
+ phylogenie-2.1.24.dist-info/entry_points.txt,sha256=Rt6_usN0FkBX1ZfiqCirjMN9FKOgFLG8rydcQ8kugeE,51
38
+ phylogenie-2.1.24.dist-info/RECORD,,