PyPI - LineageTree - Versions diffs - 1.6.1__py3-none-any.whl → 1.8.0__py3-none-any.whl - Mend

LineageTree 1.6.1py3-none-any.whl → 1.8.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

LineageTree/__init__.py +1 -1
LineageTree/lineageTree.py +274 -273
LineageTree/lineageTreeManager.py +50 -23
LineageTree/loaders.py +284 -42
LineageTree/tree_styles.py +99 -66
LineageTree/utils.py +7 -11
{LineageTree-1.6.1.dist-info → LineageTree-1.8.0.dist-info}/METADATA +11 -10
LineageTree-1.8.0.dist-info/RECORD +11 -0
{LineageTree-1.6.1.dist-info → LineageTree-1.8.0.dist-info}/WHEEL +1 -1
LineageTree-1.6.1.dist-info/RECORD +0 -11
{LineageTree-1.6.1.dist-info → LineageTree-1.8.0.dist-info}/LICENSE +0 -0
{LineageTree-1.6.1.dist-info → LineageTree-1.8.0.dist-info}/top_level.txt +0 -0

LineageTree/lineageTreeManager.py CHANGED Viewed

@@ -3,11 +3,14 @@ import pickle as pkl
 import warnings
 from functools import partial
+import numpy as np
 try:
     from edist import uted
 except ImportError:
     warnings.warn(
-        "No edist installed therefore you will not be able to compute the tree edit distance."
+        "No edist installed therefore you will not be able to compute the tree edit distance.",
+        stacklevel=2,
     )
 from LineageTree import lineageTree
@@ -17,7 +20,6 @@ from .tree_styles import tree_style
 class lineageTreeManager:
     def __init__(self):
         self.lineagetrees = {}
-        # self.classification = {"Wt": {}, "Ptb": {}}
         self.lineageTree_counter = 0
         self.registered = {}
@@ -25,6 +27,15 @@ class lineageTreeManager:
         self.lineageTree_counter += 1
         return self.lineageTree_counter - 1
+    @property
+    def gcd(self):
+        """Greatest common divisor of the time resolutions of all stored trees.
+
+        Collects `_time_resolution` from every value of `self.lineagetrees`
+        and folds the list with `np.gcd.reduce`.
+
+        Returns:
+            The reduced gcd, or None (implicitly) when no tree is stored.
+        """
+        # NOTE(review): np.gcd is only defined for integer input, so this
+        # presumably relies on `_time_resolution` being an integer -- confirm.
+        if len(self.lineagetrees) >= 1:
+            all_time_res = [
+                embryo._time_resolution
+                for embryo in self.lineagetrees.values()
+            ]
+            return np.gcd.reduce(all_time_res)
     def add(
         self, other_tree: lineageTree, name: str = "", classification: str = ""
     ):
@@ -38,7 +49,7 @@ class lineageTreeManager:
             name (str, optional): The name under which the tree is registered. Defaults to "".
         """
-        if isinstance(other_tree, lineageTree):
+        if isinstance(other_tree, lineageTree) and other_tree.time_resolution:
             for tree in self.lineagetrees.values():
                 if tree == other_tree:
                     return False
@@ -52,25 +63,14 @@ class lineageTreeManager:
                     name = f"Lineagetree {next(self)}"
                     self.lineagetrees[name] = other_tree
                     self.lineagetrees[name].name = name
-                # try:
-                #     name = other_tree.name
-                #     self.lineagetrees[name] = other_tree
-                # except:
-                #     self.lineagetrees[
-                #         f"Lineagetree {next(self)}"
-                #     ] = other_tree
-        # if classification in ("Wt", "Ptb"):
-        #     self.classification[type] = {name: other_tree}
+        else:
+            raise Exception(
+                "Please add a LineageTree object or add time resolution to the LineageTree added."
+            )
     def __add__(self, other):
         # Delegates to `add`. Nothing is returned here, so the expression
         # `manager + tree` evaluates to None.
         self.add(other)
-    # def classify_existing(self, key, classification: str):
-    #     if classification in ("Wt", "Ptb"):
-    #         self.classification[classification] = {key: self.lineagetrees[key]}
-    #     else:
-    #         return False
     def write(self, fname: str):
         """Saves the manager
@@ -118,9 +118,9 @@ class lineageTreeManager:
         n2: int,
         embryo_2,
         end_time2: int,
-        style="fragmented",
-        node_lengths: tuple = (1, 5, 7),
-        registration=None,
+        style="simple",
+        downsample: int = 2,
+        registration=None,  # will be added as a later feature
     ):
         """Compute the unordered tree edit distance from Zhang 1996 between the trees spawned
         by two nodes `n1` from lineagetree1 and `n2` lineagetree2. The topology of the trees
@@ -139,21 +139,48 @@ class lineageTreeManager:
         """
         tree = tree_style[style].value
+        lcm = (
+            self.lineagetrees[embryo_1]._time_resolution
+            * self.lineagetrees[embryo_2]._time_resolution
+        ) / self.gcd
+        if style == "downsampled":
+            if downsample % (lcm / 10) != 0:
+                raise Exception(
+                    f"Use a valid downsampling rate (multiple of {lcm/10})"
+                )
+            time_res = [
+                downsample / self.lineagetrees[embryo_2].time_resolution,
+                downsample / self.lineagetrees[embryo_1].time_resolution,
+            ]
+        elif style == "full":
+            time_res = [
+                lcm / 10 / self.lineagetrees[embryo_2].time_resolution,
+                lcm / 10 / self.lineagetrees[embryo_1].time_resolution,
+            ]
+        else:
+            time_res = [
+                self.lineagetrees[embryo_1]._time_resolution,
+                self.lineagetrees[embryo_2]._time_resolution,
+            ]
+            time_res = [i / self.gcd for i in time_res]
         tree1 = tree(
             lT=self.lineagetrees[embryo_1],
-            node_length=node_lengths,
+            downsample=downsample,
             end_time=end_time1,
             root=n1,
+            time_scale=time_res[0],
         )
         tree2 = tree(
             lT=self.lineagetrees[embryo_2],
-            node_length=node_lengths,
+            downsample=downsample,
             end_time=end_time2,
             root=n2,
+            time_scale=time_res[1],
         )
         delta = tree1.delta
         _, times1 = tree1.tree
         _, times2 = tree2.tree
         nodes1, adj1, corres1 = tree1.edist
         nodes2, adj2, corres2 = tree2.edist
         if len(nodes1) == len(nodes2) == 0:

LineageTree/loaders.py CHANGED Viewed

@@ -1,11 +1,94 @@
 import csv
+import os
 import pickle as pkl
+import struct
 import xml.etree.ElementTree as ET
-import os
+from warnings import warn
 import numpy as np
 class lineageTreeLoaders:
+    # Maps a cell name to the name of its mother cell. The C. elegans readers
+    # in this class consult it to find a predecessor when the name-based
+    # lookups (the name itself, or the name minus its last character) fail,
+    # and read_C_elegans_bao merges it into its child -> parent table.
+    implicit_l_t = {
+        "AB": "P0",
+        "P1": "P0",
+        "EMS": "P1",
+        "P2": "P1",
+        "MS": "EMS",
+        "E": "EMS",
+        "C": "P2",
+        "P3": "P2",
+        "D": "P3",
+        "P4": "P3",
+        "Z2": "P4",
+        "Z3": "P4",
+    }
+    def read_from_csv(
+        self, file_path: str, z_mult: float, link: int = 1, delim: str = ","
+    ):
+        """
+        Read a delimited text file of tracked objects and populate this instance.
+
+        Every field of every line is stripped and evaluated as a Python
+        expression, the rows are sorted by their time column, and each row
+        becomes one node with a fresh sequential id (starting at 0).
+
+        Args:
+            file_path: path to the text file to read
+            z_mult: factor applied to the z coordinate (and to dz when
+                displacements are stored)
+            link: selects the expected column layout
+                1: id, t, z, y, x, pred, lin_id
+                2: t, z, y, x, id, pred, lin_id
+                any other value: id, t, z, y, x, dz, dy, dx
+            delim: field separator used to split each line
+
+        Sets (replacing previous content): time_nodes, time_edges, nodes,
+        edges, successor, predecessor, pos, time_id, time, lin, C_lin, max_id,
+        t_b, t_e, and displacement when `link` is falsy.
+        """
+        with open(file_path) as f:
+            lines = f.readlines()
+            # Redundant: the `with` block already closes the file on exit.
+            f.close()
+        self.time_nodes = {}
+        self.time_edges = {}
+        unique_id = 0
+        self.nodes = set()
+        self.edges = set()
+        self.successor = {}
+        self.predecessor = {}
+        self.pos = {}
+        self.time_id = {}
+        self.time = {}
+        self.lin = {}
+        self.C_lin = {}
+        if not link:
+            self.displacement = {}
+        lines_to_int = []
+        # Maps the id found in the file to the sequential id assigned here.
+        corres = {}
+        for line in lines:
+            # SECURITY NOTE(review): eval executes each field as Python code,
+            # so the file must come from a trusted source.
+            lines_to_int += [[eval(v.strip()) for v in line.split(delim)]]
+        lines_to_int = np.array(lines_to_int)
+        # Sort rows by time: column 0 holds t when link == 2, column 1 otherwise.
+        if link == 2:
+            lines_to_int = lines_to_int[np.argsort(lines_to_int[:, 0])]
+        else:
+            lines_to_int = lines_to_int[np.argsort(lines_to_int[:, 1])]
+        for line in lines_to_int:
+            if link == 1:
+                id_, t, z, y, x, pred, lin_id = line
+            elif link == 2:
+                t, z, y, x, id_, pred, lin_id = line
+            else:
+                id_, t, z, y, x, dz, dy, dx = line
+                pred = None
+                lin_id = None
+            t = int(t)
+            pos = np.array([x, y, z])
+            C = unique_id
+            corres[id_] = C
+            # pos is ordered (x, y, z), so the last entry is the z coordinate.
+            pos[-1] = pos[-1] * z_mult
+            # Only rows whose predecessor id was already read get linked;
+            # lin and C_lin are recorded for those rows only.
+            if pred in corres:
+                M = corres[pred]
+                self.predecessor[C] = [M]
+                self.successor.setdefault(M, []).append(C)
+                self.edges.add((M, C))
+                self.time_edges.setdefault(t, set()).add((M, C))
+                self.lin.setdefault(lin_id, []).append(C)
+                self.C_lin[C] = lin_id
+            self.pos[C] = pos
+            self.nodes.add(C)
+            self.time_nodes.setdefault(t, set()).add(C)
+            self.time[C] = t
+            # NOTE(review): displacements are kept only when `link` is falsy
+            # (e.g. 0); other values outside {1, 2} parse dz/dy/dx but drop them.
+            if not link:
+                self.displacement[C] = np.array([dx, dy, dz * z_mult])
+            unique_id += 1
+        self.max_id = unique_id - 1
+        self.t_b = min(self.time_nodes)
+        self.t_e = max(self.time_nodes)
     def read_from_ASTEC(self, file_path: str, eigen: bool = False):
         """
         Read an `xml` or `pkl` file produced by the ASTEC algorithm.
@@ -246,6 +329,147 @@ class lineageTreeLoaders:
                 new_dict[k] = v
         return new_dict
+    def read_from_binary(self, fname: str):
+        """
+        Read a lineage tree from a binary file and populate this instance.
+
+        Layout as decoded here (the docstring originally pointed to
+        self.to_binary for the format description; that method is not
+        visible in this view):
+            - three "q" values: len_tree, len_time, len_pos
+            - len_tree "q" values: the node id / marker sequence
+            - len_time "H" values: times
+            - len_pos "d" values: positions, consumed three at a time
+        The struct format strings carry no byte-order prefix, so native
+        size and byte order apply.
+
+        In the node sequence, -1 and -2 are markers and the other values are
+        used as node ids.
+
+        Args:
+            fname: string, path to the binary file
+
+        Sets: successor, predecessor, time, time_nodes, time_edges, pos,
+        nodes, t_b, t_e, is_root and max_id.
+        """
+        q_size = struct.calcsize("q")
+        H_size = struct.calcsize("H")
+        d_size = struct.calcsize("d")
+        with open(fname, "rb") as f:
+            len_tree = struct.unpack("q", f.read(q_size))[0]
+            len_time = struct.unpack("q", f.read(q_size))[0]
+            len_pos = struct.unpack("q", f.read(q_size))[0]
+            number_sequence = list(
+                struct.unpack("q" * len_tree, f.read(q_size * len_tree))
+            )
+            time_sequence = list(
+                struct.unpack("H" * len_time, f.read(H_size * len_time))
+            )
+            pos_sequence = np.array(
+                struct.unpack("d" * len_pos, f.read(d_size * len_pos))
+            )
+            # Redundant: the `with` block already closes the file on exit.
+            f.close()
+        successor = {}
+        predecessor = {}
+        time = {}
+        time_nodes = {}
+        time_edges = {}
+        pos = {}
+        is_root = {}
+        nodes = []
+        edges = []
+        # Stack of node ids that were followed by a -2 marker and still wait
+        # for a further successor.
+        waiting_list = []
+        i = 0
+        done = False
+        # Shortcut: no even-indexed entry exceeds -1, i.e. the sequence has no
+        # node id at an even index. If the counts agree, every odd-indexed id
+        # is stored as an isolated root with one time and one position.
+        if max(number_sequence[::2]) == -1:
+            tmp = number_sequence[1::2]
+            if len(tmp) * 3 == len(pos_sequence) == len(time_sequence) * 3:
+                time = dict(list(zip(tmp, time_sequence)))
+                for c, t in time.items():
+                    time_nodes.setdefault(t, set()).add(c)
+                pos = dict(
+                    list(zip(tmp, np.reshape(pos_sequence, (len_time, 3))))
+                )
+                is_root = {c: True for c in tmp}
+                nodes = tmp
+                done = True
+        # General case: walk the sequence one entry at a time.
+        # NOTE(review): `t` is first bound inside the loop; a sequence that
+        # does not start with -1 would reach a use of `t` before assignment.
+        while (
+            i < len(number_sequence) and not done
+        ):  # , c in enumerate(number_sequence[:-1]):
+            c = number_sequence[i]
+            if c == -1:
+                # The entry after a -1 is a node id: it is attached to the
+                # most recent waiting mother if there is one, otherwise it is
+                # flagged as a root and the next stored time is consumed.
+                if waiting_list != []:
+                    prev_mother = waiting_list.pop()
+                    successor[prev_mother].insert(0, number_sequence[i + 1])
+                    edges.append((prev_mother, number_sequence[i + 1]))
+                    # NOTE(review): `t` still holds the value left by the
+                    # previous iteration here; it is only reset from
+                    # prev_mother two statements below -- confirm intended.
+                    time_edges.setdefault(t, set()).add(
+                        (prev_mother, number_sequence[i + 1])
+                    )
+                    is_root[number_sequence[i + 1]] = False
+                    t = time[prev_mother] + 1
+                else:
+                    t = time_sequence.pop(0)
+                    is_root[number_sequence[i + 1]] = True
+            elif c == -2:
+                # The node on top of the waiting stack gets the next id as
+                # its successor, and its own position and time are stored now.
+                successor[waiting_list[-1]] = [number_sequence[i + 1]]
+                edges.append((waiting_list[-1], number_sequence[i + 1]))
+                time_edges.setdefault(t, set()).add(
+                    (waiting_list[-1], number_sequence[i + 1])
+                )
+                is_root[number_sequence[i + 1]] = False
+                pos[waiting_list[-1]] = pos_sequence[:3]
+                pos_sequence = pos_sequence[3:]
+                nodes.append(waiting_list[-1])
+                time[waiting_list[-1]] = t
+                time_nodes.setdefault(t, set()).add(waiting_list[-1])
+                t += 1
+            # NOTE(review): the branches below read number_sequence[i + 1]
+            # unguarded; presumably the writer always ends the sequence with
+            # a marker so a node id is never the last entry -- confirm.
+            elif number_sequence[i + 1] >= 0:
+                # Node id followed by another node id: c has that single successor.
+                successor[c] = [number_sequence[i + 1]]
+                edges.append((c, number_sequence[i + 1]))
+                time_edges.setdefault(t, set()).add(
+                    (c, number_sequence[i + 1])
+                )
+                is_root[number_sequence[i + 1]] = False
+                pos[c] = pos_sequence[:3]
+                pos_sequence = pos_sequence[3:]
+                nodes.append(c)
+                time[c] = t
+                time_nodes.setdefault(t, set()).add(c)
+                t += 1
+            elif number_sequence[i + 1] == -2:
+                # c is followed by -2: push it; its data is stored when the
+                # -2 marker itself is processed on the next iteration.
+                waiting_list += [c]
+            elif number_sequence[i + 1] == -1:
+                # c is followed by -1: store c, then step over the marker and
+                # handle the id after it like the `c == -1` branch above.
+                pos[c] = pos_sequence[:3]
+                pos_sequence = pos_sequence[3:]
+                nodes.append(c)
+                time[c] = t
+                time_nodes.setdefault(t, set()).add(c)
+                t += 1
+                i += 1
+                if waiting_list != []:
+                    prev_mother = waiting_list.pop()
+                    # NOTE(review): number_sequence[i + 1] is read here before
+                    # the `i + 1 < len(number_sequence)` guard further down.
+                    successor[prev_mother].insert(0, number_sequence[i + 1])
+                    edges.append((prev_mother, number_sequence[i + 1]))
+                    time_edges.setdefault(t, set()).add(
+                        (prev_mother, number_sequence[i + 1])
+                    )
+                    if i + 1 < len(number_sequence):
+                        is_root[number_sequence[i + 1]] = False
+                    t = time[prev_mother] + 1
+                else:
+                    if len(time_sequence) > 0:
+                        t = time_sequence.pop(0)
+                    if i + 1 < len(number_sequence):
+                        is_root[number_sequence[i + 1]] = True
+            i += 1
+        # Invert successor into single-element predecessor lists.
+        predecessor = {vi: [k] for k, v in successor.items() for vi in v}
+        # NOTE(review): the local `edges` list is built above but is not
+        # stored on self in this method.
+        self.successor = successor
+        self.predecessor = predecessor
+        self.time = time
+        self.time_nodes = time_nodes
+        self.time_edges = time_edges
+        self.pos = pos
+        self.nodes = set(nodes)
+        self.t_b = min(time_nodes)
+        self.t_e = max(time_nodes)
+        self.is_root = is_root
+        self.max_id = max(self.nodes)
     def read_from_txt_for_celegans(self, file: str):
         """
         Read a C. elegans lineage tree
@@ -253,20 +477,6 @@ class lineageTreeLoaders:
         Args:
             file (str): Path to the file to read
         """
-        implicit_l_t = {
-            "AB": "P0",
-            "P1": "P0",
-            "EMS": "P1",
-            "P2": "P1",
-            "MS": "EMS",
-            "E": "EMS",
-            "C": "P2",
-            "P3": "P2",
-            "D": "P3",
-            "P4": "P3",
-            "Z2": "P4",
-            "Z3": "P4",
-        }
         with open(file) as f:
             raw = f.readlines()[1:]
             f.close()
@@ -295,12 +505,9 @@ class lineageTreeLoaders:
                         p = name_to_id[self.name[c]]
                     elif self.name[c][:-1] in name_to_id:
                         p = name_to_id[self.name[c][:-1]]
-                    elif implicit_l_t.get(self.name[c]) in name_to_id:
-                        p = name_to_id[implicit_l_t.get(self.name[c])]
+                    elif self.implicit_l_t.get(self.name[c]) in name_to_id:
+                        p = name_to_id[self.implicit_l_t.get(self.name[c])]
                     else:
-                        print(
-                            "error, cell %s has no predecessors" % self.name[c]
-                        )
                         p = None
                     self.predecessor.setdefault(c, []).append(p)
                     self.successor.setdefault(p, []).append(c)
@@ -321,21 +528,6 @@ class lineageTreeLoaders:
             file (str): Path to the file to read
         """
-        implicit_l_t = {
-            "AB": "P0",
-            "P1": "P0",
-            "EMS": "P1",
-            "P2": "P1",
-            "MS": "EMS",
-            "E": "EMS",
-            "C": "P2",
-            "P3": "P2",
-            "D": "P3",
-            "P4": "P3",
-            "Z2": "P4",
-            "Z3": "P4",
-        }
         def split_line(line):
             return (
                 line.split()[0],
@@ -382,11 +574,12 @@ class lineageTreeLoaders:
                         p = name_to_id[self.name[c]]
                     elif self.name[c][:-1] in name_to_id:
                         p = name_to_id[self.name[c][:-1]]
-                    elif implicit_l_t.get(self.name[c]) in name_to_id:
-                        p = name_to_id[implicit_l_t.get(self.name[c])]
+                    elif self.implicit_l_t.get(self.name[c]) in name_to_id:
+                        p = name_to_id[self.implicit_l_t.get(self.name[c])]
                     else:
-                        print(
-                            "error, cell %s has no predecessors" % self.name[c]
+                        warn(
+                            f"error, cell {self.name[c]} has no predecessors",
+                            stacklevel=2,
                         )
                         p = None
                     self.predecessor.setdefault(c, []).append(p)
@@ -428,9 +621,6 @@ class lineageTreeLoaders:
         self.intensity = {}
         self.W = {}
         for t in range(tb, te + 1):
-            print(t, end=" ")
-            if t % 10 == 0:
-                print()
             tree = ET.parse(file_format.format(t=t))
             root = tree.getroot()
             self.time_nodes[t] = set()
@@ -652,3 +842,55 @@ class lineageTreeLoaders:
                     tracks[t_id].append((s, t))
         self.t_b = min(self.time_nodes.keys())
         self.t_e = max(self.time_nodes.keys())
+    def read_C_elegans_bao(self, path):
+        """
+        Read a tab-separated C. elegans file with one cell per line.
+
+        Lines containing "cell_name" are skipped. On every other line the
+        first tab-separated field is used as the cell name and the last
+        field is split on "," into per-node values that are stored as
+        integers in `self.expression`.
+
+        Each cell becomes a branch built with self.add_node / self.add_branch.
+        Branches are then linked child -> parent, using the cell name minus
+        its last character and the `implicit_l_t` table. Finally the times
+        are recomputed by walking down from `self.roots`.
+
+        Args:
+            path: path to the file to read
+        """
+        cell_times = {}
+        self.expression = {}
+        with open(path) as f:
+            for line in f:
+                if "cell_name" not in line:
+                    cell_times[line.split("\t")[0]] = list(
+                        line.split("\t")[-1].split(",")
+                    )
+        # new_dict: cell name -> node created by add_node for that cell.
+        # end_dict: cell name -> last node of get_cycle() for that node.
+        new_dict = {}
+        end_dict = {}
+        self.t_e = 0
+        self.t_b = 0
+        for c, lc in cell_times.items():
+            new_dict[c] = self.add_node(0)
+            # NOTE(review): add_node, add_branch, get_cycle and _labels are
+            # defined outside this view; presumably add_branch returns the
+            # far end of the new branch -- confirm against their definitions.
+            tmp = self.add_branch(
+                new_dict[c],
+                length=len(lc) - 1,
+                reverse=True,
+                move_timepoints=True,
+            )
+            for i, node in enumerate(self.get_cycle(tmp)):
+                self.expression[node] = int(lc[i])
+            # Move the cell-name label onto the first node of the cycle.
+            self._labels[self.get_cycle(tmp)[0]] = c
+            self._labels.pop(tmp)
+            end_dict[c] = self.get_cycle(new_dict[c])[-1]
+        cell_names = list(cell_times.keys())
+        c_to_p = {}
+        # NOTE(review): names are popped from the end and the parent is looked
+        # up among the names still in the list, so a parent is only found if
+        # it appears earlier in the file than its daughter -- confirm.
+        while cell_names:
+            cur = cell_names.pop()
+            if cur[:-1] in cell_names:
+                c_to_p[cur] = cur[:-1]
+        # Entries from implicit_l_t override any name-derived parent.
+        c_to_p.update(self.implicit_l_t)
+        for c, p in c_to_p.items():
+            if p in cell_times:
+                # Attach the daughter's first node to the parent's last node.
+                cyc = end_dict[p]
+                self.predecessor[new_dict[c]] = [cyc]
+                if cyc not in self.successor:
+                    self.successor[cyc] = []
+                self.successor[cyc].append(new_dict[c])
+        # Rebuild time_nodes and the per-node times by walking down from
+        # each root, each successor getting its predecessor's time + 1.
+        self.time_nodes.clear()
+        for root in self.roots:
+            to_do = [root]
+            while to_do:
+                cur = to_do.pop()
+                self.time_nodes.setdefault(self.time[cur], set()).add(cur)
+                _next = self.successor.get(cur, [])
+                to_do += _next
+                for n in _next:
+                    self.time[n] = self.time[cur] + 1
+        self.t_e = max(self.time.values())

LineageTree 1.6.1__py3-none-any.whl → 1.8.0__py3-none-any.whl

LineageTree 1.6.1py3-none-any.whl → 1.8.0py3-none-any.whl