LineageTree 1.6.1__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,11 +3,14 @@ import pickle as pkl
3
3
  import warnings
4
4
  from functools import partial
5
5
 
6
+ import numpy as np
7
+
6
8
  try:
7
9
  from edist import uted
8
10
  except ImportError:
9
11
  warnings.warn(
10
- "No edist installed therefore you will not be able to compute the tree edit distance."
12
+ "No edist installed therefore you will not be able to compute the tree edit distance.",
13
+ stacklevel=2,
11
14
  )
12
15
  from LineageTree import lineageTree
13
16
 
@@ -17,7 +20,6 @@ from .tree_styles import tree_style
17
20
  class lineageTreeManager:
18
21
  def __init__(self):
19
22
  self.lineagetrees = {}
20
- # self.classification = {"Wt": {}, "Ptb": {}}
21
23
  self.lineageTree_counter = 0
22
24
  self.registered = {}
23
25
 
@@ -25,6 +27,15 @@ class lineageTreeManager:
25
27
  self.lineageTree_counter += 1
26
28
  return self.lineageTree_counter - 1
27
29
 
30
@property
def gcd(self):
    """Greatest common divisor of the time resolutions of all managed trees.

    Returns:
        The value of ``np.gcd.reduce`` over the ``_time_resolution`` of
        every tree stored in ``self.lineagetrees``, or ``None`` when the
        manager does not hold any tree.
    """
    if not self.lineagetrees:
        return None
    resolutions = [lt._time_resolution for lt in self.lineagetrees.values()]
    return np.gcd.reduce(resolutions)
38
+
28
39
  def add(
29
40
  self, other_tree: lineageTree, name: str = "", classification: str = ""
30
41
  ):
@@ -38,7 +49,7 @@ class lineageTreeManager:
38
49
  name (str, optional): Then name of. Defaults to "".
39
50
 
40
51
  """
41
- if isinstance(other_tree, lineageTree):
52
+ if isinstance(other_tree, lineageTree) and other_tree.time_resolution:
42
53
  for tree in self.lineagetrees.values():
43
54
  if tree == other_tree:
44
55
  return False
@@ -52,25 +63,14 @@ class lineageTreeManager:
52
63
  name = f"Lineagetree {next(self)}"
53
64
  self.lineagetrees[name] = other_tree
54
65
  self.lineagetrees[name].name = name
55
- # try:
56
- # name = other_tree.name
57
- # self.lineagetrees[name] = other_tree
58
- # except:
59
- # self.lineagetrees[
60
- # f"Lineagetree {next(self)}"
61
- # ] = other_tree
62
- # if classification in ("Wt", "Ptb"):
63
- # self.classification[type] = {name: other_tree}
66
+ else:
67
+ raise Exception(
68
+ "Please add a LineageTree object or add time resolution to the LineageTree added."
69
+ )
64
70
 
65
71
  def __add__(self, other):
66
72
  self.add(other)
67
73
 
68
- # def classify_existing(self, key, classification: str):
69
- # if classification in ("Wt", "Ptb"):
70
- # self.classification[classification] = {key: self.lineagetrees[key]}
71
- # else:
72
- # return False
73
-
74
74
  def write(self, fname: str):
75
75
  """Saves the manager
76
76
 
@@ -118,9 +118,9 @@ class lineageTreeManager:
118
118
  n2: int,
119
119
  embryo_2,
120
120
  end_time2: int,
121
- style="fragmented",
122
- node_lengths: tuple = (1, 5, 7),
123
- registration=None,
121
+ style="simple",
122
+ downsample: int = 2,
123
+ registration=None, # will be added as a later feature
124
124
  ):
125
125
  """Compute the unordered tree edit distance from Zhang 1996 between the trees spawned
126
126
  by two nodes `n1` from lineagetree1 and `n2` lineagetree2. The topology of the trees
@@ -139,21 +139,48 @@ class lineageTreeManager:
139
139
  """
140
140
 
141
141
  tree = tree_style[style].value
142
+ lcm = (
143
+ self.lineagetrees[embryo_1]._time_resolution
144
+ * self.lineagetrees[embryo_2]._time_resolution
145
+ ) / self.gcd
146
+ if style == "downsampled":
147
+ if downsample % (lcm / 10) != 0:
148
+ raise Exception(
149
+ f"Use a valid downsampling rate (multiple of {lcm/10})"
150
+ )
151
+ time_res = [
152
+ downsample / self.lineagetrees[embryo_2].time_resolution,
153
+ downsample / self.lineagetrees[embryo_1].time_resolution,
154
+ ]
155
+ elif style == "full":
156
+ time_res = [
157
+ lcm / 10 / self.lineagetrees[embryo_2].time_resolution,
158
+ lcm / 10 / self.lineagetrees[embryo_1].time_resolution,
159
+ ]
160
+ else:
161
+ time_res = [
162
+ self.lineagetrees[embryo_1]._time_resolution,
163
+ self.lineagetrees[embryo_2]._time_resolution,
164
+ ]
165
+ time_res = [i / self.gcd for i in time_res]
142
166
  tree1 = tree(
143
167
  lT=self.lineagetrees[embryo_1],
144
- node_length=node_lengths,
168
+ downsample=downsample,
145
169
  end_time=end_time1,
146
170
  root=n1,
171
+ time_scale=time_res[0],
147
172
  )
148
173
  tree2 = tree(
149
174
  lT=self.lineagetrees[embryo_2],
150
- node_length=node_lengths,
175
+ downsample=downsample,
151
176
  end_time=end_time2,
152
177
  root=n2,
178
+ time_scale=time_res[1],
153
179
  )
154
180
  delta = tree1.delta
155
181
  _, times1 = tree1.tree
156
182
  _, times2 = tree2.tree
183
+
157
184
  nodes1, adj1, corres1 = tree1.edist
158
185
  nodes2, adj2, corres2 = tree2.edist
159
186
  if len(nodes1) == len(nodes2) == 0:
LineageTree/loaders.py CHANGED
@@ -1,11 +1,94 @@
1
1
  import csv
2
+ import os
2
3
  import pickle as pkl
4
+ import struct
3
5
  import xml.etree.ElementTree as ET
4
- import os
6
+ from warnings import warn
7
+
5
8
  import numpy as np
6
9
 
7
10
 
8
11
  class lineageTreeLoaders:
12
+ implicit_l_t = {
13
+ "AB": "P0",
14
+ "P1": "P0",
15
+ "EMS": "P1",
16
+ "P2": "P1",
17
+ "MS": "EMS",
18
+ "E": "EMS",
19
+ "C": "P2",
20
+ "P3": "P2",
21
+ "D": "P3",
22
+ "P4": "P3",
23
+ "Z2": "P4",
24
+ "Z3": "P4",
25
+ }
26
+
27
def read_from_csv(
    self, file_path: str, z_mult: float, link: int = 1, delim: str = ","
):
    """
    Read a lineage tree from a delimited text file of numeric rows.

    The expected columns depend on `link`:

    * ``link == 1``: ``id, t, z, y, x, predecessor id, lineage id``
    * ``link == 2``: ``t, z, y, x, id, predecessor id, lineage id``
    * any other value: ``id, t, z, y, x, dz, dy, dx``; no predecessor or
      lineage id is read. The displacement ``(dx, dy, dz * z_mult)`` is
      stored in ``self.displacement`` only when `link` is falsy (``0``).

    Rows are processed in increasing time order and every row receives a
    new consecutive id starting at 0. A row is linked to its predecessor
    only when the predecessor id belongs to a row that was already
    processed. Edges are filed in ``self.time_edges`` under the time of
    the successor.

    Args:
        file_path (str): Path to the file to read
        z_mult (float): Factor applied to the z coordinate (and to dz)
        link (int): Column layout selector, see above. Defaults to 1.
        delim (str): Field delimiter. Defaults to ",".
    """
    # Local import: `ast` is only needed here and the file-level import
    # block is not part of this definition.
    import ast

    with open(file_path) as f:
        # Blank lines carry no record; skipping them avoids a parse error
        # on a trailing empty line.
        raw_lines = [line for line in f if line.strip()]

    self.time_nodes = {}
    self.time_edges = {}
    self.nodes = set()
    self.edges = set()
    self.successor = {}
    self.predecessor = {}
    self.pos = {}
    self.time_id = {}
    self.time = {}
    self.lin = {}
    self.C_lin = {}
    if not link:
        self.displacement = {}

    # `literal_eval` only accepts literals, so the content of the file is
    # parsed as numbers but never executed (unlike `eval`).
    table = np.array(
        [
            [ast.literal_eval(v.strip()) for v in line.split(delim)]
            for line in raw_lines
        ]
    )
    # Sort by time so that a predecessor is always seen before its
    # successors; the time column is the first one only when link == 2.
    time_column = 0 if link == 2 else 1
    table = table[np.argsort(table[:, time_column])]

    corres = {}  # id found in the file -> id given in this object
    for C, row in enumerate(table):
        if link == 1:
            id_, t, z, y, x, pred, lin_id = row
        elif link == 2:
            t, z, y, x, id_, pred, lin_id = row
        else:
            id_, t, z, y, x, dz, dy, dx = row
            pred = None
            lin_id = None
        t = int(t)
        corres[id_] = C
        # Build the position as floats: scaling z inside an integer array
        # would silently truncate the result.
        self.pos[C] = np.array([x, y, z * z_mult], dtype=float)
        if pred in corres:
            M = corres[pred]
            self.predecessor[C] = [M]
            self.successor.setdefault(M, []).append(C)
            self.edges.add((M, C))
            self.time_edges.setdefault(t, set()).add((M, C))
        self.lin.setdefault(lin_id, []).append(C)
        self.C_lin[C] = lin_id
        self.nodes.add(C)
        self.time_nodes.setdefault(t, set()).add(C)
        self.time[C] = t
        if not link:
            self.displacement[C] = np.array([dx, dy, dz * z_mult])

    self.max_id = len(table) - 1
    self.t_b = min(self.time_nodes)
    self.t_e = max(self.time_nodes)
91
+
9
92
  def read_from_ASTEC(self, file_path: str, eigen: bool = False):
10
93
  """
11
94
  Read an `xml` or `pkl` file produced by the ASTEC algorithm.
@@ -246,6 +329,147 @@ class lineageTreeLoaders:
246
329
  new_dict[k] = v
247
330
  return new_dict
248
331
 
332
def read_from_binary(self, fname: str):
    """
    Reads a binary lineageTree file name.
    Format description: see self.to_binary

    As read here, the file holds three native ``q`` integers (the lengths
    of the three sequences that follow), then the number sequence (``q``),
    the time sequence (``H``) and the position sequence (``d``, three
    values per cell).

    In the number sequence non negative values are cell ids, ``-1``
    closes a branch and ``-2`` follows a cell that has two successors.
    One start time is taken from the time sequence for each new root.

    Fills ``successor``, ``predecessor``, ``time``, ``time_nodes``,
    ``time_edges``, ``pos``, ``nodes``, ``t_b``, ``t_e``, ``is_root`` and
    ``max_id`` on the instance.

    Args:
        fname: string, path to the binary file
    """
    # Local import: only needed here, keeps the file-level imports as is.
    from collections import deque

    q_size = struct.calcsize("q")
    H_size = struct.calcsize("H")
    d_size = struct.calcsize("d")

    with open(fname, "rb") as f:
        len_tree, len_time, len_pos = struct.unpack(
            "qqq", f.read(3 * q_size)
        )
        number_sequence = list(
            struct.unpack(f"{len_tree}q", f.read(q_size * len_tree))
        )
        time_sequence = list(
            struct.unpack(f"{len_time}H", f.read(H_size * len_time))
        )
        pos_sequence = np.array(
            struct.unpack(f"{len_pos}d", f.read(d_size * len_pos))
        )

    successor = {}
    time = {}
    time_nodes = {}
    time_edges = {}
    pos = {}
    is_root = {}
    nodes = []
    waiting_list = []  # mothers whose second successor is still to come
    done = False

    # Shortcut: every other entry is -1, i.e. the file only holds isolated
    # cells. Only taken when the three sequences agree on the cell count.
    if max(number_sequence[::2]) == -1:
        tmp = number_sequence[1::2]
        if len(tmp) * 3 == len(pos_sequence) == len(time_sequence) * 3:
            time = dict(zip(tmp, time_sequence))
            for c, t in time.items():
                time_nodes.setdefault(t, set()).add(c)
            pos = dict(zip(tmp, np.reshape(pos_sequence, (len_time, 3))))
            is_root = {c: True for c in tmp}
            nodes = tmp
            done = True

    def register(cell, when):
        # Cells are registered in file order and each one owns the next
        # three values of the position sequence.
        start = 3 * len(nodes)
        pos[cell] = pos_sequence[start : start + 3]
        nodes.append(cell)
        time[cell] = when
        time_nodes.setdefault(when, set()).add(cell)

    def attach_second_daughter(index):
        # Link the cell found at `index` to the last mother still waiting
        # and return the time point of that daughter.
        mother = waiting_list.pop()
        daughter = number_sequence[index]
        successor[mother].insert(0, daughter)
        # File the edge under the mother's time, like the edge to the
        # first daughter; the running `t` is stale here since it has been
        # advanced along the first daughter's subtree.
        time_edges.setdefault(time[mother], set()).add((mother, daughter))
        is_root[daughter] = False
        return time[mother] + 1

    if not done:
        start_times = deque(time_sequence)  # one start time per root
        nb_numbers = len(number_sequence)
        i = 0
        while i < nb_numbers:
            c = number_sequence[i]
            if c == -1:
                if waiting_list:
                    t = attach_second_daughter(i + 1)
                else:
                    # Nothing pending: the next cell starts a new tree.
                    t = start_times.popleft()
                    is_root[number_sequence[i + 1]] = True

            elif c == -2:
                # The previous cell (top of the waiting list) divides and
                # the next value is its first daughter.
                mother = waiting_list[-1]
                daughter = number_sequence[i + 1]
                successor[mother] = [daughter]
                time_edges.setdefault(t, set()).add((mother, daughter))
                is_root[daughter] = False
                register(mother, t)
                t += 1

            elif number_sequence[i + 1] >= 0:
                # Cell with a single successor.
                daughter = number_sequence[i + 1]
                successor[c] = [daughter]
                time_edges.setdefault(t, set()).add((c, daughter))
                is_root[daughter] = False
                register(c, t)
                t += 1

            elif number_sequence[i + 1] == -2:
                # Dividing cell: handled when the -2 marker is reached.
                waiting_list.append(c)

            elif number_sequence[i + 1] == -1:
                # Last cell of a branch; the -1 marker is consumed here.
                register(c, t)
                t += 1
                i += 1
                if waiting_list:
                    t = attach_second_daughter(i + 1)
                else:
                    if start_times:
                        t = start_times.popleft()
                    if i + 1 < nb_numbers:
                        is_root[number_sequence[i + 1]] = True
            i += 1

    predecessor = {vi: [k] for k, v in successor.items() for vi in v}

    self.successor = successor
    self.predecessor = predecessor
    self.time = time
    self.time_nodes = time_nodes
    self.time_edges = time_edges
    self.pos = pos
    self.nodes = set(nodes)
    self.t_b = min(time_nodes)
    self.t_e = max(time_nodes)
    self.is_root = is_root
    self.max_id = max(self.nodes)
472
+
249
473
  def read_from_txt_for_celegans(self, file: str):
250
474
  """
251
475
  Read a C. elegans lineage tree
@@ -253,20 +477,6 @@ class lineageTreeLoaders:
253
477
  Args:
254
478
  file (str): Path to the file to read
255
479
  """
256
- implicit_l_t = {
257
- "AB": "P0",
258
- "P1": "P0",
259
- "EMS": "P1",
260
- "P2": "P1",
261
- "MS": "EMS",
262
- "E": "EMS",
263
- "C": "P2",
264
- "P3": "P2",
265
- "D": "P3",
266
- "P4": "P3",
267
- "Z2": "P4",
268
- "Z3": "P4",
269
- }
270
480
  with open(file) as f:
271
481
  raw = f.readlines()[1:]
272
482
  f.close()
@@ -295,12 +505,9 @@ class lineageTreeLoaders:
295
505
  p = name_to_id[self.name[c]]
296
506
  elif self.name[c][:-1] in name_to_id:
297
507
  p = name_to_id[self.name[c][:-1]]
298
- elif implicit_l_t.get(self.name[c]) in name_to_id:
299
- p = name_to_id[implicit_l_t.get(self.name[c])]
508
+ elif self.implicit_l_t.get(self.name[c]) in name_to_id:
509
+ p = name_to_id[self.implicit_l_t.get(self.name[c])]
300
510
  else:
301
- print(
302
- "error, cell %s has no predecessors" % self.name[c]
303
- )
304
511
  p = None
305
512
  self.predecessor.setdefault(c, []).append(p)
306
513
  self.successor.setdefault(p, []).append(c)
@@ -321,21 +528,6 @@ class lineageTreeLoaders:
321
528
  file (str): Path to the file to read
322
529
  """
323
530
 
324
- implicit_l_t = {
325
- "AB": "P0",
326
- "P1": "P0",
327
- "EMS": "P1",
328
- "P2": "P1",
329
- "MS": "EMS",
330
- "E": "EMS",
331
- "C": "P2",
332
- "P3": "P2",
333
- "D": "P3",
334
- "P4": "P3",
335
- "Z2": "P4",
336
- "Z3": "P4",
337
- }
338
-
339
531
  def split_line(line):
340
532
  return (
341
533
  line.split()[0],
@@ -382,11 +574,12 @@ class lineageTreeLoaders:
382
574
  p = name_to_id[self.name[c]]
383
575
  elif self.name[c][:-1] in name_to_id:
384
576
  p = name_to_id[self.name[c][:-1]]
385
- elif implicit_l_t.get(self.name[c]) in name_to_id:
386
- p = name_to_id[implicit_l_t.get(self.name[c])]
577
+ elif self.implicit_l_t.get(self.name[c]) in name_to_id:
578
+ p = name_to_id[self.implicit_l_t.get(self.name[c])]
387
579
  else:
388
- print(
389
- "error, cell %s has no predecessors" % self.name[c]
580
+ warn(
581
+ f"error, cell {self.name[c]} has no predecessors",
582
+ stacklevel=2,
390
583
  )
391
584
  p = None
392
585
  self.predecessor.setdefault(c, []).append(p)
@@ -428,9 +621,6 @@ class lineageTreeLoaders:
428
621
  self.intensity = {}
429
622
  self.W = {}
430
623
  for t in range(tb, te + 1):
431
- print(t, end=" ")
432
- if t % 10 == 0:
433
- print()
434
624
  tree = ET.parse(file_format.format(t=t))
435
625
  root = tree.getroot()
436
626
  self.time_nodes[t] = set()
@@ -652,3 +842,55 @@ class lineageTreeLoaders:
652
842
  tracks[t_id].append((s, t))
653
843
  self.t_b = min(self.time_nodes.keys())
654
844
  self.t_e = max(self.time_nodes.keys())
845
+
846
def read_C_elegans_bao(self, path):
    """
    Read a tab separated C. elegans file with one line per cell.

    On each line the first field is the cell name and the last field is a
    comma separated list of integers, one per time point of the cell,
    stored in ``self.expression``. Lines containing ``cell_name`` (the
    header) and blank lines are ignored.

    Each cell becomes a branch built with ``self.add_node`` and
    ``self.add_branch``. A cell is then attached to the end of the branch
    of its mother, the mother being the cell whose name is the name of
    the cell without its last character, or the one given by
    ``self.implicit_l_t``. Finally the time of every cell is recomputed
    from the roots.

    Args:
        path (str): Path to the file to read
    """
    cell_times = {}
    self.expression = {}
    with open(path) as f:
        for line in f:
            # A blank line holds no cell and would fail the int conversion.
            if "cell_name" in line or not line.strip():
                continue
            fields = line.split("\t")
            cell_times[fields[0]] = fields[-1].split(",")

    new_dict = {}  # cell name -> node created with add_node
    end_dict = {}  # cell name -> last node of the branch of the cell
    self.t_e = 0
    self.t_b = 0
    for c, lc in cell_times.items():
        new_dict[c] = self.add_node(0)
        tmp = self.add_branch(
            new_dict[c],
            length=len(lc) - 1,
            reverse=True,
            move_timepoints=True,
        )
        cycle = self.get_cycle(tmp)
        for i, node in enumerate(cycle):
            self.expression[node] = int(lc[i])
        self._labels[cycle[0]] = c
        self._labels.pop(tmp)
        end_dict[c] = self.get_cycle(new_dict[c])[-1]

    # Mother = name without its last character. The lookup is done in the
    # full table so that the result does not depend on the order of the
    # lines. The names are visited from last to first to keep the order in
    # which the daughters are appended to `successor`.
    c_to_p = {
        name: name[:-1]
        for name in reversed(list(cell_times))
        if name and name[:-1] in cell_times
    }
    c_to_p.update(self.implicit_l_t)
    for c, p in c_to_p.items():
        # `implicit_l_t` may name cells that are not in the file.
        if p in cell_times and c in new_dict:
            cyc = end_dict[p]
            self.predecessor[new_dict[c]] = [cyc]
            self.successor.setdefault(cyc, []).append(new_dict[c])

    # Rebuild the time information by walking down from every root.
    self.time_nodes.clear()
    for root in self.roots:
        to_do = [root]
        while to_do:
            cur = to_do.pop()
            self.time_nodes.setdefault(self.time[cur], set()).add(cur)
            _next = self.successor.get(cur, [])
            to_do += _next
            for n in _next:
                self.time[n] = self.time[cur] + 1
    self.t_e = max(self.time.values())