LineageTree 1.5.1__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LineageTree/__init__.py +1 -1
- LineageTree/lineageTree.py +177 -855
- LineageTree/loaders.py +654 -0
- LineageTree/tree_styles.py +6 -10
- LineageTree/utils.py +84 -137
- {LineageTree-1.5.1.dist-info → LineageTree-1.6.1.dist-info}/METADATA +1 -1
- LineageTree-1.6.1.dist-info/RECORD +11 -0
- LineageTree-1.5.1.dist-info/RECORD +0 -10
- {LineageTree-1.5.1.dist-info → LineageTree-1.6.1.dist-info}/LICENSE +0 -0
- {LineageTree-1.5.1.dist-info → LineageTree-1.6.1.dist-info}/WHEEL +0 -0
- {LineageTree-1.5.1.dist-info → LineageTree-1.6.1.dist-info}/top_level.txt +0 -0
LineageTree/loaders.py
ADDED
@@ -0,0 +1,654 @@
|
|
1
|
+
import csv
|
2
|
+
import pickle as pkl
|
3
|
+
import xml.etree.ElementTree as ET
|
4
|
+
import os
|
5
|
+
import numpy as np
|
6
|
+
|
7
|
+
|
8
|
+
class lineageTreeLoaders:
|
9
|
+
def read_from_ASTEC(self, file_path: str, eigen: bool = False):
    """
    Read an `xml` or `pkl` file produced by the ASTEC algorithm.

    ASTEC cell ids are decoded as `time * 10**4 + image_label`. Every cell
    found in the lineage gets a new consecutive id; the mapping is kept in
    `self.lT2pkl` (new id -> ASTEC id) and `self.pkl2lT` (ASTEC id -> new id).

    The method fills `self.time_nodes`, `self.nodes`, `self.time`,
    `self.predecessor`, `self.successor` and `self.time_edges` in place, so
    these containers are expected to already exist on the instance.
    Any extra property found in the file is attached to the instance under
    its own name and listed in `self.specific_properties`.

    Args:
        file_path (str): path to an output generated by ASTEC
        eigen (bool): whether or not to read the eigen values, default False
            (only forwarded to the `pkl` reader)

    Raises:
        KeyError: if the file contains no lineage information.
    """
    # Canonical property name -> every spelling met in ASTEC outputs.
    self._astec_keydictionary = {
        "cell_lineage": [
            "lineage_tree",
            "lin_tree",
            "Lineage tree",
            "cell_lineage",
        ],
        "cell_h_min": ["cell_h_min", "h_mins_information"],
        "cell_volume": [
            "cell_volume",
            "volumes_information",
            "volumes information",
            "vol",
        ],
        "cell_surface": ["cell_surface", "cell surface"],
        "cell_compactness": [
            "cell_compactness",
            "Cell Compactness",
            "compacity",
            "cell_sphericity",
        ],
        "cell_sigma": ["cell_sigma", "sigmas_information", "sigmas"],
        "cell_labels_in_time": [
            "cell_labels_in_time",
            "Cells labels in time",
            "time_labels",
        ],
        "cell_barycenter": [
            "cell_barycenter",
            "Barycenters",
            "barycenters",
        ],
        "cell_fate": ["cell_fate", "Fate"],
        "cell_fate_2": ["cell_fate_2", "Fate2"],
        "cell_fate_3": ["cell_fate_3", "Fate3"],
        "cell_fate_4": ["cell_fate_4", "Fate4"],
        "all_cells": [
            "all_cells",
            "All Cells",
            "All_Cells",
            "all cells",
            "tot_cells",
        ],
        "cell_principal_values": [
            "cell_principal_values",
            "Principal values",
        ],
        "cell_name": ["cell_name", "Names", "names", "cell_names"],
        "cell_contact_surface": [
            "cell_contact_surface",
            "cell_cell_contact_information",
        ],
        "cell_history": [
            "cell_history",
            "Cells history",
            "cell_life",
            "life",
        ],
        "cell_principal_vectors": [
            "cell_principal_vectors",
            "Principal vectors",
        ],
        "cell_naming_score": ["cell_naming_score", "Scores", "scores"],
        "problematic_cells": ["problematic_cells"],
        "unknown_key": ["unknown_key"],
    }

    if os.path.splitext(file_path)[-1] == ".xml":
        tmp_data = self._read_from_ASTEC_xml(file_path)
    else:
        tmp_data = self._read_from_ASTEC_pkl(file_path, eigen)

    has_volume = "cell_volume" in tmp_data
    has_fate = "cell_fate" in tmp_data
    has_barycenter = "cell_barycenter" in tmp_data

    # Optional containers are only created when the file provides the data.
    self.name = {}
    if has_volume:
        self.volume = {}
    if has_fate:
        self.fates = {}
    if has_barycenter:
        self.pos = {}
    self.lT2pkl = {}
    self.pkl2lT = {}
    self.contact = {}
    self.prob_cells = set()
    self.image_label = {}

    lt = tmp_data["cell_lineage"]

    # Daughter -> [mother]; together with the mothers it gives all the cells.
    inv = {vi: [c] for c, v in lt.items() for vi in v}
    nodes = set(lt).union(inv)

    unique_id = 0
    for n in nodes:
        t = n // 10**4
        self.image_label[unique_id] = n % 10**4
        self.lT2pkl[unique_id] = n
        self.pkl2lT[n] = unique_id
        self.time_nodes.setdefault(t, set()).add(unique_id)
        self.nodes.add(unique_id)
        self.time[unique_id] = t
        if has_volume:
            self.volume[unique_id] = tmp_data["cell_volume"].get(n, 0.0)
        if has_fate:
            self.fates[unique_id] = tmp_data["cell_fate"].get(n, "")
        if has_barycenter:
            self.pos[unique_id] = tmp_data["cell_barycenter"].get(
                n, np.zeros(3)
            )
        unique_id += 1

    surfaces = tmp_data.get("cell_contact_surface")
    if surfaces is not None:
        for c in nodes:
            if c in surfaces:
                # Neighbours that are not part of the lineage are dropped,
                # except image label 1 which is kept under the id -1
                # (presumably the background -- TODO confirm).
                self.contact[self.pkl2lT[c]] = {
                    self.pkl2lT.get(n, -1): s
                    for n, s in surfaces[c].items()
                    if n % 10**4 == 1 or n in self.pkl2lT
                }

    for n, new_id in self.pkl2lT.items():
        if n in inv:
            self.predecessor[new_id] = [self.pkl2lT[ni] for ni in inv[n]]
        if n in lt:
            daughters = [
                self.pkl2lT[ni] for ni in lt[n] if ni in self.pkl2lT
            ]
            self.successor[new_id] = daughters
            # Edges are stored under the time of their source cell, like the
            # C. elegans and Mastodon loaders of this file do.  The previous
            # code reused the loop variable `t` of the node loop above, i.e.
            # the time of whichever node happened to be visited last.
            edge_time = self.time[new_id]
            for ni in daughters:
                self.time_edges.setdefault(edge_time, set()).add(
                    (new_id, ni)
                )

    self.t_b = min(self.time_nodes)
    self.t_e = max(self.time_nodes)
    self.max_id = unique_id

    # Attach every remaining property, skipping the lineage itself and
    # whatever has already been stored above.
    discard = {
        "cell_volume",
        "cell_fate",
        "cell_barycenter",
        "cell_contact_surface",
        "cell_lineage",
        "all_cells",
        "cell_history",
        "problematic_cells",
        "cell_labels_in_time",
    }
    self.specific_properties = []
    for prop_name, prop_values in tmp_data.items():
        if prop_name in discard or hasattr(self, prop_name):
            continue
        if isinstance(prop_values, dict):
            # Rename the ASTEC ids to the new ids, one level deep for
            # dictionaries of dictionaries.  Unknown ids all map to -1.
            renamed = {}
            for k, v in prop_values.items():
                if isinstance(v, dict):
                    v = {self.pkl2lT.get(kk, -1): vv for kk, vv in v.items()}
                renamed[self.pkl2lT.get(k, -1)] = v
            self.__dict__[prop_name] = renamed
            self.specific_properties.append(prop_name)
        elif isinstance(prop_values, (list, set, np.ndarray)):
            # Not keyed by cell id: stored untouched.
            self.__dict__[prop_name] = prop_values
            self.specific_properties.append(prop_name)
        # Values of any other type are ignored.
|
200
|
+
|
201
|
+
def _read_from_ASTEC_xml(self, file_path: str):
    """
    Read an ASTEC `xml` file into a dictionary of properties.

    Elements without children are converted to Python values, elements with
    children become dictionaries keyed by the tag of each child, or by the
    integer `cell-id` attribute for `cell` elements.

    If the tag of the root element is one of the canonical names of
    `self._astec_keydictionary` the file holds a single property and the
    result is `{root_tag: value}`. Otherwise every child of the root is a
    property; children that are empty are skipped.

    Args:
        file_path (str): path to the xml file

    Returns:
        (dict): property name -> property value
    """

    def _set_dictionary_value(element):
        # Leaf element: its text is a Python literal (or nothing at all).
        if len(element) == 0:
            if element.text is None:
                return None
            # SECURITY NOTE(review): `eval` runs arbitrary code found in the
            # file, only load xml files from trusted sources.
            return eval(element.text)
        values = {}
        for child in element:
            if child.tag == "cell":
                key = int(child.attrib["cell-id"])
            else:
                key = child.tag
            values[key] = _set_dictionary_value(child)
        return values

    root = ET.parse(file_path).getroot()

    if root.tag in self._astec_keydictionary:
        return {str(root.tag): _set_dictionary_value(root)}

    dictionary = {}
    for child in root:
        value = _set_dictionary_value(child)
        if value is not None:
            dictionary[str(child.tag)] = value
    return dictionary
|
231
|
+
|
232
|
+
def _read_from_ASTEC_pkl(self, file_path: str, eigen: bool = False):
    """
    Read an ASTEC `pkl` file and rename its keys to their canonical names.

    Every alias listed in `self._astec_keydictionary` is replaced by the
    canonical name it belongs to, keys that are not known are kept as is.

    Args:
        file_path (str): path to the pickle file
        eigen (bool): currently not used by this reader

    Returns:
        (dict): property name -> property value
    """
    # SECURITY NOTE(review): unpickling executes code stored in the file,
    # only load pickles from trusted sources.
    with open(file_path, "rb") as f:
        tmp_data = pkl.load(f, encoding="latin1")

    alias_to_name = {
        alias: canonical
        for canonical, aliases in self._astec_keydictionary.items()
        for alias in aliases
    }
    return {alias_to_name.get(k, k): v for k, v in tmp_data.items()}
|
248
|
+
|
249
|
+
def read_from_txt_for_celegans(self, file: str):
    """
    Read a C. elegans lineage tree

    The file is tab separated with one header line; each following line
    holds the time, the name and the three coordinates of a cell.
    A cell is linked to the cell of the previous time point that has the
    same name, else the same name minus its last character, else the mother
    given by the hard-coded founder table below.

    The method fills `self.time_nodes`, `self.nodes`, `self.pos`,
    `self.time`, `self.predecessor`, `self.successor` and `self.time_edges`
    in place, so these containers are expected to already exist.

    Args:
        file (str): Path to the file to read
    """
    # Founder cells whose mother cannot be deduced from their name.
    implicit_l_t = {
        "AB": "P0",
        "P1": "P0",
        "EMS": "P1",
        "P2": "P1",
        "MS": "EMS",
        "E": "EMS",
        "C": "P2",
        "P3": "P2",
        "D": "P3",
        "P4": "P3",
        "Z2": "P4",
        "Z3": "P4",
    }
    with open(file) as f:
        raw = f.readlines()[1:]
    self.name = {}

    unique_id = 0
    for line in raw:
        if not line.strip():
            # Tolerate empty lines (e.g. at the end of the file).
            continue
        fields = line.split("\t")
        t = int(fields[0])
        self.name[unique_id] = fields[1]
        self.time_nodes.setdefault(t, set()).add(unique_id)
        self.nodes.add(unique_id)
        self.pos[unique_id] = np.array(fields[2:5], dtype=float)
        self.time[unique_id] = t
        unique_id += 1

    self.t_b = min(self.time_nodes)
    self.t_e = max(self.time_nodes)

    for t, cells in self.time_nodes.items():
        if t == self.t_b:
            continue
        # A missing time point simply leaves the cells without predecessor.
        prev_cells = self.time_nodes.get(t - 1, ())
        name_to_id = {self.name[c]: c for c in prev_cells}
        for c in cells:
            cell_name = self.name[c]
            if cell_name in name_to_id:
                p = name_to_id[cell_name]
            elif cell_name[:-1] in name_to_id:
                p = name_to_id[cell_name[:-1]]
            elif implicit_l_t.get(cell_name) in name_to_id:
                p = name_to_id[implicit_l_t.get(cell_name)]
            else:
                print("error, cell %s has no predecessors" % cell_name)
                # Do not register an edge from `None`: it would add a
                # non-existing node to the lineage.
                continue
            self.predecessor.setdefault(c, []).append(p)
            self.successor.setdefault(p, []).append(c)
            self.time_edges.setdefault(t - 1, set()).add((p, c))
    self.max_id = unique_id
|
309
|
+
|
310
|
+
def read_from_txt_for_celegans_CAO(
    self,
    file: str,
    reorder: bool = False,
    raw_size: np.ndarray = None,
    shape: np.ndarray = None,
):
    """
    Read a C. elegans lineage tree from Cao et al.

    The file is whitespace separated with one header line; each following
    line holds the name, the time and the z, x, y coordinates of a cell.
    Cells are linked to the previous time point by name, like in
    `read_from_txt_for_celegans`.

    The method fills `self.time_nodes`, `self.nodes`, `self.pos`,
    `self.time`, `self.predecessor`, `self.successor` and `self.time_edges`
    in place, so these containers are expected to already exist.

    Args:
        file (str): Path to the file to read
        reorder (bool): if True, the third coordinate is flipped with respect
            to `raw_size`, the position is rescaled by `shape / raw_size` and
            its first two coordinates are swapped
        raw_size (array-like of 3 numbers): required when `reorder` is True
        shape (array-like of 3 numbers): required when `reorder` is True

    Raises:
        ValueError: if `reorder` is True and `raw_size` or `shape` is missing
    """
    # Founder cells whose mother cannot be deduced from their name.
    implicit_l_t = {
        "AB": "P0",
        "P1": "P0",
        "EMS": "P1",
        "P2": "P1",
        "MS": "EMS",
        "E": "EMS",
        "C": "P2",
        "P3": "P2",
        "D": "P3",
        "P4": "P3",
        "Z2": "P4",
        "Z3": "P4",
    }

    if reorder:
        if raw_size is None or shape is None:
            raise ValueError(
                "`raw_size` and `shape` are required when `reorder` is True"
            )
        raw_size = np.asarray(raw_size, dtype=float)
        shape = np.asarray(shape, dtype=float)

    def split_line(line):
        fields = line.split()
        # SECURITY NOTE(review): `eval` runs arbitrary code found in the
        # file, only load files from trusted sources.
        return (
            fields[0],
            eval(fields[1]),
            eval(fields[2]),
            eval(fields[3]),
            eval(fields[4]),
        )

    def adjust(position):
        flipped = np.array(
            [position[0], position[1], raw_size[2] - position[2]]
        )
        return (shape / raw_size * flipped)[[1, 0, 2]]

    with open(file) as f:
        raw = f.readlines()[1:]
    self.name = {}

    unique_id = 0
    for line in raw:
        if not line.strip():
            # Tolerate empty lines (e.g. at the end of the file).
            continue
        name, t, z, x, y = split_line(line)
        self.name[unique_id] = name
        # `np.float` does not exist anymore in recent NumPy versions,
        # the builtin `float` is the same dtype.
        position = np.array([x, y, z], dtype=float)
        self.time_nodes.setdefault(t, set()).add(unique_id)
        self.nodes.add(unique_id)
        self.pos[unique_id] = adjust(position) if reorder else position
        self.time[unique_id] = t
        unique_id += 1

    self.t_b = min(self.time_nodes)
    self.t_e = max(self.time_nodes)

    for t, cells in self.time_nodes.items():
        if t == self.t_b:
            continue
        # A missing time point simply leaves the cells without predecessor.
        prev_cells = self.time_nodes.get(t - 1, ())
        name_to_id = {self.name[c]: c for c in prev_cells}
        for c in cells:
            cell_name = self.name[c]
            if cell_name in name_to_id:
                p = name_to_id[cell_name]
            elif cell_name[:-1] in name_to_id:
                p = name_to_id[cell_name[:-1]]
            elif implicit_l_t.get(cell_name) in name_to_id:
                p = name_to_id[implicit_l_t.get(cell_name)]
            else:
                print("error, cell %s has no predecessors" % cell_name)
                # Do not register an edge from `None`: it would add a
                # non-existing node to the lineage.
                continue
            self.predecessor.setdefault(c, []).append(p)
            self.successor.setdefault(p, []).append(c)
            self.time_edges.setdefault(t - 1, set()).add((p, c))
    self.max_id = unique_id
|
396
|
+
|
397
|
+
def read_tgmm_xml(
    self, file_format: str, tb: int, te: int, z_mult: float = 1.0
):
    """Reads a lineage tree from TGMM xml output.

    One xml file is read per time point. Every element of a file is a cell;
    cells get new consecutive ids and are linked to their mother through
    their `parent` attribute, which refers to the `id` attribute of a cell
    of the previous time point.
    Cells whose position contains `-1.#IND` or `nan` are only counted (in
    `self.ind_cells`). Cells whose other attributes are missing or
    malformed are skipped entirely.

    All the containers filled by this method are reset first. The current
    time point is printed while reading as a progress indicator.

    Args:
        file_format (str): path to the xmls location.
            it should be written as follow:
            path/to/xml/standard_name_t{t:06d}.xml where (as an example)
            {t:06d} means a series of 6 digits representing the time and
            if the time value is smaller than 6 digits, the missing
            digits are filled with 0s
        tb (int): first time point to read
        te (int): last time point to read
        z_mult (float): aspect ratio
    """
    self.time_nodes = {}
    self.time_edges = {}
    self.nodes = set()
    self.successor = {}
    self.predecessor = {}
    self.pos = {}
    self.time_id = {}
    self.time = {}
    self.mother_not_found = []
    self.ind_cells = {}
    self.svIdx = {}
    self.lin = {}
    self.C_lin = {}
    self.coeffs = {}
    self.intensity = {}
    self.W = {}

    unique_id = 0
    for t in range(tb, te + 1):
        print(t, end=" ")
        if t % 10 == 0:
            print()
        root = ET.parse(file_format.format(t=t)).getroot()
        self.time_nodes[t] = set()
        self.time_edges[t] = set()
        for it in root:
            attrib = it.attrib
            if "-1.#IND" in attrib["m"] or "nan" in attrib["m"]:
                # Undefined position: only keep track of how many there are.
                self.ind_cells[t] = self.ind_cells.get(t, 0) + 1
                continue

            M_id = int(attrib["parent"])
            pos = [float(v) for v in attrib["m"].split(" ") if v != ""]
            cell_id = int(attrib["id"])
            svIdx = [int(v) for v in attrib["svIdx"].split(" ") if v != ""]
            lin_id = int(attrib["lineage"])

            # Parse and validate everything that can fail BEFORE touching
            # the lineage, so that a malformed cell is skipped as a whole.
            # (Previously the failure could happen after the cell had been
            # half registered, and its id was then reused by the next cell.)
            try:
                alpha = float(attrib["alpha"])
                W = [float(v) for v in attrib["W"].split(" ") if v != ""]
                nu = float(attrib["nu"])
                alphaPrior = float(attrib["alphaPrior"])
                pos = np.array(pos)
                pos[-1] = pos[-1] * z_mult
                W_matrix = np.array(W).reshape(3, 3)
            except (KeyError, ValueError, IndexError):
                continue

            C = unique_id
            if (t - 1, M_id) in self.time_id:
                M = self.time_id[(t - 1, M_id)]
                self.successor.setdefault(M, []).append(C)
                self.predecessor.setdefault(C, []).append(M)
                self.time_edges[t].add((M, C))
            elif M_id != -1:
                # A mother was declared but not read at the previous time.
                self.mother_not_found.append(C)
            self.pos[C] = pos
            self.nodes.add(C)
            self.time_nodes[t].add(C)
            self.time_id[(t, cell_id)] = C
            self.time[C] = t
            self.svIdx[C] = svIdx
            self.lin.setdefault(lin_id, []).append(C)
            self.C_lin[C] = lin_id
            self.intensity[C] = max(alpha - alphaPrior, 0)
            self.W[C] = W_matrix
            # Entries 0, 1, 2, 4, 5 and 8 of the flattened `W * nu`,
            # followed by the position.
            tmp = list(np.array(W) * nu)
            self.coeffs[C] = tmp[:3] + tmp[4:6] + tmp[8:9] + list(pos)
            unique_id += 1
    self.max_id = unique_id - 1
|
503
|
+
|
504
|
+
def read_from_mastodon(self, path: str, name: str):
    """
    Read a lineage tree from a Mastodon project using `mastodon_reader`.

    The spots become the nodes of the tree (their id is the label of their
    row in the spot table) and the links become its edges. The method fills
    `self.time_nodes`, `self.nodes`, `self.time`, `self.pos`,
    `self.predecessor`, `self.successor` and `self.time_edges` in place, so
    these containers are expected to already exist; `self.node_name` is
    reset.

    Args:
        path (str): path given to `MastodonReader`
        name (str): field of the spot table to use as node name, if None
            every node is named with an empty string
    """
    # Imported here so that `mastodon_reader` is only required when this
    # loader is actually called.
    from mastodon_reader import MastodonReader

    mr = MastodonReader(path)
    # NOTE(review): both tables are presumably pandas DataFrames -- confirm
    # against the mastodon_reader documentation.
    spots, links = mr.read_tables()

    self.node_name = {}

    for c in spots.iloc:
        unique_id = c.name
        x, y, z = c.x, c.y, c.z
        t = c.t
        n = c[name] if name is not None else ""
        self.time_nodes.setdefault(t, set()).add(unique_id)
        self.nodes.add(unique_id)
        self.time[unique_id] = t
        self.node_name[unique_id] = n
        self.pos[unique_id] = np.array([x, y, z])

    for e in links.iloc:
        source = e.source_idx
        target = e.target_idx
        self.predecessor.setdefault(target, []).append(source)
        self.successor.setdefault(source, []).append(target)
        # Edges are stored under the time of their source spot.
        self.time_edges.setdefault(self.time[source], set()).add(
            (source, target)
        )
    self.t_b = min(self.time_nodes.keys())
    self.t_e = max(self.time_nodes.keys())
|
536
|
+
|
537
|
+
def read_from_mastodon_csv(self, path: list):
    """
    Read a lineage tree from a pair of Mastodon csv files.

    The first three rows of each file are skipped as header. In the spot
    table, column 1 is read as the spot id, column 4 as the time and columns
    5 to 7 as the position. In the link table, columns 4 and 5 are read as
    the ids of the source and target spots.

    The method fills `self.time_nodes`, `self.nodes`, `self.time`,
    `self.pos`, `self.predecessor`, `self.successor` and `self.time_edges`
    in place, so these containers are expected to already exist;
    `self.node_name` is reset and maps every spot to its id as a string.

    Args:
        path (list or tuple of 2 str): path to the spot table followed by
            the path to the link table
    """

    def _read_rows(file_name):
        # `newline=""` is what the csv module expects from its input file.
        with open(
            file_name, encoding="utf-8", errors="ignore", newline=""
        ) as file:
            rows = list(csv.reader(file))
        # Drop the header, then the empty rows (blank lines of the file).
        return [row for row in rows[3:] if row]

    spots = _read_rows(path[0])
    links = _read_rows(path[1])
    self.node_name = {}

    for spot in spots:
        unique_id = int(spot[1])
        x, y, z = spot[5:8]
        t = int(spot[4])
        self.time_nodes.setdefault(t, set()).add(unique_id)
        self.nodes.add(unique_id)
        self.time[unique_id] = t
        self.node_name[unique_id] = spot[1]
        self.pos[unique_id] = np.array([x, y, z], dtype=float)

    for link in links:
        # The ids may be written as floats ("12.0").
        source = int(float(link[4]))
        target = int(float(link[5]))
        self.predecessor.setdefault(target, []).append(source)
        self.successor.setdefault(source, []).append(target)
        # Edges are stored under the time of their source spot.
        self.time_edges.setdefault(self.time[source], set()).add(
            (source, target)
        )
    self.t_b = min(self.time_nodes.keys())
    self.t_e = max(self.time_nodes.keys())
|
577
|
+
|
578
|
+
def read_from_mamut_xml(self, path: str):
    """Read a lineage tree from a MaMuT xml.

    The `Model` element of the file must have exactly four children, the
    second one holding the spots (grouped by frame) and the third one the
    tracks. Spots become the nodes of the tree and keep their MaMuT id.
    Edges are oriented from the earlier spot to the later one, and edges
    that refer to an unknown spot are ignored.

    Besides the name and position of each spot, the attributes
    `TISSUE_NAME` and `TISSUE_TYPE` are stored in `self.fate` and
    `self.fate_nb`, and every attribute listed in `self.xml_attributes` is
    stored in a dictionary of the same name on the instance.
    `self.time_edges` is reset but not filled by this loader.

    Args:
        path (str): path to the MaMut xml

    Raises:
        ValueError: if the file has no `Model` element, or if that element
            does not have exactly four children
    """
    tree = ET.parse(path)
    Model = None
    for elem in tree.getroot():
        if elem.tag == "Model":
            Model = elem
    if Model is None:
        raise ValueError("no `Model` element found in %s" % path)
    # Children: feature declarations, spots, tracks and filtered tracks.
    _, AllSpots, AllTracks, _ = list(Model)

    for attr in self.xml_attributes:
        self.__dict__[attr] = {}
    self.time_nodes = {}
    self.time_edges = {}
    self.nodes = set()
    self.pos = {}
    self.time = {}
    self.node_name = {}
    for frame in AllSpots:
        t = int(frame.attrib["frame"])
        self.time_nodes[t] = set()
        for cell in frame:
            cell_id = int(cell.attrib["ID"])
            x = float(cell.attrib["POSITION_X"])
            y = float(cell.attrib["POSITION_Y"])
            z = float(cell.attrib["POSITION_Z"])
            self.time_nodes[t].add(cell_id)
            self.nodes.add(cell_id)
            self.pos[cell_id] = np.array([x, y, z])
            self.time[cell_id] = t
            self.node_name[cell_id] = cell.attrib["name"]
            if "TISSUE_NAME" in cell.attrib:
                if not hasattr(self, "fate"):
                    self.fate = {}
                self.fate[cell_id] = cell.attrib["TISSUE_NAME"]
            # SECURITY NOTE(review): the two `eval` calls below run arbitrary
            # code found in the file, only load xml files from trusted
            # sources.
            if "TISSUE_TYPE" in cell.attrib:
                if not hasattr(self, "fate_nb"):
                    self.fate_nb = {}
                self.fate_nb[cell_id] = eval(cell.attrib["TISSUE_TYPE"])
            for attr in cell.attrib:
                if attr in self.xml_attributes:
                    self.__dict__[attr][cell_id] = eval(cell.attrib[attr])

    self.successor = {}
    self.predecessor = {}
    self.track_name = {}
    for track in AllTracks:
        t_name = track.attrib["name"]
        for edge in track:
            source = int(edge.attrib["SPOT_SOURCE_ID"])
            target = int(edge.attrib["SPOT_TARGET_ID"])
            if source not in self.nodes or target not in self.nodes:
                continue
            if self.time[source] > self.time[target]:
                # Always orient the edge forward in time.
                source, target = target, source
            self.successor.setdefault(source, []).append(target)
            self.predecessor.setdefault(target, []).append(source)
            self.track_name[source] = t_name
            self.track_name[target] = t_name
    self.t_b = min(self.time_nodes.keys())
    self.t_e = max(self.time_nodes.keys())
|
LineageTree/tree_styles.py
CHANGED
@@ -172,7 +172,7 @@ class simple_tree(abstract_trees):
|
|
172
172
|
cycle = cycle[cycle_times <= self.end_time]
|
173
173
|
if cycle.size:
|
174
174
|
_next = self.lT[cycle[-1]]
|
175
|
-
if
|
175
|
+
if len(_next) > 1 and self.lT.time[cycle[-1]] < self.end_time:
|
176
176
|
out_dict[current] = _next
|
177
177
|
to_do.extend(_next)
|
178
178
|
else:
|
@@ -186,16 +186,14 @@ class simple_tree(abstract_trees):
|
|
186
186
|
return super().delta(x, y, corres1, corres2, times1, times2)
|
187
187
|
|
188
188
|
def get_norm(self):
|
189
|
-
return len(
|
190
|
-
self.lT.get_sub_tree(self.root, end_time=self.end_time)
|
191
|
-
)
|
189
|
+
return len(self.lT.get_sub_tree(self.root, end_time=self.end_time))
|
192
190
|
|
193
191
|
|
194
192
|
class fragmented_tree(abstract_trees):
|
195
193
|
"""Similar idea to simple tree, but tries to correct its flaws.
|
196
|
-
|
197
|
-
|
198
|
-
|
194
|
+
Instead of having branches with length == life cycle of cell, nodes of specific length are added on the
|
195
|
+
edges of the branch, providing both accurate results and speed.
|
196
|
+
It's the recommended method for calculating edit distances on developing embryos.
|
199
197
|
"""
|
200
198
|
|
201
199
|
def __init__(self, **kwargs):
|
@@ -255,9 +253,7 @@ class fragmented_tree(abstract_trees):
|
|
255
253
|
return self.out_dict, self.times
|
256
254
|
|
257
255
|
def get_norm(self):
|
258
|
-
return len(
|
259
|
-
self.lT.get_sub_tree(self.root, end_time=self.end_time)
|
260
|
-
)
|
256
|
+
return len(self.lT.get_sub_tree(self.root, end_time=self.end_time))
|
261
257
|
|
262
258
|
def delta(self, x, y, corres1, corres2, times1, times2):
|
263
259
|
return super().delta(x, y, corres1, corres2, times1, times2)
|