LineageTree 1.8.0__py3-none-any.whl → 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
LineageTree/loaders.py CHANGED
@@ -3,894 +3,972 @@ import os
3
3
  import pickle as pkl
4
4
  import struct
5
5
  import xml.etree.ElementTree as ET
6
+ from pathlib import Path
6
7
  from warnings import warn
7
8
 
8
9
  import numpy as np
9
10
 
10
-
11
- class lineageTreeLoaders:
12
- implicit_l_t = {
13
- "AB": "P0",
14
- "P1": "P0",
15
- "EMS": "P1",
16
- "P2": "P1",
17
- "MS": "EMS",
18
- "E": "EMS",
19
- "C": "P2",
20
- "P3": "P2",
21
- "D": "P3",
22
- "P4": "P3",
23
- "Z2": "P4",
24
- "Z3": "P4",
25
- }
26
-
27
- def read_from_csv(
28
- self, file_path: str, z_mult: float, link: int = 1, delim: str = ","
29
- ):
30
- """
31
- TODO: write doc
32
- """
33
- with open(file_path) as f:
34
- lines = f.readlines()
35
- f.close()
36
- self.time_nodes = {}
37
- self.time_edges = {}
38
- unique_id = 0
39
- self.nodes = set()
40
- self.edges = set()
41
- self.successor = {}
42
- self.predecessor = {}
43
- self.pos = {}
44
- self.time_id = {}
45
- self.time = {}
46
- self.lin = {}
47
- self.C_lin = {}
48
- if not link:
49
- self.displacement = {}
50
- lines_to_int = []
51
- corres = {}
52
- for line in lines:
53
- lines_to_int += [[eval(v.strip()) for v in line.split(delim)]]
54
- lines_to_int = np.array(lines_to_int)
55
- if link == 2:
56
- lines_to_int = lines_to_int[np.argsort(lines_to_int[:, 0])]
11
+ from .lineageTree import lineageTree
12
+
13
# Daughter label -> mother label for cells whose mother cannot be found by
# name matching.  The C. elegans text readers in this module first look for a
# cell with the same label at the previous time point, then for the label
# minus its last character, and only then fall back to this table.
IMPLICIT_L_T = {
    daughter: mother
    for mother, daughters in (
        ("P0", ("AB", "P1")),
        ("P1", ("EMS", "P2")),
        ("EMS", ("MS", "E")),
        ("P2", ("C", "P3")),
        ("P3", ("D", "P4")),
        ("P4", ("Z2", "Z3")),
    )
    for daughter in daughters
}
27
+
28
# Canonical ASTEC property name -> every alias under which that property may
# be stored in an ASTEC output.  `_read_from_ASTEC_pkl` inverts this table to
# rename the keys of a loaded pickle; `_read_from_ASTEC_xml` only uses the
# canonical names (the dictionary keys) to recognise the root tag.
ASTEC_KEYDICTIONARY = {
    # lineage and per-cell measurements
    "cell_lineage": ["lineage_tree", "lin_tree", "Lineage tree", "cell_lineage"],
    "cell_h_min": ["cell_h_min", "h_mins_information"],
    "cell_volume": ["cell_volume", "volumes_information", "volumes information", "vol"],
    "cell_surface": ["cell_surface", "cell surface"],
    "cell_compactness": ["cell_compactness", "Cell Compactness", "compacity", "cell_sphericity"],
    "cell_sigma": ["cell_sigma", "sigmas_information", "sigmas"],
    "cell_labels_in_time": ["cell_labels_in_time", "Cells labels in time", "time_labels"],
    "cell_barycenter": ["cell_barycenter", "Barycenters", "barycenters"],
    # fates
    "cell_fate": ["cell_fate", "Fate"],
    "cell_fate_2": ["cell_fate_2", "Fate2"],
    "cell_fate_3": ["cell_fate_3", "Fate3"],
    "cell_fate_4": ["cell_fate_4", "Fate4"],
    # everything else
    "all_cells": ["all_cells", "All Cells", "All_Cells", "all cells", "tot_cells"],
    "cell_principal_values": ["cell_principal_values", "Principal values"],
    "cell_name": ["cell_name", "Names", "names", "cell_names"],
    "cell_contact_surface": ["cell_contact_surface", "cell_cell_contact_information"],
    "cell_history": ["cell_history", "Cells history", "cell_life", "life"],
    "cell_principal_vectors": ["cell_principal_vectors", "Principal vectors"],
    "cell_naming_score": ["cell_naming_score", "Scores", "scores"],
    "problematic_cells": ["problematic_cells"],
    "unknown_key": ["unknown_key"],
}
94
+
95
+
96
+ def read_from_csv(
97
+ file_path: str,
98
+ z_mult: float,
99
+ link: int = 1,
100
+ delim: str = ",",
101
+ name: None | str = None,
102
+ ) -> lineageTree:
103
+ """Read a lineage tree from a csv file with the following format:
104
+ id, time, z, y, x, id, pred_id, lin_id
105
+
106
+ Parameters
107
+ ----------
108
+ file_path : str
109
+ path to the csv file
110
+ z_mult : float
111
+ aspect ratio
112
+ link : int
113
+ 1 if the csv file is ordered by id, 2 if ordered by pred_id
114
+ delim : str, default=","
115
+ delimiter used in the csv file
116
+ name : None or str, optional
117
+ The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
118
+ will be the name attribute, otherwise the name will be the stem of the file path.
119
+
120
+ Returns
121
+ -------
122
+ lineageTree
123
+ lineage tree
124
+ """
125
+ with open(file_path) as f:
126
+ lines = f.readlines()
127
+ f.close()
128
+ successor = {}
129
+ pos = {}
130
+ time = {}
131
+ lines_to_int = []
132
+ corres = {}
133
+ for line in lines:
134
+ lines_to_int += [[eval(v.strip()) for v in line.split(delim)]]
135
+ lines_to_int = np.array(lines_to_int)
136
+ if link == 2:
137
+ lines_to_int = lines_to_int[np.argsort(lines_to_int[:, 0])]
138
+ else:
139
+ lines_to_int = lines_to_int[np.argsort(lines_to_int[:, 1])]
140
+ for unique_id, line in enumerate(lines_to_int):
141
+ if link == 1:
142
+ id_, t, z, y, x, pred, lin_id = line
143
+ elif link == 2:
144
+ t, z, y, x, id_, pred, lin_id = line
57
145
  else:
58
- lines_to_int = lines_to_int[np.argsort(lines_to_int[:, 1])]
59
- for line in lines_to_int:
60
- if link == 1:
61
- id_, t, z, y, x, pred, lin_id = line
62
- elif link == 2:
63
- t, z, y, x, id_, pred, lin_id = line
146
+ id_, t, z, y, x, *_ = line
147
+ pred = None
148
+ t = int(t)
149
+ positions = np.array([x, y, z])
150
+ C = unique_id
151
+ corres[id_] = C
152
+ positions[-1] = positions[-1] * z_mult
153
+ if pred in corres:
154
+ M = corres[pred]
155
+ successor.setdefault(M, []).append(C)
156
+ pos[C] = positions
157
+ time[C] = t
158
+ if not name:
159
+ tmp_name = Path(file_path).stem
160
+ if name == "":
161
+ warn(f"Name set to default {tmp_name}", stacklevel=2)
162
+ name = tmp_name
163
+ return lineageTree(successor=successor, time=time, pos=pos, name=name)
164
+
165
+
166
+ def _read_from_ASTEC_xml(file_path: str):
167
+ def _set_dictionary_value(root):
168
+ if len(root) == 0:
169
+ if root.text is None:
170
+ return None
64
171
  else:
65
- id_, t, z, y, x, dz, dy, dx = line
66
- pred = None
67
- lin_id = None
68
- t = int(t)
69
- pos = np.array([x, y, z])
70
- C = unique_id
71
- corres[id_] = C
72
- pos[-1] = pos[-1] * z_mult
73
- if pred in corres:
74
- M = corres[pred]
75
- self.predecessor[C] = [M]
76
- self.successor.setdefault(M, []).append(C)
77
- self.edges.add((M, C))
78
- self.time_edges.setdefault(t, set()).add((M, C))
79
- self.lin.setdefault(lin_id, []).append(C)
80
- self.C_lin[C] = lin_id
81
- self.pos[C] = pos
82
- self.nodes.add(C)
83
- self.time_nodes.setdefault(t, set()).add(C)
84
- self.time[C] = t
85
- if not link:
86
- self.displacement[C] = np.array([dx, dy, dz * z_mult])
87
- unique_id += 1
88
- self.max_id = unique_id - 1
89
- self.t_b = min(self.time_nodes)
90
- self.t_e = max(self.time_nodes)
91
-
92
- def read_from_ASTEC(self, file_path: str, eigen: bool = False):
93
- """
94
- Read an `xml` or `pkl` file produced by the ASTEC algorithm.
95
-
96
- Args:
97
- file_path (str): path to an output generated by ASTEC
98
- eigen (bool): whether or not to read the eigen values, default False
99
- """
100
- self._astec_keydictionary = {
101
- "cell_lineage": [
102
- "lineage_tree",
103
- "lin_tree",
104
- "Lineage tree",
105
- "cell_lineage",
106
- ],
107
- "cell_h_min": ["cell_h_min", "h_mins_information"],
108
- "cell_volume": [
109
- "cell_volume",
110
- "volumes_information",
111
- "volumes information",
112
- "vol",
113
- ],
114
- "cell_surface": ["cell_surface", "cell surface"],
115
- "cell_compactness": [
116
- "cell_compactness",
117
- "Cell Compactness",
118
- "compacity",
119
- "cell_sphericity",
120
- ],
121
- "cell_sigma": ["cell_sigma", "sigmas_information", "sigmas"],
122
- "cell_labels_in_time": [
123
- "cell_labels_in_time",
124
- "Cells labels in time",
125
- "time_labels",
126
- ],
127
- "cell_barycenter": [
128
- "cell_barycenter",
129
- "Barycenters",
130
- "barycenters",
131
- ],
132
- "cell_fate": ["cell_fate", "Fate"],
133
- "cell_fate_2": ["cell_fate_2", "Fate2"],
134
- "cell_fate_3": ["cell_fate_3", "Fate3"],
135
- "cell_fate_4": ["cell_fate_4", "Fate4"],
136
- "all_cells": [
137
- "all_cells",
138
- "All Cells",
139
- "All_Cells",
140
- "all cells",
141
- "tot_cells",
142
- ],
143
- "cell_principal_values": [
144
- "cell_principal_values",
145
- "Principal values",
146
- ],
147
- "cell_name": ["cell_name", "Names", "names", "cell_names"],
148
- "cell_contact_surface": [
149
- "cell_contact_surface",
150
- "cell_cell_contact_information",
151
- ],
152
- "cell_history": [
153
- "cell_history",
154
- "Cells history",
155
- "cell_life",
156
- "life",
157
- ],
158
- "cell_principal_vectors": [
159
- "cell_principal_vectors",
160
- "Principal vectors",
161
- ],
162
- "cell_naming_score": ["cell_naming_score", "Scores", "scores"],
163
- "problematic_cells": ["problematic_cells"],
164
- "unknown_key": ["unknown_key"],
165
- }
166
-
167
- if os.path.splitext(file_path)[-1] == ".xml":
168
- tmp_data = self._read_from_ASTEC_xml(file_path)
172
+ return eval(root.text)
169
173
  else:
170
- tmp_data = self._read_from_ASTEC_pkl(file_path, eigen)
174
+ dictionary = {}
175
+ for child in root:
176
+ key = child.tag
177
+ if child.tag == "cell":
178
+ key = int(child.attrib["cell-id"])
179
+ dictionary[key] = _set_dictionary_value(child)
180
+ return dictionary
171
181
 
172
- # make sure these are all named liked they are in tmp_data (or change dictionary above)
173
- self.name = {}
182
+ tree = ET.parse(file_path)
183
+ root = tree.getroot()
184
+ dictionary = {}
185
+
186
+ for k in ASTEC_KEYDICTIONARY:
187
+ if root.tag == k:
188
+ dictionary[str(root.tag)] = _set_dictionary_value(root)
189
+ break
190
+ else:
191
+ for child in root:
192
+ value = _set_dictionary_value(child)
193
+ if value is not None:
194
+ dictionary[str(child.tag)] = value
195
+ return dictionary
196
+
197
+
198
def _read_from_ASTEC_pkl(file_path: str, eigen: bool = False):
    """Load an ASTEC pickle file and rename its keys to canonical names.

    Every top-level key of the unpickled mapping that appears as an alias in
    ``ASTEC_KEYDICTIONARY`` is replaced by the corresponding canonical name;
    keys that are not known aliases are kept unchanged.

    Parameters
    ----------
    file_path : str
        path to the pickle file
    eigen : bool, default=False
        not used by this function; kept so existing callers keep working

    Returns
    -------
    dict
        the unpickled data, keyed by canonical property names
    """
    # NOTE(security): unpickling can execute arbitrary code -- only load
    # files that come from a trusted source.
    with open(file_path, "rb") as f:
        tmp_data = pkl.load(f, encoding="latin1")

    # Invert ASTEC_KEYDICTIONARY: alias -> canonical name.
    alias_to_canonical = {
        alias: canonical
        for canonical, aliases in ASTEC_KEYDICTIONARY.items()
        for alias in aliases
    }
    return {alias_to_canonical.get(k, k): v for k, v in tmp_data.items()}
214
+
215
+
216
+ def read_from_ASTEC(
217
+ file_path: str, eigen: bool = False, name: None | str = None
218
+ ) -> lineageTree:
219
+ """
220
+ Read an `xml` or `pkl` file produced by the ASTEC algorithm.
221
+
222
+ Parameters
223
+ ----------
224
+ file_path : str
225
+ path to an output generated by ASTEC
226
+ eigen : bool, default=False
227
+ whether or not to read the eigen values, default False
228
+ name : None or str, optional
229
+ The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
230
+ will be the name attribute, otherwise the name will be the stem of the file path.
231
+
232
+ Returns
233
+ -------
234
+ lineageTree
235
+ lineage tree
236
+ """
237
+
238
+ if os.path.splitext(file_path)[-1] == ".xml":
239
+ tmp_data = _read_from_ASTEC_xml(file_path)
240
+ else:
241
+ tmp_data = _read_from_ASTEC_pkl(file_path, eigen)
242
+
243
+ # make sure these are all named liked they are in tmp_data (or change dictionary above)
244
+ properties = {}
245
+ if "cell_volume" in tmp_data:
246
+ properties["volume"] = {}
247
+ if "cell_fate" in tmp_data:
248
+ properties["fate"] = {}
249
+ if "cell_barycenter" in tmp_data:
250
+ pos = {}
251
+ if "cell_name" in tmp_data:
252
+ properties["label"] = {}
253
+ lT2pkl = {}
254
+ pkl2lT = {}
255
+ image_label = {}
256
+
257
+ lt = tmp_data["cell_lineage"]
258
+
259
+ if "cell_contact_surface" in tmp_data:
260
+ properties["contact"] = {}
261
+ do_surf = True
262
+ surfaces = tmp_data["cell_contact_surface"]
263
+ else:
264
+ do_surf = False
265
+
266
+ inv = {vi: [c] for c, v in lt.items() for vi in v}
267
+ nodes = set(lt).union(inv)
268
+
269
+ unique_id = 0
270
+ time = {}
271
+
272
+ for unique_id, n in enumerate(nodes):
273
+ t = n // 10**4
274
+ image_label[unique_id] = n % 10**4
275
+ lT2pkl[unique_id] = n
276
+ pkl2lT[n] = unique_id
277
+ time[unique_id] = t
174
278
  if "cell_volume" in tmp_data:
175
- self.volume = {}
279
+ properties["volume"][unique_id] = tmp_data["cell_volume"].get(
280
+ n, 0.0
281
+ )
176
282
  if "cell_fate" in tmp_data:
177
- self.fates = {}
283
+ properties["fate"][unique_id] = tmp_data["cell_fate"].get(n, "")
178
284
  if "cell_barycenter" in tmp_data:
179
- self.pos = {}
180
- self.lT2pkl = {}
181
- self.pkl2lT = {}
182
- self.contact = {}
183
- self.prob_cells = set()
184
- self.image_label = {}
185
-
186
- lt = tmp_data["cell_lineage"]
187
-
188
- if "cell_contact_surface" in tmp_data:
189
- do_surf = True
190
- surfaces = tmp_data["cell_contact_surface"]
191
- else:
192
- do_surf = False
193
-
194
- inv = {vi: [c] for c, v in lt.items() for vi in v}
195
- nodes = set(lt).union(inv)
196
-
197
- unique_id = 0
198
-
199
- for n in nodes:
200
- t = n // 10**4
201
- self.image_label[unique_id] = n % 10**4
202
- self.lT2pkl[unique_id] = n
203
- self.pkl2lT[n] = unique_id
204
- self.time_nodes.setdefault(t, set()).add(unique_id)
205
- self.nodes.add(unique_id)
206
- self.time[unique_id] = t
207
- if "cell_volume" in tmp_data:
208
- self.volume[unique_id] = tmp_data["cell_volume"].get(n, 0.0)
209
- if "cell_fate" in tmp_data:
210
- self.fates[unique_id] = tmp_data["cell_fate"].get(n, "")
211
- if "cell_barycenter" in tmp_data:
212
- self.pos[unique_id] = tmp_data["cell_barycenter"].get(
213
- n, np.zeros(3)
214
- )
215
-
216
- unique_id += 1
217
- if do_surf:
218
- for c in nodes:
219
- if c in surfaces and c in self.pkl2lT:
220
- self.contact[self.pkl2lT[c]] = {
221
- self.pkl2lT.get(n, -1): s
222
- for n, s in surfaces[c].items()
223
- if n % 10**4 == 1 or n in self.pkl2lT
224
- }
225
-
226
- for n, new_id in self.pkl2lT.items():
227
- if n in inv:
228
- self.predecessor[new_id] = [self.pkl2lT[ni] for ni in inv[n]]
229
- if n in lt:
230
- self.successor[new_id] = [
231
- self.pkl2lT[ni] for ni in lt[n] if ni in self.pkl2lT
232
- ]
233
-
234
- for ni in self.successor[new_id]:
235
- self.time_edges.setdefault(t - 1, set()).add((new_id, ni))
236
-
237
- self.t_b = min(self.time_nodes)
238
- self.t_e = max(self.time_nodes)
239
- self.max_id = unique_id
240
-
241
- # do this in the end of the process, skip lineage tree and whatever is stored already
242
- discard = {
243
- "cell_volume",
244
- "cell_fate",
245
- "cell_barycenter",
246
- "cell_contact_surface",
247
- "cell_lineage",
248
- "all_cells",
249
- "cell_history",
250
- "problematic_cells",
251
- "cell_labels_in_time",
252
- }
253
- self.specific_properties = []
254
- for prop_name, prop_values in tmp_data.items():
255
- if not (prop_name in discard or hasattr(self, prop_name)):
256
- if isinstance(prop_values, dict):
257
- dictionary = {
258
- self.pkl2lT.get(k, -1): v
259
- for k, v in prop_values.items()
285
+ pos[unique_id] = tmp_data["cell_barycenter"].get(n, np.zeros(3))
286
+ if "cell_name" in tmp_data:
287
+ properties["label"][unique_id] = tmp_data["cell_name"].get(n, "")
288
+
289
+ if do_surf:
290
+ for c in nodes:
291
+ if c in surfaces and c in pkl2lT:
292
+ properties["contact"][pkl2lT[c]] = {
293
+ pkl2lT.get(n, -1): s
294
+ for n, s in surfaces[c].items()
295
+ if n % 10**4 == 1 or n in pkl2lT
296
+ }
297
+
298
+ successor = {}
299
+ for n, new_id in pkl2lT.items():
300
+ if n in lt:
301
+ successor[new_id] = [pkl2lT[ni] for ni in lt[n] if ni in pkl2lT]
302
+
303
+ # do this in the end of the process, skip lineage tree and whatever is stored already
304
+ discard = {
305
+ "cell_volume", # already stored
306
+ "cell_fate", # already stored
307
+ "cell_barycenter", # already stored
308
+ "cell_contact_surface", # already stored
309
+ "cell_lineage", # already stored
310
+ "cell_name", # already stored
311
+ "all_cells", # not a property
312
+ "cell_history", # redundant
313
+ "problematic_cells", # not useful here
314
+ "cell_labels_in_time", # redundant
315
+ }
316
+ for prop_name, prop_values in tmp_data.items():
317
+ if prop_name not in discard and isinstance(prop_values, dict):
318
+ dictionary = {pkl2lT.get(k, -1): v for k, v in prop_values.items()}
319
+ # is it a regular dictionary or a dictionary with dictionaries inside?
320
+ for key, value in dictionary.items():
321
+ if isinstance(value, dict):
322
+ # rename all ids from old to new
323
+ dictionary[key] = {
324
+ pkl2lT.get(k, -1): v for k, v in value.items()
260
325
  }
261
- # is it a regular dictionary or a dictionary with dictionaries inside?
262
- for key, value in dictionary.items():
263
- if isinstance(value, dict):
264
- # rename all ids from old to new
265
- dictionary[key] = {
266
- self.pkl2lT.get(k, -1): v
267
- for k, v in value.items()
268
- }
269
- self.__dict__[prop_name] = dictionary
270
- self.specific_properties.append(prop_name)
271
- # is any of this necessary? Or does it mean it anyways does not contain
272
- # information about the id and a simple else: is enough?
273
- elif (
274
- isinstance(prop_values, (list, set, np.ndarray))
275
- and prop_name not in []
276
- ):
277
- self.__dict__[prop_name] = prop_values
278
- self.specific_properties.append(prop_name)
279
-
280
- # what else could it be?
281
-
282
- # add a list of all available properties
283
-
284
- def _read_from_ASTEC_xml(self, file_path: str):
285
- def _set_dictionary_value(root):
286
- if len(root) == 0:
287
- if root.text is None:
288
- return None
289
- else:
290
- return eval(root.text)
326
+ properties[prop_name] = dictionary
327
+ if not name:
328
+ tmp_name = Path(file_path).stem
329
+ if name == "":
330
+ warn(f"Name set to default {tmp_name}", stacklevel=2)
331
+ name = tmp_name
332
+ return lineageTree(
333
+ successor=successor, time=time, pos=pos, name=name, **properties
334
+ )
335
+
336
+
337
+ def read_from_binary(fname: str, name: None | str = None) -> lineageTree:
338
+ """
339
+ Reads a binary lineageTree file name.
340
+ Format description: see lineageTree.to_binary
341
+
342
+ Parameters
343
+ ----------
344
+ fname : string
345
+ path to the binary file
346
+ name : None or str, optional
347
+ The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
348
+ will be the name attribute, otherwise the name will be the stem of the file path.
349
+
350
+ Returns
351
+ -------
352
+ lineageTree
353
+ lineage tree
354
+ """
355
+ q_size = struct.calcsize("q")
356
+ H_size = struct.calcsize("H")
357
+ d_size = struct.calcsize("d")
358
+
359
+ with open(fname, "rb") as f:
360
+ len_tree = struct.unpack("q", f.read(q_size))[0]
361
+ len_time = struct.unpack("q", f.read(q_size))[0]
362
+ len_pos = struct.unpack("q", f.read(q_size))[0]
363
+ number_sequence = list(
364
+ struct.unpack("q" * len_tree, f.read(q_size * len_tree))
365
+ )
366
+ time_sequence = list(
367
+ struct.unpack("H" * len_time, f.read(H_size * len_time))
368
+ )
369
+ pos_sequence = np.array(
370
+ struct.unpack("d" * len_pos, f.read(d_size * len_pos))
371
+ )
372
+
373
+ f.close()
374
+
375
+ successor = {}
376
+ time = {}
377
+ pos = {}
378
+ is_root = {}
379
+ waiting_list = []
380
+ i = 0
381
+ done = False
382
+ t = 0
383
+ if max(number_sequence[::2]) == -1:
384
+ tmp = number_sequence[1::2]
385
+ if len(tmp) * 3 == len(pos_sequence) == len(time_sequence) * 3:
386
+ time = dict(list(zip(tmp, time_sequence, strict=True)))
387
+ pos = dict(
388
+ list(
389
+ zip(
390
+ tmp,
391
+ np.reshape(pos_sequence, (len_time, 3)),
392
+ strict=True,
393
+ )
394
+ )
395
+ )
396
+ is_root = dict.fromkeys(tmp, True)
397
+ done = True
398
+ while (
399
+ i < len(number_sequence) and not done
400
+ ): # , c in enumerate(number_sequence[:-1]):
401
+ c = number_sequence[i]
402
+ if c == -1:
403
+ if waiting_list != []:
404
+ prev_mother = waiting_list.pop()
405
+ successor[prev_mother].insert(0, number_sequence[i + 1])
406
+ t = time[prev_mother] + 1
291
407
  else:
292
- dictionary = {}
293
- for child in root:
294
- key = child.tag
295
- if child.tag == "cell":
296
- key = int(child.attrib["cell-id"])
297
- dictionary[key] = _set_dictionary_value(child)
298
- return dictionary
299
-
300
- tree = ET.parse(file_path)
301
- root = tree.getroot()
302
- dictionary = {}
303
-
304
- for k, _v in self._astec_keydictionary.items():
305
- if root.tag == k:
306
- dictionary[str(root.tag)] = _set_dictionary_value(root)
307
- break
308
- else:
309
- for child in root:
310
- value = _set_dictionary_value(child)
311
- if value is not None:
312
- dictionary[str(child.tag)] = value
313
- return dictionary
314
-
315
- def _read_from_ASTEC_pkl(self, file_path: str, eigen: bool = False):
316
- with open(file_path, "rb") as f:
317
- tmp_data = pkl.load(f, encoding="latin1")
318
- f.close()
319
- new_ref = {}
320
- for k, v in self._astec_keydictionary.items():
321
- for key in v:
322
- new_ref[key] = k
323
- new_dict = {}
324
-
325
- for k, v in tmp_data.items():
326
- if k in new_ref:
327
- new_dict[new_ref[k]] = v
408
+ t = time_sequence.pop(0)
409
+
410
+ elif c == -2:
411
+ successor[waiting_list[-1]] = [number_sequence[i + 1]]
412
+ is_root[number_sequence[i + 1]] = False
413
+ pos[waiting_list[-1]] = pos_sequence[:3]
414
+ pos_sequence = pos_sequence[3:]
415
+ time[waiting_list[-1]] = t
416
+ t += 1
417
+
418
+ elif number_sequence[i + 1] >= 0:
419
+ successor[c] = [number_sequence[i + 1]]
420
+ pos[c] = pos_sequence[:3]
421
+ pos_sequence = pos_sequence[3:]
422
+ time[c] = t
423
+ t += 1
424
+
425
+ elif number_sequence[i + 1] == -2:
426
+ waiting_list += [c]
427
+
428
+ elif number_sequence[i + 1] == -1:
429
+ pos[c] = pos_sequence[:3]
430
+ pos_sequence = pos_sequence[3:]
431
+ time[c] = t
432
+ t += 1
433
+ i += 1
434
+ if waiting_list != []:
435
+ prev_mother = waiting_list.pop()
436
+ successor[prev_mother].insert(0, number_sequence[i + 1])
437
+ t = time[prev_mother] + 1
328
438
  else:
329
- new_dict[k] = v
330
- return new_dict
331
-
332
- def read_from_binary(self, fname: str):
333
- """
334
- Reads a binary lineageTree file name.
335
- Format description: see self.to_binary
336
-
337
- Args:
338
- fname: string, path to the binary file
339
- reverse_time: bool, not used
340
- """
341
- q_size = struct.calcsize("q")
342
- H_size = struct.calcsize("H")
343
- d_size = struct.calcsize("d")
344
-
345
- with open(fname, "rb") as f:
346
- len_tree = struct.unpack("q", f.read(q_size))[0]
347
- len_time = struct.unpack("q", f.read(q_size))[0]
348
- len_pos = struct.unpack("q", f.read(q_size))[0]
349
- number_sequence = list(
350
- struct.unpack("q" * len_tree, f.read(q_size * len_tree))
351
- )
352
- time_sequence = list(
353
- struct.unpack("H" * len_time, f.read(H_size * len_time))
354
- )
355
- pos_sequence = np.array(
356
- struct.unpack("d" * len_pos, f.read(d_size * len_pos))
357
- )
358
-
359
- f.close()
360
-
361
- successor = {}
362
- predecessor = {}
363
- time = {}
364
- time_nodes = {}
365
- time_edges = {}
366
- pos = {}
367
- is_root = {}
368
- nodes = []
369
- edges = []
370
- waiting_list = []
371
- i = 0
372
- done = False
373
- if max(number_sequence[::2]) == -1:
374
- tmp = number_sequence[1::2]
375
- if len(tmp) * 3 == len(pos_sequence) == len(time_sequence) * 3:
376
- time = dict(list(zip(tmp, time_sequence)))
377
- for c, t in time.items():
378
- time_nodes.setdefault(t, set()).add(c)
379
- pos = dict(
380
- list(zip(tmp, np.reshape(pos_sequence, (len_time, 3))))
381
- )
382
- is_root = {c: True for c in tmp}
383
- nodes = tmp
384
- done = True
385
- while (
386
- i < len(number_sequence) and not done
387
- ): # , c in enumerate(number_sequence[:-1]):
388
- c = number_sequence[i]
389
- if c == -1:
390
- if waiting_list != []:
391
- prev_mother = waiting_list.pop()
392
- successor[prev_mother].insert(0, number_sequence[i + 1])
393
- edges.append((prev_mother, number_sequence[i + 1]))
394
- time_edges.setdefault(t, set()).add(
395
- (prev_mother, number_sequence[i + 1])
396
- )
397
- is_root[number_sequence[i + 1]] = False
398
- t = time[prev_mother] + 1
399
- else:
439
+ if len(time_sequence) > 0:
400
440
  t = time_sequence.pop(0)
401
- is_root[number_sequence[i + 1]] = True
402
-
403
- elif c == -2:
404
- successor[waiting_list[-1]] = [number_sequence[i + 1]]
405
- edges.append((waiting_list[-1], number_sequence[i + 1]))
406
- time_edges.setdefault(t, set()).add(
407
- (waiting_list[-1], number_sequence[i + 1])
408
- )
409
- is_root[number_sequence[i + 1]] = False
410
- pos[waiting_list[-1]] = pos_sequence[:3]
411
- pos_sequence = pos_sequence[3:]
412
- nodes.append(waiting_list[-1])
413
- time[waiting_list[-1]] = t
414
- time_nodes.setdefault(t, set()).add(waiting_list[-1])
415
- t += 1
416
-
417
- elif number_sequence[i + 1] >= 0:
418
- successor[c] = [number_sequence[i + 1]]
419
- edges.append((c, number_sequence[i + 1]))
420
- time_edges.setdefault(t, set()).add(
421
- (c, number_sequence[i + 1])
422
- )
423
- is_root[number_sequence[i + 1]] = False
424
- pos[c] = pos_sequence[:3]
425
- pos_sequence = pos_sequence[3:]
426
- nodes.append(c)
427
- time[c] = t
428
- time_nodes.setdefault(t, set()).add(c)
429
- t += 1
430
-
431
- elif number_sequence[i + 1] == -2:
432
- waiting_list += [c]
433
-
434
- elif number_sequence[i + 1] == -1:
435
- pos[c] = pos_sequence[:3]
436
- pos_sequence = pos_sequence[3:]
437
- nodes.append(c)
438
- time[c] = t
439
- time_nodes.setdefault(t, set()).add(c)
440
- t += 1
441
- i += 1
442
- if waiting_list != []:
443
- prev_mother = waiting_list.pop()
444
- successor[prev_mother].insert(0, number_sequence[i + 1])
445
- edges.append((prev_mother, number_sequence[i + 1]))
446
- time_edges.setdefault(t, set()).add(
447
- (prev_mother, number_sequence[i + 1])
448
- )
449
- if i + 1 < len(number_sequence):
450
- is_root[number_sequence[i + 1]] = False
451
- t = time[prev_mother] + 1
441
+ i += 1
442
+ if not name:
443
+ tmp_name = Path(fname).stem
444
+ if name == "":
445
+ warn(f"Name set to default {tmp_name}", stacklevel=2)
446
+ name = tmp_name
447
+ return lineageTree(successor=successor, time=time, pos=pos, name=name)
448
+
449
+
450
+ def read_from_txt_for_celegans(
451
+ file: str, name: None | str = None
452
+ ) -> lineageTree:
453
+ """
454
+ Read a C. elegans lineage tree
455
+
456
+ Parameters
457
+ ----------
458
+ file : str
459
+ Path to the file to read
460
+ name : None or str, optional
461
+ The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
462
+ will be the name attribute, otherwise the name will be the stem of the file path.
463
+
464
+ Returns
465
+ -------
466
+ lineageTree
467
+ lineage tree
468
+ """
469
+ with open(file) as f:
470
+ raw = f.readlines()[1:]
471
+ f.close()
472
+ _labels = {}
473
+ time_nodes = {}
474
+ pos = {}
475
+ time = {}
476
+ successor = {}
477
+
478
+ for unique_id, line in enumerate(raw):
479
+ t = int(line.split("\t")[0])
480
+ _labels[unique_id] = line.split("\t")[1]
481
+ position = np.array(line.split("\t")[2:5], dtype=float)
482
+ time_nodes.setdefault(t, set()).add(unique_id)
483
+ pos[unique_id] = position
484
+ time[unique_id] = t
485
+
486
+ t_b = min(time_nodes)
487
+
488
+ for t, cells in time_nodes.items():
489
+ if t != t_b:
490
+ prev_cells = time_nodes[t - 1]
491
+ name_to_id = {_labels[c]: c for c in prev_cells}
492
+ for c in cells:
493
+ if _labels[c] in name_to_id:
494
+ p = name_to_id[_labels[c]]
495
+ elif _labels[c][:-1] in name_to_id:
496
+ p = name_to_id[_labels[c][:-1]]
497
+ elif IMPLICIT_L_T.get(_labels[c]) in name_to_id:
498
+ p = name_to_id[IMPLICIT_L_T.get(_labels[c])]
452
499
  else:
453
- if len(time_sequence) > 0:
454
- t = time_sequence.pop(0)
455
- if i + 1 < len(number_sequence):
456
- is_root[number_sequence[i + 1]] = True
457
- i += 1
458
-
459
- predecessor = {vi: [k] for k, v in successor.items() for vi in v}
460
-
461
- self.successor = successor
462
- self.predecessor = predecessor
463
- self.time = time
464
- self.time_nodes = time_nodes
465
- self.time_edges = time_edges
466
- self.pos = pos
467
- self.nodes = set(nodes)
468
- self.t_b = min(time_nodes)
469
- self.t_e = max(time_nodes)
470
- self.is_root = is_root
471
- self.max_id = max(self.nodes)
472
-
473
- def read_from_txt_for_celegans(self, file: str):
474
- """
475
- Read a C. elegans lineage tree
476
-
477
- Args:
478
- file (str): Path to the file to read
479
- """
480
- with open(file) as f:
481
- raw = f.readlines()[1:]
482
- f.close()
483
- self.name = {}
484
-
485
- unique_id = 0
486
- for line in raw:
487
- t = int(line.split("\t")[0])
488
- self.name[unique_id] = line.split("\t")[1]
489
- position = np.array(line.split("\t")[2:5], dtype=float)
490
- self.time_nodes.setdefault(t, set()).add(unique_id)
491
- self.nodes.add(unique_id)
492
- self.pos[unique_id] = position
493
- self.time[unique_id] = t
494
- unique_id += 1
495
-
496
- self.t_b = min(self.time_nodes)
497
- self.t_e = max(self.time_nodes)
498
-
499
- for t, cells in self.time_nodes.items():
500
- if t != self.t_b:
501
- prev_cells = self.time_nodes[t - 1]
502
- name_to_id = {self.name[c]: c for c in prev_cells}
503
- for c in cells:
504
- if self.name[c] in name_to_id:
505
- p = name_to_id[self.name[c]]
506
- elif self.name[c][:-1] in name_to_id:
507
- p = name_to_id[self.name[c][:-1]]
508
- elif self.implicit_l_t.get(self.name[c]) in name_to_id:
509
- p = name_to_id[self.implicit_l_t.get(self.name[c])]
510
- else:
511
- p = None
512
- self.predecessor.setdefault(c, []).append(p)
513
- self.successor.setdefault(p, []).append(c)
514
- self.time_edges.setdefault(t - 1, set()).add((p, c))
515
- self.max_id = unique_id
516
-
517
- def read_from_txt_for_celegans_CAO(
518
- self,
519
- file: str,
520
- reorder: bool = False,
521
- raw_size: float = None,
522
- shape: float = None,
523
- ):
524
- """
525
- Read a C. elegans lineage tree from Cao et al.
526
-
527
- Args:
528
- file (str): Path to the file to read
529
- """
530
-
531
- def split_line(line):
532
- return (
533
- line.split()[0],
534
- eval(line.split()[1]),
535
- eval(line.split()[2]),
536
- eval(line.split()[3]),
537
- eval(line.split()[4]),
500
+ p = None
501
+ successor.setdefault(p, []).append(c)
502
+ if not name:
503
+ tmp_name = Path(file).stem
504
+ if name == "":
505
+ warn(f"Name set to default {tmp_name}", stacklevel=2)
506
+ name = tmp_name
507
+ properties = {"_labels": _labels}
508
+ return lineageTree(
509
+ successor=successor, time=time, pos=pos, name=name, **properties
510
+ )
511
+
512
+
513
def read_from_txt_for_celegans_CAO(
    file: str,
    reorder: bool = False,
    raw_size: np.ndarray | None = None,
    shape: float | None = None,
    name: str | None = None,
) -> lineageTree:
    """
    Read a C. elegans lineage tree from Cao et al.

    The first line of the file is skipped.  Every following non-blank line is
    split on whitespace and its first five fields are read as
    ``label, t, z, x, y``.  A cell is linked to the cell of the previous time
    point that has the same label, else the label without its last character,
    else the mother given by ``IMPLICIT_L_T``.

    Parameters
    ----------
    file : str
        Path to the file to read
    reorder : bool, default=False
        If True, each position ``[x, y, z]`` has its third coordinate replaced
        by ``raw_size[2] - z``, is multiplied by ``shape / raw_size`` and has
        its first two coordinates swapped.  Requires `raw_size` and `shape`.
    raw_size : np.ndarray, optional
        Only used when `reorder` is True (see above).
    shape : float, optional
        Only used when `reorder` is True (see above).
    name : None or str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree
    """

    def split_line(line):
        fields = line.split()
        # NOTE(security): eval() runs arbitrary expressions found in the
        # file; only read files from a trusted source.
        return (
            fields[0],
            eval(fields[1]),
            eval(fields[2]),
            eval(fields[3]),
            eval(fields[4]),
        )

    with open(file) as f:
        # Drop the header line and ignore blank lines (e.g. a trailing one).
        raw = [line for line in f.readlines()[1:] if line.strip()]

    label = {}
    time_nodes = {}
    pos = {}
    successor = {}
    time = {}

    # The loop variable must not be called `label`: that would rebind the
    # dictionary above to a string and break `label[unique_id] = ...`.
    for unique_id, (cell_label, t, z, x, y) in enumerate(
        map(split_line, raw)
    ):
        label[unique_id] = cell_label
        # `np.float` was removed from NumPy; it was an alias of builtin float.
        position = np.array([x, y, z], dtype=float)
        time_nodes.setdefault(t, set()).add(unique_id)
        if reorder:
            flipped = np.array(
                [position[0], position[1], raw_size[2] - position[2]]
            )
            position = (shape / raw_size * flipped)[[1, 0, 2]]
        pos[unique_id] = position
        time[unique_id] = t

    t_b = min(time_nodes)

    for t, cells in time_nodes.items():
        if t == t_b:
            continue
        # Assumes every time point after the first has a predecessor at t - 1.
        prev_cells = time_nodes[t - 1]
        name_to_id = {label[c]: c for c in prev_cells}
        for c in cells:
            current = label[c]
            if current in name_to_id:
                p = name_to_id[current]
            elif current[:-1] in name_to_id:
                p = name_to_id[current[:-1]]
            elif IMPLICIT_L_T.get(current) in name_to_id:
                p = name_to_id[IMPLICIT_L_T.get(current)]
            else:
                warn(
                    f"error, cell {label[c]} has no predecessors",
                    stacklevel=2,
                )
                p = None
            # NOTE(review): cells without a predecessor are filed under the
            # key None, as before -- confirm lineageTree handles that key.
            successor.setdefault(p, []).append(c)

    if not name:
        tmp_name = Path(file).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    return lineageTree(
        successor=successor, time=time, pos=pos, label=label, name=name
    )
601
+
602
+
603
+ def read_from_txt_for_celegans_BAO(
604
+ path: str, name: None | str = None
605
+ ) -> lineageTree:
606
+ """Read a C. elegans Bao file from http://digital-development.org
607
+
608
+ Parameters
609
+ ----------
610
+ file : str
611
+ Path to the file to read
612
+ name : str, optional
613
+ The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
614
+ will be the name attribute, otherwise the name will be the stem of the file path.
615
+
616
+ Returns
617
+ -------
618
+ lineageTree
619
+ lineage tree
620
+ """
621
+ cell_times = {}
622
+ properties = {}
623
+ properties["expression"] = {}
624
+ properties["_labels"] = {}
625
+ with open(path) as f:
626
+ for line in f:
627
+ if "cell_name" not in line:
628
+ cell_times[line.split("\t")[0]] = [
629
+ eval(val) for val in line.split("\t")[-1].split(",")
630
+ ]
631
+ unique_id = 0
632
+ to_link = {}
633
+ successor = {}
634
+ for c, lc in cell_times.items():
635
+ ids = list(range(unique_id, unique_id + len(lc)))
636
+ successor.update({ids[i]: [ids[i + 1]] for i in range(len(ids) - 1)})
637
+ properties["expression"].update(dict(zip(ids, lc, strict=True)))
638
+ properties["_labels"].update(dict.fromkeys(ids, c))
639
+ to_link[c] = (unique_id, unique_id + len(lc) - 1)
640
+ unique_id += len(lc)
641
+
642
+ for c_name, c_id in to_link.items():
643
+ if c_name[:-1] in to_link:
644
+ successor.setdefault(to_link[c_name[:-1]][1], []).append(c_id[0])
645
+ elif c_name in IMPLICIT_L_T and IMPLICIT_L_T[c_name] in to_link:
646
+ successor.setdefault(to_link[IMPLICIT_L_T[c_name]][1], []).append(
647
+ c_id[0]
538
648
  )
539
-
540
- with open(file) as f:
541
- raw = f.readlines()[1:]
542
- f.close()
543
- self.name = {}
544
-
545
- unique_id = 0
546
- for name, t, z, x, y in map(split_line, raw):
547
- self.name[unique_id] = name
548
- position = np.array([x, y, z], dtype=np.float)
549
- self.time_nodes.setdefault(t, set()).add(unique_id)
550
- self.nodes.add(unique_id)
551
- if reorder:
552
-
553
- def flip(x):
554
- return np.array([x[0], x[1], raw_size[2] - x[2]])
555
-
556
- def adjust(x):
557
- return (shape / raw_size * flip(x))[[1, 0, 2]]
558
-
559
- self.pos[unique_id] = adjust(position)
560
- else:
561
- self.pos[unique_id] = position
562
- self.time[unique_id] = t
563
- unique_id += 1
564
-
565
- self.t_b = min(self.time_nodes)
566
- self.t_e = max(self.time_nodes)
567
-
568
- for t, cells in self.time_nodes.items():
569
- if t != self.t_b:
570
- prev_cells = self.time_nodes[t - 1]
571
- name_to_id = {self.name[c]: c for c in prev_cells}
572
- for c in cells:
573
- if self.name[c] in name_to_id:
574
- p = name_to_id[self.name[c]]
575
- elif self.name[c][:-1] in name_to_id:
576
- p = name_to_id[self.name[c][:-1]]
577
- elif self.implicit_l_t.get(self.name[c]) in name_to_id:
578
- p = name_to_id[self.implicit_l_t.get(self.name[c])]
579
- else:
580
- warn(
581
- f"error, cell {self.name[c]} has no predecessors",
582
- stacklevel=2,
583
- )
584
- p = None
585
- self.predecessor.setdefault(c, []).append(p)
586
- self.successor.setdefault(p, []).append(c)
587
- self.time_edges.setdefault(t - 1, set()).add((p, c))
588
- self.max_id = unique_id
589
-
590
- def read_tgmm_xml(
591
- self, file_format: str, tb: int, te: int, z_mult: float = 1.0
592
- ):
593
- """Reads a lineage tree from TGMM xml output.
594
-
595
- Args:
596
- file_format (str): path to the xmls location.
597
- it should be written as follow:
598
- path/to/xml/standard_name_t{t:06d}.xml where (as an example)
599
- {t:06d} means a series of 6 digits representing the time and
600
- if the time values is smaller that 6 digits, the missing
601
- digits are filed with 0s
602
- tb (int): first time point to read
603
- te (int): last time point to read
604
- z_mult (float): aspect ratio
605
- """
606
- self.time_nodes = {}
607
- self.time_edges = {}
608
- unique_id = 0
609
- self.nodes = set()
610
- self.successor = {}
611
- self.predecessor = {}
612
- self.pos = {}
613
- self.time_id = {}
614
- self.time = {}
615
- self.mother_not_found = []
616
- self.ind_cells = {}
617
- self.svIdx = {}
618
- self.lin = {}
619
- self.C_lin = {}
620
- self.coeffs = {}
621
- self.intensity = {}
622
- self.W = {}
623
- for t in range(tb, te + 1):
624
- tree = ET.parse(file_format.format(t=t))
625
- root = tree.getroot()
626
- self.time_nodes[t] = set()
627
- self.time_edges[t] = set()
628
- for it in root:
649
+ if not name:
650
+ tmp_name = Path(path).stem
651
+ if name == "":
652
+ warn(f"Name set to default {tmp_name}", stacklevel=2)
653
+ name = tmp_name
654
+ return lineageTree(
655
+ successor=successor, starting_time=0, name=name, **properties
656
+ )
657
+
658
+
659
+ def read_from_tgmm_xml(
660
+ file_format: str,
661
+ tb: int,
662
+ te: int,
663
+ z_mult: float = 1.0,
664
+ name: None | str = None,
665
+ ) -> lineageTree:
666
+ """Reads a lineage tree from TGMM xml output.
667
+
668
+ Parameters
669
+ ----------
670
+ file_format : str
671
+ path to the xmls location.
672
+ it should be written as follow:
673
+ path/to/xml/standard_name_t{t:06d}.xml where (as an example)
674
+ {t:06d} means a series of 6 digits representing the time and
675
+ if the time values is smaller that 6 digits, the missing
676
+ digits are filed with 0s
677
+ tb : int
678
+ first time point to read
679
+ te : int
680
+ last time point to read
681
+ z_mult : float, default=1.0
682
+ aspect ratio
683
+ name : str, optional
684
+ The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
685
+ will be the name attribute, otherwise the name will be the stem of the file path.
686
+
687
+ Returns
688
+ -------
689
+ lineageTree
690
+ lineage tree
691
+ """
692
+ unique_id = 0
693
+ successor = {}
694
+ pos = {}
695
+ time_id = {}
696
+ time = {}
697
+ properties = {}
698
+ properties["svIdx"] = {}
699
+ properties["lin"] = {}
700
+ properties["C_lin"] = {}
701
+ properties["coeffs"] = {}
702
+ properties["intensity"] = {}
703
+ W = {}
704
+ for t in range(tb, te + 1):
705
+ tree = ET.parse(file_format.format(t=t))
706
+ root = tree.getroot()
707
+ for unique_id, it in enumerate(root):
708
+ if "-1.#IND" not in it.attrib["m"] and "nan" not in it.attrib["m"]:
709
+ M_id, positions, cell_id, svIdx, lin_id = (
710
+ int(it.attrib["parent"]),
711
+ [float(v) for v in it.attrib["m"].split(" ") if v != ""],
712
+ int(it.attrib["id"]),
713
+ [int(v) for v in it.attrib["svIdx"].split(" ") if v != ""],
714
+ int(it.attrib["lineage"]),
715
+ )
629
716
  if (
630
- "-1.#IND" not in it.attrib["m"]
631
- and "nan" not in it.attrib["m"]
717
+ "alpha" in it.attrib
718
+ and "W" in it.attrib
719
+ and "nu" in it.attrib
720
+ and "alphaPrior" in it.attrib
632
721
  ):
633
- M_id, pos, cell_id, svIdx, lin_id = (
634
- int(it.attrib["parent"]),
722
+ alpha, W, nu, alphaPrior = (
723
+ float(it.attrib["alpha"]),
635
724
  [
636
725
  float(v)
637
- for v in it.attrib["m"].split(" ")
726
+ for v in it.attrib["W"].split(" ")
638
727
  if v != ""
639
728
  ],
640
- int(it.attrib["id"]),
641
- [
642
- int(v)
643
- for v in it.attrib["svIdx"].split(" ")
644
- if v != ""
645
- ],
646
- int(it.attrib["lineage"]),
729
+ float(it.attrib["nu"]),
730
+ float(it.attrib["alphaPrior"]),
647
731
  )
648
- try:
649
- alpha, W, nu, alphaPrior = (
650
- float(it.attrib["alpha"]),
651
- [
652
- float(v)
653
- for v in it.attrib["W"].split(" ")
654
- if v != ""
655
- ],
656
- float(it.attrib["nu"]),
657
- float(it.attrib["alphaPrior"]),
658
- )
659
- pos = np.array(pos)
660
- C = unique_id
661
- pos[-1] = pos[-1] * z_mult
662
- if (t - 1, M_id) in self.time_id:
663
- M = self.time_id[(t - 1, M_id)]
664
- self.successor.setdefault(M, []).append(C)
665
- self.predecessor.setdefault(C, []).append(M)
666
- self.time_edges[t].add((M, C))
667
- else:
668
- if M_id != -1:
669
- self.mother_not_found.append(C)
670
- self.pos[C] = pos
671
- self.nodes.add(C)
672
- self.time_nodes[t].add(C)
673
- self.time_id[(t, cell_id)] = C
674
- self.time[C] = t
675
- self.svIdx[C] = svIdx
676
- self.lin.setdefault(lin_id, []).append(C)
677
- self.C_lin[C] = lin_id
678
- self.intensity[C] = max(alpha - alphaPrior, 0)
679
- tmp = list(np.array(W) * nu)
680
- self.W[C] = np.array(W).reshape(3, 3)
681
- self.coeffs[C] = (
682
- tmp[:3] + tmp[4:6] + tmp[8:9] + list(pos)
683
- )
684
- unique_id += 1
685
- except Exception:
686
- pass
687
- else:
688
- if t in self.ind_cells:
689
- self.ind_cells[t] += 1
690
- else:
691
- self.ind_cells[t] = 1
692
- self.max_id = unique_id - 1
693
-
694
- def read_from_mastodon(self, path: str, name: str):
695
- """
696
- TODO: write doc
697
- """
698
- from mastodon_reader import MastodonReader
699
-
700
- mr = MastodonReader(path)
701
- spots, links = mr.read_tables()
702
-
703
- self.node_name = {}
704
-
705
- for c in spots.iloc:
706
- unique_id = c.name
707
- x, y, z = c.x, c.y, c.z
708
- t = c.t
709
- n = c[name] if name is not None else ""
710
- self.time_nodes.setdefault(t, set()).add(unique_id)
711
- self.nodes.add(unique_id)
712
- self.time[unique_id] = t
713
- self.node_name[unique_id] = n
714
- self.pos[unique_id] = np.array([x, y, z])
715
-
716
- for e in links.iloc:
717
- source = e.source_idx
718
- target = e.target_idx
719
- self.predecessor.setdefault(target, []).append(source)
720
- self.successor.setdefault(source, []).append(target)
721
- self.time_edges.setdefault(self.time[source], set()).add(
722
- (source, target)
723
- )
724
- self.t_b = min(self.time_nodes.keys())
725
- self.t_e = max(self.time_nodes.keys())
726
-
727
- def read_from_mastodon_csv(self, path: str):
728
- """
729
- TODO: Write doc
730
- """
731
- spots = []
732
- links = []
733
- self.node_name = {}
734
-
735
- with open(path[0], encoding="utf-8", errors="ignore") as file:
736
- csvreader = csv.reader(file)
737
- for row in csvreader:
738
- spots.append(row)
739
- spots = spots[3:]
740
-
741
- with open(path[1], encoding="utf-8", errors="ignore") as file:
742
- csvreader = csv.reader(file)
743
- for row in csvreader:
744
- links.append(row)
745
- links = links[3:]
746
-
747
- for spot in spots:
748
- unique_id = int(spot[1])
749
- x, y, z = spot[5:8]
750
- t = int(spot[4])
751
- self.time_nodes.setdefault(t, set()).add(unique_id)
752
- self.nodes.add(unique_id)
753
- self.time[unique_id] = t
754
- self.node_name[unique_id] = spot[1]
755
- self.pos[unique_id] = np.array([x, y, z], dtype=float)
756
-
757
- for link in links:
758
- source = int(float(link[4]))
759
- target = int(float(link[5]))
760
- self.predecessor.setdefault(target, []).append(source)
761
- self.successor.setdefault(source, []).append(target)
762
- self.time_edges.setdefault(self.time[source], set()).add(
763
- (source, target)
732
+ positions = np.array(positions)
733
+ C = unique_id
734
+ positions[-1] = positions[-1] * z_mult
735
+ if (t - 1, M_id) in time_id:
736
+ M = time_id[(t - 1, M_id)]
737
+ successor.setdefault(M, []).append(C)
738
+ pos[C] = positions
739
+ time_id[(t, cell_id)] = C
740
+ time[C] = t
741
+ properties["svIdx"][C] = svIdx
742
+ properties["lin"].setdefault(lin_id, []).append(C)
743
+ properties["C_lin"][C] = lin_id
744
+ properties["intensity"][C] = max(alpha - alphaPrior, 0)
745
+ tmp = list(np.array(W) * nu)
746
+ W[C] = np.array(W).reshape(3, 3)
747
+ properties["coeffs"][C] = (
748
+ tmp[:3] + tmp[4:6] + tmp[8:9] + list(positions)
749
+ )
750
+ if not name:
751
+ tmp_name = Path(file_format).stem
752
+ if name == "":
753
+ warn(f"Name set to default {tmp_name}", stacklevel=2)
754
+ name = tmp_name
755
+ return lineageTree(
756
+ successor=successor, time=time, pos=pos, name=name, **properties
757
+ )
758
+
759
+
760
+ def read_from_mastodon(
761
+ path: str, tag_set: int | None = None, name: None | str = None
762
+ ) -> lineageTree:
763
+ """Read a maston lineage tree.
764
+
765
+ Parameters
766
+ ----------
767
+ path : str
768
+ path to the mastodon file
769
+ tag_set : int, optional
770
+ The tag set that will be used to label.
771
+ name : str, optional
772
+ The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
773
+ will be the name attribute, otherwise the name will be the stem of the file path.
774
+
775
+ Returns
776
+ -------
777
+ lineageTree
778
+ lineage tree
779
+ """
780
+ from mastodon_reader import MastodonReader
781
+
782
+ mr = MastodonReader(path)
783
+ spots, links = mr.read_tables()
784
+
785
+ label = {}
786
+ time = {}
787
+ pos = {}
788
+ successor = {}
789
+
790
+ for c in spots.iloc:
791
+ unique_id = c.name
792
+ x, y, z = c.x, c.y, c.z
793
+ t = c.t
794
+ time[unique_id] = t
795
+ pos[unique_id] = np.array([x, y, z])
796
+
797
+ for e in links.iloc:
798
+ source = e.source_idx
799
+ target = e.target_idx
800
+ successor.setdefault(source, []).append(target)
801
+ if isinstance(tag_set, int):
802
+ tags = mr.read_tags(spots, links)[tag_set]
803
+ for tag in tags["tags"]:
804
+ label[tag["id"]] = tag["label"]
805
+
806
+ if not name:
807
+ tmp_name = Path(path).stem
808
+ if name == "":
809
+ warn(f"Name set to default {tmp_name}", stacklevel=2)
810
+ name = tmp_name
811
+ return lineageTree(
812
+ successor=successor, time=time, pos=pos, label=label, name=name
813
+ )
814
+
815
+
816
+ def read_from_mastodon_csv(
817
+ paths: list[str], name: None | str = None
818
+ ) -> lineageTree:
819
+ """Read a lineage tree from a mastodon csv.
820
+
821
+ Parameters
822
+ ----------
823
+ paths : list[str]
824
+ list of paths to the csv files
825
+ name : None or str, optional
826
+ The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
827
+ will be the name attribute, otherwise the name will be the stem of the file path.
828
+
829
+ Returns
830
+ -------
831
+ lineageTree
832
+ lineage tree
833
+ """
834
+ spots = []
835
+ links = []
836
+ label = {}
837
+ time = {}
838
+ pos = {}
839
+ successor = {}
840
+
841
+ with open(paths[0], encoding="utf-8", errors="ignore") as file:
842
+ csvreader = csv.reader(file)
843
+ for row in csvreader:
844
+ spots.append(row)
845
+ spots = spots[3:]
846
+
847
+ with open(paths[1], encoding="utf-8", errors="ignore") as file:
848
+ csvreader = csv.reader(file)
849
+ for row in csvreader:
850
+ links.append(row)
851
+ links = links[3:]
852
+
853
+ for spot in spots:
854
+ unique_id = int(spot[1])
855
+ x, y, z = spot[5:8]
856
+ t = int(spot[4])
857
+ time[unique_id] = t
858
+ label[unique_id] = spot[1]
859
+ pos[unique_id] = np.array([x, y, z], dtype=float)
860
+
861
+ for link in links:
862
+ source = int(float(link[4]))
863
+ target = int(float(link[5]))
864
+ successor.setdefault(source, []).append(target)
865
+ if not name:
866
+ tmp_name = Path(paths[0]).stem
867
+ if name == "":
868
+ warn(f"Name set to default {tmp_name}", stacklevel=2)
869
+ name = tmp_name
870
+
871
+ return lineageTree(
872
+ successor=successor, time=time, pos=pos, label=label, name=name
873
+ )
874
+
875
+
876
+ def read_from_mamut_xml(
877
+ path: str, xml_attributes: list[str] | None = None, name: None | str = None
878
+ ) -> lineageTree:
879
+ """Read a lineage tree from a MaMuT xml.
880
+
881
+ Parameters
882
+ ----------
883
+ path : str
884
+ path to the MaMut xml
885
+ name : None or str, optional
886
+ The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
887
+ will be the name attribute, otherwise the name will be the stem of the file path.
888
+
889
+ Returns
890
+ -------
891
+ lineageTree
892
+ lineage tree
893
+ """
894
+ tree = ET.parse(path)
895
+ for elem in tree.getroot():
896
+ if elem.tag == "Model":
897
+ Model = elem
898
+ FeatureDeclarations, AllSpots, AllTracks, FilteredTracks = list(Model)
899
+ xml_attributes = xml_attributes or []
900
+
901
+ properties = {}
902
+ for attr in xml_attributes:
903
+ properties[attr] = {}
904
+ nodes = set()
905
+ pos = {}
906
+ time = {}
907
+ properties["label"] = {}
908
+
909
+ for frame in AllSpots:
910
+ t = int(frame.attrib["frame"])
911
+ for cell in frame:
912
+ cell_id, n, x, y, z = (
913
+ int(cell.attrib["ID"]),
914
+ cell.attrib["name"],
915
+ float(cell.attrib["POSITION_X"]),
916
+ float(cell.attrib["POSITION_Y"]),
917
+ float(cell.attrib["POSITION_Z"]),
764
918
  )
765
- self.t_b = min(self.time_nodes.keys())
766
- self.t_e = max(self.time_nodes.keys())
767
-
768
- def read_from_mamut_xml(self, path: str):
769
- """Read a lineage tree from a MaMuT xml.
770
-
771
- Args:
772
- path (str): path to the MaMut xml
773
- """
774
- tree = ET.parse(path)
775
- for elem in tree.getroot():
776
- if elem.tag == "Model":
777
- Model = elem
778
- FeatureDeclarations, AllSpots, AllTracks, FilteredTracks = list(Model)
779
-
780
- for attr in self.xml_attributes:
781
- self.__dict__[attr] = {}
782
- self.time_nodes = {}
783
- self.time_edges = {}
784
- self.nodes = set()
785
- self.pos = {}
786
- self.time = {}
787
- self.node_name = {}
788
- for frame in AllSpots:
789
- t = int(frame.attrib["frame"])
790
- self.time_nodes[t] = set()
791
- for cell in frame:
792
- cell_id, n, x, y, z = (
793
- int(cell.attrib["ID"]),
794
- cell.attrib["name"],
795
- float(cell.attrib["POSITION_X"]),
796
- float(cell.attrib["POSITION_Y"]),
797
- float(cell.attrib["POSITION_Z"]),
798
- )
799
- self.time_nodes[t].add(cell_id)
800
- self.nodes.add(cell_id)
801
- self.pos[cell_id] = np.array([x, y, z])
802
- self.time[cell_id] = t
803
- self.node_name[cell_id] = n
804
- if "TISSUE_NAME" in cell.attrib:
805
- if not hasattr(self, "fate"):
806
- self.fate = {}
807
- self.fate[cell_id] = cell.attrib["TISSUE_NAME"]
808
- if "TISSUE_TYPE" in cell.attrib:
809
- if not hasattr(self, "fate_nb"):
810
- self.fate_nb = {}
811
- self.fate_nb[cell_id] = eval(cell.attrib["TISSUE_TYPE"])
812
- for attr in cell.attrib:
813
- if attr in self.xml_attributes:
814
- self.__dict__[attr][cell_id] = eval(cell.attrib[attr])
815
-
816
- tracks = {}
817
- self.successor = {}
818
- self.predecessor = {}
819
- self.track_name = {}
820
- for track in AllTracks:
821
- if "TRACK_DURATION" in track.attrib:
822
- t_id, _ = (
823
- int(track.attrib["TRACK_ID"]),
824
- float(track.attrib["TRACK_DURATION"]),
825
- )
826
- else:
827
- t_id = int(track.attrib["TRACK_ID"])
828
- t_name = track.attrib["name"]
829
- tracks[t_id] = []
830
- for edge in track:
831
- s, t = (
832
- int(edge.attrib["SPOT_SOURCE_ID"]),
833
- int(edge.attrib["SPOT_TARGET_ID"]),
919
+ nodes.add(cell_id)
920
+ pos[cell_id] = np.array([x, y, z])
921
+ time[cell_id] = t
922
+ properties["label"][cell_id] = n
923
+ if "TISSUE_NAME" in cell.attrib:
924
+ if "fate" not in properties:
925
+ properties["fate"] = {}
926
+ properties["fate"][cell_id] = cell.attrib["TISSUE_NAME"]
927
+ if "TISSUE_TYPE" in cell.attrib:
928
+ if "fate_nb" not in properties:
929
+ properties["fate_nb"] = {}
930
+ properties["fate_nb"][cell_id] = eval(
931
+ cell.attrib["TISSUE_TYPE"]
834
932
  )
835
- if s in self.nodes and t in self.nodes:
836
- if self.time[s] > self.time[t]:
837
- s, t = t, s
838
- self.successor.setdefault(s, []).append(t)
839
- self.predecessor.setdefault(t, []).append(s)
840
- self.track_name[s] = t_name
841
- self.track_name[t] = t_name
842
- tracks[t_id].append((s, t))
843
- self.t_b = min(self.time_nodes.keys())
844
- self.t_e = max(self.time_nodes.keys())
845
-
846
- def read_C_elegans_bao(self, path):
847
- cell_times = {}
848
- self.expression = {}
849
- with open(path) as f:
850
- for line in f:
851
- if "cell_name" not in line:
852
- cell_times[line.split("\t")[0]] = list(
853
- line.split("\t")[-1].split(",")
854
- )
855
- new_dict = {}
856
- end_dict = {}
857
- self.t_e = 0
858
- self.t_b = 0
859
- for c, lc in cell_times.items():
860
- new_dict[c] = self.add_node(0)
861
- tmp = self.add_branch(
862
- new_dict[c],
863
- length=len(lc) - 1,
864
- reverse=True,
865
- move_timepoints=True,
933
+ for attr in cell.attrib:
934
+ if attr in xml_attributes:
935
+ properties[attr][cell_id] = eval(cell.attrib[attr])
936
+
937
+ properties["tracks"] = {}
938
+ successor = {}
939
+ properties["track_name"] = {}
940
+ for track in AllTracks:
941
+ if "TRACK_DURATION" in track.attrib:
942
+ t_id, _ = (
943
+ int(track.attrib["TRACK_ID"]),
944
+ float(track.attrib["TRACK_DURATION"]),
945
+ )
946
+ else:
947
+ t_id = int(track.attrib["TRACK_ID"])
948
+ t_name = track.attrib["name"]
949
+ properties["tracks"][t_id] = []
950
+ for edge in track:
951
+ s, t = (
952
+ int(edge.attrib["SPOT_SOURCE_ID"]),
953
+ int(edge.attrib["SPOT_TARGET_ID"]),
866
954
  )
867
- for i, node in enumerate(self.get_cycle(tmp)):
868
- self.expression[node] = int(lc[i])
869
- self._labels[self.get_cycle(tmp)[0]] = c
870
- self._labels.pop(tmp)
871
- end_dict[c] = self.get_cycle(new_dict[c])[-1]
872
- cell_names = list(cell_times.keys())
873
- c_to_p = {}
874
- while cell_names:
875
- cur = cell_names.pop()
876
- if cur[:-1] in cell_names:
877
- c_to_p[cur] = cur[:-1]
878
- c_to_p.update(self.implicit_l_t)
879
- for c, p in c_to_p.items():
880
- if p in cell_times:
881
- cyc = end_dict[p]
882
- self.predecessor[new_dict[c]] = [cyc]
883
- if cyc not in self.successor:
884
- self.successor[cyc] = []
885
- self.successor[cyc].append(new_dict[c])
886
- self.time_nodes.clear()
887
- for root in self.roots:
888
- to_do = [root]
889
- while to_do:
890
- cur = to_do.pop()
891
- self.time_nodes.setdefault(self.time[cur], set()).add(cur)
892
- _next = self.successor.get(cur, [])
893
- to_do += _next
894
- for n in _next:
895
- self.time[n] = self.time[cur] + 1
896
- self.t_e = max(self.time.values())
955
+ if s in nodes and t in nodes:
956
+ if time[s] > time[t]:
957
+ s, t = t, s
958
+ successor.setdefault(s, []).append(t)
959
+ properties["track_name"][s] = t_name
960
+ properties["track_name"][t] = t_name
961
+ properties["tracks"][t_id].append((s, t))
962
+ if not name:
963
+ tmp_name = Path(path).stem
964
+ if name == "":
965
+ warn(f"Name set to default {tmp_name}", stacklevel=2)
966
+ name = tmp_name
967
+
968
+ return lineageTree(
969
+ successor=successor,
970
+ time=time,
971
+ pos=pos,
972
+ name=name,
973
+ **properties,
974
+ )