LineageTree 1.7.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
LineageTree/loaders.py CHANGED
@@ -1,722 +1,974 @@
1
1
  import csv
2
2
  import os
3
3
  import pickle as pkl
4
+ import struct
4
5
  import xml.etree.ElementTree as ET
6
+ from pathlib import Path
7
+ from warnings import warn
5
8
 
6
9
  import numpy as np
7
10
 
8
-
9
- class lineageTreeLoaders:
10
-
11
- def read_from_csv(
12
- self, file_path: str, z_mult: float, link: int = 1, delim: str = ","
13
- ):
14
- """
15
- TODO: write doc
16
- """
17
- with open(file_path) as f:
18
- lines = f.readlines()
19
- f.close()
20
- self.time_nodes = {}
21
- self.time_edges = {}
22
- unique_id = 0
23
- self.nodes = set()
24
- self.edges = set()
25
- self.successor = {}
26
- self.predecessor = {}
27
- self.pos = {}
28
- self.time_id = {}
29
- self.time = {}
30
- self.lin = {}
31
- self.C_lin = {}
32
- if not link:
33
- self.displacement = {}
34
- lines_to_int = []
35
- corres = {}
36
- for line in lines:
37
- lines_to_int += [[eval(v.strip()) for v in line.split(delim)]]
38
- lines_to_int = np.array(lines_to_int)
39
- if link == 2:
40
- lines_to_int = lines_to_int[np.argsort(lines_to_int[:, 0])]
11
+ from .lineageTree import lineageTree
12
+
13
# Mother of each named C. elegans founder cell (daughter name -> mother name).
# The C. elegans readers fall back on this table when a cell's mother cannot
# be found by dropping the last character of the cell's own name.
IMPLICIT_L_T = {
    "AB": "P0",
    "P1": "P0",
    "EMS": "P1",
    "P2": "P1",
    "MS": "EMS",
    "E": "EMS",
    "C": "P2",
    "P3": "P2",
    "D": "P3",
    "P4": "P3",
    "Z2": "P4",
    "Z3": "P4",
}
27
+
28
# Canonical ASTEC property name -> every spelling of that property accepted in
# input files. `_read_from_ASTEC_pkl` inverts this table to rename the keys of
# a loaded pickle to their canonical form; `_read_from_ASTEC_xml` uses the
# canonical names to recognise a single-property xml root.
ASTEC_KEYDICTIONARY = {
    "cell_lineage": [
        "lineage_tree",
        "lin_tree",
        "Lineage tree",
        "cell_lineage",
    ],
    "cell_h_min": ["cell_h_min", "h_mins_information"],
    "cell_volume": [
        "cell_volume",
        "volumes_information",
        "volumes information",
        "vol",
    ],
    "cell_surface": ["cell_surface", "cell surface"],
    "cell_compactness": [
        "cell_compactness",
        "Cell Compactness",
        "compacity",
        "cell_sphericity",
    ],
    "cell_sigma": ["cell_sigma", "sigmas_information", "sigmas"],
    "cell_labels_in_time": [
        "cell_labels_in_time",
        "Cells labels in time",
        "time_labels",
    ],
    "cell_barycenter": [
        "cell_barycenter",
        "Barycenters",
        "barycenters",
    ],
    "cell_fate": ["cell_fate", "Fate"],
    "cell_fate_2": ["cell_fate_2", "Fate2"],
    "cell_fate_3": ["cell_fate_3", "Fate3"],
    "cell_fate_4": ["cell_fate_4", "Fate4"],
    "all_cells": [
        "all_cells",
        "All Cells",
        "All_Cells",
        "all cells",
        "tot_cells",
    ],
    "cell_principal_values": [
        "cell_principal_values",
        "Principal values",
    ],
    "cell_name": ["cell_name", "Names", "names", "cell_names"],
    "cell_contact_surface": [
        "cell_contact_surface",
        "cell_cell_contact_information",
    ],
    "cell_history": [
        "cell_history",
        "Cells history",
        "cell_life",
        "life",
    ],
    "cell_principal_vectors": [
        "cell_principal_vectors",
        "Principal vectors",
    ],
    "cell_naming_score": ["cell_naming_score", "Scores", "scores"],
    "problematic_cells": ["problematic_cells"],
    "unknown_key": ["unknown_key"],
}
94
+
95
+
96
def read_from_csv(
    file_path: str,
    z_mult: float,
    link: int = 1,
    delim: str = ",",
    name: None | str = None,
) -> lineageTree:
    """Read a lineage tree from a csv file.

    The column layout is selected by `link`:

    - ``link == 1``: ``id, time, z, y, x, pred_id, lin_id``
    - ``link == 2``: ``time, z, y, x, id, pred_id, lin_id``
    - any other value: ``id, time, z, y, x, ...`` (extra columns are ignored
      and no links between cells are created)

    Rows are processed in increasing time order, and a link is only created
    when the predecessor id has already been read.

    Parameters
    ----------
    file_path : str
        path to the csv file
    z_mult : float
        aspect ratio, the z coordinate is multiplied by this value
    link : int, default=1
        column layout of the file, see above
    delim : str, default=","
        delimiter used in the csv file
    name : None or str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree
    """
    with open(file_path) as f:
        # Blank lines (typically a trailing newline) carry no data.
        lines = [line for line in f if line.strip()]

    # NOTE(security): every field is parsed with `eval`, so the file content
    # is executed as Python expressions. Only read files from trusted sources.
    rows = np.array(
        [[eval(v.strip()) for v in line.split(delim)] for line in lines]
    )
    if rows.size:
        # The time column is the first one for link == 2, the second otherwise.
        time_column = 0 if link == 2 else 1
        rows = rows[np.argsort(rows[:, time_column])]

    successor = {}
    pos = {}
    time = {}
    corres = {}  # id found in the file -> id used in the lineage tree
    for unique_id, row in enumerate(rows):
        if link == 1:
            id_, t, z, y, x, pred, lin_id = row
        elif link == 2:
            t, z, y, x, id_, pred, lin_id = row
        else:
            id_, t, z, y, x, *_ = row
            pred = None
        corres[id_] = unique_id
        if pred in corres:
            successor.setdefault(corres[pred], []).append(unique_id)
        # Keep the per-cell array under its own name: binding it to `pos`
        # would replace the dictionary that collects all the positions.
        position = np.array([x, y, z])
        position[-1] = position[-1] * z_mult
        pos[unique_id] = position
        time[unique_id] = int(t)

    if not name:
        tmp_name = Path(file_path).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    return lineageTree(successor=successor, time=time, pos=pos, name=name)
164
+
165
+
166
def _read_from_ASTEC_xml(file_path: str):
    """Parse an ASTEC `xml` file into nested dictionaries.

    If the tag of the xml root is one of the canonical names of
    `ASTEC_KEYDICTIONARY`, the file is read as a single property and the
    result has that tag as its only key. Otherwise every child of the root is
    read as one property, keyed by the child's tag, and children that
    evaluate to ``None`` are left out.

    Parameters
    ----------
    file_path : str
        path to the xml file

    Returns
    -------
    dict
        mapping from tag to the decoded content of the element
    """

    def _set_dictionary_value(root):
        # Decode one element: a leaf is evaluated, an inner element becomes
        # a dictionary of its children.
        if len(root) == 0:
            if root.text is None:
                return None
            # NOTE(security): the text of the element is executed with
            # `eval`. Only read files from trusted sources.
            return eval(root.text)
        dictionary = {}
        for child in root:
            key = child.tag
            if child.tag == "cell":
                # <cell> elements are keyed by their integer id.
                key = int(child.attrib["cell-id"])
            dictionary[key] = _set_dictionary_value(child)
        return dictionary

    root = ET.parse(file_path).getroot()
    dictionary = {}

    if root.tag in ASTEC_KEYDICTIONARY:
        dictionary[str(root.tag)] = _set_dictionary_value(root)
    else:
        for child in root:
            value = _set_dictionary_value(child)
            if value is not None:
                dictionary[str(child.tag)] = value
    return dictionary
196
+
197
+
198
def _read_from_ASTEC_pkl(file_path: str, eigen: bool = False):
    """Load an ASTEC `pkl` file and rename its keys to their canonical form.

    Every top-level key that is listed as an alias in `ASTEC_KEYDICTIONARY`
    is replaced by the corresponding canonical name; any other key is kept
    unchanged.

    Parameters
    ----------
    file_path : str
        path to the pickle file
    eigen : bool, default=False
        accepted for compatibility with `read_from_ASTEC`, currently unused

    Returns
    -------
    dict
        content of the pickle with canonical property names
    """
    with open(file_path, "rb") as f:
        # NOTE(security): unpickling can execute arbitrary code. Only read
        # files from trusted sources.
        tmp_data = pkl.load(f, encoding="latin1")

    alias_to_key = {
        alias: key
        for key, aliases in ASTEC_KEYDICTIONARY.items()
        for alias in aliases
    }
    return {alias_to_key.get(k, k): v for k, v in tmp_data.items()}
214
+
215
+
216
def read_from_ASTEC(
    file_path: str, eigen: bool = False, name: None | str = None
) -> lineageTree:
    """
    Read an `xml` or `pkl` file produced by the ASTEC algorithm.

    Files whose extension is ``.xml`` are read as xml, anything else is read
    as a pickle. ASTEC cell ids are split as ``time * 10**4 + label`` and
    every cell is given a new id, counting from 0.

    The following properties are stored when present in the file:
    ``cell_volume`` as ``volume``, ``cell_fate`` as ``fate``, ``cell_name``
    as ``label``, ``cell_contact_surface`` as ``contact`` and
    ``cell_barycenter`` as the positions. Every other dictionary-valued entry
    that is not in the discard list is stored under its own name with its
    cell ids renamed. Ids that are not part of the lineage are mapped to -1.

    Parameters
    ----------
    file_path : str
        path to an output generated by ASTEC
    eigen : bool, default=False
        whether or not to read the eigen values, default False
    name : None or str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree

    Raises
    ------
    KeyError
        if the file does not contain a cell lineage
    """
    if os.path.splitext(file_path)[-1] == ".xml":
        tmp_data = _read_from_ASTEC_xml(file_path)
    else:
        tmp_data = _read_from_ASTEC_pkl(file_path, eigen)

    lt = tmp_data["cell_lineage"]

    # (key in the file, name of the stored property, value for missing cells)
    per_cell = [
        (key, prop, default)
        for key, prop, default in (
            ("cell_volume", "volume", 0.0),
            ("cell_fate", "fate", ""),
            ("cell_name", "label", ""),
        )
        if key in tmp_data
    ]
    properties = {prop: {} for _, prop, _ in per_cell}
    has_barycenter = "cell_barycenter" in tmp_data

    # Every cell of the lineage: the mothers and all of their daughters.
    nodes = set(lt).union(vi for v in lt.values() for vi in v)

    pkl2lT = {}  # ASTEC id -> lineage tree id
    time = {}
    pos = {}
    for unique_id, n in enumerate(nodes):
        pkl2lT[n] = unique_id
        time[unique_id] = n // 10**4
        for key, prop, default in per_cell:
            properties[prop][unique_id] = tmp_data[key].get(n, default)
        if has_barycenter:
            pos[unique_id] = tmp_data["cell_barycenter"].get(n, np.zeros(3))

    if "cell_contact_surface" in tmp_data:
        surfaces = tmp_data["cell_contact_surface"]
        # Neighbours whose label is 1 are kept even when they are not in the
        # lineage; they end up under the id -1.
        properties["contact"] = {
            pkl2lT[c]: {
                pkl2lT.get(n, -1): s
                for n, s in surfaces[c].items()
                if n % 10**4 == 1 or n in pkl2lT
            }
            for c in nodes
            if c in surfaces
        }

    successor = {
        new_id: [pkl2lT[ni] for ni in lt[n] if ni in pkl2lT]
        for n, new_id in pkl2lT.items()
        if n in lt
    }

    # do this in the end of the process, skip lineage tree and whatever is stored already
    discard = {
        "cell_volume",  # already stored
        "cell_fate",  # already stored
        "cell_barycenter",  # already stored
        "cell_contact_surface",  # already stored
        "cell_lineage",  # already stored
        "cell_name",  # already stored
        "all_cells",  # not a property
        "cell_history",  # redundant
        "problematic_cells",  # not useful here
        "cell_labels_in_time",  # redundant
    }
    for prop_name, prop_values in tmp_data.items():
        if prop_name in discard or not isinstance(prop_values, dict):
            continue
        # Rename the ids of the dictionary and, when its values are
        # dictionaries themselves, the ids of these inner dictionaries too.
        properties[prop_name] = {
            pkl2lT.get(k, -1): (
                {pkl2lT.get(ki, -1): vi for ki, vi in v.items()}
                if isinstance(v, dict)
                else v
            )
            for k, v in prop_values.items()
        }

    if not name:
        tmp_name = Path(file_path).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name

    tree_kwargs = {"successor": successor, "time": time, "name": name}
    if has_barycenter:
        # Positions are only passed on when the file provides barycenters;
        # `pos` used to be undefined at this point for files without them.
        tree_kwargs["pos"] = pos
    return lineageTree(**tree_kwargs, **properties)
335
+
336
+
337
def read_from_binary(fname: str, name: None | str = None) -> lineageTree:
    """
    Reads a binary lineageTree file name.
    Format description: see lineageTree.to_binary

    The file starts with three signed 64 bit integers: the lengths of the
    tree, time and position sequences. They are followed by the tree sequence
    (signed 64 bit integers), the time sequence (unsigned 16 bit integers) and
    the position sequence (doubles, three per cell).

    Parameters
    ----------
    fname : string
        path to the binary file
    name : None or str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree
    """
    q_size = struct.calcsize("q")
    H_size = struct.calcsize("H")
    d_size = struct.calcsize("d")

    with open(fname, "rb") as f:
        len_tree = struct.unpack("q", f.read(q_size))[0]
        len_time = struct.unpack("q", f.read(q_size))[0]
        len_pos = struct.unpack("q", f.read(q_size))[0]
        number_sequence = list(
            struct.unpack("q" * len_tree, f.read(q_size * len_tree))
        )
        time_sequence = list(
            struct.unpack("H" * len_time, f.read(H_size * len_time))
        )
        pos_sequence = np.array(
            struct.unpack("d" * len_pos, f.read(d_size * len_pos))
        )

    successor = {}
    time = {}
    pos = {}
    waiting_list = []  # mothers still waiting for another daughter
    i = 0
    done = False
    t = 0
    # Fast path: every even entry is -1, so the odd entries are single cells
    # without any link. An empty tree sequence has nothing to decode.
    if number_sequence and max(number_sequence[::2]) == -1:
        tmp = number_sequence[1::2]
        if len(tmp) * 3 == len(pos_sequence) == len(time_sequence) * 3:
            time = dict(zip(tmp, time_sequence, strict=True))
            pos = dict(
                zip(
                    tmp,
                    np.reshape(pos_sequence, (len_time, 3)),
                    strict=True,
                )
            )
            done = True
    while i < len(number_sequence) and not done:
        c = number_sequence[i]
        if c == -1:
            if waiting_list:
                # Next id is attached to the last waiting mother.
                prev_mother = waiting_list.pop()
                successor[prev_mother].insert(0, number_sequence[i + 1])
                t = time[prev_mother] + 1
            else:
                # No mother is waiting: the next time comes from the file.
                t = time_sequence.pop(0)

        elif c == -2:
            # The id after -2 becomes the successor of the last waiting
            # mother, whose position and time are read now.
            successor[waiting_list[-1]] = [number_sequence[i + 1]]
            pos[waiting_list[-1]] = pos_sequence[:3]
            pos_sequence = pos_sequence[3:]
            time[waiting_list[-1]] = t
            t += 1

        elif number_sequence[i + 1] >= 0:
            # c is followed by its single successor.
            successor[c] = [number_sequence[i + 1]]
            pos[c] = pos_sequence[:3]
            pos_sequence = pos_sequence[3:]
            time[c] = t
            t += 1

        elif number_sequence[i + 1] == -2:
            # c is followed by -2: it is put on hold as a mother.
            waiting_list += [c]

        elif number_sequence[i + 1] == -1:
            # c is followed by -1: c has no successor. The -1 is consumed
            # here and handled like the `c == -1` case above.
            pos[c] = pos_sequence[:3]
            pos_sequence = pos_sequence[3:]
            time[c] = t
            t += 1
            i += 1
            if waiting_list:
                prev_mother = waiting_list.pop()
                successor[prev_mother].insert(0, number_sequence[i + 1])
                t = time[prev_mother] + 1
            elif len(time_sequence) > 0:
                t = time_sequence.pop(0)
        i += 1

    if not name:
        tmp_name = Path(fname).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    return lineageTree(successor=successor, time=time, pos=pos, name=name)
448
+
449
+
450
def read_from_txt_for_celegans(
    file: str, name: None | str = None
) -> lineageTree:
    """
    Read a C. elegans lineage tree

    The first line of the file is skipped. Every other line is tab separated
    and holds the time, the name of the cell and three coordinates, in that
    order. A cell is linked to the cell of the previous time point that has,
    by order of priority: the same name, the same name without its last
    character, or the name given by `IMPLICIT_L_T`.

    Parameters
    ----------
    file : str
        Path to the file to read
    name : None or str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree
    """
    with open(file) as f:
        # Drop the first line and the blank lines.
        raw = [line for line in f.readlines()[1:] if line.strip()]
    _labels = {}
    time_nodes = {}
    pos = {}
    time = {}
    successor = {}

    for unique_id, line in enumerate(raw):
        fields = line.split("\t")
        t = int(fields[0])
        _labels[unique_id] = fields[1]
        pos[unique_id] = np.array(fields[2:5], dtype=float)
        time_nodes.setdefault(t, set()).add(unique_id)
        time[unique_id] = t

    # `default` keeps a file without any cell from failing here.
    t_b = min(time_nodes, default=None)

    for t, cells in time_nodes.items():
        if t == t_b:
            continue
        # A missing time point means that no predecessor can be found.
        prev_cells = time_nodes.get(t - 1, ())
        name_to_id = {_labels[c]: c for c in prev_cells}
        for c in cells:
            cell_name = _labels[c]
            if cell_name in name_to_id:
                p = name_to_id[cell_name]
            elif cell_name[:-1] in name_to_id:
                p = name_to_id[cell_name[:-1]]
            elif IMPLICIT_L_T.get(cell_name) in name_to_id:
                p = name_to_id[IMPLICIT_L_T.get(cell_name)]
            else:
                # Same message as the one of read_from_txt_for_celegans_CAO.
                warn(
                    f"error, cell {cell_name} has no predecessors",
                    stacklevel=2,
                )
                # NOTE(review): the cell is then stored as a successor of
                # `None`; confirm that lineageTree expects this.
                p = None
            successor.setdefault(p, []).append(c)
    if not name:
        tmp_name = Path(file).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    properties = {"_labels": _labels}
    return lineageTree(
        successor=successor, time=time, pos=pos, name=name, **properties
    )
511
+
512
+
513
def read_from_txt_for_celegans_CAO(
    file: str,
    reorder: bool = False,
    raw_size: np.ndarray | None = None,
    shape: float | None = None,
    name: str | None = None,
) -> lineageTree:
    """
    Read a C. elegans lineage tree from Cao et al.

    The first line of the file is skipped. Every other line is whitespace
    separated and holds the name of the cell, the time and the z, x and y
    coordinates, in that order. A cell is linked to the cell of the previous
    time point that has, by order of priority: the same name, the same name
    without its last character, or the name given by `IMPLICIT_L_T`.

    Parameters
    ----------
    file : str
        Path to the file to read
    reorder : bool, default=False
        if True, the third coordinate of each position is flipped with
        respect to `raw_size`, the position is rescaled by
        ``shape / raw_size`` and its first two coordinates are swapped
    raw_size : np.ndarray, optional
        size used to flip and rescale the positions, required when `reorder`
        is True
    shape : float, optional
        size the positions are rescaled to, required when `reorder` is True
    name : None or str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree
    """

    def split_line(line):
        # NOTE(security): the numeric fields are parsed with `eval`. Only
        # read files from trusted sources.
        fields = line.split()
        return (
            fields[0],
            eval(fields[1]),
            eval(fields[2]),
            eval(fields[3]),
            eval(fields[4]),
        )

    def flip(x):
        return np.array([x[0], x[1], raw_size[2] - x[2]])

    def adjust(x):
        return (shape / raw_size * flip(x))[[1, 0, 2]]

    with open(file) as f:
        # Drop the first line and the blank lines.
        raw = [line for line in f.readlines()[1:] if line.strip()]
    label = {}
    time_nodes = {}
    pos = {}
    successor = {}
    time = {}

    # The name read from the line must not be called `label`: that would
    # replace the dictionary collecting the names of all the cells.
    for unique_id, (cell_name, t, z, x, y) in enumerate(map(split_line, raw)):
        label[unique_id] = cell_name
        # `np.float` does not exist in recent NumPy; `float` is the same dtype.
        position = np.array([x, y, z], dtype=float)
        time_nodes.setdefault(t, set()).add(unique_id)
        pos[unique_id] = adjust(position) if reorder else position
        time[unique_id] = t

    # `default` keeps a file without any cell from failing here.
    t_b = min(time_nodes, default=None)

    for t, cells in time_nodes.items():
        if t == t_b:
            continue
        # A missing time point means that no predecessor can be found.
        prev_cells = time_nodes.get(t - 1, ())
        name_to_id = {label[c]: c for c in prev_cells}
        for c in cells:
            if label[c] in name_to_id:
                p = name_to_id[label[c]]
            elif label[c][:-1] in name_to_id:
                p = name_to_id[label[c][:-1]]
            elif IMPLICIT_L_T.get(label[c]) in name_to_id:
                p = name_to_id[IMPLICIT_L_T.get(label[c])]
            else:
                warn(
                    f"error, cell {label[c]} has no predecessors",
                    stacklevel=2,
                )
                # NOTE(review): the cell is then stored as a successor of
                # `None`; confirm that lineageTree expects this.
                p = None
            successor.setdefault(p, []).append(c)
    if not name:
        tmp_name = Path(file).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    return lineageTree(
        successor=successor, time=time, pos=pos, label=label, name=name
    )
601
+
602
+
603
def read_from_txt_for_celegans_BAO(
    path: str, name: None | str = None
) -> lineageTree:
    """Read a C. elegans Bao file from http://digital-development.org

    Every line that does not contain ``cell_name`` is tab separated: its
    first field is the name of a cell and its last field is a comma
    separated list of values. Each value creates one node, the nodes of a
    cell are chained in the order of the list and the values are stored as
    the ``expression`` property. The first node of a cell is linked to the
    last node of the cell whose name is the same without its last character
    or, failing that, of the cell given by `IMPLICIT_L_T`.

    Parameters
    ----------
    path : str
        Path to the file to read
    name : str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree
    """
    cell_times = {}
    properties = {"expression": {}, "_labels": {}}
    with open(path) as f:
        for line in f:
            # Skip the blank lines and the header.
            if not line.strip() or "cell_name" in line:
                continue
            fields = line.split("\t")
            # NOTE(security): the values are parsed with `eval`. Only read
            # files from trusted sources.
            cell_times[fields[0]] = [eval(val) for val in fields[-1].split(",")]

    unique_id = 0
    to_link = {}  # name of the cell -> (id of its first node, id of its last node)
    successor = {}
    for c, lc in cell_times.items():
        ids = list(range(unique_id, unique_id + len(lc)))
        successor.update({ids[i]: [ids[i + 1]] for i in range(len(ids) - 1)})
        properties["expression"].update(dict(zip(ids, lc, strict=True)))
        properties["_labels"].update({id_: c for id_ in ids})
        to_link[c] = (unique_id, unique_id + len(lc) - 1)
        unique_id += len(lc)

    for c_name, c_id in to_link.items():
        if c_name[:-1] in to_link:
            mother = c_name[:-1]
        elif c_name in IMPLICIT_L_T and IMPLICIT_L_T[c_name] in to_link:
            mother = IMPLICIT_L_T[c_name]
        else:
            # No mother in the file: the cell is left without predecessor.
            continue
        successor.setdefault(to_link[mother][1], []).append(c_id[0])

    if not name:
        tmp_name = Path(path).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    return lineageTree(
        successor=successor, starting_time=0, name=name, **properties
    )
657
+
658
+
659
def read_from_tgmm_xml(
    file_format: str,
    tb: int,
    te: int,
    z_mult: float = 1.0,
    name: None | str = None,
) -> lineageTree:
    """Read a lineage tree from TGMM xml output.

    Parameters
    ----------
    file_format : str
        Path template of the xml files. It must contain a ``{t}``
        replacement field that is filled with the time point, e.g.
        ``path/to/xml/standard_name_t{t:06d}.xml`` where ``{t:06d}`` is the
        time point written with 6 digits, padded with leading 0s.
    tb : int
        First time point to read.
    te : int
        Last time point to read (inclusive).
    z_mult : float, default=1.0
        Aspect ratio: the last coordinate of every position is multiplied
        by this value.
    name : str, optional
        The name attribute of the lineageTree. A non-empty string is used
        as is, otherwise the name is the stem of `file_format`. Passing an
        empty string additionally emits a warning.

    Returns
    -------
    lineageTree
        The lineage tree built from the xml files.
    """
    gmm_keys = ("alpha", "W", "nu", "alphaPrior")
    successor = {}
    pos = {}
    time = {}
    # (time point, id within the xml file) -> node id; used to resolve the
    # "parent" attribute of the cells of the following time point.
    time_id = {}
    properties = {
        "svIdx": {},
        "lin": {},
        "C_lin": {},
        "coeffs": {},
        "intensity": {},
    }
    # Node ids keep growing across time points. Restarting the count for
    # every file would make cells of different time points share an id.
    unique_id = 0
    for t in range(tb, te + 1):
        root = ET.parse(file_format.format(t=t)).getroot()
        for it in root:
            attrib = it.attrib
            # Entries whose position is undefined are skipped.
            if "-1.#IND" in attrib["m"] or "nan" in attrib["m"]:
                continue
            M_id = int(attrib["parent"])
            # Per-cell values get their own names so that they never
            # overwrite the `pos` accumulator dictionary.
            cell_pos = np.array(
                [float(v) for v in attrib["m"].split(" ") if v != ""]
            )
            cell_id = int(attrib["id"])
            svIdx = [int(v) for v in attrib["svIdx"].split(" ") if v != ""]
            lin_id = int(attrib["lineage"])
            # Entries lacking any of the mixture parameters are ignored.
            if not all(key in attrib for key in gmm_keys):
                continue
            alpha = float(attrib["alpha"])
            cell_W = [float(v) for v in attrib["W"].split(" ") if v != ""]
            nu = float(attrib["nu"])
            alphaPrior = float(attrib["alphaPrior"])

            C = unique_id
            unique_id += 1
            cell_pos[-1] = cell_pos[-1] * z_mult
            if (t - 1, M_id) in time_id:
                M = time_id[(t - 1, M_id)]
                successor.setdefault(M, []).append(C)
            pos[C] = cell_pos
            time_id[(t, cell_id)] = C
            time[C] = t
            properties["svIdx"][C] = svIdx
            properties["lin"].setdefault(lin_id, []).append(C)
            properties["C_lin"][C] = lin_id
            properties["intensity"][C] = max(alpha - alphaPrior, 0)
            tmp = list(np.array(cell_W) * nu)
            properties["coeffs"][C] = (
                tmp[:3] + tmp[4:6] + tmp[8:9] + list(cell_pos)
            )
    if not name:
        tmp_name = Path(file_format).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    return lineageTree(
        successor=successor, time=time, pos=pos, name=name, **properties
    )
758
+
759
+
760
def read_from_mastodon(
    path: str, tag_set: int | None = None, name: None | str = None
) -> lineageTree:
    """Read a Mastodon lineage tree.

    Parameters
    ----------
    path : str
        Path to the mastodon file.
    tag_set : int, optional
        Index of the tag set used to fill the labels. When it is not an
        integer no label is read.
    name : str, optional
        The name attribute of the lineageTree. A non-empty string is used
        as is, otherwise the name is the stem of `path`. Passing an empty
        string additionally emits a warning.

    Returns
    -------
    lineageTree
        The lineage tree built from the mastodon file.
    """
    from mastodon_reader import MastodonReader

    reader = MastodonReader(path)
    spots, links = reader.read_tables()

    # One node per spot, identified by the name of its row.
    time = {}
    pos = {}
    for spot in spots.iloc:
        spot_id = spot.name
        time[spot_id] = spot.t
        pos[spot_id] = np.array([spot.x, spot.y, spot.z])

    # One edge per link, going from the source spot to the target spot.
    successor = {}
    for link in links.iloc:
        successor.setdefault(link.source_idx, []).append(link.target_idx)

    label = {}
    if isinstance(tag_set, int):
        chosen_set = reader.read_tags(spots, links)[tag_set]
        for tag in chosen_set["tags"]:
            label[tag["id"]] = tag["label"]

    if not name:
        default_name = Path(path).stem
        # Only an explicitly given empty name is worth a warning.
        if name == "":
            warn(f"Name set to default {default_name}", stacklevel=2)
        name = default_name
    return lineageTree(
        successor=successor, time=time, pos=pos, label=label, name=name
    )
814
+
815
+
816
def _read_mastodon_csv_rows(path: str) -> list[list[str]]:
    """Return the rows of a mastodon csv file without its 3 header rows."""
    # newline="" lets the csv module do its own newline handling, as its
    # documentation requires for files handed to csv.reader.
    with open(path, encoding="utf-8", errors="ignore", newline="") as file:
        return list(csv.reader(file))[3:]


def read_from_mastodon_csv(
    paths: list[str], name: None | str = None
) -> lineageTree:
    """Read a lineage tree from a mastodon csv.

    Parameters
    ----------
    paths : list[str]
        Paths to the csv files: the first one holds the spots, the second
        one the links. The first 3 rows of each file are skipped.
    name : None or str, optional
        The name attribute of the lineageTree. A non-empty string is used
        as is, otherwise the name is the stem of the first path. Passing an
        empty string additionally emits a warning.

    Returns
    -------
    lineageTree
        The lineage tree built from the csv files.
    """
    spots = _read_mastodon_csv_rows(paths[0])
    links = _read_mastodon_csv_rows(paths[1])

    label = {}
    time = {}
    pos = {}
    successor = {}

    for spot in spots:
        # Columns read here: 1 -> id, 4 -> time point, 5 to 7 -> x, y, z.
        unique_id = int(spot[1])
        time[unique_id] = int(spot[4])
        label[unique_id] = spot[1]
        pos[unique_id] = np.array(spot[5:8], dtype=float)

    for link in links:
        # Columns 4 and 5 hold the source and target ids, possibly written
        # as floats, hence the double conversion.
        source = int(float(link[4]))
        target = int(float(link[5]))
        successor.setdefault(source, []).append(target)

    if not name:
        tmp_name = Path(paths[0]).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name

    return lineageTree(
        successor=successor, time=time, pos=pos, label=label, name=name
    )
874
+
875
+
876
def read_from_mamut_xml(
    path: str, xml_attributes: list[str] | None = None, name: None | str = None
) -> lineageTree:
    """Read a lineage tree from a MaMuT xml.

    Parameters
    ----------
    path : str
        Path to the MaMuT xml.
    xml_attributes : list[str], optional
        Names of extra spot attributes to read. Each of them becomes a
        property mapping the spot id to the evaluated attribute value.
    name : None or str, optional
        The name attribute of the lineageTree. A non-empty string is used
        as is, otherwise the name is the stem of `path`. Passing an empty
        string additionally emits a warning.

    Returns
    -------
    lineageTree
        The lineage tree built from the xml file.

    Raises
    ------
    ValueError
        If the xml has no ``Model`` element, or if that element does not
        have exactly four children.
    """
    Model = None
    for elem in ET.parse(path).getroot():
        if elem.tag == "Model":
            Model = elem
    if Model is None:
        raise ValueError(f"No 'Model' element found in {path}")
    # The four children are read by position; only the spots and the tracks
    # are used.
    _, AllSpots, AllTracks, _ = list(Model)
    xml_attributes = xml_attributes or []
    wanted_attributes = set(xml_attributes)

    properties = {attr: {} for attr in xml_attributes}
    nodes = set()
    pos = {}
    time = {}
    properties["label"] = {}

    for frame in AllSpots:
        frame_time = int(frame.attrib["frame"])
        for cell in frame:
            cell_id = int(cell.attrib["ID"])
            nodes.add(cell_id)
            pos[cell_id] = np.array(
                [
                    float(cell.attrib["POSITION_X"]),
                    float(cell.attrib["POSITION_Y"]),
                    float(cell.attrib["POSITION_Z"]),
                ]
            )
            time[cell_id] = frame_time
            properties["label"][cell_id] = cell.attrib["name"]
            if "TISSUE_NAME" in cell.attrib:
                properties.setdefault("fate", {})[cell_id] = cell.attrib[
                    "TISSUE_NAME"
                ]
            # NOTE(security): the values below are passed to `eval`, which
            # executes whatever the file contains; only read trusted files.
            if "TISSUE_TYPE" in cell.attrib:
                properties.setdefault("fate_nb", {})[cell_id] = eval(
                    cell.attrib["TISSUE_TYPE"]
                )
            for attr in cell.attrib:
                if attr in wanted_attributes:
                    properties[attr][cell_id] = eval(cell.attrib[attr])

    properties["tracks"] = {}
    successor = {}
    properties["track_name"] = {}
    for track in AllTracks:
        t_id = int(track.attrib["TRACK_ID"])
        t_name = track.attrib["name"]
        properties["tracks"][t_id] = []
        for edge in track:
            source = int(edge.attrib["SPOT_SOURCE_ID"])
            target = int(edge.attrib["SPOT_TARGET_ID"])
            # Edges touching a spot that was not read are dropped.
            if source in nodes and target in nodes:
                # Edges always go forward in time.
                if time[source] > time[target]:
                    source, target = target, source
                successor.setdefault(source, []).append(target)
                properties["track_name"][source] = t_name
                properties["track_name"][target] = t_name
                properties["tracks"][t_id].append((source, target))
    if not name:
        tmp_name = Path(path).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name

    return lineageTree(
        successor=successor,
        time=time,
        pos=pos,
        name=name,
        **properties,
    )