LineageTree 1.7.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LineageTree/__init__.py +27 -2
- LineageTree/legacy/export_csv.py +70 -0
- LineageTree/legacy/to_lineajea.py +30 -0
- LineageTree/legacy/to_motile.py +36 -0
- LineageTree/lineageTree.py +2294 -1618
- LineageTree/lineageTreeManager.py +759 -55
- LineageTree/loaders.py +947 -695
- LineageTree/test/test_lineageTree.py +634 -0
- LineageTree/test/test_uted.py +233 -0
- LineageTree/tree_approximation.py +488 -0
- LineageTree/utils.py +106 -108
- {LineageTree-1.7.0.dist-info → lineagetree-2.0.1.dist-info}/METADATA +31 -34
- lineagetree-2.0.1.dist-info/RECORD +16 -0
- {LineageTree-1.7.0.dist-info → lineagetree-2.0.1.dist-info}/WHEEL +1 -1
- LineageTree/tree_styles.py +0 -322
- LineageTree-1.7.0.dist-info/RECORD +0 -11
- {LineageTree-1.7.0.dist-info → lineagetree-2.0.1.dist-info/licenses}/LICENSE +0 -0
- {LineageTree-1.7.0.dist-info → lineagetree-2.0.1.dist-info}/top_level.txt +0 -0
LineageTree/loaders.py
CHANGED
@@ -1,722 +1,974 @@
|
|
1
1
|
import csv
|
2
2
|
import os
|
3
3
|
import pickle as pkl
|
4
|
+
import struct
|
4
5
|
import xml.etree.ElementTree as ET
|
6
|
+
from pathlib import Path
|
7
|
+
from warnings import warn
|
5
8
|
|
6
9
|
import numpy as np
|
7
10
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
11
|
+
from .lineageTree import lineageTree
|
12
|
+
|
13
|
+
# Maps a C. elegans founder-cell name to the name of its mother cell.
# The loaders below fall back to this table when a cell's mother cannot be
# found by reusing the cell's own name or by dropping its last character.
IMPLICIT_L_T = {
    "AB": "P0",
    "P1": "P0",
    "EMS": "P1",
    "P2": "P1",
    "MS": "EMS",
    "E": "EMS",
    "C": "P2",
    "P3": "P2",
    "D": "P3",
    "P4": "P3",
    "Z2": "P4",
    "Z3": "P4",
}
|
27
|
+
|
28
|
+
# Maps each canonical ASTEC property name (key) to the list of alternative
# spellings under which that property may appear in an ASTEC output file.
# `_read_from_ASTEC_pkl` inverts this table to normalise the keys it reads.
ASTEC_KEYDICTIONARY = {
    "cell_lineage": [
        "lineage_tree",
        "lin_tree",
        "Lineage tree",
        "cell_lineage",
    ],
    "cell_h_min": ["cell_h_min", "h_mins_information"],
    "cell_volume": [
        "cell_volume",
        "volumes_information",
        "volumes information",
        "vol",
    ],
    "cell_surface": ["cell_surface", "cell surface"],
    "cell_compactness": [
        "cell_compactness",
        "Cell Compactness",
        "compacity",
        "cell_sphericity",
    ],
    "cell_sigma": ["cell_sigma", "sigmas_information", "sigmas"],
    "cell_labels_in_time": [
        "cell_labels_in_time",
        "Cells labels in time",
        "time_labels",
    ],
    "cell_barycenter": [
        "cell_barycenter",
        "Barycenters",
        "barycenters",
    ],
    "cell_fate": ["cell_fate", "Fate"],
    "cell_fate_2": ["cell_fate_2", "Fate2"],
    "cell_fate_3": ["cell_fate_3", "Fate3"],
    "cell_fate_4": ["cell_fate_4", "Fate4"],
    "all_cells": [
        "all_cells",
        "All Cells",
        "All_Cells",
        "all cells",
        "tot_cells",
    ],
    "cell_principal_values": [
        "cell_principal_values",
        "Principal values",
    ],
    "cell_name": ["cell_name", "Names", "names", "cell_names"],
    "cell_contact_surface": [
        "cell_contact_surface",
        "cell_cell_contact_information",
    ],
    "cell_history": [
        "cell_history",
        "Cells history",
        "cell_life",
        "life",
    ],
    "cell_principal_vectors": [
        "cell_principal_vectors",
        "Principal vectors",
    ],
    "cell_naming_score": ["cell_naming_score", "Scores", "scores"],
    "problematic_cells": ["problematic_cells"],
    "unknown_key": ["unknown_key"],
}
|
94
|
+
|
95
|
+
|
96
|
+
def read_from_csv(
    file_path: str,
    z_mult: float,
    link: int = 1,
    delim: str = ",",
    name: None | str = None,
) -> lineageTree:
    """Read a lineage tree from a csv file.

    The column layout depends on `link`:

    - ``link == 1``: ``id, time, z, y, x, pred_id, lin_id``
    - ``link == 2``: ``time, z, y, x, id, pred_id, lin_id``
    - any other value: ``id, time, z, y, x, ...`` (extra columns are ignored
      and no predecessor links are created)

    Parameters
    ----------
    file_path : str
        path to the csv file
    z_mult : float
        aspect ratio, the z coordinate of every cell is multiplied by it
    link : int
        selects the column layout, see above
    delim : str, default=","
        delimiter used in the csv file
    name : None or str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree
    """
    with open(file_path) as f:
        lines = f.readlines()

    # SECURITY NOTE(review): every cell of the file is passed to `eval`, so
    # this must only be used on trusted files. Kept as is because the set of
    # value syntaxes existing files rely on is not visible from here.
    rows = np.array(
        [
            [eval(v.strip()) for v in line.split(delim)]
            for line in lines
            if line.strip()  # blank lines would make `eval` raise
        ]
    )
    # Rows are ordered by their time column (column 0 when link == 2, column 1
    # otherwise); presumably so that a predecessor is always registered in
    # `corres` before the cells that point to it.
    time_column = 0 if link == 2 else 1
    rows = rows[np.argsort(rows[:, time_column])]

    successor = {}
    pos = {}
    time = {}
    corres = {}  # id found in the file -> id used in the lineage tree
    for unique_id, row in enumerate(rows):
        if link == 1:
            id_, t, z, y, x, pred, _lin_id = row
        elif link == 2:
            t, z, y, x, id_, pred, _lin_id = row
        else:
            id_, t, z, y, x, *_ = row
            pred = None
        # Forced to float so that an all-integer file does not truncate the
        # rescaled z coordinate, and kept in its own name so that it does not
        # overwrite the `pos` dictionary.
        position = np.array([x, y, z], dtype=float)
        position[-1] *= z_mult
        corres[id_] = unique_id
        if pred in corres:
            successor.setdefault(corres[pred], []).append(unique_id)
        pos[unique_id] = position
        time[unique_id] = int(t)

    if not name:
        tmp_name = Path(file_path).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    return lineageTree(successor=successor, time=time, pos=pos, name=name)
|
164
|
+
|
165
|
+
|
166
|
+
def _read_from_ASTEC_xml(file_path: str):
    """Read an ASTEC `xml` file into nested dictionaries.

    Parameters
    ----------
    file_path : str
        path to the xml file

    Returns
    -------
    dict
        If the tag of the xml root is one of the keys of `ASTEC_KEYDICTIONARY`
        the dictionary has that single entry, otherwise it has one entry per
        child of the root whose value is not None.
    """

    def _set_dictionary_value(root):
        """Convert an xml element into a python value, recursively."""
        if len(root) == 0:
            # Leaf element: its text holds the value.
            if root.text is None:
                return None
            # SECURITY NOTE(review): the text of the element is passed to
            # `eval`, only read files coming from a trusted source.
            return eval(root.text)
        dictionary = {}
        for child in root:
            # `cell` elements are keyed by their integer `cell-id` attribute,
            # any other element is keyed by its tag.
            if child.tag == "cell":
                key = int(child.attrib["cell-id"])
            else:
                key = child.tag
            dictionary[key] = _set_dictionary_value(child)
        return dictionary

    root = ET.parse(file_path).getroot()
    dictionary = {}

    if root.tag in ASTEC_KEYDICTIONARY:
        # The file holds a single property, stored directly at the root.
        dictionary[str(root.tag)] = _set_dictionary_value(root)
    else:
        for child in root:
            value = _set_dictionary_value(child)
            if value is not None:
                dictionary[str(child.tag)] = value
    return dictionary
|
196
|
+
|
197
|
+
|
198
|
+
def _read_from_ASTEC_pkl(file_path: str, eigen: bool = False):
    """Read an ASTEC `pkl` file and normalise its property names.

    Parameters
    ----------
    file_path : str
        path to the pickle file
    eigen : bool, default=False
        not used by this function, kept so that existing calls keep working

    Returns
    -------
    dict
        The content of the pickle where every key listed as an alternative
        spelling in `ASTEC_KEYDICTIONARY` is replaced by its canonical name.
        Keys that are not listed are kept unchanged.
    """
    # SECURITY NOTE(review): unpickling can execute arbitrary code, only read
    # files coming from a trusted source.
    with open(file_path, "rb") as f:
        tmp_data = pkl.load(f, encoding="latin1")

    # alternative spelling -> canonical name
    new_ref = {
        alias: canonical
        for canonical, aliases in ASTEC_KEYDICTIONARY.items()
        for alias in aliases
    }
    return {new_ref.get(k, k): v for k, v in tmp_data.items()}
|
214
|
+
|
215
|
+
|
216
|
+
def read_from_ASTEC(
    file_path: str, eigen: bool = False, name: None | str = None
) -> lineageTree:
    """
    Read an `xml` or `pkl` file produced by the ASTEC algorithm.

    Parameters
    ----------
    file_path : str
        path to an output generated by ASTEC, a path ending in `.xml` is read
        as xml, anything else is read as a pickle
    eigen : bool, default=False
        whether or not to read the eigen values, default False
    name : None or str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree

    Raises
    ------
    KeyError
        if the file does not contain the cell lineage
    """
    if os.path.splitext(file_path)[-1] == ".xml":
        tmp_data = _read_from_ASTEC_xml(file_path)
    else:
        tmp_data = _read_from_ASTEC_pkl(file_path, eigen)

    lt = tmp_data["cell_lineage"]
    # Every id is either a key of the lineage or one of the listed successors.
    nodes = set(lt).union(vi for v in lt.values() for vi in v)

    # ASTEC property name -> (name of the property in the tree, default value)
    # make sure these are all named liked they are in tmp_data
    per_cell = {
        "cell_volume": ("volume", 0.0),
        "cell_fate": ("fate", ""),
        "cell_name": ("label", ""),
    }
    properties = {
        new_name: {}
        for astec_name, (new_name, _) in per_cell.items()
        if astec_name in tmp_data
    }
    # `pos` stays None when the file has no barycenters. It used to be left
    # unbound in that case, which made the final call raise a NameError.
    pos = {} if "cell_barycenter" in tmp_data else None

    pkl2lT = {}  # ASTEC id -> id used in the lineage tree
    time = {}
    for unique_id, n in enumerate(nodes):
        pkl2lT[n] = unique_id
        # The time point of a cell is its ASTEC id divided by 10**4.
        time[unique_id] = n // 10**4
        for astec_name, (new_name, default) in per_cell.items():
            if astec_name in tmp_data:
                properties[new_name][unique_id] = tmp_data[astec_name].get(
                    n, default
                )
        if pos is not None:
            pos[unique_id] = tmp_data["cell_barycenter"].get(n, np.zeros(3))

    if "cell_contact_surface" in tmp_data:
        surfaces = tmp_data["cell_contact_surface"]
        properties["contact"] = {
            pkl2lT[c]: {
                # neighbours without a new id are all stored under -1
                pkl2lT.get(neighbour, -1): s
                for neighbour, s in surfaces[c].items()
                if neighbour % 10**4 == 1 or neighbour in pkl2lT
            }
            for c in nodes
            if c in surfaces
        }

    successor = {
        new_id: [pkl2lT[ni] for ni in lt[n] if ni in pkl2lT]
        for n, new_id in pkl2lT.items()
        if n in lt
    }

    # do this in the end of the process, skip lineage tree and whatever is stored already
    discard = {
        "cell_volume",  # already stored
        "cell_fate",  # already stored
        "cell_barycenter",  # already stored
        "cell_contact_surface",  # already stored
        "cell_lineage",  # already stored
        "cell_name",  # already stored
        "all_cells",  # not a property
        "cell_history",  # redundant
        "problematic_cells",  # not useful here
        "cell_labels_in_time",  # redundant
    }
    for prop_name, prop_values in tmp_data.items():
        if prop_name in discard or not isinstance(prop_values, dict):
            continue
        # Rename all ids from old to new, one level deeper too when the
        # values are themselves dictionaries.
        properties[prop_name] = {
            pkl2lT.get(k, -1): (
                {pkl2lT.get(k2, -1): v2 for k2, v2 in v.items()}
                if isinstance(v, dict)
                else v
            )
            for k, v in prop_values.items()
        }

    if not name:
        tmp_name = Path(file_path).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    # `pos` is only handed over when the file had barycenters. Calling
    # lineageTree without `pos` is what read_from_txt_for_celegans_BAO does.
    if pos is not None:
        return lineageTree(
            successor=successor, time=time, pos=pos, name=name, **properties
        )
    return lineageTree(successor=successor, time=time, name=name, **properties)
|
335
|
+
|
336
|
+
|
337
|
+
def read_from_binary(fname: str, name: None | str = None) -> lineageTree:
    """
    Reads a binary lineageTree file name.
    Format description: see lineageTree.to_binary

    The file starts with three 8-byte integers giving the number of entries
    of the three sequences that follow: the tree sequence (8-byte integers),
    the time sequence (unsigned 2-byte integers) and the position sequence
    (8-byte floats, three per cell).

    Parameters
    ----------
    fname : string
        path to the binary file
    name : None or str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree
    """
    q_size = struct.calcsize("q")
    H_size = struct.calcsize("H")
    d_size = struct.calcsize("d")

    with open(fname, "rb") as f:
        len_tree = struct.unpack("q", f.read(q_size))[0]
        len_time = struct.unpack("q", f.read(q_size))[0]
        len_pos = struct.unpack("q", f.read(q_size))[0]
        number_sequence = list(
            struct.unpack("q" * len_tree, f.read(q_size * len_tree))
        )
        time_sequence = list(
            struct.unpack("H" * len_time, f.read(H_size * len_time))
        )
        pos_sequence = np.array(
            struct.unpack("d" * len_pos, f.read(d_size * len_pos))
        )

    successor = {}
    time = {}
    pos = {}
    # Cells whose first branch is being read and whose second daughter is
    # still to come.
    waiting_list = []
    i = 0
    done = False
    t = 0
    # Special case: every even entry is -1, so the sequence is made of
    # (-1, cell) pairs only and there is no link to build.
    if max(number_sequence[::2]) == -1:
        tmp = number_sequence[1::2]
        if len(tmp) * 3 == len(pos_sequence) == len(time_sequence) * 3:
            time = dict(zip(tmp, time_sequence, strict=True))
            pos = dict(
                zip(
                    tmp,
                    np.reshape(pos_sequence, (len_time, 3)),
                    strict=True,
                )
            )
            done = True
    # NOTE(review): the branches below are order dependent and were kept
    # statement for statement. In the tree sequence -1 and -2 are markers,
    # any value >= 0 is a cell id.
    while i < len(number_sequence) and not done:
        c = number_sequence[i]
        if c == -1:
            if waiting_list != []:
                # The next cell is the other daughter of the last waiting cell.
                prev_mother = waiting_list.pop()
                successor[prev_mother].insert(0, number_sequence[i + 1])
                t = time[prev_mother] + 1
            else:
                t = time_sequence.pop(0)

        elif c == -2:
            successor[waiting_list[-1]] = [number_sequence[i + 1]]
            pos[waiting_list[-1]] = pos_sequence[:3]
            pos_sequence = pos_sequence[3:]
            time[waiting_list[-1]] = t
            t += 1

        elif number_sequence[i + 1] >= 0:
            # `c` is followed by a cell id: that cell is its only successor.
            successor[c] = [number_sequence[i + 1]]
            pos[c] = pos_sequence[:3]
            pos_sequence = pos_sequence[3:]
            time[c] = t
            t += 1

        elif number_sequence[i + 1] == -2:
            waiting_list += [c]

        elif number_sequence[i + 1] == -1:
            # `c` is the last cell of its branch.
            pos[c] = pos_sequence[:3]
            pos_sequence = pos_sequence[3:]
            time[c] = t
            t += 1
            i += 1
            if waiting_list != []:
                prev_mother = waiting_list.pop()
                successor[prev_mother].insert(0, number_sequence[i + 1])
                t = time[prev_mother] + 1
            else:
                if len(time_sequence) > 0:
                    t = time_sequence.pop(0)
        i += 1
    if not name:
        tmp_name = Path(fname).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    return lineageTree(successor=successor, time=time, pos=pos, name=name)
|
448
|
+
|
449
|
+
|
450
|
+
def read_from_txt_for_celegans(
    file: str, name: None | str = None
) -> lineageTree:
    """
    Read a C. elegans lineage tree

    The first line of the file is skipped. Every other line is split on tabs:
    the first field is the time point, the second the name of the cell and the
    third to fifth its position.

    Parameters
    ----------
    file : str
        Path to the file to read
    name : None or str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree
    """
    with open(file) as f:
        raw = f.readlines()[1:]

    _labels = {}
    time_nodes = {}
    pos = {}
    time = {}
    successor = {}

    for unique_id, line in enumerate(raw):
        fields = line.split("\t")
        t = int(fields[0])
        _labels[unique_id] = fields[1]
        pos[unique_id] = np.array(fields[2:5], dtype=float)
        time_nodes.setdefault(t, set()).add(unique_id)
        time[unique_id] = t

    t_b = min(time_nodes)

    for t, cells in time_nodes.items():
        if t == t_b:
            continue
        # An absent previous time point leaves the cells without predecessor
        # instead of raising a KeyError.
        prev_cells = time_nodes.get(t - 1, ())
        name_to_id = {_labels[c]: c for c in prev_cells}
        for c in cells:
            label = _labels[c]
            # The mother is, in that order: the cell with the same name, the
            # cell named like this one without its last character, or the
            # mother given by IMPLICIT_L_T.
            if label in name_to_id:
                p = name_to_id[label]
            elif label[:-1] in name_to_id:
                p = name_to_id[label[:-1]]
            elif IMPLICIT_L_T.get(label) in name_to_id:
                p = name_to_id[IMPLICIT_L_T.get(label)]
            else:
                # No predecessor: the cell is not linked. It used to be
                # appended under a `None` key of `successor`.
                continue
            successor.setdefault(p, []).append(c)

    if not name:
        tmp_name = Path(file).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    properties = {"_labels": _labels}
    return lineageTree(
        successor=successor, time=time, pos=pos, name=name, **properties
    )
|
511
|
+
|
512
|
+
|
513
|
+
def read_from_txt_for_celegans_CAO(
    file: str,
    reorder: bool = False,
    raw_size: np.ndarray | None = None,
    shape: float | None = None,
    name: str | None = None,
) -> lineageTree:
    """
    Read a C. elegans lineage tree from Cao et al.

    The first line of the file is skipped. Every other line is split on
    whitespace into: cell name, time point, z, x, y.

    Parameters
    ----------
    file : str
        Path to the file to read
    reorder : bool, default=False
        if True the positions are transformed: the third coordinate is
        replaced by `raw_size[2]` minus itself, the result is scaled by
        `shape / raw_size` and its first two coordinates are swapped
    raw_size : np.ndarray, optional
        used to transform the positions, required when `reorder` is True
    shape : float, optional
        used to transform the positions, required when `reorder` is True
    name : None or str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree

    Raises
    ------
    ValueError
        if `reorder` is True and `raw_size` or `shape` is missing
    """
    if reorder and (raw_size is None or shape is None):
        raise ValueError(
            "`raw_size` and `shape` are required when `reorder` is True"
        )

    def split_line(line):
        fields = line.split()
        # SECURITY NOTE(review): the numeric fields are passed to `eval`,
        # only read files coming from a trusted source.
        return (
            fields[0],
            eval(fields[1]),
            eval(fields[2]),
            eval(fields[3]),
            eval(fields[4]),
        )

    def flip(x):
        return np.array([x[0], x[1], raw_size[2] - x[2]])

    def adjust(x):
        return (shape / raw_size * flip(x))[[1, 0, 2]]

    with open(file) as f:
        raw = f.readlines()[1:]

    label = {}
    time_nodes = {}
    pos = {}
    successor = {}
    time = {}

    # Blank lines are skipped, they have no field to read.
    rows = (split_line(line) for line in raw if line.strip())
    # The cell name gets its own loop variable: reusing `label` rebound the
    # dictionary to a string and made the assignment below raise a TypeError.
    for unique_id, (cell_name, t, z, x, y) in enumerate(rows):
        label[unique_id] = cell_name
        # `np.float` does not exist anymore in recent NumPy releases.
        position = np.array([x, y, z], dtype=float)
        time_nodes.setdefault(t, set()).add(unique_id)
        pos[unique_id] = adjust(position) if reorder else position
        time[unique_id] = t

    t_b = min(time_nodes)

    for t, cells in time_nodes.items():
        if t == t_b:
            continue
        prev_cells = time_nodes[t - 1]
        name_to_id = {label[c]: c for c in prev_cells}
        for c in cells:
            # The mother is, in that order: the cell with the same name, the
            # cell named like this one without its last character, or the
            # mother given by IMPLICIT_L_T.
            if label[c] in name_to_id:
                p = name_to_id[label[c]]
            elif label[c][:-1] in name_to_id:
                p = name_to_id[label[c][:-1]]
            elif IMPLICIT_L_T.get(label[c]) in name_to_id:
                p = name_to_id[IMPLICIT_L_T.get(label[c])]
            else:
                warn(
                    f"error, cell {label[c]} has no predecessors",
                    stacklevel=2,
                )
                # The cell is not linked. It used to be appended under a
                # `None` key of `successor`.
                continue
            successor.setdefault(p, []).append(c)

    if not name:
        tmp_name = Path(file).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    return lineageTree(
        successor=successor, time=time, pos=pos, label=label, name=name
    )
|
601
|
+
|
602
|
+
|
603
|
+
def read_from_txt_for_celegans_BAO(
    path: str, name: None | str = None
) -> lineageTree:
    """Read a C. elegans Bao file from http://digital-development.org

    Every line that does not contain `cell_name` is split on tabs: the first
    field is the name of a cell and the last field a comma separated list of
    values, one per time point at which the cell exists. One node is created
    per value and the nodes of a cell are chained in order.

    Parameters
    ----------
    path : str
        Path to the file to read
    name : str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree
    """
    cell_times = {}
    properties = {"expression": {}, "_labels": {}}
    with open(path) as f:
        for line in f:
            # Skips the header and blank lines, which `eval` cannot parse.
            if "cell_name" in line or not line.strip():
                continue
            fields = line.split("\t")
            # SECURITY NOTE(review): the values are passed to `eval`, only
            # read files coming from a trusted source.
            cell_times[fields[0]] = [eval(val) for val in fields[-1].split(",")]

    unique_id = 0
    to_link = {}  # cell name -> (id of its first node, id of its last node)
    successor = {}
    for c, lc in cell_times.items():
        ids = list(range(unique_id, unique_id + len(lc)))
        # Chain the consecutive nodes of the cell.
        successor.update({ids[i]: [ids[i + 1]] for i in range(len(ids) - 1)})
        properties["expression"].update(dict(zip(ids, lc, strict=True)))
        properties["_labels"].update({id_: c for id_ in ids})
        to_link[c] = (unique_id, unique_id + len(lc) - 1)
        unique_id += len(lc)

    # Link the last node of each mother to the first node of its daughters.
    # The mother is the cell named like the daughter without its last
    # character, or else the mother given by IMPLICIT_L_T.
    for c_name, (first_id, _last_id) in to_link.items():
        if c_name[:-1] in to_link:
            mother = c_name[:-1]
        elif c_name in IMPLICIT_L_T and IMPLICIT_L_T[c_name] in to_link:
            mother = IMPLICIT_L_T[c_name]
        else:
            continue
        successor.setdefault(to_link[mother][1], []).append(first_id)

    if not name:
        tmp_name = Path(path).stem
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    return lineageTree(
        successor=successor, starting_time=0, name=name, **properties
    )
|
657
|
+
|
658
|
+
|
659
|
+
def read_from_tgmm_xml(
    file_format: str,
    tb: int,
    te: int,
    z_mult: float = 1.0,
    name: None | str = None,
) -> lineageTree:
    """Reads a lineage tree from TGMM xml output.

    One xml file is parsed per time point in ``[tb, te]``. A cell is kept
    only if its ``m`` attribute contains neither ``-1.#IND`` nor ``nan`` and
    it carries all of the ``alpha``, ``W``, ``nu`` and ``alphaPrior``
    attributes; other cells are skipped.

    Parameters
    ----------
    file_format : str
        path to the xmls location.
        it should be written as follows:
        path/to/xml/standard_name_t{t:06d}.xml where (as an example)
        {t:06d} means a series of 6 digits representing the time and
        if the time value has fewer than 6 digits, the missing
        digits are filled with 0s
    tb : int
        first time point to read
    te : int
        last time point to read (inclusive)
    z_mult : float, default=1.0
        aspect ratio, multiplied into the last coordinate of every position
    name : str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of `file_format`.

    Returns
    -------
    lineageTree
        lineage tree
    """
    # Running id shared by all time points, so that ids never collide
    # between frames. It only advances when a cell is actually kept.
    next_id = 0
    successor = {}
    pos = {}
    time_id = {}
    time = {}
    properties = {
        "svIdx": {},
        "lin": {},
        "C_lin": {},
        "coeffs": {},
        "intensity": {},
    }
    required = ("alpha", "W", "nu", "alphaPrior")
    for t in range(tb, te + 1):
        root = ET.parse(file_format.format(t=t)).getroot()
        for it in root:
            attrib = it.attrib
            if "-1.#IND" in attrib["m"] or "nan" in attrib["m"]:
                continue
            # Per-cell values use their own names: rebinding `pos` here
            # would clobber the id -> position dictionary built above.
            M_id = int(attrib["parent"])
            cell_pos = np.array(
                [float(v) for v in attrib["m"].split(" ") if v != ""]
            )
            cell_id = int(attrib["id"])
            svIdx = [int(v) for v in attrib["svIdx"].split(" ") if v != ""]
            lin_id = int(attrib["lineage"])
            if not all(key in attrib for key in required):
                continue
            alpha = float(attrib["alpha"])
            w_values = [float(v) for v in attrib["W"].split(" ") if v != ""]
            nu = float(attrib["nu"])
            alphaPrior = float(attrib["alphaPrior"])

            C = next_id
            next_id += 1
            cell_pos[-1] = cell_pos[-1] * z_mult
            # Link to the mother only if it was kept at the previous time point.
            if (t - 1, M_id) in time_id:
                successor.setdefault(time_id[(t - 1, M_id)], []).append(C)
            pos[C] = cell_pos
            time_id[(t, cell_id)] = C
            time[C] = t
            properties["svIdx"][C] = svIdx
            properties["lin"].setdefault(lin_id, []).append(C)
            properties["C_lin"][C] = lin_id
            properties["intensity"][C] = max(alpha - alphaPrior, 0)
            scaled = list(np.array(w_values) * nu)
            # Entries 0, 1, 2, 4, 5 and 8 of the flattened W * nu values,
            # followed by the (z-scaled) position.
            properties["coeffs"][C] = (
                scaled[:3] + scaled[4:6] + scaled[8:9] + list(cell_pos)
            )
    if not name:
        tmp_name = Path(file_format).stem
        # Only an explicit empty string triggers the warning; None is silent.
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name
    return lineageTree(
        successor=successor, time=time, pos=pos, name=name, **properties
    )
|
758
|
+
|
759
|
+
|
760
|
+
def read_from_mastodon(
    path: str, tag_set: int | None = None, name: None | str = None
) -> lineageTree:
    """Read a Mastodon lineage tree.

    Spots and links are loaded through ``mastodon_reader.MastodonReader``.
    Every spot contributes a time (``t``) and a position (``x``, ``y``, ``z``)
    keyed by the spot row name; every link contributes a
    ``source_idx -> target_idx`` successor entry.

    Parameters
    ----------
    path : str
        path to the mastodon file
    tag_set : int, optional
        Index of the tag set to read labels from. When it is not an int,
        no labels are read.
    name : str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree
    """
    from mastodon_reader import MastodonReader

    reader = MastodonReader(path)
    spots, links = reader.read_tables()

    time = {}
    pos = {}
    for spot in spots.iloc:
        spot_id = spot.name
        coords = np.array([spot.x, spot.y, spot.z])
        time[spot_id] = spot.t
        pos[spot_id] = coords

    successor = {}
    for link in links.iloc:
        successor.setdefault(link.source_idx, []).append(link.target_idx)

    label = {}
    if isinstance(tag_set, int):
        chosen_set = reader.read_tags(spots, links)[tag_set]
        for tag in chosen_set["tags"]:
            label[tag["id"]] = tag["label"]

    if not name:
        default_name = Path(path).stem
        # The warning is reserved for an explicit empty string.
        if name == "":
            warn(f"Name set to default {default_name}", stacklevel=2)
        name = default_name
    return lineageTree(
        successor=successor, time=time, pos=pos, label=label, name=name
    )
|
814
|
+
|
815
|
+
|
816
|
+
def read_from_mastodon_csv(
    paths: list[str], name: None | str = None
) -> lineageTree:
    """Read a lineage tree from a pair of Mastodon csv exports.

    ``paths[0]`` is read as the spots table and ``paths[1]`` as the links
    table. The first three rows of each file are dropped. For spots,
    column 1 is used as the id (and, as a string, as the label), column 4
    as the time and columns 5 to 7 as the position. For links, columns 4
    and 5 are the source and target ids.

    Parameters
    ----------
    paths : list[str]
        list of paths to the csv files
    name : None or str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the first file path.

    Returns
    -------
    lineageTree
        lineage tree
    """
    with open(paths[0], encoding="utf-8", errors="ignore") as spot_file:
        spot_rows = list(csv.reader(spot_file))[3:]

    with open(paths[1], encoding="utf-8", errors="ignore") as link_file:
        link_rows = list(csv.reader(link_file))[3:]

    time = {}
    label = {}
    pos = {}
    for row in spot_rows:
        spot_id = int(row[1])
        x, y, z = row[5:8]
        time[spot_id] = int(row[4])
        label[spot_id] = row[1]
        pos[spot_id] = np.array([x, y, z], dtype=float)

    successor = {}
    for row in link_rows:
        # Ids are parsed through float first, then truncated to int.
        parent = int(float(row[4]))
        child = int(float(row[5]))
        successor.setdefault(parent, []).append(child)

    if not name:
        default_name = Path(paths[0]).stem
        # The warning is reserved for an explicit empty string.
        if name == "":
            warn(f"Name set to default {default_name}", stacklevel=2)
        name = default_name

    return lineageTree(
        successor=successor, time=time, pos=pos, label=label, name=name
    )
|
874
|
+
|
875
|
+
|
876
|
+
def read_from_mamut_xml(
    path: str, xml_attributes: list[str] | None = None, name: None | str = None
) -> lineageTree:
    """Read a lineage tree from a MaMuT xml.

    Parameters
    ----------
    path : str
        path to the MaMuT xml
    xml_attributes : list of str, optional
        Names of spot attributes to collect as extra properties. Each one
        becomes a ``{cell_id: value}`` dictionary holding the evaluated
        attribute value of every spot that carries it.
    name : None or str, optional
        The name attribute of the lineageTree file. If given a non-empty string, the value of the attribute
        will be the name attribute, otherwise the name will be the stem of the file path.

    Returns
    -------
    lineageTree
        lineage tree

    Raises
    ------
    ValueError
        If the xml root has no ``Model`` child element.
    """
    tree = ET.parse(path)
    models = tree.getroot().findall("Model")
    if not models:
        raise ValueError(f"No 'Model' element found in MaMuT xml {path!r}")
    # If several Model elements exist the last one is used. Its children are
    # taken positionally: feature declarations, spots, tracks, filtered tracks.
    _, AllSpots, AllTracks, _ = list(models[-1])
    xml_attributes = xml_attributes or []

    properties = {attr: {} for attr in xml_attributes}
    nodes = set()
    pos = {}
    time = {}
    properties["label"] = {}

    for frame in AllSpots:
        t = int(frame.attrib["frame"])
        for cell in frame:
            attrib = cell.attrib
            cell_id = int(attrib["ID"])
            n = attrib["name"]
            x = float(attrib["POSITION_X"])
            y = float(attrib["POSITION_Y"])
            z = float(attrib["POSITION_Z"])
            nodes.add(cell_id)
            pos[cell_id] = np.array([x, y, z])
            time[cell_id] = t
            properties["label"][cell_id] = n
            if "TISSUE_NAME" in attrib:
                properties.setdefault("fate", {})[cell_id] = attrib[
                    "TISSUE_NAME"
                ]
            # NOTE(review): `eval` is applied to text read from the xml file.
            # This is unsafe if the file is not trusted; consider
            # `ast.literal_eval` once the expected value formats are confirmed.
            if "TISSUE_TYPE" in attrib:
                properties.setdefault("fate_nb", {})[cell_id] = eval(
                    attrib["TISSUE_TYPE"]
                )
            for attr in xml_attributes:
                if attr in attrib:
                    properties[attr][cell_id] = eval(attrib[attr])

    properties["tracks"] = {}
    successor = {}
    properties["track_name"] = {}
    for track in AllTracks:
        t_id = int(track.attrib["TRACK_ID"])
        t_name = track.attrib["name"]
        properties["tracks"][t_id] = []
        for edge in track:
            source = int(edge.attrib["SPOT_SOURCE_ID"])
            target = int(edge.attrib["SPOT_TARGET_ID"])
            # Edges touching a spot that is not in AllSpots are ignored.
            if source in nodes and target in nodes:
                # Orient every edge forward in time.
                if time[source] > time[target]:
                    source, target = target, source
                successor.setdefault(source, []).append(target)
                properties["track_name"][source] = t_name
                properties["track_name"][target] = t_name
                properties["tracks"][t_id].append((source, target))
    if not name:
        tmp_name = Path(path).stem
        # Only an explicit empty string triggers the warning; None is silent.
        if name == "":
            warn(f"Name set to default {tmp_name}", stacklevel=2)
        name = tmp_name

    return lineageTree(
        successor=successor,
        time=time,
        pos=pos,
        name=name,
        **properties,
    )
|