fucciphase 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,454 @@
1
+ from typing import List, Optional
2
+
3
+ import matplotlib.pyplot as plt
4
+ import numpy as np
5
+ import pandas as pd
6
+ from LineageTree import lineageTree
7
+ from matplotlib import colormaps
8
+ from scipy import signal
9
+
10
+
11
def split_track(
    track: pd.DataFrame,
    highest_track_idx: int,
    sg2m_channel: str,
    distance: int = 3,
    background_fluctuation_percentage: float = 0.2,
    track_id_name: str = "TRACK_ID",
) -> int:
    """Detect mitosis events and split a single track.

    Local minima of the S/G2/M signal that lie close to the lowest value of
    the track and that are preceded by a stretch of elevated signal are used
    as split points. All rows from a split point up to the next split point
    (or the end of the track) receive a new, previously unused track ID.

    Parameters
    ----------
    track: pd.DataFrame
        DataFrame containing a single track, is changed in place
    highest_track_idx: int
        Highest index of all tracks! Split tracks will be appended
    sg2m_channel: str
        Name of the S/G2/M marker
    distance: int
        Minimum distance between peaks
    background_fluctuation_percentage: float
        Fluctuation of background level, used to detect low magenta level
    track_id_name: str
        Name of column with track IDs

    Returns
    -------
    int
        Highest track index in use after splitting

    Raises
    ------
    ValueError
        If the track ID column is missing
    """
    if track_id_name not in track.columns:
        raise ValueError(f"{track_id_name} column is missing.")
    magenta = track[sg2m_channel]
    # minima of magenta are the maxima of the inverted signal
    peaks, _ = signal.find_peaks(1.0 / magenta, distance=distance)
    magenta_background = magenta.min()
    # values above this level count as signal, values below as background
    signal_threshold = (1 + background_fluctuation_percentage) * magenta_background

    # filter peaks
    peaks_to_use = []
    previous_peak = 0  # the first minimum is compared against the track start
    for peak in peaks:
        window_start, previous_peak = previous_peak, peak
        # if magenta intensity is high, this minimum is not a mitosis
        if magenta.iloc[peak] > signal_threshold:
            continue
        # check if there was a magenta signal since the previous minimum
        if not np.any(magenta.iloc[window_start:peak] > signal_threshold):
            continue
        peaks_to_use.append(peak)

    # split tracks: each segment ends where the next one starts
    segment_ends = peaks_to_use[1:] + [len(track)]
    for peak, next_peak in zip(peaks_to_use, segment_ends):
        highest_track_idx += 1
        track.loc[track.index[peak:next_peak], track_id_name] = highest_track_idx

    return highest_track_idx
66
+
67
+
68
def split_all_tracks(
    track_df: pd.DataFrame,
    sg2m_channel: str,
    distance: int = 3,
    minimum_track_length: int = 20,
    background_fluctuation_percentage: float = 0.2,
    track_id_name: str = "TRACK_ID",
) -> None:
    """Go through all tracks and split them after mitosis.

    Parameters
    ----------
    track_df: pd.DataFrame
        DataFrame containing multiple tracks, is changed in place
    sg2m_channel: str
        Name of the S/G2/M marker
    distance: int
        Minimum distance between peaks
    minimum_track_length: int
        minimum length required to check if track should be split
    background_fluctuation_percentage: float
        Fluctuation of background level, used to detect low magenta level
    track_id_name: str
        Name of column with track IDs

    Raises
    ------
    ValueError
        If the track ID column is missing
    """
    if track_id_name not in track_df.columns:
        raise ValueError(f"{track_id_name} column is missing.")
    if track_df.empty:
        # nothing to split and no maximum ID to start counting from
        return

    highest_track_idx_counter = track_df[track_id_name].max()
    # snapshot of the IDs before splitting, so that newly created tracks are
    # not visited again and the track with the highest ID is not skipped
    original_track_ids = track_df[track_id_name].unique()

    # go through all tracks and split if needed
    for track_idx in original_track_ids:
        # negative IDs were never visited before (IDs were enumerated from 0);
        # presumably they mark spots without a track -- verify against caller
        if pd.isna(track_idx) or track_idx < 0:
            continue
        in_track = track_df[track_id_name] == track_idx
        if in_track.sum() < minimum_track_length:
            continue
        # work on a copy, split_track modifies its input in place
        track = track_df.loc[in_track].copy()
        # split single track
        highest_track_idx_counter = split_track(
            track,
            highest_track_idx_counter,
            sg2m_channel,
            distance,
            background_fluctuation_percentage,
            track_id_name,
        )
        # only the ID column is changed by the split, write it back
        track_df.loc[in_track, track_id_name] = track[track_id_name]
113
+
114
+
115
def compute_motility_parameters(
    track_df: pd.DataFrame,
    centroid_x: str = "POSITION_X",
    centroid_y: str = "POSITION_Y",
    centroid_z: bool = False,
    track_id_name: str = "TRACK_ID",
) -> None:
    """Add motility parameters to DataFrame.

    The columns ``MSD``, ``DISPLACEMENTS`` and ``VELOCITIES`` are added to
    (or reset in) ``track_df`` and filled track by track. Rows whose track ID
    is ``-1`` are skipped and keep ``NaN`` in all three columns.

    Parameters
    ----------
    track_df: pd.DataFrame
        DataFrame with tracking data, is changed in place
    centroid_x: str
        Name of column with x-coordinate of centroid
    centroid_y: str
        Name of column with y-coordinate of centroid
    centroid_z: str
        Name of column with z-coordinate of centroid,
        ``False`` (default) if the data has no z-coordinate
    track_id_name: str
        Name of column with track IDs

    """
    # create all result columns up front so they exist even without tracks
    for column in ("MSD", "DISPLACEMENTS", "VELOCITIES"):
        track_df[column] = np.nan

    for index in track_df[track_id_name].unique():
        if index == -1:
            continue
        # compute the row selection once and reuse it for reading and writing
        in_track = track_df[track_id_name] == index
        track = track_df[in_track]
        centroids_x = track[centroid_x].to_numpy()
        centroids_y = track[centroid_y].to_numpy()
        centroids_z = None
        if centroid_z is not False:
            centroids_z = track[centroid_z].to_numpy()

        track_df.loc[in_track, "DISPLACEMENTS"] = compute_displacements(
            centroids_x, centroids_y, centroids_z
        )
        track_df.loc[in_track, "VELOCITIES"] = compute_velocities(
            centroids_x, centroids_y, centroids_z
        )
        track_df.loc[in_track, "MSD"] = compute_MSD(
            centroids_x, centroids_y, centroids_z
        )
157
+
158
+
159
def compute_displacements(
    centroids_x: np.ndarray, centroids_y: np.ndarray, centroids_z: Optional[np.ndarray]
) -> np.ndarray:
    """Compute displacement w.r.t origin.

    The origin is the first position of the track.

    Parameters
    ----------
    centroids_x: np.ndarray
        x-coordinates of the track
    centroids_y: np.ndarray
        y-coordinates of the track
    centroids_z: Optional[np.ndarray]
        z-coordinates of the track, None for 2D data

    Returns
    -------
    np.ndarray
        Euclidean distance of every position to the first position,
        empty if the track is empty
    """
    x = np.asarray(centroids_x, dtype=float)
    y = np.asarray(centroids_y, dtype=float)
    if x.size == 0:
        # no origin available, nothing to compute
        return np.zeros(0)

    squared = (x[0] - x) ** 2 + (y[0] - y) ** 2
    if centroids_z is not None:
        z = np.asarray(centroids_z, dtype=float)
        squared = squared + (z[0] - z) ** 2
    return np.sqrt(squared)
180
+
181
+
182
def compute_velocities(
    centroids_x: np.ndarray, centroids_y: np.ndarray, centroids_z: Optional[np.ndarray]
) -> np.ndarray:
    """Compute velocity.

    The velocity is returned as the distance travelled between two
    consecutive positions; it is not divided by a time step.

    Parameters
    ----------
    centroids_x: np.ndarray
        x-coordinates of the track
    centroids_y: np.ndarray
        y-coordinates of the track
    centroids_z: Optional[np.ndarray]
        z-coordinates of the track, None for 2D data

    Returns
    -------
    np.ndarray
        Step length for every position, the first entry is 0,
        empty if the track is empty
    """
    x = np.asarray(centroids_x, dtype=float)
    y = np.asarray(centroids_y, dtype=float)
    if x.size == 0:
        return np.zeros(0)

    squared_steps = np.diff(x) ** 2 + np.diff(y) ** 2
    if centroids_z is not None:
        z = np.asarray(centroids_z, dtype=float)
        squared_steps = squared_steps + np.diff(z) ** 2

    # the first position has no predecessor, its step length stays 0
    velocities = np.zeros(x.size)
    velocities[1:] = np.sqrt(squared_steps)
    return velocities
205
+
206
+
207
def compute_MSD(
    centroids_x: np.ndarray, centroids_y: np.ndarray, centroids_z: Optional[np.ndarray]
) -> np.ndarray:
    """Compute mean-squared displacement.

    Entry ``k`` of the result is the squared displacement between all pairs
    of positions that are ``k`` steps apart, averaged over these pairs.

    Parameters
    ----------
    centroids_x: np.ndarray
        x-coordinates of the track
    centroids_y: np.ndarray
        y-coordinates of the track
    centroids_z: Optional[np.ndarray]
        z-coordinates of the track, None for 2D data

    Returns
    -------
    np.ndarray
        Mean-squared displacement for every lag, the first entry (lag 0) is 0

    Notes
    -----
    Please find more information in
    Methods for cell and particle tracking.,
    Meijering E, Dzyubachyk O, Smal I.,
    Methods Enzymol. 2012;504:183-200.
    https://doi.org/10.1016/B978-0-12-391857-4.00009-4
    """
    x = np.asarray(centroids_x, dtype=float)
    y = np.asarray(centroids_y, dtype=float)
    z = None if centroids_z is None else np.asarray(centroids_z, dtype=float)

    n_points = x.size
    MSDs = np.zeros(n_points)
    for lag in range(1, n_points):
        # all pairs (i, i + lag) at once
        squared = (x[lag:] - x[:-lag]) ** 2 + (y[lag:] - y[:-lag]) ** 2
        if z is not None:
            squared = squared + (z[lag:] - z[:-lag]) ** 2
        MSDs[lag] = squared.mean()
    return MSDs
244
+
245
+
246
def get_squared_displacement(r0: tuple, r: tuple) -> float:
    """Return squared displacement between two points.

    Both points must have exactly three components. A component whose value
    in ``r0`` is None is left out of the sum.

    Raises
    ------
    ValueError
        If one of the points does not have three components
    """
    for point in (r0, r):
        if len(point) != 3:
            raise ValueError("Provide three-component coordinates")

    total = 0.0
    for start, end in zip(r0, r):
        if start is not None:
            total += (start - end) ** 2
    return total
260
+
261
+
262
def plot_trackscheme(
    df: pd.DataFrame,
    track_id_name: str = "TRACK_ID",
    time_id: str = "POSITION_T",
    cycle_percentage_id: str = "CELL_CYCLE_PERC_POST",
    figsize: tuple = (10, 30),
) -> None:
    """Plot tracks similar to TrackMate trackscheme.

    Every track is drawn as a column of dots: the rounded track ID is the
    x-position, the time step is the y-position.

    Parameters
    ----------
    df: pd.DataFrame
        DataFrame holding tracks
    track_id_name: str
        Name of column with track IDs
    time_id : str
        Name of column with time steps
    cycle_percentage_id: str
        Name of column with cell cycle percentage info
    figsize: tuple
        Size of matplotlib figure

    Notes
    -----
    A percentage column, which must contain values between 0 and 100
    is used to color the individual dots.
    Dots without a percentage (NaN) are drawn in black.
    """
    cmap_name = "cool"
    cmap = colormaps.get(cmap_name)
    plt.figure(figsize=figsize)

    # draw every track exactly once, not once per spot of the track
    track_ids = df[track_id_name].unique()
    for track_id in track_ids:
        in_track = df[track_id_name] == track_id
        time_points = df.loc[in_track, time_id]
        percentages = df.loc[in_track, cycle_percentage_id]
        dot_colors = [
            "black" if np.isnan(percentage) else cmap(percentage / 100.0)
            for percentage in percentages
        ]
        sc = plt.scatter(
            [round(track_id)] * len(time_points), time_points, color=dot_colors
        )
        # the colorbar below picks up the colormap of the last scatter plot
        sc.set_cmap(cmap_name)

    if len(track_ids) > 0:
        # the ticks do not depend on the track, set them once
        plt.xticks(np.arange(1, df[track_id_name].max(), step=1))

    cbar = plt.colorbar(ticks=[0, 0.5, 1], location="top")
    cbar.ax.set_xticklabels([0, 50, 100])
308
+
309
+
310
def split_trackmate_tracks(
    df: pd.DataFrame, track_id_name: str = "TRACK_ID", label_id_name: str = "name"
) -> None:
    """Split TrackMate tracks into subtracks.

    Parameters
    ----------
    df: pd.DataFrame
        DataFrame obtained from TrackMate XML, updated in place
    label_id_name: str
        Name of spots to split track IDs into unique tracks
    track_id_name: str
        Name of track ID column

    Notes
    -----
    TrackMate permits track splitting but then assigns
    the same Track ID for all subtracks.
    A way around this is to use a TrackMate action:
    https://forum.image.sc/t/how-to-identify-subtracks-in-tracking-csv/71474

    Use this action on your data first and then use this function
    to obtain unique Track IDs.
    The updated Track IDs are stored in a new column called
    `UNIQUE_TRACK_ID`. Spots whose label does not mark a subtrack
    (including spots without a label) keep their original Track ID.
    """
    # pattern to identify subtracks
    regex = r"Track_[0-9]+\.[a-z]+"
    # missing labels cannot be subtracks; na=False keeps the mask boolean
    is_subtrack = df[label_id_name].str.contains(regex, na=False)
    subtrack_labels = df.loc[is_subtrack, label_id_name]

    # new IDs continue after the highest existing track ID,
    # handed out in sorted order of the subtrack labels
    first_new_id = df[track_id_name].max() + 1
    mapping_of_subtracks = {
        subtrack: first_new_id + offset
        for offset, subtrack in enumerate(sorted(subtrack_labels.unique()))
    }

    df["UNIQUE_TRACK_ID"] = df[track_id_name].copy()
    if mapping_of_subtracks:
        # assign through df.loc so the DataFrame itself is modified
        # (a chained Series.update is not guaranteed to write back)
        df.loc[is_subtrack, "UNIQUE_TRACK_ID"] = subtrack_labels.map(
            mapping_of_subtracks
        )
354
+
355
+
356
+ # flake8: noqa: C901
357
def export_lineage_tree_to_svg(
    df: pd.DataFrame,
    trackmate_file: str,
    node_color_column: Optional[str] = None,
    stroke_width: Optional[float] = None,
) -> List[str]:
    """Write a lineage tree colored by FUCCI phases.

    The tree is written to the file ``lineage_tree.svg``.

    Parameters
    ----------
    df: pd.DataFrame
        DataFrame processed by fucciphase
    trackmate_file: str
        The original trackmate file
    node_color_column: Optional[str]
        Name of column to color nodes
    stroke_width: Optional[float]
        Width of edges connecting nodes


    Returns
    -------
    final_track_names: List[str]
        Names of tracks (from left to right)

    Raises
    ------
    RuntimeError
        If the first spot of a track does not match exactly one row of ``df``
    ValueError
        If ``node_color_column`` contains neither ``PERC`` nor ``PHASE``,
        or if a node ID is not found in ``df`` while coloring

    Notes
    -----
    This function currently only supports
    the standard FUCCISA sensor.
    """
    print("Warning: make sure that you updated the spot names using TrackMate actions!")
    # initialise lineage tree
    lt = lineageTree(trackmate_file, file_type="TrackMate")
    cmap_name = "cool"
    cmap = colormaps.get(cmap_name)
    # convert the spot IDs once instead of on every single lookup
    spot_ids = df["ID"].astype(int)

    def to_rgb_255(rgba_value: tuple) -> tuple:
        # scale the first three color components from 0..1 to 0..255
        return (255 * rgba_value[0], 255 * rgba_value[1], 255 * rgba_value[2])

    # filter spots that are not part of a track;
    # iterate over a snapshot because tracks are removed inside the loop
    for track in list(lt.all_tracks):
        spot_0 = track[0]
        track_name = df.loc[spot_ids == spot_0, "name"].values
        if len(track_name) != 1:
            raise RuntimeError("Illegal track found")
        if "Track" not in track_name[0]:
            lt.remove_track(track)

    node_color = None
    if node_color_column is not None:
        # NOTE(review): node_color_column only selects the coloring mode, the
        # values are read from the fixed columns below -- confirm this is intended
        if "PERC" in node_color_column:

            def node_color(id: int) -> tuple:
                color = df.loc[spot_ids == id, "CELL_CYCLE_PERC_DTW"].values
                if len(color) == 0:
                    raise ValueError("ID not in track")
                if np.isnan(color[0]):
                    # no percentage available: black node
                    return to_rgb_255((0, 0, 0))
                return to_rgb_255(cmap(color[0] / 100.0))

        elif "PHASE" in node_color_column:
            # position of each discrete phase on the colormap,
            # every other phase is mapped to the upper end
            phase_positions = {"G1": 0.0, "G1/S": 0.4}

            def node_color(id: int) -> tuple:
                phase = df.loc[spot_ids == id, "DISCRETE_PHASE_MAX"].values
                if len(phase) == 0:
                    raise ValueError("ID not in track")
                return to_rgb_255(cmap(phase_positions.get(phase[0], 1.0)))

        else:
            raise ValueError(
                "So far only discrete phases or percentages "
                "for 2-channel FUCCI sensors are supported."
            )

    stroke_width_function = None
    if stroke_width is not None:

        def stroke_width_function(id: int) -> float:
            # same width for every edge
            return stroke_width

    lt.write_to_svg(
        "lineage_tree.svg",
        node_color=node_color,
        node_color_map=cmap_name,
        stroke_width=stroke_width_function,
    )
    # name of the root spot of every remaining tree
    final_track_names = [
        df.loc[spot_ids == root, "name"].values[0] for root in lt.roots
    ]
    return final_track_names