celldetective-1.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. celldetective/__init__.py +2 -0
  2. celldetective/__main__.py +432 -0
  3. celldetective/datasets/segmentation_annotations/blank +0 -0
  4. celldetective/datasets/signal_annotations/blank +0 -0
  5. celldetective/events.py +149 -0
  6. celldetective/extra_properties.py +100 -0
  7. celldetective/filters.py +89 -0
  8. celldetective/gui/__init__.py +20 -0
  9. celldetective/gui/about.py +44 -0
  10. celldetective/gui/analyze_block.py +563 -0
  11. celldetective/gui/btrack_options.py +898 -0
  12. celldetective/gui/classifier_widget.py +386 -0
  13. celldetective/gui/configure_new_exp.py +532 -0
  14. celldetective/gui/control_panel.py +438 -0
  15. celldetective/gui/gui_utils.py +495 -0
  16. celldetective/gui/json_readers.py +113 -0
  17. celldetective/gui/measurement_options.py +1425 -0
  18. celldetective/gui/neighborhood_options.py +452 -0
  19. celldetective/gui/plot_signals_ui.py +1042 -0
  20. celldetective/gui/process_block.py +1055 -0
  21. celldetective/gui/retrain_segmentation_model_options.py +706 -0
  22. celldetective/gui/retrain_signal_model_options.py +643 -0
  23. celldetective/gui/seg_model_loader.py +460 -0
  24. celldetective/gui/signal_annotator.py +2388 -0
  25. celldetective/gui/signal_annotator_options.py +340 -0
  26. celldetective/gui/styles.py +217 -0
  27. celldetective/gui/survival_ui.py +903 -0
  28. celldetective/gui/tableUI.py +608 -0
  29. celldetective/gui/thresholds_gui.py +1300 -0
  30. celldetective/icons/logo-large.png +0 -0
  31. celldetective/icons/logo.png +0 -0
  32. celldetective/icons/signals_icon.png +0 -0
  33. celldetective/icons/splash-test.png +0 -0
  34. celldetective/icons/splash.png +0 -0
  35. celldetective/icons/splash0.png +0 -0
  36. celldetective/icons/survival2.png +0 -0
  37. celldetective/icons/vignette_signals2.png +0 -0
  38. celldetective/icons/vignette_signals2.svg +114 -0
  39. celldetective/io.py +2050 -0
  40. celldetective/links/zenodo.json +561 -0
  41. celldetective/measure.py +1258 -0
  42. celldetective/models/segmentation_effectors/blank +0 -0
  43. celldetective/models/segmentation_generic/blank +0 -0
  44. celldetective/models/segmentation_targets/blank +0 -0
  45. celldetective/models/signal_detection/blank +0 -0
  46. celldetective/models/tracking_configs/mcf7.json +68 -0
  47. celldetective/models/tracking_configs/ricm.json +203 -0
  48. celldetective/models/tracking_configs/ricm2.json +203 -0
  49. celldetective/neighborhood.py +717 -0
  50. celldetective/scripts/analyze_signals.py +51 -0
  51. celldetective/scripts/measure_cells.py +275 -0
  52. celldetective/scripts/segment_cells.py +212 -0
  53. celldetective/scripts/segment_cells_thresholds.py +140 -0
  54. celldetective/scripts/track_cells.py +206 -0
  55. celldetective/scripts/train_segmentation_model.py +246 -0
  56. celldetective/scripts/train_signal_model.py +49 -0
  57. celldetective/segmentation.py +712 -0
  58. celldetective/signals.py +2826 -0
  59. celldetective/tracking.py +974 -0
  60. celldetective/utils.py +1681 -0
  61. celldetective-1.0.2.dist-info/LICENSE +674 -0
  62. celldetective-1.0.2.dist-info/METADATA +192 -0
  63. celldetective-1.0.2.dist-info/RECORD +66 -0
  64. celldetective-1.0.2.dist-info/WHEEL +5 -0
  65. celldetective-1.0.2.dist-info/entry_points.txt +2 -0
  66. celldetective-1.0.2.dist-info/top_level.txt +1 -0
celldetective/tracking.py
@@ -0,0 +1,974 @@
+ import pandas as pd
+ import numpy as np
+ from tqdm import tqdm
+ from sklearn.preprocessing import StandardScaler
+ 
+ from btrack.io.utils import localizations_to_objects
+ from btrack import BayesianTracker
+ 
+ from celldetective.measure import measure_features
+ from celldetective.utils import rename_intensity_column
+ from celldetective.io import view_on_napari_btrack, interpret_tracking_configuration
+ 
+ from btrack.datasets import cell_config
+ import os
+ import subprocess
+ 
+ abs_path = os.sep.join([os.path.split(os.path.dirname(os.path.realpath(__file__)))[0], 'celldetective'])
+ 
+ def track(labels, configuration=None, stack=None, spatial_calibration=1, features=None, channel_names=None,
+           haralick_options=None, return_napari_data=False, view_on_napari=False, mask_timepoints=None,
+           mask_channels=None, volume=(2048, 2048), optimizer_options={'tm_lim': int(12e4)},
+           track_kwargs={'step_size': 100}, objects=None, clean_trajectories_kwargs=None,
+           column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'},
+           ):
+ 
+     """
+     Perform cell tracking on segmented labels using the bTrack library.
+ 
+     Parameters
+     ----------
+     labels : ndarray
+         The segmented labels representing cell objects.
+     configuration : Configuration or None
+         The bTrack configuration object. If None, a default configuration is used.
+     stack : ndarray or None, optional
+         The image stack corresponding to the labels. Default is None.
+     spatial_calibration : float, optional
+         The spatial calibration factor to convert pixel coordinates to physical units. Default is 1.
+     features : list or None, optional
+         The list of features to extract from the objects. If None, no additional features are extracted. Default is None.
+     channel_names : list or None, optional
+         The list of channel names corresponding to the image stack. Used for renaming intensity columns in the
+         output DataFrame. Default is None.
+     haralick_options : dict or None, optional
+         The options for Haralick feature extraction. If None, no Haralick features are extracted. Default is None.
+     return_napari_data : bool, optional
+         Whether to return the napari data dictionary along with the DataFrame. Default is False.
+     view_on_napari : bool, optional
+         Whether to view the tracking results in napari. Default is False.
+     mask_timepoints : list or None, optional
+         Frames to hide during tracking. Default is None.
+     mask_channels : list or None, optional
+         Channel names whose feature columns are dropped before tracking. Default is None.
+     volume : tuple, optional
+         The (width, height) extent of the tracking volume in pixels. Default is (2048, 2048).
+     optimizer_options : dict, optional
+         The options for the optimizer. Default is {'tm_lim': int(12e4)}.
+     track_kwargs : dict, optional
+         Additional keyword arguments for the bTrack tracker. Default is {'step_size': 100}.
+     objects : DataFrame or None, optional
+         Precomputed object features. If None, objects are extracted from `labels` and `stack`. Default is None.
+     clean_trajectories_kwargs : dict or None, optional
+         Keyword arguments for the clean_trajectories function to post-process the tracking trajectories. If None,
+         no post-processing is performed. Default is None.
+     column_labels : dict, optional
+         The column labels to use in the output DataFrame. Default is {'track': "TRACK_ID", 'time': 'FRAME',
+         'x': 'POSITION_X', 'y': 'POSITION_Y'}.
+ 
+     Returns
+     -------
+     DataFrame or tuple
+         If return_napari_data is False, returns the DataFrame containing the tracking results. If return_napari_data
+         is True, returns a tuple containing the DataFrame and the napari data dictionary.
+ 
+     Notes
+     -----
+     This function performs cell tracking on the segmented labels using the bTrack library. It extracts features from
+     the objects, normalizes the features, tracks the objects, and generates a DataFrame with the tracking results.
+     The DataFrame can be post-processed with the clean_trajectories function. If requested, the tracking results
+     can be visualized in napari.
+ 
+     Examples
+     --------
+     >>> labels = np.array([[1, 1, 2, 2, 0, 0],
+     ...                    [1, 1, 1, 2, 2, 0],
+     ...                    [0, 0, 1, 2, 0, 0]])
+     >>> configuration = cell_config()
+     >>> stack = np.random.rand(3, 6)
+     >>> df = track(labels, configuration, stack=stack, spatial_calibration=0.5)
+     >>> df.head()
+        TRACK_ID  FRAME  POSITION_Y  POSITION_X
+     0         0      0         0.0         0.0
+     1         0      1         0.0         0.0
+     2         0      2         0.0         0.0
+     3         1      0         0.5         0.5
+     4         1      1         0.5         0.5
+ 
+     """
+ 
+     configuration = interpret_tracking_configuration(configuration)
+ 
+     if objects is None:
+         objects = extract_objects_and_features(labels, stack, features,
+                                                channel_names=channel_names,
+                                                haralick_options=haralick_options,
+                                                mask_timepoints=mask_timepoints,
+                                                mask_channels=mask_channels,
+                                                )
+ 
+     # 1) standardize the feature columns (positions, time and class are left untouched)
+     columns = list(objects.columns)
+     to_remove = ['x', 'y', 'class_id', 't']
+     for tr in to_remove:
+         try:
+             columns.remove(tr)
+         except ValueError:
+             print(f'Column {tr} could not be found...')
+ 
+     scaler = StandardScaler()
+     if columns:
+         x = objects[columns].values
+         x_scaled = scaler.fit_transform(x)
+         df_temp = pd.DataFrame(x_scaled, columns=columns, index=objects.index)
+         objects[columns] = df_temp
+     else:
+         print('Warning: no features were passed to bTrack...')
+ 
+     # 2) track the objects
+     new_btrack_objects = localizations_to_objects(objects)
+ 
+     with BayesianTracker() as tracker:
+ 
+         tracker.configure(configuration)
+ 
+         if columns:
+             tracking_updates = ["motion", "visual"]
+             tracker.features = columns
+         else:
+             tracking_updates = ["motion"]
+ 
+         tracker.append(new_btrack_objects)
+         tracker.volume = ((0, volume[0]), (0, volume[1]))
+         tracker.track(tracking_updates=tracking_updates, **track_kwargs)
+         tracker.optimize(options=optimizer_options)
+ 
+         data, properties, graph = tracker.to_napari()
+ 
+     # 3) post-process the table and undo the feature scaling
+     df = pd.DataFrame(data, columns=[column_labels['track'], column_labels['time'], column_labels['y'], column_labels['x']])
+     df[column_labels['x']+'_um'] = df[column_labels['x']]*spatial_calibration
+     df[column_labels['y']+'_um'] = df[column_labels['y']]*spatial_calibration
+ 
+     df = df.merge(pd.DataFrame(properties), left_index=True, right_index=True)
+     if columns:
+         x = df[columns].values
+         x_scaled = scaler.inverse_transform(x)
+         df_temp = pd.DataFrame(x_scaled, columns=columns, index=df.index)
+         df[columns] = df_temp
+ 
+     df = df.sort_values(by=[column_labels['track'], column_labels['time']])
+ 
+     if channel_names is not None:
+         df = rename_intensity_column(df, channel_names)
+ 
+     df = write_first_detection_class(df, column_labels=column_labels)
+ 
+     if clean_trajectories_kwargs is not None:
+         df = clean_trajectories(df.copy(), **clean_trajectories_kwargs)
+ 
+     if view_on_napari:
+         view_on_napari_btrack(data, properties, graph, stack=stack, labels=labels, relabel=True)
+ 
+     if return_napari_data:
+         napari_data = {"data": data, "properties": properties, "graph": graph}
+         return df, napari_data
+     else:
+         return df
+ 
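A minimal, hypothetical call sketch for `track`, assuming a small synthetic (T, Y, X) label movie and btrack's bundled `cell_config`; the calibration value is illustrative, and the motion-only code path is taken since no stack or features are passed:

    import numpy as np
    from btrack.datasets import cell_config
    from celldetective.tracking import track

    labels = np.zeros((3, 64, 64), dtype=int)   # (T, Y, X) label movie
    labels[:, 10:20, 10:20] = 1                 # one static object in every frame
    df = track(labels, configuration=cell_config(), spatial_calibration=0.325, volume=(64, 64))
    print(df[['TRACK_ID', 'FRAME', 'POSITION_X', 'POSITION_Y']].head())
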
+ def extract_objects_and_features(labels, stack, features, channel_names=None, haralick_options=None, mask_timepoints=None, mask_channels=None):
+ 
+     """
+     Extract objects and features from segmented labels and an image stack.
+ 
+     Parameters
+     ----------
+     labels : ndarray
+         The segmented labels representing cell objects.
+     stack : ndarray
+         The image stack corresponding to the labels.
+     features : list or None
+         The list of features to extract from the objects. If None, no additional features are extracted.
+     channel_names : list or None, optional
+         The list of channel names corresponding to the image stack. Used for extracting Haralick features. Default is None.
+     haralick_options : dict or None, optional
+         The options for Haralick feature extraction. If None, no Haralick features are extracted. Default is None.
+     mask_timepoints : list or None, optional
+         Frames to hide during tracking. Default is None.
+     mask_channels : list or None, optional
+         Channel names whose feature columns are dropped from the output. Default is None.
+ 
+     Returns
+     -------
+     DataFrame
+         The DataFrame containing the extracted object features.
+ 
+     Notes
+     -----
+     This function extracts objects and features from the segmented labels and image stack. It computes the specified
+     features for each labeled object and returns a DataFrame containing the object features. Centroid coordinates are
+     always extracted. If Haralick features are enabled, they are computed from the image stack using the specified
+     options.
+ 
+     Examples
+     --------
+     >>> labels = np.array([[1, 1, 2, 2, 0, 0],
+     ...                    [1, 1, 1, 2, 2, 0],
+     ...                    [0, 0, 1, 2, 0, 0]])
+     >>> stack = np.random.rand(3, 6, 3)
+     >>> features = ['area', 'mean_intensity']
+     >>> df = extract_objects_and_features(labels, stack, features)
+ 
+     """
+ 
+     if features is None:
+         features = []
+ 
+     if stack is None:
+         haralick_options = None
+ 
+     if mask_timepoints is not None:
+         for f in mask_timepoints:
+             labels[f] = 0.
+ 
+     nbr_frames = len(labels)
+     timestep_dataframes = []
+ 
+     for t in tqdm(range(nbr_frames), desc='frame'):
+ 
+         if stack is not None:
+             img = stack[t]
+         else:
+             img = None
+ 
+         # compute the Haralick clipping range once, on the first frame
+         if (haralick_options is not None) and (t == 0) and (stack is not None):
+             if 'percentiles' not in haralick_options:
+                 haralick_options.update({'percentiles': (0.01, 99.99)})
+             if 'target_channel' not in haralick_options:
+                 haralick_options.update({'target_channel': 0})
+             haralick_percentiles = haralick_options['percentiles']
+             haralick_channel_index = haralick_options['target_channel']
+             min_value = np.nanpercentile(img[:, :, haralick_channel_index].flatten(), haralick_percentiles[0])
+             max_value = np.nanpercentile(img[:, :, haralick_channel_index].flatten(), haralick_percentiles[1])
+             haralick_options.update({'clip_values': (min_value, max_value)})
+ 
+         df_props = measure_features(img, labels[t], features=features+['centroid'], border_dist=None,
+                                     channels=channel_names, haralick_options=haralick_options, verbose=False)
+         df_props.rename(columns={'centroid-1': 'x', 'centroid-0': 'y'}, inplace=True)
+         df_props['t'] = int(t)
+         timestep_dataframes.append(df_props)
+ 
+     df = pd.concat(timestep_dataframes)
+     df.reset_index(inplace=True, drop=True)
+ 
+     if mask_channels is not None:
+         cols_to_drop = []
+         for mc in mask_channels:
+             columns = df.columns
+             col_contains = [mc in c for c in columns]
+             to_remove = np.array(columns)[np.array(col_contains)]
+             cols_to_drop.extend(to_remove)
+         if len(cols_to_drop) > 0:
+             df = df.drop(cols_to_drop, axis=1)
+ 
+     return df
+ 
+ 
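The code above fills in `percentiles` and `target_channel` defaults and derives `clip_values` on the first frame. A self-contained sketch of a call with an explicit options dict; the feature names and channel name are illustrative:

    import numpy as np
    labels = np.zeros((2, 32, 32), dtype=int)
    labels[:, 5:15, 5:15] = 1
    stack = np.random.rand(2, 32, 32, 1)        # (T, Y, X, C)
    haralick_options = {'target_channel': 0,    # channel used for texture computation
                        'percentiles': (0.01, 99.99)}  # intensity clipping range
    df = extract_objects_and_features(labels, stack, ['area', 'eccentricity'],
                                      channel_names=['brightfield'],
                                      haralick_options=haralick_options)
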
+ def clean_trajectories(trajectories, remove_not_in_first=False, remove_not_in_last=False,
+                        minimum_tracklength=0, interpolate_position_gaps=False,
+                        extrapolate_tracks_post=False,
+                        extrapolate_tracks_pre=False,
+                        interpolate_na=False,
+                        column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
+ 
+     """
+     Clean trajectories by applying various cleaning operations.
+ 
+     Parameters
+     ----------
+     trajectories : pandas.DataFrame
+         The input DataFrame containing trajectory data.
+     remove_not_in_first : bool, optional
+         Flag indicating whether to remove tracks not present in the first frame.
+         Defaults to False.
+     remove_not_in_last : bool, optional
+         Flag indicating whether to remove tracks not present in the last frame.
+         Defaults to False.
+     minimum_tracklength : int, optional
+         The minimum length of a track to be retained.
+         Defaults to 0.
+     interpolate_position_gaps : bool, optional
+         Flag indicating whether to interpolate position gaps in tracks.
+         Defaults to False.
+     extrapolate_tracks_post : bool, optional
+         Flag indicating whether to extrapolate tracks after the last known position.
+         Defaults to False.
+     extrapolate_tracks_pre : bool, optional
+         Flag indicating whether to extrapolate tracks before the first known position.
+         Defaults to False.
+     interpolate_na : bool, optional
+         Flag indicating whether to interpolate missing values in tracks.
+         Defaults to False.
+     column_labels : dict, optional
+         Dictionary specifying the column labels used in the input DataFrame.
+         The keys represent the following column labels:
+         - 'track': The column label for the track ID.
+         - 'time': The column label for the timestamp.
+         - 'x': The column label for the x-coordinate.
+         - 'y': The column label for the y-coordinate.
+         Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
+ 
+     Returns
+     -------
+     pandas.DataFrame
+         The cleaned DataFrame with trajectories.
+ 
+     Notes
+     -----
+     This function applies various cleaning operations to the input DataFrame containing trajectory data.
+     The cleaning operations are applied in the following order:
+     - Filtering tracks based on their length.
+     - Filtering tracks based on their endpoints.
+     - Extrapolating tracks after the last known position.
+     - Extrapolating tracks before the first known position.
+     - Interpolating position gaps in tracks.
+     - Interpolating missing values in tracks.
+ 
+     The input DataFrame is expected to have the following columns:
+     - track: The unique ID of each track.
+     - time: The timestamp of each data point.
+     - x: The x-coordinate of each data point.
+     - y: The y-coordinate of each data point.
+ 
+     Examples
+     --------
+     >>> cleaned_data = clean_trajectories(trajectories, remove_not_in_first=True, remove_not_in_last=True,
+     ...                                   minimum_tracklength=10, interpolate_position_gaps=True,
+     ...                                   extrapolate_tracks_post=True, extrapolate_tracks_pre=False,
+     ...                                   interpolate_na=True, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
+     >>> print(cleaned_data.head())
+ 
+     """
+ 
+     trajectories = trajectories.reset_index(drop=True)
+     trajectories.sort_values(by=[column_labels['track'], column_labels['time']], inplace=True)
+ 
+     if minimum_tracklength > 0:
+         trajectories = filter_by_tracklength(trajectories.copy(), minimum_tracklength, track_label=column_labels['track'])
+ 
+     if np.any([remove_not_in_first, remove_not_in_last]):
+         trajectories = filter_by_endpoints(trajectories.copy(), remove_not_in_first=remove_not_in_first,
+                                            remove_not_in_last=remove_not_in_last, column_labels=column_labels)
+ 
+     if np.any([extrapolate_tracks_post, extrapolate_tracks_pre]):
+         trajectories = extrapolate_tracks(trajectories.copy(), post=extrapolate_tracks_post,
+                                           pre=extrapolate_tracks_pre, column_labels=column_labels)
+ 
+     if interpolate_position_gaps:
+         trajectories = interpolate_time_gaps(trajectories.copy(), column_labels=column_labels)
+ 
+     if interpolate_na:
+         trajectories = interpolate_nan_properties(trajectories.copy(), track_label=column_labels['track'])
+ 
+     trajectories = trajectories.sort_values(by=[column_labels['track'], column_labels['time']])
+     trajectories.reset_index(inplace=True, drop=True)
+ 
+     # propagate the first-detection annotation to any rows created above
+     if 'class_firstdetection' in list(trajectories.columns):
+         for tid, track_group in trajectories.groupby(column_labels['track']):
+             indices = track_group.index
+ 
+             class_values = np.array(track_group['class_firstdetection'].unique())
+             class_values = class_values[class_values == class_values]  # drop NaN
+             t_values = np.array(track_group['t_firstdetection'].unique())
+             t_values = t_values[t_values == t_values]  # drop NaN
+             if len(class_values) == 0:
+                 class_values = 2
+                 t_values = -1
+             else:
+                 class_values = class_values[0]
+                 t_values = t_values[0]
+ 
+             trajectories.loc[indices, 'class_firstdetection'] = class_values
+             trajectories.loc[indices, 't_firstdetection'] = t_values
+ 
+     return trajectories
+ 
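A small end-to-end sketch on a toy table with illustrative values: track 1 is too short and is dropped, while the gap in track 0 is filled by interpolation:

    import pandas as pd
    toy = pd.DataFrame({'TRACK_ID': [0, 0, 0, 1, 1],
                        'FRAME': [0, 1, 3, 2, 3],  # track 0 misses frame 2
                        'POSITION_X': [0.0, 1.0, 3.0, 5.0, 6.0],
                        'POSITION_Y': [0.0, 0.0, 0.0, 2.0, 2.0]})
    cleaned = clean_trajectories(toy, minimum_tracklength=2, interpolate_position_gaps=True)
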
+ def interpolate_per_track(group_df):
+ 
+     """
+     Interpolate missing values within a track.
+ 
+     Parameters
+     ----------
+     group_df : pandas.DataFrame
+         The input DataFrame containing data for a single track.
+ 
+     Returns
+     -------
+     pandas.DataFrame
+         The interpolated DataFrame with missing values filled.
+ 
+     Notes
+     -----
+     This function performs linear interpolation to fill missing values within a track.
+     Missing values are interpolated based on the neighboring data points in the track.
+ 
+     """
+ 
+     interpolated_group = group_df.interpolate(method='linear', limit_direction="both")
+ 
+     return interpolated_group
+ 
+ def interpolate_nan_properties(trajectories, track_label="TRACK_ID"):
+ 
+     """
+     Interpolate missing values within tracks in the input DataFrame.
+ 
+     Parameters
+     ----------
+     trajectories : pandas.DataFrame
+         The input DataFrame containing trajectory data.
+     track_label : str, optional
+         The column label for the track ID.
+         Defaults to "TRACK_ID".
+ 
+     Returns
+     -------
+     pandas.DataFrame
+         The DataFrame with missing values interpolated within tracks.
+ 
+     Notes
+     -----
+     This function groups the input DataFrame by track ID and applies the `interpolate_per_track` function
+     to interpolate missing values within each track.
+     Missing values are interpolated based on the neighboring data points in each track.
+ 
+     The input DataFrame is expected to have a column with the specified `track_label` containing the track IDs.
+ 
+     Examples
+     --------
+     >>> interpolated_data = interpolate_nan_properties(trajectories, track_label="ID")
+     >>> print(interpolated_data.head())
+ 
+     """
+ 
+     trajectories = trajectories.groupby(track_label, group_keys=False).apply(interpolate_per_track)
+ 
+     return trajectories
+ 
+ 
+ def filter_by_endpoints(trajectories, remove_not_in_first=True, remove_not_in_last=False,
+                         column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
+ 
+     """
+     Filter trajectories based on their endpoints.
+ 
+     Parameters
+     ----------
+     trajectories : pandas.DataFrame
+         The input DataFrame containing trajectory data.
+     remove_not_in_first : bool, optional
+         Flag indicating whether to remove tracks not present in the first frame.
+         Defaults to True.
+     remove_not_in_last : bool, optional
+         Flag indicating whether to remove tracks not present in the last frame.
+         Defaults to False.
+     column_labels : dict, optional
+         Dictionary specifying the column labels used in the input DataFrame.
+         The keys represent the following column labels:
+         - 'track': The column label for the track ID.
+         - 'time': The column label for the timestamp.
+         - 'x': The column label for the x-coordinate.
+         - 'y': The column label for the y-coordinate.
+         Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
+ 
+     Returns
+     -------
+     pandas.DataFrame
+         The filtered DataFrame with trajectories based on their endpoints.
+ 
+     Notes
+     -----
+     This function filters the input DataFrame based on the endpoints of the trajectories.
+     The filtering can be performed in three modes:
+     - remove_not_in_first=True and remove_not_in_last=False: Remove tracks that are not present in the first frame.
+     - remove_not_in_first=False and remove_not_in_last=True: Remove tracks that are not present in the last frame.
+     - remove_not_in_first=True and remove_not_in_last=True: Remove tracks that are not present in both the first and last frames.
+ 
+     The input DataFrame is expected to have the following columns:
+     - track: The unique ID of each track.
+     - time: The timestamp of each data point.
+     - x: The x-coordinate of each data point.
+     - y: The y-coordinate of each data point.
+ 
+     Examples
+     --------
+     >>> filtered_data = filter_by_endpoints(trajectories, remove_not_in_first=True, remove_not_in_last=False, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
+     >>> print(filtered_data.head())
+ 
+     """
+ 
+     track_col = column_labels['track']
+     time_col = column_labels['time']
+     first_frames = trajectories.groupby(track_col)[time_col].min()
+     last_frames = trajectories.groupby(track_col)[time_col].max()
+ 
+     if remove_not_in_first and not remove_not_in_last:
+         # filter tracks not in the first frame
+         leftover_tracks = first_frames.index[first_frames == trajectories[time_col].min()]
+         trajectories = trajectories.loc[trajectories[track_col].isin(leftover_tracks)]
+ 
+     elif remove_not_in_last and not remove_not_in_first:
+         # filter tracks not in the last frame
+         leftover_tracks = last_frames.index[last_frames == trajectories[time_col].max()]
+         trajectories = trajectories.loc[trajectories[track_col].isin(leftover_tracks)]
+ 
+     elif remove_not_in_first and remove_not_in_last:
+         # filter tracks absent from either the first or the last frame
+         leftover_tracks = first_frames.index[(first_frames == trajectories[time_col].min())
+                                              & (last_frames == trajectories[time_col].max())]
+         trajectories = trajectories.loc[trajectories[track_col].isin(leftover_tracks)]
+ 
+     trajectories = trajectories.sort_values(by=[track_col, time_col])
+ 
+     return trajectories
+ 
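A quick sanity check on a toy table:

    import pandas as pd
    toy = pd.DataFrame({'TRACK_ID': [0, 0, 1], 'FRAME': [0, 1, 1],
                        'POSITION_X': [0.0, 1.0, 2.0], 'POSITION_Y': [0.0, 0.0, 1.0]})
    out = filter_by_endpoints(toy, remove_not_in_first=True)
    # track 1 first appears at frame 1, so it is removed; only track 0 survives
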
+ def filter_by_tracklength(trajectories, minimum_tracklength, track_label="TRACK_ID"):
+ 
+     """
+     Filter trajectories based on the minimum track length.
+ 
+     Parameters
+     ----------
+     trajectories : pandas.DataFrame
+         The input DataFrame containing trajectory data.
+     minimum_tracklength : int
+         The minimum length required for a track to be included.
+     track_label : str, optional
+         The column name in the DataFrame that represents the track ID.
+         Defaults to "TRACK_ID".
+ 
+     Returns
+     -------
+     pandas.DataFrame
+         The filtered DataFrame with trajectories that meet the minimum track length.
+ 
+     Notes
+     -----
+     This function removes any track from the input DataFrame whose length
+     (number of data points) does not exceed the specified minimum track length.
+ 
+     Examples
+     --------
+     >>> filtered_data = filter_by_tracklength(trajectories, 10, track_label="TrackID")
+     >>> print(filtered_data.head())
+ 
+     """
+ 
+     if minimum_tracklength > 0:
+         track_sizes = trajectories.groupby(track_label, group_keys=False).size()
+         leftover_tracks = track_sizes.index[track_sizes > minimum_tracklength]
+         trajectories = trajectories.loc[trajectories[track_label].isin(leftover_tracks)]
+ 
+     trajectories = trajectories.reset_index(drop=True)
+ 
+     return trajectories
+ 
+ 
+ def interpolate_time_gaps(trajectories, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
+ 
+     """
+     Interpolate time gaps in trajectories.
+ 
+     Parameters
+     ----------
+     trajectories : pandas.DataFrame
+         The input DataFrame containing trajectory data.
+     column_labels : dict, optional
+         Dictionary specifying the column labels used in the input DataFrame.
+         The keys represent the following column labels:
+         - 'track': The column label for the track ID.
+         - 'time': The column label for the timestamp.
+         - 'x': The column label for the x-coordinate.
+         - 'y': The column label for the y-coordinate.
+         Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
+ 
+     Returns
+     -------
+     pandas.DataFrame
+         The interpolated DataFrame with reduced time gaps in trajectories.
+ 
+     Notes
+     -----
+     This function fills time gaps in the input trajectories. Frame numbers are temporarily converted to
+     datetimes so that pandas can resample each track at a one-frame interval; the missing x and y coordinates
+     are then filled by linear interpolation and the frame numbers are converted back to floats.
+ 
+     The input DataFrame is expected to have the following columns:
+     - track: The unique ID of each track.
+     - time: The frame number of each data point.
+     - x: The x-coordinate of each data point.
+     - y: The y-coordinate of each data point.
+ 
+     Examples
+     --------
+     >>> interpolated_data = interpolate_time_gaps(trajectories, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
+     >>> print(interpolated_data.head())
+ 
+     """
+ 
+     trajectories[column_labels['time']] = pd.to_datetime(trajectories[column_labels['time']], unit='s')
+     trajectories.set_index(column_labels['track'], inplace=True)
+     trajectories = trajectories.groupby(column_labels['track'], group_keys=True).apply(
+         lambda x: x.set_index(column_labels['time']).resample('1S').asfreq()).reset_index()
+     trajectories[[column_labels['x'], column_labels['y']]] = trajectories.groupby(
+         column_labels['track'], group_keys=False)[[column_labels['x'], column_labels['y']]].apply(
+         lambda x: x.interpolate(method='linear'))
+     trajectories.reset_index(drop=True, inplace=True)
+     trajectories[column_labels['time']] = trajectories[column_labels['time']].astype('int64').astype(float) / 10**9
+     trajectories.sort_values(by=[column_labels['track'], column_labels['time']], inplace=True)
+ 
+     return trajectories
+ 
+ 
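The frame-to-datetime round trip above can be seen in isolation on a single gappy track (a sketch with illustrative values):

    import pandas as pd
    df = pd.DataFrame({'FRAME': [0, 1, 3], 'POSITION_X': [0.0, 1.0, 3.0]})
    df['FRAME'] = pd.to_datetime(df['FRAME'], unit='s')
    df = df.set_index('FRAME').resample('1S').asfreq().reset_index()
    df['POSITION_X'] = df['POSITION_X'].interpolate(method='linear')
    df['FRAME'] = df['FRAME'].astype('int64').astype(float) / 10**9
    # a row for frame 2.0 is created with POSITION_X == 2.0
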
+ def extrapolate_tracks(trajectories, post=False, pre=False, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
+ 
+     """
+     Extrapolate tracks in trajectories.
+ 
+     Parameters
+     ----------
+     trajectories : pandas.DataFrame
+         The input DataFrame containing trajectory data.
+     post : bool, optional
+         Flag indicating whether to perform post-extrapolation.
+         Defaults to False.
+     pre : bool, optional
+         Flag indicating whether to perform pre-extrapolation.
+         Defaults to False.
+     column_labels : dict, optional
+         Dictionary specifying the column labels used in the input DataFrame.
+         The keys represent the following column labels:
+         - 'track': The column label for the track ID.
+         - 'time': The column label for the timestamp.
+         - 'x': The column label for the x-coordinate.
+         - 'y': The column label for the y-coordinate.
+         Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
+ 
+     Returns
+     -------
+     pandas.DataFrame
+         The extrapolated DataFrame with extended tracks.
+ 
+     Notes
+     -----
+     This function extrapolates tracks in the input DataFrame by repeating the last known position
+     after the available data (post-extrapolation) or the first known position before it (pre-extrapolation).
+ 
+     The input DataFrame is expected to have the following columns:
+     - track: The unique ID of each track.
+     - time: The timestamp of each data point.
+     - x: The x-coordinate of each data point.
+     - y: The y-coordinate of each data point.
+ 
+     Examples
+     --------
+     >>> extrapolated_data = extrapolate_tracks(trajectories, post=True, pre=False, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
+     >>> print(extrapolated_data.head())
+ 
+     """
+ 
+     if post:
+ 
+         # get the maximum time T in the dataframe
+         max_time = trajectories[column_labels['time']].max()
+ 
+         # extrapolate each track until time T by repeating its last known position
+         df_extrapolated = pd.DataFrame()
+         for track_id, group in trajectories.groupby(column_labels['track']):
+             last_known_position = group.loc[group[column_labels['time']] <= max_time].tail(1)[[column_labels['time'], column_labels['x'], column_labels['y']]].values
+             extrapolated_frames = pd.DataFrame({column_labels['time']: np.arange(last_known_position[0][0] + 1, max_time + 1)})
+             extrapolated_positions = pd.DataFrame({column_labels['x']: last_known_position[0][1], column_labels['y']: last_known_position[0][2]}, index=np.arange(last_known_position[0][0] + 1, max_time + 1))
+             track_data = extrapolated_frames.join(extrapolated_positions, how="inner", on=column_labels['time'])
+             track_data[column_labels['track']] = track_id
+             df_extrapolated = pd.concat([df_extrapolated, track_data])
+ 
+         # concatenate the original dataframe and the extrapolated dataframe
+         trajectories = pd.concat([trajectories, df_extrapolated], axis=0)
+         # sort the dataframe by track and time
+         trajectories.sort_values([column_labels['track'], column_labels['time']], inplace=True)
+ 
+     if pre:
+ 
+         # extrapolate from time 0 rather than from the first timepoint in the dataframe
+         min_time = 0
+ 
+         # extrapolate each track back to time 0 by repeating its first known position;
+         # the inner join drops the frame at which the track actually starts
+         df_extrapolated = pd.DataFrame()
+         for track_id, group in trajectories.groupby(column_labels['track']):
+             first_known_position = group.loc[group[column_labels['time']] >= min_time].head(1)[[column_labels['time'], column_labels['x'], column_labels['y']]].values
+             extrapolated_frames = pd.DataFrame({column_labels['time']: np.arange(min_time, first_known_position[0][0] + 1)})
+             extrapolated_positions = pd.DataFrame({column_labels['x']: first_known_position[0][1], column_labels['y']: first_known_position[0][2]}, index=np.arange(min_time, first_known_position[0][0]))
+             track_data = extrapolated_frames.join(extrapolated_positions, how="inner", on=column_labels['time'])
+             track_data[column_labels['track']] = track_id
+             df_extrapolated = pd.concat([df_extrapolated, track_data])
+ 
+         # concatenate the original dataframe and the extrapolated dataframe
+         trajectories = pd.concat([trajectories, df_extrapolated], axis=0)
+ 
+         # sort the dataframe by track and time
+         trajectories.sort_values([column_labels['track'], column_labels['time']], inplace=True)
+ 
+     return trajectories
+ 
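A short check of the post-extrapolation behavior on a toy table:

    import pandas as pd
    toy = pd.DataFrame({'TRACK_ID': [0, 0, 1], 'FRAME': [0, 1, 0],
                        'POSITION_X': [0.0, 1.0, 5.0], 'POSITION_Y': [0.0, 0.0, 5.0]})
    out = extrapolate_tracks(toy, post=True)
    # track 1 gains a row at frame 1 repeating its last position (5.0, 5.0)
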
+ def compute_instantaneous_velocity(trajectories, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
+ 
+     """
+     Compute the instantaneous velocity for each point in the trajectories.
+ 
+     Parameters
+     ----------
+     trajectories : pandas.DataFrame
+         The input DataFrame containing trajectory data.
+     column_labels : dict, optional
+         A dictionary specifying the column labels for track ID, time, position X, and position Y.
+         Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
+ 
+     Returns
+     -------
+     pandas.DataFrame
+         The DataFrame with an added 'velocity' column representing the instantaneous velocity for each point.
+ 
+     Notes
+     -----
+     This function calculates the instantaneous velocity for each point in the trajectories.
+     The velocity is computed as the Euclidean distance traveled divided by the time difference between consecutive points.
+ 
+     The input DataFrame is expected to have columns with the specified column labels for track ID, time, position X, and position Y.
+ 
+     Examples
+     --------
+     >>> velocity_data = compute_instantaneous_velocity(trajectories)
+     >>> print(velocity_data.head())
+ 
+     """
+ 
+     # calculate the time differences and position differences
+     trajectories['dt'] = trajectories.groupby(column_labels['track'])[column_labels['time']].diff()
+     trajectories['dx'] = trajectories.groupby(column_labels['track'])[column_labels['x']].diff()
+     trajectories['dy'] = trajectories.groupby(column_labels['track'])[column_labels['y']].diff()
+ 
+     # calculate the instantaneous velocity
+     trajectories['velocity'] = np.sqrt(trajectories['dx']**2 + trajectories['dy']**2) / trajectories['dt']
+     trajectories = trajectories.drop(['dx', 'dy', 'dt'], axis=1)
+     trajectories = trajectories.sort_values(by=[column_labels['track'], column_labels['time']])
+ 
+     return trajectories
+ 
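A numeric check of the velocity formula, sqrt(dx^2 + dy^2) / dt, on a toy track:

    import pandas as pd
    toy = pd.DataFrame({'TRACK_ID': [0, 0, 0], 'FRAME': [0, 1, 2],
                        'POSITION_X': [0.0, 3.0, 3.0], 'POSITION_Y': [0.0, 4.0, 4.0]})
    out = compute_instantaneous_velocity(toy)
    # out['velocity'] -> [NaN, 5.0, 0.0]; the first step moves (3, 4) in one frame
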
+ def instantaneous_diffusion(positions_x, positions_y, timeline):
+ 
+     """
+     Compute the instantaneous diffusion coefficients for each position coordinate.
+ 
+     Parameters
+     ----------
+     positions_x : numpy.ndarray
+         Array of x-coordinates of positions.
+     positions_y : numpy.ndarray
+         Array of y-coordinates of positions.
+     timeline : numpy.ndarray
+         Array of corresponding time points.
+ 
+     Returns
+     -------
+     numpy.ndarray
+         Array of instantaneous diffusion coefficients for each position coordinate.
+ 
+     Notes
+     -----
+     The function calculates the instantaneous diffusion coefficients for each position coordinate (x, y)
+     based on the provided positions and timeline. The coefficient at each time point is computed as:
+         D = (x[t+1] - x[t-1])**2 / (2 * (t[t+1] - t[t-1])) + ((x[t+1] - x[t]) * (x[t] - x[t-1])) / (t[t+1] - t[t-1])
+     where x represents the position coordinate (x or y) and t represents the corresponding time point.
+     The first and last time points have no central difference and are left as NaN.
+ 
+     Examples
+     --------
+     >>> x = np.array([0, 1, 2, 3, 4, 5])
+     >>> y = np.array([0, 1, 4, 9, 16, 25])
+     >>> t = np.array([0, 1, 2, 3, 4, 5])
+     >>> diff = instantaneous_diffusion(x, y, t)
+     >>> print(diff)
+ 
+     """
+ 
+     diff = np.zeros((len(positions_x), 2))
+     diff[:, :] = np.nan
+ 
+     for t in range(1, len(positions_x)-1):
+         diff[t, 0] = (positions_x[t+1] - positions_x[t-1])**2/(2*(timeline[t+1] - timeline[t-1])) + 1/(timeline[t+1] - timeline[t-1])*((positions_x[t+1] - positions_x[t])*(positions_x[t] - positions_x[t-1]))
+ 
+     for t in range(1, len(positions_y)-1):
+         diff[t, 1] = (positions_y[t+1] - positions_y[t-1])**2/(2*(timeline[t+1] - timeline[t-1])) + 1/(timeline[t+1] - timeline[t-1])*((positions_y[t+1] - positions_y[t])*(positions_y[t] - positions_y[t-1]))
+ 
+     return diff
+ 
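A numeric check of the central-difference estimator: for uniform drift x = t, each interior x-estimate is (2**2)/(2*2) + (1/2)*(1*1) = 1.5:

    import numpy as np
    x = np.array([0.0, 1.0, 2.0, 3.0])
    y = np.zeros(4)
    t = np.array([0.0, 1.0, 2.0, 3.0])
    d = instantaneous_diffusion(x, y, t)
    # d[:, 0] -> [nan, 1.5, 1.5, nan]; endpoints have no central difference
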
+ def magnitude_diffusion(diffusion_vector):
+ 
+     """
+     Compute the magnitude of diffusion for each diffusion vector.
+ 
+     Parameters
+     ----------
+     diffusion_vector : numpy.ndarray
+         Array of diffusion vectors.
+ 
+     Returns
+     -------
+     numpy.ndarray
+         Array of magnitudes of diffusion.
+ 
+     Notes
+     -----
+     The function calculates the magnitude of diffusion for each diffusion vector (x, y) as the
+     Euclidean norm of the vector.
+ 
+     Examples
+     --------
+     >>> diffusion = np.array([[1.0, 2.0], [3.0, 4.0], [0.5, 0.5]])
+     >>> magnitudes = magnitude_diffusion(diffusion)
+     >>> print(magnitudes)
+ 
+     """
+ 
+     return np.sqrt(diffusion_vector[:, 0]**2 + diffusion_vector[:, 1]**2)
+ 
+ 
+ def compute_instantaneous_diffusion(trajectories, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
+ 
+     """
+     Compute the instantaneous diffusion for each track in the provided trajectories DataFrame.
+ 
+     Parameters
+     ----------
+     trajectories : DataFrame
+         The input DataFrame containing trajectories with position and time information.
+     column_labels : dict, optional
+         A dictionary specifying the column labels for track ID, time, x-coordinate, and y-coordinate.
+         The default is {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
+ 
+     Returns
+     -------
+     DataFrame
+         The modified DataFrame with an additional column "diffusion" containing the computed diffusion values.
+ 
+     Notes
+     -----
+     The instantaneous diffusion is calculated using the positions and times of each track. The diffusion values
+     are computed for each track individually and added as a new column "diffusion" in the output DataFrame.
+ 
+     Examples
+     --------
+     >>> trajectories = pd.DataFrame({'TRACK_ID': [1, 1, 1, 2, 2, 2],
+     ...                              'FRAME': [0, 1, 2, 0, 1, 2],
+     ...                              'POSITION_X': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
+     ...                              'POSITION_Y': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]})
+     >>> compute_instantaneous_diffusion(trajectories)
+     # Output DataFrame with added "diffusion" column
+ 
+     """
+ 
+     trajectories = trajectories.sort_values(by=[column_labels['track'], column_labels['time']])
+     trajectories['diffusion'] = np.nan
+ 
+     for tid, group in trajectories.groupby(column_labels['track']):
+ 
+         indices = group.index
+         x = group[column_labels['x']].to_numpy()
+         y = group[column_labels['y']].to_numpy()
+         t = group[column_labels['time']].to_numpy()
+ 
+         if len(x) > 3:  # need interior points (t-1, t, t+1) for the central difference
+             diff = instantaneous_diffusion(x, y, t)
+             d = magnitude_diffusion(diff)
+             trajectories.loc[indices, "diffusion"] = d
+ 
+     return trajectories
+ 
+ def track_at_position(pos, mode, return_tracks=False, view_on_napari=False, threads=1):
+ 
+     """
+     Run the tracking script on a single position folder.
+ 
+     The position folder is passed to scripts/track_cells.py in a subprocess; the resulting
+     trajectory table is written to output/tables/trajectories_{mode}.csv within the position
+     folder and optionally read back.
+     """
+ 
+     pos = pos.replace('\\', '/')
+     pos = rf"{pos}"
+     assert os.path.exists(pos), f'Position {pos} is not a valid path.'
+     if not pos.endswith('/'):
+         pos += '/'
+ 
+     script_path = os.sep.join([abs_path, 'scripts', 'track_cells.py'])
+     cmd = f'python "{script_path}" --pos "{pos}" --mode "{mode}" --threads "{threads}"'
+     subprocess.call(cmd, shell=True)
+ 
+     track_table = pos + os.sep.join(["output", "tables", f"trajectories_{mode}.csv"])
+     if return_tracks:
+         df = pd.read_csv(track_table)
+         return df
+     else:
+         return None
+ 
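A hedged usage sketch; the path is hypothetical and must point to a position folder laid out in the celldetective experiment format:

    df = track_at_position('/path/to/experiment/W1/100', 'targets', return_tracks=True)
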
+ def write_first_detection_class(tab, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
+ 
+     """
+     Annotates a dataframe with the time of the first detection and classifies tracks based on their detection status.
+ 
+     This function processes a dataframe containing tracking data, identifying the first point of detection for each
+     track based on the x-coordinate values. It annotates the dataframe with the time of the first detection and
+     assigns a class to each track indicating whether the first detection occurs at the start, during, or if there is
+     no detection within the tracking data.
+ 
+     Parameters
+     ----------
+     tab : pandas.DataFrame
+         The dataframe containing tracking data, expected to have columns for track ID, time, and spatial coordinates.
+     column_labels : dict, optional
+         A dictionary mapping standard column names ('track', 'time', 'x', 'y') to the corresponding column names in
+         `tab`. Default column names are 'TRACK_ID', 'FRAME', 'POSITION_X', 'POSITION_Y'.
+ 
+     Returns
+     -------
+     pandas.DataFrame
+         The input dataframe `tab` with two additional columns: 'class_firstdetection' indicating the detection class,
+         and 't_firstdetection' indicating the time of the first detection.
+ 
+     Notes
+     -----
+     - Detection is based on the presence of non-NaN values in the 'x' column for each track.
+     - Tracks with their first detection at the first time point are classified differently (`cclass=2`) and assigned
+       a `t_first` of -1, indicating no prior detection.
+     - The function assumes uniform time steps between each frame in the tracking data.
+ 
+     """
+ 
+     tab = tab.sort_values(by=[column_labels['track'], column_labels['time']])
+     for tid, track_group in tab.groupby(column_labels['track']):
+         indices = track_group.index
+         detection = track_group[column_labels['x']].values
+         timeline = track_group[column_labels['time']].values
+         if len(timeline) > 2:
+             dt = timeline[1] - timeline[0]
+             if np.any(detection == detection):
+                 t_first = timeline[detection == detection][0]
+                 cclass = 0
+                 if t_first == 0:
+                     t_first = -1
+                     cclass = 2
+                 else:
+                     t_first = float(t_first) - float(dt)
+             else:
+                 t_first = -1
+                 cclass = 2
+         else:
+             # tracks too short to classify are treated as having no detection
+             t_first = -1
+             cclass = 2
+ 
+         tab.loc[indices, 'class_firstdetection'] = cclass
+         tab.loc[indices, 't_firstdetection'] = t_first
+     return tab
+ 
+ 
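A small check of the annotation logic with illustrative values; the first non-NaN x appears at frame 2, so the event time is one frame earlier:

    import numpy as np
    import pandas as pd
    toy = pd.DataFrame({'TRACK_ID': [0, 0, 0, 0], 'FRAME': [0, 1, 2, 3],
                        'POSITION_X': [np.nan, np.nan, 2.0, 3.0],
                        'POSITION_Y': [0.0, 0.0, 0.0, 0.0]})
    out = write_first_detection_class(toy)
    # class_firstdetection == 0.0, t_firstdetection == 1.0
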
+ if __name__ == "__main__":
+ 
+     track_at_position("/home/limozin/Documents/Experiments/MinimumJan/W4/401",
+                       "targets",
+                       )