celldetective 1.4.1.post1__py3-none-any.whl → 1.5.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. celldetective/__init__.py +25 -0
  2. celldetective/__main__.py +62 -43
  3. celldetective/_version.py +1 -1
  4. celldetective/extra_properties.py +477 -399
  5. celldetective/filters.py +192 -97
  6. celldetective/gui/InitWindow.py +541 -411
  7. celldetective/gui/__init__.py +0 -15
  8. celldetective/gui/about.py +44 -39
  9. celldetective/gui/analyze_block.py +120 -84
  10. celldetective/gui/base/__init__.py +0 -0
  11. celldetective/gui/base/channel_norm_generator.py +335 -0
  12. celldetective/gui/base/components.py +249 -0
  13. celldetective/gui/base/feature_choice.py +92 -0
  14. celldetective/gui/base/figure_canvas.py +52 -0
  15. celldetective/gui/base/list_widget.py +133 -0
  16. celldetective/gui/{styles.py → base/styles.py} +92 -36
  17. celldetective/gui/base/utils.py +33 -0
  18. celldetective/gui/base_annotator.py +900 -767
  19. celldetective/gui/classifier_widget.py +642 -554
  20. celldetective/gui/configure_new_exp.py +777 -671
  21. celldetective/gui/control_panel.py +635 -524
  22. celldetective/gui/dynamic_progress.py +449 -0
  23. celldetective/gui/event_annotator.py +2023 -1662
  24. celldetective/gui/generic_signal_plot.py +1292 -944
  25. celldetective/gui/gui_utils.py +899 -1289
  26. celldetective/gui/interactions_block.py +658 -0
  27. celldetective/gui/interactive_timeseries_viewer.py +447 -0
  28. celldetective/gui/json_readers.py +48 -15
  29. celldetective/gui/layouts/__init__.py +5 -0
  30. celldetective/gui/layouts/background_model_free_layout.py +537 -0
  31. celldetective/gui/layouts/channel_offset_layout.py +134 -0
  32. celldetective/gui/layouts/local_correction_layout.py +91 -0
  33. celldetective/gui/layouts/model_fit_layout.py +372 -0
  34. celldetective/gui/layouts/operation_layout.py +68 -0
  35. celldetective/gui/layouts/protocol_designer_layout.py +96 -0
  36. celldetective/gui/pair_event_annotator.py +3130 -2435
  37. celldetective/gui/plot_measurements.py +586 -267
  38. celldetective/gui/plot_signals_ui.py +724 -506
  39. celldetective/gui/preprocessing_block.py +395 -0
  40. celldetective/gui/process_block.py +1678 -1831
  41. celldetective/gui/seg_model_loader.py +580 -473
  42. celldetective/gui/settings/__init__.py +0 -7
  43. celldetective/gui/settings/_cellpose_model_params.py +181 -0
  44. celldetective/gui/settings/_event_detection_model_params.py +95 -0
  45. celldetective/gui/settings/_segmentation_model_params.py +159 -0
  46. celldetective/gui/settings/_settings_base.py +77 -65
  47. celldetective/gui/settings/_settings_event_model_training.py +752 -526
  48. celldetective/gui/settings/_settings_measurements.py +1133 -964
  49. celldetective/gui/settings/_settings_neighborhood.py +574 -488
  50. celldetective/gui/settings/_settings_segmentation_model_training.py +779 -564
  51. celldetective/gui/settings/_settings_signal_annotator.py +329 -305
  52. celldetective/gui/settings/_settings_tracking.py +1304 -1094
  53. celldetective/gui/settings/_stardist_model_params.py +98 -0
  54. celldetective/gui/survival_ui.py +422 -312
  55. celldetective/gui/tableUI.py +1665 -1700
  56. celldetective/gui/table_ops/_maths.py +295 -0
  57. celldetective/gui/table_ops/_merge_groups.py +140 -0
  58. celldetective/gui/table_ops/_merge_one_hot.py +95 -0
  59. celldetective/gui/table_ops/_query_table.py +43 -0
  60. celldetective/gui/table_ops/_rename_col.py +44 -0
  61. celldetective/gui/thresholds_gui.py +382 -179
  62. celldetective/gui/viewers/__init__.py +0 -0
  63. celldetective/gui/viewers/base_viewer.py +700 -0
  64. celldetective/gui/viewers/channel_offset_viewer.py +331 -0
  65. celldetective/gui/viewers/contour_viewer.py +394 -0
  66. celldetective/gui/viewers/size_viewer.py +153 -0
  67. celldetective/gui/viewers/spot_detection_viewer.py +341 -0
  68. celldetective/gui/viewers/threshold_viewer.py +309 -0
  69. celldetective/gui/workers.py +304 -126
  70. celldetective/log_manager.py +92 -0
  71. celldetective/measure.py +1895 -1478
  72. celldetective/napari/__init__.py +0 -0
  73. celldetective/napari/utils.py +1025 -0
  74. celldetective/neighborhood.py +1914 -1448
  75. celldetective/preprocessing.py +1620 -1220
  76. celldetective/processes/__init__.py +0 -0
  77. celldetective/processes/background_correction.py +271 -0
  78. celldetective/processes/compute_neighborhood.py +894 -0
  79. celldetective/processes/detect_events.py +246 -0
  80. celldetective/processes/measure_cells.py +565 -0
  81. celldetective/processes/segment_cells.py +760 -0
  82. celldetective/processes/track_cells.py +435 -0
  83. celldetective/processes/train_segmentation_model.py +694 -0
  84. celldetective/processes/train_signal_model.py +265 -0
  85. celldetective/processes/unified_process.py +292 -0
  86. celldetective/regionprops/_regionprops.py +358 -317
  87. celldetective/relative_measurements.py +987 -710
  88. celldetective/scripts/measure_cells.py +313 -212
  89. celldetective/scripts/measure_relative.py +90 -46
  90. celldetective/scripts/segment_cells.py +165 -104
  91. celldetective/scripts/segment_cells_thresholds.py +96 -68
  92. celldetective/scripts/track_cells.py +198 -149
  93. celldetective/scripts/train_segmentation_model.py +324 -201
  94. celldetective/scripts/train_signal_model.py +87 -45
  95. celldetective/segmentation.py +844 -749
  96. celldetective/signals.py +3514 -2861
  97. celldetective/tracking.py +1332 -1011
  98. celldetective/utils/__init__.py +0 -0
  99. celldetective/utils/cellpose_utils/__init__.py +133 -0
  100. celldetective/utils/color_mappings.py +42 -0
  101. celldetective/utils/data_cleaning.py +630 -0
  102. celldetective/utils/data_loaders.py +450 -0
  103. celldetective/utils/dataset_helpers.py +207 -0
  104. celldetective/utils/downloaders.py +197 -0
  105. celldetective/utils/event_detection/__init__.py +8 -0
  106. celldetective/utils/experiment.py +1782 -0
  107. celldetective/utils/image_augmenters.py +308 -0
  108. celldetective/utils/image_cleaning.py +74 -0
  109. celldetective/utils/image_loaders.py +926 -0
  110. celldetective/utils/image_transforms.py +335 -0
  111. celldetective/utils/io.py +62 -0
  112. celldetective/utils/mask_cleaning.py +348 -0
  113. celldetective/utils/mask_transforms.py +5 -0
  114. celldetective/utils/masks.py +184 -0
  115. celldetective/utils/maths.py +351 -0
  116. celldetective/utils/model_getters.py +325 -0
  117. celldetective/utils/model_loaders.py +296 -0
  118. celldetective/utils/normalization.py +380 -0
  119. celldetective/utils/parsing.py +465 -0
  120. celldetective/utils/plots/__init__.py +0 -0
  121. celldetective/utils/plots/regression.py +53 -0
  122. celldetective/utils/resources.py +34 -0
  123. celldetective/utils/stardist_utils/__init__.py +104 -0
  124. celldetective/utils/stats.py +90 -0
  125. celldetective/utils/types.py +21 -0
  126. {celldetective-1.4.1.post1.dist-info → celldetective-1.5.0b0.dist-info}/METADATA +1 -1
  127. celldetective-1.5.0b0.dist-info/RECORD +187 -0
  128. {celldetective-1.4.1.post1.dist-info → celldetective-1.5.0b0.dist-info}/WHEEL +1 -1
  129. tests/gui/test_new_project.py +129 -117
  130. tests/gui/test_project.py +127 -79
  131. tests/test_filters.py +39 -15
  132. tests/test_notebooks.py +8 -0
  133. tests/test_tracking.py +425 -144
  134. tests/test_utils.py +123 -77
  135. celldetective/gui/base_components.py +0 -23
  136. celldetective/gui/layouts.py +0 -1602
  137. celldetective/gui/processes/compute_neighborhood.py +0 -594
  138. celldetective/gui/processes/measure_cells.py +0 -360
  139. celldetective/gui/processes/segment_cells.py +0 -499
  140. celldetective/gui/processes/track_cells.py +0 -303
  141. celldetective/gui/processes/train_segmentation_model.py +0 -270
  142. celldetective/gui/processes/train_signal_model.py +0 -108
  143. celldetective/gui/table_ops/merge_groups.py +0 -118
  144. celldetective/gui/viewers.py +0 -1354
  145. celldetective/io.py +0 -3663
  146. celldetective/utils.py +0 -3108
  147. celldetective-1.4.1.post1.dist-info/RECORD +0 -123
  148. /celldetective/{gui/processes → processes}/downloader.py +0 -0
  149. {celldetective-1.4.1.post1.dist-info → celldetective-1.5.0b0.dist-info}/entry_points.txt +0 -0
  150. {celldetective-1.4.1.post1.dist-info → celldetective-1.5.0b0.dist-info}/licenses/LICENSE +0 -0
  151. {celldetective-1.4.1.post1.dist-info → celldetective-1.5.0b0.dist-info}/top_level.txt +0 -0
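
Note on the layout change: this release deletes the monolithic celldetective/io.py (-3663 lines) and celldetective/utils.py (-3108 lines) and replaces them with the new celldetective/utils/ package, so downstream imports must be updated. A minimal before/after sketch, inferred from the tracking.py hunk below (only the three symbols visible in that hunk are shown; other relocations presumably follow the same pattern):

    # celldetective 1.4.1.post1
    from celldetective.utils import rename_intensity_column, velocity_per_track
    from celldetective.io import interpret_tracking_configuration

    # celldetective 1.5.0b0
    from celldetective.utils.maths import velocity_per_track
    from celldetective.utils.data_cleaning import rename_intensity_column
    from celldetective.utils.data_loaders import interpret_tracking_configuration
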
celldetective/tracking.py CHANGED
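Note: the hunk below rewrites the whole module. Besides a Black-style reformat, the imports move to the new utils subpackage, clean_trajectories reorders its cleaning steps and resets indices between them, and interpolate_nan_properties changes how the groupby key is removed from the index. For orientation, a minimal, untested sketch of the trackpy fallback path of track() (argument values are illustrative): when btrack_option=False, both search_range and memory must be provided, otherwise the function prints a warning and returns None.

    import numpy as np
    from celldetective.tracking import track

    labels = np.random.randint(0, 3, (5, 64, 64))  # toy (T, Y, X) label stack
    # trackpy linking instead of bTrack; search_range is in pixels, memory in frames
    df = track(labels, btrack_option=False, search_range=10.0, memory=2)
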
@@ -7,1044 +7,1365 @@ from btrack.io.utils import localizations_to_objects
 from btrack import BayesianTracker
 
 from celldetective.measure import measure_features
-from celldetective.utils import rename_intensity_column, velocity_per_track
-from celldetective.io import interpret_tracking_configuration
+from celldetective.utils.maths import velocity_per_track
+from celldetective.utils.data_cleaning import rename_intensity_column
+from celldetective.utils.data_loaders import interpret_tracking_configuration
 
 import os
 import subprocess
 import trackpy as tp
 
-abs_path = os.sep.join([os.path.split(os.path.dirname(os.path.realpath(__file__)))[0],'celldetective'])
-
-def track(labels, configuration=None, stack=None, spatial_calibration=1, features=None, channel_names=None,
-          haralick_options=None, return_napari_data=False, view_on_napari=False, mask_timepoints=None, mask_channels=None, volume=(2048,2048),
-          optimizer_options = {'tm_lim': int(12e4)}, track_kwargs={'step_size': 100}, objects=None,
-          clean_trajectories_kwargs=None, btrack_option=True, search_range=None, memory=None, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'},
-          ):
-
-    """
-
-    Perform cell tracking on segmented labels using the bTrack library.
-
-    Parameters
-    ----------
-    labels : ndarray
-        The segmented labels representing cell objects.
-    configuration : Configuration or None
-        The bTrack configuration object. If None, a default configuration is used.
-    stack : ndarray or None, optional
-        The image stack corresponding to the labels. Default is None.
-    spatial_calibration : float, optional
-        The spatial calibration factor to convert pixel coordinates to physical units. Default is 1.
-    features : list or None, optional
-        The list of features to extract from the objects. If None, no additional features are extracted. Default is None.
-    channel_names : list or None, optional
-        The list of channel names corresponding to the image stack. Used for renaming intensity columns in the output DataFrame.
-        Default is None.
-    haralick_options : dict or None, optional
-        The options for Haralick feature extraction. If None, no Haralick features are extracted. Default is None.
-    return_napari_data : bool, optional
-        Whether to return the napari data dictionary along with the DataFrame. Default is False.
-    view_on_napari : bool, optional
-        Whether to view the tracking results on napari. Default is False.
-    optimizer_options : dict, optional
-        The options for the optimizer. Default is {'tm_lim': int(12e4)}.
-    track_kwargs : dict, optional
-        Additional keyword arguments for the bTrack tracker. Default is {'step_size': 100}.
-    clean_trajectories_kwargs : dict or None, optional
-        Keyword arguments for the clean_trajectories function to post-process the tracking trajectories. If None, no post-processing is performed.
-        Default is None.
-    column_labels : dict, optional
-        The column labels to use in the output DataFrame. Default is {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
-
-    Returns
-    -------
-    DataFrame or tuple
-        If return_napari_data is False, returns the DataFrame containing the tracking results. If return_napari_data is True, returns a tuple
-        containing the DataFrame and the napari data dictionary.
-
-    Notes
-    -----
-    This function performs cell tracking on the segmented labels using the bTrack library. It extracts features from the objects, normalizes
-    the features, tracks the objects, and generates a DataFrame with the tracking results. The DataFrame can be post-processed using the
-    clean_trajectories function. If specified, the tracking results can be visualized on napari.
-
-    Examples
-    --------
-    >>> labels = np.array([[1, 1, 2, 2, 0, 0],
-                           [1, 1, 1, 2, 2, 0],
-                           [0, 0, 1, 2, 0, 0]])
-    >>> configuration = cell_config()
-    >>> stack = np.random.rand(3, 6)
-    >>> df = track(labels, configuration, stack=stack, spatial_calibration=0.5)
-    >>> df.head()
-
-       TRACK_ID  FRAME  POSITION_Y  POSITION_X
-    0         0      0         0.0         0.0
-    1         0      1         0.0         0.0
-    2         0      2         0.0         0.0
-    3         1      0         0.5         0.5
-    4         1      1         0.5         0.5
-
-    """
-
-    configuration = interpret_tracking_configuration(configuration)
-
-    if objects is None:
-
-        if not btrack_option:
-            features = []
-            channel_names = None
-            haralick_options = None
-
-        objects = extract_objects_and_features(labels, stack, features,
-                                               channel_names=channel_names,
-                                               haralick_options=haralick_options,
-                                               mask_timepoints=mask_timepoints,
-                                               mask_channels=mask_channels,
-                                               )
-
-    if btrack_option:
-        columns = list(objects.columns)
-        to_remove = ['x','y','class_id','t']
-        for tr in to_remove:
-            try:
-                columns.remove(tr)
-            except:
-                print(f'column {tr} could not be found...')
-
-        scaler = StandardScaler()
-        if columns:
-            x = objects[columns].values
-            x_scaled = scaler.fit_transform(x)
-            df_temp = pd.DataFrame(x_scaled, columns=columns, index = objects.index)
-            objects[columns] = df_temp
-        else:
-            print('Warning: no features were passed to bTrack...')
-
-        # 2) track the objects
-        new_btrack_objects = localizations_to_objects(objects)
-
-        with BayesianTracker() as tracker:
-
-            tracker.configure(configuration)
-
-            if columns:
-                tracking_updates = ["motion","visual"]
-                #tracker.tracking_updates = ["motion","visual"]
-                tracker.features = columns
-            else:
-                tracking_updates = ["motion"]
-
-            tracker.append(new_btrack_objects)
-            tracker.volume = ((0,volume[0]), (0,volume[1]), (-1e5, 1e5)) #(-1e5, 1e5)
-            #print(tracker.volume)
-            tracker.track(tracking_updates=tracking_updates, **track_kwargs)
-            tracker.optimize(options=optimizer_options)
-
-            data, properties, graph = tracker.to_napari() #ndim=2
-        # do the table post processing and napari options
-        if data.shape[1]==4:
-            df = pd.DataFrame(data, columns=[column_labels['track'],column_labels['time'],column_labels['y'],column_labels['x']])
-        elif data.shape[1]==5:
-            df = pd.DataFrame(data, columns=[column_labels['track'],column_labels['time'],"z",column_labels['y'],column_labels['x']])
-            df = df.drop(columns=['z'])
-        df[column_labels['x']+'_um'] = df[column_labels['x']]*spatial_calibration
-        df[column_labels['y']+'_um'] = df[column_labels['y']]*spatial_calibration
-
-    else:
-        properties = None
-        graph = {}
-        print(f"{objects=} {objects.columns=}")
-        objects = objects.rename(columns={"t": "frame"})
-        if search_range is not None and memory is not None:
-            data = tp.link(objects, search_range, memory=memory, link_strategy='auto')
-        else:
-            print('Please provide a valid search range and memory value...')
-            return None
-        data['particle'] = data['particle'] + 1  # force track id to start at 1
-        df = data.rename(columns={'frame': column_labels['time'], 'x': column_labels['x'], 'y': column_labels['y'], 'particle': column_labels['track']})
-        df['state'] = 5.0; df['generation'] = 0.0; df['root'] = 1.0; df['parent'] = 1.0; df['dummy'] = False; df['z'] = 0.0;
-        data = df[[column_labels['track'],column_labels['time'],"z",column_labels['y'],column_labels['x']]].to_numpy()
-        print(f"{df=}")
-
-    if btrack_option:
-        df = df.merge(pd.DataFrame(properties),left_index=True, right_index=True)
-        if columns:
-            x = df[columns].values
-            x_scaled = scaler.inverse_transform(x)
-            df_temp = pd.DataFrame(x_scaled, columns=columns, index = df.index)
-            df[columns] = df_temp
-
-        # set dummy features to NaN
-        df.loc[df['dummy'],['class_id']+columns] = np.nan
-
-    df = df.sort_values(by=[column_labels['track'],column_labels['time']])
-    df = velocity_per_track(df, window_size=3, mode='bi')
-
-    if channel_names is not None:
-        df = rename_intensity_column(df, channel_names)
-
-    df = write_first_detection_class(df, img_shape=volume, column_labels=column_labels)
-
-    if clean_trajectories_kwargs is not None:
-        df = clean_trajectories(df.copy(),**clean_trajectories_kwargs)
-
-    df.loc[df["status_firstdetection"].isna(), "status_firstdetection"] = 0
-    df['ID'] = np.arange(len(df)).astype(int)
-
-    invalid_cols = [c for c in list(df.columns) if c.startswith('Unnamed')]
-    if len(invalid_cols)>0:
-        df = df.drop(invalid_cols, axis=1)
-
-    # if view_on_napari:
-    #     view_on_napari_btrack(data,properties,graph,stack=stack,labels=labels,relabel=True)
-
-    if return_napari_data:
-        napari_data = {"data": data, "properties": properties, "graph": graph}
-        return df, napari_data
-    else:
-        return df
-
-def extract_objects_and_features(labels, stack, features, channel_names=None, haralick_options=None, mask_timepoints=None, mask_channels=None):
-
-    """
-
-    Extract objects and features from segmented labels and image stack.
-
-    Parameters
-    ----------
-    labels : ndarray
-        The segmented labels representing cell objects.
-    stack : ndarray
-        The image stack corresponding to the labels.
-    features : list or None
-        The list of features to extract from the objects. If None, no additional features are extracted.
-    channel_names : list or None, optional
-        The list of channel names corresponding to the image stack. Used for extracting Haralick features. Default is None.
-    haralick_options : dict or None, optional
-        The options for Haralick feature extraction. If None, no Haralick features are extracted. Default is None.
-    mask_timepoints : list or None, optional
-        Frames to hide during tracking.
-
-    Returns
-    -------
-    DataFrame
-        The DataFrame containing the extracted object features.
-
-    Notes
-    -----
-    This function extracts objects and features from the segmented labels and image stack. It computes the specified features for each
-    labeled object and returns a DataFrame containing the object features. Additional features such as centroid coordinates can also
-    be extracted. If Haralick features are enabled, they are computed based on the image stack using the specified options.
-
-    Examples
-    --------
-    >>> labels = np.array([[1, 1, 2, 2, 0, 0],
-                           [1, 1, 1, 2, 2, 0],
-                           [0, 0, 1, 2, 0, 0]])
-    >>> stack = np.random.rand(3, 6, 3)
-    >>> features = ['area', 'mean_intensity']
-    >>> df = extract_objects_and_features(labels, stack, features)
-
-    """
-
-    if features is None:
-        features = []
-
-    if stack is None:
-        haralick_options = None
-
-    if mask_timepoints is not None:
-        for f in mask_timepoints:
-            labels[f] = 0.
-
-    nbr_frames = len(labels)
-    timestep_dataframes = []
-
-    for t in tqdm(range(nbr_frames),desc='frame'):
-
-        if stack is not None:
-            img = stack[t]
-        else:
-            img = None
-
-        if (haralick_options is not None) and (t==0) and (stack is not None):
-            if not 'percentiles' in haralick_options:
-                haralick_options.update({'percentiles': (0.01,99.99)})
-            if not 'target_channel' in haralick_options:
-                haralick_options.update({'target_channel': 0})
-            haralick_percentiles = haralick_options['percentiles']
-            haralick_channel_index = haralick_options['target_channel']
-            min_value = np.nanpercentile(img[:,:,haralick_channel_index].flatten(), haralick_percentiles[0])
-            max_value = np.nanpercentile(img[:,:,haralick_channel_index].flatten(), haralick_percentiles[1])
-            haralick_options.update({'clip_values': (min_value, max_value)})
-
-        df_props = measure_features(img, labels[t], features = features+['centroid'], border_dist=None,
-                                    channels=channel_names, haralick_options=haralick_options, verbose=False)
-        df_props.rename(columns={'centroid-1': 'x', 'centroid-0': 'y'},inplace=True)
-        df_props['t'] = int(t)
-        timestep_dataframes.append(df_props)
-
-    df = pd.concat(timestep_dataframes)
-    df.reset_index(inplace=True, drop=True)
-
-    if mask_channels is not None:
-        cols_to_drop = []
-        for mc in mask_channels:
-            columns = df.columns
-            col_contains = [mc in c for c in columns]
-            to_remove = np.array(columns)[np.array(col_contains)]
-            cols_to_drop.extend(to_remove)
-        if len(cols_to_drop)>0:
-            df = df.drop(cols_to_drop, axis=1)
-
-    return df
-
-
-def clean_trajectories(trajectories,remove_not_in_first=False,remove_not_in_last=False,
-                       minimum_tracklength=0, interpolate_position_gaps=False,
-                       extrapolate_tracks_post=False,
-                       extrapolate_tracks_pre=False,
-                       interpolate_na=False,
-                       column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
-
-    """
-    Clean trajectories by applying various cleaning operations.
-
-    Parameters
-    ----------
-    trajectories : pandas.DataFrame
-        The input DataFrame containing trajectory data.
-    remove_not_in_first : bool, optional
-        Flag indicating whether to remove tracks not present in the first frame.
-        Defaults to False.
-    remove_not_in_last : bool, optional
-        Flag indicating whether to remove tracks not present in the last frame.
-        Defaults to False.
-    minimum_tracklength : int, optional
-        The minimum length of a track to be retained.
-        Defaults to 0.
-    interpolate_position_gaps : bool, optional
-        Flag indicating whether to interpolate position gaps in tracks.
-        Defaults to False.
-    extrapolate_tracks_post : bool, optional
-        Flag indicating whether to extrapolate tracks after the last known position.
-        Defaults to False.
-    extrapolate_tracks_pre : bool, optional
-        Flag indicating whether to extrapolate tracks before the first known position.
-        Defaults to False.
-    interpolate_na : bool, optional
-        Flag indicating whether to interpolate missing values in tracks.
-        Defaults to False.
-    column_labels : dict, optional
-        Dictionary specifying the column labels used in the input DataFrame.
-        The keys represent the following column labels:
-        - 'track': The column label for the track ID.
-        - 'time': The column label for the timestamp.
-        - 'x': The column label for the x-coordinate.
-        - 'y': The column label for the y-coordinate.
-        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
-
-    Returns
-    -------
-    pandas.DataFrame
-        The cleaned DataFrame with trajectories.
-
-    Notes
-    -----
-    This function applies various cleaning operations to the input DataFrame containing trajectory data.
-    The cleaning operations include:
-    - Filtering tracks based on their endpoints.
-    - Filtering tracks based on their length.
-    - Interpolating position gaps in tracks.
-    - Extrapolating tracks after the last known position.
-    - Extrapolating tracks before the first known position.
-    - Interpolating missing values in tracks.
-
-    The input DataFrame is expected to have the following columns:
-    - track: The unique ID of each track.
-    - time: The timestamp of each data point.
-    - x: The x-coordinate of each data point.
-    - y: The y-coordinate of each data point.
-
-    Examples
-    --------
-    >>> cleaned_data = clean_trajectories(trajectories, remove_not_in_first=True, remove_not_in_last=True,
-    ...                                   minimum_tracklength=10, interpolate_position_gaps=True,
-    ...                                   extrapolate_tracks_post=True, extrapolate_tracks_pre=False,
-    ...                                   interpolate_na=True, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
-    >>> print(cleaned_data.head())
-
-    """
-
-    trajectories.reset_index
-    trajectories.sort_values(by=[column_labels['track'],column_labels['time']],inplace=True)
-
-    if minimum_tracklength>0:
-        trajectories = filter_by_tracklength(trajectories.copy(), minimum_tracklength, track_label=column_labels['track'])
-
-    if np.any([remove_not_in_first, remove_not_in_last]):
-        trajectories = filter_by_endpoints(trajectories.copy(), remove_not_in_first=remove_not_in_first,
-                                           remove_not_in_last=remove_not_in_last, column_labels=column_labels)
-
-    if np.any([extrapolate_tracks_post, extrapolate_tracks_pre]):
-        trajectories = extrapolate_tracks(trajectories.copy(), post=extrapolate_tracks_post,
-                                          pre=extrapolate_tracks_pre, column_labels=column_labels)
-
-    if interpolate_position_gaps:
-        trajectories = interpolate_time_gaps(trajectories.copy(), column_labels=column_labels)
-
-    if interpolate_na:
-        trajectories = interpolate_nan_properties(trajectories.copy(), track_label=column_labels['track'])
-
-    trajectories = trajectories.sort_values(by=[column_labels['track'],column_labels['time']])
-    trajectories.reset_index(inplace=True, drop=True)
-
-    if 'class_firstdetection' in list(trajectories.columns):
-        for tid, track_group in trajectories.groupby(column_labels['track']):
-            indices = track_group.index
-
-            class_values = np.array(track_group['class_firstdetection'].unique())
-            class_values = class_values[class_values==class_values]
-            t_values = np.array(track_group['t_firstdetection'].unique())
-            t_values = t_values[t_values==t_values]
-            if len(class_values)==0:
-                class_values = 2
-                t_values = -1
-            else:
-                class_values = class_values[0]
-                t_values = t_values[0]
-
-            trajectories.loc[indices, 'class_firstdetection'] = class_values
-            trajectories.loc[indices, 't_firstdetection'] = t_values
-
-    return trajectories
+abs_path = os.sep.join(
+    [os.path.split(os.path.dirname(os.path.realpath(__file__)))[0], "celldetective"]
+)
+
+
+def track(
+    labels,
+    configuration=None,
+    stack=None,
+    spatial_calibration=1,
+    features=None,
+    channel_names=None,
+    haralick_options=None,
+    return_napari_data=False,
+    view_on_napari=False,
+    mask_timepoints=None,
+    mask_channels=None,
+    volume=(2048, 2048),
+    optimizer_options={"tm_lim": int(12e4)},
+    track_kwargs={"step_size": 100},
+    objects=None,
+    clean_trajectories_kwargs=None,
+    btrack_option=True,
+    search_range=None,
+    memory=None,
+    column_labels={
+        "track": "TRACK_ID",
+        "time": "FRAME",
+        "x": "POSITION_X",
+        "y": "POSITION_Y",
+    },
+):
+    """
+
+    Perform cell tracking on segmented labels using the bTrack library.
+
+    Parameters
+    ----------
+    labels : ndarray
+        The segmented labels representing cell objects.
+    configuration : Configuration or None
+        The bTrack configuration object. If None, a default configuration is used.
+    stack : ndarray or None, optional
+        The image stack corresponding to the labels. Default is None.
+    spatial_calibration : float, optional
+        The spatial calibration factor to convert pixel coordinates to physical units. Default is 1.
+    features : list or None, optional
+        The list of features to extract from the objects. If None, no additional features are extracted. Default is None.
+    channel_names : list or None, optional
+        The list of channel names corresponding to the image stack. Used for renaming intensity columns in the output DataFrame.
+        Default is None.
+    haralick_options : dict or None, optional
+        The options for Haralick feature extraction. If None, no Haralick features are extracted. Default is None.
+    return_napari_data : bool, optional
+        Whether to return the napari data dictionary along with the DataFrame. Default is False.
+    view_on_napari : bool, optional
+        Whether to view the tracking results on napari. Default is False.
+    optimizer_options : dict, optional
+        The options for the optimizer. Default is {'tm_lim': int(12e4)}.
+    track_kwargs : dict, optional
+        Additional keyword arguments for the bTrack tracker. Default is {'step_size': 100}.
+    clean_trajectories_kwargs : dict or None, optional
+        Keyword arguments for the clean_trajectories function to post-process the tracking trajectories. If None, no post-processing is performed.
+        Default is None.
+    column_labels : dict, optional
+        The column labels to use in the output DataFrame. Default is {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
+
+    Returns
+    -------
+    DataFrame or tuple
+        If return_napari_data is False, returns the DataFrame containing the tracking results. If return_napari_data is True, returns a tuple
+        containing the DataFrame and the napari data dictionary.
+
+    Notes
+    -----
+    This function performs cell tracking on the segmented labels using the bTrack library. It extracts features from the objects, normalizes
+    the features, tracks the objects, and generates a DataFrame with the tracking results. The DataFrame can be post-processed using the
+    clean_trajectories function. If specified, the tracking results can be visualized on napari.
+
+    Examples
+    --------
+
+    >>> labels = np.array([[1, 1, 2, 2, 0, 0],
+                           [1, 1, 1, 2, 2, 0],
+                           [0, 0, 1, 2, 0, 0]])
+    >>> configuration = cell_config()
+    >>> stack = np.random.rand(3, 6)
+    >>> df = track(labels, configuration, stack=stack, spatial_calibration=0.5)
+    >>> df.head()
+
+       TRACK_ID  FRAME  POSITION_Y  POSITION_X
+    0         0      0         0.0         0.0
+    1         0      1         0.0         0.0
+    2         0      2         0.0         0.0
+    3         1      0         0.5         0.5
+    4         1      1         0.5         0.5
+
+    """
+
+    configuration = interpret_tracking_configuration(configuration)
+
+    if objects is None:
+
+        if not btrack_option:
+            features = []
+            channel_names = None
+            haralick_options = None
+
+        objects = extract_objects_and_features(
+            labels,
+            stack,
+            features,
+            channel_names=channel_names,
+            haralick_options=haralick_options,
+            mask_timepoints=mask_timepoints,
+            mask_channels=mask_channels,
+        )
+
+    if btrack_option:
+        columns = list(objects.columns)
+        to_remove = ["x", "y", "class_id", "t"]
+        for tr in to_remove:
+            try:
+                columns.remove(tr)
+            except:
+                print(f"column {tr} could not be found...")
+
+        scaler = StandardScaler()
+        if columns:
+            x = objects[columns].values
+            x_scaled = scaler.fit_transform(x)
+            df_temp = pd.DataFrame(x_scaled, columns=columns, index=objects.index)
+            objects[columns] = df_temp
+        else:
+            print("Warning: no features were passed to bTrack...")
+
+        # 2) track the objects
+        new_btrack_objects = localizations_to_objects(objects)
+
+        with BayesianTracker() as tracker:
+
+            tracker.configure(configuration)
+
+            if columns:
+                tracking_updates = ["motion", "visual"]
+                # tracker.tracking_updates = ["motion","visual"]
+                tracker.features = columns
+            else:
+                tracking_updates = ["motion"]
+
+            tracker.append(new_btrack_objects)
+            tracker.volume = (
+                (0, volume[0]),
+                (0, volume[1]),
+                (-1e5, 1e5),
+            )  # (-1e5, 1e5)
+            # print(tracker.volume)
+            tracker.track(tracking_updates=tracking_updates, **track_kwargs)
+            tracker.optimize(options=optimizer_options)
+
+            data, properties, graph = tracker.to_napari()  # ndim=2
+        # do the table post processing and napari options
+        if data.shape[1] == 4:
+            df = pd.DataFrame(
+                data,
+                columns=[
+                    column_labels["track"],
+                    column_labels["time"],
+                    column_labels["y"],
+                    column_labels["x"],
+                ],
+            )
+        elif data.shape[1] == 5:
+            df = pd.DataFrame(
+                data,
+                columns=[
+                    column_labels["track"],
+                    column_labels["time"],
+                    "z",
+                    column_labels["y"],
+                    column_labels["x"],
+                ],
+            )
+            df = df.drop(columns=["z"])
+        df[column_labels["x"] + "_um"] = df[column_labels["x"]] * spatial_calibration
+        df[column_labels["y"] + "_um"] = df[column_labels["y"]] * spatial_calibration
+
+    else:
+        properties = None
+        graph = {}
+        print(f"{objects=} {objects.columns=}")
+        objects = objects.rename(columns={"t": "frame"})
+        if search_range is not None and memory is not None:
+            data = tp.link(objects, search_range, memory=memory, link_strategy="auto")
+        else:
+            print("Please provide a valid search range and memory value...")
+            return None
+        data["particle"] = data["particle"] + 1  # force track id to start at 1
+        df = data.rename(
+            columns={
+                "frame": column_labels["time"],
+                "x": column_labels["x"],
+                "y": column_labels["y"],
+                "particle": column_labels["track"],
+            }
+        )
+        df["state"] = 5.0
+        df["generation"] = 0.0
+        df["root"] = 1.0
+        df["parent"] = 1.0
+        df["dummy"] = False
+        df["z"] = 0.0
+        data = df[
+            [
+                column_labels["track"],
+                column_labels["time"],
+                "z",
+                column_labels["y"],
+                column_labels["x"],
+            ]
+        ].to_numpy()
+        print(f"{df=}")
+
+    if btrack_option:
+        df = df.merge(pd.DataFrame(properties), left_index=True, right_index=True)
+        if columns:
+            x = df[columns].values
+            x_scaled = scaler.inverse_transform(x)
+            df_temp = pd.DataFrame(x_scaled, columns=columns, index=df.index)
+            df[columns] = df_temp
+
+        # set dummy features to NaN
+        df.loc[df["dummy"], ["class_id"] + columns] = np.nan
+
+    df = df.sort_values(by=[column_labels["track"], column_labels["time"]])
+    df = velocity_per_track(df, window_size=3, mode="bi")
+
+    if channel_names is not None:
+        df = rename_intensity_column(df, channel_names)
+
+    df = write_first_detection_class(df, img_shape=volume, column_labels=column_labels)
+
+    if clean_trajectories_kwargs is not None:
+        df = clean_trajectories(df.copy(), **clean_trajectories_kwargs)
+
+    df.loc[df["status_firstdetection"].isna(), "status_firstdetection"] = 0
+    df["ID"] = np.arange(len(df)).astype(int)
+
+    invalid_cols = [c for c in list(df.columns) if c.startswith("Unnamed")]
+    if len(invalid_cols) > 0:
+        df = df.drop(invalid_cols, axis=1)
+
+    # if view_on_napari:
+    #     view_on_napari_btrack(data,properties,graph,stack=stack,labels=labels,relabel=True)
+
+    if return_napari_data:
+        napari_data = {"data": data, "properties": properties, "graph": graph}
+        return df, napari_data
+    else:
+        return df
+
+
+
+def extract_objects_and_features(
+    labels,
+    stack,
+    features,
+    channel_names=None,
+    haralick_options=None,
+    mask_timepoints=None,
+    mask_channels=None,
+):
+    """
+
+    Extract objects and features from segmented labels and image stack.
+
+    Parameters
+    ----------
+    labels : ndarray
+        The segmented labels representing cell objects.
+    stack : ndarray
+        The image stack corresponding to the labels.
+    features : list or None
+        The list of features to extract from the objects. If None, no additional features are extracted.
+    channel_names : list or None, optional
+        The list of channel names corresponding to the image stack. Used for extracting Haralick features. Default is None.
+    haralick_options : dict or None, optional
+        The options for Haralick feature extraction. If None, no Haralick features are extracted. Default is None.
+    mask_timepoints : list or None, optional
+        Frames to hide during tracking.
+
+    Returns
+    -------
+    DataFrame
+        The DataFrame containing the extracted object features.
+
+    Notes
+    -----
+    This function extracts objects and features from the segmented labels and image stack. It computes the specified features for each
+    labeled object and returns a DataFrame containing the object features. Additional features such as centroid coordinates can also
+    be extracted. If Haralick features are enabled, they are computed based on the image stack using the specified options.
+
+    Examples
+    --------
+    >>> labels = np.array([[1, 1, 2, 2, 0, 0],
+                           [1, 1, 1, 2, 2, 0],
+                           [0, 0, 1, 2, 0, 0]])
+    >>> stack = np.random.rand(3, 6, 3)
+    >>> features = ['area', 'mean_intensity']
+    >>> df = extract_objects_and_features(labels, stack, features)
+
+    """
+
+    if features is None:
+        features = []
+
+    if stack is None:
+        haralick_options = None
+
+    if mask_timepoints is not None:
+        for f in mask_timepoints:
+            labels[f] = 0.0
+
+    nbr_frames = len(labels)
+    timestep_dataframes = []
+
+    for t in tqdm(range(nbr_frames), desc="frame"):
+
+        if stack is not None:
+            img = stack[t]
+        else:
+            img = None
+
+        if (haralick_options is not None) and (t == 0) and (stack is not None):
+            if not "percentiles" in haralick_options:
+                haralick_options.update({"percentiles": (0.01, 99.99)})
+            if not "target_channel" in haralick_options:
+                haralick_options.update({"target_channel": 0})
+            haralick_percentiles = haralick_options["percentiles"]
+            haralick_channel_index = haralick_options["target_channel"]
+            min_value = np.nanpercentile(
+                img[:, :, haralick_channel_index].flatten(), haralick_percentiles[0]
+            )
+            max_value = np.nanpercentile(
+                img[:, :, haralick_channel_index].flatten(), haralick_percentiles[1]
+            )
+            haralick_options.update({"clip_values": (min_value, max_value)})
+
+        df_props = measure_features(
+            img,
+            labels[t],
+            features=features + ["centroid"],
+            border_dist=None,
+            channels=channel_names,
+            haralick_options=haralick_options,
+            verbose=False,
+        )
+        df_props.rename(columns={"centroid-1": "x", "centroid-0": "y"}, inplace=True)
+        df_props["t"] = int(t)
+        timestep_dataframes.append(df_props)
+
+    df = pd.concat(timestep_dataframes)
+    df.reset_index(inplace=True, drop=True)
+
+    if mask_channels is not None:
+        cols_to_drop = []
+        for mc in mask_channels:
+            columns = df.columns
+            col_contains = [mc in c for c in columns]
+            to_remove = np.array(columns)[np.array(col_contains)]
+            cols_to_drop.extend(to_remove)
+        if len(cols_to_drop) > 0:
+            df = df.drop(cols_to_drop, axis=1)
+
+    return df
+
+
+def clean_trajectories(
+    trajectories,
+    remove_not_in_first=False,
+    remove_not_in_last=False,
+    minimum_tracklength=0,
+    interpolate_position_gaps=False,
+    extrapolate_tracks_post=False,
+    extrapolate_tracks_pre=False,
+    interpolate_na=False,
+    column_labels={
+        "track": "TRACK_ID",
+        "time": "FRAME",
+        "x": "POSITION_X",
+        "y": "POSITION_Y",
+    },
+):
+    """
+    Clean trajectories by applying various cleaning operations.
+
+    Parameters
+    ----------
+    trajectories : pandas.DataFrame
+        The input DataFrame containing trajectory data.
+    remove_not_in_first : bool, optional
+        Flag indicating whether to remove tracks not present in the first frame.
+        Defaults to False.
+    remove_not_in_last : bool, optional
+        Flag indicating whether to remove tracks not present in the last frame.
+        Defaults to False.
+    minimum_tracklength : int, optional
+        The minimum length of a track to be retained.
+        Defaults to 0.
+    interpolate_position_gaps : bool, optional
+        Flag indicating whether to interpolate position gaps in tracks.
+        Defaults to False.
+    extrapolate_tracks_post : bool, optional
+        Flag indicating whether to extrapolate tracks after the last known position.
+        Defaults to False.
+    extrapolate_tracks_pre : bool, optional
+        Flag indicating whether to extrapolate tracks before the first known position.
+        Defaults to False.
+    interpolate_na : bool, optional
+        Flag indicating whether to interpolate missing values in tracks.
+        Defaults to False.
+    column_labels : dict, optional
+        Dictionary specifying the column labels used in the input DataFrame.
+        The keys represent the following column labels:
+        - 'track': The column label for the track ID.
+        - 'time': The column label for the timestamp.
+        - 'x': The column label for the x-coordinate.
+        - 'y': The column label for the y-coordinate.
+        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The cleaned DataFrame with trajectories.
+
+    Notes
+    -----
+    This function applies various cleaning operations to the input DataFrame containing trajectory data.
+    The cleaning operations include:
+    - Filtering tracks based on their endpoints.
+    - Filtering tracks based on their length.
+    - Interpolating position gaps in tracks.
+    - Extrapolating tracks after the last known position.
+    - Extrapolating tracks before the first known position.
+    - Interpolating missing values in tracks.
+
+    The input DataFrame is expected to have the following columns:
+    - track: The unique ID of each track.
+    - time: The timestamp of each data point.
+    - x: The x-coordinate of each data point.
+    - y: The y-coordinate of each data point.
+
+    Examples
+    --------
+    >>> cleaned_data = clean_trajectories(trajectories, remove_not_in_first=True, remove_not_in_last=True,
+    ...                                   minimum_tracklength=10, interpolate_position_gaps=True,
+    ...                                   extrapolate_tracks_post=True, extrapolate_tracks_pre=False,
+    ...                                   interpolate_na=True, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
+    >>> print(cleaned_data.head())
+
+    """
+
+    trajectories.reset_index(drop=True, inplace=True)
+    trajectories.sort_values(
+        by=[column_labels["track"], column_labels["time"]], inplace=True
+    )
+
+    if np.any([remove_not_in_first, remove_not_in_last]):
+        trajectories = filter_by_endpoints(
+            trajectories.copy(),
+            remove_not_in_first=remove_not_in_first,
+            remove_not_in_last=remove_not_in_last,
+            column_labels=column_labels,
+        )
+
+    if np.any([extrapolate_tracks_post, extrapolate_tracks_pre]):
+        trajectories = extrapolate_tracks(
+            trajectories.copy(),
+            post=extrapolate_tracks_post,
+            pre=extrapolate_tracks_pre,
+            column_labels=column_labels,
+        )
+
+    if interpolate_position_gaps:
+        trajectories = interpolate_time_gaps(
+            trajectories.copy(), column_labels=column_labels
+        )
+        # interpolate_time_gaps might leave TRACK_ID in index of some rows or overall
+        trajectories.reset_index(drop=True, inplace=True)
+
+    if interpolate_na:
+        trajectories = interpolate_nan_properties(
+            trajectories.copy(), track_label=column_labels["track"]
+        )
+        trajectories.reset_index(drop=True, inplace=True)
+
+    if minimum_tracklength > 0:
+        trajectories = filter_by_tracklength(
+            trajectories.copy(), minimum_tracklength, track_label=column_labels["track"]
+        )
+
+    trajectories = trajectories.sort_values(
+        by=[column_labels["track"], column_labels["time"]]
+    )
+    trajectories.reset_index(inplace=True, drop=True)
+
+    if "class_firstdetection" in list(trajectories.columns):
+        for tid, track_group in trajectories.groupby(column_labels["track"]):
+            indices = track_group.index
+
+            class_values = np.array(track_group["class_firstdetection"].unique())
+            class_values = class_values[class_values == class_values]
+            t_values = np.array(track_group["t_firstdetection"].unique())
+            t_values = t_values[t_values == t_values]
+            if len(class_values) == 0:
+                class_values = 2
+                t_values = -1
+            else:
+                class_values = class_values[0]
+                t_values = t_values[0]
+
+            trajectories.loc[indices, "class_firstdetection"] = class_values
+            trajectories.loc[indices, "t_firstdetection"] = t_values
+
+    return trajectories
+
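Note on the rewritten clean_trajectories above: beyond reformatting, the order of operations changes. The minimum_tracklength filter, which previously ran first, now runs after endpoint filtering, extrapolation and interpolation, and the index is reset after each interpolating step (see the new inline comment about interpolate_time_gaps leaving TRACK_ID in the index). A hedged sketch of how these options are typically reached through track(); keyword names are taken from the signatures above, values are illustrative:

    clean_kwargs = dict(
        remove_not_in_first=True,        # drop tracks absent from the first frame
        interpolate_position_gaps=True,  # fill time gaps within tracks
        interpolate_na=True,             # interpolate remaining NaN properties
        minimum_tracklength=10,          # in 1.5.0b0, applied after interpolation
    )
    df = track(labels, configuration, clean_trajectories_kwargs=clean_kwargs)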
422
542
 
423
543
  def interpolate_per_track(group_df):
544
+ """
545
+ Interpolate missing values within a track.
546
+
547
+ Parameters
548
+ ----------
549
+ group_df : pandas.DataFrame
550
+ The input DataFrame containing data for a single track.
424
551
 
425
- """
426
- Interpolate missing values within a track.
552
+ Returns
553
+ -------
554
+ pandas.DataFrame
555
+ The interpolated DataFrame with missing values filled.
427
556
 
428
- Parameters
429
- ----------
430
- group_df : pandas.DataFrame
431
- The input DataFrame containing data for a single track.
557
+ Notes
558
+ -----
559
+ This function performs linear interpolation to fill missing values within a track.
560
+ Missing values are interpolated based on the neighboring data points in the track.
432
561
 
433
- Returns
434
- -------
435
- pandas.DataFrame
436
- The interpolated DataFrame with missing values filled.
562
+ """
437
563
 
438
- Notes
439
- -----
440
- This function performs linear interpolation to fill missing values within a track.
441
- Missing values are interpolated based on the neighboring data points in the track.
564
+ for c in list(group_df.columns):
565
+ group_df_new_dtype = group_df[c].infer_objects(copy=False)
566
+ if group_df_new_dtype.dtype != "O":
567
+ group_df[c] = group_df_new_dtype.interpolate(
568
+ method="linear", limit_direction="both"
569
+ )
442
570
 
443
- """
571
+ # interpolated_group = group_df.interpolate(method='linear',limit_direction="both")
444
572
 
445
- for c in list(group_df.columns):
446
- group_df_new_dtype = group_df[c].infer_objects(copy=False)
447
- if group_df_new_dtype.dtype!='O':
448
- group_df[c] = group_df_new_dtype.interpolate(method='linear',limit_direction="both")
449
-
450
- #interpolated_group = group_df.interpolate(method='linear',limit_direction="both")
573
+ return group_df
451
574
 
452
- return group_df
453
575
 
454
576
  def interpolate_nan_properties(trajectories, track_label="TRACK_ID"):
577
+ """
578
+ Interpolate missing values within tracks in the input DataFrame.
579
+
580
+ Parameters
581
+ ----------
582
+ trajectories : pandas.DataFrame
583
+ The input DataFrame containing trajectory data.
584
+ track_label : str, optional
585
+ The column label for the track ID.
586
+ Defaults to "TRACK_ID".
587
+
588
+ Returns
589
+ -------
590
+ pandas.DataFrame
591
+ The DataFrame with missing values interpolated within tracks.
592
+
593
+ Notes
594
+ -----
595
+ This function groups the input DataFrame by track ID and applies `interpolate_per_track` function
596
+ to interpolate missing values within each track.
597
+ Missing values are interpolated based on the neighboring data points in each track.
598
+
599
+ The input DataFrame is expected to have a column with the specified `track_label` containing the track IDs.
600
+
601
+ Examples
602
+ --------
603
+ >>> interpolated_data = interpolate_nan_properties(trajectories, track_label="ID")
604
+ >>> print(interpolated_data.head())
605
+
606
+ """
607
+
608
+ trajectories = trajectories.groupby(track_label, group_keys=True).apply(
609
+ interpolate_per_track
610
+ )
611
+
612
+ if track_label in trajectories.index.names:
613
+ trajectories = trajectories.reset_index(
614
+ level=0, drop=track_label in trajectories.columns
615
+ )
616
+
617
+ return trajectories
618
+
619
+
620
+ def filter_by_endpoints(
621
+ trajectories,
622
+ remove_not_in_first=True,
623
+ remove_not_in_last=False,
624
+ column_labels={
625
+ "track": "TRACK_ID",
626
+ "time": "FRAME",
627
+ "x": "POSITION_X",
628
+ "y": "POSITION_Y",
629
+ },
630
+ ):
631
+ """
632
+ Filter trajectories based on their endpoints.
633
+
634
+ Parameters
635
+ ----------
636
+ trajectories : pandas.DataFrame
637
+ The input DataFrame containing trajectory data.
638
+ remove_not_in_first : bool, optional
639
+ Flag indicating whether to remove tracks not present in the first frame.
640
+ Defaults to True.
641
+ remove_not_in_last : bool, optional
642
+ Flag indicating whether to remove tracks not present in the last frame.
643
+ Defaults to False.
644
+ column_labels : dict, optional
645
+ Dictionary specifying the column labels used in the input DataFrame.
646
+ The keys represent the following column labels:
647
+ - 'track': The column label for the track ID.
648
+ - 'time': The column label for the timestamp.
649
+ - 'x': The column label for the x-coordinate.
650
+ - 'y': The column label for the y-coordinate.
651
+ Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
652
+
653
+ Returns
654
+ -------
655
+ pandas.DataFrame
656
+ The filtered DataFrame with trajectories based on their endpoints.
657
+
658
+ Notes
659
+ -----
660
+ This function filters the input DataFrame based on the endpoints of the trajectories.
661
+ The filtering can be performed in three modes:
662
+ - remove_not_in_first=True and remove_not_in_last=False: Remove tracks that are not present in the first frame.
663
+ - remove_not_in_first=False and remove_not_in_last=True: Remove tracks that are not present in the last frame.
664
+ - remove_not_in_first=True and remove_not_in_last=True: Remove tracks that are not present in both the first and last frames.
665
+
666
+ The input DataFrame is expected to have the following columns:
667
+ - track: The unique ID of each track.
668
+ - time: The timestamp of each data point.
669
+ - x: The x-coordinate of each data point.
670
+ - y: The y-coordinate of each data point.
671
+
672
+ Examples
673
+ --------
674
+ >>> filtered_data = filter_by_endpoints(trajectories, remove_not_in_first=True, remove_not_in_last=False, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
675
+ >>> print(filtered_data.head())
676
+
677
+ """
678
+
679
+ if (remove_not_in_first) * (not remove_not_in_last):
680
+ # filter tracks not in first frame
681
+ leftover_tracks = (
682
+ trajectories.groupby(column_labels["track"])
683
+ .min()
684
+ .index[
685
+ trajectories.groupby(column_labels["track"]).min()[
686
+ column_labels["time"]
687
+ ]
688
+ == np.amin(trajectories[column_labels["time"]])
689
+ ]
690
+ )
691
+ trajectories = trajectories.loc[
692
+ trajectories[column_labels["track"]].isin(leftover_tracks)
693
+ ]
694
+
695
+ elif (remove_not_in_last) * (not remove_not_in_first):
696
+ # filter tracks not in last frame
697
+ leftover_tracks = (
698
+ trajectories.groupby(column_labels["track"])
699
+ .max()
700
+ .index[
701
+ trajectories.groupby(column_labels["track"]).max()[
702
+ column_labels["time"]
703
+ ]
704
+ == np.amax(trajectories[column_labels["time"]])
705
+ ]
706
+ )
707
+ trajectories = trajectories.loc[
708
+ trajectories[column_labels["track"]].isin(leftover_tracks)
709
+ ]
710
+
711
+ elif remove_not_in_first * remove_not_in_last:
712
+ # filter tracks both not in first and last frame
713
+ leftover_tracks = (
714
+ trajectories.groupby(column_labels["track"])
715
+ .max()
716
+ .index[
717
+ (
718
+ trajectories.groupby(column_labels["track"]).max()[
719
+ column_labels["time"]
720
+ ]
721
+ == np.amax(trajectories[column_labels["time"]])
722
+ )
723
+ * (
724
+ trajectories.groupby(column_labels["track"]).min()[
725
+ column_labels["time"]
726
+ ]
727
+ == np.amin(trajectories[column_labels["time"]])
728
+ )
729
+ ]
730
+ )
731
+ trajectories = trajectories.loc[
732
+ trajectories[column_labels["track"]].isin(leftover_tracks)
733
+ ]
734
+
735
+ trajectories = trajectories.sort_values(
736
+ by=[column_labels["track"], column_labels["time"]]
737
+ )
738
+
739
+ return trajectories
455
740
 
456
- """
457
- Interpolate missing values within tracks in the input DataFrame.
458
-
459
- Parameters
460
- ----------
461
- trajectories : pandas.DataFrame
462
- The input DataFrame containing trajectory data.
463
- track_label : str, optional
464
- The column label for the track ID.
465
- Defaults to "TRACK_ID".
466
-
467
- Returns
468
- -------
469
- pandas.DataFrame
470
- The DataFrame with missing values interpolated within tracks.
471
-
472
- Notes
473
- -----
474
- This function groups the input DataFrame by track ID and applies `interpolate_per_track` function
475
- to interpolate missing values within each track.
476
- Missing values are interpolated based on the neighboring data points in each track.
477
-
478
- The input DataFrame is expected to have a column with the specified `track_label` containing the track IDs.
479
-
480
- Examples
481
- --------
482
- >>> interpolated_data = interpolate_nan_properties(trajectories, track_label="ID")
483
- >>> print(interpolated_data.head())
484
-
485
- """
486
-
487
- trajectories = trajectories.groupby(track_label, group_keys=False).apply(interpolate_per_track)
488
-
489
- return trajectories
490
-
491
-
-def filter_by_endpoints(trajectories, remove_not_in_first=True, remove_not_in_last=False,
-                        column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
-
-    """
-    Filter trajectories based on their endpoints.
-
-    Parameters
-    ----------
-    trajectories : pandas.DataFrame
-        The input DataFrame containing trajectory data.
-    remove_not_in_first : bool, optional
-        Flag indicating whether to remove tracks not present in the first frame.
-        Defaults to True.
-    remove_not_in_last : bool, optional
-        Flag indicating whether to remove tracks not present in the last frame.
-        Defaults to False.
-    column_labels : dict, optional
-        Dictionary specifying the column labels used in the input DataFrame.
-        The keys represent the following column labels:
-        - 'track': The column label for the track ID.
-        - 'time': The column label for the timestamp.
-        - 'x': The column label for the x-coordinate.
-        - 'y': The column label for the y-coordinate.
-        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
-
-    Returns
-    -------
-    pandas.DataFrame
-        The filtered DataFrame with trajectories based on their endpoints.
-
-    Notes
-    -----
-    This function filters the input DataFrame based on the endpoints of the trajectories.
-    The filtering can be performed in three modes:
-    - remove_not_in_first=True and remove_not_in_last=False: Remove tracks that are not present in the first frame.
-    - remove_not_in_first=False and remove_not_in_last=True: Remove tracks that are not present in the last frame.
-    - remove_not_in_first=True and remove_not_in_last=True: Remove tracks that are not present in both the first and last frames.
-
-    The input DataFrame is expected to have the following columns:
-    - track: The unique ID of each track.
-    - time: The timestamp of each data point.
-    - x: The x-coordinate of each data point.
-    - y: The y-coordinate of each data point.
-
-    Examples
-    --------
-    >>> filtered_data = filter_by_endpoints(trajectories, remove_not_in_first=True, remove_not_in_last=False, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
-    >>> print(filtered_data.head())
-
-    """
-
-    if (remove_not_in_first)*(not remove_not_in_last):
-        # filter tracks not in first frame
-        leftover_tracks = trajectories.groupby(column_labels['track']).min().index[trajectories.groupby(column_labels['track']).min()[column_labels['time']]==np.amin(trajectories[column_labels['time']])]
-        trajectories = trajectories.loc[trajectories[column_labels['track']].isin(leftover_tracks)]
-
-    elif (remove_not_in_last)*(not remove_not_in_first):
-        # filter tracks not in last frame
-        leftover_tracks = trajectories.groupby(column_labels['track']).max().index[trajectories.groupby(column_labels['track']).max()[column_labels['time']]==np.amax(trajectories[column_labels['time']])]
-        trajectories = trajectories.loc[trajectories[column_labels['track']].isin(leftover_tracks)]
-
-    elif remove_not_in_first*remove_not_in_last:
-        # filter tracks both not in first and last frame
-        leftover_tracks = trajectories.groupby(column_labels['track']).max().index[(trajectories.groupby(column_labels['track']).max()[column_labels['time']]==np.amax(trajectories[column_labels['time']]))*(trajectories.groupby(column_labels['track']).min()[column_labels['time']]==np.amin(trajectories[column_labels['time']]))]
-        trajectories = trajectories.loc[trajectories[column_labels['track']].isin(leftover_tracks)]
-
-    trajectories = trajectories.sort_values(by=[column_labels['track'],column_labels['time']])
-
-    return trajectories
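
For reference, a minimal sketch of the three endpoint-filtering modes of this removed helper, with `filter_by_endpoints` in scope and a hypothetical three-track DataFrame:

    import pandas as pd

    # Track 1 spans frames 0-2; track 2 starts late (frame 1); track 3 ends early (frame 1).
    trajectories = pd.DataFrame({
        "TRACK_ID":   [1, 1, 1, 2, 2, 3, 3],
        "FRAME":      [0, 1, 2, 1, 2, 0, 1],
        "POSITION_X": [0.0, 0.1, 0.2, 5.0, 5.1, 9.0, 9.1],
        "POSITION_Y": [0.0, 0.0, 0.0, 5.0, 5.0, 9.0, 9.0],
    })

    # Keep tracks present in the first frame: drops track 2.
    first_only = filter_by_endpoints(trajectories, remove_not_in_first=True, remove_not_in_last=False)

    # Keep tracks present in both the first and last frames: keeps only track 1.
    full_span = filter_by_endpoints(trajectories, remove_not_in_first=True, remove_not_in_last=True)

Note that the branch conditions multiply booleans (`*` acting as a logical AND), so exactly one branch runs for each flag combination.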
 
  def filter_by_tracklength(trajectories, minimum_tracklength, track_label="TRACK_ID"):
-
-    """
-    Filter trajectories based on the minimum track length.
-
-    Parameters
-    ----------
-    trajectories : pandas.DataFrame
-        The input DataFrame containing trajectory data.
-    minimum_tracklength : int
-        The minimum length required for a track to be included.
-    track_label : str, optional
-        The column name in the DataFrame that represents the track ID.
-        Defaults to "TRACK_ID".
-
-    Returns
-    -------
-    pandas.DataFrame
-        The filtered DataFrame with trajectories that meet the minimum track length.
-
-    Notes
-    -----
-    This function removes any tracks from the input DataFrame that have a length
-    (number of data points) less than the specified minimum track length.
-
-    Examples
-    --------
-    >>> filtered_data = filter_by_tracklength(trajectories, 10, track_label="TrackID")
-    >>> print(filtered_data.head())
-
-    """
-
-    if minimum_tracklength>0:
-
-        leftover_tracks = trajectories.groupby(track_label, group_keys=False).size().index[trajectories.groupby(track_label, group_keys=False).size() > minimum_tracklength]
-        trajectories = trajectories.loc[trajectories[track_label].isin(leftover_tracks)]
-
-    trajectories = trajectories.reset_index(drop=True)
-
-    return trajectories
-
-
-def interpolate_time_gaps(trajectories, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
-
-    """
-    Interpolate time gaps in trajectories.
-
-    Parameters
-    ----------
-    trajectories : pandas.DataFrame
-        The input DataFrame containing trajectory data.
-    column_labels : dict, optional
-        Dictionary specifying the column labels used in the input DataFrame.
-        The keys represent the following column labels:
-        - 'track': The column label for the track ID.
-        - 'time': The column label for the timestamp.
-        - 'x': The column label for the x-coordinate.
-        - 'y': The column label for the y-coordinate.
-        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
-
-    Returns
-    -------
-    pandas.DataFrame
-        The interpolated DataFrame with reduced time gaps in trajectories.
-
-    Notes
-    -----
-    This function performs interpolation on the input trajectories to reduce time gaps between data points.
-    It uses linear interpolation to fill missing values for the specified x and y coordinate attributes.
-
-    The input DataFrame is expected to have the following columns:
-    - track: The unique ID of each track.
-    - time: The timestamp of each data point (in seconds).
-    - x: The x-coordinate of each data point.
-    - y: The y-coordinate of each data point.
-
-    Examples
-    --------
-    >>> interpolated_data = interpolate_time_gaps(trajectories, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
-    >>> print(interpolated_data.head())
-
-    """
-
-    trajectories[column_labels['time']] = pd.to_datetime(trajectories[column_labels['time']], unit='s')
-    trajectories.set_index(column_labels['track'], inplace=True)
-    trajectories = trajectories.groupby(column_labels['track'], group_keys=True).apply(lambda x: x.set_index(column_labels['time']).resample('1s').asfreq()).reset_index()
-    trajectories[[column_labels['x'], column_labels['y']]] = trajectories.groupby(column_labels['track'], group_keys=False)[[column_labels['x'], column_labels['y']]].apply(lambda x: x.interpolate(method='linear'))
-    trajectories.reset_index(drop=True, inplace=True)
-    trajectories[column_labels['time']] = trajectories[column_labels['time']].astype('int64').astype(float) / 10**9
-    #trajectories[column_labels['time']] = trajectories[column_labels['time']].astype('int64')
-    trajectories.sort_values(by=[column_labels['track'],column_labels['time']],inplace=True)
-
-    return trajectories
-
-
-def extrapolate_tracks(trajectories, post=False, pre=False, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
-
-    """
-    Extrapolate tracks in trajectories.
-
-    Parameters
-    ----------
-    trajectories : pandas.DataFrame
-        The input DataFrame containing trajectory data.
-    post : bool, optional
-        Flag indicating whether to perform post-extrapolation.
-        Defaults to True.
-    pre : bool, optional
-        Flag indicating whether to perform pre-extrapolation.
-        Defaults to False.
-    column_labels : dict, optional
-        Dictionary specifying the column labels used in the input DataFrame.
-        The keys represent the following column labels:
-        - 'track': The column label for the track ID.
-        - 'time': The column label for the timestamp.
-        - 'x': The column label for the x-coordinate.
-        - 'y': The column label for the y-coordinate.
-        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
-
-    Returns
-    -------
-    pandas.DataFrame
-        The extrapolated DataFrame with extended tracks.
-
-    Notes
-    -----
-    This function extrapolates tracks in the input DataFrame by repeating the last known position
-    either after (post-extrapolation) or before (pre-extrapolation) the available data.
-
-    The input DataFrame is expected to have the following columns:
-    - track: The unique ID of each track.
-    - time: The timestamp of each data point.
-    - x: The x-coordinate of each data point.
-    - y: The y-coordinate of each data point.
-
-    Examples
-    --------
-    >>> extrapolated_data = extrapolate_tracks(trajectories, post=True, pre=False, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
-    >>> print(extrapolated_data.head())
-
-    """
-
-    if post:
-
-        # get the maximum time T in the dataframe
-        max_time = trajectories[column_labels['time']].max()
-
-        # extrapolate the position until time T by repeating the last known position
-        df_extrapolated = pd.DataFrame()
-        for track_id, group in trajectories.groupby(column_labels['track']):
-            last_known_position = group.loc[group[column_labels['time']] <= max_time].tail(1)[[column_labels['time'],column_labels['x'], column_labels['y']]].values
-            extrapolated_frames = pd.DataFrame({column_labels['time']: np.arange(last_known_position[0][0] + 1, max_time + 1)})
-            extrapolated_positions = pd.DataFrame({column_labels['x']: last_known_position[0][1], column_labels['y']: last_known_position[0][2]}, index=np.arange(last_known_position[0][0] + 1, max_time + 1))
-            track_data = extrapolated_frames.join(extrapolated_positions, how="inner", on=column_labels['time'])
-            track_data[column_labels['track']] = track_id
-
-            if len(df_extrapolated)==0:
-                df_extrapolated = track_data
-            elif len(track_data)!=0:
-                df_extrapolated = pd.concat([df_extrapolated, track_data])
-
-
-        # concatenate the original dataframe and the extrapolated dataframe
-        trajectories = pd.concat([trajectories, df_extrapolated], axis=0)
-        # sort the dataframe by TRACK_ID and FRAME
-        trajectories.sort_values([column_labels['track'], column_labels['time']], inplace=True)
-
-    if pre:
-
-        # get the maximum time T in the dataframe
-        min_time = 0 #trajectories[column_labels['time']].min()
-
-        # extrapolate the position until time T by repeating the last known position
-        df_extrapolated = pd.DataFrame()
-        for track_id, group in trajectories.groupby(column_labels['track']):
-            last_known_position = group.loc[group[column_labels['time']] >= min_time].head(1)[[column_labels['time'],column_labels['x'], column_labels['y']]].values
-            extrapolated_frames = pd.DataFrame({column_labels['time']: np.arange(min_time, last_known_position[0][0] + 1)})
-            extrapolated_positions = pd.DataFrame({column_labels['x']: last_known_position[0][1], column_labels['y']: last_known_position[0][2]}, index=np.arange(min_time, last_known_position[0][0]))
-            track_data = extrapolated_frames.join(extrapolated_positions, how="inner", on=column_labels['time'])
-            track_data[column_labels['track']] = track_id
-            df_extrapolated = pd.concat([df_extrapolated, track_data])
-
-        # concatenate the original dataframe and the extrapolated dataframe
-        trajectories = pd.concat([trajectories, df_extrapolated], axis=0)
-
-        # sort the dataframe by TRACK_ID and FRAME
-        trajectories.sort_values([column_labels['track'], column_labels['time']], inplace=True)
-
-    return trajectories
-
-def compute_instantaneous_velocity(trajectories, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
-
-    """
-
-    Compute the instantaneous velocity for each point in the trajectories.
-
-    Parameters
-    ----------
-    trajectories : pandas.DataFrame
-        The input DataFrame containing trajectory data.
-    column_labels : dict, optional
-        A dictionary specifying the column labels for track ID, time, position X, and position Y.
-        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
-
-    Returns
-    -------
-    pandas.DataFrame
-        The DataFrame with added 'velocity' column representing the instantaneous velocity for each point.
-
-    Notes
-    -----
-    This function calculates the instantaneous velocity for each point in the trajectories.
-    The velocity is computed as the Euclidean distance traveled divided by the time difference between consecutive points.
-
-    The input DataFrame is expected to have columns with the specified column labels for track ID, time, position X, and position Y.
-
-    Examples
-    --------
-    >>> velocity_data = compute_instantaneous_velocity(trajectories)
-    >>> print(velocity_data.head())
-
-    """
-
-    # Calculate the time differences and position differences
-    trajectories['dt'] = trajectories.groupby(column_labels['track'])[column_labels['time']].diff()
-    trajectories['dx'] = trajectories.groupby(column_labels['track'])[column_labels['x']].diff()
-    trajectories['dy'] = trajectories.groupby(column_labels['track'])[column_labels['y']].diff()
-
-    # Calculate the instantaneous velocity
-    trajectories['velocity'] = np.sqrt(trajectories['dx']**2 +trajectories['dy']**2) / trajectories['dt']
-    trajectories = trajectories.drop(['dx', 'dy', 'dt'], axis=1)
-    trajectories = trajectories.sort_values(by=[column_labels['track'],column_labels['time']])
-
-    return trajectories
-
-def instantaneous_diffusion(positions_x, positions_y, timeline):
-
-    """
-    Compute the instantaneous diffusion coefficients for each position coordinate.
-
-    Parameters
-    ----------
-    positions_x : numpy.ndarray
-        Array of x-coordinates of positions.
-    positions_y : numpy.ndarray
-        Array of y-coordinates of positions.
-    timeline : numpy.ndarray
-        Array of corresponding time points.
-
-    Returns
-    -------
-    numpy.ndarray
-        Array of instantaneous diffusion coefficients for each position coordinate.
-
-    Notes
-    -----
-    The function calculates the instantaneous diffusion coefficients for each position coordinate (x, y) based on the provided positions and timeline.
-    The diffusion coefficient at each time point is computed using the formula:
-    D = ((x[t+1] - x[t-1])^2 / (2 * (t[t+1] - t[t-1]))) + (1 / (t[t+1] - t[t-1])) * ((x[t+1] - x[t]) * (x[t] - x[t-1]))
-    where x represents the position coordinate (x or y) and t represents the corresponding time point.
-
-    Examples
-    --------
-    >>> x = np.array([0, 1, 2, 3, 4, 5])
-    >>> y = np.array([0, 1, 4, 9, 16, 25])
-    >>> t = np.array([0, 1, 2, 3, 4, 5])
-    >>> diff = instantaneous_diffusion(x, y, t)
-    >>> print(diff)
-
-    """
-
-    diff = np.zeros((len(positions_x),2))
-    diff[:,:] = np.nan
-
-    for t in range(1,len(positions_x)-1):
-        diff[t,0] = (positions_x[t+1] - positions_x[t-1])**2/(2*(timeline[t+1] - timeline[t-1])) + 1/(timeline[t+1] - timeline[t-1])*((positions_x[t+1] - positions_x[t])*(positions_x[t] - positions_x[t-1]))
-
-    for t in range(1,len(positions_y)-1):
-        diff[t,1] = (positions_y[t+1] - positions_y[t-1])**2/(2*(timeline[t+1] - timeline[t-1])) + 1/(timeline[t+1] - timeline[t-1])*((positions_y[t+1] - positions_y[t])*(positions_y[t] - positions_y[t-1]))
-
-    return diff
-
-def magnitude_diffusion(diffusion_vector):
-
-    """
-    Compute the magnitude of diffusion for each diffusion vector.
-
-    Parameters
-    ----------
-    diffusion_vector : numpy.ndarray
-        Array of diffusion vectors.
-
-    Returns
-    -------
-    numpy.ndarray
-        Array of magnitudes of diffusion.
-
-    Notes
-    -----
-    The function calculates the magnitude of diffusion for each diffusion vector (x, y) based on the provided diffusion vectors.
-    The magnitude of diffusion is computed as the Euclidean norm of the diffusion vector.
-
-    Examples
-    --------
-    >>> diffusion = np.array([[1.0, 2.0], [3.0, 4.0], [0.5, 0.5]])
-    >>> magnitudes = magnitude_diffusion(diffusion)
-    >>> print(magnitudes)
+    """
+    Filter trajectories based on the minimum track length.
+
+    Parameters
+    ----------
+    trajectories : pandas.DataFrame
+        The input DataFrame containing trajectory data.
+    minimum_tracklength : int
+        The minimum length required for a track to be included.
+    track_label : str, optional
+        The column name in the DataFrame that represents the track ID.
+        Defaults to "TRACK_ID".
+
+    Returns
+    -------
+    pandas.DataFrame
+        The filtered DataFrame with trajectories that meet the minimum track length.
+
+    Notes
+    -----
+    This function removes any tracks from the input DataFrame that have a length
+    (number of data points) less than or equal to the specified minimum track length.
+
+    Examples
+    --------
+    >>> filtered_data = filter_by_tracklength(trajectories, 10, track_label="TrackID")
+    >>> print(filtered_data.head())
+
+    """
+
+    if minimum_tracklength > 0:
+
+        leftover_tracks = (
+            trajectories.groupby(track_label, group_keys=False)
+            .size()
+            .index[
+                trajectories.groupby(track_label, group_keys=False).size()
+                > minimum_tracklength
+            ]
+        )
+        trajectories = trajectories.loc[trajectories[track_label].isin(leftover_tracks)]
+
+    trajectories = trajectories.reset_index(drop=True)
+
+    return trajectories
+
+
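
A quick check of the cutoff semantics above (hypothetical data, with `filter_by_tracklength` in scope): the filter keeps only tracks whose size is strictly greater than `minimum_tracklength`, so a track of exactly that length is dropped.

    import pandas as pd

    trajectories = pd.DataFrame({
        "TRACK_ID": [1, 1, 2, 2, 2],  # track 1 has 2 points, track 2 has 3
        "FRAME":    [0, 1, 0, 1, 2],
    })

    filtered = filter_by_tracklength(trajectories, minimum_tracklength=2)
    print(filtered["TRACK_ID"].unique())  # [2]: track 1 (length == 2) is removed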
+def interpolate_time_gaps(
+    trajectories,
+    column_labels={
+        "track": "TRACK_ID",
+        "time": "FRAME",
+        "x": "POSITION_X",
+        "y": "POSITION_Y",
+    },
+):
+    """
+    Interpolate time gaps in trajectories.
+
+    Parameters
+    ----------
+    trajectories : pandas.DataFrame
+        The input DataFrame containing trajectory data.
+    column_labels : dict, optional
+        Dictionary specifying the column labels used in the input DataFrame.
+        The keys represent the following column labels:
+        - 'track': The column label for the track ID.
+        - 'time': The column label for the timestamp.
+        - 'x': The column label for the x-coordinate.
+        - 'y': The column label for the y-coordinate.
+        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The interpolated DataFrame with reduced time gaps in trajectories.
+
+    Notes
+    -----
+    This function performs interpolation on the input trajectories to reduce time gaps between data points.
+    It uses linear interpolation to fill missing values for the specified x and y coordinate attributes.
+
+    The input DataFrame is expected to have the following columns:
+    - track: The unique ID of each track.
+    - time: The timestamp of each data point (in seconds).
+    - x: The x-coordinate of each data point.
+    - y: The y-coordinate of each data point.
+
+    Examples
+    --------
+    >>> interpolated_data = interpolate_time_gaps(trajectories, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
+    >>> print(interpolated_data.head())
+
+    """
+
+    trajectories[column_labels["time"]] = pd.to_datetime(
+        trajectories[column_labels["time"]], unit="s"
+    )
+    trajectories.set_index(column_labels["track"], inplace=True)
+    trajectories = (
+        trajectories.groupby(column_labels["track"], group_keys=True)
+        .apply(lambda x: x.set_index(column_labels["time"]).resample("1s").asfreq())
+        .reset_index()
+    )
+    trajectories[[column_labels["x"], column_labels["y"]]] = trajectories.groupby(
+        column_labels["track"], group_keys=False
+    )[[column_labels["x"], column_labels["y"]]].apply(
+        lambda x: x.interpolate(method="linear")
+    )
+    trajectories.reset_index(drop=True, inplace=True)
+    trajectories[column_labels["time"]] = (
+        trajectories[column_labels["time"]] - pd.Timestamp("1970-01-01")
+    ).dt.total_seconds()
+    # trajectories[column_labels['time']] = trajectories[column_labels['time']].astype('int64')
+    trajectories.sort_values(
+        by=[column_labels["track"], column_labels["time"]], inplace=True
+    )
+
+    return trajectories
+
+
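
A minimal sketch of the gap-filling round trip above, assuming integer frame indices (hypothetical values, with `interpolate_time_gaps` in scope): frames are converted to datetimes, resampled at 1 s (one frame), linearly interpolated, then converted back to float seconds.

    import pandas as pd

    trajectories = pd.DataFrame({
        "TRACK_ID":   [1, 1],
        "FRAME":      [0, 2],  # frame 1 is missing
        "POSITION_X": [0.0, 2.0],
        "POSITION_Y": [0.0, 0.0],
    })

    filled = interpolate_time_gaps(trajectories)
    # A frame-1 row is inserted, with POSITION_X linearly interpolated to 1.0.

The epoch subtraction in the new version replaces the old `astype('int64') / 10**9` conversion; both map the datetime index back to seconds, but the subtraction avoids relying on the nanosecond integer representation.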
+def extrapolate_tracks(
+    trajectories,
+    post=False,
+    pre=False,
+    column_labels={
+        "track": "TRACK_ID",
+        "time": "FRAME",
+        "x": "POSITION_X",
+        "y": "POSITION_Y",
+    },
+):
+    """
+    Extrapolate tracks in trajectories.
+
+    Parameters
+    ----------
+    trajectories : pandas.DataFrame
+        The input DataFrame containing trajectory data.
+    post : bool, optional
+        Flag indicating whether to perform post-extrapolation.
+        Defaults to False.
+    pre : bool, optional
+        Flag indicating whether to perform pre-extrapolation.
+        Defaults to False.
+    column_labels : dict, optional
+        Dictionary specifying the column labels used in the input DataFrame.
+        The keys represent the following column labels:
+        - 'track': The column label for the track ID.
+        - 'time': The column label for the timestamp.
+        - 'x': The column label for the x-coordinate.
+        - 'y': The column label for the y-coordinate.
+        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The extrapolated DataFrame with extended tracks, with the track and time
+        columns placed first.
+
+    Notes
+    -----
+    This function extrapolates tracks in the input DataFrame by repeating the last known position
+    after the available data (post-extrapolation) or the first known position before it (pre-extrapolation).
+
+    The input DataFrame is expected to have the following columns:
+    - track: The unique ID of each track.
+    - time: The timestamp of each data point.
+    - x: The x-coordinate of each data point.
+    - y: The y-coordinate of each data point.
+
+    Examples
+    --------
+    >>> extrapolated_data = extrapolate_tracks(trajectories, post=True, pre=False, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
+    >>> print(extrapolated_data.head())
+
+    """
+
+    if post:
+
+        # get the maximum time T in the dataframe
+        max_time = trajectories[column_labels["time"]].max()
+
+        # extrapolate the position until time T by repeating the last known position
+        df_extrapolated = pd.DataFrame()
+        for track_id, group in trajectories.groupby(column_labels["track"]):
+            last_known_position = (
+                group.loc[group[column_labels["time"]] <= max_time]
+                .tail(1)[
+                    [column_labels["time"], column_labels["x"], column_labels["y"]]
+                ]
+                .values
+            )
+            extrapolated_frames = pd.DataFrame(
+                {
+                    column_labels["time"]: np.arange(
+                        last_known_position[0][0] + 1, max_time + 1
+                    )
+                }
+            )
+            extrapolated_positions = pd.DataFrame(
+                {
+                    column_labels["x"]: last_known_position[0][1],
+                    column_labels["y"]: last_known_position[0][2],
+                },
+                index=np.arange(last_known_position[0][0] + 1, max_time + 1),
+            )
+            track_data = extrapolated_frames.join(
+                extrapolated_positions, how="inner", on=column_labels["time"]
+            )
+            track_data[column_labels["track"]] = track_id
+
+            if len(df_extrapolated) == 0:
+                df_extrapolated = track_data
+            elif len(track_data) != 0:
+                df_extrapolated = pd.concat([df_extrapolated, track_data])
+
+        # concatenate the original dataframe and the extrapolated dataframe
+        trajectories = pd.concat([trajectories, df_extrapolated], axis=0)
+        # sort the dataframe by TRACK_ID and FRAME
+        trajectories.sort_values(
+            [column_labels["track"], column_labels["time"]], inplace=True
+        )
+
+    if pre:
+
+        # get the minimum time in the dataframe (fixed here to frame 0)
+        min_time = 0  # trajectories[column_labels['time']].min()
+
+        # extrapolate back to the minimum time by repeating the first known position
+        df_extrapolated = pd.DataFrame()
+        for track_id, group in trajectories.groupby(column_labels["track"]):
+            last_known_position = (
+                group.loc[group[column_labels["time"]] >= min_time]
+                .head(1)[
+                    [column_labels["time"], column_labels["x"], column_labels["y"]]
+                ]
+                .values
+            )
+            extrapolated_frames = pd.DataFrame(
+                {
+                    column_labels["time"]: np.arange(
+                        min_time, last_known_position[0][0] + 1
+                    )
+                }
+            )
+            extrapolated_positions = pd.DataFrame(
+                {
+                    column_labels["x"]: last_known_position[0][1],
+                    column_labels["y"]: last_known_position[0][2],
+                },
+                index=np.arange(min_time, last_known_position[0][0]),
+            )
+            track_data = extrapolated_frames.join(
+                extrapolated_positions, how="inner", on=column_labels["time"]
+            )
+            track_data[column_labels["track"]] = track_id
+            df_extrapolated = pd.concat([df_extrapolated, track_data])
+
+        # concatenate the original dataframe and the extrapolated dataframe
+        trajectories = pd.concat([trajectories, df_extrapolated], axis=0)
+
+        # sort the dataframe by TRACK_ID and FRAME
+        trajectories.sort_values(
+            [column_labels["track"], column_labels["time"]], inplace=True
+        )
+
+    return trajectories[
+        [column_labels["track"], column_labels["time"]]
+        + [
+            col
+            for col in trajectories.columns
+            if col not in [column_labels["track"], column_labels["time"]]
+        ]
+    ]
+
+
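
A minimal sketch of post-extrapolation, with hypothetical values and `extrapolate_tracks` in scope: a track that ends before the last frame of the movie is padded forward by repeating its final position, and the returned frame puts the track and time columns first.

    import pandas as pd

    trajectories = pd.DataFrame({
        "TRACK_ID":   [1, 1, 2, 2, 2],
        "FRAME":      [0, 1, 0, 1, 2],
        "POSITION_X": [0.0, 1.0, 4.0, 4.5, 5.0],
        "POSITION_Y": [0.0, 0.0, 1.0, 1.0, 1.0],
    })

    extended = extrapolate_tracks(trajectories, post=True)
    # Track 1 gains a frame-2 row at (1.0, 0.0); track 2 already reaches frame 2 and is unchanged.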
+def compute_instantaneous_velocity(
+    trajectories,
+    column_labels={
+        "track": "TRACK_ID",
+        "time": "FRAME",
+        "x": "POSITION_X",
+        "y": "POSITION_Y",
+    },
+):
+    """
+
+    Compute the instantaneous velocity for each point in the trajectories.
+
+    Parameters
+    ----------
+    trajectories : pandas.DataFrame
+        The input DataFrame containing trajectory data.
+    column_labels : dict, optional
+        A dictionary specifying the column labels for track ID, time, position X, and position Y.
+        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The DataFrame with added 'velocity' column representing the instantaneous velocity for each point.
+
+    Notes
+    -----
+    This function calculates the instantaneous velocity for each point in the trajectories.
+    The velocity is computed as the Euclidean distance traveled divided by the time difference between consecutive points.
+
+    The input DataFrame is expected to have columns with the specified column labels for track ID, time, position X, and position Y.
+
+    Examples
+    --------
+    >>> velocity_data = compute_instantaneous_velocity(trajectories)
+    >>> print(velocity_data.head())
+
+    """
+
+    # Calculate the time differences and position differences
+    trajectories["dt"] = trajectories.groupby(column_labels["track"])[
+        column_labels["time"]
+    ].diff()
+    trajectories["dx"] = trajectories.groupby(column_labels["track"])[
+        column_labels["x"]
+    ].diff()
+    trajectories["dy"] = trajectories.groupby(column_labels["track"])[
+        column_labels["y"]
+    ].diff()
+
+    # Calculate the instantaneous velocity
+    trajectories["velocity"] = (
+        np.sqrt(trajectories["dx"] ** 2 + trajectories["dy"] ** 2) / trajectories["dt"]
+    )
+    trajectories = trajectories.drop(["dx", "dy", "dt"], axis=1)
+    trajectories = trajectories.sort_values(
+        by=[column_labels["track"], column_labels["time"]]
+    )
+
+    return trajectories
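
A minimal sketch of the velocity computation, with hypothetical values and `compute_instantaneous_velocity` in scope: each track's first row has no predecessor, so its velocity is NaN.

    import pandas as pd

    trajectories = pd.DataFrame({
        "TRACK_ID":   [1, 1, 1],
        "FRAME":      [0, 1, 2],
        "POSITION_X": [0.0, 3.0, 3.0],
        "POSITION_Y": [0.0, 4.0, 4.0],
    })

    out = compute_instantaneous_velocity(trajectories)
    print(out["velocity"].tolist())  # [nan, 5.0, 0.0]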
 
-    """
 
-    return np.sqrt(diffusion_vector[:,0]**2+diffusion_vector[:,1]**2)
-
-
-def compute_instantaneous_diffusion(trajectories, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
-
-    """
-
-    Compute the instantaneous diffusion for each track in the provided trajectories DataFrame.
-
-    Parameters
-    ----------
-    trajectories : DataFrame
-        The input DataFrame containing trajectories with position and time information.
-    column_labels : dict, optional
-        A dictionary specifying the column labels for track ID, time, x-coordinate, and y-coordinate.
-        The default is {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
-
-    Returns
-    -------
-    DataFrame
-        The modified DataFrame with an additional column "diffusion" containing the computed diffusion values.
-
-    Notes
-    -----
-
-    The instantaneous diffusion is calculated using the positions and times of each track. The diffusion values
-    are computed for each track individually and added as a new column "diffusion" in the output DataFrame.
-
-    Examples
-    --------
-    >>> trajectories = pd.DataFrame({'TRACK_ID': [1, 1, 1, 2, 2, 2],
-    ...                              'FRAME': [0, 1, 2, 0, 1, 2],
-    ...                              'POSITION_X': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
-    ...                              'POSITION_Y': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]})
-    >>> compute_instantaneous_diffusion(trajectories)
-    # Output DataFrame with added "diffusion" column
-
-    """
+def instantaneous_diffusion(positions_x, positions_y, timeline):
+    """
+    Compute the instantaneous diffusion coefficients for each position coordinate.
+
+    Parameters
+    ----------
+    positions_x : numpy.ndarray
+        Array of x-coordinates of positions.
+    positions_y : numpy.ndarray
+        Array of y-coordinates of positions.
+    timeline : numpy.ndarray
+        Array of corresponding time points.
+
+    Returns
+    -------
+    numpy.ndarray
+        Array of instantaneous diffusion coefficients for each position coordinate.
+
+    Notes
+    -----
+    The function calculates the instantaneous diffusion coefficients for each position coordinate (x, y) based on the provided positions and timeline.
+    The diffusion coefficient at each time point is computed using the formula:
+    D = ((x[t+1] - x[t-1])^2 / (2 * (t[t+1] - t[t-1]))) + (1 / (t[t+1] - t[t-1])) * ((x[t+1] - x[t]) * (x[t] - x[t-1]))
+    where x represents the position coordinate (x or y) and t represents the corresponding time point.
+
+    Examples
+    --------
+    >>> x = np.array([0, 1, 2, 3, 4, 5])
+    >>> y = np.array([0, 1, 4, 9, 16, 25])
+    >>> t = np.array([0, 1, 2, 3, 4, 5])
+    >>> diff = instantaneous_diffusion(x, y, t)
+    >>> print(diff)
+
+    """
+
+    diff = np.zeros((len(positions_x), 2))
+    diff[:, :] = np.nan
+
+    for t in range(1, len(positions_x) - 1):
+        diff[t, 0] = (positions_x[t + 1] - positions_x[t - 1]) ** 2 / (
+            2 * (timeline[t + 1] - timeline[t - 1])
+        ) + 1 / (timeline[t + 1] - timeline[t - 1]) * (
+            (positions_x[t + 1] - positions_x[t])
+            * (positions_x[t] - positions_x[t - 1])
+        )
+
+    for t in range(1, len(positions_y) - 1):
+        diff[t, 1] = (positions_y[t + 1] - positions_y[t - 1]) ** 2 / (
+            2 * (timeline[t + 1] - timeline[t - 1])
+        ) + 1 / (timeline[t + 1] - timeline[t - 1]) * (
+            (positions_y[t + 1] - positions_y[t])
+            * (positions_y[t] - positions_y[t - 1])
+        )
+
+    return diff
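
A quick numeric check of the estimator above, with `instantaneous_diffusion` in scope: for uniform motion x(t) = t with unit time steps, the first term gives 2^2 / (2 * 2) = 1 and the cross term gives (1/2) * 1 * 1 = 0.5, so D = 1.5 at every interior point, while the endpoints stay NaN.

    import numpy as np

    x = np.arange(4, dtype=float)  # uniform motion along x
    y = np.zeros(4)                # immobile along y
    t = np.arange(4, dtype=float)

    D = instantaneous_diffusion(x, y, t)
    print(D[1])  # [1.5 0. ]; rows 0 and 3 remain NaN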
 
-    trajectories = trajectories.sort_values(by=[column_labels['track'],column_labels['time']])
-    trajectories['diffusion'] = np.nan
-
-    for tid,group in trajectories.groupby(column_labels['track']):
 
-        indices = group.index
-        x = group[column_labels['x']].to_numpy()
-        y = group[column_labels['y']].to_numpy()
-        t = group[column_labels['time']].to_numpy()
-
-        if len(x)>3: #to have t-1,t,t+1
-            diff = instantaneous_diffusion(x,y,t)
-            d = magnitude_diffusion(diff)
-            trajectories.loc[indices, "diffusion"] = d
+def magnitude_diffusion(diffusion_vector):
+    """
+    Compute the magnitude of diffusion for each diffusion vector.
+
+    Parameters
+    ----------
+    diffusion_vector : numpy.ndarray
+        Array of diffusion vectors.
+
+    Returns
+    -------
+    numpy.ndarray
+        Array of magnitudes of diffusion.
+
+    Notes
+    -----
+    The function calculates the magnitude of diffusion for each diffusion vector (x, y) based on the provided diffusion vectors.
+    The magnitude of diffusion is computed as the Euclidean norm of the diffusion vector.
+
+    Examples
+    --------
+    >>> diffusion = np.array([[1.0, 2.0], [3.0, 4.0], [0.5, 0.5]])
+    >>> magnitudes = magnitude_diffusion(diffusion)
+    >>> print(magnitudes)
+
+    """
+
+    return np.sqrt(diffusion_vector[:, 0] ** 2 + diffusion_vector[:, 1] ** 2)
+
+
+def compute_instantaneous_diffusion(
+    trajectories,
+    column_labels={
+        "track": "TRACK_ID",
+        "time": "FRAME",
+        "x": "POSITION_X",
+        "y": "POSITION_Y",
+    },
+):
+    """
+
+    Compute the instantaneous diffusion for each track in the provided trajectories DataFrame.
+
+    Parameters
+    ----------
+    trajectories : DataFrame
+        The input DataFrame containing trajectories with position and time information.
+    column_labels : dict, optional
+        A dictionary specifying the column labels for track ID, time, x-coordinate, and y-coordinate.
+        The default is {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
+
+    Returns
+    -------
+    DataFrame
+        The modified DataFrame with an additional column "diffusion" containing the computed diffusion values.
+
+    Notes
+    -----
+
+    The instantaneous diffusion is calculated using the positions and times of each track. The diffusion values
+    are computed for each track individually and added as a new column "diffusion" in the output DataFrame.
+
+    Examples
+    --------
+    >>> trajectories = pd.DataFrame({'TRACK_ID': [1, 1, 1, 2, 2, 2],
+    ...                              'FRAME': [0, 1, 2, 0, 1, 2],
+    ...                              'POSITION_X': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
+    ...                              'POSITION_Y': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]})
+    >>> compute_instantaneous_diffusion(trajectories)
+    # Output DataFrame with added "diffusion" column
+
+    """
+
+    trajectories = trajectories.sort_values(
+        by=[column_labels["track"], column_labels["time"]]
+    )
+    trajectories["diffusion"] = np.nan
+
+    for tid, group in trajectories.groupby(column_labels["track"]):
+
+        indices = group.index
+        x = group[column_labels["x"]].to_numpy()
+        y = group[column_labels["y"]].to_numpy()
+        t = group[column_labels["time"]].to_numpy()
+
+        if len(x) > 3:  # to have t-1,t,t+1
+            diff = instantaneous_diffusion(x, y, t)
+            d = magnitude_diffusion(diff)
+            trajectories.loc[indices, "diffusion"] = d
+
+    return trajectories
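
A minimal sketch tying the two helpers together, with `compute_instantaneous_diffusion` in scope and hypothetical values: note the `len(x) > 3` guard, so a track needs at least four points before any "diffusion" value is written; the 3-point tracks of the docstring example would keep NaN throughout.

    import pandas as pd

    trajectories = pd.DataFrame({
        "TRACK_ID":   [1, 1, 1, 1],
        "FRAME":      [0, 1, 2, 3],
        "POSITION_X": [0.0, 1.0, 2.0, 3.0],
        "POSITION_Y": [0.0, 0.0, 0.0, 0.0],
    })

    out = compute_instantaneous_diffusion(trajectories)
    # Interior frames 1 and 2 receive finite "diffusion" values; the endpoints stay NaN.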
 
-    return trajectories
 
 def track_at_position(pos, mode, return_tracks=False, view_on_napari=False, threads=1):
-
-    pos = pos.replace('\\','/')
-    pos = rf"{pos}"
-    assert os.path.exists(pos),f'Position {pos} is not a valid path.'
-    if not pos.endswith('/'):
-        pos += '/'
-
-    script_path = os.sep.join([abs_path, 'scripts', 'track_cells.py'])
-    cmd = f'python "{script_path}" --pos "{pos}" --mode "{mode}" --threads "{threads}"'
-    subprocess.call(cmd, shell=True)
-
-    track_table = pos + os.sep.join(["output","tables",f"trajectories_{mode}.csv"])
-    if return_tracks:
-        df = pd.read_csv(track_table)
-        return df
-    else:
-        return None
-
-def write_first_detection_class(df, img_shape=None, edge_threshold=20, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
-
-    """
-    Assigns a classification and first detection time to tracks in the given DataFrame. This function must be called
-    before any track post-processing.
-
-    This function computes the first detection time and a detection class (`class_firstdetection`) for each track in the data.
-    Tracks that start on or near the image edge, or those detected at the initial frame, are marked with special classes.
-
-    Parameters
-    ----------
-    df : pandas.DataFrame
-        A DataFrame containing track data. Expected to have at least the columns specified in `column_labels` and `class_id` (mask value).
-
-    img_shape : tuple of int, optional
-        The shape of the image as `(height, width)`. Used to determine whether the first detection occurs near the image edge.
-
-    edge_threshold : int, optional, default=20
-        The distance in pixels from the image edge to consider a detection as near the edge.
-
-    column_labels : dict, optional
-        A dictionary mapping logical column names to actual column names in `tab`. Keys include:
-
-        - `'track'`: The column indicating the track ID (default: `"TRACK_ID"`).
-        - `'time'`: The column indicating the frame/time (default: `"FRAME"`).
-        - `'x'`: The column indicating the X-coordinate (default: `"POSITION_X"`).
-        - `'y'`: The column indicating the Y-coordinate (default: `"POSITION_Y"`).
-
-    Returns
-    -------
-    pandas.DataFrame
-        The input DataFrame `df` with two additional columns:
-
-        - `'class_firstdetection'`: A class assigned based on detection status:
-
-            - `0`: Valid detection not near the edge and not at the initial frame.
-            - `2`: Detection near the edge, at the initial frame, or no detection available.
-
-        - `'t_firstdetection'`: The adjusted first detection time (in frame units):
-
-            - `-1`: Indicates no valid detection or detection near the edge.
-            - A float value representing the adjusted first detection time otherwise.
-
-    Notes
-    -----
-    - The function assumes that tracks are grouped and sorted by track ID and frame.
-    - Detections near the edge or at the initial frame (frame 0) are considered invalid and assigned special values.
-    - If `img_shape` is not provided, edge checks are skipped.
-
-    """
-
-    df = df.sort_values(by=[column_labels['track'],column_labels['time']])
-    for tid,track_group in df.groupby(column_labels['track']):
-        indices = track_group.index
-        detection = track_group['class_id'].values
-        timeline = track_group[column_labels['time']].values
-        positions_x = track_group[column_labels['x']].values
-        positions_y = track_group[column_labels['y']].values
-        dt = 1
-
-        timeline = track_group['FRAME'].to_numpy()
-        status = np.ones_like(timeline)
-
-        # Initialize
-        cclass = 2; t_first = np.nan;
-
-        if np.any(detection==detection):
-
-            t_first = timeline[detection==detection][0]
-            x_first = positions_x[detection==detection][0]; y_first = positions_y[detection==detection][0];
-
-            edge_test = False
-            if img_shape is not None:
-                edge_test = (x_first < edge_threshold) or (y_first < edge_threshold) or (y_first > (img_shape[0] - edge_threshold)) or (x_first > (img_shape[1] - edge_threshold))
-
-            cclass = 0
-            if t_first<=0:
-                t_first = -1
-                cclass = 2
-            else:
-                t_first = float(t_first) - float(dt)
-                if t_first==0:
-                    t_first += 0.01
-
-                if edge_test:
-                    cclass = 2
-                    # switch to class 2 but keep time/status information
-        else:
-            t_first = -1
-            cclass = 2
-
-        status[timeline < t_first] = 0.
-        df.loc[indices, 'class_firstdetection'] = cclass
-        df.loc[indices, 't_firstdetection'] = t_first
-        df.loc[indices, 'status_firstdetection'] = status
-
-    return df
 
+    pos = pos.replace("\\", "/")
+    pos = rf"{pos}"
+    assert os.path.exists(pos), f"Position {pos} is not a valid path."
+    if not pos.endswith("/"):
+        pos += "/"
+
+    script_path = os.sep.join([abs_path, "scripts", "track_cells.py"])
+    cmd = f'python "{script_path}" --pos "{pos}" --mode "{mode}" --threads "{threads}"'
+    subprocess.call(cmd, shell=True)
+
+    track_table = pos + os.sep.join(["output", "tables", f"trajectories_{mode}.csv"])
+    if return_tracks:
+        df = pd.read_csv(track_table)
+        return df
+    else:
+        return None
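
A usage sketch for the subprocess wrapper above (the position path is hypothetical; `mode` selects the population, e.g. "targets" as in the `__main__` block at the end of this file, and the script path is built from the package's `abs_path`):

    df = track_at_position("/path/to/experiment/W1/100", "targets", return_tracks=True)
    # Runs scripts/track_cells.py on the position folder, then loads
    # output/tables/trajectories_targets.csv into a DataFrame.

Note that `view_on_napari` is accepted but not used in this body.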
+
+
+def write_first_detection_class(
+    df,
+    img_shape=None,
+    edge_threshold=20,
+    column_labels={
+        "track": "TRACK_ID",
+        "time": "FRAME",
+        "x": "POSITION_X",
+        "y": "POSITION_Y",
+    },
+):
+    """
+    Assigns a classification and first detection time to tracks in the given DataFrame. This function must be called
+    before any track post-processing.
+
+    This function computes the first detection time and a detection class (`class_firstdetection`) for each track in the data.
+    Tracks that start on or near the image edge, or those detected at the initial frame, are marked with special classes.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        A DataFrame containing track data. Expected to have at least the columns specified in `column_labels` and `class_id` (mask value).
+
+    img_shape : tuple of int, optional
+        The shape of the image as `(height, width)`. Used to determine whether the first detection occurs near the image edge.
+
+    edge_threshold : int, optional, default=20
+        The distance in pixels from the image edge to consider a detection as near the edge.
+
+    column_labels : dict, optional
+        A dictionary mapping logical column names to actual column names in `df`. Keys include:
+
+        - `'track'`: The column indicating the track ID (default: `"TRACK_ID"`).
+        - `'time'`: The column indicating the frame/time (default: `"FRAME"`).
+        - `'x'`: The column indicating the X-coordinate (default: `"POSITION_X"`).
+        - `'y'`: The column indicating the Y-coordinate (default: `"POSITION_Y"`).
+
+    Returns
+    -------
+    pandas.DataFrame
+        The input DataFrame `df` with three additional columns:
+
+        - `'class_firstdetection'`: A class assigned based on detection status:
+
+            - `0`: Valid detection not near the edge and not at the initial frame.
+            - `2`: Detection near the edge, at the initial frame, or no detection available.
+
+        - `'t_firstdetection'`: The adjusted first detection time (in frame units):
+
+            - `-1`: Indicates no valid detection or detection near the edge.
+            - A float value representing the adjusted first detection time otherwise.
+
+        - `'status_firstdetection'`: A per-frame status, `0` before the adjusted first detection time and `1` from then on.
+
+    Notes
+    -----
+    - The function assumes that tracks are grouped and sorted by track ID and frame.
+    - Detections near the edge or at the initial frame (frame 0) are considered invalid and assigned special values.
+    - If `img_shape` is not provided, edge checks are skipped.
+
+    """
+
+    df = df.sort_values(by=[column_labels["track"], column_labels["time"]])
+    for tid, track_group in df.groupby(column_labels["track"]):
+        indices = track_group.index
+        detection = track_group["class_id"].values
+        timeline = track_group[column_labels["time"]].values
+        positions_x = track_group[column_labels["x"]].values
+        positions_y = track_group[column_labels["y"]].values
+        dt = 1
+
+        timeline = track_group["FRAME"].to_numpy()
+        status = np.ones_like(timeline)
+
+        # Initialize
+        cclass = 2
+        t_first = np.nan
+
+        if np.any(detection == detection):
+
+            t_first = timeline[detection == detection][0]
+            x_first = positions_x[detection == detection][0]
+            y_first = positions_y[detection == detection][0]
+
+            edge_test = False
+            if img_shape is not None:
+                edge_test = (
+                    (x_first < edge_threshold)
+                    or (y_first < edge_threshold)
+                    or (y_first > (img_shape[0] - edge_threshold))
+                    or (x_first > (img_shape[1] - edge_threshold))
+                )
+
+            cclass = 0
+            if t_first <= 0:
+                t_first = -1
+                cclass = 2
+            else:
+                t_first = float(t_first) - float(dt)
+                if t_first == 0:
+                    t_first += 0.01
+
+                if edge_test:
+                    cclass = 2
+                    # switch to class 2 but keep time/status information
+        else:
+            t_first = -1
+            cclass = 2
+
+        status[timeline < t_first] = 0.0
+        df.loc[indices, "class_firstdetection"] = cclass
+        df.loc[indices, "t_firstdetection"] = t_first
+        df.loc[indices, "status_firstdetection"] = status
+
+    return df
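
A minimal sketch of the first-detection logic, with hypothetical values and `write_first_detection_class` in scope: `class_id` is NaN until the object first appears in the mask, and the detection time is shifted back by one frame (`dt = 1`) and nudged off zero.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        "TRACK_ID":   [1, 1, 1],
        "FRAME":      [0, 1, 2],
        "POSITION_X": [50.0, 51.0, 52.0],
        "POSITION_Y": [60.0, 60.0, 60.0],
        "class_id":   [np.nan, 7.0, 7.0],  # first detected at frame 1, away from the edges
    })

    out = write_first_detection_class(df, img_shape=(512, 512))
    # class_firstdetection == 0, t_firstdetection == 0.01 (1 - dt, nudged off zero),
    # status_firstdetection == [0, 1, 1] (0 before the adjusted detection time).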
 
 
  if __name__ == "__main__":
-    track_at_position("/home/limozin/Documents/Experiments/MinimumJan/W4/401",
-                      "targets",
-                      )
+    track_at_position(
+        "/home/limozin/Documents/Experiments/MinimumJan/W4/401",
+        "targets",
+    )