celldetective 1.4.1__py3-none-any.whl → 1.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
celldetective/tracking.py CHANGED
@@ -14,1037 +14,1343 @@ import os
14
14
  import subprocess
15
15
  import trackpy as tp
16
16
 
17
- abs_path = os.sep.join([os.path.split(os.path.dirname(os.path.realpath(__file__)))[0],'celldetective'])
18
-
19
- def track(labels, configuration=None, stack=None, spatial_calibration=1, features=None, channel_names=None,
20
- haralick_options=None, return_napari_data=False, view_on_napari=False, mask_timepoints=None, mask_channels=None, volume=(2048,2048),
21
- optimizer_options = {'tm_lim': int(12e4)}, track_kwargs={'step_size': 100}, objects=None,
22
- clean_trajectories_kwargs=None, btrack_option=True, search_range=None, memory=None,column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'},
23
- ):
24
-
25
- """
26
-
27
- Perform cell tracking on segmented labels using the bTrack library.
28
-
29
- Parameters
30
- ----------
31
- labels : ndarray
32
- The segmented labels representing cell objects.
33
- configuration : Configuration or None
34
- The bTrack configuration object. If None, a default configuration is used.
35
- stack : ndarray or None, optional
36
- The image stack corresponding to the labels. Default is None.
37
- spatial_calibration : float, optional
38
- The spatial calibration factor to convert pixel coordinates to physical units. Default is 1.
39
- features : list or None, optional
40
- The list of features to extract from the objects. If None, no additional features are extracted. Default is None.
41
- channel_names : list or None, optional
42
- The list of channel names corresponding to the image stack. Used for renaming intensity columns in the output DataFrame.
43
- Default is None.
44
- haralick_options : dict or None, optional
45
- The options for Haralick feature extraction. If None, no Haralick features are extracted. Default is None.
46
- return_napari_data : bool, optional
47
- Whether to return the napari data dictionary along with the DataFrame. Default is False.
48
- view_on_napari : bool, optional
49
- Whether to view the tracking results on napari. Default is False.
50
- optimizer_options : dict, optional
51
- The options for the optimizer. Default is {'tm_lim': int(12e4)}.
52
- track_kwargs : dict, optional
53
- Additional keyword arguments for the bTrack tracker. Default is {'step_size': 100}.
54
- clean_trajectories_kwargs : dict or None, optional
55
- Keyword arguments for the clean_trajectories function to post-process the tracking trajectories. If None, no post-processing is performed.
56
- Default is None.
57
- column_labels : dict, optional
58
- The column labels to use in the output DataFrame. Default is {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
59
-
60
- Returns
61
- -------
62
- DataFrame or tuple
63
- If return_napari_data is False, returns the DataFrame containing the tracking results. If return_napari_data is True, returns a tuple
64
- containing the DataFrame and the napari data dictionary.
65
-
66
- Notes
67
- -----
68
- This function performs cell tracking on the segmented labels using the bTrack library. It extracts features from the objects, normalizes
69
- the features, tracks the objects, and generates a DataFrame with the tracking results. The DataFrame can be post-processed using the
70
- clean_trajectories function. If specified, the tracking results can be visualized on napari.
71
-
72
- Examples
73
- --------
74
- >>> labels = np.array([[1, 1, 2, 2, 0, 0],
75
- [1, 1, 1, 2, 2, 0],
76
- [0, 0, 1, 2, 0, 0]])
77
- >>> configuration = cell_config()
78
- >>> stack = np.random.rand(3, 6)
79
- >>> df = track(labels, configuration, stack=stack, spatial_calibration=0.5)
80
- >>> df.head()
81
-
82
- TRACK_ID FRAME POSITION_Y POSITION_X
83
- 0 0 0 0.0 0.0
84
- 1 0 1 0.0 0.0
85
- 2 0 2 0.0 0.0
86
- 3 1 0 0.5 0.5
87
- 4 1 1 0.5 0.5
88
-
89
- """
90
-
91
- configuration = interpret_tracking_configuration(configuration)
92
-
93
- if objects is None:
94
-
95
- if not btrack_option:
96
- features = []
97
- channel_names = None
98
- haralick_options = None
99
-
100
- objects = extract_objects_and_features(labels, stack, features,
101
- channel_names=channel_names,
102
- haralick_options=haralick_options,
103
- mask_timepoints=mask_timepoints,
104
- mask_channels=mask_channels,
105
- )
106
-
107
- if btrack_option:
108
- columns = list(objects.columns)
109
- to_remove = ['x','y','class_id','t']
110
- for tr in to_remove:
111
- try:
112
- columns.remove(tr)
113
- except:
114
- print(f'column {tr} could not be found...')
115
-
116
- scaler = StandardScaler()
117
- if columns:
118
- x = objects[columns].values
119
- x_scaled = scaler.fit_transform(x)
120
- df_temp = pd.DataFrame(x_scaled, columns=columns, index = objects.index)
121
- objects[columns] = df_temp
122
- else:
123
- print('Warning: no features were passed to bTrack...')
124
-
125
- # 2) track the objects
126
- new_btrack_objects = localizations_to_objects(objects)
127
-
128
- with BayesianTracker() as tracker:
129
-
130
- tracker.configure(configuration)
131
-
132
- if columns:
133
- tracking_updates = ["motion","visual"]
134
- #tracker.tracking_updates = ["motion","visual"]
135
- tracker.features = columns
136
- else:
137
- tracking_updates = ["motion"]
138
-
139
- tracker.append(new_btrack_objects)
140
- tracker.volume = ((0,volume[0]), (0,volume[1]), (-1e5, 1e5)) #(-1e5, 1e5)
141
- #print(tracker.volume)
142
- tracker.track(tracking_updates=tracking_updates, **track_kwargs)
143
- tracker.optimize(options=optimizer_options)
144
-
145
- data, properties, graph = tracker.to_napari() #ndim=2
146
- # do the table post processing and napari options
147
- if data.shape[1]==4:
148
- df = pd.DataFrame(data, columns=[column_labels['track'],column_labels['time'],column_labels['y'],column_labels['x']])
149
- elif data.shape[1]==5:
150
- df = pd.DataFrame(data, columns=[column_labels['track'],column_labels['time'],"z",column_labels['y'],column_labels['x']])
151
- df = df.drop(columns=['z'])
152
- df[column_labels['x']+'_um'] = df[column_labels['x']]*spatial_calibration
153
- df[column_labels['y']+'_um'] = df[column_labels['y']]*spatial_calibration
154
-
155
- else:
156
- properties = None
157
- graph = {}
158
- print(f"{objects=} {objects.columns=}")
159
- objects = objects.rename(columns={"t": "frame"})
160
- if search_range is not None and memory is not None:
161
- data = tp.link(objects, search_range, memory=memory,link_strategy='auto')
162
- else:
163
- print('Please provide a valid search range and memory value...')
164
- return None
165
- data['particle'] = data['particle'] + 1 # force track id to start at 1
166
- df = data.rename(columns={'frame': column_labels['time'], 'x': column_labels['x'], 'y': column_labels['y'], 'particle': column_labels['track']})
167
- df['state'] = 5.0; df['generation'] = 0.0; df['root'] = 1.0; df['parent'] = 1.0; df['dummy'] = False; df['z'] = 0.0;
168
- data = df[[column_labels['track'],column_labels['time'],"z",column_labels['y'],column_labels['x']]].to_numpy()
169
- print(f"{df=}")
170
-
171
- if btrack_option:
172
- df = df.merge(pd.DataFrame(properties),left_index=True, right_index=True)
173
- if columns:
174
- x = df[columns].values
175
- x_scaled = scaler.inverse_transform(x)
176
- df_temp = pd.DataFrame(x_scaled, columns=columns, index = df.index)
177
- df[columns] = df_temp
178
-
179
- # set dummy features to NaN
180
- df.loc[df['dummy'],['class_id']+columns] = np.nan
181
-
182
- df = df.sort_values(by=[column_labels['track'],column_labels['time']])
183
- df = velocity_per_track(df, window_size=3, mode='bi')
184
-
185
- if channel_names is not None:
186
- df = rename_intensity_column(df, channel_names)
187
-
188
- df = write_first_detection_class(df, img_shape=volume, column_labels=column_labels)
189
-
190
- if clean_trajectories_kwargs is not None:
191
- df = clean_trajectories(df.copy(),**clean_trajectories_kwargs)
192
-
193
- df.loc[df["status_firstdetection"].isna(), "status_firstdetection"] = 0
194
- df['ID'] = np.arange(len(df)).astype(int)
195
-
196
- invalid_cols = [c for c in list(df.columns) if c.startswith('Unnamed')]
197
- if len(invalid_cols)>0:
198
- df = df.drop(invalid_cols, axis=1)
199
-
200
- # if view_on_napari:
201
- # view_on_napari_btrack(data,properties,graph,stack=stack,labels=labels,relabel=True)
202
-
203
- if return_napari_data:
204
- napari_data = {"data": data, "properties": properties, "graph": graph}
205
- return df, napari_data
206
- else:
207
- return df
208
-
209
- def extract_objects_and_features(labels, stack, features, channel_names=None, haralick_options=None, mask_timepoints=None, mask_channels=None):
210
-
211
- """
212
-
213
- Extract objects and features from segmented labels and image stack.
214
-
215
- Parameters
216
- ----------
217
- labels : ndarray
218
- The segmented labels representing cell objects.
219
- stack : ndarray
220
- The image stack corresponding to the labels.
221
- features : list or None
222
- The list of features to extract from the objects. If None, no additional features are extracted.
223
- channel_names : list or None, optional
224
- The list of channel names corresponding to the image stack. Used for extracting Haralick features. Default is None.
225
- haralick_options : dict or None, optional
226
- The options for Haralick feature extraction. If None, no Haralick features are extracted. Default is None.
227
- mask_timepoints : list of None, optionak
228
- Frames to hide during tracking.
229
- Returns
230
- -------
231
- DataFrame
232
- The DataFrame containing the extracted object features.
233
-
234
- Notes
235
- -----
236
- This function extracts objects and features from the segmented labels and image stack. It computes the specified features for each
237
- labeled object and returns a DataFrame containing the object features. Additional features such as centroid coordinates can also
238
- be extracted. If Haralick features are enabled, they are computed based on the image stack using the specified options.
239
-
240
- Examples
241
- --------
242
- >>> labels = np.array([[1, 1, 2, 2, 0, 0],
243
- [1, 1, 1, 2, 2, 0],
244
- [0, 0, 1, 2, 0, 0]])
245
- >>> stack = np.random.rand(3, 6, 3)
246
- >>> features = ['area', 'mean_intensity']
247
- >>> df = extract_objects_and_features(labels, stack, features)
248
-
249
- """
250
-
251
- if features is None:
252
- features = []
253
-
254
- if stack is None:
255
- haralick_options = None
256
-
257
- if mask_timepoints is not None:
258
- for f in mask_timepoints:
259
- labels[f] = 0.
260
-
261
- nbr_frames = len(labels)
262
- timestep_dataframes = []
263
-
264
- for t in tqdm(range(nbr_frames),desc='frame'):
265
-
266
- if stack is not None:
267
- img = stack[t]
268
- else:
269
- img = None
270
-
271
- if (haralick_options is not None) and (t==0) and (stack is not None):
272
- if not 'percentiles' in haralick_options:
273
- haralick_options.update({'percentiles': (0.01,99.99)})
274
- if not 'target_channel' in haralick_options:
275
- haralick_options.update({'target_channel': 0})
276
- haralick_percentiles = haralick_options['percentiles']
277
- haralick_channel_index = haralick_options['target_channel']
278
- min_value = np.nanpercentile(img[:,:,haralick_channel_index].flatten(), haralick_percentiles[0])
279
- max_value = np.nanpercentile(img[:,:,haralick_channel_index].flatten(), haralick_percentiles[1])
280
- haralick_options.update({'clip_values': (min_value, max_value)})
281
-
282
- df_props = measure_features(img, labels[t], features = features+['centroid'], border_dist=None,
283
- channels=channel_names, haralick_options=haralick_options, verbose=False)
284
- df_props.rename(columns={'centroid-1': 'x', 'centroid-0': 'y'},inplace=True)
285
- df_props['t'] = int(t)
286
- timestep_dataframes.append(df_props)
287
-
288
- df = pd.concat(timestep_dataframes)
289
- df.reset_index(inplace=True, drop=True)
290
-
291
- if mask_channels is not None:
292
- cols_to_drop = []
293
- for mc in mask_channels:
294
- columns = df.columns
295
- col_contains = [mc in c for c in columns]
296
- to_remove = np.array(columns)[np.array(col_contains)]
297
- cols_to_drop.extend(to_remove)
298
- if len(cols_to_drop)>0:
299
- df = df.drop(cols_to_drop, axis=1)
300
-
301
- return df
302
-
303
-
304
- def clean_trajectories(trajectories,remove_not_in_first=False,remove_not_in_last=False,
305
- minimum_tracklength=0, interpolate_position_gaps=False,
306
- extrapolate_tracks_post=False,
307
- extrapolate_tracks_pre=False,
308
- interpolate_na=False,
309
- column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
310
-
311
- """
312
- Clean trajectories by applying various cleaning operations.
313
-
314
- Parameters
315
- ----------
316
- trajectories : pandas.DataFrame
317
- The input DataFrame containing trajectory data.
318
- remove_not_in_first : bool, optional
319
- Flag indicating whether to remove tracks not present in the first frame.
320
- Defaults to True.
321
- remove_not_in_last : bool, optional
322
- Flag indicating whether to remove tracks not present in the last frame.
323
- Defaults to True.
324
- minimum_tracklength : int, optional
325
- The minimum length of a track to be retained.
326
- Defaults to 0.
327
- interpolate_position_gaps : bool, optional
328
- Flag indicating whether to interpolate position gaps in tracks.
329
- Defaults to True.
330
- extrapolate_tracks_post : bool, optional
331
- Flag indicating whether to extrapolate tracks after the last known position.
332
- Defaults to True.
333
- extrapolate_tracks_pre : bool, optional
334
- Flag indicating whether to extrapolate tracks before the first known position.
335
- Defaults to False.
336
- interpolate_na : bool, optional
337
- Flag indicating whether to interpolate missing values in tracks.
338
- Defaults to False.
339
- column_labels : dict, optional
340
- Dictionary specifying the column labels used in the input DataFrame.
341
- The keys represent the following column labels:
342
- - 'track': The column label for the track ID.
343
- - 'time': The column label for the timestamp.
344
- - 'x': The column label for the x-coordinate.
345
- - 'y': The column label for the y-coordinate.
346
- Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
347
-
348
- Returns
349
- -------
350
- pandas.DataFrame
351
- The cleaned DataFrame with trajectories.
352
-
353
- Notes
354
- -----
355
- This function applies various cleaning operations to the input DataFrame containing trajectory data.
356
- The cleaning operations include:
357
- - Filtering tracks based on their endpoints.
358
- - Filtering tracks based on their length.
359
- - Interpolating position gaps in tracks.
360
- - Extrapolating tracks after the last known position.
361
- - Extrapolating tracks before the first known position.
362
- - Interpolating missing values in tracks.
363
-
364
- The input DataFrame is expected to have the following columns:
365
- - track: The unique ID of each track.
366
- - time: The timestamp of each data point.
367
- - x: The x-coordinate of each data point.
368
- - y: The y-coordinate of each data point.
369
-
370
- Examples
371
- --------
372
- >>> cleaned_data = clean_trajectories(trajectories, remove_not_in_first=True, remove_not_in_last=True,
373
- ... minimum_tracklength=10, interpolate_position_gaps=True,
374
- ... extrapolate_tracks_post=True, extrapolate_tracks_pre=False,
375
- ... interpolate_na=True, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
376
- >>> print(cleaned_data.head())
377
-
378
- """
379
-
380
- trajectories.reset_index
381
- trajectories.sort_values(by=[column_labels['track'],column_labels['time']],inplace=True)
382
-
383
- if minimum_tracklength>0:
384
- trajectories = filter_by_tracklength(trajectories.copy(), minimum_tracklength, track_label=column_labels['track'])
385
-
386
- if np.any([remove_not_in_first, remove_not_in_last]):
387
- trajectories = filter_by_endpoints(trajectories.copy(), remove_not_in_first=remove_not_in_first,
388
- remove_not_in_last=remove_not_in_last, column_labels=column_labels)
389
-
390
- if np.any([extrapolate_tracks_post, extrapolate_tracks_pre]):
391
- trajectories = extrapolate_tracks(trajectories.copy(), post=extrapolate_tracks_post,
392
- pre=extrapolate_tracks_pre, column_labels=column_labels)
393
-
394
- if interpolate_position_gaps:
395
- trajectories = interpolate_time_gaps(trajectories.copy(), column_labels=column_labels)
396
-
397
- if interpolate_na:
398
- trajectories = interpolate_nan_properties(trajectories.copy(), track_label=column_labels['track'])
399
-
400
- trajectories = trajectories.sort_values(by=[column_labels['track'],column_labels['time']])
401
- trajectories.reset_index(inplace=True, drop=True)
402
-
403
- if 'class_firstdetection' in list(trajectories.columns):
404
- for tid, track_group in trajectories.groupby(column_labels['track']):
405
- indices = track_group.index
406
-
407
- class_values = np.array(track_group['class_firstdetection'].unique())
408
- class_values = class_values[class_values==class_values]
409
- t_values = np.array(track_group['t_firstdetection'].unique())
410
- t_values = t_values[t_values==t_values]
411
- if len(class_values)==0:
412
- class_values = 2
413
- t_values = -1
414
- else:
415
- class_values = class_values[0]
416
- t_values = t_values[0]
417
-
418
- trajectories.loc[indices, 'class_firstdetection'] = class_values
419
- trajectories.loc[indices, 't_firstdetection'] = t_values
420
-
421
- return trajectories
17
+ abs_path = os.sep.join(
18
+ [os.path.split(os.path.dirname(os.path.realpath(__file__)))[0], "celldetective"]
19
+ )
20
+
21
+
22
+ def track(
23
+ labels,
24
+ configuration=None,
25
+ stack=None,
26
+ spatial_calibration=1,
27
+ features=None,
28
+ channel_names=None,
29
+ haralick_options=None,
30
+ return_napari_data=False,
31
+ view_on_napari=False,
32
+ mask_timepoints=None,
33
+ mask_channels=None,
34
+ volume=(2048, 2048),
35
+ optimizer_options={"tm_lim": int(12e4)},
36
+ track_kwargs={"step_size": 100},
37
+ objects=None,
38
+ clean_trajectories_kwargs=None,
39
+ btrack_option=True,
40
+ search_range=None,
41
+ memory=None,
42
+ column_labels={
43
+ "track": "TRACK_ID",
44
+ "time": "FRAME",
45
+ "x": "POSITION_X",
46
+ "y": "POSITION_Y",
47
+ },
48
+ ):
49
+ """
50
+
51
+ Perform cell tracking on segmented labels using the bTrack library.
52
+
53
+ Parameters
54
+ ----------
55
+ labels : ndarray
56
+ The segmented labels representing cell objects.
57
+ configuration : Configuration or None
58
+ The bTrack configuration object. If None, a default configuration is used.
59
+ stack : ndarray or None, optional
60
+ The image stack corresponding to the labels. Default is None.
61
+ spatial_calibration : float, optional
62
+ The spatial calibration factor to convert pixel coordinates to physical units. Default is 1.
63
+ features : list or None, optional
64
+ The list of features to extract from the objects. If None, no additional features are extracted. Default is None.
65
+ channel_names : list or None, optional
66
+ The list of channel names corresponding to the image stack. Used for renaming intensity columns in the output DataFrame.
67
+ Default is None.
68
+ haralick_options : dict or None, optional
69
+ The options for Haralick feature extraction. If None, no Haralick features are extracted. Default is None.
70
+ return_napari_data : bool, optional
71
+ Whether to return the napari data dictionary along with the DataFrame. Default is False.
72
+ view_on_napari : bool, optional
73
+ Whether to view the tracking results on napari. Default is False.
74
+ optimizer_options : dict, optional
75
+ The options for the optimizer. Default is {'tm_lim': int(12e4)}.
76
+ track_kwargs : dict, optional
77
+ Additional keyword arguments for the bTrack tracker. Default is {'step_size': 100}.
78
+ clean_trajectories_kwargs : dict or None, optional
79
+ Keyword arguments for the clean_trajectories function to post-process the tracking trajectories. If None, no post-processing is performed.
80
+ Default is None.
81
+ column_labels : dict, optional
82
+ The column labels to use in the output DataFrame. Default is {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
83
+
84
+ Returns
85
+ -------
86
+ DataFrame or tuple
87
+ If return_napari_data is False, returns the DataFrame containing the tracking results. If return_napari_data is True, returns a tuple
88
+ containing the DataFrame and the napari data dictionary.
89
+
90
+ Notes
91
+ -----
92
+ This function performs cell tracking on the segmented labels using the bTrack library. It extracts features from the objects, normalizes
93
+ the features, tracks the objects, and generates a DataFrame with the tracking results. The DataFrame can be post-processed using the
94
+ clean_trajectories function. If specified, the tracking results can be visualized on napari.
95
+
96
+ Examples
97
+ --------
98
+ >>> labels = np.array([[1, 1, 2, 2, 0, 0],
99
+ [1, 1, 1, 2, 2, 0],
100
+ [0, 0, 1, 2, 0, 0]])
101
+ >>> configuration = cell_config()
102
+ >>> stack = np.random.rand(3, 6)
103
+ >>> df = track(labels, configuration, stack=stack, spatial_calibration=0.5)
104
+ >>> df.head()
105
+
106
+ TRACK_ID FRAME POSITION_Y POSITION_X
107
+ 0 0 0 0.0 0.0
108
+ 1 0 1 0.0 0.0
109
+ 2 0 2 0.0 0.0
110
+ 3 1 0 0.5 0.5
111
+ 4 1 1 0.5 0.5
112
+
113
+ """
114
+
115
+ configuration = interpret_tracking_configuration(configuration)
116
+
117
+ if objects is None:
118
+
119
+ if not btrack_option:
120
+ features = []
121
+ channel_names = None
122
+ haralick_options = None
123
+
124
+ objects = extract_objects_and_features(
125
+ labels,
126
+ stack,
127
+ features,
128
+ channel_names=channel_names,
129
+ haralick_options=haralick_options,
130
+ mask_timepoints=mask_timepoints,
131
+ mask_channels=mask_channels,
132
+ )
133
+
134
+ if btrack_option:
135
+ columns = list(objects.columns)
136
+ to_remove = ["x", "y", "class_id", "t"]
137
+ for tr in to_remove:
138
+ try:
139
+ columns.remove(tr)
140
+ except:
141
+ print(f"column {tr} could not be found...")
142
+
143
+ scaler = StandardScaler()
144
+ if columns:
145
+ x = objects[columns].values
146
+ x_scaled = scaler.fit_transform(x)
147
+ df_temp = pd.DataFrame(x_scaled, columns=columns, index=objects.index)
148
+ objects[columns] = df_temp
149
+ else:
150
+ print("Warning: no features were passed to bTrack...")
151
+
152
+ # 2) track the objects
153
+ new_btrack_objects = localizations_to_objects(objects)
154
+
155
+ with BayesianTracker() as tracker:
156
+
157
+ tracker.configure(configuration)
158
+
159
+ if columns:
160
+ tracking_updates = ["motion", "visual"]
161
+ # tracker.tracking_updates = ["motion","visual"]
162
+ tracker.features = columns
163
+ else:
164
+ tracking_updates = ["motion"]
165
+
166
+ tracker.append(new_btrack_objects)
167
+ tracker.volume = (
168
+ (0, volume[0]),
169
+ (0, volume[1]),
170
+ (-1e5, 1e5),
171
+ ) # (-1e5, 1e5)
172
+ # print(tracker.volume)
173
+ tracker.track(tracking_updates=tracking_updates, **track_kwargs)
174
+ tracker.optimize(options=optimizer_options)
175
+
176
+ data, properties, graph = tracker.to_napari() # ndim=2
177
+ # do the table post processing and napari options
178
+ if data.shape[1] == 4:
179
+ df = pd.DataFrame(
180
+ data,
181
+ columns=[
182
+ column_labels["track"],
183
+ column_labels["time"],
184
+ column_labels["y"],
185
+ column_labels["x"],
186
+ ],
187
+ )
188
+ elif data.shape[1] == 5:
189
+ df = pd.DataFrame(
190
+ data,
191
+ columns=[
192
+ column_labels["track"],
193
+ column_labels["time"],
194
+ "z",
195
+ column_labels["y"],
196
+ column_labels["x"],
197
+ ],
198
+ )
199
+ df = df.drop(columns=["z"])
200
+ df[column_labels["x"] + "_um"] = df[column_labels["x"]] * spatial_calibration
201
+ df[column_labels["y"] + "_um"] = df[column_labels["y"]] * spatial_calibration
202
+
203
+ else:
204
+ properties = None
205
+ graph = {}
206
+ print(f"{objects=} {objects.columns=}")
207
+ objects = objects.rename(columns={"t": "frame"})
208
+ if search_range is not None and memory is not None:
209
+ data = tp.link(objects, search_range, memory=memory, link_strategy="auto")
210
+ else:
211
+ print("Please provide a valid search range and memory value...")
212
+ return None
213
+ data["particle"] = data["particle"] + 1 # force track id to start at 1
214
+ df = data.rename(
215
+ columns={
216
+ "frame": column_labels["time"],
217
+ "x": column_labels["x"],
218
+ "y": column_labels["y"],
219
+ "particle": column_labels["track"],
220
+ }
221
+ )
222
+ df["state"] = 5.0
223
+ df["generation"] = 0.0
224
+ df["root"] = 1.0
225
+ df["parent"] = 1.0
226
+ df["dummy"] = False
227
+ df["z"] = 0.0
228
+ data = df[
229
+ [
230
+ column_labels["track"],
231
+ column_labels["time"],
232
+ "z",
233
+ column_labels["y"],
234
+ column_labels["x"],
235
+ ]
236
+ ].to_numpy()
237
+ print(f"{df=}")
238
+
239
+ if btrack_option:
240
+ df = df.merge(pd.DataFrame(properties), left_index=True, right_index=True)
241
+ if columns:
242
+ x = df[columns].values
243
+ x_scaled = scaler.inverse_transform(x)
244
+ df_temp = pd.DataFrame(x_scaled, columns=columns, index=df.index)
245
+ df[columns] = df_temp
246
+
247
+ # set dummy features to NaN
248
+ df.loc[df["dummy"], ["class_id"] + columns] = np.nan
249
+
250
+ df = df.sort_values(by=[column_labels["track"], column_labels["time"]])
251
+ df = velocity_per_track(df, window_size=3, mode="bi")
252
+
253
+ if channel_names is not None:
254
+ df = rename_intensity_column(df, channel_names)
255
+
256
+ df = write_first_detection_class(df, img_shape=volume, column_labels=column_labels)
257
+
258
+ if clean_trajectories_kwargs is not None:
259
+ df = clean_trajectories(df.copy(), **clean_trajectories_kwargs)
260
+
261
+ df.loc[df["status_firstdetection"].isna(), "status_firstdetection"] = 0
262
+ df["ID"] = np.arange(len(df)).astype(int)
263
+
264
+ invalid_cols = [c for c in list(df.columns) if c.startswith("Unnamed")]
265
+ if len(invalid_cols) > 0:
266
+ df = df.drop(invalid_cols, axis=1)
267
+
268
+ # if view_on_napari:
269
+ # view_on_napari_btrack(data,properties,graph,stack=stack,labels=labels,relabel=True)
270
+
271
+ if return_napari_data:
272
+ napari_data = {"data": data, "properties": properties, "graph": graph}
273
+ return df, napari_data
274
+ else:
275
+ return df
276
+
277
+
278
+ def extract_objects_and_features(
279
+ labels,
280
+ stack,
281
+ features,
282
+ channel_names=None,
283
+ haralick_options=None,
284
+ mask_timepoints=None,
285
+ mask_channels=None,
286
+ ):
287
+ """
288
+
289
+ Extract objects and features from segmented labels and image stack.
290
+
291
+ Parameters
292
+ ----------
293
+ labels : ndarray
294
+ The segmented labels representing cell objects.
295
+ stack : ndarray
296
+ The image stack corresponding to the labels.
297
+ features : list or None
298
+ The list of features to extract from the objects. If None, no additional features are extracted.
299
+ channel_names : list or None, optional
300
+ The list of channel names corresponding to the image stack. Used for extracting Haralick features. Default is None.
301
+ haralick_options : dict or None, optional
302
+ The options for Haralick feature extraction. If None, no Haralick features are extracted. Default is None.
303
+ mask_timepoints : list of None, optionak
304
+ Frames to hide during tracking.
305
+ Returns
306
+ -------
307
+ DataFrame
308
+ The DataFrame containing the extracted object features.
309
+
310
+ Notes
311
+ -----
312
+ This function extracts objects and features from the segmented labels and image stack. It computes the specified features for each
313
+ labeled object and returns a DataFrame containing the object features. Additional features such as centroid coordinates can also
314
+ be extracted. If Haralick features are enabled, they are computed based on the image stack using the specified options.
315
+
316
+ Examples
317
+ --------
318
+ >>> labels = np.array([[1, 1, 2, 2, 0, 0],
319
+ [1, 1, 1, 2, 2, 0],
320
+ [0, 0, 1, 2, 0, 0]])
321
+ >>> stack = np.random.rand(3, 6, 3)
322
+ >>> features = ['area', 'mean_intensity']
323
+ >>> df = extract_objects_and_features(labels, stack, features)
324
+
325
+ """
326
+
327
+ if features is None:
328
+ features = []
329
+
330
+ if stack is None:
331
+ haralick_options = None
332
+
333
+ if mask_timepoints is not None:
334
+ for f in mask_timepoints:
335
+ labels[f] = 0.0
336
+
337
+ nbr_frames = len(labels)
338
+ timestep_dataframes = []
339
+
340
+ for t in tqdm(range(nbr_frames), desc="frame"):
341
+
342
+ if stack is not None:
343
+ img = stack[t]
344
+ else:
345
+ img = None
346
+
347
+ if (haralick_options is not None) and (t == 0) and (stack is not None):
348
+ if not "percentiles" in haralick_options:
349
+ haralick_options.update({"percentiles": (0.01, 99.99)})
350
+ if not "target_channel" in haralick_options:
351
+ haralick_options.update({"target_channel": 0})
352
+ haralick_percentiles = haralick_options["percentiles"]
353
+ haralick_channel_index = haralick_options["target_channel"]
354
+ min_value = np.nanpercentile(
355
+ img[:, :, haralick_channel_index].flatten(), haralick_percentiles[0]
356
+ )
357
+ max_value = np.nanpercentile(
358
+ img[:, :, haralick_channel_index].flatten(), haralick_percentiles[1]
359
+ )
360
+ haralick_options.update({"clip_values": (min_value, max_value)})
361
+
362
+ df_props = measure_features(
363
+ img,
364
+ labels[t],
365
+ features=features + ["centroid"],
366
+ border_dist=None,
367
+ channels=channel_names,
368
+ haralick_options=haralick_options,
369
+ verbose=False,
370
+ )
371
+ df_props.rename(columns={"centroid-1": "x", "centroid-0": "y"}, inplace=True)
372
+ df_props["t"] = int(t)
373
+ timestep_dataframes.append(df_props)
374
+
375
+ df = pd.concat(timestep_dataframes)
376
+ df.reset_index(inplace=True, drop=True)
377
+
378
+ if mask_channels is not None:
379
+ cols_to_drop = []
380
+ for mc in mask_channels:
381
+ columns = df.columns
382
+ col_contains = [mc in c for c in columns]
383
+ to_remove = np.array(columns)[np.array(col_contains)]
384
+ cols_to_drop.extend(to_remove)
385
+ if len(cols_to_drop) > 0:
386
+ df = df.drop(cols_to_drop, axis=1)
387
+
388
+ return df
389
+
390
+
391
def clean_trajectories(
    trajectories,
    remove_not_in_first=False,
    remove_not_in_last=False,
    minimum_tracklength=0,
    interpolate_position_gaps=False,
    extrapolate_tracks_post=False,
    extrapolate_tracks_pre=False,
    interpolate_na=False,
    column_labels={
        "track": "TRACK_ID",
        "time": "FRAME",
        "x": "POSITION_X",
        "y": "POSITION_Y",
    },
):
    """
    Clean trajectories by applying optional filtering, interpolation and
    extrapolation steps.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    remove_not_in_first : bool, optional
        Remove tracks that are not present in the first frame. Defaults to False.
    remove_not_in_last : bool, optional
        Remove tracks that are not present in the last frame. Defaults to False.
    minimum_tracklength : int, optional
        Minimum length of a track to be retained. Defaults to 0 (no filtering).
    interpolate_position_gaps : bool, optional
        Interpolate position gaps (missing frames) in tracks. Defaults to False.
    extrapolate_tracks_post : bool, optional
        Extrapolate tracks after the last known position. Defaults to False.
    extrapolate_tracks_pre : bool, optional
        Extrapolate tracks before the first known position. Defaults to False.
    interpolate_na : bool, optional
        Interpolate missing values (NaN) within tracks. Defaults to False.
    column_labels : dict, optional
        Column labels used in the input DataFrame, with keys 'track', 'time',
        'x' and 'y'.

    Returns
    -------
    pandas.DataFrame
        The cleaned DataFrame, sorted by track then time, with a fresh index.

    Notes
    -----
    If a ``class_firstdetection`` column is present, the first non-NaN
    class/time values of each track are propagated to the whole track
    (class 2 / time -1 when no detection exists).
    """

    # BUG FIX: the previous code read `trajectories.reset_index` without
    # calling it, which was a silent no-op; actually reset the index here.
    trajectories = trajectories.reset_index(drop=True)
    trajectories.sort_values(
        by=[column_labels["track"], column_labels["time"]], inplace=True
    )

    if minimum_tracklength > 0:
        trajectories = filter_by_tracklength(
            trajectories.copy(), minimum_tracklength, track_label=column_labels["track"]
        )

    if remove_not_in_first or remove_not_in_last:
        trajectories = filter_by_endpoints(
            trajectories.copy(),
            remove_not_in_first=remove_not_in_first,
            remove_not_in_last=remove_not_in_last,
            column_labels=column_labels,
        )

    if extrapolate_tracks_post or extrapolate_tracks_pre:
        trajectories = extrapolate_tracks(
            trajectories.copy(),
            post=extrapolate_tracks_post,
            pre=extrapolate_tracks_pre,
            column_labels=column_labels,
        )

    if interpolate_position_gaps:
        trajectories = interpolate_time_gaps(
            trajectories.copy(), column_labels=column_labels
        )

    if interpolate_na:
        trajectories = interpolate_nan_properties(
            trajectories.copy(), track_label=column_labels["track"]
        )

    trajectories = trajectories.sort_values(
        by=[column_labels["track"], column_labels["time"]]
    )
    trajectories.reset_index(inplace=True, drop=True)

    if "class_firstdetection" in list(trajectories.columns):
        # Propagate a single (class, time) pair of first-detection values
        # over each full track.
        for tid, track_group in trajectories.groupby(column_labels["track"]):
            indices = track_group.index

            class_values = track_group["class_firstdetection"].dropna().unique()
            t_values = track_group["t_firstdetection"].dropna().unique()
            if len(class_values) == 0:
                # No detection anywhere on the track: mark as "not detected".
                class_value = 2
                t_value = -1
            else:
                class_value = class_values[0]
                t_value = t_values[0]

            trajectories.loc[indices, "class_firstdetection"] = class_value
            trajectories.loc[indices, "t_firstdetection"] = t_value

    return trajectories
536
+
422
537
 
423
538
def interpolate_per_track(group_df):
    """
    Fill missing values of every numeric column of a single-track DataFrame.

    Parameters
    ----------
    group_df : pandas.DataFrame
        Data for one track; modified in place.

    Returns
    -------
    pandas.DataFrame
        The same DataFrame with NaNs of non-object columns linearly
        interpolated (in both directions, so leading/trailing gaps are
        filled with the nearest known value).
    """

    for column in group_df.columns:
        series = group_df[column].infer_objects(copy=False)
        if series.dtype == "O":
            # Leave genuinely non-numeric (object) columns untouched.
            continue
        group_df[column] = series.interpolate(method="linear", limit_direction="both")

    return group_df
453
570
 
454
571
def interpolate_nan_properties(trajectories, track_label="TRACK_ID"):
    """
    Interpolate missing values track by track.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    track_label : str, optional
        Column holding the track IDs. Defaults to "TRACK_ID".

    Returns
    -------
    pandas.DataFrame
        DataFrame with NaNs interpolated within each track (see
        :func:`interpolate_per_track` for the per-track behaviour).
    """

    grouped = trajectories.groupby(track_label, group_keys=False)
    trajectories = grouped.apply(interpolate_per_track)

    return trajectories
608
+
609
+
610
def filter_by_endpoints(
    trajectories,
    remove_not_in_first=True,
    remove_not_in_last=False,
    column_labels={
        "track": "TRACK_ID",
        "time": "FRAME",
        "x": "POSITION_X",
        "y": "POSITION_Y",
    },
):
    """
    Keep only tracks whose endpoints reach the movie boundaries.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    remove_not_in_first : bool, optional
        Remove tracks that do not start at the earliest frame. Defaults to True.
    remove_not_in_last : bool, optional
        Remove tracks that do not end at the latest frame. Defaults to False.
    column_labels : dict, optional
        Column labels used in the input DataFrame, with keys 'track', 'time',
        'x' and 'y'.

    Returns
    -------
    pandas.DataFrame
        The filtered DataFrame, sorted by track then time.

    Notes
    -----
    When both flags are set, a track must span from the first to the last
    frame of the movie to be kept.
    """

    track_col = column_labels["track"]
    time_col = column_labels["time"]

    # Build one boolean mask per requested endpoint condition and AND them.
    mask = None
    if remove_not_in_first:
        first_frame_per_track = trajectories.groupby(track_col)[time_col].min()
        mask = first_frame_per_track == np.amin(trajectories[time_col])
    if remove_not_in_last:
        last_frame_per_track = trajectories.groupby(track_col)[time_col].max()
        last_mask = last_frame_per_track == np.amax(trajectories[time_col])
        mask = last_mask if mask is None else mask & last_mask

    if mask is not None:
        surviving_tracks = mask.index[mask]
        trajectories = trajectories.loc[trajectories[track_col].isin(surviving_tracks)]

    trajectories = trajectories.sort_values(by=[track_col, time_col])

    return trajectories
455
730
 
456
- """
457
- Interpolate missing values within tracks in the input DataFrame.
458
-
459
- Parameters
460
- ----------
461
- trajectories : pandas.DataFrame
462
- The input DataFrame containing trajectory data.
463
- track_label : str, optional
464
- The column label for the track ID.
465
- Defaults to "TRACK_ID".
466
-
467
- Returns
468
- -------
469
- pandas.DataFrame
470
- The DataFrame with missing values interpolated within tracks.
471
-
472
- Notes
473
- -----
474
- This function groups the input DataFrame by track ID and applies `interpolate_per_track` function
475
- to interpolate missing values within each track.
476
- Missing values are interpolated based on the neighboring data points in each track.
477
-
478
- The input DataFrame is expected to have a column with the specified `track_label` containing the track IDs.
479
-
480
- Examples
481
- --------
482
- >>> interpolated_data = interpolate_nan_properties(trajectories, track_label="ID")
483
- >>> print(interpolated_data.head())
484
-
485
- """
486
-
487
- trajectories = trajectories.groupby(track_label, group_keys=False).apply(interpolate_per_track)
488
-
489
- return trajectories
490
-
491
-
492
- def filter_by_endpoints(trajectories, remove_not_in_first=True, remove_not_in_last=False,
493
- column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
494
-
495
- """
496
- Filter trajectories based on their endpoints.
497
-
498
- Parameters
499
- ----------
500
- trajectories : pandas.DataFrame
501
- The input DataFrame containing trajectory data.
502
- remove_not_in_first : bool, optional
503
- Flag indicating whether to remove tracks not present in the first frame.
504
- Defaults to True.
505
- remove_not_in_last : bool, optional
506
- Flag indicating whether to remove tracks not present in the last frame.
507
- Defaults to False.
508
- column_labels : dict, optional
509
- Dictionary specifying the column labels used in the input DataFrame.
510
- The keys represent the following column labels:
511
- - 'track': The column label for the track ID.
512
- - 'time': The column label for the timestamp.
513
- - 'x': The column label for the x-coordinate.
514
- - 'y': The column label for the y-coordinate.
515
- Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
516
-
517
- Returns
518
- -------
519
- pandas.DataFrame
520
- The filtered DataFrame with trajectories based on their endpoints.
521
-
522
- Notes
523
- -----
524
- This function filters the input DataFrame based on the endpoints of the trajectories.
525
- The filtering can be performed in three modes:
526
- - remove_not_in_first=True and remove_not_in_last=False: Remove tracks that are not present in the first frame.
527
- - remove_not_in_first=False and remove_not_in_last=True: Remove tracks that are not present in the last frame.
528
- - remove_not_in_first=True and remove_not_in_last=True: Remove tracks that are not present in both the first and last frames.
529
-
530
- The input DataFrame is expected to have the following columns:
531
- - track: The unique ID of each track.
532
- - time: The timestamp of each data point.
533
- - x: The x-coordinate of each data point.
534
- - y: The y-coordinate of each data point.
535
-
536
- Examples
537
- --------
538
- >>> filtered_data = filter_by_endpoints(trajectories, remove_not_in_first=True, remove_not_in_last=False, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
539
- >>> print(filtered_data.head())
540
-
541
- """
542
-
543
- if (remove_not_in_first)*(not remove_not_in_last):
544
- # filter tracks not in first frame
545
- leftover_tracks = trajectories.groupby(column_labels['track']).min().index[trajectories.groupby(column_labels['track']).min()[column_labels['time']]==np.amin(trajectories[column_labels['time']])]
546
- trajectories = trajectories.loc[trajectories[column_labels['track']].isin(leftover_tracks)]
547
-
548
- elif (remove_not_in_last)*(not remove_not_in_first):
549
- # filter tracks not in last frame
550
- leftover_tracks = trajectories.groupby(column_labels['track']).max().index[trajectories.groupby(column_labels['track']).max()[column_labels['time']]==np.amax(trajectories[column_labels['time']])]
551
- trajectories = trajectories.loc[trajectories[column_labels['track']].isin(leftover_tracks)]
552
-
553
- elif remove_not_in_first*remove_not_in_last:
554
- # filter tracks both not in first and last frame
555
- leftover_tracks = trajectories.groupby(column_labels['track']).max().index[(trajectories.groupby(column_labels['track']).max()[column_labels['time']]==np.amax(trajectories[column_labels['time']]))*(trajectories.groupby(column_labels['track']).min()[column_labels['time']]==np.amin(trajectories[column_labels['time']]))]
556
- trajectories = trajectories.loc[trajectories[column_labels['track']].isin(leftover_tracks)]
557
-
558
- trajectories = trajectories.sort_values(by=[column_labels['track'],column_labels['time']])
559
-
560
- return trajectories
561
731
 
562
732
def filter_by_tracklength(trajectories, minimum_tracklength, track_label="TRACK_ID"):
    """
    Drop tracks shorter than a minimum number of points.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    minimum_tracklength : int
        Minimum length required for a track to be included. Tracks with
        exactly this many points are removed as well (strict comparison).
    track_label : str, optional
        Column holding the track IDs. Defaults to "TRACK_ID".

    Returns
    -------
    pandas.DataFrame
        The filtered DataFrame with a fresh index.
    """

    if minimum_tracklength > 0:
        sizes = trajectories.groupby(track_label, group_keys=False).size()
        # NOTE: strictly greater, matching the historical behaviour.
        surviving_tracks = sizes.index[sizes > minimum_tracklength]
        trajectories = trajectories.loc[trajectories[track_label].isin(surviving_tracks)]

    return trajectories.reset_index(drop=True)
602
-
603
-
604
def interpolate_time_gaps(
    trajectories,
    column_labels={
        "track": "TRACK_ID",
        "time": "FRAME",
        "x": "POSITION_X",
        "y": "POSITION_Y",
    },
):
    """
    Insert missing frames in each track and linearly interpolate positions.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    column_labels : dict, optional
        Column labels used in the input DataFrame, with keys 'track', 'time',
        'x' and 'y'.

    Returns
    -------
    pandas.DataFrame
        DataFrame with one row per (track, frame) over each track's time
        span; x/y linearly interpolated, other columns NaN on new rows,
        and the time column converted back to float frame numbers.
    """

    track_col = column_labels["track"]
    time_col = column_labels["time"]
    xy_cols = [column_labels["x"], column_labels["y"]]

    # Interpret frames as seconds so pandas' resampling machinery can
    # materialise the missing time points at a 1-frame period.
    trajectories[time_col] = pd.to_datetime(trajectories[time_col], unit="s")
    trajectories.set_index(track_col, inplace=True)
    trajectories = (
        trajectories.groupby(track_col, group_keys=True)
        .apply(lambda g: g.set_index(time_col).resample("1s").asfreq())
        .reset_index()
    )
    # Fill the positions on the newly inserted frames, track by track.
    trajectories[xy_cols] = trajectories.groupby(track_col, group_keys=False)[
        xy_cols
    ].apply(lambda g: g.interpolate(method="linear"))
    trajectories.reset_index(drop=True, inplace=True)
    # Convert the datetime column back to a float frame number (seconds).
    trajectories[time_col] = (
        trajectories[time_col].astype("int64").astype(float) / 10**9
    )
    trajectories.sort_values(by=[track_col, time_col], inplace=True)

    return trajectories
655
-
656
-
657
def extrapolate_tracks(
    trajectories,
    post=False,
    pre=False,
    column_labels={
        "track": "TRACK_ID",
        "time": "FRAME",
        "x": "POSITION_X",
        "y": "POSITION_Y",
    },
):
    """
    Extend tracks by repeating their boundary positions.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    post : bool, optional
        Repeat the last known position up to the latest frame of the movie.
        Defaults to False.
    pre : bool, optional
        Repeat the first known position back to frame 0. Defaults to False.
    column_labels : dict, optional
        Column labels used in the input DataFrame, with keys 'track', 'time',
        'x' and 'y'.

    Returns
    -------
    pandas.DataFrame
        The extended DataFrame, sorted by track then time. Extrapolated rows
        carry only time, x, y and track values; other columns are NaN.
    """

    track_col = column_labels["track"]
    time_col = column_labels["time"]
    x_col = column_labels["x"]
    y_col = column_labels["y"]

    if post:
        # Latest frame of the whole movie.
        horizon = trajectories[time_col].max()

        pieces = pd.DataFrame()
        for tid, group in trajectories.groupby(track_col):
            # Last observed (t, x, y) for this track.
            t_last, x_last, y_last = (
                group.loc[group[time_col] <= horizon]
                .tail(1)[[time_col, x_col, y_col]]
                .values[0]
            )
            future = np.arange(t_last + 1, horizon + 1)
            frames = pd.DataFrame({time_col: future})
            positions = pd.DataFrame({x_col: x_last, y_col: y_last}, index=future)
            fill = frames.join(positions, how="inner", on=time_col)
            fill[track_col] = tid

            if len(pieces) == 0:
                pieces = fill
            elif len(fill) != 0:
                pieces = pd.concat([pieces, fill])

        trajectories = pd.concat([trajectories, pieces], axis=0)
        trajectories.sort_values([track_col, time_col], inplace=True)

    if pre:
        # Tracks are padded back to frame 0.
        start = 0

        pieces = pd.DataFrame()
        for tid, group in trajectories.groupby(track_col):
            # First observed (t, x, y) for this track.
            t_first, x_first, y_first = (
                group.loc[group[time_col] >= start]
                .head(1)[[time_col, x_col, y_col]]
                .values[0]
            )
            frames = pd.DataFrame({time_col: np.arange(start, t_first + 1)})
            # NOTE(review): positions index stops at t_first - 1, so the inner
            # join only keeps frames strictly before the first observation —
            # this reproduces the historical behaviour.
            positions = pd.DataFrame(
                {x_col: x_first, y_col: y_first}, index=np.arange(start, t_first)
            )
            fill = frames.join(positions, how="inner", on=time_col)
            fill[track_col] = tid
            pieces = pd.concat([pieces, fill])

        trajectories = pd.concat([trajectories, pieces], axis=0)
        trajectories.sort_values([track_col, time_col], inplace=True)

    return trajectories
751
-
752
def compute_instantaneous_velocity(
    trajectories,
    column_labels={
        "track": "TRACK_ID",
        "time": "FRAME",
        "x": "POSITION_X",
        "y": "POSITION_Y",
    },
):
    """
    Add an instantaneous velocity column to the trajectories.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    column_labels : dict, optional
        Column labels used in the input DataFrame, with keys 'track', 'time',
        'x' and 'y'.

    Returns
    -------
    pandas.DataFrame
        DataFrame with a 'velocity' column: Euclidean displacement between
        consecutive points of a track divided by the time difference. The
        first point of each track has velocity NaN.
    """

    track_col = column_labels["track"]
    time_col = column_labels["time"]

    # Per-track finite differences of time and position.
    per_track = trajectories.groupby(track_col)
    trajectories["dt"] = per_track[time_col].diff()
    trajectories["dx"] = per_track[column_labels["x"]].diff()
    trajectories["dy"] = per_track[column_labels["y"]].diff()

    displacement = np.sqrt(trajectories["dx"] ** 2 + trajectories["dy"] ** 2)
    trajectories["velocity"] = displacement / trajectories["dt"]

    trajectories = trajectories.drop(["dx", "dy", "dt"], axis=1)
    trajectories = trajectories.sort_values(by=[track_col, time_col])

    return trajectories
796
-
797
def instantaneous_diffusion(positions_x, positions_y, timeline):
    """
    Compute per-timepoint diffusion coefficients for x and y coordinates.

    Parameters
    ----------
    positions_x : numpy.ndarray
        x-coordinates of the positions.
    positions_y : numpy.ndarray
        y-coordinates of the positions.
    timeline : numpy.ndarray
        Corresponding time points.

    Returns
    -------
    numpy.ndarray
        Array of shape (len(positions_x), 2) with the x (column 0) and y
        (column 1) diffusion estimates; endpoints are NaN since the formula
        needs both neighbours.

    Notes
    -----
    For each interior time index t and coordinate p:
    D = (p[t+1] - p[t-1])^2 / (2 * (T[t+1] - T[t-1]))
        + (p[t+1] - p[t]) * (p[t] - p[t-1]) / (T[t+1] - T[t-1])
    """

    diff = np.full((len(positions_x), 2), np.nan)

    for k in range(1, len(positions_x) - 1):
        span = timeline[k + 1] - timeline[k - 1]
        diff[k, 0] = (positions_x[k + 1] - positions_x[k - 1]) ** 2 / (2 * span) + 1 / span * (
            (positions_x[k + 1] - positions_x[k]) * (positions_x[k] - positions_x[k - 1])
        )

    for k in range(1, len(positions_y) - 1):
        span = timeline[k + 1] - timeline[k - 1]
        diff[k, 1] = (positions_y[k + 1] - positions_y[k - 1]) ** 2 / (2 * span) + 1 / span * (
            (positions_y[k + 1] - positions_y[k]) * (positions_y[k] - positions_y[k - 1])
        )

    return diff
843
-
844
- def magnitude_diffusion(diffusion_vector):
845
-
846
- """
847
- Compute the magnitude of diffusion for each diffusion vector.
848
-
849
- Parameters
850
- ----------
851
- diffusion_vector : numpy.ndarray
852
- Array of diffusion vectors.
853
-
854
- Returns
855
- -------
856
- numpy.ndarray
857
- Array of magnitudes of diffusion.
858
-
859
- Notes
860
- -----
861
- The function calculates the magnitude of diffusion for each diffusion vector (x, y) based on the provided diffusion vectors.
862
- The magnitude of diffusion is computed as the Euclidean norm of the diffusion vector.
863
-
864
- Examples
865
- --------
866
- >>> diffusion = np.array([[1.0, 2.0], [3.0, 4.0], [0.5, 0.5]])
867
- >>> magnitudes = magnitude_diffusion(diffusion)
868
- >>> print(magnitudes)
733
+ """
734
+ Filter trajectories based on the minimum track length.
735
+
736
+ Parameters
737
+ ----------
738
+ trajectories : pandas.DataFrame
739
+ The input DataFrame containing trajectory data.
740
+ minimum_tracklength : int
741
+ The minimum length required for a track to be included.
742
+ track_label : str, optional
743
+ The column name in the DataFrame that represents the track ID.
744
+ Defaults to "TRACK_ID".
745
+
746
+ Returns
747
+ -------
748
+ pandas.DataFrame
749
+ The filtered DataFrame with trajectories that meet the minimum track length.
750
+
751
+ Notes
752
+ -----
753
+ This function removes any tracks from the input DataFrame that have a length
754
+ (number of data points) less than the specified minimum track length.
755
+
756
+ Examples
757
+ --------
758
+ >>> filtered_data = filter_by_tracklength(trajectories, 10, track_label="TrackID")
759
+ >>> print(filtered_data.head())
760
+
761
+ """
762
+
763
+ if minimum_tracklength > 0:
764
+
765
+ leftover_tracks = (
766
+ trajectories.groupby(track_label, group_keys=False)
767
+ .size()
768
+ .index[
769
+ trajectories.groupby(track_label, group_keys=False).size()
770
+ > minimum_tracklength
771
+ ]
772
+ )
773
+ trajectories = trajectories.loc[trajectories[track_label].isin(leftover_tracks)]
774
+
775
+ trajectories = trajectories.reset_index(drop=True)
776
+
777
+ return trajectories
778
+
779
+
780
def interpolate_time_gaps(
    trajectories,
    column_labels={
        "track": "TRACK_ID",
        "time": "FRAME",
        "x": "POSITION_X",
        "y": "POSITION_Y",
    },
):
    """
    Interpolate time gaps in trajectories.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    column_labels : dict, optional
        Dictionary specifying the column labels used in the input DataFrame.
        The keys represent the following column labels:
        - 'track': The column label for the track ID.
        - 'time': The column label for the timestamp.
        - 'x': The column label for the x-coordinate.
        - 'y': The column label for the y-coordinate.
        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.

    Returns
    -------
    pandas.DataFrame
        The interpolated DataFrame with reduced time gaps in trajectories.

    Notes
    -----
    This function performs interpolation on the input trajectories to reduce time gaps between data points.
    It uses linear interpolation to fill missing values for the specified x and y coordinate attributes.

    The time column is assumed to hold frame indices (treated as whole seconds);
    the function mutates the input DataFrame's time column in place while building
    the resampled copy, so callers should not rely on the input afterwards.

    Examples
    --------
    >>> interpolated_data = interpolate_time_gaps(trajectories, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
    >>> print(interpolated_data.head())

    """

    # Frame indices are converted to datetimes (1 frame == 1 second) so that
    # pandas' resample machinery can be used to materialise the missing frames.
    trajectories[column_labels["time"]] = pd.to_datetime(
        trajectories[column_labels["time"]], unit="s"
    )
    trajectories.set_index(column_labels["track"], inplace=True)
    # Resampling at a 1 s frequency inserts one (all-NaN) row per missing frame
    # for each track; asfreq keeps the original rows untouched.
    trajectories = (
        trajectories.groupby(column_labels["track"], group_keys=True)
        .apply(lambda x: x.set_index(column_labels["time"]).resample("1s").asfreq())
        .reset_index()
    )
    # Fill the newly inserted rows' positions by per-track linear interpolation;
    # other columns inserted by the resampling stay NaN.
    trajectories[[column_labels["x"], column_labels["y"]]] = trajectories.groupby(
        column_labels["track"], group_keys=False
    )[[column_labels["x"], column_labels["y"]]].apply(
        lambda x: x.interpolate(method="linear")
    )
    trajectories.reset_index(drop=True, inplace=True)
    # Convert the datetimes back to frame numbers (seconds since the epoch),
    # rounded to 9 decimals to absorb floating-point noise.
    trajectories[column_labels["time"]] = np.round(
        (trajectories[column_labels["time"]] - pd.Timestamp("1970-01-01"))
        / pd.Timedelta("1s"),
        9,
    )
    # trajectories[column_labels['time']] = trajectories[column_labels['time']].astype('int64')
    trajectories.sort_values(
        by=[column_labels["track"], column_labels["time"]], inplace=True
    )

    return trajectories
854
+
855
+
856
def extrapolate_tracks(
    trajectories,
    post=False,
    pre=False,
    column_labels={
        "track": "TRACK_ID",
        "time": "FRAME",
        "x": "POSITION_X",
        "y": "POSITION_Y",
    },
):
    """
    Extrapolate tracks in trajectories.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    post : bool, optional
        Flag indicating whether to perform post-extrapolation.
        Defaults to False.
    pre : bool, optional
        Flag indicating whether to perform pre-extrapolation.
        Defaults to False.
    column_labels : dict, optional
        Dictionary specifying the column labels used in the input DataFrame.
        The keys represent the following column labels:
        - 'track': The column label for the track ID.
        - 'time': The column label for the timestamp.
        - 'x': The column label for the x-coordinate.
        - 'y': The column label for the y-coordinate.
        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.

    Returns
    -------
    pandas.DataFrame
        The extrapolated DataFrame with extended tracks.

    Notes
    -----
    This function extrapolates tracks in the input DataFrame by repeating the last known position
    either after (post-extrapolation) or before (pre-extrapolation) the available data.

    The input DataFrame is expected to have the following columns:
    - track: The unique ID of each track.
    - time: The timestamp of each data point.
    - x: The x-coordinate of each data point.
    - y: The y-coordinate of each data point.

    Only the time, x, y and track columns are filled on the extrapolated rows;
    every other column is NaN there.

    Examples
    --------
    >>> extrapolated_data = extrapolate_tracks(trajectories, post=True, pre=False, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
    >>> print(extrapolated_data.head())

    """

    if post:

        # get the maximum time T in the dataframe
        max_time = trajectories[column_labels["time"]].max()

        # extrapolate the position until time T by repeating the last known position
        df_extrapolated = pd.DataFrame()
        for track_id, group in trajectories.groupby(column_labels["track"]):
            # Last observed (time, x, y) of this track; the <= max_time filter is
            # a no-op safeguard since max_time is the global maximum.
            last_known_position = (
                group.loc[group[column_labels["time"]] <= max_time]
                .tail(1)[
                    [column_labels["time"], column_labels["x"], column_labels["y"]]
                ]
                .values
            )
            # New frames strictly after the last observation, up to max_time.
            extrapolated_frames = pd.DataFrame(
                {
                    column_labels["time"]: np.arange(
                        last_known_position[0][0] + 1, max_time + 1
                    )
                }
            )
            extrapolated_positions = pd.DataFrame(
                {
                    column_labels["x"]: last_known_position[0][1],
                    column_labels["y"]: last_known_position[0][2],
                },
                index=np.arange(last_known_position[0][0] + 1, max_time + 1),
            )
            # Inner join on the frame value pairs each new frame with the repeated
            # last position.
            track_data = extrapolated_frames.join(
                extrapolated_positions, how="inner", on=column_labels["time"]
            )
            track_data[column_labels["track"]] = track_id

            if len(df_extrapolated) == 0:
                df_extrapolated = track_data
            elif len(track_data) != 0:
                df_extrapolated = pd.concat([df_extrapolated, track_data])

        # concatenate the original dataframe and the extrapolated dataframe
        trajectories = pd.concat([trajectories, df_extrapolated], axis=0)
        # sort the dataframe by TRACK_ID and FRAME
        trajectories.sort_values(
            [column_labels["track"], column_labels["time"]], inplace=True
        )

    if pre:

        # get the maximum time T in the dataframe
        min_time = 0  # trajectories[column_labels['time']].min()

        # extrapolate the position until time T by repeating the last known position
        df_extrapolated = pd.DataFrame()
        for track_id, group in trajectories.groupby(column_labels["track"]):
            # First observed (time, x, y) of this track.
            last_known_position = (
                group.loc[group[column_labels["time"]] >= min_time]
                .head(1)[
                    [column_labels["time"], column_labels["x"], column_labels["y"]]
                ]
                .values
            )
            extrapolated_frames = pd.DataFrame(
                {
                    column_labels["time"]: np.arange(
                        min_time, last_known_position[0][0] + 1
                    )
                }
            )
            # The positions index stops one frame *before* the first observation,
            # so the inner join below drops the already-observed first frame and
            # only frames min_time..first_obs-1 are added.
            extrapolated_positions = pd.DataFrame(
                {
                    column_labels["x"]: last_known_position[0][1],
                    column_labels["y"]: last_known_position[0][2],
                },
                index=np.arange(min_time, last_known_position[0][0]),
            )
            track_data = extrapolated_frames.join(
                extrapolated_positions, how="inner", on=column_labels["time"]
            )
            track_data[column_labels["track"]] = track_id
            df_extrapolated = pd.concat([df_extrapolated, track_data])

        # concatenate the original dataframe and the extrapolated dataframe
        trajectories = pd.concat([trajectories, df_extrapolated], axis=0)

        # sort the dataframe by TRACK_ID and FRAME
        trajectories.sort_values(
            [column_labels["track"], column_labels["time"]], inplace=True
        )

    return trajectories
1002
+
1003
+
1004
def compute_instantaneous_velocity(
    trajectories,
    column_labels={
        "track": "TRACK_ID",
        "time": "FRAME",
        "x": "POSITION_X",
        "y": "POSITION_Y",
    },
):
    """

    Compute the instantaneous velocity for each point in the trajectories.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    column_labels : dict, optional
        A dictionary specifying the column labels for track ID, time, position X, and position Y.
        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.

    Returns
    -------
    pandas.DataFrame
        The DataFrame with an added 'velocity' column representing the instantaneous
        velocity for each point (NaN on the first point of each track).

    Notes
    -----
    The velocity is the Euclidean distance between consecutive points of the same
    track divided by their time difference. Differences are computed per track so
    that no velocity is computed across track boundaries.

    Examples
    --------
    >>> velocity_data = compute_instantaneous_velocity(trajectories)
    >>> print(velocity_data.head())

    """

    grouped = trajectories.groupby(column_labels["track"])

    # Keep the per-track forward differences as local Series instead of writing
    # temporary "dt"/"dx"/"dy" columns into the caller's DataFrame and dropping
    # them afterwards, which would destroy any preexisting columns of that name.
    dt = grouped[column_labels["time"]].diff()
    dx = grouped[column_labels["x"]].diff()
    dy = grouped[column_labels["y"]].diff()

    # Instantaneous velocity = displacement / elapsed time (index-aligned).
    trajectories["velocity"] = np.sqrt(dx**2 + dy**2) / dt
    trajectories = trajectories.sort_values(
        by=[column_labels["track"], column_labels["time"]]
    )

    return trajectories
869
1065
 
870
- """
871
1066
 
872
- return np.sqrt(diffusion_vector[:,0]**2+diffusion_vector[:,1]**2)
873
-
874
-
875
- def compute_instantaneous_diffusion(trajectories, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
876
-
877
- """
878
-
879
- Compute the instantaneous diffusion for each track in the provided trajectories DataFrame.
880
-
881
- Parameters
882
- ----------
883
- trajectories : DataFrame
884
- The input DataFrame containing trajectories with position and time information.
885
- column_labels : dict, optional
886
- A dictionary specifying the column labels for track ID, time, x-coordinate, and y-coordinate.
887
- The default is {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
888
-
889
- Returns
890
- -------
891
- DataFrame
892
- The modified DataFrame with an additional column "diffusion" containing the computed diffusion values.
893
-
894
- Notes
895
- -----
896
-
897
- The instantaneous diffusion is calculated using the positions and times of each track. The diffusion values
898
- are computed for each track individually and added as a new column "diffusion" in the output DataFrame.
899
-
900
- Examples
901
- --------
902
- >>> trajectories = pd.DataFrame({'TRACK_ID': [1, 1, 1, 2, 2, 2],
903
- ... 'FRAME': [0, 1, 2, 0, 1, 2],
904
- ... 'POSITION_X': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
905
- ... 'POSITION_Y': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]})
906
- >>> compute_instantaneous_diffusion(trajectories)
907
- # Output DataFrame with added "diffusion" column
908
-
909
- """
1067
def instantaneous_diffusion(positions_x, positions_y, timeline):
    """
    Compute per-frame instantaneous diffusion coefficients along x and y.

    Parameters
    ----------
    positions_x : numpy.ndarray
        Array of x-coordinates of positions.
    positions_y : numpy.ndarray
        Array of y-coordinates of positions.
    timeline : numpy.ndarray
        Array of corresponding time points.

    Returns
    -------
    numpy.ndarray
        Array of shape (len(positions_x), 2): the diffusion estimate for (x, y)
        at each interior time point. The first and last rows stay NaN since the
        estimator requires both a previous and a next sample.

    Notes
    -----
    For each interior index t and coordinate p, the estimator is
    D = (p[t+1] - p[t-1])^2 / (2 * (T[t+1] - T[t-1]))
        + ((p[t+1] - p[t]) * (p[t] - p[t-1])) / (T[t+1] - T[t-1])

    Examples
    --------
    >>> x = np.array([0, 1, 2, 3, 4, 5])
    >>> y = np.array([0, 1, 4, 9, 16, 25])
    >>> t = np.array([0, 1, 2, 3, 4, 5])
    >>> diff = instantaneous_diffusion(x, y, t)
    >>> print(diff)

    """

    # NaN-initialised output; only interior rows are filled below.
    diff = np.full((len(positions_x), 2), np.nan)

    # Same estimator applied independently to the x (axis 0) and y (axis 1) data.
    for axis, positions in enumerate((positions_x, positions_y)):
        for t in range(1, len(positions) - 1):
            span = timeline[t + 1] - timeline[t - 1]
            central = (positions[t + 1] - positions[t - 1]) ** 2 / (2 * span)
            cross = 1 / span * (
                (positions[t + 1] - positions[t]) * (positions[t] - positions[t - 1])
            )
            diff[t, axis] = central + cross

    return diff
910
1122
 
911
- trajectories = trajectories.sort_values(by=[column_labels['track'],column_labels['time']])
912
- trajectories['diffusion'] = np.nan
913
-
914
- for tid,group in trajectories.groupby(column_labels['track']):
915
1123
 
916
- indices = group.index
917
- x = group[column_labels['x']].to_numpy()
918
- y = group[column_labels['y']].to_numpy()
919
- t = group[column_labels['time']].to_numpy()
920
-
921
- if len(x)>3: #to have t-1,t,t+1
922
- diff = instantaneous_diffusion(x,y,t)
923
- d = magnitude_diffusion(diff)
924
- trajectories.loc[indices, "diffusion"] = d
1124
def magnitude_diffusion(diffusion_vector):
    """
    Return the Euclidean magnitude of each (x, y) diffusion vector.

    Parameters
    ----------
    diffusion_vector : numpy.ndarray
        Array of shape (N, 2) holding the x and y diffusion components.

    Returns
    -------
    numpy.ndarray
        Array of N magnitudes, one per input row.

    Examples
    --------
    >>> diffusion = np.array([[1.0, 2.0], [3.0, 4.0], [0.5, 0.5]])
    >>> magnitudes = magnitude_diffusion(diffusion)
    >>> print(magnitudes)

    """

    d_x = diffusion_vector[:, 0]
    d_y = diffusion_vector[:, 1]
    return np.sqrt(d_x * d_x + d_y * d_y)
1152
+
1153
+
1154
def compute_instantaneous_diffusion(
    trajectories,
    column_labels={
        "track": "TRACK_ID",
        "time": "FRAME",
        "x": "POSITION_X",
        "y": "POSITION_Y",
    },
):
    """

    Compute the instantaneous diffusion for each track in the provided trajectories DataFrame.

    Parameters
    ----------
    trajectories : DataFrame
        The input DataFrame containing trajectories with position and time information.
    column_labels : dict, optional
        A dictionary specifying the column labels for track ID, time, x-coordinate, and y-coordinate.
        The default is {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.

    Returns
    -------
    DataFrame
        The modified DataFrame with an additional column "diffusion" containing the computed diffusion values.

    Notes
    -----

    The diffusion magnitude is evaluated per track via `instantaneous_diffusion`
    and `magnitude_diffusion`; tracks that are too short keep NaN in the
    "diffusion" column.

    Examples
    --------
    >>> trajectories = pd.DataFrame({'TRACK_ID': [1, 1, 1, 2, 2, 2],
    ...                              'FRAME': [0, 1, 2, 0, 1, 2],
    ...                              'POSITION_X': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
    ...                              'POSITION_Y': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]})
    >>> compute_instantaneous_diffusion(trajectories)
    # Output DataFrame with added "diffusion" column

    """

    trajectories = trajectories.sort_values(
        by=[column_labels["track"], column_labels["time"]]
    )
    trajectories["diffusion"] = np.nan

    for _, track in trajectories.groupby(column_labels["track"]):

        xs = track[column_labels["x"]].to_numpy()
        ys = track[column_labels["y"]].to_numpy()
        times = track[column_labels["time"]].to_numpy()

        # NOTE(review): the estimator only needs 3 points (t-1, t, t+1) but this
        # threshold demands at least 4 — preserved as-is; confirm intent upstream.
        if len(xs) > 3:
            vectors = instantaneous_diffusion(xs, ys, times)
            trajectories.loc[track.index, "diffusion"] = magnitude_diffusion(vectors)

    return trajectories
925
1215
 
926
- return trajectories
927
1216
 
928
1217
def track_at_position(pos, mode, return_tracks=False, view_on_napari=False, threads=1):
    """
    Run the cell-tracking script on a single position folder.

    Parameters
    ----------
    pos : str
        Path to the position folder to process. Backslashes are normalized to
        forward slashes and a trailing slash is enforced.
    mode : str
        Population mode forwarded to the tracking script (e.g. "targets").
    return_tracks : bool, optional
        If True, load and return the resulting trajectory table. Default is False.
    view_on_napari : bool, optional
        Unused here; kept for interface compatibility with sibling functions.
    threads : int, optional
        Number of threads forwarded to the tracking script. Default is 1.

    Returns
    -------
    pandas.DataFrame or None
        The trajectory table read from
        ``<pos>/output/tables/trajectories_<mode>.csv`` when `return_tracks`
        is True, otherwise None.

    Raises
    ------
    AssertionError
        If `pos` does not exist on disk.

    """

    import sys

    pos = pos.replace("\\", "/")
    pos = rf"{pos}"
    assert os.path.exists(pos), f"Position {pos} is not a valid path."
    if not pos.endswith("/"):
        pos += "/"

    script_path = os.sep.join([abs_path, "scripts", "track_cells.py"])
    # Pass an argument list with shell=False instead of interpolating `pos`
    # into a shell string: this avoids quoting/injection problems when the
    # path contains spaces or shell metacharacters, and sys.executable
    # guarantees the same Python interpreter runs the script.
    subprocess.call(
        [
            sys.executable,
            script_path,
            "--pos",
            pos,
            "--mode",
            mode,
            "--threads",
            str(threads),
        ]
    )

    track_table = pos + os.sep.join(["output", "tables", f"trajectories_{mode}.csv"])
    if return_tracks:
        df = pd.read_csv(track_table)
        return df
    else:
        return None
1235
+
1236
+
1237
def write_first_detection_class(
    df,
    img_shape=None,
    edge_threshold=20,
    column_labels={
        "track": "TRACK_ID",
        "time": "FRAME",
        "x": "POSITION_X",
        "y": "POSITION_Y",
    },
):
    """
    Assigns a classification and first detection time to tracks in the given DataFrame. This function must be called
    before any track post-processing.

    This function computes the first detection time and a detection class (`class_firstdetection`) for each track in the data.
    Tracks that start on or near the image edge, or those detected at the initial frame, are marked with special classes.

    Parameters
    ----------
    df : pandas.DataFrame
        A DataFrame containing track data. Expected to have at least the columns specified in `column_labels` and `class_id` (mask value).

    img_shape : tuple of int, optional
        The shape of the image as `(height, width)`. Used to determine whether the first detection occurs near the image edge.

    edge_threshold : int, optional, default=20
        The distance in pixels from the image edge to consider a detection as near the edge.

    column_labels : dict, optional
        A dictionary mapping logical column names to actual column names in `df`. Keys include:

        - `'track'`: The column indicating the track ID (default: `"TRACK_ID"`).
        - `'time'`: The column indicating the frame/time (default: `"FRAME"`).
        - `'x'`: The column indicating the X-coordinate (default: `"POSITION_X"`).
        - `'y'`: The column indicating the Y-coordinate (default: `"POSITION_Y"`).

    Returns
    -------
    pandas.DataFrame
        The input DataFrame `df` with three additional columns:

        - `'class_firstdetection'`: A class assigned based on detection status:

            - `0`: Valid detection not near the edge and not at the initial frame.
            - `2`: Detection near the edge, at the initial frame, or no detection available.

        - `'t_firstdetection'`: The adjusted first detection time (in frame units):

            - `-1`: Indicates no valid detection or detection near the edge.
            - A float value representing the adjusted first detection time otherwise.

        - `'status_firstdetection'`: 1 for frames at/after the first detection, 0 before it.

    Notes
    -----
    - The function assumes that tracks are grouped and sorted by track ID and frame.
    - Detections near the edge or at the initial frame (frame 0) are considered invalid and assigned special values.
    - If `img_shape` is not provided, edge checks are skipped.
    - Fix: the per-track timeline is now read once through `column_labels['time']`;
      it was previously overwritten with the hardcoded `"FRAME"` column, which
      silently broke non-default time labels.

    """

    df = df.sort_values(by=[column_labels["track"], column_labels["time"]])
    for tid, track_group in df.groupby(column_labels["track"]):
        indices = track_group.index
        detection = track_group["class_id"].values
        timeline = track_group[column_labels["time"]].values
        positions_x = track_group[column_labels["x"]].values
        positions_y = track_group[column_labels["y"]].values
        dt = 1  # one-frame offset applied to the first detection time

        status = np.ones_like(timeline)

        # Initialize to the "invalid" class until a usable detection is found.
        cclass = 2
        t_first = np.nan

        # `detection == detection` is a vectorized not-NaN test: True wherever
        # class_id is defined.
        if np.any(detection == detection):

            t_first = timeline[detection == detection][0]
            x_first = positions_x[detection == detection][0]
            y_first = positions_y[detection == detection][0]

            edge_test = False
            if img_shape is not None:
                edge_test = (
                    (x_first < edge_threshold)
                    or (y_first < edge_threshold)
                    or (y_first > (img_shape[0] - edge_threshold))
                    or (x_first > (img_shape[1] - edge_threshold))
                )

            cclass = 0
            if t_first <= 0:
                # Already detected at the initial frame: the true first
                # detection time is unknown.
                t_first = -1
                cclass = 2
            else:
                t_first = float(t_first) - float(dt)
                if t_first == 0:
                    # Nudge away from 0 so the value is distinguishable from
                    # the "initial frame" case.
                    t_first += 0.01

                if edge_test:
                    cclass = 2
                    # switch to class 2 but keep time/status information
        else:
            # No detection at all along the track.
            t_first = -1
            cclass = 2

        status[timeline < t_first] = 0.0
        df.loc[indices, "class_firstdetection"] = cclass
        df.loc[indices, "t_firstdetection"] = t_first
        df.loc[indices, "status_firstdetection"] = status

    return df
1045
1350
 
1046
1351
 
1047
1352
if __name__ == "__main__":
    # Ad-hoc developer smoke test: runs tracking on a hardcoded local
    # experiment folder (only valid on the author's machine).
    track_at_position(
        "/home/limozin/Documents/Experiments/MinimumJan/W4/401",
        "targets",
    )