celldetective 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- celldetective/__init__.py +2 -0
- celldetective/__main__.py +432 -0
- celldetective/datasets/segmentation_annotations/blank +0 -0
- celldetective/datasets/signal_annotations/blank +0 -0
- celldetective/events.py +149 -0
- celldetective/extra_properties.py +100 -0
- celldetective/filters.py +89 -0
- celldetective/gui/__init__.py +20 -0
- celldetective/gui/about.py +44 -0
- celldetective/gui/analyze_block.py +563 -0
- celldetective/gui/btrack_options.py +898 -0
- celldetective/gui/classifier_widget.py +386 -0
- celldetective/gui/configure_new_exp.py +532 -0
- celldetective/gui/control_panel.py +438 -0
- celldetective/gui/gui_utils.py +495 -0
- celldetective/gui/json_readers.py +113 -0
- celldetective/gui/measurement_options.py +1425 -0
- celldetective/gui/neighborhood_options.py +452 -0
- celldetective/gui/plot_signals_ui.py +1042 -0
- celldetective/gui/process_block.py +1055 -0
- celldetective/gui/retrain_segmentation_model_options.py +706 -0
- celldetective/gui/retrain_signal_model_options.py +643 -0
- celldetective/gui/seg_model_loader.py +460 -0
- celldetective/gui/signal_annotator.py +2388 -0
- celldetective/gui/signal_annotator_options.py +340 -0
- celldetective/gui/styles.py +217 -0
- celldetective/gui/survival_ui.py +903 -0
- celldetective/gui/tableUI.py +608 -0
- celldetective/gui/thresholds_gui.py +1300 -0
- celldetective/icons/logo-large.png +0 -0
- celldetective/icons/logo.png +0 -0
- celldetective/icons/signals_icon.png +0 -0
- celldetective/icons/splash-test.png +0 -0
- celldetective/icons/splash.png +0 -0
- celldetective/icons/splash0.png +0 -0
- celldetective/icons/survival2.png +0 -0
- celldetective/icons/vignette_signals2.png +0 -0
- celldetective/icons/vignette_signals2.svg +114 -0
- celldetective/io.py +2050 -0
- celldetective/links/zenodo.json +561 -0
- celldetective/measure.py +1258 -0
- celldetective/models/segmentation_effectors/blank +0 -0
- celldetective/models/segmentation_generic/blank +0 -0
- celldetective/models/segmentation_targets/blank +0 -0
- celldetective/models/signal_detection/blank +0 -0
- celldetective/models/tracking_configs/mcf7.json +68 -0
- celldetective/models/tracking_configs/ricm.json +203 -0
- celldetective/models/tracking_configs/ricm2.json +203 -0
- celldetective/neighborhood.py +717 -0
- celldetective/scripts/analyze_signals.py +51 -0
- celldetective/scripts/measure_cells.py +275 -0
- celldetective/scripts/segment_cells.py +212 -0
- celldetective/scripts/segment_cells_thresholds.py +140 -0
- celldetective/scripts/track_cells.py +206 -0
- celldetective/scripts/train_segmentation_model.py +246 -0
- celldetective/scripts/train_signal_model.py +49 -0
- celldetective/segmentation.py +712 -0
- celldetective/signals.py +2826 -0
- celldetective/tracking.py +974 -0
- celldetective/utils.py +1681 -0
- celldetective-1.0.2.dist-info/LICENSE +674 -0
- celldetective-1.0.2.dist-info/METADATA +192 -0
- celldetective-1.0.2.dist-info/RECORD +66 -0
- celldetective-1.0.2.dist-info/WHEEL +5 -0
- celldetective-1.0.2.dist-info/entry_points.txt +2 -0
- celldetective-1.0.2.dist-info/top_level.txt +1 -0
celldetective/tracking.py
@@ -0,0 +1,974 @@

import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

from btrack.io.utils import localizations_to_objects
from btrack import BayesianTracker

from celldetective.measure import measure_features
from celldetective.utils import rename_intensity_column
from celldetective.io import view_on_napari_btrack, interpret_tracking_configuration

from btrack.datasets import cell_config
import os
import subprocess

abs_path = os.sep.join([os.path.split(os.path.dirname(os.path.realpath(__file__)))[0], 'celldetective'])


def track(labels, configuration=None, stack=None, spatial_calibration=1, features=None, channel_names=None,
          haralick_options=None, return_napari_data=False, view_on_napari=False, mask_timepoints=None,
          mask_channels=None, volume=(2048, 2048), optimizer_options={'tm_lim': int(12e4)},
          track_kwargs={'step_size': 100}, objects=None, clean_trajectories_kwargs=None,
          column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'},
          ):

    """
    Perform cell tracking on segmented labels using the bTrack library.

    Parameters
    ----------
    labels : ndarray
        The segmented labels representing cell objects.
    configuration : Configuration or None
        The bTrack configuration object. If None, a default configuration is used.
    stack : ndarray or None, optional
        The image stack corresponding to the labels. Default is None.
    spatial_calibration : float, optional
        The spatial calibration factor to convert pixel coordinates to physical units. Default is 1.
    features : list or None, optional
        The list of features to extract from the objects. If None, no additional features are extracted. Default is None.
    channel_names : list or None, optional
        The list of channel names corresponding to the image stack. Used for renaming intensity columns in the
        output DataFrame. Default is None.
    haralick_options : dict or None, optional
        The options for Haralick feature extraction. If None, no Haralick features are extracted. Default is None.
    return_napari_data : bool, optional
        Whether to return the napari data dictionary along with the DataFrame. Default is False.
    view_on_napari : bool, optional
        Whether to view the tracking results on napari. Default is False.
    mask_timepoints : list or None, optional
        Frames to hide during tracking. Default is None.
    mask_channels : list or None, optional
        Channel names whose measurement columns are dropped before tracking. Default is None.
    volume : tuple, optional
        The (width, height) bounds of the tracking volume, in pixels. Default is (2048, 2048).
    optimizer_options : dict, optional
        The options for the optimizer. Default is {'tm_lim': int(12e4)}.
    track_kwargs : dict, optional
        Additional keyword arguments for the bTrack tracker. Default is {'step_size': 100}.
    objects : DataFrame or None, optional
        Precomputed object measurements. If provided, the feature-extraction step is skipped. Default is None.
    clean_trajectories_kwargs : dict or None, optional
        Keyword arguments for the clean_trajectories function to post-process the tracking trajectories.
        If None, no post-processing is performed. Default is None.
    column_labels : dict, optional
        The column labels to use in the output DataFrame. Default is {'track': "TRACK_ID", 'time': 'FRAME',
        'x': 'POSITION_X', 'y': 'POSITION_Y'}.

    Returns
    -------
    DataFrame or tuple
        If return_napari_data is False, returns the DataFrame containing the tracking results. If return_napari_data
        is True, returns a tuple containing the DataFrame and the napari data dictionary.

    Notes
    -----
    This function performs cell tracking on the segmented labels using the bTrack library. It extracts features
    from the objects, normalizes the features, tracks the objects, and generates a DataFrame with the tracking
    results. The DataFrame can be post-processed using the clean_trajectories function. If specified, the tracking
    results can be visualized on napari.

    Examples
    --------
    >>> labels = np.array([[1, 1, 2, 2, 0, 0],
    ...                    [1, 1, 1, 2, 2, 0],
    ...                    [0, 0, 1, 2, 0, 0]])
    >>> configuration = cell_config()
    >>> stack = np.random.rand(3, 6)
    >>> df = track(labels, configuration, stack=stack, spatial_calibration=0.5)
    >>> df.head()
       TRACK_ID  FRAME  POSITION_Y  POSITION_X
    0         0      0         0.0         0.0
    1         0      1         0.0         0.0
    2         0      2         0.0         0.0
    3         1      0         0.5         0.5
    4         1      1         0.5         0.5

    """

    configuration = interpret_tracking_configuration(configuration)

    if objects is None:
        objects = extract_objects_and_features(labels, stack, features,
                                               channel_names=channel_names,
                                               haralick_options=haralick_options,
                                               mask_timepoints=mask_timepoints,
                                               mask_channels=mask_channels,
                                               )

    columns = list(objects.columns)
    to_remove = ['x', 'y', 'class_id', 't']
    for tr in to_remove:
        try:
            columns.remove(tr)
        except ValueError:
            print(f'column {tr} could not be found...')

    # standardize the feature columns before handing them to bTrack
    scaler = StandardScaler()
    if columns:
        x = objects[columns].values
        x_scaled = scaler.fit_transform(x)
        df_temp = pd.DataFrame(x_scaled, columns=columns, index=objects.index)
        objects[columns] = df_temp
    else:
        print('Warning: no features were passed to bTrack...')

    # 2) track the objects
    new_btrack_objects = localizations_to_objects(objects)

    with BayesianTracker() as tracker:

        tracker.configure(configuration)

        if columns:
            tracking_updates = ["motion", "visual"]
            tracker.features = columns
        else:
            tracking_updates = ["motion"]

        tracker.append(new_btrack_objects)
        tracker.volume = ((0, volume[0]), (0, volume[1]))
        tracker.track(tracking_updates=tracking_updates, **track_kwargs)
        tracker.optimize(options=optimizer_options)

        data, properties, graph = tracker.to_napari()

    # do the table post-processing and napari options
    df = pd.DataFrame(data, columns=[column_labels['track'], column_labels['time'], column_labels['y'], column_labels['x']])
    df[column_labels['x']+'_um'] = df[column_labels['x']]*spatial_calibration
    df[column_labels['y']+'_um'] = df[column_labels['y']]*spatial_calibration

    df = df.merge(pd.DataFrame(properties), left_index=True, right_index=True)
    if columns:
        # undo the standardization so the output table keeps the raw feature values
        x = df[columns].values
        x_scaled = scaler.inverse_transform(x)
        df_temp = pd.DataFrame(x_scaled, columns=columns, index=df.index)
        df[columns] = df_temp

    df = df.sort_values(by=[column_labels['track'], column_labels['time']])

    if channel_names is not None:
        df = rename_intensity_column(df, channel_names)

    df = write_first_detection_class(df, column_labels=column_labels)

    if clean_trajectories_kwargs is not None:
        df = clean_trajectories(df.copy(), **clean_trajectories_kwargs)

    if view_on_napari:
        view_on_napari_btrack(data, properties, graph, stack=stack, labels=labels, relabel=True)

    if return_napari_data:
        napari_data = {"data": data, "properties": properties, "graph": graph}
        return df, napari_data
    else:
        return df

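# --- Editor's addition, not part of the released file: a minimal sketch of how
# `track` might be called on a tiny synthetic movie. It assumes that btrack is
# installed, that `configuration=None` resolves to a default model (as the
# docstring states), and that `measure_features` tolerates a missing stack;
# the shapes and values are arbitrary.
def _demo_track():
    labels = np.zeros((3, 32, 32), dtype=int)  # 3 frames with one 4x4 object
    labels[:, 10:14, 10:14] = 1
    df = track(labels, configuration=None, spatial_calibration=0.5, volume=(32, 32))
    return df
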
def extract_objects_and_features(labels, stack, features, channel_names=None, haralick_options=None, mask_timepoints=None, mask_channels=None):

    """
    Extract objects and features from segmented labels and image stack.

    Parameters
    ----------
    labels : ndarray
        The segmented labels representing cell objects.
    stack : ndarray
        The image stack corresponding to the labels.
    features : list or None
        The list of features to extract from the objects. If None, no additional features are extracted.
    channel_names : list or None, optional
        The list of channel names corresponding to the image stack. Used for extracting Haralick features. Default is None.
    haralick_options : dict or None, optional
        The options for Haralick feature extraction. If None, no Haralick features are extracted. Default is None.
    mask_timepoints : list or None, optional
        Frames to hide during tracking. Default is None.
    mask_channels : list or None, optional
        Channel names whose measurement columns are dropped from the output DataFrame. Default is None.

    Returns
    -------
    DataFrame
        The DataFrame containing the extracted object features.

    Notes
    -----
    This function extracts objects and features from the segmented labels and image stack. It computes the
    specified features for each labeled object and returns a DataFrame containing the object features. Additional
    features such as centroid coordinates can also be extracted. If Haralick features are enabled, they are
    computed based on the image stack using the specified options.

    Examples
    --------
    >>> labels = np.array([[1, 1, 2, 2, 0, 0],
    ...                    [1, 1, 1, 2, 2, 0],
    ...                    [0, 0, 1, 2, 0, 0]])
    >>> stack = np.random.rand(3, 6, 3)
    >>> features = ['area', 'mean_intensity']
    >>> df = extract_objects_and_features(labels, stack, features)

    """

    if features is None:
        features = []

    if stack is None:
        haralick_options = None

    if mask_timepoints is not None:
        for f in mask_timepoints:
            labels[f] = 0.

    nbr_frames = len(labels)
    timestep_dataframes = []

    for t in tqdm(range(nbr_frames), desc='frame'):

        if stack is not None:
            img = stack[t]
        else:
            img = None

        if (haralick_options is not None) and (t == 0) and (stack is not None):
            # estimate the clipping values for the Haralick normalization once, on the first frame
            if not 'percentiles' in haralick_options:
                haralick_options.update({'percentiles': (0.01, 99.99)})
            if not 'target_channel' in haralick_options:
                haralick_options.update({'target_channel': 0})
            haralick_percentiles = haralick_options['percentiles']
            haralick_channel_index = haralick_options['target_channel']
            min_value = np.nanpercentile(img[:, :, haralick_channel_index].flatten(), haralick_percentiles[0])
            max_value = np.nanpercentile(img[:, :, haralick_channel_index].flatten(), haralick_percentiles[1])
            haralick_options.update({'clip_values': (min_value, max_value)})

        df_props = measure_features(img, labels[t], features=features+['centroid'], border_dist=None,
                                    channels=channel_names, haralick_options=haralick_options, verbose=False)
        df_props.rename(columns={'centroid-1': 'x', 'centroid-0': 'y'}, inplace=True)
        df_props['t'] = int(t)
        timestep_dataframes.append(df_props)

    df = pd.concat(timestep_dataframes)
    df.reset_index(inplace=True, drop=True)

    if mask_channels is not None:
        cols_to_drop = []
        for mc in mask_channels:
            columns = df.columns
            col_contains = [mc in c for c in columns]
            to_remove = np.array(columns)[np.array(col_contains)]
            cols_to_drop.extend(to_remove)
        if len(cols_to_drop) > 0:
            df = df.drop(cols_to_drop, axis=1)

    return df

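# --- Editor's addition, not part of the released file: an illustrative call on
# a small synthetic movie. Shapes follow the conventions used above (labels:
# T x H x W, stack: T x H x W x C); the channel name is hypothetical.
def _demo_extract_objects_and_features():
    labels = np.zeros((2, 16, 16), dtype=int)
    labels[:, 4:8, 4:8] = 1  # one square object per frame
    stack = np.random.rand(2, 16, 16, 1)
    return extract_objects_and_features(labels, stack, ['area'], channel_names=['channel_1'])
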
def clean_trajectories(trajectories, remove_not_in_first=False, remove_not_in_last=False,
                       minimum_tracklength=0, interpolate_position_gaps=False,
                       extrapolate_tracks_post=False,
                       extrapolate_tracks_pre=False,
                       interpolate_na=False,
                       column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):

    """
    Clean trajectories by applying various cleaning operations.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    remove_not_in_first : bool, optional
        Flag indicating whether to remove tracks not present in the first frame.
        Defaults to False.
    remove_not_in_last : bool, optional
        Flag indicating whether to remove tracks not present in the last frame.
        Defaults to False.
    minimum_tracklength : int, optional
        The minimum length of a track to be retained.
        Defaults to 0.
    interpolate_position_gaps : bool, optional
        Flag indicating whether to interpolate position gaps in tracks.
        Defaults to False.
    extrapolate_tracks_post : bool, optional
        Flag indicating whether to extrapolate tracks after the last known position.
        Defaults to False.
    extrapolate_tracks_pre : bool, optional
        Flag indicating whether to extrapolate tracks before the first known position.
        Defaults to False.
    interpolate_na : bool, optional
        Flag indicating whether to interpolate missing values in tracks.
        Defaults to False.
    column_labels : dict, optional
        Dictionary specifying the column labels used in the input DataFrame.
        The keys represent the following column labels:
        - 'track': The column label for the track ID.
        - 'time': The column label for the timestamp.
        - 'x': The column label for the x-coordinate.
        - 'y': The column label for the y-coordinate.
        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.

    Returns
    -------
    pandas.DataFrame
        The cleaned DataFrame with trajectories.

    Notes
    -----
    This function applies various cleaning operations to the input DataFrame containing trajectory data.
    The cleaning operations include:
    - Filtering tracks based on their endpoints.
    - Filtering tracks based on their length.
    - Interpolating position gaps in tracks.
    - Extrapolating tracks after the last known position.
    - Extrapolating tracks before the first known position.
    - Interpolating missing values in tracks.

    The input DataFrame is expected to have the following columns:
    - track: The unique ID of each track.
    - time: The timestamp of each data point.
    - x: The x-coordinate of each data point.
    - y: The y-coordinate of each data point.

    Examples
    --------
    >>> cleaned_data = clean_trajectories(trajectories, remove_not_in_first=True, remove_not_in_last=True,
    ...                                   minimum_tracklength=10, interpolate_position_gaps=True,
    ...                                   extrapolate_tracks_post=True, extrapolate_tracks_pre=False,
    ...                                   interpolate_na=True, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
    >>> print(cleaned_data.head())

    """

    trajectories = trajectories.reset_index(drop=True)
    trajectories.sort_values(by=[column_labels['track'], column_labels['time']], inplace=True)

    if minimum_tracklength > 0:
        trajectories = filter_by_tracklength(trajectories.copy(), minimum_tracklength, track_label=column_labels['track'])

    if np.any([remove_not_in_first, remove_not_in_last]):
        trajectories = filter_by_endpoints(trajectories.copy(), remove_not_in_first=remove_not_in_first,
                                           remove_not_in_last=remove_not_in_last, column_labels=column_labels)

    if np.any([extrapolate_tracks_post, extrapolate_tracks_pre]):
        trajectories = extrapolate_tracks(trajectories.copy(), post=extrapolate_tracks_post,
                                          pre=extrapolate_tracks_pre, column_labels=column_labels)

    if interpolate_position_gaps:
        trajectories = interpolate_time_gaps(trajectories.copy(), column_labels=column_labels)

    if interpolate_na:
        trajectories = interpolate_nan_properties(trajectories.copy(), track_label=column_labels['track'])

    trajectories = trajectories.sort_values(by=[column_labels['track'], column_labels['time']])
    trajectories.reset_index(inplace=True, drop=True)

    if 'class_firstdetection' in list(trajectories.columns):
        # propagate a single first-detection class and time over each full track
        for tid, track_group in trajectories.groupby(column_labels['track']):
            indices = track_group.index

            class_values = np.array(track_group['class_firstdetection'].unique())
            class_values = class_values[class_values == class_values]  # drop NaN (NaN != NaN)
            t_values = np.array(track_group['t_firstdetection'].unique())
            t_values = t_values[t_values == t_values]
            if len(class_values) == 0:
                class_values = 2
                t_values = -1
            else:
                class_values = class_values[0]
                t_values = t_values[0]

            trajectories.loc[indices, 'class_firstdetection'] = class_values
            trajectories.loc[indices, 't_firstdetection'] = t_values

    return trajectories

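# --- Editor's addition, not part of the released file: a self-contained example
# of the cleaning pipeline on a toy table. Track 1 is dropped because it has a
# single point (<= minimum_tracklength); track 0 starts at FRAME 0 and is kept.
def _demo_clean_trajectories():
    df = pd.DataFrame({
        'TRACK_ID':   [0, 0, 0, 1],
        'FRAME':      [0, 1, 2, 0],
        'POSITION_X': [0.0, 1.0, 2.0, 5.0],
        'POSITION_Y': [0.0, 0.5, 1.0, 5.0],
    })
    return clean_trajectories(df, minimum_tracklength=2, remove_not_in_first=True)
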
def interpolate_per_track(group_df):

    """
    Interpolate missing values within a track.

    Parameters
    ----------
    group_df : pandas.DataFrame
        The input DataFrame containing data for a single track.

    Returns
    -------
    pandas.DataFrame
        The interpolated DataFrame with missing values filled.

    Notes
    -----
    This function performs linear interpolation to fill missing values within a track.
    Missing values are interpolated based on the neighboring data points in the track.

    """

    interpolated_group = group_df.interpolate(method='linear', limit_direction="both")

    return interpolated_group

def interpolate_nan_properties(trajectories, track_label="TRACK_ID"):

    """
    Interpolate missing values within tracks in the input DataFrame.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    track_label : str, optional
        The column label for the track ID.
        Defaults to "TRACK_ID".

    Returns
    -------
    pandas.DataFrame
        The DataFrame with missing values interpolated within tracks.

    Notes
    -----
    This function groups the input DataFrame by track ID and applies the `interpolate_per_track` function
    to interpolate missing values within each track.
    Missing values are interpolated based on the neighboring data points in each track.

    The input DataFrame is expected to have a column with the specified `track_label` containing the track IDs.

    Examples
    --------
    >>> interpolated_data = interpolate_nan_properties(trajectories, track_label="ID")
    >>> print(interpolated_data.head())

    """

    trajectories = trajectories.groupby(track_label, group_keys=False).apply(interpolate_per_track)

    return trajectories

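# --- Editor's addition, not part of the released file: NaN interpolation on a
# toy measurement column; the NaN at FRAME 1 becomes 11.0.
def _demo_interpolate_nan_properties():
    df = pd.DataFrame({
        'TRACK_ID': [0, 0, 0],
        'FRAME':    [0, 1, 2],
        'area':     [10.0, np.nan, 12.0],
    })
    return interpolate_nan_properties(df)
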
def filter_by_endpoints(trajectories, remove_not_in_first=True, remove_not_in_last=False,
                        column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):

    """
    Filter trajectories based on their endpoints.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    remove_not_in_first : bool, optional
        Flag indicating whether to remove tracks not present in the first frame.
        Defaults to True.
    remove_not_in_last : bool, optional
        Flag indicating whether to remove tracks not present in the last frame.
        Defaults to False.
    column_labels : dict, optional
        Dictionary specifying the column labels used in the input DataFrame.
        The keys represent the following column labels:
        - 'track': The column label for the track ID.
        - 'time': The column label for the timestamp.
        - 'x': The column label for the x-coordinate.
        - 'y': The column label for the y-coordinate.
        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.

    Returns
    -------
    pandas.DataFrame
        The filtered DataFrame with trajectories based on their endpoints.

    Notes
    -----
    This function filters the input DataFrame based on the endpoints of the trajectories.
    The filtering can be performed in three modes:
    - remove_not_in_first=True and remove_not_in_last=False: Remove tracks that are not present in the first frame.
    - remove_not_in_first=False and remove_not_in_last=True: Remove tracks that are not present in the last frame.
    - remove_not_in_first=True and remove_not_in_last=True: Remove tracks that are not present in both the first and last frames.

    The input DataFrame is expected to have the following columns:
    - track: The unique ID of each track.
    - time: The timestamp of each data point.
    - x: The x-coordinate of each data point.
    - y: The y-coordinate of each data point.

    Examples
    --------
    >>> filtered_data = filter_by_endpoints(trajectories, remove_not_in_first=True, remove_not_in_last=False, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
    >>> print(filtered_data.head())

    """

    if remove_not_in_first and not remove_not_in_last:
        # keep only tracks that start in the first frame
        first_times = trajectories.groupby(column_labels['track'])[column_labels['time']].min()
        leftover_tracks = first_times.index[first_times == np.amin(trajectories[column_labels['time']])]
        trajectories = trajectories.loc[trajectories[column_labels['track']].isin(leftover_tracks)]

    elif remove_not_in_last and not remove_not_in_first:
        # keep only tracks that survive until the last frame
        last_times = trajectories.groupby(column_labels['track'])[column_labels['time']].max()
        leftover_tracks = last_times.index[last_times == np.amax(trajectories[column_labels['time']])]
        trajectories = trajectories.loc[trajectories[column_labels['track']].isin(leftover_tracks)]

    elif remove_not_in_first and remove_not_in_last:
        # keep only tracks present in both the first and the last frame
        first_times = trajectories.groupby(column_labels['track'])[column_labels['time']].min()
        last_times = trajectories.groupby(column_labels['track'])[column_labels['time']].max()
        full_span = (first_times == np.amin(trajectories[column_labels['time']])) & (last_times == np.amax(trajectories[column_labels['time']]))
        leftover_tracks = first_times.index[full_span]
        trajectories = trajectories.loc[trajectories[column_labels['track']].isin(leftover_tracks)]

    trajectories = trajectories.sort_values(by=[column_labels['track'], column_labels['time']])

    return trajectories

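# --- Editor's addition, not part of the released file: endpoint filtering on a
# toy table; track 1 first appears at FRAME 1 and is therefore removed.
def _demo_filter_by_endpoints():
    df = pd.DataFrame({
        'TRACK_ID':   [0, 0, 1, 1],
        'FRAME':      [0, 1, 1, 2],
        'POSITION_X': [0.0, 1.0, 2.0, 3.0],
        'POSITION_Y': [0.0, 1.0, 2.0, 3.0],
    })
    return filter_by_endpoints(df, remove_not_in_first=True, remove_not_in_last=False)
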
def filter_by_tracklength(trajectories, minimum_tracklength, track_label="TRACK_ID"):

    """
    Filter trajectories based on the minimum track length.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    minimum_tracklength : int
        The minimum length required for a track to be included.
    track_label : str, optional
        The column name in the DataFrame that represents the track ID.
        Defaults to "TRACK_ID".

    Returns
    -------
    pandas.DataFrame
        The filtered DataFrame with trajectories that meet the minimum track length.

    Notes
    -----
    This function removes any tracks from the input DataFrame whose length (number of data points)
    is less than or equal to the specified minimum track length; only strictly longer tracks are kept.

    Examples
    --------
    >>> filtered_data = filter_by_tracklength(trajectories, 10, track_label="TrackID")
    >>> print(filtered_data.head())

    """

    if minimum_tracklength > 0:

        track_sizes = trajectories.groupby(track_label, group_keys=False).size()
        leftover_tracks = track_sizes.index[track_sizes > minimum_tracklength]
        trajectories = trajectories.loc[trajectories[track_label].isin(leftover_tracks)]

    trajectories = trajectories.reset_index(drop=True)

    return trajectories

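# --- Editor's addition, not part of the released file: track-length filtering;
# the strict inequality keeps track 0 (3 points) and drops track 1 (1 point).
def _demo_filter_by_tracklength():
    df = pd.DataFrame({'TRACK_ID': [0, 0, 0, 1], 'FRAME': [0, 1, 2, 0]})
    return filter_by_tracklength(df, 2)
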
def interpolate_time_gaps(trajectories, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):

    """
    Interpolate time gaps in trajectories.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    column_labels : dict, optional
        Dictionary specifying the column labels used in the input DataFrame.
        The keys represent the following column labels:
        - 'track': The column label for the track ID.
        - 'time': The column label for the timestamp.
        - 'x': The column label for the x-coordinate.
        - 'y': The column label for the y-coordinate.
        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.

    Returns
    -------
    pandas.DataFrame
        The interpolated DataFrame with reduced time gaps in trajectories.

    Notes
    -----
    This function performs interpolation on the input trajectories to fill time gaps between data points.
    It uses linear interpolation to fill missing values for the specified x and y coordinate attributes.
    Internally, the timestamps are mapped to a one-second datetime index so that pandas resampling can
    insert the missing frames; they are converted back to floats afterwards.

    The input DataFrame is expected to have the following columns:
    - track: The unique ID of each track.
    - time: The timestamp of each data point.
    - x: The x-coordinate of each data point.
    - y: The y-coordinate of each data point.

    Examples
    --------
    >>> interpolated_data = interpolate_time_gaps(trajectories, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
    >>> print(interpolated_data.head())

    """

    trajectories[column_labels['time']] = pd.to_datetime(trajectories[column_labels['time']], unit='s')
    trajectories.set_index(column_labels['track'], inplace=True)
    trajectories = trajectories.groupby(column_labels['track'], group_keys=True).apply(lambda x: x.set_index(column_labels['time']).resample('1S').asfreq()).reset_index()
    trajectories[[column_labels['x'], column_labels['y']]] = trajectories.groupby(column_labels['track'], group_keys=False)[[column_labels['x'], column_labels['y']]].apply(lambda x: x.interpolate(method='linear'))
    trajectories.reset_index(drop=True, inplace=True)
    trajectories[column_labels['time']] = trajectories[column_labels['time']].astype('int64').astype(float) / 10**9
    trajectories.sort_values(by=[column_labels['track'], column_labels['time']], inplace=True)

    return trajectories

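# --- Editor's addition, not part of the released file: gap interpolation on a
# track missing FRAME 1; the inserted row gets the midpoint position (1.0, 2.0).
def _demo_interpolate_time_gaps():
    df = pd.DataFrame({
        'TRACK_ID':   [0, 0],
        'FRAME':      [0, 2],
        'POSITION_X': [0.0, 2.0],
        'POSITION_Y': [0.0, 4.0],
    })
    return interpolate_time_gaps(df)
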
def extrapolate_tracks(trajectories, post=False, pre=False, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):

    """
    Extrapolate tracks in trajectories.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    post : bool, optional
        Flag indicating whether to perform post-extrapolation.
        Defaults to False.
    pre : bool, optional
        Flag indicating whether to perform pre-extrapolation.
        Defaults to False.
    column_labels : dict, optional
        Dictionary specifying the column labels used in the input DataFrame.
        The keys represent the following column labels:
        - 'track': The column label for the track ID.
        - 'time': The column label for the timestamp.
        - 'x': The column label for the x-coordinate.
        - 'y': The column label for the y-coordinate.
        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.

    Returns
    -------
    pandas.DataFrame
        The extrapolated DataFrame with extended tracks.

    Notes
    -----
    This function extrapolates tracks in the input DataFrame by repeating the last known position
    after the available data (post-extrapolation) or the first known position before it (pre-extrapolation).

    The input DataFrame is expected to have the following columns:
    - track: The unique ID of each track.
    - time: The timestamp of each data point.
    - x: The x-coordinate of each data point.
    - y: The y-coordinate of each data point.

    Examples
    --------
    >>> extrapolated_data = extrapolate_tracks(trajectories, post=True, pre=False, column_labels={'track': "ID", 'time': 'TIME', 'x': 'X', 'y': 'Y'})
    >>> print(extrapolated_data.head())

    """

    if post:

        # get the maximum time T in the dataframe
        max_time = trajectories[column_labels['time']].max()

        # extrapolate each track up to time T by repeating its last known position
        df_extrapolated = pd.DataFrame()
        for track_id, group in trajectories.groupby(column_labels['track']):
            last_known_position = group.loc[group[column_labels['time']] <= max_time].tail(1)[[column_labels['time'], column_labels['x'], column_labels['y']]].values
            extrapolated_frames = pd.DataFrame({column_labels['time']: np.arange(last_known_position[0][0] + 1, max_time + 1)})
            extrapolated_positions = pd.DataFrame({column_labels['x']: last_known_position[0][1], column_labels['y']: last_known_position[0][2]}, index=np.arange(last_known_position[0][0] + 1, max_time + 1))
            track_data = extrapolated_frames.join(extrapolated_positions, how="inner", on=column_labels['time'])
            track_data[column_labels['track']] = track_id
            df_extrapolated = pd.concat([df_extrapolated, track_data])

        # concatenate the original dataframe and the extrapolated dataframe
        trajectories = pd.concat([trajectories, df_extrapolated], axis=0)
        # sort the dataframe by track and time
        trajectories.sort_values([column_labels['track'], column_labels['time']], inplace=True)

    if pre:

        # extrapolate each track back to time 0
        min_time = 0  # trajectories[column_labels['time']].min()

        # pad the frames before the first detection by repeating the first known position
        df_extrapolated = pd.DataFrame()
        for track_id, group in trajectories.groupby(column_labels['track']):
            first_known_position = group.loc[group[column_labels['time']] >= min_time].head(1)[[column_labels['time'], column_labels['x'], column_labels['y']]].values
            extrapolated_frames = pd.DataFrame({column_labels['time']: np.arange(min_time, first_known_position[0][0])})
            extrapolated_positions = pd.DataFrame({column_labels['x']: first_known_position[0][1], column_labels['y']: first_known_position[0][2]}, index=np.arange(min_time, first_known_position[0][0]))
            track_data = extrapolated_frames.join(extrapolated_positions, how="inner", on=column_labels['time'])
            track_data[column_labels['track']] = track_id
            df_extrapolated = pd.concat([df_extrapolated, track_data])

        # concatenate the original dataframe and the extrapolated dataframe
        trajectories = pd.concat([trajectories, df_extrapolated], axis=0)
        # sort the dataframe by track and time
        trajectories.sort_values([column_labels['track'], column_labels['time']], inplace=True)

    return trajectories

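# --- Editor's addition, not part of the released file: post-extrapolation on a
# toy table; track 1 ends at FRAME 0 and is padded to FRAME 1 with its last
# known position.
def _demo_extrapolate_tracks():
    df = pd.DataFrame({
        'TRACK_ID':   [0, 0, 1],
        'FRAME':      [0, 1, 0],
        'POSITION_X': [0.0, 1.0, 3.0],
        'POSITION_Y': [0.0, 1.0, 3.0],
    })
    return extrapolate_tracks(df, post=True)
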
def compute_instantaneous_velocity(trajectories, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):

    """
    Compute the instantaneous velocity for each point in the trajectories.

    Parameters
    ----------
    trajectories : pandas.DataFrame
        The input DataFrame containing trajectory data.
    column_labels : dict, optional
        A dictionary specifying the column labels for track ID, time, position X, and position Y.
        Defaults to {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.

    Returns
    -------
    pandas.DataFrame
        The DataFrame with an added 'velocity' column representing the instantaneous velocity for each point.

    Notes
    -----
    This function calculates the instantaneous velocity for each point in the trajectories.
    The velocity is computed as the Euclidean distance traveled divided by the time difference between
    consecutive points; the first point of each track has no predecessor and keeps a NaN velocity.

    The input DataFrame is expected to have columns with the specified column labels for track ID, time,
    position X, and position Y.

    Examples
    --------
    >>> velocity_data = compute_instantaneous_velocity(trajectories)
    >>> print(velocity_data.head())

    """

    # Calculate the time differences and position differences
    trajectories['dt'] = trajectories.groupby(column_labels['track'])[column_labels['time']].diff()
    trajectories['dx'] = trajectories.groupby(column_labels['track'])[column_labels['x']].diff()
    trajectories['dy'] = trajectories.groupby(column_labels['track'])[column_labels['y']].diff()

    # Calculate the instantaneous velocity
    trajectories['velocity'] = np.sqrt(trajectories['dx']**2 + trajectories['dy']**2) / trajectories['dt']
    trajectories = trajectories.drop(['dx', 'dy', 'dt'], axis=1)
    trajectories = trajectories.sort_values(by=[column_labels['track'], column_labels['time']])

    return trajectories

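# --- Editor's addition, not part of the released file: velocity on a toy track;
# the 3-4-5 step at FRAME 1 gives velocity 5.0, the first point stays NaN.
def _demo_compute_instantaneous_velocity():
    df = pd.DataFrame({
        'TRACK_ID':   [0, 0, 0],
        'FRAME':      [0, 1, 2],
        'POSITION_X': [0.0, 3.0, 3.0],
        'POSITION_Y': [0.0, 4.0, 4.0],
    })
    return compute_instantaneous_velocity(df)
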
def instantaneous_diffusion(positions_x, positions_y, timeline):

    """
    Compute the instantaneous diffusion coefficients for each position coordinate.

    Parameters
    ----------
    positions_x : numpy.ndarray
        Array of x-coordinates of positions.
    positions_y : numpy.ndarray
        Array of y-coordinates of positions.
    timeline : numpy.ndarray
        Array of corresponding time points.

    Returns
    -------
    numpy.ndarray
        Array of instantaneous diffusion coefficients for each position coordinate.

    Notes
    -----
    The function calculates the instantaneous diffusion coefficients for each position coordinate (x, y)
    based on the provided positions and timeline. The diffusion coefficient at each interior time point t
    is computed as:

        D[t] = (x[t+1] - x[t-1])**2 / (2 * (T[t+1] - T[t-1]))
               + (x[t+1] - x[t]) * (x[t] - x[t-1]) / (T[t+1] - T[t-1])

    where x represents the position coordinate (x or y) and T the corresponding time point. The first and
    last entries have no neighbor on both sides and remain NaN.

    Examples
    --------
    >>> x = np.array([0, 1, 2, 3, 4, 5])
    >>> y = np.array([0, 1, 4, 9, 16, 25])
    >>> t = np.array([0, 1, 2, 3, 4, 5])
    >>> diff = instantaneous_diffusion(x, y, t)
    >>> print(diff)

    """

    diff = np.zeros((len(positions_x), 2))
    diff[:, :] = np.nan

    for t in range(1, len(positions_x)-1):
        dt = timeline[t+1] - timeline[t-1]
        diff[t, 0] = (positions_x[t+1] - positions_x[t-1])**2/(2*dt) + (positions_x[t+1] - positions_x[t])*(positions_x[t] - positions_x[t-1])/dt
        diff[t, 1] = (positions_y[t+1] - positions_y[t-1])**2/(2*dt) + (positions_y[t+1] - positions_y[t])*(positions_y[t] - positions_y[t-1])/dt

    return diff

def magnitude_diffusion(diffusion_vector):

    """
    Compute the magnitude of diffusion for each diffusion vector.

    Parameters
    ----------
    diffusion_vector : numpy.ndarray
        Array of diffusion vectors.

    Returns
    -------
    numpy.ndarray
        Array of magnitudes of diffusion.

    Notes
    -----
    The function calculates the magnitude of diffusion for each diffusion vector (x, y) based on the
    provided diffusion vectors. The magnitude of diffusion is computed as the Euclidean norm of the
    diffusion vector.

    Examples
    --------
    >>> diffusion = np.array([[1.0, 2.0], [3.0, 4.0], [0.5, 0.5]])
    >>> magnitudes = magnitude_diffusion(diffusion)
    >>> print(magnitudes)

    """

    return np.sqrt(diffusion_vector[:, 0]**2 + diffusion_vector[:, 1]**2)

def compute_instantaneous_diffusion(trajectories, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):

    """
    Compute the instantaneous diffusion for each track in the provided trajectories DataFrame.

    Parameters
    ----------
    trajectories : DataFrame
        The input DataFrame containing trajectories with position and time information.
    column_labels : dict, optional
        A dictionary specifying the column labels for track ID, time, x-coordinate, and y-coordinate.
        The default is {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.

    Returns
    -------
    DataFrame
        The modified DataFrame with an additional column "diffusion" containing the computed diffusion values.

    Notes
    -----
    The instantaneous diffusion is calculated using the positions and times of each track. The diffusion values
    are computed for each track individually and added as a new column "diffusion" in the output DataFrame.
    Tracks with fewer than four points are skipped and keep NaN diffusion values.

    Examples
    --------
    >>> trajectories = pd.DataFrame({'TRACK_ID': [1, 1, 1, 2, 2, 2],
    ...                              'FRAME': [0, 1, 2, 0, 1, 2],
    ...                              'POSITION_X': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
    ...                              'POSITION_Y': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]})
    >>> compute_instantaneous_diffusion(trajectories)
    # Output DataFrame with added "diffusion" column

    """

    trajectories = trajectories.sort_values(by=[column_labels['track'], column_labels['time']])
    trajectories['diffusion'] = np.nan

    for tid, group in trajectories.groupby(column_labels['track']):

        indices = group.index
        x = group[column_labels['x']].to_numpy()
        y = group[column_labels['y']].to_numpy()
        t = group[column_labels['time']].to_numpy()

        if len(x) > 3:  # need the triplets t-1, t, t+1
            diff = instantaneous_diffusion(x, y, t)
            d = magnitude_diffusion(diff)
            trajectories.loc[indices, "diffusion"] = d

    return trajectories

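# --- Editor's addition, not part of the released file: diffusion magnitudes on
# a single toy track; only the interior frames receive values, endpoints and
# tracks shorter than four points keep NaN.
def _demo_compute_instantaneous_diffusion():
    t = np.arange(6)
    df = pd.DataFrame({
        'TRACK_ID':   np.zeros(6, dtype=int),
        'FRAME':      t,
        'POSITION_X': np.sqrt(t),  # sub-linear, diffusion-like displacement
        'POSITION_Y': np.zeros(6),
    })
    return compute_instantaneous_diffusion(df)
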
def track_at_position(pos, mode, return_tracks=False, view_on_napari=False, threads=1):

    """
    Run the tracking script on a single experiment position folder.

    Parameters
    ----------
    pos : str
        Path to the position folder.
    mode : str
        The population to track (e.g. "targets" or "effectors").
    return_tracks : bool, optional
        Whether to load and return the trajectory table written by the script. Default is False.
    view_on_napari : bool, optional
        Currently unused. Default is False.
    threads : int, optional
        Number of threads passed to the tracking script. Default is 1.

    Returns
    -------
    pandas.DataFrame or None
        The trajectory table if `return_tracks` is True, otherwise None.

    """

    pos = pos.replace('\\', '/')
    pos = rf"{pos}"
    assert os.path.exists(pos), f'Position {pos} is not a valid path.'
    if not pos.endswith('/'):
        pos += '/'

    script_path = os.sep.join([abs_path, 'scripts', 'track_cells.py'])
    cmd = f'python "{script_path}" --pos "{pos}" --mode "{mode}" --threads "{threads}"'
    subprocess.call(cmd, shell=True)

    track_table = pos + os.sep.join(["output", "tables", f"trajectories_{mode}.csv"])
    if return_tracks:
        df = pd.read_csv(track_table)
        return df
    else:
        return None

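# --- Editor's addition, not part of the released file: a hypothetical call on an
# experiment position folder. The path is illustrative; the folder must exist and
# contain the segmentation outputs expected by scripts/track_cells.py.
def _demo_track_at_position():
    return track_at_position('/path/to/experiment/W1/100', 'targets', return_tracks=True)
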
def write_first_detection_class(tab, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):

    """
    Annotate a dataframe with the time of the first detection and classify tracks based on their detection status.

    This function processes a dataframe containing tracking data, identifying the first point of detection for each
    track based on the x-coordinate values. It annotates the dataframe with the time of the first detection and
    assigns a class to each track indicating whether the first detection occurs at the start, during, or if there is
    no detection within the tracking data.

    Parameters
    ----------
    tab : pandas.DataFrame
        The dataframe containing tracking data, expected to have columns for track ID, time, and spatial coordinates.
    column_labels : dict, optional
        A dictionary mapping standard column names ('track', 'time', 'x', 'y') to the corresponding column names in
        `tab`. Default column names are 'TRACK_ID', 'FRAME', 'POSITION_X', 'POSITION_Y'.

    Returns
    -------
    pandas.DataFrame
        The input dataframe `tab` with two additional columns: 'class_firstdetection' indicating the detection class,
        and 't_firstdetection' indicating the time of the first detection.

    Notes
    -----
    - Detection is based on the presence of non-NaN values in the 'x' column for each track.
    - Tracks with their first detection at the first time point are classified differently (`cclass=2`) and assigned
      a `t_first` of -1, indicating no prior detection.
    - The function assumes uniform time steps between each frame in the tracking data.

    """

    tab = tab.sort_values(by=[column_labels['track'], column_labels['time']])
    for tid, track_group in tab.groupby(column_labels['track']):
        indices = track_group.index
        detection = track_group[column_labels['x']].values
        timeline = track_group[column_labels['time']].values
        if len(timeline) > 2:
            dt = timeline[1] - timeline[0]
            if np.any(detection == detection):  # any non-NaN detection
                t_first = timeline[detection == detection][0]
                cclass = 0
                if t_first == 0:
                    t_first = -1
                    cclass = 2
                else:
                    t_first = float(t_first) - float(dt)
            else:
                t_first = -1
                cclass = 2
        else:
            # tracks too short to classify are treated like tracks detected from the start
            t_first = -1
            cclass = 2

        tab.loc[indices, 'class_firstdetection'] = cclass
        tab.loc[indices, 't_firstdetection'] = t_first
    return tab

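# --- Editor's addition, not part of the released file: first-detection classes
# on a toy table. Track 0 is first detected at FRAME 1 (class 0, t=0.0); track 1
# is detected from the start (class 2, t=-1).
def _demo_write_first_detection_class():
    df = pd.DataFrame({
        'TRACK_ID':   [0, 0, 0, 1, 1, 1],
        'FRAME':      [0, 1, 2, 0, 1, 2],
        'POSITION_X': [np.nan, 1.0, 1.1, 0.5, 0.6, 0.7],
        'POSITION_Y': [0.0, 1.0, 1.1, 0.5, 0.6, 0.7],
    })
    return write_first_detection_class(df)
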
if __name__ == "__main__":
    track_at_position("/home/limozin/Documents/Experiments/MinimumJan/W4/401",
                      "targets",
                      )