celldetective 1.4.2__py3-none-any.whl → 1.5.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. celldetective/__init__.py +25 -0
  2. celldetective/__main__.py +62 -43
  3. celldetective/_version.py +1 -1
  4. celldetective/extra_properties.py +477 -399
  5. celldetective/filters.py +192 -97
  6. celldetective/gui/InitWindow.py +541 -411
  7. celldetective/gui/__init__.py +0 -15
  8. celldetective/gui/about.py +44 -39
  9. celldetective/gui/analyze_block.py +120 -84
  10. celldetective/gui/base/__init__.py +0 -0
  11. celldetective/gui/base/channel_norm_generator.py +335 -0
  12. celldetective/gui/base/components.py +249 -0
  13. celldetective/gui/base/feature_choice.py +92 -0
  14. celldetective/gui/base/figure_canvas.py +52 -0
  15. celldetective/gui/base/list_widget.py +133 -0
  16. celldetective/gui/{styles.py → base/styles.py} +92 -36
  17. celldetective/gui/base/utils.py +33 -0
  18. celldetective/gui/base_annotator.py +900 -767
  19. celldetective/gui/classifier_widget.py +6 -22
  20. celldetective/gui/configure_new_exp.py +777 -671
  21. celldetective/gui/control_panel.py +635 -524
  22. celldetective/gui/dynamic_progress.py +449 -0
  23. celldetective/gui/event_annotator.py +2023 -1662
  24. celldetective/gui/generic_signal_plot.py +1292 -944
  25. celldetective/gui/gui_utils.py +899 -1289
  26. celldetective/gui/interactions_block.py +658 -0
  27. celldetective/gui/interactive_timeseries_viewer.py +447 -0
  28. celldetective/gui/json_readers.py +48 -15
  29. celldetective/gui/layouts/__init__.py +5 -0
  30. celldetective/gui/layouts/background_model_free_layout.py +537 -0
  31. celldetective/gui/layouts/channel_offset_layout.py +134 -0
  32. celldetective/gui/layouts/local_correction_layout.py +91 -0
  33. celldetective/gui/layouts/model_fit_layout.py +372 -0
  34. celldetective/gui/layouts/operation_layout.py +68 -0
  35. celldetective/gui/layouts/protocol_designer_layout.py +96 -0
  36. celldetective/gui/pair_event_annotator.py +3130 -2435
  37. celldetective/gui/plot_measurements.py +586 -267
  38. celldetective/gui/plot_signals_ui.py +724 -506
  39. celldetective/gui/preprocessing_block.py +395 -0
  40. celldetective/gui/process_block.py +1678 -1831
  41. celldetective/gui/seg_model_loader.py +580 -473
  42. celldetective/gui/settings/__init__.py +0 -7
  43. celldetective/gui/settings/_cellpose_model_params.py +181 -0
  44. celldetective/gui/settings/_event_detection_model_params.py +95 -0
  45. celldetective/gui/settings/_segmentation_model_params.py +159 -0
  46. celldetective/gui/settings/_settings_base.py +77 -65
  47. celldetective/gui/settings/_settings_event_model_training.py +752 -526
  48. celldetective/gui/settings/_settings_measurements.py +1133 -964
  49. celldetective/gui/settings/_settings_neighborhood.py +574 -488
  50. celldetective/gui/settings/_settings_segmentation_model_training.py +779 -564
  51. celldetective/gui/settings/_settings_signal_annotator.py +329 -305
  52. celldetective/gui/settings/_settings_tracking.py +1304 -1094
  53. celldetective/gui/settings/_stardist_model_params.py +98 -0
  54. celldetective/gui/survival_ui.py +422 -312
  55. celldetective/gui/tableUI.py +1665 -1701
  56. celldetective/gui/table_ops/_maths.py +295 -0
  57. celldetective/gui/table_ops/_merge_groups.py +140 -0
  58. celldetective/gui/table_ops/_merge_one_hot.py +95 -0
  59. celldetective/gui/table_ops/_query_table.py +43 -0
  60. celldetective/gui/table_ops/_rename_col.py +44 -0
  61. celldetective/gui/thresholds_gui.py +382 -179
  62. celldetective/gui/viewers/__init__.py +0 -0
  63. celldetective/gui/viewers/base_viewer.py +700 -0
  64. celldetective/gui/viewers/channel_offset_viewer.py +331 -0
  65. celldetective/gui/viewers/contour_viewer.py +394 -0
  66. celldetective/gui/viewers/size_viewer.py +153 -0
  67. celldetective/gui/viewers/spot_detection_viewer.py +341 -0
  68. celldetective/gui/viewers/threshold_viewer.py +309 -0
  69. celldetective/gui/workers.py +403 -126
  70. celldetective/log_manager.py +92 -0
  71. celldetective/measure.py +1895 -1478
  72. celldetective/napari/__init__.py +0 -0
  73. celldetective/napari/utils.py +1025 -0
  74. celldetective/neighborhood.py +1914 -1448
  75. celldetective/preprocessing.py +1620 -1220
  76. celldetective/processes/__init__.py +0 -0
  77. celldetective/processes/background_correction.py +271 -0
  78. celldetective/processes/compute_neighborhood.py +894 -0
  79. celldetective/processes/detect_events.py +246 -0
  80. celldetective/processes/downloader.py +137 -0
  81. celldetective/processes/measure_cells.py +565 -0
  82. celldetective/processes/segment_cells.py +760 -0
  83. celldetective/processes/track_cells.py +435 -0
  84. celldetective/processes/train_segmentation_model.py +694 -0
  85. celldetective/processes/train_signal_model.py +265 -0
  86. celldetective/processes/unified_process.py +292 -0
  87. celldetective/regionprops/_regionprops.py +358 -317
  88. celldetective/relative_measurements.py +987 -710
  89. celldetective/scripts/measure_cells.py +313 -212
  90. celldetective/scripts/measure_relative.py +90 -46
  91. celldetective/scripts/segment_cells.py +165 -104
  92. celldetective/scripts/segment_cells_thresholds.py +96 -68
  93. celldetective/scripts/track_cells.py +198 -149
  94. celldetective/scripts/train_segmentation_model.py +324 -201
  95. celldetective/scripts/train_signal_model.py +87 -45
  96. celldetective/segmentation.py +844 -749
  97. celldetective/signals.py +3514 -2861
  98. celldetective/tracking.py +30 -15
  99. celldetective/utils/__init__.py +0 -0
  100. celldetective/utils/cellpose_utils/__init__.py +133 -0
  101. celldetective/utils/color_mappings.py +42 -0
  102. celldetective/utils/data_cleaning.py +630 -0
  103. celldetective/utils/data_loaders.py +450 -0
  104. celldetective/utils/dataset_helpers.py +207 -0
  105. celldetective/utils/downloaders.py +235 -0
  106. celldetective/utils/event_detection/__init__.py +8 -0
  107. celldetective/utils/experiment.py +1782 -0
  108. celldetective/utils/image_augmenters.py +308 -0
  109. celldetective/utils/image_cleaning.py +74 -0
  110. celldetective/utils/image_loaders.py +926 -0
  111. celldetective/utils/image_transforms.py +335 -0
  112. celldetective/utils/io.py +62 -0
  113. celldetective/utils/mask_cleaning.py +348 -0
  114. celldetective/utils/mask_transforms.py +5 -0
  115. celldetective/utils/masks.py +184 -0
  116. celldetective/utils/maths.py +351 -0
  117. celldetective/utils/model_getters.py +325 -0
  118. celldetective/utils/model_loaders.py +296 -0
  119. celldetective/utils/normalization.py +380 -0
  120. celldetective/utils/parsing.py +465 -0
  121. celldetective/utils/plots/__init__.py +0 -0
  122. celldetective/utils/plots/regression.py +53 -0
  123. celldetective/utils/resources.py +34 -0
  124. celldetective/utils/stardist_utils/__init__.py +104 -0
  125. celldetective/utils/stats.py +90 -0
  126. celldetective/utils/types.py +21 -0
  127. {celldetective-1.4.2.dist-info → celldetective-1.5.0b1.dist-info}/METADATA +1 -1
  128. celldetective-1.5.0b1.dist-info/RECORD +187 -0
  129. {celldetective-1.4.2.dist-info → celldetective-1.5.0b1.dist-info}/WHEEL +1 -1
  130. tests/gui/test_new_project.py +129 -117
  131. tests/gui/test_project.py +127 -79
  132. tests/test_filters.py +39 -15
  133. tests/test_notebooks.py +8 -0
  134. tests/test_tracking.py +232 -13
  135. tests/test_utils.py +123 -77
  136. celldetective/gui/base_components.py +0 -23
  137. celldetective/gui/layouts.py +0 -1602
  138. celldetective/gui/processes/compute_neighborhood.py +0 -594
  139. celldetective/gui/processes/downloader.py +0 -111
  140. celldetective/gui/processes/measure_cells.py +0 -360
  141. celldetective/gui/processes/segment_cells.py +0 -499
  142. celldetective/gui/processes/track_cells.py +0 -303
  143. celldetective/gui/processes/train_segmentation_model.py +0 -270
  144. celldetective/gui/processes/train_signal_model.py +0 -108
  145. celldetective/gui/table_ops/merge_groups.py +0 -118
  146. celldetective/gui/viewers.py +0 -1354
  147. celldetective/io.py +0 -3663
  148. celldetective/utils.py +0 -3108
  149. celldetective-1.4.2.dist-info/RECORD +0 -123
  150. {celldetective-1.4.2.dist-info → celldetective-1.5.0b1.dist-info}/entry_points.txt +0 -0
  151. {celldetective-1.4.2.dist-info → celldetective-1.5.0b1.dist-info}/licenses/LICENSE +0 -0
  152. {celldetective-1.4.2.dist-info → celldetective-1.5.0b1.dist-info}/top_level.txt +0 -0
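Most of the churn in this release is a package-level restructuring rather than new behavior: the monolithic `celldetective/utils.py` (-3108 lines) and `celldetective/io.py` (-3663 lines) are deleted in favor of the new `celldetective/utils/` subpackage, and the GUI-bound workers under `celldetective/gui/processes/` are promoted to a top-level `celldetective/processes/` package. For downstream code, the sketch below shows the import migration; the five submodule mappings are taken verbatim from the `measure.py` diff that follows, while any other relocated symbol should be looked up in the new `celldetective/utils/` modules case by case:

# celldetective 1.4.2: helpers lived in one monolithic module
# from celldetective.utils import (
#     rename_intensity_column, create_patch_mask, remove_redundant_features,
#     remove_trajectory_measurements, contour_of_instance_segmentation,
#     extract_cols_from_query, step_function, interpolate_nan,
# )

# celldetective 1.5.0b1: the same names now live in topical submodules
from celldetective.utils.masks import contour_of_instance_segmentation, create_patch_mask
from celldetective.utils.parsing import extract_cols_from_query
from celldetective.utils.data_cleaning import (
    rename_intensity_column,
    remove_redundant_features,
    remove_trajectory_measurements,
)
from celldetective.utils.maths import step_function
from celldetective.utils.image_cleaning import interpolate_nan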
celldetective/measure.py CHANGED
@@ -1,1504 +1,1921 @@
1
1
  import math
2
-
3
2
  import numpy as np
4
- import pandas as pd
5
- from sklearn.metrics import r2_score
6
- from scipy.optimize import curve_fit
7
- from scipy import ndimage
8
- from tqdm import tqdm
9
- #from skimage.measure import regionprops_table
10
- from functools import reduce
11
- from mahotas.features import haralick
12
- from scipy.ndimage import zoom
13
3
  import os
14
4
  import subprocess
15
5
  from math import ceil
16
-
17
- from skimage.draw import disk as dsk
18
- from skimage.feature import blob_dog, blob_log
6
+ from functools import reduce
7
+ from inspect import getmembers, isfunction
19
8
 
20
9
  from celldetective.exceptions import EmptyQueryError, MissingColumnsError, QueryError
21
- from celldetective.utils import rename_intensity_column, create_patch_mask, remove_redundant_features, \
22
- remove_trajectory_measurements, contour_of_instance_segmentation, extract_cols_from_query, step_function, interpolate_nan, _remove_invalid_cols
10
+ from celldetective.utils.masks import (
11
+ contour_of_instance_segmentation,
12
+ create_patch_mask,
13
+ )
14
+ from celldetective.utils.parsing import extract_cols_from_query
15
+ from celldetective.utils.data_cleaning import (
16
+ _remove_invalid_cols,
17
+ rename_intensity_column,
18
+ remove_redundant_features,
19
+ remove_trajectory_measurements,
20
+ )
21
+ from celldetective.utils.maths import step_function
22
+ from celldetective.utils.image_cleaning import interpolate_nan
23
23
  from celldetective.preprocessing import field_correction
24
-
25
- # try:
26
- # from celldetective.extra_properties import *
27
- # extra_props = True
28
- # except Exception as e:
29
- # print(f"The module extra_properties seems corrupted: {e}... Skip...")
30
- # extra_props = False
31
-
32
- from inspect import getmembers, isfunction
33
- from skimage.morphology import disk
34
- from scipy.signal import find_peaks, peak_widths
35
-
36
- from celldetective.segmentation import filter_image
37
- from celldetective.regionprops import regionprops_table
38
- from celldetective.utils import pretty_table
39
-
40
- abs_path = os.sep.join([os.path.split(os.path.dirname(os.path.realpath(__file__)))[0], 'celldetective'])
41
-
42
-
43
-
44
- def measure(stack=None, labels=None, trajectories=None, channel_names=None,
45
- features=None, intensity_measurement_radii=None, isotropic_operations=['mean'], border_distances=None,
46
- haralick_options=None, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}, clear_previous=False):
47
-
48
- """
49
-
50
- Perform measurements on a stack of images or labels.
51
-
52
- Parameters
53
- ----------
54
- stack : numpy array, optional
55
- Stack of images with shape (T, Y, X, C), where T is the number of frames, Y and X are the spatial dimensions,
56
- and C is the number of channels. Default is None.
57
- labels : numpy array, optional
58
- Label stack with shape (T, Y, X) representing cell segmentations. Default is None.
59
- trajectories : pandas DataFrame, optional
60
- DataFrame of cell trajectories with columns specified in `column_labels`. Default is None.
61
- channel_names : list, optional
62
- List of channel names corresponding to the image stack. Default is None.
63
- features : list, optional
64
- List of features to measure using the `measure_features` function. Default is None.
65
- intensity_measurement_radii : int, float, or list, optional
66
- Radius or list of radii specifying the size of the isotropic measurement area for intensity measurements.
67
- If a single value is provided, a circular measurement area is used. If a list of values is provided, multiple
68
- measurements are performed using ring-shaped measurement areas. Default is None.
69
- isotropic_operations : list, optional
70
- List of operations to perform on the isotropic intensity values. Default is ['mean'].
71
- border_distances : int, float, or list, optional
72
- Distance or list of distances specifying the size of the border region for intensity measurements.
73
- If a single value is provided, measurements are performed at a fixed distance from the cell borders.
74
- If a list of values is provided, measurements are performed at multiple border distances. Default is None.
75
- haralick_options : dict, optional
76
- Dictionary of options for Haralick feature measurements. Default is None.
77
- column_labels : dict, optional
78
- Dictionary containing the column labels for the DataFrame. Default is {'track': "TRACK_ID",
79
- 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
80
-
81
- Returns
82
- -------
83
- pandas DataFrame
84
- DataFrame containing the measured features and intensities.
85
-
86
- Notes
87
- -----
88
- This function performs measurements on a stack of images or labels. If both `stack` and `labels` are provided,
89
- measurements are performed on each frame of the stack. The measurements include isotropic intensity values, computed
90
- using the `measure_isotropic_intensity` function, and additional features, computed using the `measure_features` function.
91
- The intensity measurements are performed at the positions specified in the `trajectories` DataFrame, using the
92
- specified `intensity_measurement_radii` and `border_distances`. The resulting measurements are combined into a single
93
- DataFrame and returned.
94
-
95
- Examples
96
- --------
97
- >>> stack = np.random.rand(10, 100, 100, 3)
98
- >>> labels = np.random.randint(0, 2, (10, 100, 100))
99
- >>> trajectories = pd.DataFrame({'TRACK_ID': [1, 2, 3], 'FRAME': [1, 1, 1],
100
- ... 'POSITION_X': [10, 20, 30], 'POSITION_Y': [15, 25, 35]})
101
- >>> channel_names = ['channel1', 'channel2', 'channel3']
102
- >>> features = ['area', 'intensity_mean']
103
- >>> intensity_measurement_radii = [5, 10]
104
- >>> border_distances = 2
105
- >>> measurements = measure(stack=stack, labels=labels, trajectories=trajectories, channel_names=channel_names,
106
- ... features=features, intensity_measurement_radii=intensity_measurement_radii,
107
- ... border_distances=border_distances)
108
- # Perform measurements on the stack, labels, and trajectories, computing isotropic intensities and additional features.
109
-
110
- """
111
-
112
-
113
- do_iso_intensities = True
114
- do_features = True
115
-
116
-
117
- # Check that conditions are satisfied to perform measurements
118
- assert (labels is not None) or (stack is not None),'Please pass a stack and/or labels... Abort.'
119
- if (labels is not None)*(stack is not None):
120
- assert labels.shape==stack.shape[:-1],f"Shape mismatch between the stack of shape {stack.shape} and the segmentation {labels.shape}..."
121
-
122
- # Condition to compute features
123
- if labels is None:
124
- do_features = False
125
- nbr_frames = len(stack)
126
- print('No labels were provided... Features will not be computed...')
127
- else:
128
- nbr_frames = len(labels)
129
-
130
- # Condition to compute isotropic intensities
131
- if (stack is None) or (trajectories is None) or (intensity_measurement_radii is None):
132
- do_iso_intensities = False
133
- print('Either no image, no positions or no radii were provided... Isotropic intensities will not be computed...')
134
-
135
- # Compensate for non provided channel names
136
- if (stack is not None)*(channel_names is None):
137
- nbr_channels = stack.shape[-1]
138
- channel_names = [f'intensity-{k}' for k in range(nbr_channels)]
139
-
140
- if isinstance(intensity_measurement_radii, int) or isinstance(intensity_measurement_radii, float):
141
- intensity_measurement_radii = [intensity_measurement_radii]
142
-
143
- if isinstance(border_distances, int) or isinstance(border_distances, float):
144
- border_distances = [border_distances]
145
-
146
- if features is not None:
147
- features = remove_redundant_features(features, trajectories.columns,
148
- channel_names=channel_names)
149
-
150
- if features is None:
151
- features = []
152
-
153
- # Prep for the case where no trajectory is provided but still want to measure isotropic intensities...
154
- if (trajectories is None):
155
- do_features = True
156
- features += ['centroid']
157
- else:
158
- if clear_previous:
159
- trajectories = remove_trajectory_measurements(trajectories, column_labels)
160
-
161
- timestep_dataframes = []
162
-
163
- for t in tqdm(range(nbr_frames),desc='frame'):
164
-
165
- if stack is not None:
166
- img = stack[t]
167
- else:
168
- img = None
169
- if labels is not None:
170
- lbl = labels[t]
171
- else:
172
- lbl = None
173
-
174
- if trajectories is not None:
175
- positions_at_t = trajectories.loc[trajectories[column_labels['time']]==t].copy()
176
-
177
- if do_features:
178
- feature_table = measure_features(img, lbl, features = features, border_dist=border_distances,
179
- channels=channel_names, haralick_options=haralick_options, verbose=False)
180
- if trajectories is None:
181
- # Use the centroids as estimate for the location of the cells, to be passed to the measure_isotropic_intensity function.
182
- positions_at_t = feature_table[['centroid-1', 'centroid-0','class_id']].copy()
183
- positions_at_t['ID'] = np.arange(len(positions_at_t)) # temporary ID for the cells, that will be reset at the end since they are not tracked
184
- positions_at_t.rename(columns={'centroid-1': 'POSITION_X', 'centroid-0': 'POSITION_Y'},inplace=True)
185
- positions_at_t['FRAME'] = int(t)
186
- column_labels = {'track': "ID", 'time': column_labels['time'], 'x': column_labels['x'], 'y': column_labels['y']}
187
-
188
- center_of_mass_x_cols = [c for c in list(positions_at_t.columns) if c.endswith('centre_of_mass_x')]
189
- center_of_mass_y_cols = [c for c in list(positions_at_t.columns) if c.endswith('centre_of_mass_y')]
190
- for c in center_of_mass_x_cols:
191
- positions_at_t.loc[:,c.replace('_x','_POSITION_X')] = positions_at_t[c] + positions_at_t['POSITION_X']
192
- for c in center_of_mass_y_cols:
193
- positions_at_t.loc[:,c.replace('_y','_POSITION_Y')] = positions_at_t[c] + positions_at_t['POSITION_Y']
194
- positions_at_t = positions_at_t.drop(columns = center_of_mass_x_cols+center_of_mass_y_cols)
195
-
196
- # Isotropic measurements (circle, ring)
197
- if do_iso_intensities:
198
- iso_table = measure_isotropic_intensity(positions_at_t, img, channels=channel_names, intensity_measurement_radii=intensity_measurement_radii,
199
- column_labels=column_labels, operations=isotropic_operations, verbose=False)
200
-
201
- if do_iso_intensities*do_features:
202
- measurements_at_t = iso_table.merge(feature_table, how='outer', on='class_id')
203
- elif do_iso_intensities*(not do_features):
204
- measurements_at_t = iso_table
205
- elif do_features*(trajectories is not None):
206
- measurements_at_t = positions_at_t.merge(feature_table, how='outer', on='class_id')
207
- elif do_features*(trajectories is None):
208
- measurements_at_t = positions_at_t
209
-
210
- try:
211
- measurements_at_t['radial_distance'] = np.sqrt((measurements_at_t[column_labels['x']] - img.shape[0] / 2) ** 2 + (
212
- measurements_at_t[column_labels['y']] - img.shape[1] / 2) ** 2)
213
- except Exception as e:
214
- print(f"{e=}")
215
-
216
- timestep_dataframes.append(measurements_at_t)
217
-
218
- measurements = pd.concat(timestep_dataframes)
219
- if trajectories is not None:
220
- measurements = measurements.sort_values(by=[column_labels['track'],column_labels['time']])
221
- measurements = measurements.dropna(subset=[column_labels['track']])
222
- else:
223
- measurements['ID'] = np.arange(len(measurements))
224
-
225
- measurements = measurements.reset_index(drop=True)
226
- measurements = _remove_invalid_cols(measurements)
227
-
228
- return measurements
229
-
230
- def write_first_detection_class(tab, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
231
-
232
- tab = tab.sort_values(by=[column_labels['track'],column_labels['time']])
233
- if 'area' in tab.columns:
234
- for tid,track_group in tab.groupby(column_labels['track']):
235
- indices = track_group.index
236
- area = track_group['area'].values
237
- timeline = track_group[column_labels['time']].values
238
- if np.any(area==area):
239
- t_first = timeline[area==area][0]
240
- cclass = 1
241
- if t_first==0:
242
- t_first = 0
243
- cclass = 2
244
- else:
245
- t_first = -1
246
- cclass = 2
247
-
248
- tab.loc[indices, 'class_firstdetection'] = cclass
249
- tab.loc[indices, 't_firstdetection'] = t_first
250
- return tab
24
+ from celldetective.log_manager import get_logger
25
+
26
+ logger = get_logger(__name__)
27
+
28
+
29
+ abs_path = os.sep.join(
30
+ [os.path.split(os.path.dirname(os.path.realpath(__file__)))[0], "celldetective"]
31
+ )
32
+
33
+
34
+ def measure(
35
+ stack=None,
36
+ labels=None,
37
+ trajectories=None,
38
+ channel_names=None,
39
+ features=None,
40
+ intensity_measurement_radii=None,
41
+ isotropic_operations=["mean"],
42
+ border_distances=None,
43
+ haralick_options=None,
44
+ column_labels={
45
+ "track": "TRACK_ID",
46
+ "time": "FRAME",
47
+ "x": "POSITION_X",
48
+ "y": "POSITION_Y",
49
+ },
50
+ clear_previous=False,
51
+ ):
52
+ """
53
+
54
+ Perform measurements on a stack of images or labels.
55
+
56
+ Parameters
57
+ ----------
58
+ stack : numpy array, optional
59
+ Stack of images with shape (T, Y, X, C), where T is the number of frames, Y and X are the spatial dimensions,
60
+ and C is the number of channels. Default is None.
61
+ labels : numpy array, optional
62
+ Label stack with shape (T, Y, X) representing cell segmentations. Default is None.
63
+ trajectories : pandas DataFrame, optional
64
+ DataFrame of cell trajectories with columns specified in `column_labels`. Default is None.
65
+ channel_names : list, optional
66
+ List of channel names corresponding to the image stack. Default is None.
67
+ features : list, optional
68
+ List of features to measure using the `measure_features` function. Default is None.
69
+ intensity_measurement_radii : int, float, or list, optional
70
+ Radius or list of radii specifying the size of the isotropic measurement area for intensity measurements.
71
+ If a single value is provided, a circular measurement area is used. If a list of values is provided, multiple
72
+ measurements are performed using ring-shaped measurement areas. Default is None.
73
+ isotropic_operations : list, optional
74
+ List of operations to perform on the isotropic intensity values. Default is ['mean'].
75
+ border_distances : int, float, or list, optional
76
+ Distance or list of distances specifying the size of the border region for intensity measurements.
77
+ If a single value is provided, measurements are performed at a fixed distance from the cell borders.
78
+ If a list of values is provided, measurements are performed at multiple border distances. Default is None.
79
+ haralick_options : dict, optional
80
+ Dictionary of options for Haralick feature measurements. Default is None.
81
+ column_labels : dict, optional
82
+ Dictionary containing the column labels for the DataFrame. Default is {'track': "TRACK_ID",
83
+ 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
84
+
85
+ Returns
86
+ -------
87
+ pandas DataFrame
88
+ DataFrame containing the measured features and intensities.
89
+
90
+ Notes
91
+ -----
92
+ This function performs measurements on a stack of images or labels. If both `stack` and `labels` are provided,
93
+ measurements are performed on each frame of the stack. The measurements include isotropic intensity values, computed
94
+ using the `measure_isotropic_intensity` function, and additional features, computed using the `measure_features` function.
95
+ The intensity measurements are performed at the positions specified in the `trajectories` DataFrame, using the
96
+ specified `intensity_measurement_radii` and `border_distances`. The resulting measurements are combined into a single
97
+ DataFrame and returned.
98
+
99
+ Examples
100
+ --------
101
+ >>> stack = np.random.rand(10, 100, 100, 3)
102
+ >>> labels = np.random.randint(0, 2, (10, 100, 100))
103
+ >>> trajectories = pd.DataFrame({'TRACK_ID': [1, 2, 3], 'FRAME': [1, 1, 1],
104
+ ... 'POSITION_X': [10, 20, 30], 'POSITION_Y': [15, 25, 35]})
105
+ >>> channel_names = ['channel1', 'channel2', 'channel3']
106
+ >>> features = ['area', 'intensity_mean']
107
+ >>> intensity_measurement_radii = [5, 10]
108
+ >>> border_distances = 2
109
+ >>> measurements = measure(stack=stack, labels=labels, trajectories=trajectories, channel_names=channel_names,
110
+ ... features=features, intensity_measurement_radii=intensity_measurement_radii,
111
+ ... border_distances=border_distances)
112
+ # Perform measurements on the stack, labels, and trajectories, computing isotropic intensities and additional features.
113
+
114
+ """
115
+
116
+ do_iso_intensities = True
117
+ do_features = True
118
+
119
+ # Check that conditions are satisfied to perform measurements
120
+ assert (labels is not None) or (
121
+ stack is not None
122
+ ), "Please pass a stack and/or labels... Abort."
123
+ if (labels is not None) * (stack is not None):
124
+ assert (
125
+ labels.shape == stack.shape[:-1]
126
+ ), f"Shape mismatch between the stack of shape {stack.shape} and the segmentation {labels.shape}..."
127
+
128
+ # Condition to compute features
129
+ if labels is None:
130
+ do_features = False
131
+ nbr_frames = len(stack)
132
+ logger.warning("No labels were provided... Features will not be computed...")
133
+ else:
134
+ nbr_frames = len(labels)
135
+
136
+ # Condition to compute isotropic intensities
137
+ if (
138
+ (stack is None)
139
+ or (trajectories is None)
140
+ or (intensity_measurement_radii is None)
141
+ ):
142
+ do_iso_intensities = False
143
+ logger.warning(
144
+ "Either no image, no positions or no radii were provided... Isotropic intensities will not be computed..."
145
+ )
146
+
147
+ # Compensate for non provided channel names
148
+ if (stack is not None) * (channel_names is None):
149
+ nbr_channels = stack.shape[-1]
150
+ channel_names = [f"intensity-{k}" for k in range(nbr_channels)]
151
+
152
+ if isinstance(intensity_measurement_radii, int) or isinstance(
153
+ intensity_measurement_radii, float
154
+ ):
155
+ intensity_measurement_radii = [intensity_measurement_radii]
156
+
157
+ if isinstance(border_distances, (int, float, str)):
158
+ border_distances = [border_distances]
159
+
160
+ if features is not None:
161
+ features = remove_redundant_features(
162
+ features,
163
+ trajectories.columns if trajectories is not None else [],
164
+ channel_names=channel_names,
165
+ )
166
+
167
+ if features is None:
168
+ features = []
169
+
170
+ # Prep for the case where no trajectory is provided but still want to measure isotropic intensities...
171
+ if trajectories is None:
172
+ do_features = True
173
+ features += ["centroid"]
174
+ else:
175
+ if clear_previous:
176
+ trajectories = remove_trajectory_measurements(trajectories, column_labels)
177
+
178
+ timestep_dataframes = []
179
+
180
+ from tqdm import tqdm
181
+
182
+ for t in tqdm(range(nbr_frames), desc="frame"):
183
+
184
+ if stack is not None:
185
+ img = stack[t]
186
+ else:
187
+ img = None
188
+ if labels is not None:
189
+ lbl = labels[t]
190
+ else:
191
+ lbl = None
192
+
193
+ if trajectories is not None:
194
+ positions_at_t = trajectories.loc[
195
+ trajectories[column_labels["time"]] == t
196
+ ].copy()
197
+
198
+ if do_features:
199
+ feature_table = measure_features(
200
+ img,
201
+ lbl,
202
+ features=features,
203
+ border_dist=border_distances,
204
+ channels=channel_names,
205
+ haralick_options=haralick_options,
206
+ verbose=False,
207
+ )
208
+ if trajectories is None:
209
+ # Use the centroids as estimate for the location of the cells, to be passed to the measure_isotropic_intensity function.
210
+ positions_at_t = feature_table[
211
+ ["centroid-1", "centroid-0", "class_id"]
212
+ ].copy()
213
+ positions_at_t["ID"] = np.arange(
214
+ len(positions_at_t)
215
+ ) # temporary ID for the cells, that will be reset at the end since they are not tracked
216
+ positions_at_t.rename(
217
+ columns={"centroid-1": "POSITION_X", "centroid-0": "POSITION_Y"},
218
+ inplace=True,
219
+ )
220
+ positions_at_t["FRAME"] = int(t)
221
+ column_labels = {
222
+ "track": "ID",
223
+ "time": column_labels["time"],
224
+ "x": column_labels["x"],
225
+ "y": column_labels["y"],
226
+ }
227
+
228
+ center_of_mass_x_cols = [
229
+ c for c in list(positions_at_t.columns) if c.endswith("centre_of_mass_x")
230
+ ]
231
+ center_of_mass_y_cols = [
232
+ c for c in list(positions_at_t.columns) if c.endswith("centre_of_mass_y")
233
+ ]
234
+ for c in center_of_mass_x_cols:
235
+ positions_at_t.loc[:, c.replace("_x", "_POSITION_X")] = (
236
+ positions_at_t[c] + positions_at_t["POSITION_X"]
237
+ )
238
+ for c in center_of_mass_y_cols:
239
+ positions_at_t.loc[:, c.replace("_y", "_POSITION_Y")] = (
240
+ positions_at_t[c] + positions_at_t["POSITION_Y"]
241
+ )
242
+ positions_at_t = positions_at_t.drop(
243
+ columns=center_of_mass_x_cols + center_of_mass_y_cols
244
+ )
245
+
246
+ # Isotropic measurements (circle, ring)
247
+ if do_iso_intensities:
248
+ iso_table = measure_isotropic_intensity(
249
+ positions_at_t,
250
+ img,
251
+ channels=channel_names,
252
+ intensity_measurement_radii=intensity_measurement_radii,
253
+ column_labels=column_labels,
254
+ operations=isotropic_operations,
255
+ verbose=False,
256
+ )
257
+
258
+ if do_iso_intensities * do_features:
259
+ measurements_at_t = iso_table.merge(
260
+ feature_table, how="outer", on="class_id"
261
+ )
262
+ elif do_iso_intensities * (not do_features):
263
+ measurements_at_t = iso_table
264
+ elif do_features * (trajectories is not None):
265
+ measurements_at_t = positions_at_t.merge(
266
+ feature_table, how="outer", on="class_id"
267
+ )
268
+ elif do_features * (trajectories is None):
269
+ measurements_at_t = positions_at_t
270
+
271
+ try:
272
+ measurements_at_t["radial_distance"] = np.sqrt(
273
+ (measurements_at_t[column_labels["x"]] - img.shape[0] / 2) ** 2
274
+ + (measurements_at_t[column_labels["y"]] - img.shape[1] / 2) ** 2
275
+ )
276
+ except Exception as e:
277
+ logger.error(f"{e=}")
278
+
279
+ timestep_dataframes.append(measurements_at_t)
280
+
281
+ import pandas as pd
282
+
283
+ measurements = pd.concat(timestep_dataframes)
284
+ if trajectories is not None:
285
+ measurements = measurements.sort_values(
286
+ by=[column_labels["track"], column_labels["time"]]
287
+ )
288
+ measurements = measurements.dropna(subset=[column_labels["track"]])
289
+ else:
290
+ measurements["ID"] = np.arange(len(measurements))
291
+
292
+ measurements = measurements.reset_index(drop=True)
293
+ measurements = _remove_invalid_cols(measurements)
294
+
295
+ return measurements
296
+
297
+
298
+ def write_first_detection_class(
299
+ tab,
300
+ column_labels={
301
+ "track": "TRACK_ID",
302
+ "time": "FRAME",
303
+ "x": "POSITION_X",
304
+ "y": "POSITION_Y",
305
+ },
306
+ ):
307
+
308
+ tab = tab.sort_values(by=[column_labels["track"], column_labels["time"]])
309
+ if "area" in tab.columns:
310
+ for tid, track_group in tab.groupby(column_labels["track"]):
311
+ indices = track_group.index
312
+ area = track_group["area"].values
313
+ timeline = track_group[column_labels["time"]].values
314
+ if np.any(area == area):
315
+ t_first = timeline[area == area][0]
316
+ cclass = 1
317
+ if t_first == 0:
318
+ t_first = 0
319
+ cclass = 2
320
+ else:
321
+ t_first = -1
322
+ cclass = 2
323
+
324
+ tab.loc[indices, "class_firstdetection"] = cclass
325
+ tab.loc[indices, "t_firstdetection"] = t_first
326
+ return tab
251
327
 
252
328
 
253
329
  def drop_tonal_features(features):
330
+ """
331
+ Removes features related to intensity from a list of feature names.
332
+
333
+ This function iterates over a list of feature names and removes any feature that includes the term 'intensity' in its name.
334
+ The operation is performed in-place, meaning the original list of features is modified directly.
335
+
336
+ Parameters
337
+ ----------
338
+ features : list of str
339
+ A list of feature names from which intensity-related features are to be removed.
340
+
341
+ Returns
342
+ -------
343
+ list of str
344
+ The modified list of feature names with intensity-related features removed. Note that this operation modifies the
345
+ input list in-place, so the return value is the same list object with some elements removed.
346
+
347
+ """
348
+
349
+ feat2 = features[:]
350
+ for f in features:
351
+ if "intensity" in f:
352
+ feat2.remove(f)
353
+ return feat2
354
+
355
+
356
+ def measure_features(
357
+ img,
358
+ label,
359
+ features=["area", "intensity_mean"],
360
+ channels=None,
361
+ border_dist=None,
362
+ haralick_options=None,
363
+ verbose=True,
364
+ normalisation_list=None,
365
+ radial_intensity=None,
366
+ radial_channel=None,
367
+ spot_detection=None,
368
+ ):
369
+ """
370
+ Measure features within segmented regions of an image.
371
+
372
+ Parameters
373
+ ----------
374
+ img : ndarray
375
+ The input image as a NumPy array.
376
+ label : ndarray
377
+ The segmentation labels corresponding to the image regions.
378
+ features : list, optional
379
+ The list of features to measure within the segmented regions. The default is ['area', 'intensity_mean'].
380
+ channels : list, optional
381
+ The list of channel names in the image. The default is ["brightfield_channel", "dead_nuclei_channel", "live_nuclei_channel"].
382
+ border_dist : int, float, or list, optional
383
+ The distance(s) in pixels from the edge of each segmented region to measure features. The default is None.
384
+ haralick_options : dict, optional
385
+ The options for computing Haralick features. The default is None.
386
+ verbose : bool, optional
387
+ If True, warnings will be logged.
388
+ normalisation_list : list of dict, optional
389
+ List of normalization operations to apply.
390
+ radial_intensity : Any, optional
391
+ Deprecated/Unused parameter.
392
+ radial_channel : Any, optional
393
+ Deprecated/Unused parameter.
394
+ spot_detection : dict, optional
395
+ Options for spot detection.
396
+
397
+ Returns
398
+ -------
399
+ df_props : DataFrame
400
+ A pandas DataFrame containing the measured features for each segmented region.
401
+ """
402
+ if features is None:
403
+ features = []
404
+ elif isinstance(features, list):
405
+ features = features.copy()
406
+
407
+ measure_mean_intensities = False
408
+ if img is None:
409
+ if verbose:
410
+ logger.warning("No image was provided... Skip intensity measurements.")
411
+ border_dist = None
412
+ haralick_options = None
413
+ features = drop_tonal_features(features)
414
+
415
+ if "intensity_mean" in features:
416
+ measure_mean_intensities = True
417
+ features.remove("intensity_mean")
418
+
419
+ # Add label to have identity of mask
420
+ if "label" not in features:
421
+ features.append("label")
422
+
423
+ if img is not None:
424
+ if img.ndim == 2:
425
+ img = img[:, :, np.newaxis]
426
+
427
+ if channels is None:
428
+ channels = [f"intensity-{k}" for k in range(img.shape[-1])]
429
+
430
+ if img.ndim == 3 and channels is not None:
431
+ assert (
432
+ len(channels) == img.shape[-1]
433
+ ), "Mismatch between the provided channel names and the shape of the image"
434
+
435
+ if spot_detection is not None:
436
+ detection_channel = spot_detection.get("channel")
437
+ if detection_channel in channels:
438
+ ind = channels.index(detection_channel)
439
+ if "image_preprocessing" not in spot_detection:
440
+ spot_detection.update({"image_preprocessing": None})
441
+
442
+ df_spots = blob_detection(
443
+ img,
444
+ label,
445
+ diameter=spot_detection["diameter"],
446
+ threshold=spot_detection["threshold"],
447
+ channel_name=detection_channel,
448
+ target_channel=ind,
449
+ image_preprocessing=spot_detection["image_preprocessing"],
450
+ )
451
+ else:
452
+ logger.warning(
453
+ f"Spot detection channel '{detection_channel}' not found in channels."
454
+ )
455
+ df_spots = None
456
+
457
+ if normalisation_list:
458
+ for norm in normalisation_list:
459
+ target = norm.get("target_channel")
460
+ if target in channels:
461
+ ind = channels.index(target)
462
+
463
+ if norm["correction_type"] == "local":
464
+ normalised_image = normalise_by_cell(
465
+ img[:, :, ind].copy(),
466
+ label,
467
+ distance=int(norm["distance"]),
468
+ model=norm["model"],
469
+ operation=norm["operation"],
470
+ clip=norm["clip"],
471
+ )
472
+ img[:, :, ind] = normalised_image
473
+ else:
474
+ corrected_image = field_correction(
475
+ img[:, :, ind].copy(),
476
+ threshold_on_std=norm["threshold_on_std"],
477
+ operation=norm["operation"],
478
+ model=norm["model"],
479
+ clip=norm["clip"],
480
+ )
481
+ img[:, :, ind] = corrected_image
482
+ else:
483
+ logger.warning(
484
+ f"Normalization target '{target}' not found in channels."
485
+ )
486
+
487
+ # Initialize extra properties list and name check list
488
+ extra = [] # Ensure 'extra' is defined regardless of import success
489
+ try:
490
+ import celldetective.extra_properties as extra_props
491
+
492
+ extraprops = True
493
+ except Exception as e:
494
+ logger.error(f"The module extra_properties seems corrupted: {e}... Skip...")
495
+ extraprops = False
496
+
497
+ extra_props_list = []
498
+
499
+ if extraprops:
500
+ # Get list of function names in extra_properties
501
+ extra = [name for name, _ in getmembers(extra_props, isfunction)]
502
+
503
+ feats_temp = features.copy()
504
+ for f in feats_temp:
505
+ if f in extra:
506
+ features.remove(f)
507
+ extra_props_list.append(getattr(extra_props, f))
508
+
509
+ # Add intensity nan mean if need to measure mean intensities
510
+ if measure_mean_intensities:
511
+ extra_props_list.append(getattr(extra_props, "intensity_nanmean"))
512
+
513
+ else:
514
+ if measure_mean_intensities:
515
+ features.append("intensity_mean")
516
+
517
+ if not extra_props_list:
518
+ extra_props_list = None
519
+ else:
520
+ extra_props_list = tuple(extra_props_list)
521
+
522
+ from celldetective.regionprops import regionprops_table
523
+
524
+ props = regionprops_table(
525
+ label,
526
+ intensity_image=img,
527
+ properties=features,
528
+ extra_properties=extra_props_list,
529
+ channel_names=channels,
530
+ )
531
+ import pandas as pd
532
+
533
+ df_props = pd.DataFrame(props)
534
+
535
+ if spot_detection is not None and df_spots is not None:
536
+ df_props = df_props.merge(
537
+ df_spots, how="outer", on="label", suffixes=("_delme", "")
538
+ )
539
+ df_props = df_props[[c for c in df_props.columns if not c.endswith("_delme")]]
540
+
541
+ if border_dist is not None:
542
+ # Filter for features containing "intensity" but not "centroid" or "peripheral"
543
+ intensity_features = [
544
+ f
545
+ for f in (features + extra)
546
+ if "intensity" in f and "centroid" not in f and "peripheral" not in f
547
+ ]
548
+
549
+ # Prepare extra properties for intensity features on borders
550
+ intensity_extra = []
551
+ if measure_mean_intensities and extraprops:
552
+ intensity_extra.append(getattr(extra_props, "intensity_nanmean"))
553
+
554
+ clean_intensity_features = []
555
+ for s in intensity_features:
556
+ if s in extra:
557
+ intensity_extra.append(getattr(extra_props, s))
558
+ else:
559
+ clean_intensity_features.append(s)
560
+
561
+ if not intensity_extra and not clean_intensity_features:
562
+ logger.warning(
563
+ "No intensity feature was passed... Adding mean intensity for edge measurement..."
564
+ )
565
+ if extraprops:
566
+ intensity_extra.append(getattr(extra_props, "intensity_nanmean"))
567
+
568
+ # Always include label for merging
569
+ clean_intensity_features.append("label")
570
+
571
+ # Helper to format suffix
572
+ def get_suffix(d):
573
+ d_str = str(d)
574
+ d_clean = (
575
+ d_str.replace("(", "")
576
+ .replace(")", "")
577
+ .replace(", ", "_")
578
+ .replace(",", "_")
579
+ )
580
+ if "-" in d_str or "," in d_str:
581
+ return f"_slice_{d_clean.replace('-', 'm')}px"
582
+ else:
583
+ return f"_edge_{d_clean}px"
584
+
585
+ # Ensure border_dist is a list for uniform processing
586
+ dist_list = (
587
+ [border_dist] if isinstance(border_dist, (int, float, str)) else border_dist
588
+ )
589
+
590
+ df_props_border_list = []
591
+ for d in dist_list:
592
+ border_label = contour_of_instance_segmentation(label, d)
593
+ props_border = regionprops_table(
594
+ border_label,
595
+ intensity_image=img,
596
+ properties=clean_intensity_features,
597
+ extra_properties=intensity_extra,
598
+ channel_names=channels,
599
+ )
600
+ import pandas as pd
601
+
602
+ df_props_border_d = pd.DataFrame(props_border)
603
+
604
+ # Rename columns with suffix
605
+ rename_dict = {}
606
+ for c in df_props_border_d.columns:
607
+ if "intensity" in c:
608
+ rename_dict[c] = c + get_suffix(d)
609
+
610
+ df_props_border_d = df_props_border_d.rename(columns=rename_dict)
611
+ df_props_border_list.append(df_props_border_d)
612
+
613
+ if df_props_border_list:
614
+ df_props_border = reduce(
615
+ lambda left, right: pd.merge(left, right, on=["label"], how="outer"),
616
+ df_props_border_list,
617
+ )
618
+ df_props = df_props.merge(df_props_border, how="outer", on="label")
619
+
620
+ if haralick_options is not None:
621
+ try:
622
+ df_haralick = compute_haralick_features(
623
+ img, label, channels=channels, **haralick_options
624
+ )
625
+ if df_haralick is not None:
626
+ df_haralick = df_haralick.rename(columns={"cell_id": "label"})
627
+ df_props = df_props.merge(
628
+ df_haralick, how="outer", on="label", suffixes=("_delme", "")
629
+ )
630
+ df_props = df_props[
631
+ [c for c in df_props.columns if not c.endswith("_delme")]
632
+ ]
633
+ except Exception as e:
634
+ logger.error(f"Haralick computation failed: {e}")
635
+ pass
636
+
637
+ if channels is not None:
638
+ df_props = rename_intensity_column(df_props, channels)
639
+
640
+ df_props.rename(columns={"label": "class_id"}, inplace=True)
641
+ df_props["class_id"] = df_props["class_id"].astype(float)
642
+
643
+ return df_props
644
+
645
+
646
+ def compute_haralick_features(
647
+ img,
648
+ labels,
649
+ channels=None,
650
+ target_channel=0,
651
+ scale_factor=1,
652
+ percentiles=(0.01, 99.99),
653
+ clip_values=None,
654
+ n_intensity_bins=256,
655
+ ignore_zero=True,
656
+ return_mean=True,
657
+ return_mean_ptp=False,
658
+ distance=1,
659
+ disable_progress_bar=False,
660
+ return_norm_image_only=False,
661
+ return_digit_image_only=False,
662
+ ):
663
+ """
664
+
665
+ Compute Haralick texture features on each segmented region of an image.
666
+
667
+ Parameters
668
+ ----------
669
+ img : ndarray
670
+ The input image as a NumPy array.
671
+ labels : ndarray
672
+ The segmentation labels corresponding to the image regions.
673
+ target_channel : int, optional
674
+ The target channel index of the image. The default is 0.
675
+ modality : str, optional
676
+ The modality or channel type of the image. The default is 'brightfield_channel'.
677
+ scale_factor : float, optional
678
+ The scale factor for resampling the image and labels. The default is 1.
679
+ percentiles : tuple of float, optional
680
+ The percentiles to use for image normalization. The default is (0.01, 99.99).
681
+ clip_values : tuple of float, optional
682
+ The minimum and maximum values to clip the image. If None, percentiles are used. The default is None.
683
+ n_intensity_bins : int, optional
684
+ The number of intensity bins for image normalization. The default is 255.
685
+ ignore_zero : bool, optional
686
+ Flag indicating whether to ignore zero values during feature computation. The default is True.
687
+ return_mean : bool, optional
688
+ Flag indicating whether to return the mean value of each Haralick feature. The default is True.
689
+ return_mean_ptp : bool, optional
690
+ Flag indicating whether to return the mean and peak-to-peak values of each Haralick feature. The default is False.
691
+ distance : int, optional
692
+ The distance parameter for Haralick feature computation. The default is 1.
693
+
694
+ Returns
695
+ -------
696
+ features : DataFrame
697
+ A pandas DataFrame containing the computed Haralick features for each segmented region.
698
+
699
+ Notes
700
+ -----
701
+ This function computes Haralick features on an image within segmented regions.
702
+ It uses the mahotas library for feature extraction and pandas DataFrame for storage.
703
+ The image is rescaled, normalized and digitized based on the specified parameters.
704
+ Haralick features are computed for each segmented region, and the results are returned as a DataFrame.
705
+
706
+ Examples
707
+ --------
708
+ >>> features = compute_haralick_features(img, labels, target_channel=0, modality="brightfield_channel")
709
+ # Compute Haralick features on the image within segmented regions.
710
+
711
+ """
712
+
713
+ assert (img.ndim == 2) | (
714
+ img.ndim == 3
715
+ ), f"Invalid image shape to compute the Haralick features. Expected YXC, got {img.shape}..."
716
+ assert (
717
+ img.shape[:2] == labels.shape
718
+ ), f"Mismatch between image shape {img.shape} and labels shape {labels.shape}"
719
+
720
+ if img.ndim == 2:
721
+ img = img[:, :, np.newaxis]
722
+ target_channel = 0
723
+ if isinstance(channels, list):
724
+ modality = channels[0]
725
+ elif isinstance(channels, str):
726
+ modality = channels
727
+ else:
728
+ logger.error("Channel name unrecognized...")
729
+ modality = ""
730
+ elif img.ndim == 3:
731
+ assert (
732
+ target_channel is not None
733
+ ), "The image is multichannel. Please provide a target channel to compute the Haralick features. Abort."
734
+ modality = channels[target_channel]
735
+
736
+ haralick_labels = [
737
+ "angular_second_moment",
738
+ "contrast",
739
+ "correlation",
740
+ "sum_of_square_variance",
741
+ "inverse_difference_moment",
742
+ "sum_average",
743
+ "sum_variance",
744
+ "sum_entropy",
745
+ "entropy",
746
+ "difference_variance",
747
+ "difference_entropy",
748
+ "information_measure_of_correlation_1",
749
+ "information_measure_of_correlation_2",
750
+ "maximal_correlation_coefficient",
751
+ ]
752
+
753
+ haralick_labels = ["haralick_" + h + "_" + modality for h in haralick_labels]
754
+ if len(img.shape) == 3:
755
+ img = img[:, :, target_channel]
756
+
757
+ # Routine to skip black frames
758
+ if np.percentile(img.flatten(), 99.9) == 0.0:
759
+ return None
760
+
761
+ img = interpolate_nan(img)
762
+
763
+ # Rescale image and mask
764
+ from scipy.ndimage import zoom
765
+
766
+ img = zoom(img, [scale_factor, scale_factor], order=3).astype(float)
767
+ labels = zoom(labels, [scale_factor, scale_factor], order=0)
768
+
769
+ # Normalize image
770
+ if clip_values is None:
771
+ min_value = np.nanpercentile(img[img != 0.0].flatten(), percentiles[0])
772
+ max_value = np.nanpercentile(img[img != 0.0].flatten(), percentiles[1])
773
+ else:
774
+ min_value = clip_values[0]
775
+ max_value = clip_values[1]
776
+
777
+ img -= min_value
778
+ img /= (max_value - min_value) / n_intensity_bins
779
+ img[img <= 0.0] = 0.0
780
+ img[img >= n_intensity_bins] = n_intensity_bins
781
+
782
+ if return_norm_image_only:
783
+ return img
784
+
785
+ hist, bins = np.histogram(img.flatten(), bins=n_intensity_bins)
786
+ centered_bins = [bins[0]] + [
787
+ bins[i] + (bins[i + 1] - bins[i]) / 2.0 for i in range(len(bins) - 1)
788
+ ]
789
+
790
+ digitized = np.digitize(img, bins)
791
+ img_binned = np.zeros_like(img)
792
+ for i in range(img.shape[0]):
793
+ for j in range(img.shape[1]):
794
+ img_binned[i, j] = centered_bins[digitized[i, j] - 1]
795
+
796
+ img = img_binned.astype(int)
797
+ if return_digit_image_only:
798
+ return img
799
+
800
+ haralick_properties = []
801
+
802
+ from tqdm import tqdm
803
+
804
+ for cell in tqdm(np.unique(labels)[1:], disable=disable_progress_bar):
805
+
806
+ mask = labels == cell
807
+ f = img * mask
808
+ from mahotas.features import haralick
809
+
810
+ features = haralick(
811
+ f, ignore_zeros=ignore_zero, return_mean=return_mean, distance=distance
812
+ )
813
+
814
+ dictionary = {"cell_id": cell}
815
+ for k in range(len(features)):
816
+ dictionary.update({haralick_labels[k]: features[k]})
817
+ haralick_properties.append(dictionary)
818
+
819
+ assert len(haralick_properties) == (
820
+ len(np.unique(labels)) - 1
821
+ ), "Some cells have not been measured..."
822
+
823
+ import pandas as pd
824
+
825
+ return pd.DataFrame(haralick_properties)
826
+
827
+
828
+ def measure_isotropic_intensity(
829
+ positions, # Dataframe of cell positions @ t
830
+ img, # multichannel frame (YXC) @ t
831
+ channels=None, # channels, need labels to name measurements
832
+ intensity_measurement_radii=None, # list of radii, single value is circle, tuple is ring?
833
+ operations=["mean"],
834
+ measurement_kernel=None,
835
+ pbar=None,
836
+ column_labels={
837
+ "track": "TRACK_ID",
838
+ "time": "FRAME",
839
+ "x": "POSITION_X",
840
+ "y": "POSITION_Y",
841
+ },
842
+ verbose=True,
843
+ ):
844
+ """
845
+
846
+ Measure isotropic intensity values around cell positions in an image.
847
+
848
+ Parameters
849
+ ----------
850
+ positions : pandas DataFrame
851
+ DataFrame of cell positions at time 't' containing columns specified in `column_labels`.
852
+ img : numpy array
853
+ Multichannel frame (YXC) at time 't' used for intensity measurement.
854
+ channels : list or str, optional
855
+ List of channel names corresponding to the image channels. Default is None.
856
+ intensity_measurement_radii : int, list, or tuple
857
+ Radius or list of radii specifying the size of the isotropic measurement area.
858
+ If a single value is provided, a circular measurement area is used. If a list or tuple of two values
859
+ is provided, a ring-shaped measurement area is used. Default is None.
860
+ operations : list, optional
861
+ List of operations to perform on the intensity values. Default is ['mean'].
862
+ measurement_kernel : numpy array, optional
863
+ Kernel used for intensity measurement. If None, a circular or ring-shaped kernel is generated
864
+ based on the provided `intensity_measurement_radii`. Default is None.
865
+ pbar : tqdm progress bar, optional
866
+ Progress bar for tracking the measurement process. Default is None.
867
+ column_labels : dict, optional
868
+ Dictionary containing the column labels for the DataFrame. Default is {'track': "TRACK_ID",
869
+ 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
870
+ verbose : bool, optional
871
+ If True, enables verbose output. Default is True.
872
+
873
+ Returns
874
+ -------
875
+ pandas DataFrame
876
+ The updated DataFrame `positions` with additional columns representing the measured intensity values.
877
+
878
+ Notes
879
+ -----
880
+ This function measures the isotropic intensity values around the cell positions specified in the `positions`
881
+ DataFrame using the provided image `img`. The intensity measurements are performed using circular or ring-shaped
882
+ measurement areas defined by the `intensity_measurement_radii`. The measurements are calculated for each channel
883
+ specified in the `channels` list. The resulting intensity values are stored in additional columns of the `positions`
884
+ DataFrame. The `operations` parameter allows specifying different operations to be performed on the intensity
885
+ values, such as 'mean', 'median', etc. The measurement kernel can be customized by providing the `measurement_kernel`
886
+ parameter. If not provided, the measurement kernel is automatically generated based on the `intensity_measurement_radii`.
887
+ The progress bar `pbar` can be used to track the measurement process. The `column_labels` dictionary is used to
888
+ specify the column labels for the DataFrame.
889
+
890
+ Examples
891
+ --------
892
+ >>> positions = pd.DataFrame({'TRACK_ID': [1, 2, 3], 'FRAME': [1, 1, 1],
893
+ ... 'POSITION_X': [10, 20, 30], 'POSITION_Y': [15, 25, 35]})
894
+ >>> img = np.random.rand(100, 100, 3)
895
+ >>> channels = ['channel1', 'channel2', 'channel3']
896
+ >>> intensity_measurement_radii = 5
897
+ >>> positions = measure_isotropic_intensity(positions, img, channels=channels,
898
+ ... intensity_measurement_radii=intensity_measurement_radii)
899
+ # Measure isotropic intensity values around cell positions in the image.
900
+
901
+ """
902
+
903
+ epsilon = -10000
904
+ assert (img.ndim == 2) | (
905
+ img.ndim == 3
906
+ ), f"Invalid image shape to compute the Haralick features. Expected YXC, got {img.shape}..."
907
+
908
+ if img.ndim == 2:
909
+ img = img[:, :, np.newaxis]
910
+         if isinstance(channels, str):
+             channels = [channels]
+         else:
+             if verbose:
+                 print("Channel name unrecognized...")
+             channels = ["intensity"]
+     elif img.ndim == 3:
+         assert (
+             channels is not None
+         ), "The image is multichannel. Please provide the list of channel names. Abort."
+
+     if isinstance(intensity_measurement_radii, int) or isinstance(
+         intensity_measurement_radii, float
+     ):
+         intensity_measurement_radii = [intensity_measurement_radii]
+
+     if (measurement_kernel is None) * (intensity_measurement_radii is not None):
+
+         for r in intensity_measurement_radii:
+
+             if isinstance(r, list):
+                 mask = create_patch_mask(
+                     2 * max(r) + 1,
+                     2 * max(r) + 1,
+                     ((2 * max(r)) // 2, (2 * max(r)) // 2),
+                     radius=r,
+                 )
+             else:
+                 mask = create_patch_mask(
+                     2 * r + 1, 2 * r + 1, ((2 * r) // 2, (2 * r) // 2), r
+                 )
+
+             pad_value_x = mask.shape[0] // 2 + 1
+             pad_value_y = mask.shape[1] // 2 + 1
+             frame_padded = np.pad(
+                 img.astype(float),
+                 [(pad_value_x, pad_value_x), (pad_value_y, pad_value_y), (0, 0)],
+                 constant_values=[(epsilon, epsilon), (epsilon, epsilon), (0, 0)],
+             )
+
+             # Find a way to measure intensity in mask
+             for tid, group in positions.groupby(column_labels["track"]):
+
+                 x = group[column_labels["x"]].to_numpy()[0]
+                 y = group[column_labels["y"]].to_numpy()[0]
+
+                 xmin = int(x)
+                 xmax = int(x) + 2 * pad_value_y - 1
+                 ymin = int(y)
+                 ymax = int(y) + 2 * pad_value_x - 1
+
+                 assert (
+                     frame_padded[ymin:ymax, xmin:xmax, 0].shape == mask.shape
+                 ), "Shape mismatch between the measurement kernel and the image..."
+
+                 expanded_mask = np.expand_dims(mask, axis=-1)  # shape: (X, Y, 1)
+                 crop = frame_padded[ymin:ymax, xmin:xmax]
+
+                 crop_temp = crop.copy()
+                 crop_temp[crop_temp == epsilon] = 0.0
+                 projection = np.multiply(crop_temp, expanded_mask)
+
+                 projection[crop == epsilon] = epsilon
+                 projection[expanded_mask[:, :, 0] == 0.0, :] = epsilon
+
+                 for op in operations:
+                     func = eval("np." + op)
+                     intensity_values = func(
+                         projection, axis=(0, 1), where=projection > epsilon
+                     )
+                     for k in range(crop.shape[-1]):
+                         if isinstance(r, list):
+                             positions.loc[
+                                 group.index,
+                                 f"{channels[k]}_ring_{min(r)}_{max(r)}_{op}",
+                             ] = intensity_values[k]
+                         else:
+                             positions.loc[
+                                 group.index, f"{channels[k]}_circle_{r}_{op}"
+                             ] = intensity_values[k]
+
+     elif measurement_kernel is not None:
+         # do something like this
+         mask = measurement_kernel
+         pad_value_x = mask.shape[0] // 2 + 1
+         pad_value_y = mask.shape[1] // 2 + 1
+         frame_padded = np.pad(
+             img, [(pad_value_x, pad_value_x), (pad_value_y, pad_value_y), (0, 0)]
+         )
+
+         for tid, group in positions.groupby(column_labels["track"]):
+
+             x = group[column_labels["x"]].to_numpy()[0]
+             y = group[column_labels["y"]].to_numpy()[0]
+
+             xmin = int(x)
+             xmax = int(x) + 2 * pad_value_y - 1
+             ymin = int(y)
+             ymax = int(y) + 2 * pad_value_x - 1
+
+             assert (
+                 frame_padded[ymin:ymax, xmin:xmax, 0].shape == mask.shape
+             ), "Shape mismatch between the measurement kernel and the image..."
+
+             expanded_mask = np.expand_dims(mask, axis=-1)  # shape: (X, Y, 1)
+             crop = frame_padded[ymin:ymax, xmin:xmax]
+             projection = np.multiply(crop, expanded_mask)
+
+             for op in operations:
+                 func = eval("np." + op)
+                 intensity_values = func(
+                     projection, axis=(0, 1), where=projection == projection
+                 )
+                 for k in range(crop.shape[-1]):
+                     positions.loc[group.index, f"{channels[k]}_custom_kernel_{op}"] = (
+                         intensity_values[k]
+                     )
+
+     if pbar is not None:
+         pbar.update(1)
+     positions["class_id"] = positions["class_id"].astype(float)
+     return positions
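
For orientation, a minimal usage sketch of the reformatted `measure_isotropic_intensity` (the frame, channel names and radii below are illustrative, not taken from the package; note that only operations whose NumPy function accepts a `where` argument, such as 'mean', are safe here):

>>> import numpy as np
>>> import pandas as pd
>>> positions = pd.DataFrame({'TRACK_ID': [1, 2], 'FRAME': [0, 0],
...                           'POSITION_X': [10.0, 30.0], 'POSITION_Y': [12.0, 28.0],
...                           'class_id': [1.0, 2.0]})
>>> img = np.random.rand(64, 64, 2)  # YXC frame
>>> positions = measure_isotropic_intensity(positions, img,
...                                         channels=['brightfield_channel', 'live_nuclei_channel'],
...                                         intensity_measurement_radii=[5, [5, 8]],
...                                         operations=['mean'])
# Adds columns such as 'brightfield_channel_circle_5_mean' (disk of radius 5)
# and 'brightfield_channel_ring_5_8_mean' (ring between radii 5 and 8).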
 
- 	"""
- 	Removes features related to intensity from a list of feature names.
-
- 	This function iterates over a list of feature names and removes any feature that includes the term 'intensity' in its name.
- 	A shallow copy of the list is filtered and returned; the input list itself is left unchanged.
-
- 	Parameters
- 	----------
- 	features : list of str
- 		A list of feature names from which intensity-related features are to be removed.
-
- 	Returns
- 	-------
- 	list of str
- 		The filtered list of feature names with intensity-related features removed. The return value is a new
- 		list object; the input list is not modified.
-
- 	"""
-
- 	feat2 = features[:]
- 	for f in features:
- 		if 'intensity' in f:
- 			feat2.remove(f)
- 	return feat2
-
- def measure_features(img, label, features=['area', 'intensity_mean'], channels=None,
- 					 border_dist=None, haralick_options=None, verbose=True, normalisation_list=None,
- 					 radial_intensity=None,
- 					 radial_channel=None, spot_detection=None):
- 	"""
- 	Measure features within segmented regions of an image.
-
- 	Parameters
- 	----------
- 	img : ndarray
- 		The input image as a NumPy array.
- 	label : ndarray
- 		The segmentation labels corresponding to the image regions.
- 	features : list, optional
- 		The list of features to measure within the segmented regions. The default is ['area', 'intensity_mean'].
- 	channels : list, optional
- 		The list of channel names in the image. The default is None, in which case generic names ('intensity-0', 'intensity-1', ...) are generated.
- 	border_dist : int, float, or list, optional
- 		The distance(s) in pixels from the edge of each segmented region to measure features. The default is None.
- 	haralick_options : dict, optional
- 		The options for computing Haralick features, forwarded as keyword arguments to compute_haralick_features. The default is None.
-
- 	Returns
- 	-------
- 	df_props : DataFrame
- 		A pandas DataFrame containing the measured features for each segmented region.
-
- 	Notes
- 	-----
- 	This function measures features within segmented regions of an image.
- 	It utilizes the regionprops_table function from the skimage.measure module for feature extraction.
- 	The features to measure can be specified using the 'features' parameter.
- 	Optional parameters such as 'channels' and 'border_dist' allow for additional measurements.
- 	If provided, Haralick features can be computed using the 'haralick_options' parameter.
- 	The results are returned as a pandas DataFrame.
-
- 	Examples
- 	--------
- 	>>> df_props = measure_features(img, label, features=['area', 'intensity_mean'], channels=["brightfield_channel", "dead_nuclei_channel", "live_nuclei_channel"])
- 	# Measure area and mean intensity within segmented regions of the image.
-
- 	"""
-
- 	if isinstance(features, list):
- 		features = features.copy()
-
- 	if features is None:
- 		features = []
-
- 	measure_mean_intensities = False
- 	if img is None:
- 		if verbose:
- 			print('No image was provided... Skip intensity measurements.')
- 		border_dist = None;
- 		haralick_options = None;
- 		features = drop_tonal_features(features)
-
- 	if 'intensity_mean' in features:
- 		measure_mean_intensities = True
- 		features.remove('intensity_mean')
-
- 	# Add label to have identity of mask
- 	if 'label' not in features:
- 		features.append('label')
-
- 	if img is not None:
- 		if img.ndim == 2:
- 			img = img[:, :, np.newaxis]
- 		if channels is None:
- 			channels = [f'intensity-{k}' for k in range(img.shape[-1])]
- 		if (channels is not None) * (img.ndim == 3):
- 			assert len(channels) == img.shape[
- 				-1], "Mismatch between the provided channel names and the shape of the image"
-
- 		if spot_detection is not None:
- 			for index, channel in enumerate(channels):
- 				if channel == spot_detection['channel']:
- 					ind = index
- 			if "image_preprocessing" not in spot_detection:
- 				spot_detection.update({'image_preprocessing': None})
- 			df_spots = blob_detection(img, label, diameter=spot_detection['diameter'],threshold=spot_detection['threshold'], channel_name=spot_detection['channel'], target_channel=ind, image_preprocessing=spot_detection['image_preprocessing'])
-
- 		if normalisation_list:
- 			for norm in normalisation_list:
- 				for index, channel in enumerate(channels):
- 					if channel == norm['target_channel']:
- 						ind = index
- 				if norm['correction_type'] == 'local':
- 					normalised_image = normalise_by_cell(img[:, :, ind].copy(), label,
- 														 distance=int(norm['distance']), model=norm['model'],
- 														 operation=norm['operation'], clip=norm['clip'])
- 					img[:, :, ind] = normalised_image
- 				else:
- 					corrected_image = field_correction(img[:,:,ind].copy(), threshold_on_std=norm['threshold_on_std'], operation=norm['operation'], model=norm['model'], clip=norm['clip'])
- 					img[:, :, ind] = corrected_image
-
- 	try:
- 		import celldetective.extra_properties as extra_props
- 		extraprops = True
- 	except Exception as e:
- 		print(f"The module extra_properties seems corrupted: {e}... Skip...")
- 		extraprops = False
-
- 	if extraprops:
- 		extra = getmembers(extra_props, isfunction)
- 		extra = [extra[i][0] for i in range(len(extra))]
-
- 		extra_props_list = []
- 		feats = features.copy()
- 		for f in features:
- 			if f in extra:
- 				feats.remove(f)
- 				extra_props_list.append(getattr(extra_props, f))
-
- 		# Add intensity nan mean if need to measure mean intensities
- 		if measure_mean_intensities:
- 			extra_props_list.append(getattr(extra_props, 'intensity_nanmean'))
-
- 		if len(extra_props_list) == 0:
- 			extra_props_list = None
- 		else:
- 			extra_props_list = tuple(extra_props_list)
- 	else:
- 		extra_props_list = []
- 		feats = features.copy()
-
- 	props = regionprops_table(label, intensity_image=img, properties=feats, extra_properties=extra_props_list, channel_names=channels)
- 	df_props = pd.DataFrame(props)
- 	if spot_detection is not None:
- 		if df_spots is not None:
- 			df_props = df_props.merge(df_spots, how='outer', on='label',suffixes=('_delme', ''))
- 			df_props = df_props[[c for c in df_props.columns if not c.endswith('_delme')]]
-
- 	if border_dist is not None:
- 		# automatically drop all non intensity features
- 		intensity_features_test = [('intensity' in s and 'centroid' not in s and 'peripheral' not in s) for s in
- 								   features]
- 		intensity_features = list(np.array(features)[np.array(intensity_features_test)])
- 		intensity_extra = []
- 		for s in intensity_features:
- 			if s in extra:
- 				intensity_extra.append(getattr(extra_props, s))
- 				intensity_features.remove(s)
-
- 		if len(intensity_features) == 0:
- 			if verbose:
- 				print('No intensity feature was passed... Adding mean intensity for edge measurement...')
- 			intensity_features = np.append(intensity_features, 'intensity_mean')
- 		intensity_features = list(np.append(intensity_features, 'label'))
-
- 		new_intensity_features = intensity_features.copy()
- 		for int_feat in intensity_features:
- 			if int_feat in extra:
- 				new_intensity_features.remove(int_feat)
- 		intensity_features = new_intensity_features
-
- 		if (isinstance(border_dist, int) or isinstance(border_dist, float)):
- 			border_label = contour_of_instance_segmentation(label, border_dist)
- 			props_border = regionprops_table(border_label, intensity_image=img, properties=intensity_features, channel_names=channels)
- 			df_props_border = pd.DataFrame(props_border)
- 			for c in df_props_border.columns:
- 				if 'intensity' in c:
- 					df_props_border = df_props_border.rename({c: c+f'_edge_{border_dist}px'},axis=1)
-
- 		if isinstance(border_dist, list):
- 			df_props_border_list = []
- 			for d in border_dist:
- 				border_label = contour_of_instance_segmentation(label, d)
- 				props_border = regionprops_table(border_label, intensity_image=img, properties=intensity_features, channel_names=channels)
- 				df_props_border_d = pd.DataFrame(props_border)
- 				for c in df_props_border_d.columns:
- 					if 'intensity' in c:
- 						if '-' in str(d):
- 							df_props_border_d = df_props_border_d.rename({c: c + f'_outer_edge_{d}px'}, axis=1)
- 						else:
- 							df_props_border_d = df_props_border_d.rename({c: c + f'_edge_{d}px'}, axis=1)
- 				df_props_border_list.append(df_props_border_d)
-
- 			df_props_border = reduce(lambda left,right: pd.merge(left,right,on=['label'],
- 																 how='outer'), df_props_border_list)
-
- 		df_props = df_props.merge(df_props_border, how='outer', on='label')
-
- 	if haralick_options is not None:
- 		try:
- 			df_haralick = compute_haralick_features(img, label, channels=channels, **haralick_options)
- 			if df_haralick is not None:
- 				df_haralick = df_haralick.rename(columns={"cell_id": "label"})
- 				df_props = df_props.merge(df_haralick, how='outer', on='label', suffixes=('_delme', ''))
- 				df_props = df_props[[c for c in df_props.columns if not c.endswith('_delme')]]
- 		except Exception as e:
- 			print(e)
- 			pass
-
- 	if channels is not None:
- 		df_props = rename_intensity_column(df_props, channels)
- 	df_props.rename(columns={"label": "class_id"},inplace=True)
- 	df_props['class_id'] = df_props['class_id'].astype(float)
-
- 	return df_props
-
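
As a concrete illustration of the optional arguments (the label image and channel name below are made up for the example): `haralick_options` is forwarded verbatim to `compute_haralick_features`, and `border_dist` accepts a single distance or a list of distances:

>>> import numpy as np
>>> label = np.zeros((64, 64), dtype=int); label[10:20, 10:20] = 1
>>> img = np.random.rand(64, 64, 1)
>>> df_props = measure_features(img, label,
...                             features=['area', 'intensity_mean'],
...                             channels=['brightfield_channel'],
...                             border_dist=[3, 5],
...                             haralick_options={'target_channel': 0, 'distance': 1,
...                                               'n_intensity_bins': 256})
# Edge intensities appear as '..._edge_3px' / '..._edge_5px' columns,
# Haralick textures as 'haralick_..._brightfield_channel' columns.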
- def compute_haralick_features(img, labels, channels=None, target_channel=0, scale_factor=1, percentiles=(0.01,99.99), clip_values=None,
- 							  n_intensity_bins=256, ignore_zero=True, return_mean=True, return_mean_ptp=False, distance=1, disable_progress_bar=False, return_norm_image_only=False, return_digit_image_only=False):
-
- 	"""
- 	Compute Haralick texture features on each segmented region of an image.
-
- 	Parameters
- 	----------
- 	img : ndarray
- 		The input image as a NumPy array.
- 	labels : ndarray
- 		The segmentation labels corresponding to the image regions.
- 	target_channel : int, optional
- 		The target channel index of the image. The default is 0.
- 	channels : list or str, optional
- 		The channel names of the image; the name at `target_channel` is appended to the Haralick feature labels. The default is None.
- 	scale_factor : float, optional
- 		The scale factor for resampling the image and labels. The default is 1.
- 	percentiles : tuple of float, optional
- 		The percentiles to use for image normalization. The default is (0.01, 99.99).
- 	clip_values : tuple of float, optional
- 		The minimum and maximum values to clip the image. If None, percentiles are used. The default is None.
- 	n_intensity_bins : int, optional
- 		The number of intensity bins for image normalization. The default is 256.
- 	ignore_zero : bool, optional
- 		Flag indicating whether to ignore zero values during feature computation. The default is True.
- 	return_mean : bool, optional
- 		Flag indicating whether to return the mean value of each Haralick feature. The default is True.
- 	return_mean_ptp : bool, optional
- 		Flag indicating whether to return the mean and peak-to-peak values of each Haralick feature. The default is False.
- 	distance : int, optional
- 		The distance parameter for Haralick feature computation. The default is 1.
-
- 	Returns
- 	-------
- 	features : DataFrame
- 		A pandas DataFrame containing the computed Haralick features for each segmented region.
-
- 	Notes
- 	-----
- 	This function computes Haralick features on an image within segmented regions.
- 	It uses the mahotas library for feature extraction and pandas DataFrame for storage.
- 	The image is rescaled, normalized and digitized based on the specified parameters.
- 	Haralick features are computed for each segmented region, and the results are returned as a DataFrame.
-
- 	Examples
- 	--------
- 	>>> features = compute_haralick_features(img, labels, channels=["brightfield_channel"], target_channel=0)
- 	# Compute Haralick features on the image within segmented regions.
-
- 	"""
-
- 	assert ((img.ndim==2)|(img.ndim==3)),f'Invalid image shape to compute the Haralick features. Expected YXC, got {img.shape}...'
- 	assert img.shape[:2]==labels.shape,f'Mismatch between image shape {img.shape} and labels shape {labels.shape}'
-
- 	if img.ndim==2:
- 		img = img[:,:,np.newaxis]
- 		target_channel = 0
- 		if isinstance(channels, list):
- 			modality = channels[0]
- 		elif isinstance(channels, str):
- 			modality = channels
- 		else:
- 			print('Channel name unrecognized...')
- 			modality=''
- 	elif img.ndim==3:
- 		assert target_channel is not None,"The image is multichannel. Please provide a target channel to compute the Haralick features. Abort."
- 		modality = channels[target_channel]
-
- 	haralick_labels = ["angular_second_moment",
- 					   "contrast",
- 					   "correlation",
- 					   "sum_of_square_variance",
- 					   "inverse_difference_moment",
- 					   "sum_average",
- 					   "sum_variance",
- 					   "sum_entropy",
- 					   "entropy",
- 					   "difference_variance",
- 					   "difference_entropy",
- 					   "information_measure_of_correlation_1",
- 					   "information_measure_of_correlation_2",
- 					   "maximal_correlation_coefficient"]
-
- 	haralick_labels = ['haralick_'+h+"_"+modality for h in haralick_labels]
- 	if len(img.shape)==3:
- 		img = img[:,:,target_channel]
-
- 	# Routine to skip black frames
- 	if np.percentile(img.flatten(),99.9)==0.0:
- 		return None
-
- 	img = interpolate_nan(img)
-
- 	# Rescale image and mask
- 	img = zoom(img,[scale_factor,scale_factor],order=3).astype(float)
- 	labels = zoom(labels, [scale_factor,scale_factor],order=0)
-
- 	# Normalize image
- 	if clip_values is None:
- 		min_value = np.nanpercentile(img[img!=0.].flatten(), percentiles[0])
- 		max_value = np.nanpercentile(img[img!=0.].flatten(), percentiles[1])
- 	else:
- 		min_value = clip_values[0]; max_value = clip_values[1]
-
- 	img -= min_value
- 	img /= (max_value-min_value) / n_intensity_bins
- 	img[img<=0.] = 0.
- 	img[img>=n_intensity_bins] = n_intensity_bins
-
- 	if return_norm_image_only:
- 		return img
-
- 	hist,bins = np.histogram(img.flatten(),bins=n_intensity_bins)
- 	centered_bins = [bins[0]] + [bins[i] + (bins[i+1] - bins[i])/2. for i in range(len(bins)-1)]
-
- 	digitized = np.digitize(img, bins)
- 	img_binned = np.zeros_like(img)
- 	for i in range(img.shape[0]):
- 		for j in range(img.shape[1]):
- 			img_binned[i,j] = centered_bins[digitized[i,j] - 1]
-
- 	img = img_binned.astype(int)
- 	if return_digit_image_only:
- 		return img
-
- 	haralick_properties = []
-
- 	for cell in tqdm(np.unique(labels)[1:],disable=disable_progress_bar):
-
- 		mask = labels==cell
- 		f = img*mask
- 		features = haralick(f, ignore_zeros=ignore_zero,return_mean=return_mean,distance=distance)
-
- 		dictionary = {'cell_id': cell}
- 		for k in range(len(features)):
- 			dictionary.update({haralick_labels[k]: features[k]})
- 		haralick_properties.append(dictionary)
-
- 	assert len(haralick_properties)==(len(np.unique(labels))-1),'Some cells have not been measured...'
-
- 	return pd.DataFrame(haralick_properties)
-
-
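
A minimal usage sketch (synthetic inputs; mahotas must be installed for the underlying `haralick` call):

>>> import numpy as np
>>> labels = np.zeros((32, 32), dtype=int); labels[8:16, 8:16] = 1
>>> img = np.random.rand(32, 32, 1) * 1000
>>> features = compute_haralick_features(img, labels, channels=['brightfield_channel'],
...                                      target_channel=0, n_intensity_bins=256, distance=1)
# One row per cell, with columns like 'haralick_contrast_brightfield_channel'.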
- def measure_isotropic_intensity(positions, # Dataframe of cell positions @ t
- 								img, # multichannel frame (YXC) @ t
- 								channels=None, #channels, need labels to name measurements
- 								intensity_measurement_radii=None, #list of radii, single value is circle, tuple is ring?
- 								operations = ['mean'],
- 								measurement_kernel = None,
- 								pbar=None,
- 								column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'},
- 								verbose=True,
- 								):
-
- 	"""
- 	Measure isotropic intensity values around cell positions in an image.
-
- 	Parameters
- 	----------
- 	positions : pandas DataFrame
- 		DataFrame of cell positions at time 't' containing columns specified in `column_labels`.
- 	img : numpy array
- 		Multichannel frame (YXC) at time 't' used for intensity measurement.
- 	channels : list or str, optional
- 		List of channel names corresponding to the image channels. Default is None.
- 	intensity_measurement_radii : int, list, or tuple
- 		Radius or list of radii specifying the size of the isotropic measurement area.
- 		If a single value is provided, a circular measurement area is used. If a list or tuple of two values
- 		is provided, a ring-shaped measurement area is used. Default is None.
- 	operations : list, optional
- 		List of operations to perform on the intensity values. Default is ['mean'].
- 	measurement_kernel : numpy array, optional
- 		Kernel used for intensity measurement. If None, a circular or ring-shaped kernel is generated
- 		based on the provided `intensity_measurement_radii`. Default is None.
- 	pbar : tqdm progress bar, optional
- 		Progress bar for tracking the measurement process. Default is None.
- 	column_labels : dict, optional
- 		Dictionary containing the column labels for the DataFrame. Default is {'track': "TRACK_ID",
- 		'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}.
- 	verbose : bool, optional
- 		If True, enables verbose output. Default is True.
-
- 	Returns
- 	-------
- 	pandas DataFrame
- 		The updated DataFrame `positions` with additional columns representing the measured intensity values.
-
- 	Notes
- 	-----
- 	This function measures the isotropic intensity values around the cell positions specified in the `positions`
- 	DataFrame using the provided image `img`. The intensity measurements are performed using circular or ring-shaped
- 	measurement areas defined by the `intensity_measurement_radii`. The measurements are calculated for each channel
- 	specified in the `channels` list. The resulting intensity values are stored in additional columns of the `positions`
- 	DataFrame. The `operations` parameter allows specifying different operations to be performed on the intensity
- 	values, such as 'mean', 'median', etc. The measurement kernel can be customized by providing the `measurement_kernel`
- 	parameter. If not provided, the measurement kernel is automatically generated based on the `intensity_measurement_radii`.
- 	The progress bar `pbar` can be used to track the measurement process. The `column_labels` dictionary is used to
- 	specify the column labels for the DataFrame.
-
- 	Examples
- 	--------
- 	>>> positions = pd.DataFrame({'TRACK_ID': [1, 2, 3], 'FRAME': [1, 1, 1],
- 	...                           'POSITION_X': [10, 20, 30], 'POSITION_Y': [15, 25, 35]})
- 	>>> img = np.random.rand(100, 100, 3)
- 	>>> channels = ['channel1', 'channel2', 'channel3']
- 	>>> intensity_measurement_radii = 5
- 	>>> positions = measure_isotropic_intensity(positions, img, channels=channels,
- 	...                                         intensity_measurement_radii=intensity_measurement_radii)
- 	# Measure isotropic intensity values around cell positions in the image.
-
- 	"""
-
- 	epsilon = -10000
- 	assert ((img.ndim==2)|(img.ndim==3)),f'Invalid image shape to measure the isotropic intensities. Expected YXC, got {img.shape}...'
-
- 	if img.ndim==2:
- 		img = img[:,:,np.newaxis]
- 		if isinstance(channels, str):
- 			channels = [channels]
- 		else:
- 			if verbose:
- 				print('Channel name unrecognized...')
- 			channels=['intensity']
- 	elif img.ndim==3:
- 		assert channels is not None,"The image is multichannel. Please provide the list of channel names. Abort."
-
- 	if isinstance(intensity_measurement_radii, int) or isinstance(intensity_measurement_radii, float):
- 		intensity_measurement_radii = [intensity_measurement_radii]
-
- 	if (measurement_kernel is None)*(intensity_measurement_radii is not None):
-
- 		for r in intensity_measurement_radii:
-
- 			if isinstance(r,list):
- 				mask = create_patch_mask(2*max(r)+1,2*max(r)+1,((2*max(r))//2,(2*max(r))//2),radius=r)
- 			else:
- 				mask = create_patch_mask(2*r+1,2*r+1,((2*r)//2,(2*r)//2),r)
-
- 			pad_value_x = mask.shape[0]//2 + 1
- 			pad_value_y = mask.shape[1]//2 + 1
- 			frame_padded = np.pad(img.astype(float), [(pad_value_x,pad_value_x),(pad_value_y,pad_value_y),(0,0)], constant_values=[(epsilon,epsilon),(epsilon,epsilon),(0,0)])
-
- 			# Find a way to measure intensity in mask
- 			for tid,group in positions.groupby(column_labels['track']):
-
- 				x = group[column_labels['x']].to_numpy()[0]
- 				y = group[column_labels['y']].to_numpy()[0]
-
- 				xmin = int(x)
- 				xmax = int(x) + 2*pad_value_y - 1
- 				ymin = int(y)
- 				ymax = int(y) + 2*pad_value_x - 1
-
- 				assert frame_padded[ymin:ymax,xmin:xmax,0].shape == mask.shape,"Shape mismatch between the measurement kernel and the image..."
-
- 				expanded_mask = np.expand_dims(mask, axis=-1) # shape: (X, Y, 1)
- 				crop = frame_padded[ymin:ymax,xmin:xmax]
-
- 				crop_temp = crop.copy()
- 				crop_temp[crop_temp==epsilon] = 0.
- 				projection = np.multiply(crop_temp, expanded_mask)
-
- 				projection[crop==epsilon] = epsilon
- 				projection[expanded_mask[:,:,0]==0.,:] = epsilon
-
- 				for op in operations:
- 					func = eval('np.'+op)
- 					intensity_values = func(projection, axis=(0,1), where=projection>epsilon)
- 					for k in range(crop.shape[-1]):
- 						if isinstance(r,list):
- 							positions.loc[group.index, f'{channels[k]}_ring_{min(r)}_{max(r)}_{op}'] = intensity_values[k]
- 						else:
- 							positions.loc[group.index, f'{channels[k]}_circle_{r}_{op}'] = intensity_values[k]
-
- 	elif (measurement_kernel is not None):
- 		# do something like this
- 		mask = measurement_kernel
- 		pad_value_x = mask.shape[0]//2 + 1
- 		pad_value_y = mask.shape[1]//2 + 1
- 		frame_padded = np.pad(img, [(pad_value_x,pad_value_x),(pad_value_y,pad_value_y),(0,0)])
-
- 		for tid,group in positions.groupby(column_labels['track']):
-
- 			x = group[column_labels['x']].to_numpy()[0]
- 			y = group[column_labels['y']].to_numpy()[0]
-
- 			xmin = int(x)
- 			xmax = int(x) + 2*pad_value_y - 1
- 			ymin = int(y)
- 			ymax = int(y) + 2*pad_value_x - 1
-
- 			assert frame_padded[ymin:ymax,xmin:xmax,0].shape == mask.shape,"Shape mismatch between the measurement kernel and the image..."
-
- 			expanded_mask = np.expand_dims(mask, axis=-1) # shape: (X, Y, 1)
- 			crop = frame_padded[ymin:ymax,xmin:xmax]
- 			projection = np.multiply(crop, expanded_mask)
-
- 			for op in operations:
- 				func = eval('np.'+op)
- 				intensity_values = func(projection, axis=(0,1), where=projection==projection)
- 				for k in range(crop.shape[-1]):
- 					positions.loc[group.index, f'{channels[k]}_custom_kernel_{op}'] = intensity_values[k]
-
- 	if pbar is not None:
- 		pbar.update(1)
- 	positions['class_id'] = positions['class_id'].astype(float)
- 	return positions
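
The circular and ring kernels come from `create_patch_mask`, whose implementation is not shown in this hunk. For intuition, a minimal NumPy equivalent (an assumption for illustration, not the package's code) could look like:

>>> import numpy as np
>>> def patch_mask_sketch(w, h, center, radius):
...     # radius may be a scalar (disk) or a [r_in, r_out] pair (ring) - assumed semantics
...     yy, xx = np.mgrid[:h, :w]
...     d2 = (yy - center[0]) ** 2 + (xx - center[1]) ** 2
...     if np.isscalar(radius):
...         return (d2 <= radius ** 2).astype(float)
...     return ((d2 >= min(radius) ** 2) & (d2 <= max(radius) ** 2)).astype(float)
>>> ring = patch_mask_sketch(17, 17, (8, 8), [5, 8])  # 17 = 2 * max(r) + 1, as above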
 
  def measure_at_position(pos, mode, return_measurements=False, threads=1):
-
- 	"""
- 	Executes a measurement script at a specified position directory, optionally returning the measured data.
-
- 	This function calls an external Python script to perform measurements on data
- 	located in a specified position directory. The measurement mode determines the type of analysis performed by the script.
- 	The function either loads and returns the measurements as a pandas DataFrame or returns None.
-
- 	Parameters
- 	----------
- 	pos : str
- 		The path to the position directory where the measurements should be performed. The path should be a valid directory.
- 	mode : str
- 		The measurement mode to be used by the script. This determines the type of analysis performed (e.g., 'tracking',
- 		'feature_extraction').
- 	return_measurements : bool, optional
- 		If True, the function loads the resulting measurements from a CSV file into a pandas DataFrame and returns it. If
- 		False, the function returns None (default is False).
-
- 	Returns
- 	-------
- 	pandas.DataFrame or None
- 		If `return_measurements` is True, returns a pandas DataFrame containing the measurements. Otherwise, returns None.
-
- 	"""
-
- 	pos = pos.replace('\\','/')
- 	pos = rf"{pos}"
- 	assert os.path.exists(pos),f'Position {pos} is not a valid path.'
- 	if not pos.endswith('/'):
- 		pos += '/'
- 	script_path = os.sep.join([abs_path, 'scripts', 'measure_cells.py'])
- 	cmd = f'python "{script_path}" --pos "{pos}" --mode "{mode}" --threads "{threads}"'
- 	subprocess.call(cmd, shell=True)
-
- 	table = pos + os.sep.join(["output","tables",f"trajectories_{mode}.csv"])
- 	if return_measurements:
- 		df = pd.read_csv(table)
- 		return df
- 	else:
- 		return None
-
-
- def local_normalisation(image, labels, background_intensity, measurement='intensity_median', operation='subtract', clip=False):
-
-
- 	for index, cell in enumerate(np.unique(labels)):
- 		if cell == 0:
- 			continue
- 		if operation == 'subtract':
- 			image[np.where(labels == cell)] = image[np.where(labels == cell)].astype(float) - \
- 											  background_intensity[measurement][index-1].astype(float)
- 		elif operation == 'divide':
- 			image[np.where(labels == cell)] = image[np.where(labels == cell)].astype(float) / \
- 											  background_intensity[measurement][index-1].astype(float)
- 	if clip:
- 		image[image<=0.] = 0.
-
- 	return image.astype(float)
-
-
- def normalise_by_cell(image, labels, distance=5, model='median', operation='subtract', clip=False):
-
- 	try:
- 		import celldetective.extra_properties as extra_props
- 		extraprops = True
- 	except Exception as e:
- 		print(f"The module extra_properties seems corrupted: {e}... Skip...")
- 		extraprops = False
-
- 	border = contour_of_instance_segmentation(label=labels, distance=distance * (-1))
- 	if model == 'mean':
-
- 		measurement = 'intensity_nanmean'
- 		if extraprops:
- 			extra_props = [getattr(extra_props, measurement)]
- 		else:
- 			extra_props = []
-
- 		background_intensity = regionprops_table(intensity_image=image, label_image=border,
- 												 extra_properties=extra_props)
- 	elif model == 'median':
-
- 		measurement = 'intensity_median'
- 		if extraprops:
- 			extra_props = [getattr(extra_props, measurement)]
- 		else:
- 			extra_props = []
-
- 		background_intensity = regionprops_table(intensity_image=image, label_image=border,
- 												 extra_properties=extra_props)
-
- 	normalised_frame = local_normalisation(image=image.astype(float).copy(),
- 										   labels=labels, background_intensity=background_intensity, measurement=measurement,
- 										   operation=operation, clip=clip)
-
- 	return normalised_frame
-
-
- def extract_blobs_in_image(image, label, diameter, threshold=0., method="log", image_preprocessing=None):
-
- 	if np.percentile(image.flatten(),99.9)==0.0:
- 		return None
-
- 	if isinstance(image_preprocessing, (list, np.ndarray)):
- 		image = filter_image(image.copy(),filters=image_preprocessing) # apply prefiltering to images before spot detection
-
- 	dilated_image = ndimage.grey_dilation(label, footprint=disk(int(1.2*diameter))) # dilation larger than spot diameter to be safe
-
- 	masked_image = image.copy()
- 	masked_image[np.where((dilated_image == 0)|(image!=image))] = 0
- 	min_sigma = (1 / (1 + math.sqrt(2))) * diameter
- 	max_sigma = math.sqrt(2) * min_sigma
- 	if method=="dog":
- 		blobs = blob_dog(masked_image, threshold=threshold, min_sigma=min_sigma, max_sigma=max_sigma, overlap=0.75)
- 	elif method=="log":
- 		blobs = blob_log(masked_image, threshold=threshold, min_sigma=min_sigma, max_sigma=max_sigma, overlap=0.75)
-
- 	# Exclude spots outside of cell masks
- 	mask = np.array([label[int(y), int(x)] != 0 for y, x, _ in blobs])
- 	if np.any(mask):
- 		blobs_filtered = blobs[mask]
- 	else:
- 		blobs_filtered=[]
-
- 	return blobs_filtered
-
-
- def blob_detection(image, label, diameter, threshold=0., channel_name=None, target_channel=0, method="log", image_preprocessing=None):
-
-
- 	image = image[:, :, target_channel].copy()
- 	if np.percentile(image.flatten(),99.9)==0.0:
- 		return None
-
- 	detections = []
- 	blobs_filtered = extract_blobs_in_image(image, label, diameter, method=method, threshold=threshold, image_preprocessing=image_preprocessing)
-
- 	for lbl in np.unique(label):
- 		if lbl>0:
-
- 			blob_selection = np.array([label[int(y), int(x)] == lbl for y, x, _ in blobs_filtered])
- 			if np.any(blob_selection):
- 				# if any spot
- 				blobs_in_cell = blobs_filtered[blob_selection]
- 				n_spots = len(blobs_in_cell)
- 				binary_blobs = np.zeros_like(label)
- 				for blob in blobs_in_cell:
- 					y, x, sig = blob
- 					r = np.sqrt(2)*sig
- 					rr, cc = dsk((y, x), r, shape=binary_blobs.shape)
- 					binary_blobs[rr, cc] = 1
- 				intensity_mean = np.nanmean(image[binary_blobs==1].flatten())
- 			else:
- 				n_spots = 0
- 				intensity_mean = np.nan
- 			detections.append({'label': lbl, f'{channel_name}_spot_count': n_spots, f'{channel_name}_mean_spot_intensity': intensity_mean})
- 	detections = pd.DataFrame(detections)
-
- 	return detections
-
-
- def estimate_time(df, class_attr, model='step_function', class_of_interest=[2], r2_threshold=0.5):
-
- 	"""
- 	Estimate the timing of an event for cells based on classification status and fit a model to the observed status signal.
-
- 	Parameters
- 	----------
- 	df : pandas.DataFrame
- 		DataFrame containing tracked data with classification and status columns.
- 	class_attr : str
- 		Column name for the classification attribute (e.g., 'class_event').
- 	model : str, optional
- 		Name of the model function used to fit the status signal (default is 'step_function').
- 	class_of_interest : list, optional
- 		List of class values that define the cells of interest for analysis (default is [2]).
- 	r2_threshold : float, optional
- 		R-squared threshold for determining if the model fit is acceptable (default is 0.5).
-
- 	Returns
- 	-------
- 	pandas.DataFrame
- 		Updated DataFrame with estimated event timing added in a column replacing 'class' with 't',
- 		and reclassification of cells based on the model fit.
-
- 	Notes
- 	-----
- 	- The function assumes that cells are grouped by a unique identifier ('TRACK_ID') and sorted by time ('FRAME').
- 	- If the model provides a poor fit (R² < r2_threshold), the class of interest is set to 2.0 and timing (-1).
- 	- The function supports different models that can be passed as the `model` parameter, which are evaluated using `eval()`.
-
- 	Example
- 	-------
- 	>>> df = estimate_time(df, 'class', model='step_function', class_of_interest=[2], r2_threshold=0.6)
-
- 	"""
-
- 	cols = list(df.columns)
- 	assert 'TRACK_ID' in cols,'Please provide tracked data...'
- 	if 'position' in cols:
- 		sort_cols = ['position', 'TRACK_ID']
- 	else:
- 		sort_cols = ['TRACK_ID']
-
- 	df = df.sort_values(by=sort_cols,ignore_index=True)
- 	df = df.reset_index(drop=True)
- 	max_time = df['FRAME'].max()
-
- 	for tid,group in df.loc[df[class_attr].isin(class_of_interest)].groupby(sort_cols):
-
- 		indices = group.index
- 		status_col = class_attr.replace('class','status')
-
- 		group_clean = group.dropna(subset=status_col)
- 		status_signal = group_clean[status_col].values
- 		if np.all(np.array(status_signal)==1):
- 			continue
-
- 		timeline = group_clean['FRAME'].values
- 		frames = group_clean['FRAME'].to_numpy()
- 		status_values = group_clean[status_col].to_numpy()
- 		t_first = group['t_firstdetection'].to_numpy()[0]
-
- 		try:
- 			popt, pcov = curve_fit(eval(model), timeline.astype(int), status_signal, p0=[max(timeline)//2, 0.8],maxfev=100000)
- 			values = [eval(model)(t, *popt) for t in timeline]
- 			r2 = r2_score(status_signal,values)
- 		except Exception:
- 			df.loc[indices, class_attr] = 2.0
- 			df.loc[indices, class_attr.replace('class','t')] = -1
- 			continue
-
- 		if r2 > float(r2_threshold):
- 			t0 = popt[0]
- 			if t0>=max_time:
- 				t0 = max_time - 1
- 			df.loc[indices, class_attr.replace('class','t')] = t0
- 			df.loc[indices, class_attr] = 0.0
- 		else:
- 			df.loc[indices, class_attr.replace('class','t')] = -1
- 			df.loc[indices, class_attr] = 2.0
-
- 	return df
-
-
- def interpret_track_classification(df, class_attr, irreversible_event=False, unique_state=False, transient_event=False, r2_threshold=0.5, percentile_recovery=50, pre_event=None):
-
- 	"""
- 	Interpret and classify tracked cells based on their status signals.
-
- 	Parameters
- 	----------
- 	df : pandas.DataFrame
- 		DataFrame containing tracked cell data, including a classification attribute column and other necessary columns.
- 	class_attr : str
- 		Column name for the classification attribute (e.g., 'class') used to determine the state of cells.
- 	irreversible_event : bool, optional
- 		If True, classifies irreversible events in the dataset (default is False).
- 		When set to True, `unique_state` is ignored.
- 	unique_state : bool, optional
- 		If True, classifies unique states of cells in the dataset based on a percentile threshold (default is False).
- 		This option is ignored if `irreversible_event` is set to True.
- 	r2_threshold : float, optional
- 		R-squared threshold used when fitting the model during the classification of irreversible events (default is 0.5).
-
- 	Returns
- 	-------
- 	pandas.DataFrame
- 		DataFrame with updated classifications for cell trajectories:
- 		- If `irreversible_event` is True, it classifies irreversible events using the `classify_irreversible_events` function.
- 		- If `unique_state` is True, it classifies unique states using the `classify_unique_states` function.
-
- 	Raises
- 	------
- 	AssertionError
- 		If the 'TRACK_ID' column is missing in the input DataFrame.
-
- 	Notes
- 	-----
- 	- The function assumes that the input DataFrame contains a column for tracking cells (`TRACK_ID`) and possibly a 'position' column.
- 	- The classification behavior depends on the `irreversible_event` and `unique_state` flags:
- 		- When `irreversible_event` is True, the function classifies events that are considered irreversible.
- 		- When `unique_state` is True (and `irreversible_event` is False), it classifies unique states using a 50th percentile threshold.
-
- 	Example
- 	-------
- 	>>> df = interpret_track_classification(df, 'class', irreversible_event=True, r2_threshold=0.7)
-
- 	"""
-
- 	cols = list(df.columns)
-
- 	assert 'TRACK_ID' in cols,'Please provide tracked data...'
- 	if 'position' in cols:
- 		sort_cols = ['position', 'TRACK_ID']
- 	else:
- 		sort_cols = ['TRACK_ID']
- 	if class_attr.replace('class','status') not in cols:
- 		df.loc[:,class_attr.replace('class','status')] = df.loc[:,class_attr]
-
- 	if irreversible_event:
- 		unique_state = False
-
- 	if irreversible_event:
-
- 		df = classify_irreversible_events(df, class_attr, r2_threshold=r2_threshold, percentile_recovery=percentile_recovery, pre_event=pre_event)
-
- 	elif unique_state:
-
- 		df = classify_unique_states(df, class_attr, percentile=50, pre_event=pre_event)
-
- 	elif transient_event:
-
- 		df = classify_transient_events(df, class_attr, pre_event=pre_event)
-
- 	return df
+     """
+     Executes a measurement script at a specified position directory, optionally returning the measured data.
+
+     This function calls an external Python script to perform measurements on data
+     located in a specified position directory. The measurement mode determines the type of analysis performed by the script.
+     The function either loads and returns the measurements as a pandas DataFrame or returns None.
+
+     Parameters
+     ----------
+     pos : str
+         The path to the position directory where the measurements should be performed. The path should be a valid directory.
+     mode : str
+         The measurement mode to be used by the script. This determines the type of analysis performed (e.g., 'tracking',
+         'feature_extraction').
+     return_measurements : bool, optional
+         If True, the function loads the resulting measurements from a CSV file into a pandas DataFrame and returns it. If
+         False, the function returns None (default is False).
+
+     Returns
+     -------
+     pandas.DataFrame or None
+         If `return_measurements` is True, returns a pandas DataFrame containing the measurements. Otherwise, returns None.
+
+     """
+
+     pos = pos.replace("\\", "/")
+     pos = rf"{pos}"
+     assert os.path.exists(pos), f"Position {pos} is not a valid path."
+     if not pos.endswith("/"):
+         pos += "/"
+     script_path = os.sep.join([abs_path, "scripts", "measure_cells.py"])
+     cmd = f'python "{script_path}" --pos "{pos}" --mode "{mode}" --threads "{threads}"'
+     subprocess.call(cmd, shell=True)
+
+     table = pos + os.sep.join(["output", "tables", f"trajectories_{mode}.csv"])
+     if return_measurements:
+         import pandas as pd
+
+         df = pd.read_csv(table)
+         return df
+     else:
+         return None
+
+
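
A usage sketch (the position path and mode name below are made-up examples; `mode` selects which population's measurement configuration the script applies, and the output table follows the `trajectories_{mode}.csv` pattern visible above):

>>> df = measure_at_position('/path/to/experiment/W1/100', 'targets', return_measurements=True)
# Runs scripts/measure_cells.py on the position folder, then loads
# output/tables/trajectories_targets.csv into a DataFrame.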
+ def local_normalisation(
+     image,
+     labels,
+     background_intensity,
+     measurement="intensity_median",
+     operation="subtract",
+     clip=False,
+ ):
+
+     for index, cell in enumerate(np.unique(labels)):
+         if cell == 0:
+             continue
+         if operation == "subtract":
+             image[np.where(labels == cell)] = image[np.where(labels == cell)].astype(
+                 float
+             ) - background_intensity[measurement][index - 1].astype(float)
+         elif operation == "divide":
+             image[np.where(labels == cell)] = image[np.where(labels == cell)].astype(
+                 float
+             ) / background_intensity[measurement][index - 1].astype(float)
+     if clip:
+         image[image <= 0.0] = 0.0
+
+     return image.astype(float)
+
+
+ def normalise_by_cell(
+     image, labels, distance=5, model="median", operation="subtract", clip=False
+ ):
+
+     try:
+         import celldetective.extra_properties as extra_props
+
+         extraprops = True
+     except Exception as e:
+         print(f"The module extra_properties seems corrupted: {e}... Skip...")
+         extraprops = False
+
+     border = contour_of_instance_segmentation(label=labels, distance=distance * (-1))
+     if model == "mean":
+
+         measurement = "intensity_nanmean"
+         if extraprops:
+             extra_props = [getattr(extra_props, measurement)]
+         else:
+             extra_props = []
+
+         from celldetective.regionprops import regionprops_table
+
+         background_intensity = regionprops_table(
+             intensity_image=image, label_image=border, extra_properties=extra_props
+         )
+     elif model == "median":
+
+         measurement = "intensity_median"
+         if extraprops:
+             extra_props = [getattr(extra_props, measurement)]
+         else:
+             extra_props = []
+
+         from celldetective.regionprops import regionprops_table
+
+         background_intensity = regionprops_table(
+             intensity_image=image, label_image=border, extra_properties=extra_props
+         )
+
+     normalised_frame = local_normalisation(
+         image=image.astype(float).copy(),
+         labels=labels,
+         background_intensity=background_intensity,
+         measurement=measurement,
+         operation=operation,
+         clip=clip,
+     )
+
+     return normalised_frame
+
+
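
A minimal sketch of the per-cell background correction (synthetic inputs; the label image is illustrative):

>>> import numpy as np
>>> labels = np.zeros((64, 64), dtype=int); labels[20:40, 20:40] = 1
>>> image = np.random.rand(64, 64) * 100
>>> corrected = normalise_by_cell(image, labels, distance=5, model='median',
...                               operation='subtract', clip=False)
# The median intensity measured on a contour band of each mask (the distance is
# negated internally before being passed to contour_of_instance_segmentation)
# is subtracted from, or divides, every pixel of the corresponding cell.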
+ def extract_blobs_in_image(
+     image, label, diameter, threshold=0.0, method="log", image_preprocessing=None
+ ):
+
+     if np.percentile(image.flatten(), 99.9) == 0.0:
+         return None
+
+     if isinstance(image_preprocessing, (list, np.ndarray)):
+         from celldetective.filters import filter_image
+
+         image = filter_image(
+             image.copy(), filters=image_preprocessing
+         )  # apply prefiltering to images before spot detection
+
+     from scipy import ndimage
+     from skimage.morphology import disk
+
+     dilated_image = ndimage.grey_dilation(
+         label, footprint=disk(int(1.2 * diameter))
+     )  # dilation larger than spot diameter to be safe
+
+     masked_image = image.copy()
+     masked_image[np.where((dilated_image == 0) | (image != image))] = 0
+     min_sigma = (1 / (1 + math.sqrt(2))) * diameter
+     max_sigma = math.sqrt(2) * min_sigma
+     if method == "dog":
+         from skimage.feature import blob_dog
+
+         blobs = blob_dog(
+             masked_image,
+             threshold=threshold,
+             min_sigma=min_sigma,
+             max_sigma=max_sigma,
+             overlap=0.75,
+         )
+     elif method == "log":
+         from skimage.feature import blob_log
+
+         blobs = blob_log(
+             masked_image,
+             threshold=threshold,
+             min_sigma=min_sigma,
+             max_sigma=max_sigma,
+             overlap=0.75,
+         )
+
+     # Exclude spots outside of cell masks
+     mask = np.array([label[int(y), int(x)] != 0 for y, x, _ in blobs])
+     if np.any(mask):
+         blobs_filtered = blobs[mask]
+     else:
+         blobs_filtered = []
+
+     return blobs_filtered
+
+
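
On the sigma bounds: for skimage's LoG and DoG detectors, a blob of radius R responds most strongly near sigma = R / sqrt(2), so the bracket [diameter/(1+sqrt(2)), sqrt(2)*diameter/(1+sqrt(2))] used above targets spots whose characteristic radii span roughly 0.59 to 0.83 times the requested diameter. A quick arithmetic check (plain Python, not package code):

>>> import math
>>> diameter = 6.0
>>> min_sigma = diameter / (1 + math.sqrt(2))
>>> max_sigma = math.sqrt(2) * min_sigma
>>> round(math.sqrt(2) * min_sigma, 2), round(math.sqrt(2) * max_sigma, 2)
(3.51, 4.97)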
+ def blob_detection(
+     image,
+     label,
+     diameter,
+     threshold=0.0,
+     channel_name=None,
+     target_channel=0,
+     method="log",
+     image_preprocessing=None,
+ ):
+
+     image = image[:, :, target_channel].copy()
+     if np.percentile(image.flatten(), 99.9) == 0.0:
+         return None
+
+     detections = []
+     blobs_filtered = extract_blobs_in_image(
+         image,
+         label,
+         diameter,
+         method=method,
+         threshold=threshold,
+         image_preprocessing=image_preprocessing,
+     )
+
+     for lbl in np.unique(label):
+         if lbl > 0:
+
+             blob_selection = np.array(
+                 [label[int(y), int(x)] == lbl for y, x, _ in blobs_filtered]
+             )
+             if np.any(blob_selection):
+                 # if any spot
+                 blobs_in_cell = blobs_filtered[blob_selection]
+                 n_spots = len(blobs_in_cell)
+                 binary_blobs = np.zeros_like(label)
+                 for blob in blobs_in_cell:
+                     y, x, sig = blob
+                     r = np.sqrt(2) * sig
+                     from skimage.draw import disk as dsk
+
+                     rr, cc = dsk((y, x), r, shape=binary_blobs.shape)
+                     binary_blobs[rr, cc] = 1
+                 intensity_mean = np.nanmean(image[binary_blobs == 1].flatten())
+             else:
+                 n_spots = 0
+                 intensity_mean = np.nan
+             detections.append(
+                 {
+                     "label": lbl,
+                     f"{channel_name}_spot_count": n_spots,
+                     f"{channel_name}_mean_spot_intensity": intensity_mean,
+                 }
+             )
+     detections = pd.DataFrame(detections)
+
+     return detections
+
+
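
A usage sketch (synthetic multichannel frame; the channel name is illustrative):

>>> import numpy as np
>>> label = np.zeros((64, 64), dtype=int); label[10:30, 10:30] = 1
>>> img = np.random.rand(64, 64, 2)
>>> df_spots = blob_detection(img, label, diameter=4, threshold=0.01,
...                           channel_name='dead_nuclei_channel', target_channel=1)
# One row per cell, with 'dead_nuclei_channel_spot_count' and
# 'dead_nuclei_channel_mean_spot_intensity' columns.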
+ def estimate_time(
+     df, class_attr, model="step_function", class_of_interest=[2], r2_threshold=0.5
+ ):
+     """
+     Estimate the timing of an event for cells based on classification status and fit a model to the observed status signal.
+
+     Parameters
+     ----------
+     df : pandas.DataFrame
+         DataFrame containing tracked data with classification and status columns.
+     class_attr : str
+         Column name for the classification attribute (e.g., 'class_event').
+     model : str, optional
+         Name of the model function used to fit the status signal (default is 'step_function').
+     class_of_interest : list, optional
+         List of class values that define the cells of interest for analysis (default is [2]).
+     r2_threshold : float, optional
+         R-squared threshold for determining if the model fit is acceptable (default is 0.5).
+
+     Returns
+     -------
+     pandas.DataFrame
+         Updated DataFrame with estimated event timing added in a column replacing 'class' with 't',
+         and reclassification of cells based on the model fit.
+
+     Notes
+     -----
+     - The function assumes that cells are grouped by a unique identifier ('TRACK_ID') and sorted by time ('FRAME').
+     - If the model provides a poor fit (R² < r2_threshold), the class of interest is set to 2.0 and timing (-1).
+     - The function supports different models that can be passed as the `model` parameter, which are evaluated using `eval()`.
+
+     Example
+     -------
+     >>> df = estimate_time(df, 'class', model='step_function', class_of_interest=[2], r2_threshold=0.6)
+
+     """
+
+     cols = list(df.columns)
+     assert "TRACK_ID" in cols, "Please provide tracked data..."
+     if "position" in cols:
+         sort_cols = ["position", "TRACK_ID"]
+     else:
+         sort_cols = ["TRACK_ID"]
+
+     df = df.sort_values(by=sort_cols, ignore_index=True)
+     df = df.reset_index(drop=True)
+     max_time = df["FRAME"].max()
+
+     for tid, group in df.loc[df[class_attr].isin(class_of_interest)].groupby(sort_cols):
+
+         indices = group.index
+         status_col = class_attr.replace("class", "status")
+
+         group_clean = group.dropna(subset=status_col)
+         status_signal = group_clean[status_col].values
+         if np.all(np.array(status_signal) == 1):
+             continue
+
+         timeline = group_clean["FRAME"].values
+         frames = group_clean["FRAME"].to_numpy()
+         status_values = group_clean[status_col].to_numpy()
+         t_first = group["t_firstdetection"].to_numpy()[0]
+
+         try:
+             from scipy.optimize import curve_fit
+             from sklearn.metrics import r2_score
+
+             popt, pcov = curve_fit(
+                 eval(model),
+                 timeline.astype(int),
+                 status_signal,
+                 p0=[max(timeline) // 2, 0.8],
+                 maxfev=100000,
+             )
+             values = [eval(model)(t, *popt) for t in timeline]
+             r2 = r2_score(status_signal, values)
+         except Exception:
+             df.loc[indices, class_attr] = 2.0
+             df.loc[indices, class_attr.replace("class", "t")] = -1
+             continue
+
+         if r2 > float(r2_threshold):
+             t0 = popt[0]
+             if t0 >= max_time:
+                 t0 = max_time - 1
+             df.loc[indices, class_attr.replace("class", "t")] = t0
+             df.loc[indices, class_attr] = 0.0
+         else:
+             df.loc[indices, class_attr.replace("class", "t")] = -1
+             df.loc[indices, class_attr] = 2.0
+
+     return df
+
+
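
The `step_function` model itself is defined elsewhere in the package and is not shown in this hunk. For intuition, a self-contained sketch of the same fitting idea with a logistic step (an assumed stand-in; its two fitted parameters, onset and slope, mirror the `p0=[t_half, 0.8]` initialisation above):

>>> import numpy as np
>>> from scipy.optimize import curve_fit
>>> from sklearn.metrics import r2_score
>>> def step_sketch(t, t0, slope):
...     return 1.0 / (1.0 + np.exp(-slope * (t - t0)))  # smooth 0 -> 1 transition
>>> t = np.arange(30)
>>> status = (t >= 12).astype(float)  # binary status signal with an onset at frame 12
>>> popt, _ = curve_fit(step_sketch, t, status, p0=[len(t) // 2, 0.8], maxfev=100000)
>>> r2_score(status, step_sketch(t, *popt)) > 0.5  # popt[0] lands near the true onset
True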
+ def interpret_track_classification(
+     df,
+     class_attr,
+     irreversible_event=False,
+     unique_state=False,
+     transient_event=False,
+     r2_threshold=0.5,
+     percentile_recovery=50,
+     pre_event=None,
+ ):
+     """
+     Interpret and classify tracked cells based on their status signals.
+
+     Parameters
+     ----------
+     df : pandas.DataFrame
+         DataFrame containing tracked cell data, including a classification attribute column and other necessary columns.
+     class_attr : str
+         Column name for the classification attribute (e.g., 'class') used to determine the state of cells.
+     irreversible_event : bool, optional
+         If True, classifies irreversible events in the dataset (default is False).
+         When set to True, `unique_state` is ignored.
+     unique_state : bool, optional
+         If True, classifies unique states of cells in the dataset based on a percentile threshold (default is False).
+         This option is ignored if `irreversible_event` is set to True.
+     r2_threshold : float, optional
+         R-squared threshold used when fitting the model during the classification of irreversible events (default is 0.5).
+
+     Returns
+     -------
+     pandas.DataFrame
+         DataFrame with updated classifications for cell trajectories:
+         - If `irreversible_event` is True, it classifies irreversible events using the `classify_irreversible_events` function.
+         - If `unique_state` is True, it classifies unique states using the `classify_unique_states` function.
+
+     Raises
+     ------
+     AssertionError
+         If the 'TRACK_ID' column is missing in the input DataFrame.
+
+     Notes
+     -----
+     - The function assumes that the input DataFrame contains a column for tracking cells (`TRACK_ID`) and possibly a 'position' column.
+     - The classification behavior depends on the `irreversible_event` and `unique_state` flags:
+         - When `irreversible_event` is True, the function classifies events that are considered irreversible.
+         - When `unique_state` is True (and `irreversible_event` is False), it classifies unique states using a 50th percentile threshold.
+
+     Example
+     -------
+     >>> df = interpret_track_classification(df, 'class', irreversible_event=True, r2_threshold=0.7)
+
+     """
+
+     cols = list(df.columns)
+
+     assert "TRACK_ID" in cols, "Please provide tracked data..."
+     if "position" in cols:
+         sort_cols = ["position", "TRACK_ID"]
+     else:
+         sort_cols = ["TRACK_ID"]
+     if class_attr.replace("class", "status") not in cols:
+         df.loc[:, class_attr.replace("class", "status")] = df.loc[:, class_attr]
+
+     if irreversible_event:
+         unique_state = False
+
+     if irreversible_event:
+
+         df = classify_irreversible_events(
+             df,
+             class_attr,
+             r2_threshold=r2_threshold,
+             percentile_recovery=percentile_recovery,
+             pre_event=pre_event,
+         )
+
+     elif unique_state:
+
+         df = classify_unique_states(df, class_attr, percentile=50, pre_event=pre_event)
+
+     elif transient_event:
+
+         df = classify_transient_events(df, class_attr, pre_event=pre_event)
+
+     return df
 
 
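
The three mutually exclusive flags route the table to one of the event-interpretation passes; a usage sketch (the event names 'class_lysis' and 'class_adhesion' are illustrative placeholders, not package defaults):

>>> df = interpret_track_classification(df, 'class_lysis', irreversible_event=True,
...                                     r2_threshold=0.5)                            # step-like events
>>> df = interpret_track_classification(df, 'class_adhesion', transient_event=True)  # peak-like events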
  def classify_transient_events(data, class_attr, pre_event=None):
 
- 	df = data.copy()
- 	cols = list(df.columns)
-
- 	# Control input
- 	assert 'TRACK_ID' in cols,'Please provide tracked data...'
- 	if 'position' in cols:
- 		sort_cols = ['position', 'TRACK_ID']
- 		df = df.sort_values(by=sort_cols+['FRAME'])
- 	else:
- 		sort_cols = ['TRACK_ID']
- 		df = df.sort_values(by=sort_cols+['FRAME'])
- 	if pre_event is not None:
- 		assert 't_'+pre_event in cols,"Pre-event time does not seem to be a valid column in the DataFrame..."
- 		assert 'class_'+pre_event in cols,"Pre-event class does not seem to be a valid column in the DataFrame..."
-
- 	stat_col = class_attr.replace('class','status')
- 	continuous_stat_col = stat_col.replace('status_','smooth_status_')
- 	df[continuous_stat_col] = df[stat_col].copy()
-
- 	for tid,track in df.groupby(sort_cols):
-
- 		indices = track[class_attr].index
-
- 		if pre_event is not None:
-
- 			if track['class_'+pre_event].values[0]==1:
- 				df.loc[indices, class_attr] = np.nan
- 				df.loc[indices, stat_col] = np.nan
- 				continue
- 			else:
- 				# pre-event took place (if left-censored took place at time -1)
- 				t_pre_event = track['t_'+pre_event].values[0]
- 				indices_pre = track.loc[track['FRAME']<=t_pre_event,class_attr].index
- 				df.loc[indices_pre, stat_col] = np.nan # set to NaN all statuses before pre-event
- 				track.loc[track['FRAME']<=t_pre_event, stat_col] = np.nan
- 				track.loc[track['FRAME']<=t_pre_event, continuous_stat_col] = np.nan
-
- 		status = track[stat_col].to_numpy()
- 		timeline = track['FRAME'].to_numpy()
- 		timeline_safe = timeline[status==status]
- 		status_safe = list(status[status==status])
-
- 		peaks, _ = find_peaks(status_safe)
- 		widths, _, left, right = peak_widths(status_safe, peaks, rel_height=1)
- 		minimum_weight = 0
-
- 		if len(peaks)>0:
- 			idx = np.argmax(widths)
- 			peak = peaks[idx]; width = widths[idx];
- 			if width >= minimum_weight:
- 				left = left[idx]; right = right[idx];
- 				left = timeline_safe[int(left)]; right = timeline_safe[int(right)];
-
- 				df.loc[indices, class_attr] = 0
- 				t0 = left #take onset + (right - left)/2.0
- 				df.loc[indices, class_attr.replace('class_','t_')] = t0
- 				df.loc[track.loc[track[stat_col].isnull(),class_attr].index, continuous_stat_col] = np.nan
- 				df.loc[track.loc[track['FRAME']<t0,class_attr].index, continuous_stat_col] = 0
- 				df.loc[track.loc[track['FRAME']>=t0,class_attr].index, continuous_stat_col] = 1
- 			else:
- 				df.loc[indices, class_attr] = 1
- 				df.loc[indices, class_attr.replace('class_','t_')] = -1
- 				df.loc[indices, continuous_stat_col] = 0
- 		else:
- 			df.loc[indices, class_attr] = 1
- 			df.loc[indices, class_attr.replace('class_','t_')] = -1
- 			df.loc[indices, continuous_stat_col] = 0
-
- 	# restate NaN for out of scope timepoints
- 	df.loc[df[stat_col].isnull(),continuous_stat_col] = np.nan
- 	if 'inst_'+stat_col in list(df.columns):
- 		df = df.drop(columns=['inst_'+stat_col])
- 	df = df.rename(columns={stat_col: 'inst_'+stat_col})
- 	df = df.rename(columns={continuous_stat_col: stat_col})
- 	print("Classes: ",df.loc[df['FRAME']==0,class_attr].value_counts())
-
- 	return df
-
-
- def classify_irreversible_events(data, class_attr, r2_threshold=0.5, percentile_recovery=50, pre_event=None):
-
- 	"""
- 	Classify irreversible events in a tracked dataset based on the status of cells and transitions.
-
- 	Parameters
- 	----------
- 	data : pandas.DataFrame
- 		DataFrame containing tracked cell data, including classification and status columns.
- 	class_attr : str
- 		Column name for the classification attribute (e.g., 'class') used to update the classification of cell states.
- 	r2_threshold : float, optional
- 		R-squared threshold for fitting the model (default is 0.5). Used when estimating the time of transition.
-
- 	Returns
- 	-------
- 	pandas.DataFrame
- 		DataFrame with updated classifications for irreversible events, with the following outcomes:
- 		- Cells with all 0s in the status column are classified as 1 (no event).
- 		- Cells with all 1s are classified as 2 (event already occurred).
- 		- Cells with a mix of 0s and 1s are classified as 2 (ambiguous, possible transition).
- 		- For cells classified as 2, the time of the event is estimated using the `estimate_time` function. If successful they are reclassified as 0 (event).
- 		- The classification for cells still classified as 2 is revisited using the `percentile_recovery` threshold (default is the 50th percentile).
-
- 	Notes
- 	-----
- 	- The function assumes that cells are grouped by a unique identifier ('TRACK_ID') and sorted by position or ID.
- 	- The classification is based on the `stat_col` derived from `class_attr` (status column).
- 	- Cells with no event (all 0s in the status column) are assigned a class value of 1.
- 	- Cells with irreversible events (all 1s in the status column) are assigned a class value of 2.
- 	- Cells with transitions (a mix of 0s and 1s) are classified as 2 and their event times are estimated. When successful they are reclassified as 0.
- 	- After event classification, the function reclassifies leftover ambiguous cases (class 2) using the `classify_unique_states` function.
-
- 	Example
- 	-------
- 	>>> df = classify_irreversible_events(df, 'class', r2_threshold=0.7)
-
- 	"""
-
- 	df = data.copy()
- 	cols = list(df.columns)
-
- 	# Control input
- 	assert 'TRACK_ID' in cols,'Please provide tracked data...'
- 	if 'position' in cols:
- 		sort_cols = ['position', 'TRACK_ID']
- 	else:
- 		sort_cols = ['TRACK_ID']
- 	if pre_event is not None:
- 		assert 't_'+pre_event in cols,"Pre-event time does not seem to be a valid column in the DataFrame..."
- 		assert 'class_'+pre_event in cols,"Pre-event class does not seem to be a valid column in the DataFrame..."
-
- 	stat_col = class_attr.replace('class','status')
-
- 	for tid,track in df.groupby(sort_cols):
-
- 		indices = track[class_attr].index
-
- 		if pre_event is not None:
- 			if track['class_'+pre_event].values[0]==1:
- 				df.loc[indices, class_attr] = np.nan
- 				df.loc[indices, stat_col] = np.nan
- 				continue
- 			else:
- 				# pre-event took place (if left-censored took place at time -1)
- 				t_pre_event = track['t_'+pre_event].values[0]
- 				indices_pre = track.loc[track['FRAME']<=t_pre_event,class_attr].index
- 				df.loc[indices_pre, stat_col] = np.nan # set to NaN all statuses before pre-event
- 				track.loc[track['FRAME']<=t_pre_event, stat_col] = np.nan
- 		else:
- 			# set state to 0 before first detection
- 			t_firstdetection = track['t_firstdetection'].values[0]
- 			indices_pre_detection = track.loc[track['FRAME']<=t_firstdetection,class_attr].index
- 			track.loc[indices_pre_detection,stat_col] = 0.0
- 			df.loc[indices_pre_detection,stat_col] = 0.0
-
- 		# The non-NaN part of track (post pre-event)
- 		track_valid = track.dropna(subset=stat_col, inplace=False)
- 		status_values = track_valid[stat_col].to_numpy()
-
- 		if np.all([s==0 for s in status_values]):
- 			# all negative to condition, event not observed
- 			df.loc[indices, class_attr] = 1
- 		elif np.all([s==1 for s in status_values]):
- 			# all positive, event already observed (left-censored)
- 			df.loc[indices, class_attr] = 2
- 		else:
- 			# ambiguity, possible transition, use `unique_state` technique after
- 			df.loc[indices, class_attr] = 2
-
- 	print("Number of cells per class after the initial pass: ")
- 	pretty_table(df.loc[df['FRAME']==0,class_attr].value_counts().to_dict())
-
- 	df.loc[df[class_attr]!=2, class_attr.replace('class', 't')] = -1
- 	# Try to fit time on class 2 cells (ambiguous)
- 	df = estimate_time(df, class_attr, model='step_function', class_of_interest=[2], r2_threshold=r2_threshold)
-
- 	print("Number of cells per class after conditional signal fit: ")
- 	pretty_table(df.loc[df['FRAME']==0,class_attr].value_counts().to_dict())
-
- 	# Revisit class 2 cells to classify as neg/pos with percentile tolerance
- 	df.loc[df[class_attr]==2,:] = classify_unique_states(df.loc[df[class_attr]==2,:].copy(), class_attr, percentile_recovery)
- 	print("Number of cells per class after recovery pass (median state): ")
- 	pretty_table(df.loc[df['FRAME']==0,class_attr].value_counts().to_dict())
-
- 	return df
+     df = data.copy()
+     cols = list(df.columns)
+
+     # Control input
+     assert "TRACK_ID" in cols, "Please provide tracked data..."
+     if "position" in cols:
+         sort_cols = ["position", "TRACK_ID"]
+         df = df.sort_values(by=sort_cols + ["FRAME"])
+     else:
+         sort_cols = ["TRACK_ID"]
+         df = df.sort_values(by=sort_cols + ["FRAME"])
+     if pre_event is not None:
+         assert (
+             "t_" + pre_event in cols
+         ), "Pre-event time does not seem to be a valid column in the DataFrame..."
+         assert (
+             "class_" + pre_event in cols
+         ), "Pre-event class does not seem to be a valid column in the DataFrame..."
+
+     stat_col = class_attr.replace("class", "status")
+     continuous_stat_col = stat_col.replace("status_", "smooth_status_")
+     df[continuous_stat_col] = df[stat_col].copy()
+
+     for tid, track in df.groupby(sort_cols):
+
+         indices = track[class_attr].index
+
+         if pre_event is not None:
+
+             if track["class_" + pre_event].values[0] == 1:
+                 df.loc[indices, class_attr] = np.nan
+                 df.loc[indices, stat_col] = np.nan
+                 continue
+             else:
+                 # pre-event took place (if left-censored took place at time -1)
+                 t_pre_event = track["t_" + pre_event].values[0]
+                 indices_pre = track.loc[track["FRAME"] <= t_pre_event, class_attr].index
+                 df.loc[indices_pre, stat_col] = (
+                     np.nan
+                 )  # set to NaN all statuses before pre-event
+                 track.loc[track["FRAME"] <= t_pre_event, stat_col] = np.nan
+                 track.loc[track["FRAME"] <= t_pre_event, continuous_stat_col] = np.nan
+
+         status = track[stat_col].to_numpy()
+         timeline = track["FRAME"].to_numpy()
+         timeline_safe = timeline[status == status]
+         status_safe = list(status[status == status])
+
+         from scipy.signal import find_peaks, peak_widths
+
+         peaks, _ = find_peaks(status_safe)
+         widths, _, left, right = peak_widths(status_safe, peaks, rel_height=1)
+         minimum_weight = 0
+
+         if len(peaks) > 0:
+             idx = np.argmax(widths)
+             peak = peaks[idx]
+             width = widths[idx]
+             if width >= minimum_weight:
+                 left = left[idx]
+                 right = right[idx]
+                 left = timeline_safe[int(left)]
+                 right = timeline_safe[int(right)]
+
+                 df.loc[indices, class_attr] = 0
+                 t0 = left  # take onset + (right - left)/2.0
+                 df.loc[indices, class_attr.replace("class_", "t_")] = t0
+                 df.loc[
+                     track.loc[track[stat_col].isnull(), class_attr].index,
+                     continuous_stat_col,
+                 ] = np.nan
+                 df.loc[
+                     track.loc[track["FRAME"] < t0, class_attr].index,
+                     continuous_stat_col,
+                 ] = 0
+                 df.loc[
+                     track.loc[track["FRAME"] >= t0, class_attr].index,
+                     continuous_stat_col,
+                 ] = 1
+             else:
+                 df.loc[indices, class_attr] = 1
+                 df.loc[indices, class_attr.replace("class_", "t_")] = -1
+                 df.loc[indices, continuous_stat_col] = 0
+         else:
+             df.loc[indices, class_attr] = 1
+             df.loc[indices, class_attr.replace("class_", "t_")] = -1
+             df.loc[indices, continuous_stat_col] = 0
+
+     # restate NaN for out of scope timepoints
+     df.loc[df[stat_col].isnull(), continuous_stat_col] = np.nan
+     if "inst_" + stat_col in list(df.columns):
+         df = df.drop(columns=["inst_" + stat_col])
+     df = df.rename(columns={stat_col: "inst_" + stat_col})
+     df = df.rename(columns={continuous_stat_col: stat_col})
+     print("Classes: ", df.loc[df["FRAME"] == 0, class_attr].value_counts())
+
+     return df
+
+
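The added body above replaces the previous per-line logic with a plateau search: the track's binary status is scanned with `scipy.signal.find_peaks`/`peak_widths`, the widest plateau wins, and its left edge becomes the event onset `t0`. Below is a minimal, self-contained sketch of that logic on invented data (toy status values; not part of the package):

    import numpy as np
    from scipy.signal import find_peaks, peak_widths

    # Toy binary status with a 1-frame blip and a 4-frame plateau (illustrative only).
    status = np.array([0, 0, 1, 0, 1, 1, 1, 1, 0, 0], dtype=float)
    frames = np.arange(len(status))

    peaks, _ = find_peaks(status)  # plateau maxima of the 0/1 signal
    widths, _, left_ips, right_ips = peak_widths(status, peaks, rel_height=1)

    idx = np.argmax(widths)          # keep the widest plateau, drop the blip
    t0 = frames[int(left_ips[idx])]  # onset taken at the plateau's left edge, as above
    print(f"event onset near frame {t0}, plateau width {widths[idx]:.1f}")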
+ def classify_irreversible_events(
+     data, class_attr, r2_threshold=0.5, percentile_recovery=50, pre_event=None
+ ):
+     """
+     Classify irreversible events in a tracked dataset based on the status of cells and transitions.
+
+     Parameters
+     ----------
+     data : pandas.DataFrame
+         DataFrame containing tracked cell data, including classification and status columns.
+     class_attr : str
+         Column name for the classification attribute (e.g., 'class') used to update the classification of cell states.
+     r2_threshold : float, optional
+         R-squared threshold for fitting the model (default is 0.5). Used when estimating the time of transition.
+     percentile_recovery : int, optional
+         Percentile passed to `classify_unique_states` when revisiting leftover ambiguous tracks (default is 50).
+     pre_event : str, optional
+         Name of a prerequisite event; tracks negative to the pre-event are set to NaN (default is None).
+
+     Returns
+     -------
+     pandas.DataFrame
+         DataFrame with updated classifications for irreversible events, with the following outcomes:
+         - Cells with all 0s in the status column are classified as 1 (no event).
+         - Cells with all 1s are classified as 2 (event already occurred, left-censored).
+         - Cells with a mix of 0s and 1s are classified as 2 (ambiguous, possible transition).
+         - For cells classified as 2, the time of the event is estimated using the `estimate_time` function. If successful, they are reclassified as 0 (event).
+         - The classification of cells still classified as 2 is revisited using the `percentile_recovery` threshold.
+
+     Notes
+     -----
+     - The function assumes that cells are grouped by a unique identifier ('TRACK_ID') and sorted by position or ID.
+     - The classification is based on the `stat_col` derived from `class_attr` (status column).
+     - Cells with no event (all 0s in the status column) are assigned a class value of 1.
+     - Cells with irreversible events (all 1s in the status column) are assigned a class value of 2.
+     - Cells with transitions (a mix of 0s and 1s) are classified as 2 and their event times are estimated. When successful, they are reclassified as 0.
+     - After event classification, the function reclassifies leftover ambiguous cases (class 2) using the `classify_unique_states` function.
+
+     Example
+     -------
+     >>> df = classify_irreversible_events(df, 'class', r2_threshold=0.7)
+
+     """
+
+     df = data.copy()
+     cols = list(df.columns)
+
+     # Control input
+     assert "TRACK_ID" in cols, "Please provide tracked data..."
+     if "position" in cols:
+         sort_cols = ["position", "TRACK_ID"]
+     else:
+         sort_cols = ["TRACK_ID"]
+     if pre_event is not None:
+         assert (
+             "t_" + pre_event in cols
+         ), "Pre-event time does not seem to be a valid column in the DataFrame..."
+         assert (
+             "class_" + pre_event in cols
+         ), "Pre-event class does not seem to be a valid column in the DataFrame..."
+
+     stat_col = class_attr.replace("class", "status")
+
+     for tid, track in df.groupby(sort_cols):
+
+         indices = track[class_attr].index
+
+         if pre_event is not None:
+             if track["class_" + pre_event].values[0] == 1:
+                 df.loc[indices, class_attr] = np.nan
+                 df.loc[indices, stat_col] = np.nan
+                 continue
+             else:
+                 # pre-event took place (if left-censored, took place at time -1)
+                 t_pre_event = track["t_" + pre_event].values[0]
+                 indices_pre = track.loc[track["FRAME"] <= t_pre_event, class_attr].index
+                 df.loc[indices_pre, stat_col] = (
+                     np.nan
+                 )  # set to NaN all statuses before pre-event
+                 track.loc[track["FRAME"] <= t_pre_event, stat_col] = np.nan
+         else:
+             # set state to 0 before first detection
+             t_firstdetection = track["t_firstdetection"].values[0]
+             indices_pre_detection = track.loc[
+                 track["FRAME"] <= t_firstdetection, class_attr
+             ].index
+             track.loc[indices_pre_detection, stat_col] = 0.0
+             df.loc[indices_pre_detection, stat_col] = 0.0
+
+         # The non-NaN part of track (post pre-event)
+         track_valid = track.dropna(subset=stat_col, inplace=False)
+         status_values = track_valid[stat_col].to_numpy()
+
+         if np.all([s == 0 for s in status_values]):
+             # all negative to condition, event not observed
+             df.loc[indices, class_attr] = 1
+         elif np.all([s == 1 for s in status_values]):
+             # all positive, event already observed (left-censored)
+             df.loc[indices, class_attr] = 2
+         else:
+             # ambiguity, possible transition, use `unique_state` technique after
+             df.loc[indices, class_attr] = 2
+
+     print("Number of cells per class after the initial pass: ")
+     pretty_table(df.loc[df["FRAME"] == 0, class_attr].value_counts().to_dict())
+
+     df.loc[df[class_attr] != 2, class_attr.replace("class", "t")] = -1
+     # Try to fit time on class 2 cells (ambiguous)
+     df = estimate_time(
+         df,
+         class_attr,
+         model="step_function",
+         class_of_interest=[2],
+         r2_threshold=r2_threshold,
+     )
+
+     print("Number of cells per class after conditional signal fit: ")
+     pretty_table(df.loc[df["FRAME"] == 0, class_attr].value_counts().to_dict())
+
+     # Revisit class 2 cells to classify as neg/pos with percentile tolerance
+     df.loc[df[class_attr] == 2, :] = classify_unique_states(
+         df.loc[df[class_attr] == 2, :].copy(), class_attr, percentile_recovery
+     )
+     print("Number of cells per class after recovery pass (median state): ")
+     pretty_table(df.loc[df["FRAME"] == 0, class_attr].value_counts().to_dict())
+
+     return df
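For reference, a track ends up as class 0 (event observed, with an estimated time), 1 (event never observed) or 2 (left-censored or still ambiguous). A runnable toy restatement of the first-pass rules above (status sequences invented for illustration):

    import numpy as np

    def first_pass_class(status_values):
        # First-pass rules of classify_irreversible_events (toy restatement).
        status_values = np.asarray(status_values, dtype=float)
        if np.all(status_values == 0):
            return 1  # all negative: event not observed
        if np.all(status_values == 1):
            return 2  # all positive: event before the movie (left-censored)
        return 2      # mixed 0/1: ambiguous, candidate for the step-function fit

    for status in ([0, 0, 0, 0], [1, 1, 1, 1], [0, 0, 1, 1]):
        print(status, "->", first_pass_class(status))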


  def classify_unique_states(df, class_attr, percentile=50, pre_event=None):
+     """
+     Classify unique cell states based on percentile values of a status attribute in a tracked dataset.
+
+     Parameters
+     ----------
+     df : pandas.DataFrame
+         DataFrame containing tracked cell data, including classification and status columns.
+     class_attr : str
+         Column name for the classification attribute (e.g., 'class') used to update the classification of cell states.
+     percentile : int, optional
+         Percentile value used to classify the status attribute within the valid frames (default is 50, i.e. the median).
+     pre_event : str, optional
+         Name of a prerequisite event; tracks negative to the pre-event are set to NaN (default is None).
+
+     Returns
+     -------
+     pandas.DataFrame
+         DataFrame with updated classification for each track and corresponding time (if applicable).
+         The classification is updated based on the calculated percentile:
+         - Cells with a percentile value of 0 (negative to classification) are classified as 1.
+         - Cells with a non-zero percentile value, which ceils to 1 (positive to classification), are classified as 2.
+         - If classification is not applicable (NaN), time (`class_attr.replace('class', 't')`) is set to -1.
+
+     Notes
+     -----
+     - The function assumes that cells are grouped by a unique identifier ('TRACK_ID') and sorted by position or ID.
+     - The classification is based on the `stat_col` derived from `class_attr` (status column).
+     - NaN values in the status column are excluded from the percentile calculation.
+     - For each track, the classification is assigned according to the ceiled percentile value.
+     - Time (`class_attr.replace('class', 't')`) is set to -1 when the cell state is classified.
+
+     Example
+     -------
+     >>> df = classify_unique_states(df, 'class', percentile=75)
+
+     """
+
+     cols = list(df.columns)
+     assert "TRACK_ID" in cols, "Please provide tracked data..."
+     if "position" in cols:
+         sort_cols = ["position", "TRACK_ID"]
+     else:
+         sort_cols = ["TRACK_ID"]
+
+     if pre_event is not None:
+         assert (
+             "t_" + pre_event in cols
+         ), "Pre-event time does not seem to be a valid column in the DataFrame..."
+         assert (
+             "class_" + pre_event in cols
+         ), "Pre-event class does not seem to be a valid column in the DataFrame..."
+
+     stat_col = class_attr.replace("class", "status")
+
+     for tid, track in df.groupby(sort_cols):
+
+         indices = track[class_attr].index
+
+         if pre_event is not None:
+             if track["class_" + pre_event].values[0] == 1:
+                 df.loc[indices, class_attr] = np.nan
+                 df.loc[indices, stat_col] = np.nan
+                 df.loc[indices, stat_col.replace("status_", "t_")] = -1
+                 continue
+             else:
+                 t_pre_event = track["t_" + pre_event].values[0]
+                 indices_pre = track.loc[track["FRAME"] <= t_pre_event, class_attr].index
+                 df.loc[indices_pre, stat_col] = np.nan
+                 track.loc[track["FRAME"] <= t_pre_event, stat_col] = np.nan
+
+         # Post pre-event track
+         track_valid = track.dropna(subset=stat_col, inplace=False)
+         status_values = track_valid[stat_col].to_numpy()
+         frames = track_valid["FRAME"].to_numpy()
+         t_first = track["t_firstdetection"].to_numpy()[0]
+         perc_status = np.nanpercentile(status_values[frames >= t_first], percentile)
+
+         if perc_status == perc_status:
+             c = ceil(perc_status)
+             if c == 0:
+                 df.loc[indices, class_attr] = 1
+                 df.loc[indices, class_attr.replace("class", "t")] = -1
+             elif c == 1:
+                 df.loc[indices, class_attr] = 2
+                 df.loc[indices, class_attr.replace("class", "t")] = -1
+     return df
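A worked check of the percentile rule above, on invented values: the track's median status decides the call, and any non-zero percentile ceils to class 2:

    import numpy as np
    from math import ceil

    status_values = np.array([0, 0, 1, 0, 0], dtype=float)  # one spurious positive frame
    perc_status = np.nanpercentile(status_values, 50)        # median state = 0.0
    if perc_status == perc_status:                           # NaN check, as in the function above
        cls = 1 if ceil(perc_status) == 0 else 2
        print(cls)  # -> 1: the track is called negative despite the blip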
- """
1307
- Classify unique cell states based on percentile values of a status attribute in a tracked dataset.
1308
-
1309
- Parameters
1310
- ----------
1311
- df : pandas.DataFrame
1312
- DataFrame containing tracked cell data, including classification and status columns.
1313
- class_attr : str
1314
- Column name for the classification attribute (e.g., 'class') used to update the classification of cell states.
1315
- percentile : int, optional
1316
- Percentile value used to classify the status attribute within the valid frames (default is median).
1317
-
1318
- Returns
1319
- -------
1320
- pandas.DataFrame
1321
- DataFrame with updated classification for each track and corresponding time (if applicable).
1322
- The classification is updated based on the calculated percentile:
1323
- - Cells with percentile values that round to 0 (negative to classification) are classified as 1.
1324
- - Cells with percentile values that round to 1 (positive to classification) are classified as 2.
1325
- - If classification is not applicable (NaN), time (`class_attr.replace('class', 't')`) is set to -1.
1326
-
1327
- Notes
1328
- -----
1329
- - The function assumes that cells are grouped by a unique identifier ('TRACK_ID') and sorted by position or ID.
1330
- - The classification is based on the `stat_col` derived from `class_attr` (status column).
1331
- - NaN values in the status column are excluded from the percentile calculation.
1332
- - For each track, the classification is assigned according to the rounded percentile value.
1333
- - Time (`class_attr.replace('class', 't')`) is set to -1 when the cell state is classified.
1334
-
1335
- Example
1336
- -------
1337
- >>> df = classify_unique_states(df, 'class', percentile=75)
1338
-
1339
- """
1340
-
1341
- cols = list(df.columns)
1342
- assert 'TRACK_ID' in cols,'Please provide tracked data...'
1343
- if 'position' in cols:
1344
- sort_cols = ['position', 'TRACK_ID']
1345
- else:
1346
- sort_cols = ['TRACK_ID']
1347
-
1348
- if pre_event is not None:
1349
- assert 't_'+pre_event in cols,"Pre-event time does not seem to be a valid column in the DataFrame..."
1350
- assert 'class_'+pre_event in cols,"Pre-event class does not seem to be a valid column in the DataFrame..."
1351
-
1352
- stat_col = class_attr.replace('class','status')
1353
-
1354
- for tid, track in df.groupby(sort_cols):
1355
-
1356
- indices = track[class_attr].index
1357
-
1358
- if pre_event is not None:
1359
- if track['class_'+pre_event].values[0]==1:
1360
- df.loc[indices, class_attr] = np.nan
1361
- df.loc[indices, stat_col] = np.nan
1362
- df.loc[indices, stat_col.replace('status_','t_')] = -1
1363
- continue
1364
- else:
1365
- t_pre_event = track['t_'+pre_event].values[0]
1366
- indices_pre = track.loc[track['FRAME']<=t_pre_event, class_attr].index
1367
- df.loc[indices_pre, stat_col] = np.nan
1368
- track.loc[track['FRAME']<=t_pre_event, stat_col] = np.nan
1369
-
1370
- # Post pre-event track
1371
- track_valid = track.dropna(subset=stat_col, inplace=False)
1372
- status_values = track_valid[stat_col].to_numpy()
1373
- frames = track_valid['FRAME'].to_numpy()
1374
- t_first = track['t_firstdetection'].to_numpy()[0]
1375
- perc_status = np.nanpercentile(status_values[frames>=t_first], percentile)
1376
-
1377
- if perc_status==perc_status:
1378
- c = ceil(perc_status)
1379
- if c==0:
1380
- df.loc[indices, class_attr] = 1
1381
- df.loc[indices, class_attr.replace('class','t')] = -1
1382
- elif c==1:
1383
- df.loc[indices, class_attr] = 2
1384
- df.loc[indices, class_attr.replace('class','t')] = -1
1385
- return df
1386
  def classify_cells_from_query(df, status_attr, query):
-
-     """
-     Classify cells in a DataFrame based on a query string, assigning classifications to a specified column.
-
-     Parameters
-     ----------
-     df : pandas.DataFrame
-         The DataFrame containing cell data to be classified.
-     status_attr : str
-         The name of the column where the classification results will be stored.
-         Initially, all cells are assigned a value of 0.
-     query : str
-         A string representing the condition for classifying the cells. The query is applied to the DataFrame using pandas `.query()`.
-
-     Returns
-     -------
-     pandas.DataFrame
-         The DataFrame with an updated `status_attr` column:
-         - Cells matching the query are classified with a value of 1.
-         - Cells that have `NaN` values in any of the columns involved in the query are classified as `NaN`.
-         - Cells that do not match the query are classified with a value of 0.
-
-     Notes
-     -----
-     - If the `query` string is empty, an `EmptyQueryError` is raised and no classification is performed.
-     - If the query references columns that are not found in `df`, a `MissingColumnsError` is raised.
-     - If the query cannot be evaluated, a `QueryError` is raised and no changes are applied.
-
-     Examples
-     --------
-     >>> data = {'cell_type': ['A', 'B', 'A', 'B'], 'size': [10, 20, np.nan, 15]}
-     >>> df = pd.DataFrame(data)
-     >>> classify_cells_from_query(df, 'selected_cells', 'size > 15')
-       cell_type  size  selected_cells
-     0         A  10.0             0.0
-     1         B  20.0             1.0
-     2         A   NaN             NaN
-     3         B  15.0             0.0
-
-     Raises
-     ------
-     EmptyQueryError
-         If the provided query string is empty.
-     MissingColumnsError
-         If columns referenced in the query are missing from the DataFrame.
-     QueryError
-         If the query cannot be interpreted by pandas `.query()`.
-
-     """
-
-     if not status_attr.startswith("status_"):
-         status_attr = "status_" + status_attr
-
-     df = df.copy()
-     df = df.replace([np.inf, -np.inf, None], np.nan)
-     #df = df.convert_dtypes()
-
-     df.loc[:, status_attr] = 0
-     df[status_attr] = df[status_attr].astype(float)
-
-     cols = extract_cols_from_query(query)
-     print(f"The following DataFrame measurements were identified in the query: {cols=}...")
-
-     if query.strip() == "":
-         raise EmptyQueryError("The provided query is empty.")
-
-     missing_cols = [c for c in cols if c not in df.columns]
-     if missing_cols:
-         raise MissingColumnsError(missing_cols)
-
-     try:
-         sub_df = df.dropna(subset=cols)
-         if len(sub_df) > 0:
-             selection = sub_df.query(query).index
-             null_selection = df[df.loc[:, cols].isna().any(axis=1)].index
-             df.loc[null_selection, status_attr] = np.nan
-             df.loc[selection, status_attr] = 1
-         else:
-             df.loc[:, status_attr] = np.nan
-     except Exception as e:
-         raise QueryError(f"The query could not be understood: {e}")
-
-     return df.copy()
-
- def classify_tracks_from_query(df, event_name, query, irreversible_event=True, unique_state=False, r2_threshold=0.5, percentile_recovery=50):
-
-     status_attr = "status_"+event_name
-     df = classify_cells_from_query(df, status_attr, query)
-     class_attr = "class_"+event_name
-
-     name_map = {status_attr: class_attr}
-     df = df.drop(list(set(name_map.values()) & set(df.columns)), axis=1).rename(columns=name_map)
-     df.reset_index(inplace=True, drop=True)
-
-     df = interpret_track_classification(df, class_attr, irreversible_event=irreversible_event, unique_state=unique_state, r2_threshold=r2_threshold, percentile_recovery=percentile_recovery)
-
-     return df
-
- def measure_radial_distance_to_center(df, volume, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
-
-     try:
-         df['radial_distance'] = np.sqrt((df[column_labels['x']] - volume[0] / 2) ** 2 + (df[column_labels['y']] - volume[1] / 2) ** 2)
-     except Exception as e:
-         print(f"{e=}")
-
-     return df
+ """
1769
+ Classify cells in a DataFrame based on a query string, assigning classifications to a specified column.
1770
+
1771
+ Parameters
1772
+ ----------
1773
+ df : pandas.DataFrame
1774
+ The DataFrame containing cell data to be classified.
1775
+ status_attr : str
1776
+ The name of the column where the classification results will be stored.
1777
+ - Initially, all cells are assigned a value of 0.
1778
+ query : str
1779
+ A string representing the condition for classifying the cells. The query is applied to the DataFrame using pandas `.query()`.
1780
+
1781
+ Returns
1782
+ -------
1783
+ pandas.DataFrame
1784
+ The DataFrame with an updated `status_attr` column:
1785
+ - Cells matching the query are classified with a value of 1.
1786
+ - Cells that have `NaN` values in any of the columns involved in the query are classified as `NaN`.
1787
+ - Cells that do not match the query are classified with a value of 0.
1788
+
1789
+ Notes
1790
+ -----
1791
+ - If the `query` string is empty, a message is printed and no classification is performed.
1792
+ - If the query contains columns that are not found in `df`, the entire `class_attr` column is set to `NaN`.
1793
+ - Any errors encountered during query evaluation will prevent changes from being applied and will print a message.
1794
+
1795
+ Examples
1796
+ --------
1797
+ >>> data = {'cell_type': ['A', 'B', 'A', 'B'], 'size': [10, 20, np.nan, 15]}
1798
+ >>> df = pd.DataFrame(data)
1799
+ >>> classify_cells_from_query(df, 'selected_cells', 'size > 15')
1800
+ cell_type size selected_cells
1801
+ 0 A 10.0 0.0
1802
+ 1 B 20.0 1.0
1803
+ 2 A NaN NaN
1804
+ 3 B 15.0 0.0
1805
+
1806
+ - If the query string is empty, the function prints a message and returns the DataFrame unchanged.
1807
+ - If any of the columns in the query don't exist in the DataFrame, the classification column is set to `NaN`.
1808
+
1809
+ Raises
1810
+ ------
1811
+ Exception
1812
+ If the query is invalid or if there are issues with the DataFrame or query syntax, an error message is printed, and `None` is returned.
1813
+
1814
+ """
1815
+
1816
+ if not status_attr.startswith("status_"):
1817
+ status_attr = "status_" + status_attr
1818
+
1819
+ df = df.copy()
1820
+ df = df.replace([np.inf, -np.inf, None], np.nan)
1821
+ # df = df.convert_dtypes()
1822
+
1823
+ df.loc[:, status_attr] = 0
1824
+ df[status_attr] = df[status_attr].astype(float)
1825
+
1826
+ cols = extract_cols_from_query(query)
1827
+ print(
1828
+ f"The following DataFrame measurements were identified in the query: {cols=}..."
1829
+ )
1830
+
1831
+ if query.strip() == "":
1832
+ raise EmptyQueryError("The provided query is empty.")
1833
+
1834
+ missing_cols = [c for c in cols if c not in df.columns]
1835
+ if missing_cols:
1836
+ raise MissingColumnsError(missing_cols)
1837
+
1838
+ try:
1839
+ sub_df = df.dropna(subset=cols)
1840
+ if len(sub_df) > 0:
1841
+ selection = sub_df.query(query).index
1842
+ null_selection = df[df.loc[:, cols].isna().any(axis=1)].index
1843
+ df.loc[null_selection, status_attr] = np.nan
1844
+ df.loc[selection, status_attr] = 1
1845
+ else:
1846
+ df.loc[:, status_attr] = np.nan
1847
+ except Exception as e:
1848
+ raise QueryError(f"The query could not be understood: {e}")
1849
+
1850
+ return df.copy()
1851
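A runnable restatement of the status semantics above in plain pandas (toy frame; the column name and query are invented): rows matching the query get 1, rows with NaN in a queried column get NaN, the rest stay 0:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"area": [10.0, 250.0, np.nan, 180.0]})
    query, cols = "area > 200", ["area"]  # cols would come from extract_cols_from_query

    df["status_large"] = 0.0
    df.loc[df[cols].isna().any(axis=1), "status_large"] = np.nan
    df.loc[df.dropna(subset=cols).query(query).index, "status_large"] = 1.0
    print(df)  # status_large: 0.0, 1.0, NaN, 0.0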
+
+
+ def classify_tracks_from_query(
+     df,
+     event_name,
+     query,
+     irreversible_event=True,
+     unique_state=False,
+     r2_threshold=0.5,
+     percentile_recovery=50,
+ ):
+
+     status_attr = "status_" + event_name
+     df = classify_cells_from_query(df, status_attr, query)
+     class_attr = "class_" + event_name
+
+     name_map = {status_attr: class_attr}
+     df = df.drop(list(set(name_map.values()) & set(df.columns)), axis=1).rename(
+         columns=name_map
+     )
+     df.reset_index(inplace=True, drop=True)
+
+     df = interpret_track_classification(
+         df,
+         class_attr,
+         irreversible_event=irreversible_event,
+         unique_state=unique_state,
+         r2_threshold=r2_threshold,
+         percentile_recovery=percentile_recovery,
+     )
+
+     return df
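The drop-then-rename step above guards against a stale class column left over from a previous run; a runnable toy check (invented event name):

    import pandas as pd

    df = pd.DataFrame({"status_death": [0, 1], "class_death": [9, 9]})  # stale class column
    name_map = {"status_death": "class_death"}
    df = df.drop(list(set(name_map.values()) & set(df.columns)), axis=1).rename(
        columns=name_map
    )
    print(df.columns.tolist())  # ['class_death'] -- old values discarded, status promoted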
+
+
+ def measure_radial_distance_to_center(
+     df,
+     volume,
+     column_labels={
+         "track": "TRACK_ID",
+         "time": "FRAME",
+         "x": "POSITION_X",
+         "y": "POSITION_Y",
+     },
+ ):
+
+     try:
+         df["radial_distance"] = np.sqrt(
+             (df[column_labels["x"]] - volume[0] / 2) ** 2
+             + (df[column_labels["y"]] - volume[1] / 2) ** 2
+         )
+     except Exception as e:
+         print(f"{e=}")
+
+     return df
+
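A worked check of the radial-distance formula above, with invented numbers (a 2048 x 2048 px field, cell at (100, 150)):

    import numpy as np

    volume = (2048, 2048)  # field dimensions; centre at (1024, 1024)
    x, y = 100.0, 150.0
    r = np.sqrt((x - volume[0] / 2) ** 2 + (y - volume[1] / 2) ** 2)
    print(round(r, 1))     # sqrt(924**2 + 874**2) ~= 1271.9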
  def center_of_mass_to_abs_coordinates(df):
-
-     center_of_mass_x_cols = [c for c in list(df.columns) if c.endswith('centre_of_mass_x')]
-     center_of_mass_y_cols = [c for c in list(df.columns) if c.endswith('centre_of_mass_y')]
-     for c in center_of_mass_x_cols:
-         df.loc[:,c.replace('_x','_POSITION_X')] = df[c] + df['POSITION_X']
-     for c in center_of_mass_y_cols:
-         df.loc[:,c.replace('_y','_POSITION_Y')] = df[c] + df['POSITION_Y']
-     df = df.drop(columns = center_of_mass_x_cols+center_of_mass_y_cols)
-
-     return df
+
+     center_of_mass_x_cols = [
+         c for c in list(df.columns) if c.endswith("centre_of_mass_x")
+     ]
+     center_of_mass_y_cols = [
+         c for c in list(df.columns) if c.endswith("centre_of_mass_y")
+     ]
+     for c in center_of_mass_x_cols:
+         df.loc[:, c.replace("_x", "_POSITION_X")] = df[c] + df["POSITION_X"]
+     for c in center_of_mass_y_cols:
+         df.loc[:, c.replace("_y", "_POSITION_Y")] = df[c] + df["POSITION_Y"]
+     df = df.drop(columns=center_of_mass_x_cols + center_of_mass_y_cols)
+
+     return df
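And a toy check of the centre-of-mass conversion above (the `ch1_` prefix is invented): relative offsets become absolute positions by adding the cell centroid, and the relative columns are dropped:

    import pandas as pd

    df = pd.DataFrame({
        "ch1_centre_of_mass_x": [1.5],   # offset relative to the cell centroid
        "ch1_centre_of_mass_y": [-2.0],
        "POSITION_X": [100.0],
        "POSITION_Y": [200.0],
    })
    df["ch1_centre_of_mass_POSITION_X"] = df["ch1_centre_of_mass_x"] + df["POSITION_X"]
    df["ch1_centre_of_mass_POSITION_Y"] = df["ch1_centre_of_mass_y"] + df["POSITION_Y"]
    df = df.drop(columns=["ch1_centre_of_mass_x", "ch1_centre_of_mass_y"])
    print(df.iloc[0].tolist())  # -> [100.0, 200.0, 101.5, 198.0]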