celldetective 1.4.2__py3-none-any.whl → 1.5.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. celldetective/__init__.py +25 -0
  2. celldetective/__main__.py +62 -43
  3. celldetective/_version.py +1 -1
  4. celldetective/extra_properties.py +477 -399
  5. celldetective/filters.py +192 -97
  6. celldetective/gui/InitWindow.py +541 -411
  7. celldetective/gui/__init__.py +0 -15
  8. celldetective/gui/about.py +44 -39
  9. celldetective/gui/analyze_block.py +120 -84
  10. celldetective/gui/base/__init__.py +0 -0
  11. celldetective/gui/base/channel_norm_generator.py +335 -0
  12. celldetective/gui/base/components.py +249 -0
  13. celldetective/gui/base/feature_choice.py +92 -0
  14. celldetective/gui/base/figure_canvas.py +52 -0
  15. celldetective/gui/base/list_widget.py +133 -0
  16. celldetective/gui/{styles.py → base/styles.py} +92 -36
  17. celldetective/gui/base/utils.py +33 -0
  18. celldetective/gui/base_annotator.py +900 -767
  19. celldetective/gui/classifier_widget.py +6 -22
  20. celldetective/gui/configure_new_exp.py +777 -671
  21. celldetective/gui/control_panel.py +635 -524
  22. celldetective/gui/dynamic_progress.py +449 -0
  23. celldetective/gui/event_annotator.py +2023 -1662
  24. celldetective/gui/generic_signal_plot.py +1292 -944
  25. celldetective/gui/gui_utils.py +899 -1289
  26. celldetective/gui/interactions_block.py +658 -0
  27. celldetective/gui/interactive_timeseries_viewer.py +447 -0
  28. celldetective/gui/json_readers.py +48 -15
  29. celldetective/gui/layouts/__init__.py +5 -0
  30. celldetective/gui/layouts/background_model_free_layout.py +537 -0
  31. celldetective/gui/layouts/channel_offset_layout.py +134 -0
  32. celldetective/gui/layouts/local_correction_layout.py +91 -0
  33. celldetective/gui/layouts/model_fit_layout.py +372 -0
  34. celldetective/gui/layouts/operation_layout.py +68 -0
  35. celldetective/gui/layouts/protocol_designer_layout.py +96 -0
  36. celldetective/gui/pair_event_annotator.py +3130 -2435
  37. celldetective/gui/plot_measurements.py +586 -267
  38. celldetective/gui/plot_signals_ui.py +724 -506
  39. celldetective/gui/preprocessing_block.py +395 -0
  40. celldetective/gui/process_block.py +1678 -1831
  41. celldetective/gui/seg_model_loader.py +580 -473
  42. celldetective/gui/settings/__init__.py +0 -7
  43. celldetective/gui/settings/_cellpose_model_params.py +181 -0
  44. celldetective/gui/settings/_event_detection_model_params.py +95 -0
  45. celldetective/gui/settings/_segmentation_model_params.py +159 -0
  46. celldetective/gui/settings/_settings_base.py +77 -65
  47. celldetective/gui/settings/_settings_event_model_training.py +752 -526
  48. celldetective/gui/settings/_settings_measurements.py +1133 -964
  49. celldetective/gui/settings/_settings_neighborhood.py +574 -488
  50. celldetective/gui/settings/_settings_segmentation_model_training.py +779 -564
  51. celldetective/gui/settings/_settings_signal_annotator.py +329 -305
  52. celldetective/gui/settings/_settings_tracking.py +1304 -1094
  53. celldetective/gui/settings/_stardist_model_params.py +98 -0
  54. celldetective/gui/survival_ui.py +422 -312
  55. celldetective/gui/tableUI.py +1665 -1701
  56. celldetective/gui/table_ops/_maths.py +295 -0
  57. celldetective/gui/table_ops/_merge_groups.py +140 -0
  58. celldetective/gui/table_ops/_merge_one_hot.py +95 -0
  59. celldetective/gui/table_ops/_query_table.py +43 -0
  60. celldetective/gui/table_ops/_rename_col.py +44 -0
  61. celldetective/gui/thresholds_gui.py +382 -179
  62. celldetective/gui/viewers/__init__.py +0 -0
  63. celldetective/gui/viewers/base_viewer.py +700 -0
  64. celldetective/gui/viewers/channel_offset_viewer.py +331 -0
  65. celldetective/gui/viewers/contour_viewer.py +394 -0
  66. celldetective/gui/viewers/size_viewer.py +153 -0
  67. celldetective/gui/viewers/spot_detection_viewer.py +341 -0
  68. celldetective/gui/viewers/threshold_viewer.py +309 -0
  69. celldetective/gui/workers.py +403 -126
  70. celldetective/log_manager.py +92 -0
  71. celldetective/measure.py +1895 -1478
  72. celldetective/napari/__init__.py +0 -0
  73. celldetective/napari/utils.py +1025 -0
  74. celldetective/neighborhood.py +1914 -1448
  75. celldetective/preprocessing.py +1620 -1220
  76. celldetective/processes/__init__.py +0 -0
  77. celldetective/processes/background_correction.py +271 -0
  78. celldetective/processes/compute_neighborhood.py +894 -0
  79. celldetective/processes/detect_events.py +246 -0
  80. celldetective/processes/downloader.py +137 -0
  81. celldetective/processes/measure_cells.py +565 -0
  82. celldetective/processes/segment_cells.py +760 -0
  83. celldetective/processes/track_cells.py +435 -0
  84. celldetective/processes/train_segmentation_model.py +694 -0
  85. celldetective/processes/train_signal_model.py +265 -0
  86. celldetective/processes/unified_process.py +292 -0
  87. celldetective/regionprops/_regionprops.py +358 -317
  88. celldetective/relative_measurements.py +987 -710
  89. celldetective/scripts/measure_cells.py +313 -212
  90. celldetective/scripts/measure_relative.py +90 -46
  91. celldetective/scripts/segment_cells.py +165 -104
  92. celldetective/scripts/segment_cells_thresholds.py +96 -68
  93. celldetective/scripts/track_cells.py +198 -149
  94. celldetective/scripts/train_segmentation_model.py +324 -201
  95. celldetective/scripts/train_signal_model.py +87 -45
  96. celldetective/segmentation.py +844 -749
  97. celldetective/signals.py +3514 -2861
  98. celldetective/tracking.py +30 -15
  99. celldetective/utils/__init__.py +0 -0
  100. celldetective/utils/cellpose_utils/__init__.py +133 -0
  101. celldetective/utils/color_mappings.py +42 -0
  102. celldetective/utils/data_cleaning.py +630 -0
  103. celldetective/utils/data_loaders.py +450 -0
  104. celldetective/utils/dataset_helpers.py +207 -0
  105. celldetective/utils/downloaders.py +235 -0
  106. celldetective/utils/event_detection/__init__.py +8 -0
  107. celldetective/utils/experiment.py +1782 -0
  108. celldetective/utils/image_augmenters.py +308 -0
  109. celldetective/utils/image_cleaning.py +74 -0
  110. celldetective/utils/image_loaders.py +926 -0
  111. celldetective/utils/image_transforms.py +335 -0
  112. celldetective/utils/io.py +62 -0
  113. celldetective/utils/mask_cleaning.py +348 -0
  114. celldetective/utils/mask_transforms.py +5 -0
  115. celldetective/utils/masks.py +184 -0
  116. celldetective/utils/maths.py +351 -0
  117. celldetective/utils/model_getters.py +325 -0
  118. celldetective/utils/model_loaders.py +296 -0
  119. celldetective/utils/normalization.py +380 -0
  120. celldetective/utils/parsing.py +465 -0
  121. celldetective/utils/plots/__init__.py +0 -0
  122. celldetective/utils/plots/regression.py +53 -0
  123. celldetective/utils/resources.py +34 -0
  124. celldetective/utils/stardist_utils/__init__.py +104 -0
  125. celldetective/utils/stats.py +90 -0
  126. celldetective/utils/types.py +21 -0
  127. {celldetective-1.4.2.dist-info → celldetective-1.5.0b1.dist-info}/METADATA +1 -1
  128. celldetective-1.5.0b1.dist-info/RECORD +187 -0
  129. {celldetective-1.4.2.dist-info → celldetective-1.5.0b1.dist-info}/WHEEL +1 -1
  130. tests/gui/test_new_project.py +129 -117
  131. tests/gui/test_project.py +127 -79
  132. tests/test_filters.py +39 -15
  133. tests/test_notebooks.py +8 -0
  134. tests/test_tracking.py +232 -13
  135. tests/test_utils.py +123 -77
  136. celldetective/gui/base_components.py +0 -23
  137. celldetective/gui/layouts.py +0 -1602
  138. celldetective/gui/processes/compute_neighborhood.py +0 -594
  139. celldetective/gui/processes/downloader.py +0 -111
  140. celldetective/gui/processes/measure_cells.py +0 -360
  141. celldetective/gui/processes/segment_cells.py +0 -499
  142. celldetective/gui/processes/track_cells.py +0 -303
  143. celldetective/gui/processes/train_segmentation_model.py +0 -270
  144. celldetective/gui/processes/train_signal_model.py +0 -108
  145. celldetective/gui/table_ops/merge_groups.py +0 -118
  146. celldetective/gui/viewers.py +0 -1354
  147. celldetective/io.py +0 -3663
  148. celldetective/utils.py +0 -3108
  149. celldetective-1.4.2.dist-info/RECORD +0 -123
  150. {celldetective-1.4.2.dist-info → celldetective-1.5.0b1.dist-info}/entry_points.txt +0 -0
  151. {celldetective-1.4.2.dist-info → celldetective-1.5.0b1.dist-info}/licenses/LICENSE +0 -0
  152. {celldetective-1.4.2.dist-info → celldetective-1.5.0b1.dist-info}/top_level.txt +0 -0
@@ -3,1462 +3,1917 @@ import pandas as pd
3
3
  from tqdm import tqdm
4
4
  from skimage.graph import pixel_graph
5
5
  import os
6
- from celldetective.utils import contour_of_instance_segmentation, extract_identity_col
6
+ from celldetective.utils.masks import contour_of_instance_segmentation
7
+ from celldetective.utils.data_cleaning import extract_identity_col
7
8
  from scipy.spatial.distance import cdist
8
- from celldetective.io import locate_labels, get_position_pickle, get_position_table
9
-
10
- abs_path = os.sep.join([os.path.split(os.path.dirname(os.path.realpath(__file__)))[0], 'celldetective'])
11
-
12
- def _fill_distance_neighborhood_at_t(time_index, setA, setB, dist_map, attention_weight=None, include_dead_weight=False, symmetrize=False, compute_cum_sum=False,
13
- weights=None, closest_A=None, neigh_col="", column_labelsA=None, column_labelsB=None, statusA=None, statusB=None, distance=10):
14
-
15
- index_A = list(setA.loc[setA[column_labelsA['time']] == time_index].index)
16
- index_B = list(setB.loc[setB[column_labelsB['time']] == time_index].index)
17
-
18
- dataA = setA.loc[setA[column_labelsA['time']] == time_index, [column_labelsA['x'], column_labelsA['y'], column_labelsA['track'],statusA]].to_numpy()
19
-
20
- ids_A = dataA[:, 2]
21
- status_A = dataA[:, 3]
22
-
23
- dataB = setB.loc[setB[column_labelsB['time']] == time_index, [column_labelsB['x'], column_labelsB['y'], column_labelsB['track'],statusB]].to_numpy()
24
- ids_B = dataB[:, 2]
25
- status_B = dataB[:, 3]
26
-
27
- for k in range(dist_map.shape[0]):
28
-
29
- col = dist_map[k, :]
30
- col[col == 0.] = 1.0E06
31
-
32
- neighs_B = np.array([ids_B[i] for i in np.where((col <= distance))[0]])
33
- status_neigh_B = np.array([status_B[i] for i in np.where((col <= distance))[0]])
34
- dist_B = [round(col[i], 2) for i in np.where((col <= distance))[0]]
35
- if len(dist_B) > 0:
36
- closest_B_cell = neighs_B[np.argmin(dist_B)]
37
-
38
- if symmetrize and attention_weight:
39
- n_neighs = float(len(neighs_B))
40
- if not include_dead_weight:
41
- n_neighs_alive = len(np.where(status_neigh_B == 1)[0])
42
- neigh_count = n_neighs_alive
43
- else:
44
- neigh_count = n_neighs
45
- if neigh_count > 0:
46
- weight_A = 1. / neigh_count
47
- else:
48
- weight_A = np.nan
49
-
50
- if not include_dead_weight and status_A[k] == 0:
51
- weight_A = 0
52
-
53
- neighs = []
54
- setA.at[index_A[k], neigh_col] = []
55
- for n in range(len(neighs_B)):
56
-
57
- # index in setB
58
- n_index = np.where(ids_B == neighs_B[n])[0][0]
59
- # Assess if neigh B is closest to A
60
- if attention_weight:
61
- if closest_A[n_index] == ids_A[k]:
62
- closest = True
63
- else:
64
- closest = False
65
-
66
- if symmetrize:
67
- # Load neighborhood previous data
68
- sym_neigh = setB.loc[index_B[n_index], neigh_col]
69
- if neighs_B[n] == closest_B_cell:
70
- closest_b = True
71
- else:
72
- closest_b = False
73
- if isinstance(sym_neigh, list):
74
- sym_neigh.append({'id': ids_A[k], 'distance': dist_B[n], 'status': status_A[k]})
75
- else:
76
- sym_neigh = [{'id': ids_A[k], 'distance': dist_B[n], 'status': status_A[k]}]
77
- if attention_weight:
78
- sym_neigh[-1].update({'weight': weight_A, 'closest': closest_b})
79
-
80
- # Write the minimum info about neighborhing cell B
81
- neigh_dico = {'id': neighs_B[n], 'distance': dist_B[n], 'status': status_neigh_B[n]}
82
- if attention_weight:
83
- neigh_dico.update({'weight': weights[n_index], 'closest': closest})
84
-
85
- if compute_cum_sum:
86
- # Compute the integrated presence of the neighboring cell B
87
- assert column_labelsB[
88
- 'track'] == 'TRACK_ID', 'The set B does not seem to contain tracked data. The cumulative time will be meaningless.'
89
- past_neighs = [[ll['id'] for ll in l] if len(l) > 0 else [None] for l in setA.loc[
90
- (setA[column_labelsA['track']] == ids_A[k]) & (setA[column_labelsA['time']] <= time_index), neigh_col].to_numpy()]
91
- past_neighs = [item for sublist in past_neighs for item in sublist]
92
-
93
- if attention_weight:
94
- past_weights = [[ll['weight'] for ll in l] if len(l) > 0 else [None] for l in setA.loc[
95
- (setA[column_labelsA['track']] == ids_A[k]) & (
96
- setA[column_labelsA['time']] <= time_index), neigh_col].to_numpy()]
97
- past_weights = [item for sublist in past_weights for item in sublist]
98
-
99
- cum_sum = len(np.where(past_neighs == neighs_B[n])[0])
100
- neigh_dico.update({'cumulated_presence': cum_sum + 1})
101
-
102
- if attention_weight:
103
- cum_sum_weighted = np.sum(
104
- [w if l == neighs_B[n] else 0 for l, w in zip(past_neighs, past_weights)])
105
- neigh_dico.update({'cumulated_presence_weighted': cum_sum_weighted + weights[n_index]})
106
-
107
- if symmetrize:
108
- setB.at[index_B[n_index], neigh_col] = sym_neigh
109
-
110
- neighs.append(neigh_dico)
111
-
112
- setA.at[index_A[k], neigh_col] = neighs
113
-
114
- def _fill_contact_neighborhood_at_t(time_index, setA, setB, dist_map, intersection_map=None, attention_weight=None, include_dead_weight=False, symmetrize=False, compute_cum_sum=False,
115
- weights=None, closest_A=None, neigh_col="", column_labelsA=None, column_labelsB=None, statusA=None, statusB=None, d_filter=10):
116
-
117
- index_A = list(setA.loc[setA[column_labelsA['time']] == time_index].index)
118
- index_B = list(setB.loc[setB[column_labelsB['time']] == time_index].index)
119
-
120
- dataA = setA.loc[setA[column_labelsA['time']] == time_index, [column_labelsA['x'], column_labelsA['y'], column_labelsA['track'], column_labelsA['mask_id'],
121
- statusA]].to_numpy()
122
-
123
- ids_A = dataA[:, 2]
124
- status_A = dataA[:, 4]
125
-
126
- dataB = setB.loc[setB[column_labelsB['time']] == time_index, [column_labelsB['x'], column_labelsB['y'], column_labelsB['track'], column_labelsB['mask_id'],
127
- statusB]].to_numpy()
128
- ids_B = dataB[:, 2]
129
- status_B = dataB[:, 4]
130
-
131
- for k in range(dist_map.shape[0]):
132
-
133
- col = dist_map[k, :]
134
- col_inter = intersection_map[k, :]
135
- col[col == 0.] = 1.0E06
136
-
137
- neighs_B = np.array([ids_B[i] for i in np.where((col <= d_filter))[0]])
138
- status_neigh_B = np.array([status_B[i] for i in np.where((col <= d_filter))[0]])
139
- dist_B = [round(col[i], 2) for i in np.where((col <= d_filter))[0]]
140
- intersect_B = [round(col_inter[i], 2) for i in np.where((col <= d_filter))[0]]
141
-
142
- if len(dist_B) > 0:
143
- closest_B_cell = neighs_B[np.argmin(dist_B)]
144
-
145
- if symmetrize and attention_weight:
146
- n_neighs = float(len(neighs_B))
147
- if not include_dead_weight:
148
- n_neighs_alive = len(np.where(status_neigh_B == 1)[0])
149
- neigh_count = n_neighs_alive
150
- else:
151
- neigh_count = n_neighs
152
- if neigh_count > 0:
153
- weight_A = 1. / neigh_count
154
- else:
155
- weight_A = np.nan
156
-
157
- if not include_dead_weight and status_A[k] == 0:
158
- weight_A = 0
159
-
160
- neighs = []
161
- setA.at[index_A[k], neigh_col] = []
162
- for n in range(len(neighs_B)):
163
-
164
- # index in setB
165
- n_index = np.where(ids_B == neighs_B[n])[0][0]
166
- # Assess if neigh B is closest to A
167
- if attention_weight:
168
- if closest_A[n_index] == ids_A[k]:
169
- closest = True
170
- else:
171
- closest = False
172
-
173
- if symmetrize:
174
- # Load neighborhood previous data
175
- sym_neigh = setB.loc[index_B[n_index], neigh_col]
176
- if neighs_B[n] == closest_B_cell:
177
- closest_b = True
178
- else:
179
- closest_b = False
180
- if isinstance(sym_neigh, list):
181
- sym_neigh.append({'id': ids_A[k], 'distance': dist_B[n], 'status': status_A[k],
182
- 'intersection': intersect_B[n]})
183
- else:
184
- sym_neigh = [{'id': ids_A[k], 'distance': dist_B[n], 'status': status_A[k],
185
- 'intersection': intersect_B[n]}]
186
- if attention_weight:
187
- sym_neigh[-1].update({'weight': weight_A, 'closest': closest_b})
188
-
189
- # Write the minimum info about neighborhing cell B
190
- neigh_dico = {'id': neighs_B[n], 'distance': dist_B[n], 'status': status_neigh_B[n],
191
- 'intersection': intersect_B[n]}
192
- if attention_weight:
193
- neigh_dico.update({'weight': weights[n_index], 'closest': closest})
194
-
195
- if compute_cum_sum:
196
- # Compute the integrated presence of the neighboring cell B
197
- assert column_labelsB[
198
- 'track'] == 'TRACK_ID', 'The set B does not seem to contain tracked data. The cumulative time will be meaningless.'
199
- past_neighs = [[ll['id'] for ll in l] if len(l) > 0 else [None] for l in setA.loc[
200
- (setA[column_labelsA['track']] == ids_A[k]) & (
201
- setA[column_labelsA['time']] <= time_index), neigh_col].to_numpy()]
202
- past_neighs = [item for sublist in past_neighs for item in sublist]
203
-
204
- if attention_weight:
205
- past_weights = [[ll['weight'] for ll in l] if len(l) > 0 else [None] for l in
206
- setA.loc[
207
- (setA[column_labelsA['track']] == ids_A[k]) & (
208
- setA[column_labelsA['time']] <= time_index), neigh_col].to_numpy()]
209
- past_weights = [item for sublist in past_weights for item in sublist]
210
-
211
- cum_sum = len(np.where(past_neighs == neighs_B[n])[0])
212
- neigh_dico.update({'cumulated_presence': cum_sum + 1})
213
-
214
- if attention_weight:
215
- cum_sum_weighted = np.sum(
216
- [w if l == neighs_B[n] else 0 for l, w in zip(past_neighs, past_weights)])
217
- neigh_dico.update(
218
- {'cumulated_presence_weighted': cum_sum_weighted + weights[n_index]})
219
-
220
- if symmetrize:
221
- setB.at[index_B[n_index], neigh_col] = sym_neigh
222
-
223
- neighs.append(neigh_dico)
224
-
225
- setA.at[index_A[k], neigh_col] = neighs
226
-
227
-
228
- def _compute_mask_contact_dist_map(setA, setB, labelsA, labelsB=None, distance=10, mode="self", column_labelsA=None, column_labelsB=None):
229
-
230
- coordinates_A = setA.loc[:, [column_labelsA['x'], column_labelsA['y']]].to_numpy()
231
- coordinates_B = setB.loc[:, [column_labelsB['x'], column_labelsB['y']]].to_numpy()
232
- ids_A = setA.loc[:, column_labelsA["track"]].to_numpy()
233
- ids_B = setB.loc[:, column_labelsB["track"]].to_numpy()
234
- mask_ids_A = setA.loc[:, column_labelsA["mask_id"]].to_numpy()
235
- mask_ids_B = setB.loc[:, column_labelsB["mask_id"]].to_numpy()
236
-
237
- # compute distance matrix
238
- dist_map = cdist(coordinates_A, coordinates_B, metric="euclidean")
239
- intersection_map = np.zeros_like(dist_map).astype(float)
240
-
241
- # Do the mask contact computation
242
- labelsA = np.where(np.isin(labelsA, mask_ids_A), labelsA.copy(), 0.)
243
-
244
- if labelsB is not None:
245
- labelsB = np.where(np.isin(labelsB, mask_ids_B), labelsB.copy(), 0.)
246
-
247
- contact_pairs = contact_neighborhood(labelsA, labelsB=labelsB, border=distance, connectivity=2)
248
-
249
- # Put infinite distance to all non-contact pairs (something like this)
250
- flatA = labelsA.flatten()
251
- if labelsB is not None:
252
- flatB = labelsB.flatten()
253
-
254
- if len(contact_pairs) > 0:
255
- mask = np.ones_like(dist_map).astype(bool)
256
-
257
- indices_to_keep = []
258
- for cp in contact_pairs:
259
-
260
- cp = np.abs(cp)
261
- mask_A, mask_B = cp
262
- idx_A = np.where(mask_ids_A == int(mask_A))[0][0]
263
- idx_B = np.where(mask_ids_B == int(mask_B))[0][0]
264
-
265
- intersection = 0
266
- if labelsB is not None:
267
- intersection = len(flatA[(flatA == int(mask_A)) & (flatB == int(mask_B))])
268
-
269
- indices_to_keep.append([idx_A, idx_B, intersection])
270
- print(f'Ref cell #{ids_A[idx_A]} matched with neigh. cell #{ids_B[idx_B]}...')
271
- print(f'Computed intersection: {intersection} px...')
272
-
273
- if len(indices_to_keep) > 0:
274
- indices_to_keep = np.array(indices_to_keep)
275
- mask[indices_to_keep[:, 0], indices_to_keep[:, 1]] = False
276
- if mode == 'self':
277
- mask[indices_to_keep[:, 1], indices_to_keep[:, 0]] = False
278
- dist_map[mask] = 1.0E06
279
- intersection_map[indices_to_keep[:, 0], indices_to_keep[:, 1]] = indices_to_keep[:, 2]
280
- else:
281
- dist_map[:, :] = 1.0E06
282
- else:
283
- dist_map[:, :] = 1.0E06
284
-
285
- return dist_map, intersection_map
9
+ from celldetective.utils.image_loaders import locate_labels
10
+ from celldetective.utils.data_loaders import get_position_table, get_position_pickle
11
+
12
+ abs_path = os.sep.join(
13
+ [os.path.split(os.path.dirname(os.path.realpath(__file__)))[0], "celldetective"]
14
+ )
15
+
16
+
17
+ def _fill_distance_neighborhood_at_t(
18
+ time_index,
19
+ setA,
20
+ setB,
21
+ dist_map,
22
+ attention_weight=None,
23
+ include_dead_weight=False,
24
+ symmetrize=False,
25
+ compute_cum_sum=False,
26
+ weights=None,
27
+ closest_A=None,
28
+ neigh_col="",
29
+ column_labelsA=None,
30
+ column_labelsB=None,
31
+ statusA=None,
32
+ statusB=None,
33
+ distance=10,
34
+ ):
35
+
36
+ index_A = list(setA.loc[setA[column_labelsA["time"]] == time_index].index)
37
+ index_B = list(setB.loc[setB[column_labelsB["time"]] == time_index].index)
38
+
39
+ dataA = setA.loc[
40
+ setA[column_labelsA["time"]] == time_index,
41
+ [column_labelsA["x"], column_labelsA["y"], column_labelsA["track"], statusA],
42
+ ].to_numpy()
43
+
44
+ ids_A = dataA[:, 2]
45
+ status_A = dataA[:, 3]
46
+
47
+ dataB = setB.loc[
48
+ setB[column_labelsB["time"]] == time_index,
49
+ [column_labelsB["x"], column_labelsB["y"], column_labelsB["track"], statusB],
50
+ ].to_numpy()
51
+ ids_B = dataB[:, 2]
52
+ status_B = dataB[:, 3]
53
+
54
+ for k in range(dist_map.shape[0]):
55
+
56
+ col = dist_map[k, :]
57
+ col[col == 0.0] = 1.0e06
58
+
59
+ neighs_B = np.array([ids_B[i] for i in np.where((col <= distance))[0]])
60
+ status_neigh_B = np.array([status_B[i] for i in np.where((col <= distance))[0]])
61
+ dist_B = [round(col[i], 2) for i in np.where((col <= distance))[0]]
62
+ if len(dist_B) > 0:
63
+ closest_B_cell = neighs_B[np.argmin(dist_B)]
64
+
65
+ if symmetrize and attention_weight:
66
+ n_neighs = float(len(neighs_B))
67
+ if not include_dead_weight:
68
+ n_neighs_alive = len(np.where(status_neigh_B == 1)[0])
69
+ neigh_count = n_neighs_alive
70
+ else:
71
+ neigh_count = n_neighs
72
+ if neigh_count > 0:
73
+ weight_A = 1.0 / neigh_count
74
+ else:
75
+ weight_A = np.nan
76
+
77
+ if not include_dead_weight and status_A[k] == 0:
78
+ weight_A = 0
79
+
80
+ neighs = []
81
+ setA.at[index_A[k], neigh_col] = []
82
+ for n in range(len(neighs_B)):
83
+
84
+ # index in setB
85
+ n_index = np.where(ids_B == neighs_B[n])[0][0]
86
+ # Assess if neigh B is closest to A
87
+ if attention_weight:
88
+ if closest_A[n_index] == ids_A[k]:
89
+ closest = True
90
+ else:
91
+ closest = False
92
+
93
+ if symmetrize:
94
+ # Load neighborhood previous data
95
+ sym_neigh = setB.loc[index_B[n_index], neigh_col]
96
+ if neighs_B[n] == closest_B_cell:
97
+ closest_b = True
98
+ else:
99
+ closest_b = False
100
+ if isinstance(sym_neigh, list):
101
+ sym_neigh.append(
102
+ {"id": ids_A[k], "distance": dist_B[n], "status": status_A[k]}
103
+ )
104
+ else:
105
+ sym_neigh = [
106
+ {"id": ids_A[k], "distance": dist_B[n], "status": status_A[k]}
107
+ ]
108
+ if attention_weight:
109
+ sym_neigh[-1].update({"weight": weight_A, "closest": closest_b})
110
+
111
+ # Write the minimum info about neighborhing cell B
112
+ neigh_dico = {
113
+ "id": neighs_B[n],
114
+ "distance": dist_B[n],
115
+ "status": status_neigh_B[n],
116
+ }
117
+ if attention_weight:
118
+ neigh_dico.update({"weight": weights[n_index], "closest": closest})
119
+
120
+ if compute_cum_sum:
121
+ # Compute the integrated presence of the neighboring cell B
122
+ assert (
123
+ column_labelsB["track"] == "TRACK_ID"
124
+ ), "The set B does not seem to contain tracked data. The cumulative time will be meaningless."
125
+ past_neighs = [
126
+ [ll["id"] for ll in l] if len(l) > 0 else [None]
127
+ for l in setA.loc[
128
+ (setA[column_labelsA["track"]] == ids_A[k])
129
+ & (setA[column_labelsA["time"]] <= time_index),
130
+ neigh_col,
131
+ ].to_numpy()
132
+ ]
133
+ past_neighs = [item for sublist in past_neighs for item in sublist]
134
+
135
+ if attention_weight:
136
+ past_weights = [
137
+ [ll["weight"] for ll in l] if len(l) > 0 else [None]
138
+ for l in setA.loc[
139
+ (setA[column_labelsA["track"]] == ids_A[k])
140
+ & (setA[column_labelsA["time"]] <= time_index),
141
+ neigh_col,
142
+ ].to_numpy()
143
+ ]
144
+ past_weights = [
145
+ item for sublist in past_weights for item in sublist
146
+ ]
147
+
148
+ cum_sum = len(np.where(past_neighs == neighs_B[n])[0])
149
+ neigh_dico.update({"cumulated_presence": cum_sum + 1})
150
+
151
+ if attention_weight:
152
+ cum_sum_weighted = np.sum(
153
+ [
154
+ w if l == neighs_B[n] else 0
155
+ for l, w in zip(past_neighs, past_weights)
156
+ ]
157
+ )
158
+ neigh_dico.update(
159
+ {
160
+ "cumulated_presence_weighted": cum_sum_weighted
161
+ + weights[n_index]
162
+ }
163
+ )
164
+
165
+ if symmetrize:
166
+ setB.at[index_B[n_index], neigh_col] = sym_neigh
167
+
168
+ neighs.append(neigh_dico)
169
+
170
+ setA.at[index_A[k], neigh_col] = neighs
171
+
172
+
173
+ def _fill_contact_neighborhood_at_t(
174
+ time_index,
175
+ setA,
176
+ setB,
177
+ dist_map,
178
+ intersection_map=None,
179
+ attention_weight=None,
180
+ include_dead_weight=False,
181
+ symmetrize=False,
182
+ compute_cum_sum=False,
183
+ weights=None,
184
+ closest_A=None,
185
+ neigh_col="",
186
+ column_labelsA=None,
187
+ column_labelsB=None,
188
+ statusA=None,
189
+ statusB=None,
190
+ d_filter=10,
191
+ ):
192
+
193
+ index_A = list(setA.loc[setA[column_labelsA["time"]] == time_index].index)
194
+ index_B = list(setB.loc[setB[column_labelsB["time"]] == time_index].index)
195
+
196
+ dataA = setA.loc[
197
+ setA[column_labelsA["time"]] == time_index,
198
+ [
199
+ column_labelsA["x"],
200
+ column_labelsA["y"],
201
+ column_labelsA["track"],
202
+ column_labelsA["mask_id"],
203
+ statusA,
204
+ ],
205
+ ].to_numpy()
206
+
207
+ ids_A = dataA[:, 2]
208
+ status_A = dataA[:, 4]
209
+
210
+ dataB = setB.loc[
211
+ setB[column_labelsB["time"]] == time_index,
212
+ [
213
+ column_labelsB["x"],
214
+ column_labelsB["y"],
215
+ column_labelsB["track"],
216
+ column_labelsB["mask_id"],
217
+ statusB,
218
+ ],
219
+ ].to_numpy()
220
+ ids_B = dataB[:, 2]
221
+ status_B = dataB[:, 4]
222
+
223
+ for k in range(dist_map.shape[0]):
224
+
225
+ col = dist_map[k, :]
226
+ col_inter = intersection_map[k, :]
227
+ col[col == 0.0] = 1.0e06
228
+
229
+ neighs_B = np.array([ids_B[i] for i in np.where((col <= d_filter))[0]])
230
+ status_neigh_B = np.array([status_B[i] for i in np.where((col <= d_filter))[0]])
231
+ dist_B = [round(col[i], 2) for i in np.where((col <= d_filter))[0]]
232
+ intersect_B = [round(col_inter[i], 2) for i in np.where((col <= d_filter))[0]]
233
+
234
+ if len(dist_B) > 0:
235
+ closest_B_cell = neighs_B[np.argmin(dist_B)]
236
+
237
+ if symmetrize and attention_weight:
238
+ n_neighs = float(len(neighs_B))
239
+ if not include_dead_weight:
240
+ n_neighs_alive = len(np.where(status_neigh_B == 1)[0])
241
+ neigh_count = n_neighs_alive
242
+ else:
243
+ neigh_count = n_neighs
244
+ if neigh_count > 0:
245
+ weight_A = 1.0 / neigh_count
246
+ else:
247
+ weight_A = np.nan
248
+
249
+ if not include_dead_weight and status_A[k] == 0:
250
+ weight_A = 0
251
+
252
+ neighs = []
253
+ setA.at[index_A[k], neigh_col] = []
254
+ for n in range(len(neighs_B)):
255
+
256
+ # index in setB
257
+ n_index = np.where(ids_B == neighs_B[n])[0][0]
258
+ # Assess if neigh B is closest to A
259
+ if attention_weight:
260
+ if closest_A[n_index] == ids_A[k]:
261
+ closest = True
262
+ else:
263
+ closest = False
264
+
265
+ if symmetrize:
266
+ # Load neighborhood previous data
267
+ sym_neigh = setB.loc[index_B[n_index], neigh_col]
268
+ if neighs_B[n] == closest_B_cell:
269
+ closest_b = True
270
+ else:
271
+ closest_b = False
272
+ if isinstance(sym_neigh, list):
273
+ sym_neigh.append(
274
+ {
275
+ "id": ids_A[k],
276
+ "distance": dist_B[n],
277
+ "status": status_A[k],
278
+ "intersection": intersect_B[n],
279
+ }
280
+ )
281
+ else:
282
+ sym_neigh = [
283
+ {
284
+ "id": ids_A[k],
285
+ "distance": dist_B[n],
286
+ "status": status_A[k],
287
+ "intersection": intersect_B[n],
288
+ }
289
+ ]
290
+ if attention_weight:
291
+ sym_neigh[-1].update({"weight": weight_A, "closest": closest_b})
292
+
293
+ # Write the minimum info about neighborhing cell B
294
+ neigh_dico = {
295
+ "id": neighs_B[n],
296
+ "distance": dist_B[n],
297
+ "status": status_neigh_B[n],
298
+ "intersection": intersect_B[n],
299
+ }
300
+ if attention_weight:
301
+ neigh_dico.update({"weight": weights[n_index], "closest": closest})
302
+
303
+ if compute_cum_sum:
304
+ # Compute the integrated presence of the neighboring cell B
305
+ assert (
306
+ column_labelsB["track"] == "TRACK_ID"
307
+ ), "The set B does not seem to contain tracked data. The cumulative time will be meaningless."
308
+ past_neighs = [
309
+ [ll["id"] for ll in l] if len(l) > 0 else [None]
310
+ for l in setA.loc[
311
+ (setA[column_labelsA["track"]] == ids_A[k])
312
+ & (setA[column_labelsA["time"]] <= time_index),
313
+ neigh_col,
314
+ ].to_numpy()
315
+ ]
316
+ past_neighs = [item for sublist in past_neighs for item in sublist]
317
+
318
+ if attention_weight:
319
+ past_weights = [
320
+ [ll["weight"] for ll in l] if len(l) > 0 else [None]
321
+ for l in setA.loc[
322
+ (setA[column_labelsA["track"]] == ids_A[k])
323
+ & (setA[column_labelsA["time"]] <= time_index),
324
+ neigh_col,
325
+ ].to_numpy()
326
+ ]
327
+ past_weights = [
328
+ item for sublist in past_weights for item in sublist
329
+ ]
330
+
331
+ cum_sum = len(np.where(past_neighs == neighs_B[n])[0])
332
+ neigh_dico.update({"cumulated_presence": cum_sum + 1})
333
+
334
+ if attention_weight:
335
+ cum_sum_weighted = np.sum(
336
+ [
337
+ w if l == neighs_B[n] else 0
338
+ for l, w in zip(past_neighs, past_weights)
339
+ ]
340
+ )
341
+ neigh_dico.update(
342
+ {
343
+ "cumulated_presence_weighted": cum_sum_weighted
344
+ + weights[n_index]
345
+ }
346
+ )
347
+
348
+ if symmetrize:
349
+ setB.at[index_B[n_index], neigh_col] = sym_neigh
350
+
351
+ neighs.append(neigh_dico)
352
+
353
+ setA.at[index_A[k], neigh_col] = neighs
354
+
355
+
356
+ def _compute_mask_contact_dist_map(
357
+ setA,
358
+ setB,
359
+ labelsA,
360
+ labelsB=None,
361
+ distance=10,
362
+ mode="self",
363
+ column_labelsA=None,
364
+ column_labelsB=None,
365
+ ):
366
+
367
+ coordinates_A = setA.loc[:, [column_labelsA["x"], column_labelsA["y"]]].to_numpy()
368
+ coordinates_B = setB.loc[:, [column_labelsB["x"], column_labelsB["y"]]].to_numpy()
369
+ ids_A = setA.loc[:, column_labelsA["track"]].to_numpy()
370
+ ids_B = setB.loc[:, column_labelsB["track"]].to_numpy()
371
+ mask_ids_A = setA.loc[:, column_labelsA["mask_id"]].to_numpy()
372
+ mask_ids_B = setB.loc[:, column_labelsB["mask_id"]].to_numpy()
373
+
374
+ # compute distance matrix
375
+ dist_map = cdist(coordinates_A, coordinates_B, metric="euclidean")
376
+ intersection_map = np.zeros_like(dist_map).astype(float)
377
+
378
+ # Do the mask contact computation
379
+ labelsA = np.where(np.isin(labelsA, mask_ids_A), labelsA.copy(), 0.0)
380
+
381
+ if labelsB is not None:
382
+ labelsB = np.where(np.isin(labelsB, mask_ids_B), labelsB.copy(), 0.0)
383
+
384
+ contact_pairs = contact_neighborhood(
385
+ labelsA, labelsB=labelsB, border=distance, connectivity=2
386
+ )
387
+
388
+ # Put infinite distance to all non-contact pairs (something like this)
389
+ flatA = labelsA.flatten()
390
+ if labelsB is not None:
391
+ flatB = labelsB.flatten()
392
+
393
+ if len(contact_pairs) > 0:
394
+ mask = np.ones_like(dist_map).astype(bool)
395
+
396
+ indices_to_keep = []
397
+ for cp in contact_pairs:
398
+
399
+ cp = np.abs(cp)
400
+ mask_A, mask_B = cp
401
+ idx_A = np.where(mask_ids_A == int(mask_A))[0][0]
402
+ idx_B = np.where(mask_ids_B == int(mask_B))[0][0]
403
+
404
+ intersection = 0
405
+ if labelsB is not None:
406
+ intersection = len(
407
+ flatA[(flatA == int(mask_A)) & (flatB == int(mask_B))]
408
+ )
409
+
410
+ indices_to_keep.append([idx_A, idx_B, intersection])
411
+ print(
412
+ f"Ref cell #{ids_A[idx_A]} matched with neigh. cell #{ids_B[idx_B]}..."
413
+ )
414
+ print(f"Computed intersection: {intersection} px...")
415
+
416
+ if len(indices_to_keep) > 0:
417
+ indices_to_keep = np.array(indices_to_keep)
418
+ mask[indices_to_keep[:, 0], indices_to_keep[:, 1]] = False
419
+ if mode == "self":
420
+ mask[indices_to_keep[:, 1], indices_to_keep[:, 0]] = False
421
+ dist_map[mask] = 1.0e06
422
+ intersection_map[indices_to_keep[:, 0], indices_to_keep[:, 1]] = (
423
+ indices_to_keep[:, 2]
424
+ )
425
+ else:
426
+ dist_map[:, :] = 1.0e06
427
+ else:
428
+ dist_map[:, :] = 1.0e06
429
+
430
+ return dist_map, intersection_map
286
431
 
287
432
 
288
433
  def set_live_status(setA, setB, status, not_status_option):
289
- """
290
- Updates the live status for cells in two datasets based on specified status columns and options.
291
-
292
- This function assigns a live status to cells in two datasets (setA and setB) based on the provided
293
- status columns and options. If no status column is provided, all cells are marked as live. Otherwise,
294
- the function updates the datasets based on the status criteria, potentially inverting the status
295
- based on the `not_status_option`.
296
-
297
- Parameters
298
- ----------
299
- setA : pandas.DataFrame
300
- The first dataset containing trajectory or position information for cells.
301
- setB : pandas.DataFrame
302
- The second dataset containing trajectory or position information for cells.
303
- status : list or None
304
- A list containing the names of the columns in setA and setB that classify cells as alive (1) or dead (0).
305
- If None, all cells are considered alive. The list should contain exactly two elements.
306
- not_status_option : list
307
- A list containing boolean values indicating whether to invert the status for setA and setB, respectively.
308
- True means the status should be inverted; False means it should not.
309
-
310
- Returns
311
- -------
312
- tuple
313
- A tuple containing the updated setA and setB DataFrames, along with the final status column names
314
- used to classify cells in each set.
315
-
316
- """
317
-
318
- print(f"Provided statuses: {status}...")
319
- if status is None or status==["live_status","live_status"] or status==[None,None]:
320
- setA.loc[:,'live_status'] = 1
321
- setB.loc[:,'live_status'] = 1
322
- status = ['live_status', 'live_status']
323
- elif isinstance(status,list):
324
- assert len(status)==2,'Please provide only two columns to classify cells as alive or dead.'
325
- if status[0] is None or status[0]=='live_status':
326
- setA.loc[:,'live_status'] = 1
327
- status[0] = 'live_status'
328
- elif status[0] is not None and isinstance(not_status_option, list):
329
- setA.loc[setA[status[0]] == 2, status[0]] = 1 # already happened events become event
330
- if not_status_option[0]:
331
- setA.loc[:,'not_'+status[0]] = [not a if a==0 or a==1 else np.nan for a in setA.loc[:,status[0]].values]
332
- status[0] = 'not_'+status[0]
333
- if status[1] is None or status[1]=='live_status':
334
- setB.loc[:,'live_status'] = 1
335
- status[1] = 'live_status'
336
- elif status[1] is not None and isinstance(not_status_option, list):
337
- setB.loc[setB[status[1]] == 2, status[1]] = 1 # already happened events become event
338
- if not_status_option[1]:
339
- setB.loc[:, 'not_' + status[1]] = [not a if a == 0 or a == 1 else np.nan for a in
340
- setB.loc[:, status[1]].values]
341
- status[1] = 'not_' + status[1]
342
-
343
- assert status[0] in list(setA.columns)
344
- assert status[1] in list(setB.columns)
345
-
346
- setA = setA.reset_index(drop=True)
347
- setB = setB.reset_index(drop=True)
348
-
349
- return setA, setB, status
350
-
351
-
352
- def compute_attention_weight(dist_matrix, cut_distance, opposite_cell_status, opposite_cell_ids, axis=1,
353
- include_dead_weight=True):
354
- """
355
- Computes the attention weight for each cell based on its proximity to cells of an opposite type within a specified distance.
356
-
357
- This function calculates the attention weight for cells by considering the distance to the cells of an opposite type
358
- within a given cutoff distance. It optionally considers only the 'live' opposite cells based on their status. The function
359
- returns two arrays: one containing the attention weights and another containing the IDs of the closest opposite cells.
360
-
361
- Parameters
362
- ----------
363
- dist_matrix : ndarray
364
- A 2D array representing the distance matrix between cells of two types.
365
- cut_distance : float
366
- The cutoff distance within which opposite cells will influence the attention weight.
367
- opposite_cell_status : ndarray
368
- An array indicating the status (e.g., live or dead) of each opposite cell. Only used when `include_dead_weight` is False.
369
- opposite_cell_ids : ndarray
370
- An array containing the IDs of the opposite cells.
371
- axis : int, optional
372
- The axis along which to compute the weights (default is 1). Axis 0 corresponds to rows, and axis 1 corresponds to columns.
373
- include_dead_weight : bool, optional
374
- If True, includes all opposite cells within the cutoff distance in the weight calculation, regardless of their status.
375
- If False, only considers opposite cells that are 'live' (default is True).
376
-
377
- Returns
378
- -------
379
- tuple of ndarrays
380
- A tuple containing two arrays: `weights` and `closest_opposite`. `weights` is an array of attention weights for each cell,
381
- and `closest_opposite` is an array of the IDs of the closest opposite cells within the cutoff distance.
382
-
383
- """
384
-
385
- weights = np.empty(dist_matrix.shape[axis])
386
- closest_opposite = np.empty(dist_matrix.shape[axis])
387
-
388
- for i in range(dist_matrix.shape[axis]):
389
- if axis == 1:
390
- row = dist_matrix[:, i]
391
- elif axis == 0:
392
- row = dist_matrix[i, :]
393
- row[row == 0.] = 1.0E06
394
- nbr_opposite = len(row[row <= cut_distance])
395
-
396
- if not include_dead_weight:
397
- stat = opposite_cell_status[np.where(row <= cut_distance)[0]]
398
- nbr_opposite = len(stat[stat == 1])
399
- index_subpop = np.argmin(row[opposite_cell_status == 1])
400
- closest_opposite[i] = opposite_cell_ids[opposite_cell_status == 1][index_subpop]
401
- else:
402
- closest_opposite[i] = opposite_cell_ids[np.argmin(row)]
403
-
404
- if nbr_opposite > 0:
405
- weight = 1. / float(nbr_opposite)
406
- weights[i] = weight
407
-
408
- return weights, closest_opposite
409
-
410
-
411
- def distance_cut_neighborhood(setA, setB, distance, mode='two-pop', status=None, not_status_option=None,
412
- compute_cum_sum=True,
413
- attention_weight=True, symmetrize=True, include_dead_weight=True,
414
- column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X',
415
- 'y': 'POSITION_Y'}):
416
- """
417
-
418
- Match neighbors in set A and B within a circle of radius d.
419
-
420
- Parameters
421
- ----------
422
- setA,setB : pandas DataFrame
423
- Trajectory or position sets A and B.
424
- distance : float
425
- Cut-distance in pixels to match neighboring pairs.
426
- mode: str
427
- neighboring mode, between 'two-pop' (e.g. target-effector) and 'self' (target-target or effector-effector).
428
- status: None or status
429
- name to look for cells to ignore (because they are dead). By default all cells are kept.
430
- compute_cum_sum: bool,
431
- compute cumulated time of presence of neighbours (only if trajectories available for both sets)
432
- attention_weight: bool,
433
- compute the attention weight (how much a cell of set B is shared across cells of set A)
434
- symmetrize: bool,
435
- write in set B the neighborhood of set A
436
- include_dead_weight: bool
437
- do not count dead cells when establishing attention weight
438
- """
439
-
440
- # Check live_status option
441
- if setA is not None and setB is not None:
442
- setA, setB, status = set_live_status(setA, setB, status, not_status_option)
443
- else:
444
- return None, None
445
-
446
- # Check distance option
447
- if not isinstance(distance, list):
448
- distance = [distance]
449
-
450
- for d in distance:
451
- # loop over each provided distance
452
-
453
- if mode == 'two-pop':
454
- neigh_col = f'neighborhood_2_circle_{d}_px'
455
- elif mode == 'self':
456
- neigh_col = f'neighborhood_self_circle_{d}_px'
457
-
458
- cl = []
459
- for s in [setA, setB]:
460
-
461
- # Check whether data can be tracked
462
- temp_column_labels = column_labels.copy()
463
-
464
- id_col = extract_identity_col(s)
465
- temp_column_labels.update({'track': id_col})
466
- if id_col=='ID':
467
- compute_cum_sum = False # if no tracking data then cum_sum is not relevant
468
- cl.append(temp_column_labels)
469
-
470
- # Remove nan tracks (cells that do not belong to a track)
471
- s[neigh_col] = np.nan
472
- s[neigh_col] = s[neigh_col].astype(object)
473
- s.dropna(subset=[cl[-1]['track']], inplace=True)
474
-
475
- # Loop over each available timestep
476
- timeline = np.unique(np.concatenate([setA[cl[0]['time']].to_numpy(), setB[cl[1]['time']].to_numpy()])).astype(
477
- int)
478
- for t in tqdm(timeline):
479
-
480
- coordinates_A = setA.loc[setA[cl[0]['time']] == t, [cl[0]['x'], cl[0]['y']]].to_numpy()
481
- ids_A = setA.loc[setA[cl[0]['time']] == t, cl[0]['track']].to_numpy()
482
- status_A = setA.loc[setA[cl[0]['time']] == t, status[0]].to_numpy()
483
-
484
- coordinates_B = setB.loc[setB[cl[1]['time']] == t, [cl[1]['x'], cl[1]['y']]].to_numpy()
485
- ids_B = setB.loc[setB[cl[1]['time']] == t, cl[1]['track']].to_numpy()
486
-
487
- if len(ids_A) > 0 and len(ids_B) > 0:
488
-
489
- # compute distance matrix
490
- dist_map = cdist(coordinates_A, coordinates_B, metric="euclidean")
491
-
492
- if attention_weight:
493
- weights, closest_A = compute_attention_weight(dist_map, d, status_A, ids_A, axis=1, include_dead_weight=include_dead_weight)
494
-
495
- _fill_distance_neighborhood_at_t(t, setA, setB, dist_map,
496
- attention_weight=attention_weight,
497
- include_dead_weight=include_dead_weight, symmetrize=symmetrize,
498
- compute_cum_sum=compute_cum_sum, weights=weights, closest_A=closest_A,
499
- neigh_col=neigh_col, column_labelsA=cl[0], column_labelsB=cl[1],
500
- statusA=status[0], statusB=status[1], distance=d)
501
-
502
- return setA, setB
503
-
504
-
505
- def compute_neighborhood_at_position(pos, distance, population=['targets', 'effectors'], theta_dist=None,
506
- img_shape=(2048, 2048), return_tables=False, clear_neigh=False,
507
- event_time_col=None,
508
- neighborhood_kwargs={'mode': 'two-pop', 'status': None, 'not_status_option': None,
509
- 'include_dead_weight': True, "compute_cum_sum": False,
510
- "attention_weight": True, 'symmetrize': True}):
511
- """
512
- Computes neighborhood metrics for specified cell populations within a given position, based on distance criteria and additional parameters.
513
-
514
- This function assesses the neighborhood interactions between two specified cell populations (or within a single population) at a given position.
515
- It computes various neighborhood metrics based on specified distances, considering the entire image or excluding edge regions.
516
- The results are optionally cleared of previous neighborhood calculations and can be returned as updated tables.
517
-
518
- Parameters
519
- ----------
520
- pos : str
521
- The path to the position directory where the analysis is to be performed.
522
- distance : float or list of float
523
- The distance(s) in pixels to define neighborhoods.
524
- population : list of str, optional
525
- Names of the cell populations to analyze. If a single population is provided, it is used for both populations in the analysis (default is ['targets', 'effectors']).
526
- theta_dist : float or list of float, optional
527
- Edge threshold(s) in pixels to exclude cells close to the image boundaries from the analysis. If not provided, defaults to 90% of each specified distance.
528
- img_shape : tuple of int, optional
529
- The dimensions (height, width) of the images in pixels (default is (2048, 2048)).
530
- return_tables : bool, optional
531
- If True, returns the updated data tables for both populations (default is False).
532
- clear_neigh : bool, optional
533
- If True, clears existing neighborhood columns from the data tables before computing new metrics (default is False).
534
- event_time_col : str, optional
535
- The column name indicating the event time for each cell, required if mean neighborhood metrics are to be computed before events.
536
- neighborhood_kwargs : dict, optional
537
- Additional keyword arguments for neighborhood computation, including mode, status options, and metrics (default includes mode 'two-pop', and symmetrization).
538
-
539
- Returns
540
- -------
541
- pandas.DataFrame or (pandas.DataFrame, pandas.DataFrame)
542
- If `return_tables` is True, returns the updated data tables for the specified populations. If only one population is analyzed, both returned data frames will be identical.
543
-
544
- Raises
545
- ------
546
- AssertionError
547
- If the specified position path does not exist or if the number of distances and edge thresholds do not match.
548
-
549
- """
550
-
551
- pos = pos.replace('\\', '/')
552
- pos = rf"{pos}"
553
- assert os.path.exists(pos), f'Position {pos} is not a valid path.'
554
-
555
- if isinstance(population, str):
556
- population = [population, population]
557
-
558
- if not isinstance(distance, list):
559
- distance = [distance]
560
- if not theta_dist is None and not isinstance(theta_dist, list):
561
- theta_dist = [theta_dist]
562
-
563
- if theta_dist is None:
564
- theta_dist = [0.9 * d for d in distance]
565
- assert len(theta_dist) == len(distance), 'Incompatible number of distances and number of edge thresholds.'
566
-
567
- if population[0] == population[1]:
568
- neighborhood_kwargs.update({'mode': 'self'})
569
- if population[1] != population[0]:
570
- neighborhood_kwargs.update({'mode': 'two-pop'})
571
-
572
- df_A, path_A = get_position_table(pos, population=population[0], return_path=True)
573
- df_B, path_B = get_position_table(pos, population=population[1], return_path=True)
574
- if df_A is None or df_B is None:
575
- return None
576
-
577
- if clear_neigh:
578
- if os.path.exists(path_A.replace('.csv','.pkl')):
579
- os.remove(path_A.replace('.csv','.pkl'))
580
- if os.path.exists(path_B.replace('.csv','.pkl')):
581
- os.remove(path_B.replace('.csv','.pkl'))
582
- df_pair, pair_path = get_position_table(pos, population='pairs', return_path=True)
583
- if df_pair is not None:
584
- os.remove(pair_path)
585
-
586
-
587
- df_A_pkl = get_position_pickle(pos, population=population[0], return_path=False)
588
- df_B_pkl = get_position_pickle(pos, population=population[1], return_path=False)
589
-
590
- if df_A_pkl is not None:
591
- pkl_columns = np.array(df_A_pkl.columns)
592
- neigh_columns = np.array([c.startswith('neighborhood') for c in pkl_columns])
593
- cols = list(pkl_columns[neigh_columns]) + ['FRAME']
594
-
595
- id_col = extract_identity_col(df_A_pkl)
596
- cols.append(id_col)
597
- on_cols = [id_col, 'FRAME']
598
-
599
- print(f'Recover {cols} from the pickle file...')
600
- try:
601
- df_A = pd.merge(df_A, df_A_pkl.loc[:,cols], how="outer", on=on_cols)
602
- print(df_A.columns)
603
- except Exception as e:
604
- print(f'Failure to merge pickle and csv files: {e}')
605
-
606
- if df_B_pkl is not None and df_B is not None:
607
- pkl_columns = np.array(df_B_pkl.columns)
608
- neigh_columns = np.array([c.startswith('neighborhood') for c in pkl_columns])
609
- cols = list(pkl_columns[neigh_columns]) + ['FRAME']
610
-
611
- id_col = extract_identity_col(df_B_pkl)
612
- cols.append(id_col)
613
- on_cols = [id_col, 'FRAME']
614
-
615
- print(f'Recover {cols} from the pickle file...')
616
- try:
617
- df_B = pd.merge(df_B, df_B_pkl.loc[:,cols], how="outer", on=on_cols)
618
- except Exception as e:
619
- print(f'Failure to merge pickle and csv files: {e}')
620
-
621
- if clear_neigh:
622
- unwanted = df_A.columns[df_A.columns.str.contains('neighborhood')]
623
- df_A = df_A.drop(columns=unwanted)
624
- unwanted = df_B.columns[df_B.columns.str.contains('neighborhood')]
625
- df_B = df_B.drop(columns=unwanted)
626
-
627
- df_A, df_B = distance_cut_neighborhood(df_A, df_B, distance, **neighborhood_kwargs)
628
-
629
- if df_A is None or df_B is None or len(df_A)==0:
630
- return None
631
-
632
- for td, d in zip(theta_dist, distance):
633
-
634
- if neighborhood_kwargs['mode'] == 'two-pop':
635
- neigh_col = f'neighborhood_2_circle_{d}_px'
636
-
637
- elif neighborhood_kwargs['mode'] == 'self':
638
- neigh_col = f'neighborhood_self_circle_{d}_px'
639
-
640
- # edge_filter_A = (df_A['POSITION_X'] > td)&(df_A['POSITION_Y'] > td)&(df_A['POSITION_Y'] < (img_shape[0] - td))&(df_A['POSITION_X'] < (img_shape[1] - td))
641
- # edge_filter_B = (df_B['POSITION_X'] > td)&(df_B['POSITION_Y'] > td)&(df_B['POSITION_Y'] < (img_shape[0] - td))&(df_B['POSITION_X'] < (img_shape[1] - td))
642
- # df_A.loc[~edge_filter_A, neigh_col] = np.nan
643
- # df_B.loc[~edge_filter_B, neigh_col] = np.nan
644
-
645
- print('Count neighborhood...')
646
- df_A = compute_neighborhood_metrics(df_A, neigh_col, metrics=['inclusive','exclusive','intermediate'], decompose_by_status=True)
647
- # if neighborhood_kwargs['symmetrize']:
648
- # df_B = compute_neighborhood_metrics(df_B, neigh_col, metrics=['inclusive','exclusive','intermediate'], decompose_by_status=True)
649
- print('Done...')
650
-
651
- if 'TRACK_ID' in list(df_A.columns):
652
- if not np.all(df_A['TRACK_ID'].isnull()):
653
- print('Estimate average neighborhood before/after event...')
654
- df_A = mean_neighborhood_before_event(df_A, neigh_col, event_time_col)
655
- if event_time_col is not None:
656
- df_A = mean_neighborhood_after_event(df_A, neigh_col, event_time_col)
657
- print('Done...')
658
-
659
- if not population[0] == population[1]:
660
- # Remove neighborhood column from neighbor table, rename with actual population name
661
- for td, d in zip(theta_dist, distance):
662
- if neighborhood_kwargs['mode'] == 'two-pop':
663
- neigh_col = f'neighborhood_2_circle_{d}_px'
664
- new_neigh_col = neigh_col.replace('_2_',f'_({population[0]}-{population[1]})_')
665
- df_A = df_A.rename(columns={neigh_col: new_neigh_col})
666
- elif neighborhood_kwargs['mode'] == 'self':
667
- neigh_col = f'neighborhood_self_circle_{d}_px'
668
- df_B = df_B.drop(columns=[neigh_col])
669
- df_B.to_pickle(path_B.replace('.csv', '.pkl'))
670
-
671
- cols_to_rename = [c for c in list(df_A.columns) if c.startswith('intermediate_count_') or c.startswith('inclusive_count_') or c.startswith('exclusive_count_') or c.startswith('mean_count_')]
672
- new_col_names = [c.replace('_2_',f'_({population[0]}-{population[1]})_') for c in cols_to_rename]
673
- new_name_map = {}
674
- for k,c in enumerate(cols_to_rename):
675
- new_name_map.update({c: new_col_names[k]})
676
- df_A = df_A.rename(columns=new_name_map)
677
-
678
- df_A.to_pickle(path_A.replace('.csv', '.pkl'))
679
-
680
- unwanted = df_A.columns[df_A.columns.str.startswith('neighborhood_')]
681
- df_A2 = df_A.drop(columns=unwanted)
682
- df_A2.to_csv(path_A, index=False)
683
-
684
- if not population[0] == population[1]:
685
- unwanted = df_B.columns[df_B.columns.str.startswith('neighborhood_')]
686
- df_B_csv = df_B.drop(unwanted, axis=1, inplace=False)
687
- df_B_csv.to_csv(path_B, index=False)
688
-
689
- if return_tables:
690
- return df_A, df_B
691
-
692
- def compute_neighborhood_metrics(neigh_table, neigh_col, metrics=['inclusive','exclusive','intermediate'], decompose_by_status=False):
693
-
694
- """
695
- Computes and appends neighborhood metrics to a dataframe based on specified neighborhood characteristics.
696
-
697
- This function iterates through a dataframe grouped by either 'TRACK_ID' or ['position', 'TRACK_ID'] (if 'position' column exists)
698
- and computes various neighborhood metrics (inclusive, exclusive, intermediate counts) for each cell. It can also decompose these
699
- metrics by cell status (e.g., live or dead) if specified.
700
-
701
- Parameters
702
- ----------
703
- neigh_table : pandas.DataFrame
704
- A dataframe containing neighborhood information for each cell, including position, track ID, frame, and a specified neighborhood column.
705
- neigh_col : str
706
- The column name in `neigh_table` that contains neighborhood information (e.g., a list of neighbors with their attributes).
707
- metrics : list of str, optional
708
- The metrics to be computed from the neighborhood information. Possible values include 'inclusive', 'exclusive', and 'intermediate'.
709
- Default is ['inclusive', 'exclusive', 'intermediate'].
710
- decompose_by_status : bool, optional
711
- If True, the metrics are computed separately for different statuses (e.g., live or dead) of the neighboring cells. Default is False.
712
-
713
- Returns
714
- -------
715
- pandas.DataFrame
716
- The input dataframe with additional columns for each of the specified metrics, and, if `decompose_by_status` is True, separate
717
- metrics for each status.
718
-
719
- Notes
720
- -----
721
- - 'inclusive' count refers to the total number of neighbors.
722
- - 'exclusive' count refers to the number of neighbors that are closest.
723
- - 'intermediate' count refers to the sum of weights attributed to neighbors, representing a weighted count.
724
- - If `decompose_by_status` is True, metrics are appended with '_s0' or '_s1' to indicate the status they correspond to.
725
-
726
- Examples
727
- --------
728
- >>> neigh_table = pd.DataFrame({
729
- ... 'TRACK_ID': [1, 1, 2, 2],
730
- ... 'FRAME': [1, 2, 1, 2],
731
- ... 'neighborhood_info': [{'weight': 1, 'status': 1, 'closest': 1}, ...] # example neighborhood info
732
- ... })
733
- >>> neigh_col = 'neighborhood_info'
734
- >>> updated_neigh_table = compute_neighborhood_metrics(neigh_table, neigh_col, metrics=['inclusive'], decompose_by_status=True)
735
- # Computes the inclusive count of neighbors for each cell, decomposed by cell status.
736
-
737
- """
738
-
739
- neigh_table = neigh_table.reset_index(drop=True)
740
- if 'position' in list(neigh_table.columns):
741
- groupbycols = ['position']
742
- else:
743
- groupbycols = []
744
-
745
- id_col = extract_identity_col(neigh_table)
746
- groupbycols.append(id_col)
747
-
748
- neigh_table.sort_values(by=groupbycols+['FRAME'],inplace=True)
749
-
750
- for tid, group in neigh_table.groupby(groupbycols):
751
- group = group.dropna(subset=neigh_col)
752
- indices = list(group.index)
753
- neighbors = group[neigh_col].to_numpy()
754
-
755
- if 'inclusive' in metrics:
756
- n_inclusive = [len(n) for n in neighbors]
757
-
758
- if 'intermediate' in metrics:
759
- n_intermediate = np.zeros(len(neighbors))
760
- n_intermediate[:] = np.nan
761
-
762
- if 'exclusive' in metrics:
763
- n_exclusive = np.zeros(len(neighbors))
764
- n_exclusive[:] = np.nan
765
-
766
- if decompose_by_status:
767
-
768
- if 'inclusive' in metrics:
769
- n_inclusive_status_0 = np.zeros(len(neighbors))
770
- n_inclusive_status_0[:] = np.nan
771
- n_inclusive_status_1 = np.zeros(len(neighbors))
772
- n_inclusive_status_1[:] = np.nan
773
-
774
- if 'intermediate' in metrics:
775
- n_intermediate_status_0 = np.zeros(len(neighbors))
776
- n_intermediate_status_0[:] = np.nan
777
- n_intermediate_status_1 = np.zeros(len(neighbors))
778
- n_intermediate_status_1[:] = np.nan
779
-
780
- if 'exclusive' in metrics:
781
- n_exclusive_status_0 = np.zeros(len(neighbors))
782
- n_exclusive_status_0[:] = np.nan
783
- n_exclusive_status_1 = np.zeros(len(neighbors))
784
- n_exclusive_status_1[:] = np.nan
785
-
786
- for t in range(len(neighbors)):
787
-
788
- neighs_at_t = neighbors[t]
789
- weights_at_t = [n['weight'] for n in neighs_at_t]
790
- status_at_t = [n['status'] for n in neighs_at_t]
791
- closest_at_t = [n['closest'] for n in neighs_at_t]
792
-
793
- if 'intermediate' in metrics:
794
- n_intermediate[t] = np.sum(weights_at_t)
795
- if 'exclusive' in metrics:
796
- n_exclusive[t] = sum([c == 1.0 for c in closest_at_t])
797
-
798
- if decompose_by_status:
799
-
800
- if 'inclusive' in metrics:
801
- n_inclusive_status_0[t] = sum([s == 0.0 for s in status_at_t])
802
- n_inclusive_status_1[t] = sum([s == 1.0 for s in status_at_t])
803
-
804
- if 'intermediate' in metrics:
805
- weights_at_t = np.array(weights_at_t)
806
-
807
- # intermediate
808
- weights_status_1 = weights_at_t[np.array([s == 1.0 for s in status_at_t], dtype=bool)]
809
- weights_status_0 = weights_at_t[np.array([s == 0.0 for s in status_at_t], dtype=bool)]
810
- n_intermediate_status_1[t] = np.sum(weights_status_1)
811
- n_intermediate_status_0[t] = np.sum(weights_status_0)
812
-
813
- if 'exclusive' in metrics:
814
- n_exclusive_status_0[t] = sum(
815
- [c == 1.0 if s == 0.0 else False for c, s in zip(closest_at_t, status_at_t)])
816
- n_exclusive_status_1[t] = sum(
817
- [c == 1.0 if s == 1.0 else False for c, s in zip(closest_at_t, status_at_t)])
818
-
819
- if 'inclusive' in metrics:
820
- neigh_table.loc[indices, 'inclusive_count_' + neigh_col] = n_inclusive
821
- if 'intermediate' in metrics:
822
- neigh_table.loc[indices, 'intermediate_count_' + neigh_col] = n_intermediate
823
- if 'exclusive' in metrics:
824
- neigh_table.loc[indices, 'exclusive_count_' + neigh_col] = n_exclusive
825
-
826
- if decompose_by_status:
827
- if 'inclusive' in metrics:
828
- neigh_table.loc[indices, 'inclusive_count_s0_' + neigh_col] = n_inclusive_status_0
829
- neigh_table.loc[indices, 'inclusive_count_s1_' + neigh_col] = n_inclusive_status_1
830
- if 'intermediate' in metrics:
831
- neigh_table.loc[indices, 'intermediate_count_s0_' + neigh_col] = n_intermediate_status_0
832
- neigh_table.loc[indices, 'intermediate_count_s1_' + neigh_col] = n_intermediate_status_1
833
- if 'exclusive' in metrics:
834
- neigh_table.loc[indices, 'exclusive_count_s0_' + neigh_col] = n_exclusive_status_0
835
- neigh_table.loc[indices, 'exclusive_count_s1_' + neigh_col] = n_exclusive_status_1
836
-
837
- return neigh_table
838
-
839
-
840
- def mean_neighborhood_before_event(neigh_table, neigh_col, event_time_col,
841
- metrics=['inclusive', 'exclusive', 'intermediate']):
842
- """
843
- Computes the mean neighborhood metrics for each cell track before a specified event time.
844
-
845
- This function calculates the mean values of specified neighborhood metrics (inclusive, exclusive, intermediate)
846
- for each cell track up to and including the frame of an event. The function requires the neighborhood metrics to
847
- have been previously computed and appended to the input dataframe. It operates on grouped data based on position
848
- and track ID, handling cases with or without position information.
849
-
850
- Parameters
851
- ----------
852
- neigh_table : pandas.DataFrame
853
- A dataframe containing cell track data with precomputed neighborhood metrics and event time information.
854
- neigh_col : str
855
- The base name of the neighborhood metric columns in `neigh_table`.
856
- event_time_col : str or None
857
- The column name indicating the event time for each cell track. If None, the maximum frame number in the
858
- dataframe is used as the event time for all tracks.
859
-
860
- Returns
861
- -------
862
- pandas.DataFrame
863
- The input dataframe with added columns for the mean neighborhood metrics before the event for each cell track.
864
- The new columns are named as 'mean_count_{metric}_{neigh_col}_before_event', where {metric} is one of
865
- 'inclusive', 'exclusive', 'intermediate'.
866
-
867
- """
868
-
869
-
870
- neigh_table = neigh_table.reset_index(drop=True)
871
- if 'position' in list(neigh_table.columns):
872
- groupbycols = ['position']
873
- else:
874
- groupbycols = []
875
-
876
- id_col = extract_identity_col(neigh_table)
877
- groupbycols.append(id_col)
878
-
879
- neigh_table.sort_values(by=groupbycols+['FRAME'],inplace=True)
880
- suffix = '_before_event'
881
-
882
- if event_time_col is None:
883
- print('No event time was provided... Estimating the mean neighborhood over the whole observation time...')
884
- neigh_table.loc[:, 'event_time_temp'] = neigh_table['FRAME'].max()
885
- event_time_col = 'event_time_temp'
886
- suffix = ''
887
-
888
- for tid, group in neigh_table.groupby(groupbycols):
889
-
890
- group = group.dropna(subset=neigh_col)
891
- indices = list(group.index)
892
-
893
- event_time_values = group[event_time_col].to_numpy()
894
- if len(event_time_values) > 0:
895
- event_time = event_time_values[0]
896
- else:
897
- continue
898
-
899
- if event_time < 0.:
900
- event_time = group['FRAME'].max()
901
-
902
- if 'intermediate' in metrics:
903
- valid_counts_intermediate = group.loc[
904
- group['FRAME'] <= event_time, 'intermediate_count_s1_' + neigh_col].to_numpy()
905
- if len(valid_counts_intermediate[valid_counts_intermediate == valid_counts_intermediate]) > 0:
906
- neigh_table.loc[indices, f'mean_count_intermediate_{neigh_col}{suffix}'] = np.nanmean(
907
- valid_counts_intermediate)
908
- if 'inclusive' in metrics:
909
- valid_counts_inclusive = group.loc[
910
- group['FRAME'] <= event_time, 'inclusive_count_s1_' + neigh_col].to_numpy()
911
- if len(valid_counts_inclusive[valid_counts_inclusive == valid_counts_inclusive]) > 0:
912
- neigh_table.loc[indices, f'mean_count_inclusive_{neigh_col}{suffix}'] = np.nanmean(
913
- valid_counts_inclusive)
914
- if 'exclusive' in metrics:
915
- valid_counts_exclusive = group.loc[
916
- group['FRAME'] <= event_time, 'exclusive_count_s1_' + neigh_col].to_numpy()
917
- if len(valid_counts_exclusive[valid_counts_exclusive == valid_counts_exclusive]) > 0:
918
- neigh_table.loc[indices, f'mean_count_exclusive_{neigh_col}{suffix}'] = np.nanmean(
919
- valid_counts_exclusive)
920
-
921
- if event_time_col == 'event_time_temp':
922
- neigh_table = neigh_table.drop(columns='event_time_temp')
923
- return neigh_table
924
-
925
-
926
def mean_neighborhood_after_event(neigh_table, neigh_col, event_time_col,
								  metrics=['inclusive', 'exclusive', 'intermediate']):
	"""
	Average the post-event neighborhood counts of each cell track.

	For every track (grouped by position when available), the status-1
	neighborhood counts measured strictly after the event time are averaged
	and written back on every row of the track, in columns named
	'mean_count_{metric}_{neigh_col}_after_event'. The per-metric count
	columns ('{metric}_count_s1_{neigh_col}') must have been computed
	beforehand. When `event_time_col` is None, a placeholder event time of
	None is used, so every track is skipped and the table is returned with
	the helper column removed.

	Parameters
	----------
	neigh_table : pandas.DataFrame
		Table with precomputed neighborhood counts and event time information.
	neigh_col : str
		Base name of the neighborhood metric columns.
	event_time_col : str or None
		Column holding the per-track event time.
	metrics : list of str, optional
		Subset of {'inclusive', 'exclusive', 'intermediate'} to average.

	Returns
	-------
	pandas.DataFrame
		The input table with the mean post-event count columns added.

	"""

	neigh_table = neigh_table.reset_index(drop=True)

	# Group per position (if any) and per track identity.
	group_cols = ['position'] if 'position' in list(neigh_table.columns) else []
	group_cols.append(extract_identity_col(neigh_table))
	neigh_table.sort_values(by=group_cols + ['FRAME'], inplace=True)

	suffix = '_after_event'
	if event_time_col is None:
		# No event information: keep a placeholder column so the grouping
		# loop below can run unchanged (every track is then skipped).
		neigh_table.loc[:, 'event_time_temp'] = None  # neigh_table['FRAME'].max()
		event_time_col = 'event_time_temp'
		suffix = ''

	for _, track in neigh_table.groupby(group_cols):

		track = track.dropna(subset=neigh_col)
		row_ids = list(track.index)

		times = track[event_time_col].to_numpy()
		if len(times) == 0:
			continue
		t_event = times[0]

		# Skip tracks with no valid event time (None, NaN or negative).
		if t_event is None or not (t_event >= 0.):
			continue

		after_event = track['FRAME'] > t_event
		for metric in ['intermediate', 'inclusive', 'exclusive']:
			if metric not in metrics:
				continue
			counts = track.loc[after_event, f'{metric}_count_s1_{neigh_col}'].to_numpy()
			if len(counts[counts == counts]) > 0:  # at least one non-NaN sample
				neigh_table.loc[row_ids, f'mean_count_{metric}_{neigh_col}{suffix}'] = np.nanmean(counts)

	if event_time_col == 'event_time_temp':
		neigh_table = neigh_table.drop(columns='event_time_temp')

	return neigh_table
434
+ """
435
+ Updates the live status for cells in two datasets based on specified status columns and options.
436
+
437
+ This function assigns a live status to cells in two datasets (setA and setB) based on the provided
438
+ status columns and options. If no status column is provided, all cells are marked as live. Otherwise,
439
+ the function updates the datasets based on the status criteria, potentially inverting the status
440
+ based on the `not_status_option`.
441
+
442
+ Parameters
443
+ ----------
444
+ setA : pandas.DataFrame
445
+ The first dataset containing trajectory or position information for cells.
446
+ setB : pandas.DataFrame
447
+ The second dataset containing trajectory or position information for cells.
448
+ status : list or None
449
+ A list containing the names of the columns in setA and setB that classify cells as alive (1) or dead (0).
450
+ If None, all cells are considered alive. The list should contain exactly two elements.
451
+ not_status_option : list
452
+ A list containing boolean values indicating whether to invert the status for setA and setB, respectively.
453
+ True means the status should be inverted; False means it should not.
454
+
455
+ Returns
456
+ -------
457
+ tuple
458
+ A tuple containing the updated setA and setB DataFrames, along with the final status column names
459
+ used to classify cells in each set.
460
+
461
+ """
462
+
463
+ print(f"Provided statuses: {status}...")
464
+ if (
465
+ status is None
466
+ or status == ["live_status", "live_status"]
467
+ or status == [None, None]
468
+ ):
469
+ setA.loc[:, "live_status"] = 1
470
+ setB.loc[:, "live_status"] = 1
471
+ status = ["live_status", "live_status"]
472
+ elif isinstance(status, list):
473
+ assert (
474
+ len(status) == 2
475
+ ), "Please provide only two columns to classify cells as alive or dead."
476
+ if status[0] is None or status[0] == "live_status":
477
+ setA.loc[:, "live_status"] = 1
478
+ status[0] = "live_status"
479
+ elif status[0] is not None and isinstance(not_status_option, list):
480
+ setA.loc[setA[status[0]] == 2, status[0]] = (
481
+ 1 # already happened events become event
482
+ )
483
+ if not_status_option[0]:
484
+ setA.loc[:, "not_" + status[0]] = [
485
+ not a if a == 0 or a == 1 else np.nan
486
+ for a in setA.loc[:, status[0]].values
487
+ ]
488
+ status[0] = "not_" + status[0]
489
+ if status[1] is None or status[1] == "live_status":
490
+ setB.loc[:, "live_status"] = 1
491
+ status[1] = "live_status"
492
+ elif status[1] is not None and isinstance(not_status_option, list):
493
+ setB.loc[setB[status[1]] == 2, status[1]] = (
494
+ 1 # already happened events become event
495
+ )
496
+ if not_status_option[1]:
497
+ setB.loc[:, "not_" + status[1]] = [
498
+ not a if a == 0 or a == 1 else np.nan
499
+ for a in setB.loc[:, status[1]].values
500
+ ]
501
+ status[1] = "not_" + status[1]
502
+
503
+ assert status[0] in list(setA.columns)
504
+ assert status[1] in list(setB.columns)
505
+
506
+ setA = setA.reset_index(drop=True)
507
+ setB = setB.reset_index(drop=True)
508
+
509
+ return setA, setB, status
510
+
511
+
512
def compute_attention_weight(
    dist_matrix,
    cut_distance,
    opposite_cell_status,
    opposite_cell_ids,
    axis=1,
    include_dead_weight=True,
):
    """
    Compute the attention weight of each cell w.r.t. the opposite population.

    The attention weight of a cell is ``1 / n`` where ``n`` is the number of
    opposite cells lying within ``cut_distance`` (optionally restricted to
    'live' opposite cells when ``include_dead_weight`` is False). The ID of
    the closest opposite cell is recorded as well.

    Parameters
    ----------
    dist_matrix : ndarray
        2D distance matrix between cells of the two populations.
    cut_distance : float
        Cutoff distance within which opposite cells contribute to the weight.
    opposite_cell_status : ndarray
        Status (e.g. live=1 / dead=0) of each opposite cell; only used when
        ``include_dead_weight`` is False.
    opposite_cell_ids : ndarray
        IDs of the opposite cells.
    axis : int, optional
        Axis of ``dist_matrix`` to iterate over (0 for rows, 1 for columns;
        default is 1).
    include_dead_weight : bool, optional
        If True, count every opposite cell within the cutoff regardless of
        status; if False, count only those with status 1 (default is True).

    Returns
    -------
    tuple of ndarray
        ``(weights, closest_opposite)``. ``weights[i]`` is NaN when no
        opposite cell lies within ``cut_distance`` of cell ``i``.

    Notes
    -----
    ``dist_matrix`` is modified in place: zero distances (self pairs) are
    replaced by a large sentinel (1.0e06) so they can never be selected as
    neighbors. Downstream callers receive the mutated matrix.
    """

    n_cells = dist_matrix.shape[axis]
    # BUGFIX: initialize with NaN so cells without any opposite cell within
    # the cutoff get a well-defined NaN weight (np.empty left garbage values).
    weights = np.full(n_cells, np.nan)
    closest_opposite = np.full(n_cells, np.nan)

    for i in range(n_cells):
        if axis == 1:
            row = dist_matrix[:, i]
        elif axis == 0:
            row = dist_matrix[i, :]
        # `row` is a view: mask zero distances in place (see Notes).
        row[row == 0.0] = 1.0e06
        nbr_opposite = len(row[row <= cut_distance])

        if not include_dead_weight:
            # Count only live opposite cells within the cutoff.
            stat = opposite_cell_status[np.where(row <= cut_distance)[0]]
            nbr_opposite = len(stat[stat == 1])
            # NB: raises ValueError if no opposite cell has status 1.
            index_subpop = np.argmin(row[opposite_cell_status == 1])
            closest_opposite[i] = opposite_cell_ids[opposite_cell_status == 1][
                index_subpop
            ]
        else:
            closest_opposite[i] = opposite_cell_ids[np.argmin(row)]

        if nbr_opposite > 0:
            weights[i] = 1.0 / float(nbr_opposite)

    return weights, closest_opposite
577
+
578
+
579
def distance_cut_neighborhood(
    setA,
    setB,
    distance,
    mode="two-pop",
    status=None,
    not_status_option=None,
    compute_cum_sum=True,
    attention_weight=True,
    symmetrize=True,
    include_dead_weight=True,
    column_labels={
        "track": "TRACK_ID",
        "time": "FRAME",
        "x": "POSITION_X",
        "y": "POSITION_Y",
    },
):
    """
    Match neighbors in sets A and B within a circle of radius d.

    Parameters
    ----------
    setA, setB : pandas.DataFrame
        Trajectory or position sets A and B.
    distance : float or list of float
        Cut-distance(s) in pixels to match neighboring pairs.
    mode : str
        Neighboring mode, between 'two-pop' (e.g. target-effector) and 'self'
        (target-target or effector-effector).
    status : list or None
        Column names used to flag cells to ignore (because they are dead).
        By default all cells are kept.
    not_status_option : list or None
        Per-set flags to invert the status columns (passed to
        ``set_live_status``).
    compute_cum_sum : bool
        Compute the cumulated presence time of neighbors (only meaningful
        when trajectories are available for both sets; forced to False when
        a set has no tracking data).
    attention_weight : bool
        Compute the attention weight (how much a cell of set B is shared
        across cells of set A).
    symmetrize : bool
        Write in set B the neighborhood of set A.
    include_dead_weight : bool
        Do not count dead cells when establishing the attention weight.
    column_labels : dict
        Mapping of the 'track', 'time', 'x' and 'y' roles to column names.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(setA, setB)`` with one 'neighborhood_*_circle_{d}_px' column added
        per distance, or ``(None, None)`` when either input table is missing.
    """

    # Check live_status option.
    if setA is not None and setB is not None:
        setA, setB, status = set_live_status(setA, setB, status, not_status_option)
    else:
        return None, None

    # Accept a scalar distance as well as a list of distances.
    if not isinstance(distance, list):
        distance = [distance]

    for d in distance:
        # One neighborhood column per cut distance.
        if mode == "two-pop":
            neigh_col = f"neighborhood_2_circle_{d}_px"
        elif mode == "self":
            neigh_col = f"neighborhood_self_circle_{d}_px"

        cl = []
        for s in [setA, setB]:

            # Check whether data can be tracked.
            temp_column_labels = column_labels.copy()

            id_col = extract_identity_col(s)
            temp_column_labels.update({"track": id_col})
            if id_col == "ID":
                # No tracking data: cumulated presence time is meaningless.
                compute_cum_sum = False
            cl.append(temp_column_labels)

            # Remove nan tracks (cells that do not belong to a track).
            s[neigh_col] = np.nan
            s[neigh_col] = s[neigh_col].astype(object)
            s.dropna(subset=[cl[-1]["track"]], inplace=True)

        # Loop over each timestep observed in either set.
        timeline = np.unique(
            np.concatenate(
                [setA[cl[0]["time"]].to_numpy(), setB[cl[1]["time"]].to_numpy()]
            )
        ).astype(int)
        for t in tqdm(timeline):

            coordinates_A = setA.loc[
                setA[cl[0]["time"]] == t, [cl[0]["x"], cl[0]["y"]]
            ].to_numpy()
            ids_A = setA.loc[setA[cl[0]["time"]] == t, cl[0]["track"]].to_numpy()
            status_A = setA.loc[setA[cl[0]["time"]] == t, status[0]].to_numpy()

            coordinates_B = setB.loc[
                setB[cl[1]["time"]] == t, [cl[1]["x"], cl[1]["y"]]
            ].to_numpy()
            ids_B = setB.loc[setB[cl[1]["time"]] == t, cl[1]["track"]].to_numpy()

            if len(ids_A) > 0 and len(ids_B) > 0:

                # compute distance matrix
                dist_map = cdist(coordinates_A, coordinates_B, metric="euclidean")

                # BUGFIX: `weights` and `closest_A` were previously undefined
                # when `attention_weight` was False, raising a NameError in
                # the call below. Pre-initialize them to None.
                weights = None
                closest_A = None
                if attention_weight:
                    weights, closest_A = compute_attention_weight(
                        dist_map,
                        d,
                        status_A,
                        ids_A,
                        axis=1,
                        include_dead_weight=include_dead_weight,
                    )

                _fill_distance_neighborhood_at_t(
                    t,
                    setA,
                    setB,
                    dist_map,
                    attention_weight=attention_weight,
                    include_dead_weight=include_dead_weight,
                    symmetrize=symmetrize,
                    compute_cum_sum=compute_cum_sum,
                    weights=weights,
                    closest_A=closest_A,
                    neigh_col=neigh_col,
                    column_labelsA=cl[0],
                    column_labelsB=cl[1],
                    statusA=status[0],
                    statusB=status[1],
                    distance=d,
                )

    return setA, setB
712
+
713
+
714
def compute_neighborhood_at_position(
    pos,
    distance,
    population=["targets", "effectors"],
    theta_dist=None,
    img_shape=(2048, 2048),
    return_tables=False,
    clear_neigh=False,
    event_time_col=None,
    neighborhood_kwargs={
        "mode": "two-pop",
        "status": None,
        "not_status_option": None,
        "include_dead_weight": True,
        "compute_cum_sum": False,
        "attention_weight": True,
        "symmetrize": True,
    },
):
    """
    Computes neighborhood metrics for specified cell populations within a given position, based on distance criteria and additional parameters.

    This function assesses the neighborhood interactions between two specified cell populations (or within a single population) at a given position.
    It computes various neighborhood metrics based on specified distances, considering the entire image or excluding edge regions.
    The results are optionally cleared of previous neighborhood calculations and can be returned as updated tables.

    Parameters
    ----------
    pos : str
        The path to the position directory where the analysis is to be performed.
    distance : float or list of float
        The distance(s) in pixels to define neighborhoods.
    population : list of str, optional
        Names of the cell populations to analyze. If a single population is provided, it is used for both populations in the analysis (default is ['targets', 'effectors']).
    theta_dist : float or list of float, optional
        Edge threshold(s) in pixels to exclude cells close to the image boundaries from the analysis. If not provided, defaults to 90% of each specified distance.
    img_shape : tuple of int, optional
        The dimensions (height, width) of the images in pixels (default is (2048, 2048)).
    return_tables : bool, optional
        If True, returns the updated data tables for both populations (default is False).
    clear_neigh : bool, optional
        If True, clears existing neighborhood columns from the data tables before computing new metrics (default is False).
    event_time_col : str, optional
        The column name indicating the event time for each cell, required if mean neighborhood metrics are to be computed before events.
    neighborhood_kwargs : dict, optional
        Additional keyword arguments for neighborhood computation, including mode, status options, and metrics (default includes mode 'two-pop', and symmetrization).

    Returns
    -------
    pandas.DataFrame or (pandas.DataFrame, pandas.DataFrame)
        If `return_tables` is True, returns the updated data tables for the specified populations. If only one population is analyzed, both returned data frames will be identical.

    Raises
    ------
    AssertionError
        If the specified position path does not exist or if the number of distances and edge thresholds do not match.

    """

    # Normalize the position path (Windows backslashes -> forward slashes).
    pos = pos.replace("\\", "/")
    pos = rf"{pos}"
    assert os.path.exists(pos), f"Position {pos} is not a valid path."

    # A single population name means a self-neighborhood computation.
    if isinstance(population, str):
        population = [population, population]

    if not isinstance(distance, list):
        distance = [distance]
    if not theta_dist is None and not isinstance(theta_dist, list):
        theta_dist = [theta_dist]

    if theta_dist is None:
        # Default edge threshold: 90% of each neighborhood radius.
        theta_dist = [0.9 * d for d in distance]
    assert len(theta_dist) == len(
        distance
    ), "Incompatible number of distances and number of edge thresholds."

    # Force the mode to match whether one or two populations are analyzed.
    if population[0] == population[1]:
        neighborhood_kwargs.update({"mode": "self"})
    if population[1] != population[0]:
        neighborhood_kwargs.update({"mode": "two-pop"})

    df_A, path_A = get_position_table(pos, population=population[0], return_path=True)
    df_B, path_B = get_position_table(pos, population=population[1], return_path=True)
    if df_A is None or df_B is None:
        return None

    if clear_neigh:
        # Remove previous neighborhood results (pickles and pair table).
        if os.path.exists(path_A.replace(".csv", ".pkl")):
            os.remove(path_A.replace(".csv", ".pkl"))
        if os.path.exists(path_B.replace(".csv", ".pkl")):
            os.remove(path_B.replace(".csv", ".pkl"))
        df_pair, pair_path = get_position_table(
            pos, population="pairs", return_path=True
        )
        if df_pair is not None:
            os.remove(pair_path)

    df_A_pkl = get_position_pickle(pos, population=population[0], return_path=False)
    df_B_pkl = get_position_pickle(pos, population=population[1], return_path=False)

    if df_A_pkl is not None:
        # Recover previously computed neighborhood columns from the pickle
        # and merge them into the csv-derived table on (identity, FRAME).
        pkl_columns = np.array(df_A_pkl.columns)
        neigh_columns = np.array([c.startswith("neighborhood") for c in pkl_columns])
        cols = list(pkl_columns[neigh_columns]) + ["FRAME"]

        id_col = extract_identity_col(df_A_pkl)
        cols.append(id_col)
        on_cols = [id_col, "FRAME"]

        print(f"Recover {cols} from the pickle file...")
        try:
            df_A = pd.merge(df_A, df_A_pkl.loc[:, cols], how="outer", on=on_cols)
            print(df_A.columns)
        except Exception as e:
            # Best effort: keep the csv table if the merge fails.
            print(f"Failure to merge pickle and csv files: {e}")

    if df_B_pkl is not None and df_B is not None:
        # Same recovery for the second population.
        pkl_columns = np.array(df_B_pkl.columns)
        neigh_columns = np.array([c.startswith("neighborhood") for c in pkl_columns])
        cols = list(pkl_columns[neigh_columns]) + ["FRAME"]

        id_col = extract_identity_col(df_B_pkl)
        cols.append(id_col)
        on_cols = [id_col, "FRAME"]

        print(f"Recover {cols} from the pickle file...")
        try:
            df_B = pd.merge(df_B, df_B_pkl.loc[:, cols], how="outer", on=on_cols)
        except Exception as e:
            print(f"Failure to merge pickle and csv files: {e}")

    if clear_neigh:
        # Drop any neighborhood columns that survived the pickle merge.
        unwanted = df_A.columns[df_A.columns.str.contains("neighborhood")]
        df_A = df_A.drop(columns=unwanted)
        unwanted = df_B.columns[df_B.columns.str.contains("neighborhood")]
        df_B = df_B.drop(columns=unwanted)

    # Core neighborhood matching for all requested distances.
    df_A, df_B = distance_cut_neighborhood(df_A, df_B, distance, **neighborhood_kwargs)

    if df_A is None or df_B is None or len(df_A) == 0:
        return None

    for td, d in zip(theta_dist, distance):

        if neighborhood_kwargs["mode"] == "two-pop":
            neigh_col = f"neighborhood_2_circle_{d}_px"

        elif neighborhood_kwargs["mode"] == "self":
            neigh_col = f"neighborhood_self_circle_{d}_px"

        # edge_filter_A = (df_A['POSITION_X'] > td)&(df_A['POSITION_Y'] > td)&(df_A['POSITION_Y'] < (img_shape[0] - td))&(df_A['POSITION_X'] < (img_shape[1] - td))
        # edge_filter_B = (df_B['POSITION_X'] > td)&(df_B['POSITION_Y'] > td)&(df_B['POSITION_Y'] < (img_shape[0] - td))&(df_B['POSITION_X'] < (img_shape[1] - td))
        # df_A.loc[~edge_filter_A, neigh_col] = np.nan
        # df_B.loc[~edge_filter_B, neigh_col] = np.nan

        print("Count neighborhood...")
        df_A = compute_neighborhood_metrics(
            df_A,
            neigh_col,
            metrics=["inclusive", "exclusive", "intermediate"],
            decompose_by_status=True,
        )
        # if neighborhood_kwargs['symmetrize']:
        # df_B = compute_neighborhood_metrics(df_B, neigh_col, metrics=['inclusive','exclusive','intermediate'], decompose_by_status=True)
        print("Done...")

        # Track-averaged metrics only make sense when tracking data exists.
        if "TRACK_ID" in list(df_A.columns):
            if not np.all(df_A["TRACK_ID"].isnull()):
                print("Estimate average neighborhood before/after event...")
                df_A = mean_neighborhood_before_event(df_A, neigh_col, event_time_col)
                if event_time_col is not None:
                    df_A = mean_neighborhood_after_event(
                        df_A, neigh_col, event_time_col
                    )
                print("Done...")

    if not population[0] == population[1]:
        # Remove neighborhood column from neighbor table, rename with actual population name
        for td, d in zip(theta_dist, distance):
            if neighborhood_kwargs["mode"] == "two-pop":
                neigh_col = f"neighborhood_2_circle_{d}_px"
                new_neigh_col = neigh_col.replace(
                    "_2_", f"_({population[0]}-{population[1]})_"
                )
                df_A = df_A.rename(columns={neigh_col: new_neigh_col})
            elif neighborhood_kwargs["mode"] == "self":
                neigh_col = f"neighborhood_self_circle_{d}_px"
            # NOTE(review): placement of the drop relative to the loop was
            # inferred from context — confirm against the repository source.
            df_B = df_B.drop(columns=[neigh_col])
        df_B.to_pickle(path_B.replace(".csv", ".pkl"))

        # Rename the count columns with explicit population names as well.
        cols_to_rename = [
            c
            for c in list(df_A.columns)
            if c.startswith("intermediate_count_")
            or c.startswith("inclusive_count_")
            or c.startswith("exclusive_count_")
            or c.startswith("mean_count_")
        ]
        new_col_names = [
            c.replace("_2_", f"_({population[0]}-{population[1]})_") for c in cols_to_rename
        ]
        new_name_map = {}
        for k, c in enumerate(cols_to_rename):
            new_name_map.update({c: new_col_names[k]})
        df_A = df_A.rename(columns=new_name_map)

    # Full result (incl. neighborhood object columns) goes to the pickle.
    df_A.to_pickle(path_A.replace(".csv", ".pkl"))

    # The csv only keeps scalar columns: strip the neighborhood_* objects.
    unwanted = df_A.columns[df_A.columns.str.startswith("neighborhood_")]
    df_A2 = df_A.drop(columns=unwanted)
    df_A2.to_csv(path_A, index=False)

    if not population[0] == population[1]:
        unwanted = df_B.columns[df_B.columns.str.startswith("neighborhood_")]
        df_B_csv = df_B.drop(unwanted, axis=1, inplace=False)
        df_B_csv.to_csv(path_B, index=False)

    if return_tables:
        return df_A, df_B
934
+
935
+
936
def compute_neighborhood_metrics(
    neigh_table,
    neigh_col,
    metrics=["inclusive", "exclusive", "intermediate"],
    decompose_by_status=False,
):
    """
    Computes and appends neighborhood metrics to a dataframe based on specified neighborhood characteristics.

    This function iterates through a dataframe grouped by either 'TRACK_ID' or ['position', 'TRACK_ID'] (if 'position' column exists)
    and computes various neighborhood metrics (inclusive, exclusive, intermediate counts) for each cell. It can also decompose these
    metrics by cell status (e.g., live or dead) if specified.

    Parameters
    ----------
    neigh_table : pandas.DataFrame
        A dataframe containing neighborhood information for each cell, including position, track ID, frame, and a specified neighborhood column.
    neigh_col : str
        The column name in `neigh_table` that contains neighborhood information (a per-frame list of neighbor
        dicts with 'weight', 'status' and 'closest' keys).
    metrics : list of str, optional
        The metrics to be computed from the neighborhood information. Possible values include 'inclusive', 'exclusive', and 'intermediate'.
        Default is ['inclusive', 'exclusive', 'intermediate'].
    decompose_by_status : bool, optional
        If True, the metrics are computed separately for different statuses (e.g., live or dead) of the neighboring cells. Default is False.

    Returns
    -------
    pandas.DataFrame
        The input dataframe with additional columns for each of the specified metrics, and, if `decompose_by_status` is True, separate
        metrics for each status.

    Notes
    -----
    - 'inclusive' count refers to the total number of neighbors.
    - 'exclusive' count refers to the number of neighbors that are closest.
    - 'intermediate' count refers to the sum of weights attributed to neighbors, representing a weighted count.
    - If `decompose_by_status` is True, metrics are appended with '_s0' or '_s1' to indicate the status they correspond to.

    Examples
    --------
    >>> neigh_table = pd.DataFrame({
    ...     'TRACK_ID': [1, 1, 2, 2],
    ...     'FRAME': [1, 2, 1, 2],
    ...     'neighborhood_info': [{'weight': 1, 'status': 1, 'closest': 1}, ...] # example neighborhood info
    ... })
    >>> neigh_col = 'neighborhood_info'
    >>> updated_neigh_table = compute_neighborhood_metrics(neigh_table, neigh_col, metrics=['inclusive'], decompose_by_status=True)
    # Computes the inclusive count of neighbors for each cell, decomposed by cell status.

    """

    neigh_table = neigh_table.reset_index(drop=True)
    # Group per position (multi-position tables) and per cell identity.
    if "position" in list(neigh_table.columns):
        groupbycols = ["position"]
    else:
        groupbycols = []

    id_col = extract_identity_col(neigh_table)
    groupbycols.append(id_col)

    neigh_table.sort_values(by=groupbycols + ["FRAME"], inplace=True)

    for tid, group in neigh_table.groupby(groupbycols):
        # Ignore frames with no neighborhood information for this cell.
        group = group.dropna(subset=neigh_col)
        indices = list(group.index)
        # One entry per frame: a list of neighbor dicts.
        neighbors = group[neigh_col].to_numpy()

        if "inclusive" in metrics:
            # Inclusive: raw neighbor count per frame.
            n_inclusive = [len(n) for n in neighbors]

        if "intermediate" in metrics:
            # Intermediate: attention-weighted neighbor count per frame
            # (NaN-initialized; filled in the frame loop below).
            n_intermediate = np.zeros(len(neighbors))
            n_intermediate[:] = np.nan

        if "exclusive" in metrics:
            # Exclusive: number of neighbors for which this cell is closest.
            n_exclusive = np.zeros(len(neighbors))
            n_exclusive[:] = np.nan

        if decompose_by_status:
            # Same metrics split by neighbor status (s0 / s1).

            if "inclusive" in metrics:
                n_inclusive_status_0 = np.zeros(len(neighbors))
                n_inclusive_status_0[:] = np.nan
                n_inclusive_status_1 = np.zeros(len(neighbors))
                n_inclusive_status_1[:] = np.nan

            if "intermediate" in metrics:
                n_intermediate_status_0 = np.zeros(len(neighbors))
                n_intermediate_status_0[:] = np.nan
                n_intermediate_status_1 = np.zeros(len(neighbors))
                n_intermediate_status_1[:] = np.nan

            if "exclusive" in metrics:
                n_exclusive_status_0 = np.zeros(len(neighbors))
                n_exclusive_status_0[:] = np.nan
                n_exclusive_status_1 = np.zeros(len(neighbors))
                n_exclusive_status_1[:] = np.nan

        for t in range(len(neighbors)):

            # Unpack the neighbor attributes for this frame.
            neighs_at_t = neighbors[t]
            weights_at_t = [n["weight"] for n in neighs_at_t]
            status_at_t = [n["status"] for n in neighs_at_t]
            closest_at_t = [n["closest"] for n in neighs_at_t]

            if "intermediate" in metrics:
                n_intermediate[t] = np.sum(weights_at_t)
            if "exclusive" in metrics:
                n_exclusive[t] = sum([c == 1.0 for c in closest_at_t])

            if decompose_by_status:

                if "inclusive" in metrics:
                    n_inclusive_status_0[t] = sum([s == 0.0 for s in status_at_t])
                    n_inclusive_status_1[t] = sum([s == 1.0 for s in status_at_t])

                if "intermediate" in metrics:
                    weights_at_t = np.array(weights_at_t)

                    # intermediate
                    weights_status_1 = weights_at_t[
                        np.array([s == 1.0 for s in status_at_t], dtype=bool)
                    ]
                    weights_status_0 = weights_at_t[
                        np.array([s == 0.0 for s in status_at_t], dtype=bool)
                    ]
                    n_intermediate_status_1[t] = np.sum(weights_status_1)
                    n_intermediate_status_0[t] = np.sum(weights_status_0)

                if "exclusive" in metrics:
                    n_exclusive_status_0[t] = sum(
                        [
                            c == 1.0 if s == 0.0 else False
                            for c, s in zip(closest_at_t, status_at_t)
                        ]
                    )
                    n_exclusive_status_1[t] = sum(
                        [
                            c == 1.0 if s == 1.0 else False
                            for c, s in zip(closest_at_t, status_at_t)
                        ]
                    )

        # Write the per-frame metrics back on the rows of this cell.
        if "inclusive" in metrics:
            neigh_table.loc[indices, "inclusive_count_" + neigh_col] = n_inclusive
        if "intermediate" in metrics:
            neigh_table.loc[indices, "intermediate_count_" + neigh_col] = n_intermediate
        if "exclusive" in metrics:
            neigh_table.loc[indices, "exclusive_count_" + neigh_col] = n_exclusive

        if decompose_by_status:
            if "inclusive" in metrics:
                neigh_table.loc[indices, "inclusive_count_s0_" + neigh_col] = (
                    n_inclusive_status_0
                )
                neigh_table.loc[indices, "inclusive_count_s1_" + neigh_col] = (
                    n_inclusive_status_1
                )
            if "intermediate" in metrics:
                neigh_table.loc[indices, "intermediate_count_s0_" + neigh_col] = (
                    n_intermediate_status_0
                )
                neigh_table.loc[indices, "intermediate_count_s1_" + neigh_col] = (
                    n_intermediate_status_1
                )
            if "exclusive" in metrics:
                neigh_table.loc[indices, "exclusive_count_s0_" + neigh_col] = (
                    n_exclusive_status_0
                )
                neigh_table.loc[indices, "exclusive_count_s1_" + neigh_col] = (
                    n_exclusive_status_1
                )

    return neigh_table
1110
+
1111
+
1112
def mean_neighborhood_before_event(
    neigh_table,
    neigh_col,
    event_time_col,
    metrics=["inclusive", "exclusive", "intermediate"],
):
    """
    Average the pre-event neighborhood counts of each cell track.

    For every track (grouped by position when available), the status-1
    neighborhood counts measured up to and including the event frame are
    averaged and written back on every row of the track, in columns named
    'mean_count_{metric}_{neigh_col}_before_event'. The per-metric count
    columns ('{metric}_count_s1_{neigh_col}') must have been computed
    beforehand. When `event_time_col` is None, the maximum frame is used as
    the event time for every track and the suffix is dropped from the new
    column names.

    Parameters
    ----------
    neigh_table : pandas.DataFrame
        Table with precomputed neighborhood counts and event time information.
    neigh_col : str
        Base name of the neighborhood metric columns.
    event_time_col : str or None
        Column holding the per-track event time.
    metrics : list of str, optional
        Subset of {'inclusive', 'exclusive', 'intermediate'} to average.

    Returns
    -------
    pandas.DataFrame
        The input table with the mean pre-event count columns added.

    """

    neigh_table = neigh_table.reset_index(drop=True)

    # Group per position (if any) and per track identity.
    group_cols = ["position"] if "position" in list(neigh_table.columns) else []
    group_cols.append(extract_identity_col(neigh_table))
    neigh_table.sort_values(by=group_cols + ["FRAME"], inplace=True)

    suffix = "_before_event"
    if event_time_col is None:
        print(
            "No event time was provided... Estimating the mean neighborhood over the whole observation time..."
        )
        # Fall back to the last observed frame as a global event time.
        neigh_table.loc[:, "event_time_temp"] = neigh_table["FRAME"].max()
        event_time_col = "event_time_temp"
        suffix = ""

    for _, track in neigh_table.groupby(group_cols):

        track = track.dropna(subset=neigh_col)
        row_ids = list(track.index)

        times = track[event_time_col].to_numpy()
        if len(times) == 0:
            continue
        t_event = times[0]

        # A negative event time means "no event": average over the whole track.
        if t_event < 0.0:
            t_event = track["FRAME"].max()

        before_event = track["FRAME"] <= t_event
        for metric in ("intermediate", "inclusive", "exclusive"):
            if metric not in metrics:
                continue
            counts = track.loc[
                before_event, f"{metric}_count_s1_{neigh_col}"
            ].to_numpy()
            if len(counts[counts == counts]) > 0:  # at least one non-NaN sample
                neigh_table.loc[
                    row_ids, f"mean_count_{metric}_{neigh_col}{suffix}"
                ] = np.nanmean(counts)

    if event_time_col == "event_time_temp":
        neigh_table = neigh_table.drop(columns="event_time_temp")
    return neigh_table
1228
+
1229
+
1230
+ def mean_neighborhood_after_event(
1231
+ neigh_table,
1232
+ neigh_col,
1233
+ event_time_col,
1234
+ metrics=["inclusive", "exclusive", "intermediate"],
1235
+ ):
1236
+ """
1237
+ Computes the mean neighborhood metrics for each cell track after a specified event time.
1238
+
1239
+ This function calculates the mean values of specified neighborhood metrics (inclusive, exclusive, intermediate)
1240
+ for each cell track after the event time. The function requires the neighborhood metrics to
1241
+ have been previously computed and appended to the input dataframe. It operates on grouped data based on position
1242
+ and track ID, handling cases with or without position information.
1243
+
1244
+ Parameters
1245
+ ----------
1246
+ neigh_table : pandas.DataFrame
1247
+ A dataframe containing cell track data with precomputed neighborhood metrics and event time information.
1248
+ neigh_col : str
1249
+ The base name of the neighborhood metric columns in `neigh_table`.
1250
+ event_time_col : str or None
1251
+ The column name indicating the event time for each cell track. If None, the maximum frame number in the
1252
+ dataframe is used as the event time for all tracks.
1253
+
1254
+ Returns
1255
+ -------
1256
+ pandas.DataFrame
1257
+ The input dataframe with added columns for the mean neighborhood metrics before the event for each cell track.
1258
+ The new columns are named as 'mean_count_{metric}_{neigh_col}_before_event', where {metric} is one of
1259
+ 'inclusive', 'exclusive', 'intermediate'.
1260
+
1261
+ """
1262
+
1263
+ neigh_table = neigh_table.reset_index(drop=True)
1264
+ if "position" in list(neigh_table.columns):
1265
+ groupbycols = ["position"]
1266
+ else:
1267
+ groupbycols = []
1268
+
1269
+ id_col = extract_identity_col(neigh_table)
1270
+ groupbycols.append(id_col)
1271
+
1272
+ neigh_table.sort_values(by=groupbycols + ["FRAME"], inplace=True)
1273
+ suffix = "_after_event"
1274
+
1275
+ if event_time_col is None:
1276
+ neigh_table.loc[:, "event_time_temp"] = None # neigh_table['FRAME'].max()
1277
+ event_time_col = "event_time_temp"
1278
+ suffix = ""
1279
+
1280
+ for tid, group in neigh_table.groupby(groupbycols):
1281
+
1282
+ group = group.dropna(subset=neigh_col)
1283
+ indices = list(group.index)
1284
+
1285
+ event_time_values = group[event_time_col].to_numpy()
1286
+ if len(event_time_values) > 0:
1287
+ event_time = event_time_values[0]
1288
+ else:
1289
+ continue
1290
+
1291
+ if event_time is not None and (event_time >= 0.0):
1292
+
1293
+ if "intermediate" in metrics:
1294
+ valid_counts_intermediate = group.loc[
1295
+ group["FRAME"] > event_time, "intermediate_count_s1_" + neigh_col
1296
+ ].to_numpy()
1297
+ if (
1298
+ len(
1299
+ valid_counts_intermediate[
1300
+ valid_counts_intermediate == valid_counts_intermediate
1301
+ ]
1302
+ )
1303
+ > 0
1304
+ ):
1305
+ neigh_table.loc[
1306
+ indices, f"mean_count_intermediate_{neigh_col}{suffix}"
1307
+ ] = np.nanmean(valid_counts_intermediate)
1308
+ if "inclusive" in metrics:
1309
+ valid_counts_inclusive = group.loc[
1310
+ group["FRAME"] > event_time, "inclusive_count_s1_" + neigh_col
1311
+ ].to_numpy()
1312
+ if (
1313
+ len(
1314
+ valid_counts_inclusive[
1315
+ valid_counts_inclusive == valid_counts_inclusive
1316
+ ]
1317
+ )
1318
+ > 0
1319
+ ):
1320
+ neigh_table.loc[
1321
+ indices, f"mean_count_inclusive_{neigh_col}{suffix}"
1322
+ ] = np.nanmean(valid_counts_inclusive)
1323
+ if "exclusive" in metrics:
1324
+ valid_counts_exclusive = group.loc[
1325
+ group["FRAME"] > event_time, "exclusive_count_s1_" + neigh_col
1326
+ ].to_numpy()
1327
+ if (
1328
+ len(
1329
+ valid_counts_exclusive[
1330
+ valid_counts_exclusive == valid_counts_exclusive
1331
+ ]
1332
+ )
1333
+ > 0
1334
+ ):
1335
+ neigh_table.loc[
1336
+ indices, f"mean_count_exclusive_{neigh_col}{suffix}"
1337
+ ] = np.nanmean(valid_counts_exclusive)
1338
+
1339
+ if event_time_col == "event_time_temp":
1340
+ neigh_table = neigh_table.drop(columns="event_time_temp")
1341
+
1342
+ return neigh_table
1009
1343
 
1010
1344
 
1011
1345
  # New functions for direct cell-cell contact neighborhood
1012
1346
 
1347
+
1013
1348
  def sign(num):
1014
- return -1 if num < 0 else 1
1349
+ return -1 if num < 0 else 1
1015
1350
 
1016
1351
 
1017
1352
  def contact_neighborhood(labelsA, labelsB=None, border=3, connectivity=2):
1018
1353
 
1019
- labelsA = labelsA.astype(float)
1020
- if labelsB is not None:
1021
- labelsB = labelsB.astype(float)
1022
-
1023
- if border > 0:
1024
- labelsA_edge = contour_of_instance_segmentation(label=labelsA, distance=border * (-1)).astype(float)
1025
- labelsA[np.where(labelsA_edge > 0)] = labelsA_edge[np.where(labelsA_edge > 0)]
1026
- if labelsB is not None:
1027
- labelsB_edge = contour_of_instance_segmentation(label=labelsB, distance=border * (-1)).astype(float)
1028
- labelsB[np.where(labelsB_edge > 0)] = labelsB_edge[np.where(labelsB_edge > 0)]
1029
-
1030
- if labelsB is not None:
1031
- labelsA[labelsA != 0] = -labelsA[labelsA != 0]
1032
- labelsAB = merge_labels(labelsA, labelsB)
1033
- labelsBA = merge_labels(labelsB, labelsA)
1034
- label_cases = [labelsAB, labelsBA]
1035
- else:
1036
- label_cases = [labelsA]
1037
-
1038
- coocurrences = []
1039
- for lbl in label_cases:
1040
- coocurrences.extend(find_contact_neighbors(lbl, connectivity=connectivity))
1041
-
1042
- unique_pairs = np.unique(coocurrences, axis=0)
1354
+ labelsA = labelsA.astype(float)
1355
+ if labelsB is not None:
1356
+ labelsB = labelsB.astype(float)
1357
+
1358
+ if border > 0:
1359
+ labelsA_edge = contour_of_instance_segmentation(
1360
+ label=labelsA, distance=border * (-1)
1361
+ ).astype(float)
1362
+ labelsA[np.where(labelsA_edge > 0)] = labelsA_edge[np.where(labelsA_edge > 0)]
1363
+ if labelsB is not None:
1364
+ labelsB_edge = contour_of_instance_segmentation(
1365
+ label=labelsB, distance=border * (-1)
1366
+ ).astype(float)
1367
+ labelsB[np.where(labelsB_edge > 0)] = labelsB_edge[
1368
+ np.where(labelsB_edge > 0)
1369
+ ]
1370
+
1371
+ if labelsB is not None:
1372
+ labelsA[labelsA != 0] = -labelsA[labelsA != 0]
1373
+ labelsAB = merge_labels(labelsA, labelsB)
1374
+ labelsBA = merge_labels(labelsB, labelsA)
1375
+ label_cases = [labelsAB, labelsBA]
1376
+ else:
1377
+ label_cases = [labelsA]
1378
+
1379
+ coocurrences = []
1380
+ for lbl in label_cases:
1381
+ coocurrences.extend(find_contact_neighbors(lbl, connectivity=connectivity))
1382
+
1383
+ unique_pairs = np.unique(coocurrences, axis=0)
1384
+
1385
+ if labelsB is not None:
1386
+ neighs = np.unique(
1387
+ [
1388
+ tuple(sorted(p))
1389
+ for p in unique_pairs
1390
+ if p[0] != p[1] and sign(p[0]) != sign(p[1])
1391
+ ],
1392
+ axis=0,
1393
+ )
1394
+ else:
1395
+ neighs = np.unique(
1396
+ [tuple(sorted(p)) for p in unique_pairs if p[0] != p[1]], axis=0
1397
+ )
1398
+
1399
+ return neighs
1043
1400
 
1044
- if labelsB is not None:
1045
- neighs = np.unique([tuple(sorted(p)) for p in unique_pairs if p[0] != p[1] and sign(p[0]) != sign(p[1])],
1046
- axis=0)
1047
- else:
1048
- neighs = np.unique([tuple(sorted(p)) for p in unique_pairs if p[0] != p[1]], axis=0)
1049
-
1050
- return neighs
1051
1401
 
1052
1402
  def merge_labels(labelsA, labelsB):
1053
1403
 
1054
- labelsA = labelsA.astype(float)
1055
- labelsB = labelsB.astype(float)
1404
+ labelsA = labelsA.astype(float)
1405
+ labelsB = labelsB.astype(float)
1056
1406
 
1057
- labelsAB = labelsA.copy()
1058
- labelsAB[np.where(labelsB != 0)] = labelsB[np.where(labelsB != 0)]
1407
+ labelsAB = labelsA.copy()
1408
+ labelsAB[np.where(labelsB != 0)] = labelsB[np.where(labelsB != 0)]
1059
1409
 
1060
- return labelsAB
1410
+ return labelsAB
1061
1411
 
1062
1412
 
1063
1413
  def find_contact_neighbors(labels, connectivity=2):
1064
-
1065
- assert labels.ndim == 2, "Wrong dimension for labels..."
1066
- g, nodes = pixel_graph(labels, mask=labels.astype(bool), connectivity=connectivity)
1067
- g.eliminate_zeros()
1068
-
1069
- coo = g.tocoo()
1070
- center_coords = nodes[coo.row]
1071
- neighbor_coords = nodes[coo.col]
1072
-
1073
- center_values = labels.ravel()[center_coords]
1074
- neighbor_values = labels.ravel()[neighbor_coords]
1075
- touching_masks = np.column_stack((center_values, neighbor_values))
1076
-
1077
- return touching_masks
1078
-
1079
-
1080
- def mask_contact_neighborhood(setA, setB, labelsA, labelsB, distance, mode='two-pop', status=None,
1081
- not_status_option=None, compute_cum_sum=True,
1082
- attention_weight=True, symmetrize=True, include_dead_weight=True,
1083
- column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y',
1084
- 'mask_id': 'class_id'}):
1085
- """
1086
-
1087
- Match neighbors in set A and B within a circle of radius d.
1088
-
1089
- Parameters
1090
- ----------
1091
- setA,setB : pandas DataFrame
1092
- Trajectory or position sets A and B.
1093
- distance : float
1094
- Cut-distance in pixels to match neighboring pairs.
1095
- mode: str
1096
- neighboring mode, between 'two-pop' (e.g. target-effector) and 'self' (target-target or effector-effector).
1097
- status: None or status
1098
- name to look for cells to ignore (because they are dead). By default all cells are kept.
1099
- compute_cum_sum: bool,
1100
- compute cumulated time of presence of neighbours (only if trajectories available for both sets)
1101
- attention_weight: bool,
1102
- compute the attention weight (how much a cell of set B is shared across cells of set A)
1103
- symmetrize: bool,
1104
- write in set B the neighborhood of set A
1105
- include_dead_weight: bool
1106
- do not count dead cells when establishing attention weight
1107
- """
1108
-
1109
- if setA is not None and setB is not None:
1110
- setA, setB, status = set_live_status(setA, setB, status, not_status_option)
1111
- else:
1112
- return None, None
1113
-
1114
- # Check distance option
1115
- if not isinstance(distance, list):
1116
- distance = [distance]
1117
-
1118
- cl = []
1119
- for s in [setA, setB]:
1120
-
1121
- # Check whether data can be tracked
1122
- temp_column_labels = column_labels.copy()
1123
-
1124
- id_col = extract_identity_col(s)
1125
- temp_column_labels.update({'track': id_col})
1126
- if id_col=='ID':
1127
- compute_cum_sum = False
1128
-
1129
- cl.append(temp_column_labels)
1130
-
1131
- setA = setA.loc[~setA[cl[0]['track']].isnull(),:].copy()
1132
- setB = setB.loc[~setB[cl[1]['track']].isnull(),:].copy()
1133
-
1134
- if labelsB is None:
1135
- labelsB = [None] * len(labelsA)
1136
-
1137
- for d in distance:
1138
- # loop over each provided distance
1139
-
1140
- if mode == 'two-pop':
1141
- neigh_col = f'neighborhood_2_contact_{d}_px'
1142
- elif mode == 'self':
1143
- neigh_col = f'neighborhood_self_contact_{d}_px'
1144
- else:
1145
- print("Please provide a valid mode between `two-pop` and `self`...")
1146
- return None
1147
-
1148
- setA[neigh_col] = np.nan
1149
- setA[neigh_col] = setA[neigh_col].astype(object)
1150
-
1151
- setB[neigh_col] = np.nan
1152
- setB[neigh_col] = setB[neigh_col].astype(object)
1153
-
1154
- # Loop over each available timestep
1155
- timeline = np.unique(np.concatenate([setA[cl[0]['time']].to_numpy(), setB[cl[1]['time']].to_numpy()])).astype(
1156
- int)
1157
- for t in tqdm(timeline):
1158
-
1159
- setA_t = setA.loc[setA[cl[0]['time']] == t, :]
1160
- setB_t = setB.loc[setB[cl[1]['time']] == t, :]
1161
-
1162
- if len(setA_t) > 0 and len(setB_t) > 0:
1163
- dist_map, intersection_map = _compute_mask_contact_dist_map(setA_t, setB_t, labelsA[t], labelsB[t],
1164
- distance=d, mode=mode, column_labelsA=cl[0],
1165
- column_labelsB=cl[1])
1166
-
1167
- d_filter = 1.0E05
1168
- if attention_weight:
1169
- status_A = setA_t[status[0]].to_numpy()
1170
- ids_A = setA_t[cl[0]["track"]].to_numpy()
1171
- weights, closest_A = compute_attention_weight(dist_map, d_filter, status_A, ids_A, axis=1,
1172
- include_dead_weight=include_dead_weight)
1173
- else:
1174
- weights = None
1175
- closest_A = None
1176
-
1177
- _fill_contact_neighborhood_at_t(t, setA, setB, dist_map, intersection_map=intersection_map,
1178
- attention_weight=attention_weight,
1179
- include_dead_weight=include_dead_weight, symmetrize=symmetrize,
1180
- compute_cum_sum=compute_cum_sum, weights=weights, closest_A=closest_A,
1181
- neigh_col=neigh_col, column_labelsA=cl[0], column_labelsB=cl[1],
1182
- statusA=status[0], statusB=status[1], d_filter=d_filter)
1183
-
1184
- return setA, setB
1185
-
1186
-
1187
- def compute_contact_neighborhood_at_position(pos, distance, population=['targets', 'effectors'], theta_dist=None,
1188
- img_shape=(2048, 2048), return_tables=False, clear_neigh=False,
1189
- event_time_col=None,
1190
- neighborhood_kwargs={'mode': 'two-pop', 'status': None,
1191
- 'not_status_option': None,
1192
- 'include_dead_weight': True, "compute_cum_sum": False,
1193
- "attention_weight": True, 'symmetrize': True}):
1194
- """
1195
- Computes neighborhood metrics for specified cell populations within a given position, based on distance criteria and additional parameters.
1196
-
1197
- This function assesses the neighborhood interactions between two specified cell populations (or within a single population) at a given position.
1198
- It computes various neighborhood metrics based on specified distances, considering the entire image or excluding edge regions.
1199
- The results are optionally cleared of previous neighborhood calculations and can be returned as updated tables.
1200
-
1201
- Parameters
1202
- ----------
1203
- pos : str
1204
- The path to the position directory where the analysis is to be performed.
1205
- distance : float or list of float
1206
- The distance(s) in pixels to define neighborhoods.
1207
- population : list of str, optional
1208
- Names of the cell populations to analyze. If a single population is provided, it is used for both populations in the analysis (default is ['targets', 'effectors']).
1209
- theta_dist : float or list of float, optional
1210
- Edge threshold(s) in pixels to exclude cells close to the image boundaries from the analysis. If not provided, defaults to 90% of each specified distance.
1211
- img_shape : tuple of int, optional
1212
- The dimensions (height, width) of the images in pixels (default is (2048, 2048)).
1213
- return_tables : bool, optional
1214
- If True, returns the updated data tables for both populations (default is False).
1215
- clear_neigh : bool, optional
1216
- If True, clears existing neighborhood columns from the data tables before computing new metrics (default is False).
1217
- event_time_col : str, optional
1218
- The column name indicating the event time for each cell, required if mean neighborhood metrics are to be computed before events.
1219
- neighborhood_kwargs : dict, optional
1220
- Additional keyword arguments for neighborhood computation, including mode, status options, and metrics (default includes mode 'two-pop', and symmetrization).
1221
-
1222
- Returns
1223
- -------
1224
- pandas.DataFrame or (pandas.DataFrame, pandas.DataFrame)
1225
- If `return_tables` is True, returns the updated data tables for the specified populations. If only one population is analyzed, both returned data frames will be identical.
1226
-
1227
- Raises
1228
- ------
1229
- AssertionError
1230
- If the specified position path does not exist or if the number of distances and edge thresholds do not match.
1231
-
1232
- """
1233
-
1234
- pos = pos.replace('\\', '/')
1235
- pos = rf"{pos}"
1236
- assert os.path.exists(pos), f'Position {pos} is not a valid path.'
1237
-
1238
- if isinstance(population, str):
1239
- population = [population, population]
1240
-
1241
- if not isinstance(distance, list):
1242
- distance = [distance]
1243
- if not theta_dist is None and not isinstance(theta_dist, list):
1244
- theta_dist = [theta_dist]
1245
-
1246
- if theta_dist is None:
1247
- theta_dist = [0 for d in distance] # 0.9*d
1248
- assert len(theta_dist) == len(distance), 'Incompatible number of distances and number of edge thresholds.'
1249
-
1250
- if population[0] == population[1]:
1251
- neighborhood_kwargs.update({'mode': 'self'})
1252
- if population[1] != population[0]:
1253
- neighborhood_kwargs.update({'mode': 'two-pop'})
1254
-
1255
- df_A, path_A = get_position_table(pos, population=population[0], return_path=True)
1256
- df_B, path_B = get_position_table(pos, population=population[1], return_path=True)
1257
- if df_A is None or df_B is None:
1258
- return None
1259
-
1260
- if clear_neigh:
1261
- if os.path.exists(path_A.replace('.csv','.pkl')):
1262
- os.remove(path_A.replace('.csv','.pkl'))
1263
- if os.path.exists(path_B.replace('.csv','.pkl')):
1264
- os.remove(path_B.replace('.csv','.pkl'))
1265
- df_pair, pair_path = get_position_table(pos, population='pairs', return_path=True)
1266
- if df_pair is not None:
1267
- os.remove(pair_path)
1268
-
1269
- df_A_pkl = get_position_pickle(pos, population=population[0], return_path=False)
1270
- df_B_pkl = get_position_pickle(pos, population=population[1], return_path=False)
1271
-
1272
- if df_A_pkl is not None:
1273
- pkl_columns = np.array(df_A_pkl.columns)
1274
- neigh_columns = np.array([c.startswith('neighborhood') for c in pkl_columns])
1275
- cols = list(pkl_columns[neigh_columns]) + ['FRAME']
1276
-
1277
- id_col = extract_identity_col(df_A_pkl)
1278
- cols.append(id_col)
1279
- on_cols = [id_col, 'FRAME']
1280
-
1281
- print(f'Recover {cols} from the pickle file...')
1282
- try:
1283
- df_A = pd.merge(df_A, df_A_pkl.loc[:,cols], how="outer", on=on_cols)
1284
- print(df_A.columns)
1285
- except Exception as e:
1286
- print(f'Failure to merge pickle and csv files: {e}')
1287
-
1288
- if df_B_pkl is not None and df_B is not None:
1289
- pkl_columns = np.array(df_B_pkl.columns)
1290
- neigh_columns = np.array([c.startswith('neighborhood') for c in pkl_columns])
1291
- cols = list(pkl_columns[neigh_columns]) + ['FRAME']
1292
-
1293
- id_col = extract_identity_col(df_B_pkl)
1294
- cols.append(id_col)
1295
- on_cols = [id_col, 'FRAME']
1296
-
1297
- print(f'Recover {cols} from the pickle file...')
1298
- try:
1299
- df_B = pd.merge(df_B, df_B_pkl.loc[:,cols], how="outer", on=on_cols)
1300
- except Exception as e:
1301
- print(f'Failure to merge pickle and csv files: {e}')
1302
-
1303
- labelsA = locate_labels(pos, population=population[0])
1304
- if population[1] == population[0]:
1305
- labelsB = None
1306
- else:
1307
- labelsB = locate_labels(pos, population=population[1])
1308
-
1309
- if clear_neigh:
1310
- unwanted = df_A.columns[df_A.columns.str.contains('neighborhood')]
1311
- df_A = df_A.drop(columns=unwanted)
1312
- unwanted = df_B.columns[df_B.columns.str.contains('neighborhood')]
1313
- df_B = df_B.drop(columns=unwanted)
1314
-
1315
- print(f"Distance: {distance} for mask contact")
1316
- df_A, df_B = mask_contact_neighborhood(df_A, df_B, labelsA, labelsB, distance, **neighborhood_kwargs)
1317
- if df_A is None or df_B is None or len(df_A)==0:
1318
- return None
1319
-
1320
- for td, d in zip(theta_dist, distance):
1321
-
1322
- if neighborhood_kwargs['mode'] == 'two-pop':
1323
- neigh_col = f'neighborhood_2_contact_{d}_px'
1324
- elif neighborhood_kwargs['mode'] == 'self':
1325
- neigh_col = f'neighborhood_self_contact_{d}_px'
1326
-
1327
- df_A.loc[df_A['class_id'].isnull(),neigh_col] = np.nan
1328
-
1329
- # edge_filter_A = (df_A['POSITION_X'] > td)&(df_A['POSITION_Y'] > td)&(df_A['POSITION_Y'] < (img_shape[0] - td))&(df_A['POSITION_X'] < (img_shape[1] - td))
1330
- # edge_filter_B = (df_B['POSITION_X'] > td)&(df_B['POSITION_Y'] > td)&(df_B['POSITION_Y'] < (img_shape[0] - td))&(df_B['POSITION_X'] < (img_shape[1] - td))
1331
- # df_A.loc[~edge_filter_A, neigh_col] = np.nan
1332
- # df_B.loc[~edge_filter_B, neigh_col] = np.nan
1333
-
1334
- df_A = compute_neighborhood_metrics(df_A, neigh_col, metrics=['inclusive', 'intermediate'],
1335
- decompose_by_status=True)
1336
- if 'TRACK_ID' in list(df_A.columns):
1337
- if not np.all(df_A['TRACK_ID'].isnull()):
1338
- df_A = mean_neighborhood_before_event(df_A, neigh_col, event_time_col, metrics=['inclusive','intermediate'])
1339
- if event_time_col is not None:
1340
- df_A = mean_neighborhood_after_event(df_A, neigh_col, event_time_col, metrics=['inclusive', 'intermediate'])
1341
- print('Done...')
1342
-
1343
- if not population[0] == population[1]:
1344
- # Remove neighborhood column from neighbor table, rename with actual population name
1345
- for td, d in zip(theta_dist, distance):
1346
- if neighborhood_kwargs['mode'] == 'two-pop':
1347
- neigh_col = f'neighborhood_2_contact_{d}_px'
1348
- new_neigh_col = neigh_col.replace('_2_',f'_({population[0]}-{population[1]})_')
1349
- df_A = df_A.rename(columns={neigh_col: new_neigh_col})
1350
- elif neighborhood_kwargs['mode'] == 'self':
1351
- neigh_col = f'neighborhood_self_contact_{d}_px'
1352
- df_B = df_B.drop(columns=[neigh_col])
1353
- df_B.to_pickle(path_B.replace('.csv', '.pkl'))
1354
-
1355
- cols_to_rename = [c for c in list(df_A.columns) if c.startswith('intermediate_count_') or c.startswith('inclusive_count_') or c.startswith('exclusive_count_') or c.startswith('mean_count_')]
1356
- new_col_names = [c.replace('_2_',f'_({population[0]}-{population[1]})_') for c in cols_to_rename]
1357
- new_name_map = {}
1358
- for k,c in enumerate(cols_to_rename):
1359
- new_name_map.update({c: new_col_names[k]})
1360
- df_A = df_A.rename(columns=new_name_map)
1361
-
1362
- print(f'{df_A.columns=}')
1363
- df_A.to_pickle(path_A.replace('.csv', '.pkl'))
1364
-
1365
- unwanted = df_A.columns[df_A.columns.str.startswith('neighborhood_')]
1366
- df_A2 = df_A.drop(columns=unwanted)
1367
- df_A2.to_csv(path_A, index=False)
1368
-
1369
- if not population[0] == population[1]:
1370
- unwanted = df_B.columns[df_B.columns.str.startswith('neighborhood_')]
1371
- df_B_csv = df_B.drop(unwanted, axis=1, inplace=False)
1372
- df_B_csv.to_csv(path_B, index=False)
1373
-
1374
- if return_tables:
1375
- return df_A, df_B
1376
-
1377
-
1378
- def extract_neighborhood_in_pair_table(df, distance=None, reference_population="targets", neighbor_population="effectors", mode="circle", neighborhood_key=None, contact_only=True,):
1379
-
1380
- """
1381
- Extracts data from a pair table that matches specific neighborhood criteria based on reference and neighbor
1382
- populations, distance, and mode of neighborhood computation (e.g., circular or contact-based).
1383
-
1384
- Parameters
1385
- ----------
1386
- df : pandas.DataFrame
1387
- DataFrame containing the pair table, which includes columns for 'reference_population', 'neighbor_population',
1388
- and a column for neighborhood status.
1389
- distance : int, optional
1390
- Radius in pixels for neighborhood calculation, used only if `neighborhood_key` is not provided.
1391
- reference_population : str, default="targets"
1392
- The reference population to consider. Must be either "targets" or "effectors".
1393
- neighbor_population : str, default="effectors"
1394
- The neighbor population to consider. Must be either "targets" or "effectors", used only if `neighborhood_key` is not provided.
1395
- mode : str, default="circle"
1396
- Neighborhood computation mode. Options are "circle" for radius-based or "contact" for contact-based neighborhood, used only if `neighborhood_key` is not provided.
1397
- neighborhood_key : str, optional
1398
- A precomputed neighborhood key to identify specific neighborhoods. If provided, this key overrides `distance`,
1399
- `mode`, and `neighbor_population`.
1400
- contact_only : bool, default=True
1401
- If True, only rows indicating contact with the neighbor population (status=1) are kept; if False, both
1402
- contact (status=1) and no-contact (status=0) rows are included.
1403
-
1404
- Returns
1405
- -------
1406
- pandas.DataFrame
1407
- Filtered DataFrame containing rows that meet the specified neighborhood criteria.
1408
-
1409
- Notes
1410
- -----
1411
- - When `neighborhood_key` is None, the neighborhood column is generated based on the provided `reference_population`,
1412
- `neighbor_population`, `distance`, and `mode`.
1413
- - The function uses `status_<neigh_col>` to filter rows based on `contact_only` criteria.
1414
- - Ensures that `reference_population` and `neighbor_population` are valid inputs and consistent with the neighborhood
1415
- mode and key.
1416
-
1417
- Example
1418
- -------
1419
- >>> neighborhood_data = extract_neighborhood_in_pair_table(df, distance=50, reference_population="targets",
1420
- neighbor_population="effectors", mode="circle")
1421
- >>> neighborhood_data.head()
1422
-
1423
- Raises
1424
- ------
1425
- AssertionError
1426
- If `reference_population` or `neighbor_population` is not valid, or if the required neighborhood status
1427
- column does not exist in `df`.
1428
- """
1429
-
1430
-
1431
- #assert reference_population in ["targets", "effectors"], "Please set a valid reference population ('targets' or 'effectors')"
1432
- if neighborhood_key is None:
1433
- #assert neighbor_population in ["targets", "effectors"], "Please set a valid neighbor population ('targets' or 'effectors')"
1434
- assert mode in ["circle", "contact"], "Please set a valid neighborhood computation mode ('circle' or 'contact')"
1435
- type = '('+'-'.join([reference_population, neighbor_population])+')'
1436
- neigh_col = f"neighborhood_{type}_{mode}_{distance}_px"
1437
- else:
1438
- neigh_col = neighborhood_key.replace('status_','')
1439
- if '_(' in neigh_col and ')_' in neigh_col:
1440
- neighbor_population = neigh_col.split('_(')[-1].split(')_')[0].split('-')[-1]
1441
- else:
1442
- if 'self' in neigh_col:
1443
- neighbor_population = reference_population
1444
- else:
1445
- if reference_population=="effectors":
1446
- neighbor_population='targets'
1447
- else:
1448
- neighbor_population='effectors'
1449
-
1450
- assert "status_"+neigh_col in list(df.columns),"The selected neighborhood does not appear in the data..."
1451
-
1452
- print(df[['reference_population','neighbor_population', "status_"+neigh_col]])
1453
-
1454
- if contact_only:
1455
- s_keep = [1]
1456
- else:
1457
- s_keep = [0,1]
1458
-
1459
- data = df.loc[(df['reference_population']==reference_population)&(df['neighbor_population']==neighbor_population)&(df["status_"+neigh_col].isin(s_keep))]
1460
-
1461
- return data
1414
+
1415
+ assert labels.ndim == 2, "Wrong dimension for labels..."
1416
+ g, nodes = pixel_graph(labels, mask=labels.astype(bool), connectivity=connectivity)
1417
+ g.eliminate_zeros()
1418
+
1419
+ coo = g.tocoo()
1420
+ center_coords = nodes[coo.row]
1421
+ neighbor_coords = nodes[coo.col]
1422
+
1423
+ center_values = labels.ravel()[center_coords]
1424
+ neighbor_values = labels.ravel()[neighbor_coords]
1425
+ touching_masks = np.column_stack((center_values, neighbor_values))
1426
+
1427
+ return touching_masks
1428
+
1429
+
1430
+ def mask_contact_neighborhood(
1431
+ setA,
1432
+ setB,
1433
+ labelsA,
1434
+ labelsB,
1435
+ distance,
1436
+ mode="two-pop",
1437
+ status=None,
1438
+ not_status_option=None,
1439
+ compute_cum_sum=True,
1440
+ attention_weight=True,
1441
+ symmetrize=True,
1442
+ include_dead_weight=True,
1443
+ column_labels={
1444
+ "track": "TRACK_ID",
1445
+ "time": "FRAME",
1446
+ "x": "POSITION_X",
1447
+ "y": "POSITION_Y",
1448
+ "mask_id": "class_id",
1449
+ },
1450
+ ):
1451
+ """
1452
+
1453
+ Match neighbors in set A and B within a circle of radius d.
1454
+
1455
+ Parameters
1456
+ ----------
1457
+ setA,setB : pandas DataFrame
1458
+ Trajectory or position sets A and B.
1459
+ distance : float
1460
+ Cut-distance in pixels to match neighboring pairs.
1461
+ mode: str
1462
+ neighboring mode, between 'two-pop' (e.g. target-effector) and 'self' (target-target or effector-effector).
1463
+ status: None or status
1464
+ name to look for cells to ignore (because they are dead). By default all cells are kept.
1465
+ compute_cum_sum: bool,
1466
+ compute cumulated time of presence of neighbours (only if trajectories available for both sets)
1467
+ attention_weight: bool,
1468
+ compute the attention weight (how much a cell of set B is shared across cells of set A)
1469
+ symmetrize: bool,
1470
+ write in set B the neighborhood of set A
1471
+ include_dead_weight: bool
1472
+ do not count dead cells when establishing attention weight
1473
+ """
1474
+
1475
+ if setA is not None and setB is not None:
1476
+ setA, setB, status = set_live_status(setA, setB, status, not_status_option)
1477
+ else:
1478
+ return None, None
1479
+
1480
+ # Check distance option
1481
+ if not isinstance(distance, list):
1482
+ distance = [distance]
1483
+
1484
+ cl = []
1485
+ for s in [setA, setB]:
1486
+
1487
+ # Check whether data can be tracked
1488
+ temp_column_labels = column_labels.copy()
1489
+
1490
+ id_col = extract_identity_col(s)
1491
+ temp_column_labels.update({"track": id_col})
1492
+ if id_col == "ID":
1493
+ compute_cum_sum = False
1494
+
1495
+ cl.append(temp_column_labels)
1496
+
1497
+ setA = setA.loc[~setA[cl[0]["track"]].isnull(), :].copy()
1498
+ setB = setB.loc[~setB[cl[1]["track"]].isnull(), :].copy()
1499
+
1500
+ if labelsB is None:
1501
+ labelsB = [None] * len(labelsA)
1502
+
1503
+ for d in distance:
1504
+ # loop over each provided distance
1505
+
1506
+ if mode == "two-pop":
1507
+ neigh_col = f"neighborhood_2_contact_{d}_px"
1508
+ elif mode == "self":
1509
+ neigh_col = f"neighborhood_self_contact_{d}_px"
1510
+ else:
1511
+ print("Please provide a valid mode between `two-pop` and `self`...")
1512
+ return None
1513
+
1514
+ setA[neigh_col] = np.nan
1515
+ setA[neigh_col] = setA[neigh_col].astype(object)
1516
+
1517
+ setB[neigh_col] = np.nan
1518
+ setB[neigh_col] = setB[neigh_col].astype(object)
1519
+
1520
+ # Loop over each available timestep
1521
+ timeline = np.unique(
1522
+ np.concatenate(
1523
+ [setA[cl[0]["time"]].to_numpy(), setB[cl[1]["time"]].to_numpy()]
1524
+ )
1525
+ ).astype(int)
1526
+ for t in tqdm(timeline):
1527
+
1528
+ setA_t = setA.loc[setA[cl[0]["time"]] == t, :]
1529
+ setB_t = setB.loc[setB[cl[1]["time"]] == t, :]
1530
+
1531
+ if len(setA_t) > 0 and len(setB_t) > 0:
1532
+ dist_map, intersection_map = _compute_mask_contact_dist_map(
1533
+ setA_t,
1534
+ setB_t,
1535
+ labelsA[t],
1536
+ labelsB[t],
1537
+ distance=d,
1538
+ mode=mode,
1539
+ column_labelsA=cl[0],
1540
+ column_labelsB=cl[1],
1541
+ )
1542
+
1543
+ d_filter = 1.0e05
1544
+ if attention_weight:
1545
+ status_A = setA_t[status[0]].to_numpy()
1546
+ ids_A = setA_t[cl[0]["track"]].to_numpy()
1547
+ weights, closest_A = compute_attention_weight(
1548
+ dist_map,
1549
+ d_filter,
1550
+ status_A,
1551
+ ids_A,
1552
+ axis=1,
1553
+ include_dead_weight=include_dead_weight,
1554
+ )
1555
+ else:
1556
+ weights = None
1557
+ closest_A = None
1558
+
1559
+ _fill_contact_neighborhood_at_t(
1560
+ t,
1561
+ setA,
1562
+ setB,
1563
+ dist_map,
1564
+ intersection_map=intersection_map,
1565
+ attention_weight=attention_weight,
1566
+ include_dead_weight=include_dead_weight,
1567
+ symmetrize=symmetrize,
1568
+ compute_cum_sum=compute_cum_sum,
1569
+ weights=weights,
1570
+ closest_A=closest_A,
1571
+ neigh_col=neigh_col,
1572
+ column_labelsA=cl[0],
1573
+ column_labelsB=cl[1],
1574
+ statusA=status[0],
1575
+ statusB=status[1],
1576
+ d_filter=d_filter,
1577
+ )
1578
+
1579
+ return setA, setB
1580
+
1581
+
1582
+ def compute_contact_neighborhood_at_position(
1583
+ pos,
1584
+ distance,
1585
+ population=["targets", "effectors"],
1586
+ theta_dist=None,
1587
+ img_shape=(2048, 2048),
1588
+ return_tables=False,
1589
+ clear_neigh=False,
1590
+ event_time_col=None,
1591
+ neighborhood_kwargs={
1592
+ "mode": "two-pop",
1593
+ "status": None,
1594
+ "not_status_option": None,
1595
+ "include_dead_weight": True,
1596
+ "compute_cum_sum": False,
1597
+ "attention_weight": True,
1598
+ "symmetrize": True,
1599
+ },
1600
+ ):
1601
+ """
1602
+ Computes neighborhood metrics for specified cell populations within a given position, based on distance criteria and additional parameters.
1603
+
1604
+ This function assesses the neighborhood interactions between two specified cell populations (or within a single population) at a given position.
1605
+ It computes various neighborhood metrics based on specified distances, considering the entire image or excluding edge regions.
1606
+ The results are optionally cleared of previous neighborhood calculations and can be returned as updated tables.
1607
+
1608
+ Parameters
1609
+ ----------
1610
+ pos : str
1611
+ The path to the position directory where the analysis is to be performed.
1612
+ distance : float or list of float
1613
+ The distance(s) in pixels to define neighborhoods.
1614
+ population : list of str, optional
1615
+ Names of the cell populations to analyze. If a single population is provided, it is used for both populations in the analysis (default is ['targets', 'effectors']).
1616
+ theta_dist : float or list of float, optional
1617
+ Edge threshold(s) in pixels to exclude cells close to the image boundaries from the analysis. If not provided, defaults to 90% of each specified distance.
1618
+ img_shape : tuple of int, optional
1619
+ The dimensions (height, width) of the images in pixels (default is (2048, 2048)).
1620
+ return_tables : bool, optional
1621
+ If True, returns the updated data tables for both populations (default is False).
1622
+ clear_neigh : bool, optional
1623
+ If True, clears existing neighborhood columns from the data tables before computing new metrics (default is False).
1624
+ event_time_col : str, optional
1625
+ The column name indicating the event time for each cell, required if mean neighborhood metrics are to be computed before events.
1626
+ neighborhood_kwargs : dict, optional
1627
+ Additional keyword arguments for neighborhood computation, including mode, status options, and metrics (default includes mode 'two-pop', and symmetrization).
1628
+
1629
+ Returns
1630
+ -------
1631
+ pandas.DataFrame or (pandas.DataFrame, pandas.DataFrame)
1632
+ If `return_tables` is True, returns the updated data tables for the specified populations. If only one population is analyzed, both returned data frames will be identical.
1633
+
1634
+ Raises
1635
+ ------
1636
+ AssertionError
1637
+ If the specified position path does not exist or if the number of distances and edge thresholds do not match.
1638
+
1639
+ """
1640
+
1641
+ pos = pos.replace("\\", "/")
1642
+ pos = rf"{pos}"
1643
+ assert os.path.exists(pos), f"Position {pos} is not a valid path."
1644
+
1645
+ if isinstance(population, str):
1646
+ population = [population, population]
1647
+
1648
+ if not isinstance(distance, list):
1649
+ distance = [distance]
1650
+ if not theta_dist is None and not isinstance(theta_dist, list):
1651
+ theta_dist = [theta_dist]
1652
+
1653
+ if theta_dist is None:
1654
+ theta_dist = [0 for d in distance] # 0.9*d
1655
+ assert len(theta_dist) == len(
1656
+ distance
1657
+ ), "Incompatible number of distances and number of edge thresholds."
1658
+
1659
+ if population[0] == population[1]:
1660
+ neighborhood_kwargs.update({"mode": "self"})
1661
+ if population[1] != population[0]:
1662
+ neighborhood_kwargs.update({"mode": "two-pop"})
1663
+
1664
+ df_A, path_A = get_position_table(pos, population=population[0], return_path=True)
1665
+ df_B, path_B = get_position_table(pos, population=population[1], return_path=True)
1666
+ if df_A is None or df_B is None:
1667
+ return None
1668
+
1669
+ if clear_neigh:
1670
+ if os.path.exists(path_A.replace(".csv", ".pkl")):
1671
+ os.remove(path_A.replace(".csv", ".pkl"))
1672
+ if os.path.exists(path_B.replace(".csv", ".pkl")):
1673
+ os.remove(path_B.replace(".csv", ".pkl"))
1674
+ df_pair, pair_path = get_position_table(
1675
+ pos, population="pairs", return_path=True
1676
+ )
1677
+ if df_pair is not None:
1678
+ os.remove(pair_path)
1679
+
1680
+ df_A_pkl = get_position_pickle(pos, population=population[0], return_path=False)
1681
+ df_B_pkl = get_position_pickle(pos, population=population[1], return_path=False)
1682
+
1683
+ if df_A_pkl is not None:
1684
+ pkl_columns = np.array(df_A_pkl.columns)
1685
+ neigh_columns = np.array([c.startswith("neighborhood") for c in pkl_columns])
1686
+ cols = list(pkl_columns[neigh_columns]) + ["FRAME"]
1687
+
1688
+ id_col = extract_identity_col(df_A_pkl)
1689
+ cols.append(id_col)
1690
+ on_cols = [id_col, "FRAME"]
1691
+
1692
+ print(f"Recover {cols} from the pickle file...")
1693
+ try:
1694
+ df_A = pd.merge(df_A, df_A_pkl.loc[:, cols], how="outer", on=on_cols)
1695
+ print(df_A.columns)
1696
+ except Exception as e:
1697
+ print(f"Failure to merge pickle and csv files: {e}")
1698
+
1699
+ if df_B_pkl is not None and df_B is not None:
1700
+ pkl_columns = np.array(df_B_pkl.columns)
1701
+ neigh_columns = np.array([c.startswith("neighborhood") for c in pkl_columns])
1702
+ cols = list(pkl_columns[neigh_columns]) + ["FRAME"]
1703
+
1704
+ id_col = extract_identity_col(df_B_pkl)
1705
+ cols.append(id_col)
1706
+ on_cols = [id_col, "FRAME"]
1707
+
1708
+ print(f"Recover {cols} from the pickle file...")
1709
+ try:
1710
+ df_B = pd.merge(df_B, df_B_pkl.loc[:, cols], how="outer", on=on_cols)
1711
+ except Exception as e:
1712
+ print(f"Failure to merge pickle and csv files: {e}")
1713
+
1714
+ labelsA = locate_labels(pos, population=population[0])
1715
+ if population[1] == population[0]:
1716
+ labelsB = None
1717
+ else:
1718
+ labelsB = locate_labels(pos, population=population[1])
1719
+
1720
+ if clear_neigh:
1721
+ unwanted = df_A.columns[df_A.columns.str.contains("neighborhood")]
1722
+ df_A = df_A.drop(columns=unwanted)
1723
+ unwanted = df_B.columns[df_B.columns.str.contains("neighborhood")]
1724
+ df_B = df_B.drop(columns=unwanted)
1725
+
1726
+ print(f"Distance: {distance} for mask contact")
1727
+ df_A, df_B = mask_contact_neighborhood(
1728
+ df_A, df_B, labelsA, labelsB, distance, **neighborhood_kwargs
1729
+ )
1730
+ if df_A is None or df_B is None or len(df_A) == 0:
1731
+ return None
1732
+
1733
+ for td, d in zip(theta_dist, distance):
1734
+
1735
+ if neighborhood_kwargs["mode"] == "two-pop":
1736
+ neigh_col = f"neighborhood_2_contact_{d}_px"
1737
+ elif neighborhood_kwargs["mode"] == "self":
1738
+ neigh_col = f"neighborhood_self_contact_{d}_px"
1739
+
1740
+ df_A.loc[df_A["class_id"].isnull(), neigh_col] = np.nan
1741
+
1742
+ # edge_filter_A = (df_A['POSITION_X'] > td)&(df_A['POSITION_Y'] > td)&(df_A['POSITION_Y'] < (img_shape[0] - td))&(df_A['POSITION_X'] < (img_shape[1] - td))
1743
+ # edge_filter_B = (df_B['POSITION_X'] > td)&(df_B['POSITION_Y'] > td)&(df_B['POSITION_Y'] < (img_shape[0] - td))&(df_B['POSITION_X'] < (img_shape[1] - td))
1744
+ # df_A.loc[~edge_filter_A, neigh_col] = np.nan
1745
+ # df_B.loc[~edge_filter_B, neigh_col] = np.nan
1746
+
1747
+ df_A = compute_neighborhood_metrics(
1748
+ df_A,
1749
+ neigh_col,
1750
+ metrics=["inclusive", "intermediate"],
1751
+ decompose_by_status=True,
1752
+ )
1753
+ if "TRACK_ID" in list(df_A.columns):
1754
+ if not np.all(df_A["TRACK_ID"].isnull()):
1755
+ df_A = mean_neighborhood_before_event(
1756
+ df_A,
1757
+ neigh_col,
1758
+ event_time_col,
1759
+ metrics=["inclusive", "intermediate"],
1760
+ )
1761
+ if event_time_col is not None:
1762
+ df_A = mean_neighborhood_after_event(
1763
+ df_A,
1764
+ neigh_col,
1765
+ event_time_col,
1766
+ metrics=["inclusive", "intermediate"],
1767
+ )
1768
+ print("Done...")
1769
+
1770
+ if not population[0] == population[1]:
1771
+ # Remove neighborhood column from neighbor table, rename with actual population name
1772
+ for td, d in zip(theta_dist, distance):
1773
+ if neighborhood_kwargs["mode"] == "two-pop":
1774
+ neigh_col = f"neighborhood_2_contact_{d}_px"
1775
+ new_neigh_col = neigh_col.replace(
1776
+ "_2_", f"_({population[0]}-{population[1]})_"
1777
+ )
1778
+ df_A = df_A.rename(columns={neigh_col: new_neigh_col})
1779
+ elif neighborhood_kwargs["mode"] == "self":
1780
+ neigh_col = f"neighborhood_self_contact_{d}_px"
1781
+ df_B = df_B.drop(columns=[neigh_col])
1782
+ df_B.to_pickle(path_B.replace(".csv", ".pkl"))
1783
+
1784
+ cols_to_rename = [
1785
+ c
1786
+ for c in list(df_A.columns)
1787
+ if c.startswith("intermediate_count_")
1788
+ or c.startswith("inclusive_count_")
1789
+ or c.startswith("exclusive_count_")
1790
+ or c.startswith("mean_count_")
1791
+ ]
1792
+ new_col_names = [
1793
+ c.replace("_2_", f"_({population[0]}-{population[1]})_") for c in cols_to_rename
1794
+ ]
1795
+ new_name_map = {}
1796
+ for k, c in enumerate(cols_to_rename):
1797
+ new_name_map.update({c: new_col_names[k]})
1798
+ df_A = df_A.rename(columns=new_name_map)
1799
+
1800
+ print(f"{df_A.columns=}")
1801
+ df_A.to_pickle(path_A.replace(".csv", ".pkl"))
1802
+
1803
+ unwanted = df_A.columns[df_A.columns.str.startswith("neighborhood_")]
1804
+ df_A2 = df_A.drop(columns=unwanted)
1805
+ df_A2.to_csv(path_A, index=False)
1806
+
1807
+ if not population[0] == population[1]:
1808
+ unwanted = df_B.columns[df_B.columns.str.startswith("neighborhood_")]
1809
+ df_B_csv = df_B.drop(unwanted, axis=1, inplace=False)
1810
+ df_B_csv.to_csv(path_B, index=False)
1811
+
1812
+ if return_tables:
1813
+ return df_A, df_B
1814
+
1815
+
1816
+ def extract_neighborhood_in_pair_table(
1817
+ df,
1818
+ distance=None,
1819
+ reference_population="targets",
1820
+ neighbor_population="effectors",
1821
+ mode="circle",
1822
+ neighborhood_key=None,
1823
+ contact_only=True,
1824
+ ):
1825
+ """
1826
+ Extracts data from a pair table that matches specific neighborhood criteria based on reference and neighbor
1827
+ populations, distance, and mode of neighborhood computation (e.g., circular or contact-based).
1828
+
1829
+ Parameters
1830
+ ----------
1831
+ df : pandas.DataFrame
1832
+ DataFrame containing the pair table, which includes columns for 'reference_population', 'neighbor_population',
1833
+ and a column for neighborhood status.
1834
+ distance : int, optional
1835
+ Radius in pixels for neighborhood calculation, used only if `neighborhood_key` is not provided.
1836
+ reference_population : str, default="targets"
1837
+ The reference population to consider. Must be either "targets" or "effectors".
1838
+ neighbor_population : str, default="effectors"
1839
+ The neighbor population to consider. Must be either "targets" or "effectors", used only if `neighborhood_key` is not provided.
1840
+ mode : str, default="circle"
1841
+ Neighborhood computation mode. Options are "circle" for radius-based or "contact" for contact-based neighborhood, used only if `neighborhood_key` is not provided.
1842
+ neighborhood_key : str, optional
1843
+ A precomputed neighborhood key to identify specific neighborhoods. If provided, this key overrides `distance`,
1844
+ `mode`, and `neighbor_population`.
1845
+ contact_only : bool, default=True
1846
+ If True, only rows indicating contact with the neighbor population (status=1) are kept; if False, both
1847
+ contact (status=1) and no-contact (status=0) rows are included.
1848
+
1849
+ Returns
1850
+ -------
1851
+ pandas.DataFrame
1852
+ Filtered DataFrame containing rows that meet the specified neighborhood criteria.
1853
+
1854
+ Notes
1855
+ -----
1856
+ - When `neighborhood_key` is None, the neighborhood column is generated based on the provided `reference_population`,
1857
+ `neighbor_population`, `distance`, and `mode`.
1858
+ - The function uses `status_<neigh_col>` to filter rows based on `contact_only` criteria.
1859
+ - Ensures that `reference_population` and `neighbor_population` are valid inputs and consistent with the neighborhood
1860
+ mode and key.
1861
+
1862
+ Example
1863
+ -------
1864
+ >>> neighborhood_data = extract_neighborhood_in_pair_table(df, distance=50, reference_population="targets",
1865
+ neighbor_population="effectors", mode="circle")
1866
+ >>> neighborhood_data.head()
1867
+
1868
+ Raises
1869
+ ------
1870
+ AssertionError
1871
+ If `reference_population` or `neighbor_population` is not valid, or if the required neighborhood status
1872
+ column does not exist in `df`.
1873
+ """
1874
+
1875
+ # assert reference_population in ["targets", "effectors"], "Please set a valid reference population ('targets' or 'effectors')"
1876
+ if neighborhood_key is None:
1877
+ # assert neighbor_population in ["targets", "effectors"], "Please set a valid neighbor population ('targets' or 'effectors')"
1878
+ assert mode in [
1879
+ "circle",
1880
+ "contact",
1881
+ ], "Please set a valid neighborhood computation mode ('circle' or 'contact')"
1882
+ type = "(" + "-".join([reference_population, neighbor_population]) + ")"
1883
+ neigh_col = f"neighborhood_{type}_{mode}_{distance}_px"
1884
+ else:
1885
+ neigh_col = neighborhood_key.replace("status_", "")
1886
+ if "_(" in neigh_col and ")_" in neigh_col:
1887
+ neighbor_population = (
1888
+ neigh_col.split("_(")[-1].split(")_")[0].split("-")[-1]
1889
+ )
1890
+ else:
1891
+ if "self" in neigh_col:
1892
+ neighbor_population = reference_population
1893
+ else:
1894
+ if reference_population == "effectors":
1895
+ neighbor_population = "targets"
1896
+ else:
1897
+ neighbor_population = "effectors"
1898
+
1899
+ assert "status_" + neigh_col in list(
1900
+ df.columns
1901
+ ), "The selected neighborhood does not appear in the data..."
1902
+
1903
+ print(df[["reference_population", "neighbor_population", "status_" + neigh_col]])
1904
+
1905
+ if contact_only:
1906
+ s_keep = [1]
1907
+ else:
1908
+ s_keep = [0, 1]
1909
+
1910
+ data = df.loc[
1911
+ (df["reference_population"] == reference_population)
1912
+ & (df["neighbor_population"] == neighbor_population)
1913
+ & (df["status_" + neigh_col].isin(s_keep))
1914
+ ]
1915
+
1916
+ return data
1462
1917
 
1463
1918
 
1464
1919
  # def mask_intersection_neighborhood(setA, labelsA, setB, labelsB, threshold_iou=0.5, viewpoint='B'):
@@ -1467,17 +1922,28 @@ def extract_neighborhood_in_pair_table(df, distance=None, reference_population="
1467
1922
 
1468
1923
  if __name__ == "__main__":
1469
1924
 
1470
- print('None')
1471
- pos = "/home/torro/Documents/Experiments/NKratio_Exp/W5/500"
1472
-
1473
- test, _ = compute_neighborhood_at_position(pos, [62], population=['targets', 'effectors'], theta_dist=None,
1474
- img_shape=(2048, 2048), return_tables=True, clear_neigh=True,
1475
- neighborhood_kwargs={'mode': 'two-pop', 'status': ['class', None],
1476
- 'not_status_option': [True, False],
1477
- 'include_dead_weight': True,
1478
- "compute_cum_sum": False, "attention_weight": True,
1479
- 'symmetrize': False})
1480
-
1481
- # test = compute_neighborhood_metrics(test, 'neighborhood_self_circle_150_px', metrics=['inclusive','exclusive','intermediate'], decompose_by_status=True)
1482
- print(test.columns)
1483
- #print(segment(None,'test'))
1925
+ print("None")
1926
+ pos = "/home/torro/Documents/Experiments/NKratio_Exp/W5/500"
1927
+
1928
+ test, _ = compute_neighborhood_at_position(
1929
+ pos,
1930
+ [62],
1931
+ population=["targets", "effectors"],
1932
+ theta_dist=None,
1933
+ img_shape=(2048, 2048),
1934
+ return_tables=True,
1935
+ clear_neigh=True,
1936
+ neighborhood_kwargs={
1937
+ "mode": "two-pop",
1938
+ "status": ["class", None],
1939
+ "not_status_option": [True, False],
1940
+ "include_dead_weight": True,
1941
+ "compute_cum_sum": False,
1942
+ "attention_weight": True,
1943
+ "symmetrize": False,
1944
+ },
1945
+ )
1946
+
1947
+ # test = compute_neighborhood_metrics(test, 'neighborhood_self_circle_150_px', metrics=['inclusive','exclusive','intermediate'], decompose_by_status=True)
1948
+ print(test.columns)
1949
+ # print(segment(None,'test'))