celldetective 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. celldetective/__init__.py +2 -0
  2. celldetective/__main__.py +432 -0
  3. celldetective/datasets/segmentation_annotations/blank +0 -0
  4. celldetective/datasets/signal_annotations/blank +0 -0
  5. celldetective/events.py +149 -0
  6. celldetective/extra_properties.py +100 -0
  7. celldetective/filters.py +89 -0
  8. celldetective/gui/__init__.py +20 -0
  9. celldetective/gui/about.py +44 -0
  10. celldetective/gui/analyze_block.py +563 -0
  11. celldetective/gui/btrack_options.py +898 -0
  12. celldetective/gui/classifier_widget.py +386 -0
  13. celldetective/gui/configure_new_exp.py +532 -0
  14. celldetective/gui/control_panel.py +438 -0
  15. celldetective/gui/gui_utils.py +495 -0
  16. celldetective/gui/json_readers.py +113 -0
  17. celldetective/gui/measurement_options.py +1425 -0
  18. celldetective/gui/neighborhood_options.py +452 -0
  19. celldetective/gui/plot_signals_ui.py +1042 -0
  20. celldetective/gui/process_block.py +1055 -0
  21. celldetective/gui/retrain_segmentation_model_options.py +706 -0
  22. celldetective/gui/retrain_signal_model_options.py +643 -0
  23. celldetective/gui/seg_model_loader.py +460 -0
  24. celldetective/gui/signal_annotator.py +2388 -0
  25. celldetective/gui/signal_annotator_options.py +340 -0
  26. celldetective/gui/styles.py +217 -0
  27. celldetective/gui/survival_ui.py +903 -0
  28. celldetective/gui/tableUI.py +608 -0
  29. celldetective/gui/thresholds_gui.py +1300 -0
  30. celldetective/icons/logo-large.png +0 -0
  31. celldetective/icons/logo.png +0 -0
  32. celldetective/icons/signals_icon.png +0 -0
  33. celldetective/icons/splash-test.png +0 -0
  34. celldetective/icons/splash.png +0 -0
  35. celldetective/icons/splash0.png +0 -0
  36. celldetective/icons/survival2.png +0 -0
  37. celldetective/icons/vignette_signals2.png +0 -0
  38. celldetective/icons/vignette_signals2.svg +114 -0
  39. celldetective/io.py +2050 -0
  40. celldetective/links/zenodo.json +561 -0
  41. celldetective/measure.py +1258 -0
  42. celldetective/models/segmentation_effectors/blank +0 -0
  43. celldetective/models/segmentation_generic/blank +0 -0
  44. celldetective/models/segmentation_targets/blank +0 -0
  45. celldetective/models/signal_detection/blank +0 -0
  46. celldetective/models/tracking_configs/mcf7.json +68 -0
  47. celldetective/models/tracking_configs/ricm.json +203 -0
  48. celldetective/models/tracking_configs/ricm2.json +203 -0
  49. celldetective/neighborhood.py +717 -0
  50. celldetective/scripts/analyze_signals.py +51 -0
  51. celldetective/scripts/measure_cells.py +275 -0
  52. celldetective/scripts/segment_cells.py +212 -0
  53. celldetective/scripts/segment_cells_thresholds.py +140 -0
  54. celldetective/scripts/track_cells.py +206 -0
  55. celldetective/scripts/train_segmentation_model.py +246 -0
  56. celldetective/scripts/train_signal_model.py +49 -0
  57. celldetective/segmentation.py +712 -0
  58. celldetective/signals.py +2826 -0
  59. celldetective/tracking.py +974 -0
  60. celldetective/utils.py +1681 -0
  61. celldetective-1.0.2.dist-info/LICENSE +674 -0
  62. celldetective-1.0.2.dist-info/METADATA +192 -0
  63. celldetective-1.0.2.dist-info/RECORD +66 -0
  64. celldetective-1.0.2.dist-info/WHEEL +5 -0
  65. celldetective-1.0.2.dist-info/entry_points.txt +2 -0
  66. celldetective-1.0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,717 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from tqdm import tqdm
4
+ from skimage.measure import regionprops_table
5
+ from functools import reduce
6
+ from mahotas.features import haralick
7
+ from scipy.ndimage import zoom
8
+ import os
9
+ import subprocess
10
+ from celldetective.utils import rename_intensity_column, create_patch_mask, remove_redundant_features
11
+ from celldetective.io import get_position_table
12
+ from scipy.spatial.distance import cdist
13
+ import re
14
+
15
# Root of the installed celldetective package: one directory up from this
# file, re-joined with 'celldetective' (used to locate bundled resources).
abs_path = os.sep.join([os.path.split(os.path.dirname(os.path.realpath(__file__)))[0], 'celldetective'])
16
+
17
+
18
def set_live_status(setA, setB, status, not_status_option):

    """
    Resolve the live/dead status column for two cell sets.

    When no status column is given, every cell is flagged as alive through a
    new 'live_status' column. Otherwise the provided columns are normalized
    (status value 2, "event already happened", is folded into 1) and, when
    requested through `not_status_option`, inverted into a new
    'not_<column>' column.

    Parameters
    ----------
    setA, setB : pandas.DataFrame
        Trajectory or position tables for the two cell sets. Modified in place.
    status : list or None
        Two column names classifying cells as alive (1) or dead (0), one per
        set. None (or a None entry) means "all cells alive" for that set.
    not_status_option : list
        Two booleans; True inverts the corresponding status column.

    Returns
    -------
    tuple
        (setA, setB, status) with both tables re-indexed and `status` holding
        the final column names actually used for each set.
    """

    def _resolve(df, idx):
        # Resolve the effective status column for one set, creating helper
        # columns ('live_status' / 'not_<col>') when needed.
        col = status[idx]
        if col is None:
            df.loc[:, 'live_status'] = 1
            return 'live_status'
        if isinstance(not_status_option, list):
            df.loc[df[col] == 2, col] = 1  # already happened events become event
            if not_status_option[idx]:
                # Invert 0/1 values; anything else maps to NaN.
                inverted = [not v if v == 0 or v == 1 else np.nan for v in df.loc[:, col].values]
                df.loc[:, 'not_' + col] = inverted
                col = 'not_' + col
        return col

    if status is None:
        setA.loc[:, 'live_status'] = 1
        setB.loc[:, 'live_status'] = 1
        status = ['live_status', 'live_status']
    elif isinstance(status, list):
        assert len(status) == 2, 'Please provide only two columns to classify cells as alive or dead.'
        status[0] = _resolve(setA, 0)
        status[1] = _resolve(setB, 1)

    assert status[0] in list(setA.columns)
    assert status[1] in list(setB.columns)

    return setA.reset_index(drop=True), setB.reset_index(drop=True), status
80
+
81
def compute_attention_weight(dist_matrix, cut_distance, opposite_cell_status, opposite_cell_ids, axis=1, include_dead_weight=True):

    """
    Computes the attention weight for each cell based on its proximity to cells of an opposite type within a specified distance.

    The attention weight of a cell is 1 / (number of opposite cells within
    `cut_distance`), i.e. how much that cell is "shared" among opposite cells.
    Optionally only live opposite cells (status == 1) are counted. The closest
    opposite cell ID is recorded for each cell.

    Parameters
    ----------
    dist_matrix : ndarray
        2D distance matrix between cells of two types. NOTE: mutated in place —
        exact-zero entries (self-pairs) are replaced by 1.0E06.
    cut_distance : float
        Cutoff distance within which opposite cells contribute to the weight.
    opposite_cell_status : ndarray
        Status (1 = live, 0 = dead) of each opposite cell. Only used when
        `include_dead_weight` is False.
    opposite_cell_ids : ndarray
        IDs of the opposite cells.
    axis : int, optional
        Axis along which to compute the weights (default 1: one weight per column).
    include_dead_weight : bool, optional
        If True, count all opposite cells within the cutoff; if False, count
        only live ones (default True).

    Returns
    -------
    tuple of ndarrays
        (weights, closest_opposite). `weights[i]` is NaN when no opposite cell
        lies within `cut_distance` of cell i.
    """

    n = dist_matrix.shape[axis]
    # Bug fix: np.empty left uninitialized garbage for cells with no opposite
    # neighbour within cut_distance; initialize to NaN instead.
    weights = np.full(n, np.nan)
    closest_opposite = np.full(n, np.nan)

    for i in range(n):
        if axis == 1:
            row = dist_matrix[:, i]
        elif axis == 0:
            row = dist_matrix[i, :]
        # Mask exact-zero distances (self-pairs) so they never win argmin.
        row[row == 0.] = 1.0E06
        nbr_opposite = len(row[row <= cut_distance])

        if not include_dead_weight:
            stat = opposite_cell_status[np.where(row <= cut_distance)[0]]
            nbr_opposite = len(stat[stat == 1])
            # Closest among the live subpopulation only.
            index_subpop = np.argmin(row[opposite_cell_status == 1])
            closest_opposite[i] = opposite_cell_ids[opposite_cell_status == 1][index_subpop]
        else:
            closest_opposite[i] = opposite_cell_ids[np.argmin(row)]

        if nbr_opposite > 0:
            weights[i] = 1. / float(nbr_opposite)
        # else: weights[i] stays NaN (no opposite cell in range)

    return weights, closest_opposite
138
+
139
def distance_cut_neighborhood(setA, setB, distance, mode='two-pop', status=None, not_status_option=None, compute_cum_sum=True,
                              attention_weight=True, symmetrize=True, include_dead_weight=True,
                              column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):

    """
    Match neighbors in set A and B within a circle of radius d.

    For every distance d, a new object column 'neighborhood_*_circle_{d}_px'
    is written into setA (and, if `symmetrize`, into setB): each cell receives
    the list of its neighbor dictionaries ({'id', 'distance', 'status', and
    optionally 'weight', 'closest', cumulative-presence keys}).

    Parameters
    ----------
    setA,setB : pandas DataFrame
        Trajectory or position sets A and B. Modified in place (rows with NaN
        track IDs are dropped).
    distance : float or list of float
        Cut-distance(s) in pixels to match neighboring pairs.
    mode: str
        neighboring mode, between 'two-pop' (e.g. target-effector) and 'self' (target-target or effector-effector).
    status: None or status
        name to look for cells to ignore (because they are dead). By default all cells are kept.
    compute_cum_sum: bool,
        compute cumulated time of presence of neighbours (only if trajectories available for both sets)
    attention_weight: bool,
        compute the attention weight (how much a cell of set B is shared across cells of set A)
    symmetrize: bool,
        write in set B the neighborhood of set A
    include_dead_weight: bool
        do not count dead cells when establishing attention weight

    Returns
    -------
    tuple
        (setA, setB) with the neighborhood columns filled, or (None, None)
        if either input set is missing.
    """

    # Check live_status option
    if setA is not None and setB is not None:
        setA, setB, status = set_live_status(setA, setB, status, not_status_option)
    else:
        return None,None

    # Check distance option
    if not isinstance(distance, list):
        distance = [distance]

    for d in distance:
        # loop over each provided distance

        if mode=='two-pop':
            neigh_col = f'neighborhood_2_circle_{d}_px'
        elif mode=='self':
            neigh_col = f'neighborhood_self_circle_{d}_px'

        cl = []
        for s in [setA,setB]:

            # Check whether data can be tracked
            temp_column_labels = column_labels.copy()

            if not 'TRACK_ID' in list(s.columns):
                temp_column_labels.update({'track': 'ID'})
                compute_cum_sum = False # if no tracking data then cum_sum is not relevant
            cl.append(temp_column_labels)

            # Remove nan tracks (cells that do not belong to a track)
            # Pre-create the neighborhood column as object dtype so that a
            # Python list can be stored per row.
            s[neigh_col] = np.nan
            s[neigh_col] = s[neigh_col].astype(object)
            s.dropna(subset=[cl[-1]['track']],inplace=True)

        # Loop over each available timestep
        timeline = np.unique(np.concatenate([setA[cl[0]['time']].to_numpy(), setB[cl[1]['time']].to_numpy()])).astype(int)
        for t in tqdm(timeline):

            # Slice both sets at time t: indices, XY coordinates, IDs, status.
            index_A = list(setA.loc[setA[cl[0]['time']]==t].index)
            coordinates_A = setA.loc[setA[cl[0]['time']]==t,[cl[0]['x'], cl[0]['y']]].to_numpy()
            ids_A = setA.loc[setA[cl[0]['time']]==t,cl[0]['track']].to_numpy()
            status_A = setA.loc[setA[cl[0]['time']]==t,status[0]].to_numpy()

            index_B = list(setB.loc[setB[cl[1]['time']]==t].index)
            coordinates_B = setB.loc[setB[cl[1]['time']]==t,[cl[1]['x'], cl[1]['y']]].to_numpy()
            ids_B = setB.loc[setB[cl[1]['time']]==t,cl[1]['track']].to_numpy()
            status_B = setB.loc[setB[cl[1]['time']]==t,status[1]].to_numpy()

            if len(ids_A) > 0 and len(ids_B) > 0:

                # compute distance matrix
                dist_map = cdist(coordinates_A, coordinates_B, metric="euclidean")

                if attention_weight:
                    # weights[j]: 1 / (# A-cells sharing B-cell j); closest_A[j]: nearest A-cell id.
                    weights, closest_A = compute_attention_weight(dist_map, d, status_A, ids_A, axis=1, include_dead_weight=include_dead_weight)

                # Target centric
                for k in range(dist_map.shape[0]):

                    col = dist_map[k,:]
                    # Mask exact-zero distances (self-pairs in 'self' mode).
                    col[col==0.] = 1.0E06

                    # B-cells within the cut distance of A-cell k.
                    neighs_B = np.array([ids_B[i] for i in np.where((col<=d))[0]])
                    status_neigh_B = np.array([status_B[i] for i in np.where((col<=d))[0]])
                    dist_B = [round(col[i],2) for i in np.where((col<=d))[0]]
                    if len(dist_B)>0:
                        closest_B_cell = neighs_B[np.argmin(dist_B)]

                    if symmetrize and attention_weight:
                        # Attention weight of A-cell k as seen from its B neighbors.
                        n_neighs = float(len(neighs_B))
                        if not include_dead_weight:
                            n_neighs_alive = len(np.where(status_neigh_B==1)[0])
                            neigh_count = n_neighs_alive
                        else:
                            neigh_count = n_neighs
                        if neigh_count>0:
                            weight_A = 1./neigh_count
                        else:
                            weight_A = np.nan

                        if not include_dead_weight and status_A[k]==0:
                            weight_A = 0

                    neighs = []
                    setA.at[index_A[k], neigh_col] = []
                    for n in range(len(neighs_B)):

                        # index in setB
                        n_index = np.where(ids_B==neighs_B[n])[0][0]
                        # Assess if neigh B is closest to A
                        if attention_weight:
                            if closest_A[n_index]==ids_A[k]:
                                closest = True
                            else:
                                closest = False

                        if symmetrize:
                            # Load neighborhood previous data
                            sym_neigh = setB.loc[index_B[n_index], neigh_col]
                            if neighs_B[n]==closest_B_cell:
                                closest_b=True
                            else:
                                closest_b=False
                            if isinstance(sym_neigh, list):
                                sym_neigh.append({'id': ids_A[k], 'distance': dist_B[n], 'status': status_A[k]})
                            else:
                                sym_neigh = [{'id': ids_A[k], 'distance': dist_B[n],'status': status_A[k]}]
                            if attention_weight:
                                sym_neigh[-1].update({'weight': weight_A, 'closest': closest_b})

                        # Write the minimum info about neighborhing cell B
                        neigh_dico = {'id': neighs_B[n], 'distance': dist_B[n], 'status': status_neigh_B[n]}
                        if attention_weight:
                            neigh_dico.update({'weight': weights[n_index], 'closest': closest})

                        if compute_cum_sum:
                            # Compute the integrated presence of the neighboring cell B
                            assert cl[1]['track'] == 'TRACK_ID','The set B does not seem to contain tracked data. The cumulative time will be meaningless.'
                            # Flatten the history of neighbor IDs for track k up to time t.
                            past_neighs = [[ll['id'] for ll in l] if len(l)>0 else [None] for l in setA.loc[(setA[cl[0]['track']]==ids_A[k])&(setA[cl[0]['time']]<=t), neigh_col].to_numpy()]
                            past_neighs = [item for sublist in past_neighs for item in sublist]

                            if attention_weight:
                                past_weights = [[ll['weight'] for ll in l] if len(l)>0 else [None] for l in setA.loc[(setA[cl[0]['track']]==ids_A[k])&(setA[cl[0]['time']]<=t), neigh_col].to_numpy()]
                                past_weights = [item for sublist in past_weights for item in sublist]

                            cum_sum = len(np.where(past_neighs==neighs_B[n])[0])
                            neigh_dico.update({'cumulated_presence': cum_sum+1})

                            if attention_weight:
                                cum_sum_weighted = np.sum([w if l==neighs_B[n] else 0 for l,w in zip(past_neighs, past_weights)])
                                neigh_dico.update({'cumulated_presence_weighted': cum_sum_weighted + weights[n_index]})

                        if symmetrize:
                            setB.at[index_B[n_index], neigh_col] = sym_neigh

                        neighs.append(neigh_dico)

                    setA.at[index_A[k], neigh_col] = neighs

    return setA, setB
307
+
308
def compute_neighborhood_at_position(pos, distance, population=['targets','effectors'], theta_dist=None, img_shape=(2048,2048), return_tables=False, clear_neigh=False, event_time_col=None,
                                     neighborhood_kwargs={'mode': 'two-pop','status': None, 'not_status_option': None,'include_dead_weight': True,"compute_cum_sum": False,"attention_weight": True, 'symmetrize': True}):

    """
    Computes neighborhood metrics for specified cell populations within a given position, based on distance criteria and additional parameters.

    This function assesses the neighborhood interactions between two specified cell populations (or within a single population) at a given position.
    It computes various neighborhood metrics based on specified distances, considering the entire image or excluding edge regions.
    The results are optionally cleared of previous neighborhood calculations and can be returned as updated tables.

    Parameters
    ----------
    pos : str
        The path to the position directory where the analysis is to be performed.
    distance : float or list of float
        The distance(s) in pixels to define neighborhoods.
    population : list of str, optional
        Names of the cell populations to analyze. If a single population is provided, it is used for both populations in the analysis (default is ['targets', 'effectors']).
    theta_dist : float or list of float, optional
        Edge threshold(s) in pixels to exclude cells close to the image boundaries from the analysis. If not provided, defaults to 90% of each specified distance.
    img_shape : tuple of int, optional
        The dimensions (height, width) of the images in pixels (default is (2048, 2048)).
    return_tables : bool, optional
        If True, returns the updated data tables for both populations (default is False).
    clear_neigh : bool, optional
        If True, clears existing neighborhood columns from the data tables before computing new metrics (default is False).
    event_time_col : str, optional
        The column name indicating the event time for each cell, required if mean neighborhood metrics are to be computed before events.
    neighborhood_kwargs : dict, optional
        Additional keyword arguments for neighborhood computation, including mode, status options, and metrics (default includes mode 'two-pop', and symmetrization).

    Returns
    -------
    pandas.DataFrame or (pandas.DataFrame, pandas.DataFrame)
        If `return_tables` is True, returns the updated data tables for the specified populations. If only one population is analyzed, both returned data frames will be identical.

    Raises
    ------
    AssertionError
        If the specified position path does not exist or if the number of distances and edge thresholds do not match.

    """

    # Bug fix: the default dict is shared across calls and `.update(...)`
    # below would both leak state between calls and mutate the caller's dict.
    # Work on a shallow copy instead.
    neighborhood_kwargs = dict(neighborhood_kwargs)

    pos = pos.replace('\\','/')
    assert os.path.exists(pos),f'Position {pos} is not a valid path.'

    if isinstance(population, str):
        population = [population, population]

    if not isinstance(distance, list):
        distance = [distance]
    if theta_dist is not None and not isinstance(theta_dist, list):
        theta_dist = [theta_dist]

    if theta_dist is None:
        # Default edge exclusion: 90% of each neighborhood radius.
        theta_dist = [0.9*d for d in distance]
    assert len(theta_dist)==len(distance),'Incompatible number of distances and number of edge thresholds.'

    # Same population on both sides -> self neighborhood; otherwise two-pop.
    if population[0]==population[1]:
        neighborhood_kwargs.update({'mode': 'self'})
    else:
        neighborhood_kwargs.update({'mode': 'two-pop'})

    df_A, path_A = get_position_table(pos, population=population[0], return_path=True)
    df_B, path_B = get_position_table(pos, population=population[1], return_path=True)

    if clear_neigh:
        # Drop any previously computed neighborhood columns.
        unwanted = df_A.columns[df_A.columns.str.contains('neighborhood')]
        df_A = df_A.drop(columns=unwanted)
        unwanted = df_B.columns[df_B.columns.str.contains('neighborhood')]
        df_B = df_B.drop(columns=unwanted)

    df_A, df_B = distance_cut_neighborhood(df_A,df_B, distance,**neighborhood_kwargs)
    if df_A is None or df_B is None:
        return None

    for td,d in zip(theta_dist, distance):

        if neighborhood_kwargs['mode']=='two-pop':
            neigh_col = f'neighborhood_2_circle_{d}_px'
        elif neighborhood_kwargs['mode']=='self':
            neigh_col = f'neighborhood_self_circle_{d}_px'

        # Invalidate the neighborhood of cells closer than td to any image edge,
        # whose neighborhood circle would be truncated.
        edge_filter_A = (df_A['POSITION_X'] > td)&(df_A['POSITION_Y'] > td)&(df_A['POSITION_Y'] < (img_shape[0] - td))&(df_A['POSITION_X'] < (img_shape[1] - td))
        edge_filter_B = (df_B['POSITION_X'] > td)&(df_B['POSITION_Y'] > td)&(df_B['POSITION_Y'] < (img_shape[0] - td))&(df_B['POSITION_X'] < (img_shape[1] - td))
        df_A.loc[~edge_filter_A, neigh_col] = np.nan
        df_B.loc[~edge_filter_B, neigh_col] = np.nan

        df_A = compute_neighborhood_metrics(df_A, neigh_col, metrics=['inclusive','exclusive','intermediate'], decompose_by_status=True)
        if neighborhood_kwargs['symmetrize']:
            df_B = compute_neighborhood_metrics(df_B, neigh_col, metrics=['inclusive','exclusive','intermediate'], decompose_by_status=True)

        df_A = mean_neighborhood_before_event(df_A, neigh_col, event_time_col)
        df_A = mean_neighborhood_after_event(df_A, neigh_col, event_time_col)

    # Full tables (with the list-valued neighborhood columns) go to pickle.
    df_A.to_pickle(path_A.replace('.csv','.pkl'))
    if not population[0]==population[1]:
        df_B.to_pickle(path_B.replace('.csv','.pkl'))

    # CSV output excludes the raw neighborhood columns (not CSV-serializable).
    unwanted = df_A.columns[df_A.columns.str.startswith('neighborhood_')]
    df_A2 = df_A.drop(columns=unwanted)
    df_A2.to_csv(path_A, index=False)

    if not population[0]==population[1]:
        unwanted = df_B.columns[df_B.columns.str.startswith('neighborhood_')]
        df_B_csv = df_B.drop(columns=unwanted)
        df_B_csv.to_csv(path_B,index=False)

    if return_tables:
        return df_A, df_B
419
+
420
def compute_neighborhood_metrics(neigh_table, neigh_col, metrics=['inclusive','exclusive','intermediate'], decompose_by_status=False):

    """
    Computes and appends neighborhood metrics to a dataframe based on specified neighborhood characteristics.

    This function iterates through a dataframe grouped by either 'TRACK_ID' or ['position', 'TRACK_ID'] (if 'position' column exists)
    and computes various neighborhood metrics (inclusive, exclusive, intermediate counts) for each cell. It can also decompose these
    metrics by cell status (e.g., live or dead) if specified.

    Parameters
    ----------
    neigh_table : pandas.DataFrame
        A dataframe containing neighborhood information for each cell, including position, track ID, frame, and a specified neighborhood column.
    neigh_col : str
        The column name in `neigh_table` that contains neighborhood information (a list of neighbor dictionaries per row).
    metrics : list of str, optional
        The metrics to be computed from the neighborhood information. Possible values include 'inclusive', 'exclusive', and 'intermediate'.
        Default is ['inclusive', 'exclusive', 'intermediate'].
    decompose_by_status : bool, optional
        If True, the metrics are computed separately for different statuses (e.g., live or dead) of the neighboring cells. Default is False.

    Returns
    -------
    pandas.DataFrame
        The input dataframe with additional columns for each of the specified metrics, and, if `decompose_by_status` is True, separate
        metrics for each status.

    Notes
    -----
    - 'inclusive' count refers to the total number of neighbors.
    - 'exclusive' count refers to the number of neighbors that are closest.
    - 'intermediate' count refers to the sum of weights attributed to neighbors, representing a weighted count.
    - 'intermediate' requires the 'weight' key and 'exclusive' the 'closest' key in the neighbor dictionaries
      (both produced only when the neighborhood was computed with attention_weight=True).
    - If `decompose_by_status` is True, metrics are appended with '_s0' or '_s1' to indicate the status they correspond to.

    """

    neigh_table = neigh_table.reset_index(drop=True)
    # Group per position as well when several positions are pooled in one table.
    if 'position' in list(neigh_table.columns):
        groupbycols = ['position','TRACK_ID']
    else:
        groupbycols = ['TRACK_ID']
    neigh_table.sort_values(by=groupbycols+['FRAME'],inplace=True)

    for tid,group in neigh_table.groupby(groupbycols):

        # Rows with NaN neighborhood (e.g. edge-excluded cells) are skipped.
        group = group.dropna(subset=[neigh_col])
        indices = list(group.index)
        neighbors = group[neigh_col].to_numpy()

        if 'inclusive' in metrics:
            n_inclusive = [len(n) for n in neighbors]

        if 'intermediate' in metrics:
            n_intermediate = np.full(len(neighbors), np.nan)

        if 'exclusive' in metrics:
            n_exclusive = np.full(len(neighbors), np.nan)

        if decompose_by_status:

            if 'inclusive' in metrics:
                n_inclusive_status_0 = np.full(len(neighbors), np.nan)
                n_inclusive_status_1 = np.full(len(neighbors), np.nan)

            if 'intermediate' in metrics:
                n_intermediate_status_0 = np.full(len(neighbors), np.nan)
                n_intermediate_status_1 = np.full(len(neighbors), np.nan)

            if 'exclusive' in metrics:
                n_exclusive_status_0 = np.full(len(neighbors), np.nan)
                n_exclusive_status_1 = np.full(len(neighbors), np.nan)

        for t in range(len(neighbors)):

            neighs_at_t = neighbors[t]
            status_at_t = [n['status'] for n in neighs_at_t]
            # Robustness fix: only read the dict keys required by the requested
            # metrics — 'weight' and 'closest' are absent when the neighborhood
            # was computed with attention_weight=False, and unconditional access
            # raised a KeyError even for metrics that do not need them.
            if 'intermediate' in metrics:
                weights_at_t = [n['weight'] for n in neighs_at_t]
            if 'exclusive' in metrics:
                closest_at_t = [n['closest'] for n in neighs_at_t]

            if 'intermediate' in metrics:
                n_intermediate[t] = np.sum(weights_at_t)
            if 'exclusive' in metrics:
                n_exclusive[t] = sum([c==1.0 for c in closest_at_t])

            if decompose_by_status:

                if 'inclusive' in metrics:
                    n_inclusive_status_0[t] = sum([s==0.0 for s in status_at_t])
                    n_inclusive_status_1[t] = sum([s==1.0 for s in status_at_t])

                if 'intermediate' in metrics:
                    weights_at_t = np.array(weights_at_t)

                    # intermediate: weighted count split by neighbor status
                    weights_status_1 = weights_at_t[np.array([s==1.0 for s in status_at_t],dtype=bool)]
                    weights_status_0 = weights_at_t[np.array([s==0.0 for s in status_at_t],dtype=bool)]
                    n_intermediate_status_1[t] = np.sum(weights_status_1)
                    n_intermediate_status_0[t] = np.sum(weights_status_0)

                if 'exclusive' in metrics:
                    n_exclusive_status_0[t] = sum([c==1.0 if s==0.0 else False for c,s in zip(closest_at_t,status_at_t)])
                    n_exclusive_status_1[t] = sum([c==1.0 if s==1.0 else False for c,s in zip(closest_at_t,status_at_t)])

        if 'inclusive' in metrics:
            neigh_table.loc[indices, 'inclusive_count_'+neigh_col] = n_inclusive
        if 'intermediate' in metrics:
            neigh_table.loc[indices, 'intermediate_count_'+neigh_col] = n_intermediate
        if 'exclusive' in metrics:
            neigh_table.loc[indices, 'exclusive_count_'+neigh_col] = n_exclusive

        if decompose_by_status:
            if 'inclusive' in metrics:
                neigh_table.loc[indices, 'inclusive_count_s0_'+neigh_col] = n_inclusive_status_0
                neigh_table.loc[indices, 'inclusive_count_s1_'+neigh_col] = n_inclusive_status_1
            if 'intermediate' in metrics:
                neigh_table.loc[indices, 'intermediate_count_s0_'+neigh_col] = n_intermediate_status_0
                neigh_table.loc[indices, 'intermediate_count_s1_'+neigh_col] = n_intermediate_status_1
            if 'exclusive' in metrics:
                neigh_table.loc[indices, 'exclusive_count_s0_'+neigh_col] = n_exclusive_status_0
                neigh_table.loc[indices, 'exclusive_count_s1_'+neigh_col] = n_exclusive_status_1

    return neigh_table
560
+
561
def mean_neighborhood_before_event(neigh_table, neigh_col, event_time_col):

    """
    Average the status-1 neighborhood counts of each track up to its event time.

    For every track (optionally per position), the intermediate, inclusive and
    exclusive status-1 counts — previously written by
    `compute_neighborhood_metrics` — are averaged over all frames up to and
    including the event frame, and the result is broadcast to every row of the
    track as 'mean_count_{metric}_{neigh_col}_before_event'.

    Parameters
    ----------
    neigh_table : pandas.DataFrame
        Table with precomputed '{metric}_count_s1_{neigh_col}' columns,
        'FRAME', 'TRACK_ID' (and optionally 'position') columns.
    neigh_col : str
        Base name of the neighborhood columns.
    event_time_col : str or None
        Column holding the event time of each track. If None, the maximum
        frame of the whole table is used for every track. A negative event
        time falls back to the track's last frame.

    Returns
    -------
    pandas.DataFrame
        The input table (sorted in place) with the three mean columns added.
    """

    group_cols = ['position', 'TRACK_ID'] if 'position' in neigh_table.columns else ['TRACK_ID']
    neigh_table.sort_values(by=group_cols + ['FRAME'], inplace=True)

    if event_time_col is None:
        print('No event time was provided... Estimating the mean neighborhood over the whole observation time...')
        neigh_table.loc[:, 'event_time_temp'] = neigh_table['FRAME'].max()
        event_time_col = 'event_time_temp'

    for _, track in neigh_table.groupby(group_cols):

        # Only rows with a valid (non-NaN) neighborhood contribute.
        track = track.dropna(subset=neigh_col)
        row_ids = list(track.index)

        times = track[event_time_col].to_numpy()
        if len(times) == 0:
            continue
        t_event = times[0]
        if t_event < 0.:
            # No event observed: average over the whole track.
            t_event = track['FRAME'].max()

        before = track['FRAME'] <= t_event
        counts = {metric: track.loc[before, f'{metric}_count_s1_{neigh_col}'].to_numpy()
                  for metric in ('intermediate', 'inclusive', 'exclusive')}

        for metric, values in counts.items():
            # Write the mean only when at least one sample is not NaN.
            if np.any(values == values):
                neigh_table.loc[row_ids, f'mean_count_{metric}_{neigh_col}_before_event'] = np.nanmean(values)

    if event_time_col == 'event_time_temp':
        neigh_table = neigh_table.drop(columns='event_time_temp')
    return neigh_table
630
+
631
def mean_neighborhood_after_event(neigh_table, neigh_col, event_time_col):

	"""
	Computes the mean neighborhood metrics for each cell track after a specified event time.

	This function calculates the mean values of specified neighborhood metrics (inclusive, exclusive, intermediate)
	for each cell track after the event time. The function requires the neighborhood metrics to
	have been previously computed and appended to the input dataframe. It operates on grouped data based on position
	and track ID, handling cases with or without position information.

	Parameters
	----------
	neigh_table : pandas.DataFrame
		A dataframe containing cell track data with precomputed neighborhood metrics and event time information.
		Sorted in place by position/track/frame as a side effect.
	neigh_col : str
		The base name of the neighborhood metric columns in `neigh_table`.
	event_time_col : str or None
		The column name indicating the event time for each cell track. If None, no event time is available
		and the after-event metrics are set to NaN for all tracks.

	Returns
	-------
	pandas.DataFrame
		The input dataframe with added columns for the mean neighborhood metrics after the event for each cell track.
		The new columns are named as 'mean_count_{metric}_{neigh_col}_after_event', where {metric} is one of
		'inclusive', 'exclusive', 'intermediate'. Tracks with an undefined (None/NaN) or negative event time
		get NaN in these columns.

	"""

	if 'position' in list(neigh_table.columns):
		groupbycols = ['position','TRACK_ID']
	else:
		groupbycols = ['TRACK_ID']
	neigh_table.sort_values(by=groupbycols+['FRAME'],inplace=True)

	if event_time_col is None:
		# No event time provided: mark every track's event time as undefined so the
		# NaN branch below applies uniformly (nothing can be "after" an unknown event).
		neigh_table.loc[:,'event_time_temp'] = None
		event_time_col = 'event_time_temp'

	for tid,group in neigh_table.groupby(groupbycols):

		group = group.dropna(subset=[neigh_col])
		indices = list(group.index)

		# The event time is constant per track; take it from the first row.
		event_time_values = group[event_time_col].to_numpy()
		if len(event_time_values)>0:
			event_time = event_time_values[0]
		else:
			continue

		# Undefined (None or NaN, via the x != x NaN test) or negative event time:
		# there is no valid "after event" window, so the metrics are NaN.
		# (Previously a NaN event time fell through to the else-branch, where
		# FRAME > NaN selected nothing and the columns were silently left unset.)
		if event_time is None or event_time!=event_time or event_time<0.:
			neigh_table.loc[indices, f'mean_count_intermediate_{neigh_col}_after_event'] = np.nan
			neigh_table.loc[indices, f'mean_count_inclusive_{neigh_col}_after_event'] = np.nan
			neigh_table.loc[indices, f'mean_count_exclusive_{neigh_col}_after_event'] = np.nan
		else:
			valid_counts_intermediate = group.loc[group['FRAME']>event_time,'intermediate_count_s1_'+neigh_col].to_numpy()
			valid_counts_inclusive = group.loc[group['FRAME']>event_time,'inclusive_count_s1_'+neigh_col].to_numpy()
			valid_counts_exclusive = group.loc[group['FRAME']>event_time,'exclusive_count_s1_'+neigh_col].to_numpy()

			# x == x filters out NaNs; only write the mean when at least one valid sample exists.
			if len(valid_counts_intermediate[valid_counts_intermediate==valid_counts_intermediate])>0:
				neigh_table.loc[indices, f'mean_count_intermediate_{neigh_col}_after_event'] = np.nanmean(valid_counts_intermediate)
			if len(valid_counts_inclusive[valid_counts_inclusive==valid_counts_inclusive])>0:
				neigh_table.loc[indices, f'mean_count_inclusive_{neigh_col}_after_event'] = np.nanmean(valid_counts_inclusive)
			if len(valid_counts_exclusive[valid_counts_exclusive==valid_counts_exclusive])>0:
				neigh_table.loc[indices, f'mean_count_exclusive_{neigh_col}_after_event'] = np.nanmean(valid_counts_exclusive)

	if event_time_col=='event_time_temp':
		# Drop the temporary column added when no event time column was provided.
		neigh_table = neigh_table.drop(columns='event_time_temp')
	return neigh_table
701
+
702
+
703
+ # def mask_intersection_neighborhood(setA, labelsA, setB, labelsB, threshold_iou=0.5, viewpoint='B'):
704
+ # # do whatever to match objects in A and B
705
+ # return setA, setB
706
+
707
# Manual smoke test for the neighborhood computation, run only when this module
# is executed directly. NOTE(review): `pos` is a developer-local absolute path
# and `compute_neighborhood_at_position` is defined elsewhere in this module —
# this block is not expected to run outside that developer's machine; consider
# moving it into a proper test or example script. TODO confirm it is still needed.
if __name__ == "__main__":

	print('None')
	pos = "/home/torro/Documents/Experiments/NKratio_Exp/W5/500"

	# Compute the two-population (targets vs effectors) neighborhood at distance 62 px,
	# clearing any previously stored neighborhoods and returning the resulting tables.
	test,_ = compute_neighborhood_at_position(pos, [62], population=['targets','effectors'], theta_dist=None, img_shape=(2048,2048), return_tables=True, clear_neigh=True,
		neighborhood_kwargs={'mode': 'two-pop','status': ['class', None],'not_status_option': [True, False],'include_dead_weight': True,"compute_cum_sum": False,"attention_weight": True, 'symmetrize': False})

	#test = compute_neighborhood_metrics(test, 'neighborhood_self_circle_150_px', metrics=['inclusive','exclusive','intermediate'], decompose_by_status=True)
	# Print the resulting columns as a quick visual check of the generated metrics.
	print(test.columns)
	#print(segment(None,'test'))