celldetective 1.3.4.post1__py3-none-any.whl → 1.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
celldetective/measure.py CHANGED
@@ -193,6 +193,12 @@ def measure(stack=None, labels=None, trajectories=None, channel_names=None,
         elif do_features*(trajectories is None):
             measurements_at_t = positions_at_t

+        try:
+            measurements_at_t['radial_distance'] = np.sqrt((measurements_at_t[column_labels['x']] - img.shape[0] / 2) ** 2 + (
+                    measurements_at_t[column_labels['y']] - img.shape[1] / 2) ** 2)
+        except Exception as e:
+            print(f"{e=}")
+
         timestep_dataframes.append(measurements_at_t)

     measurements = pd.concat(timestep_dataframes)
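
The new `radial_distance` column is the Euclidean distance of each centroid from the image center, in pixels. A minimal sketch of the same computation outside the pipeline (column names and image size are illustrative; note the formula pairs `x` with `img.shape[0]` and `y` with `img.shape[1]`, which only coincide for square images):

    import numpy as np
    import pandas as pd

    img_shape = (2048, 2048)  # (height, width)
    df = pd.DataFrame({'POSITION_X': [100.0, 1024.0], 'POSITION_Y': [200.0, 1024.0]})
    # Distance of each centroid from the image center
    df['radial_distance'] = np.sqrt((df['POSITION_X'] - img_shape[0] / 2) ** 2
                                    + (df['POSITION_Y'] - img_shape[1] / 2) ** 2)
    print(df['radial_distance'])  # ~1238.0 for the first row, 0.0 for the centered one
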
@@ -300,19 +306,28 @@ def measure_features(img, label, features=['area', 'intensity_mean'], channels=N

     """

+    if isinstance(features, list):
+        features = features.copy()
+
     if features is None:
         features = []

-    # Add label to have identity of mask
-    if 'label' not in features:
-        features.append('label')
-
+    measure_mean_intensities = False
     if img is None:
         if verbose:
             print('No image was provided... Skip intensity measurements.')
         border_dist = None;
         haralick_options = None;
         features = drop_tonal_features(features)
+
+    if 'intensity_mean' in features:
+        measure_mean_intensities = True
+        features.remove('intensity_mean')
+
+    # Add label to have identity of mask
+    if 'label' not in features:
+        features.append('label')
+
     if img is not None:
         if img.ndim == 2:
             img = img[:, :, np.newaxis]
@@ -351,10 +366,16 @@ def measure_features(img, label, features=['area', 'intensity_mean'], channels=N
         if f in extra_props:
             feats.remove(f)
             extra_props_list.append(getattr(extra_properties, f))
+
+    # Add intensity nan mean if need to measure mean intensities
+    if measure_mean_intensities:
+        extra_props_list.append(getattr(extra_properties, 'intensity_nanmean'))
+
     if len(extra_props_list) == 0:
         extra_props_list = None
     else:
         extra_props_list = tuple(extra_props_list)
+
     props = regionprops_table(label, intensity_image=img, properties=feats, extra_properties=extra_props_list)
     df_props = pd.DataFrame(props)
     if spot_detection is not None:
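
The rerouted `intensity_mean` feature is now computed by celldetective's own `extra_properties.intensity_nanmean`. In scikit-image, `regionprops_table` accepts `extra_properties` callables that receive the region mask and the bbox-cropped intensity image, and the function name becomes the output column. A stand-in sketch of the mechanism (synthetic data; assumed implementation of the NaN-aware mean):

    import numpy as np
    from skimage.measure import regionprops_table

    def intensity_nanmean(regionmask, intensity_image):
        # Mean intensity over the region, ignoring NaN pixels
        return np.nanmean(intensity_image[regionmask])

    label = np.array([[0, 1], [1, 1]])
    img = np.array([[np.nan, 2.0], [4.0, np.nan]])
    props = regionprops_table(label, intensity_image=img,
                              properties=('label',),
                              extra_properties=(intensity_nanmean,))
    print(props['intensity_nanmean'])  # [3.0]
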
@@ -1105,7 +1126,7 @@ def estimate_time(df, class_attr, model='step_function', class_of_interest=[2],
     return df


-def interpret_track_classification(df, class_attr, irreversible_event=False, unique_state=False,r2_threshold=0.5, percentile_recovery=50):
+def interpret_track_classification(df, class_attr, irreversible_event=False, unique_state=False,r2_threshold=0.5, percentile_recovery=50, pre_event=None):

     """
     Interpret and classify tracked cells based on their status signals.
@@ -1164,15 +1185,15 @@ def interpret_track_classification(df, class_attr, irreversible_event=False, uni

     if irreversible_event:

-        df = classify_irreversible_events(df, class_attr, r2_threshold=r2_threshold, percentile_recovery=percentile_recovery)
+        df = classify_irreversible_events(df, class_attr, r2_threshold=r2_threshold, percentile_recovery=percentile_recovery, pre_event=pre_event)

     elif unique_state:

-        df = classify_unique_states(df, class_attr, percentile=50)
+        df = classify_unique_states(df, class_attr, percentile=50, pre_event=pre_event)

     return df

-def classify_irreversible_events(df, class_attr, r2_threshold=0.5, percentile_recovery=50):
+def classify_irreversible_events(data, class_attr, r2_threshold=0.5, percentile_recovery=50, pre_event=None):

     """
     Classify irreversible events in a tracked dataset based on the status of cells and transitions.
@@ -1210,45 +1231,83 @@ def classify_irreversible_events(df, class_attr, r2_threshold=0.5, percentile_re
     >>> df = classify_irreversible_events(df, 'class', r2_threshold=0.7)
     """

+    df = data.copy()
     cols = list(df.columns)
+
+    # Control input
     assert 'TRACK_ID' in cols,'Please provide tracked data...'
     if 'position' in cols:
         sort_cols = ['position', 'TRACK_ID']
     else:
         sort_cols = ['TRACK_ID']
+    if pre_event is not None:
+        assert 't_'+pre_event in cols,"Pre-event time does not seem to be a valid column in the DataFrame..."
+        assert 'class_'+pre_event in cols,"Pre-event class does not seem to be a valid column in the DataFrame..."

     stat_col = class_attr.replace('class','status')

-    for tid,track in df.groupby(sort_cols):
+    if pre_event is not None:
+
+        # Version with pre event; intuition: mask status value before pre-event takes place with NaN
+        for tid, track in df.groupby(sort_cols):
+
+            indices = track[class_attr].index
+
+            if track['class_'+pre_event].values[0]==1:
+                # Pre-event never took place, all NaN
+                df.loc[indices, class_attr] = np.nan
+                df.loc[indices, stat_col] = np.nan
+            else:
+                # pre-event took place (if left-censored took place at time -1)
+                t_pre_event = track['t_'+pre_event].values[0]
+                indices_pre = track.loc[track['FRAME']<=t_pre_event,class_attr].index
+                df.loc[indices_pre, stat_col] = np.nan # set to NaN all statuses before pre-event
+                track.loc[track['FRAME']<=t_pre_event, stat_col] = np.nan
+
+                # The non-NaN part of track (post pre-event)
+                track_valid = track.dropna(subset=stat_col, inplace=False)
+                status_values = track_valid[stat_col].to_numpy()
+
+                if np.all([s==0 for s in status_values]):
+                    # all negative to condition, event not observed
+                    df.loc[indices, class_attr] = 1
+                elif np.all([s==1 for s in status_values]):
+                    # all positive, event already observed (left-censored)
+                    df.loc[indices, class_attr] = 2
+                else:
+                    # ambiguity, possible transition, use `unique_state` technique after
+                    df.loc[indices, class_attr] = 2
+    else:
+        for tid,track in df.groupby(sort_cols):
+
+            # Set status to 0.0 before first detection
+            t_firstdetection = track['t_firstdetection'].values[0]
+            indices_pre_detection = track.loc[track['FRAME']<=t_firstdetection,class_attr].index
+            track.loc[indices_pre_detection,stat_col] = 0.0
+            df.loc[indices_pre_detection,stat_col] = 0.0
+
+            track_valid = track.dropna(subset=stat_col)
+
+            indices = track[class_attr].index
+            status_values = track_valid[stat_col].to_numpy()
+
+            if np.all([s==0 for s in status_values]):
+                # all negative, no event
+                df.loc[indices, class_attr] = 1
+
+            elif np.all([s==1 for s in status_values]):
+                # all positive, event already observed
+                df.loc[indices, class_attr] = 2
+                #df.loc[indices, class_attr.replace('class','status')] = 2
+            else:
+                # ambiguity, possible transition
+                df.loc[indices, class_attr] = 2

-        # Set status to 0.0 before first detection
-        t_firstdetection = track['t_firstdetection'].values[0]
-        indices_pre_detection = track.loc[track['FRAME']<=t_firstdetection,class_attr].index
-        track.loc[indices_pre_detection,stat_col] = 0.0
-        df.loc[indices_pre_detection,stat_col] = 0.0
-
-        track_valid = track.dropna(subset=stat_col)
-        indices_valid = track_valid[class_attr].index
-
-        indices = track[class_attr].index
-        status_values = track_valid[stat_col].to_numpy()
-
-        if np.all([s==0 for s in status_values]):
-            # all negative, no event
-            df.loc[indices, class_attr] = 1
-
-        elif np.all([s==1 for s in status_values]):
-            # all positive, event already observed
-            df.loc[indices, class_attr] = 2
-            #df.loc[indices, class_attr.replace('class','status')] = 2
-        else:
-            # ambiguity, possible transition
-            df.loc[indices, class_attr] = 2
-
     print("Classes after initial pass: ",df.loc[df['FRAME']==0,class_attr].value_counts())

     df.loc[df[class_attr]!=2, class_attr.replace('class', 't')] = -1
-    df = estimate_time(df, class_attr, model='step_function', class_of_interest=[2],r2_threshold=r2_threshold)
+    # Try to fit time on class 2 cells (ambiguous)
+    df = estimate_time(df, class_attr, model='step_function', class_of_interest=[2], r2_threshold=r2_threshold)
     print("Classes after fit: ", df.loc[df['FRAME']==0,class_attr].value_counts())

     # Revisit class 2 cells to classify as neg/pos with percentile tolerance
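
The new `pre_event` argument chains events: a track is scored for the event of interest only on the part of the track after its pre-event, and tracks whose pre-event never occurred get NaN classes. A hypothetical call (event names are illustrative; the table must already contain the matching `class_...` and `t_...` columns):

    # Score 'lysis' only after the 'death' pre-event of each track;
    # requires 'class_death' and 't_death' columns produced upstream.
    df = classify_irreversible_events(df, 'class_lysis', r2_threshold=0.5,
                                      percentile_recovery=50, pre_event='death')
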
@@ -1257,7 +1316,7 @@ def classify_irreversible_events(df, class_attr, r2_threshold=0.5, percentile_re

     return df

-def classify_unique_states(df, class_attr, percentile=50):
+def classify_unique_states(df, class_attr, percentile=50, pre_event=None):

     """
     Classify unique cell states based on percentile values of a status attribute in a tracked dataset.
@@ -1300,31 +1359,67 @@ def classify_unique_states(df, class_attr, percentile=50):
     else:
         sort_cols = ['TRACK_ID']

-    stat_col = class_attr.replace('class','status')
+    if pre_event is not None:
+        assert 't_'+pre_event in cols,"Pre-event time does not seem to be a valid column in the DataFrame..."
+        assert 'class_'+pre_event in cols,"Pre-event class does not seem to be a valid column in the DataFrame..."

+    stat_col = class_attr.replace('class','status')

-    for tid,track in df.groupby(sort_cols):
+    if pre_event is not None:

+        for tid, track in df.groupby(sort_cols):
+
+            indices = track[class_attr].index

-        track_valid = track.dropna(subset=stat_col)
-        indices_valid = track_valid[class_attr].index
+            if track['class_'+pre_event].values[0]==1:
+                # then pre event not satisfied, class/status is NaN
+                df.loc[indices, class_attr] = np.nan
+                df.loc[indices, stat_col] = np.nan
+                df.loc[indices, stat_col.replace('status_','t_')] = -1
+            else:
+                # Pre event might happen, set to NaN observations before pre event
+                t_pre_event = track['t_'+pre_event].values[0]
+                indices_pre = track.loc[track['FRAME']<=t_pre_event,class_attr].index
+                df.loc[indices_pre, stat_col] = np.nan
+                track.loc[track['FRAME']<=t_pre_event, stat_col] = np.nan
+
+                # Post pre-event track
+                track_valid = track.dropna(subset=stat_col, inplace=False)
+                status_values = track_valid[stat_col].to_numpy()
+
+                frames = track_valid['FRAME'].to_numpy() # from t_pre-event to T
+                t_first = track['t_firstdetection'].to_numpy()[0]
+                perc_status = np.nanpercentile(status_values[frames>=t_first], percentile)
+
+                if perc_status==perc_status:
+                    c = ceil(perc_status)
+                    if c==0:
+                        df.loc[indices, class_attr] = 1
+                        df.loc[indices, class_attr.replace('class','t')] = -1
+                    elif c==1:
+                        df.loc[indices, class_attr] = 2
+                        df.loc[indices, class_attr.replace('class','t')] = -1
+    else:
+        for tid,track in df.groupby(sort_cols):

-        indices = track[class_attr].index
-        status_values = track_valid[stat_col].to_numpy()
+            track_valid = track.dropna(subset=stat_col)
+            indices_valid = track_valid[class_attr].index

+            indices = track[class_attr].index
+            status_values = track_valid[stat_col].to_numpy()

-        frames = track_valid['FRAME'].to_numpy()
-        t_first = track['t_firstdetection'].to_numpy()[0]
-        perc_status = np.nanpercentile(status_values[frames>=t_first], percentile)
-
-        if perc_status==perc_status:
-            c = ceil(perc_status)
-            if c==0:
-                df.loc[indices, class_attr] = 1
-                df.loc[indices, class_attr.replace('class','t')] = -1
-            elif c==1:
-                df.loc[indices, class_attr] = 2
-                df.loc[indices, class_attr.replace('class','t')] = -1
+            frames = track_valid['FRAME'].to_numpy()
+            t_first = track['t_firstdetection'].to_numpy()[0]
+            perc_status = np.nanpercentile(status_values[frames>=t_first], percentile)
+
+            if perc_status==perc_status:
+                c = ceil(perc_status)
+                if c==0:
+                    df.loc[indices, class_attr] = 1
+                    df.loc[indices, class_attr.replace('class','t')] = -1
+                elif c==1:
+                    df.loc[indices, class_attr] = 2
+                    df.loc[indices, class_attr.replace('class','t')] = -1
     return df

 def classify_cells_from_query(df, status_attr, query):
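
In both branches of `classify_unique_states` above, the `perc_status==perc_status` comparison is simply a NaN guard (NaN is the only value that is not equal to itself), and the ceiling of the percentile decides the class. A self-contained illustration of the rule:

    import numpy as np
    from math import ceil

    status = np.array([0., 1., 1., 1.])     # status values after first detection
    perc = np.nanpercentile(status, 50)     # median status = 1.0
    if perc == perc:                        # False only when perc is NaN
        c = ceil(perc)
        cls = 1 if c == 0 else 2            # mostly-0 track -> 1, mostly-1 track -> 2
        print(cls)                          # 2
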
@@ -1383,8 +1478,11 @@ def classify_cells_from_query(df, status_attr, query):

     df = df.copy()
     df.loc[:,status_attr] = 0
+    df[status_attr] = df[status_attr].astype(float)

     cols = extract_cols_from_query(query)
+    print(f"{cols=}")
+
     cols_in_df = np.all([c in list(df.columns) for c in cols], axis=0)
     if query=='':
         print('The provided query is empty...')
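
Casting `status_attr` to float matters because an integer pandas column cannot hold NaN, while a float column can, which lets non-evaluable cells be marked downstream. A hypothetical call (column and status names are illustrative):

    # Flag cells matching a pandas-style query; 'area' and 'CFSE_mean' are
    # made-up measurement columns, 'status_large_bright' a made-up status name.
    df = classify_cells_from_query(df, 'status_large_bright',
                                   'area > 250 and CFSE_mean > 100')
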
@@ -894,7 +894,7 @@ def fit_and_apply_model_background_to_stack(stack_path,
     else:
         newfile = '_'.join([prefix,file])

-    with tiff.TiffWriter(os.sep.join([path,newfile]),imagej=True) as tif:
+    with tiff.TiffWriter(os.sep.join([path,newfile]), imagej=True, bigtiff=True) as tif:

         for i in tqdm(range(0,int(stack_length*nbr_channels),nbr_channels)):

@@ -1156,7 +1156,7 @@ def correct_channel_offset_single_stack(stack_path,
     else:
         newfile = '_'.join([prefix,file])

-    with tiff.TiffWriter(os.sep.join([path,newfile]),imagej=True) as tif:
+    with tiff.TiffWriter(os.sep.join([path,newfile]),bigtiff=True,imagej=True) as tif:

         for i in tqdm(range(0,int(stack_length*nbr_channels),nbr_channels)):
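
Both writer changes opt in to BigTIFF, whose 64-bit offsets lift the 4 GB size limit of classic TIFF that long multichannel stacks can exceed. A minimal sketch with tifffile (synthetic data; the ImageJ metadata used above is omitted here):

    import numpy as np
    import tifffile as tiff

    frames = np.zeros((4, 512, 512), dtype=np.uint16)  # stand-in stack
    # bigtiff=True switches to the 64-bit offset format, allowing files > 4 GB
    with tiff.TiffWriter('stack.tif', bigtiff=True) as tif:
        for frame in frames:
            tif.write(frame)  # one page per frame
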
@@ -128,10 +128,6 @@ def measure_pairs(pos, neighborhood_protocol):
 
 
 
-
-
-
-
 def measure_pair_signals_at_position(pos, neighborhood_protocol, velocity_kwargs={'window': 3, 'mode': 'bi'}):
     """
     pos: position to process
@@ -340,9 +336,10 @@ def measure_pair_signals_at_position(pos, neighborhood_protocol, velocity_kwargs
         cum_sum = 0
         for t in range(len(full_timeline)):

-            if t in timeline_reference: # meaning position exists on both sides
+            if t in timeline_reference and t in timeline_neighbor: # meaning position exists on both sides

                 idx_reference = list(timeline_reference).index(t)
+                idx_neighbor = list(timeline_neighbor).index(t)
                 inter = intersection_values.loc[(intersection_values['neigh_id']==nc)&(intersection_values["frame"]==t),"intersection"].values
                 if len(inter)==0:
                     inter = np.nan
@@ -350,12 +347,12 @@ def measure_pair_signals_at_position(pos, neighborhood_protocol, velocity_kwargs
                     inter = inter[0]

                 neigh_inter_fraction = np.nan
-                if inter==inter and neigh_area[t]==neigh_area[t]:
-                    neigh_inter_fraction = inter / neigh_area[t]
+                if inter==inter and neigh_area[idx_neighbor]==neigh_area[idx_neighbor]:
+                    neigh_inter_fraction = inter / neigh_area[idx_neighbor]

                 ref_inter_fraction = np.nan
-                if inter==inter and ref_area[t]==ref_area[t]:
-                    ref_inter_fraction = inter / ref_area[t]
+                if inter==inter and ref_area[idx_reference]==ref_area[idx_reference]:
+                    ref_inter_fraction = inter / ref_area[idx_reference]

                 if nc in neighbor_ids_per_t[idx_reference]:

@@ -259,13 +259,13 @@ def measure_index(indices):
                              'y': column_labels['y']}
             feature_table.rename(columns={'centroid-1': 'POSITION_X', 'centroid-0': 'POSITION_Y'}, inplace=True)

-            if do_iso_intensities:
+            if do_iso_intensities and not trajectories is None:
                 iso_table = measure_isotropic_intensity(positions_at_t, img, channels=channel_names, intensity_measurement_radii=intensity_measurement_radii, column_labels=column_labels, operations=isotropic_operations, verbose=False)

-            if do_iso_intensities and do_features:
+            if do_iso_intensities and do_features and not trajectories is None:
                 measurements_at_t = iso_table.merge(feature_table, how='outer', on='class_id',suffixes=('_delme', ''))
                 measurements_at_t = measurements_at_t[[c for c in measurements_at_t.columns if not c.endswith('_delme')]]
-            elif do_iso_intensities * (not do_features):
+            elif do_iso_intensities * (not do_features) * (not trajectories is None):
                 measurements_at_t = iso_table
             elif do_features:
                 measurements_at_t = positions_at_t.merge(feature_table, how='outer', on='class_id',suffixes=('_delme', ''))
@@ -279,6 +279,12 @@ def measure_index(indices):
                 measurements_at_t.loc[:,c.replace('_y','_POSITION_Y')] = measurements_at_t[c] + measurements_at_t['POSITION_Y']
             measurements_at_t = measurements_at_t.drop(columns = center_of_mass_x_cols+center_of_mass_y_cols)

+            try:
+                measurements_at_t['radial_distance'] = np.sqrt((measurements_at_t[column_labels['x']] - img.shape[0] / 2) ** 2 + (
+                        measurements_at_t[column_labels['y']] - img.shape[1] / 2) ** 2)
+            except Exception as e:
+                print(f"{e=}")
+
             if measurements_at_t is not None:
                 measurements_at_t[column_labels['time']] = t
                 timestep_dataframes.append(measurements_at_t)
@@ -308,6 +314,10 @@ if len(timestep_dataframes)>0:
     df['ID'] = np.arange(len(df))

     df = df.reset_index(drop=True)
+
+    invalid_cols = [c for c in list(df.columns) if c.startswith('Unnamed')]
+    if len(invalid_cols)>0:
+        df = df.drop(invalid_cols, axis=1)

     df.to_csv(pos+os.sep.join(["output", "tables", table_name]), index=False)
     print(f'Measurements successfully written in table {pos+os.sep.join(["output", "tables", table_name])}')
@@ -47,7 +47,6 @@ else:

     if not use_gpu:
         os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
-    n_threads = int(process_arguments['threads'])

     modelname = str(process_arguments['model'])

celldetective/signals.py CHANGED
@@ -167,12 +167,13 @@ def analyze_signals(trajectories, model, interpolate_na=True,
     if selected_signals is None:
         selected_signals = []
         for s in required_signals:
-            pattern_test = [s in a or s==a for a in available_signals]
-            #print(f'Pattern test for signal {s}: ', pattern_test)
-            assert np.any(pattern_test),f'No signal matches with the requirements of the model {required_signals}. Please pass the signals manually with the argument selected_signals or add measurements. Abort.'
-            valid_columns = natsorted(np.array(available_signals)[np.array(pattern_test)])
-            print(f"Selecting the first time series among: {valid_columns} for input requirement {s}...")
-            selected_signals.append(valid_columns[0])
+            priority_cols = [a for a in available_signals if a==s]
+            second_priority_cols = [a for a in available_signals if a.startswith(s) and a!=s]
+            third_priority_cols = [a for a in available_signals if s in a and not a.startswith(s)]
+            candidates = priority_cols + second_priority_cols + third_priority_cols
+            assert len(candidates)>0,f'No signal matches with the requirements of the model {required_signals}. Please pass the signals manually with the argument selected_signals or add measurements. Abort.'
+            print(f"Selecting the first time series among: {candidates} for input requirement {s}...")
+            selected_signals.append(candidates[0])
     else:
         assert len(selected_signals)==len(required_signals),f'Mismatch between the number of required signals {required_signals} and the provided signals {selected_signals}... Abort.'
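
The rewritten selection ranks exact name matches above prefix matches, and prefix matches above loose substring matches, instead of natsorting every substring hit. A self-contained illustration of why this matters (signal names are made up):

    available_signals = ['adhesion_area', 'area']
    s = 'area'
    exact  = [a for a in available_signals if a == s]                          # ['area']
    prefix = [a for a in available_signals if a.startswith(s) and a != s]      # []
    loose  = [a for a in available_signals if s in a and not a.startswith(s)]  # ['adhesion_area']
    print((exact + prefix + loose)[0])  # 'area'; natsorting all hits would have picked 'adhesion_area'
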
@@ -878,6 +879,7 @@ class SignalDetectionModel(object):

         assert self.model_class.layers[0].input_shape[0] == self.model_reg.layers[0].input_shape[0], f"mismatch between input shape of classification: {self.model_class.layers[0].input_shape[0]} and regression {self.model_reg.layers[0].input_shape[0]} models... Error."

+
         return True

     def create_models_from_scratch(self):
@@ -3143,4 +3145,4 @@ if __name__ == "__main__":
     model = ResNetModelCurrent(1, 2, depth=2, use_pooling=True, n_classes = 3, dropout_rate=0.1, dense_collection=512,
                                header="classifier", model_signal_length = 128)
     print(model.summary())
-    #plot_model(model, to_file='test.png', show_shapes=True)
+    #plot_model(model, to_file='test.png', show_shapes=True)
celldetective/tracking.py CHANGED
@@ -8,7 +8,7 @@ from btrack import BayesianTracker

 from celldetective.measure import measure_features
 from celldetective.utils import rename_intensity_column, velocity_per_track
-from celldetective.io import view_on_napari_btrack, interpret_tracking_configuration
+from celldetective.io import interpret_tracking_configuration

 import os
 import subprocess
@@ -160,15 +160,19 @@ def track(labels, configuration=None, stack=None, spatial_calibration=1, feature
     if channel_names is not None:
         df = rename_intensity_column(df, channel_names)

-    df = write_first_detection_class(df, column_labels=column_labels)
+    df = write_first_detection_class(df, img_shape=volume, column_labels=column_labels)

     if clean_trajectories_kwargs is not None:
         df = clean_trajectories(df.copy(),**clean_trajectories_kwargs)

     df['ID'] = np.arange(len(df)).astype(int)

-    if view_on_napari:
-        view_on_napari_btrack(data,properties,graph,stack=stack,labels=labels,relabel=True)
+    invalid_cols = [c for c in list(df.columns) if c.startswith('Unnamed')]
+    if len(invalid_cols)>0:
+        df = df.drop(invalid_cols, axis=1)
+
+    # if view_on_napari:
+    #     view_on_napari_btrack(data,properties,graph,stack=stack,labels=labels,relabel=True)

     if return_napari_data:
         napari_data = {"data": data, "properties": properties, "graph": graph}
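
The 'Unnamed' columns dropped here (and in the measurement script above) are the artifact pandas creates when a CSV written with its index is read back without `index_col`. A minimal reproduction:

    import io
    import pandas as pd

    buf = io.StringIO()
    pd.DataFrame({'TRACK_ID': [0, 1]}).to_csv(buf)   # index written as a nameless column
    df = pd.read_csv(io.StringIO(buf.getvalue()))    # comes back as 'Unnamed: 0'
    df = df.drop([c for c in df.columns if c.startswith('Unnamed')], axis=1)
    print(list(df.columns))  # ['TRACK_ID']
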
@@ -921,44 +925,58 @@ def track_at_position(pos, mode, return_tracks=False, view_on_napari=False, thre
 # # else:
 # return None

-def write_first_detection_class(tab, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):
+def write_first_detection_class(df, img_shape=None, edge_threshold=20, column_labels={'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'}):

     """
-    Annotates a dataframe with the time of the first detection and classifies tracks based on their detection status.
+    Assigns a classification and first detection time to tracks in the given DataFrame. This function must be called
+    before any track post-processing.

-    This function processes a dataframe containing tracking data, identifying the first point of detection for each
-    track based on the x-coordinate values. It annotates the dataframe with the time of the first detection and
-    assigns a class to each track indicating whether the first detection occurs at the start, during, or if there's
-    no detection within the tracking data.
+    This function computes the first detection time and a detection class (`class_firstdetection`) for each track in the data.
+    Tracks that start on or near the image edge, or those detected at the initial frame, are marked with special classes.

     Parameters
     ----------
-    tab : pandas.DataFrame
-        The dataframe containing tracking data, expected to have columns for track ID, time, and spatial coordinates.
+    df : pandas.DataFrame
+        A DataFrame containing track data. Expected to have at least the columns specified in `column_labels` and `class_id` (mask value).
+
+    img_shape : tuple of int, optional
+        The shape of the image as `(height, width)`. Used to determine whether the first detection occurs near the image edge.
+
+    edge_threshold : int, optional, default=20
+        The distance in pixels from the image edge to consider a detection as near the edge.
+
     column_labels : dict, optional
-        A dictionary mapping standard column names ('track', 'time', 'x', 'y') to the corresponding column names in
-        `tab`. Default column names are 'TRACK_ID', 'FRAME', 'POSITION_X', 'POSITION_Y'.
+        A dictionary mapping logical column names to actual column names in `tab`. Keys include:
+        - `'track'`: The column indicating the track ID (default: `"TRACK_ID"`).
+        - `'time'`: The column indicating the frame/time (default: `"FRAME"`).
+        - `'x'`: The column indicating the X-coordinate (default: `"POSITION_X"`).
+        - `'y'`: The column indicating the Y-coordinate (default: `"POSITION_Y"`).

     Returns
     -------
     pandas.DataFrame
-        The input dataframe `tab` with two additional columns: 'class_firstdetection' indicating the detection class,
-        and 't_firstdetection' indicating the time of the first detection.
+        The input DataFrame `df` with two additional columns:
+        - `'class_firstdetection'`: A class assigned based on detection status:
+            - `0`: Valid detection not near the edge and not at the initial frame.
+            - `2`: Detection near the edge, at the initial frame, or no detection available.
+        - `'t_firstdetection'`: The adjusted first detection time (in frame units):
+            - `-1`: Indicates no valid detection or detection near the edge.
+            - A float value representing the adjusted first detection time otherwise.

     Notes
     -----
-    - Detection is based on the presence of non-NaN values in the 'x' column for each track.
-    - Tracks with their first detection at the first time point are classified differently (`cclass=2`) and assigned
-      a `t_first` of -1, indicating no prior detection.
-    - The function assumes uniform time steps between each frame in the tracking data.
-
+    - The function assumes that tracks are grouped and sorted by track ID and frame.
+    - Detections near the edge or at the initial frame (frame 0) are considered invalid and assigned special values.
+    - If `img_shape` is not provided, edge checks are skipped.
     """

-    tab = tab.sort_values(by=[column_labels['track'],column_labels['time']])
-    for tid,track_group in tab.groupby(column_labels['track']):
+    df = df.sort_values(by=[column_labels['track'],column_labels['time']])
+    for tid,track_group in df.groupby(column_labels['track']):
         indices = track_group.index
-        detection = track_group[column_labels['x']].values
+        detection = track_group['class_id'].values
         timeline = track_group[column_labels['time']].values
+        positions_x = track_group[column_labels['x']].values
+        positions_y = track_group[column_labels['y']].values
         dt = 1

         # Initialize
@@ -966,8 +984,14 @@ def write_first_detection_class(tab, column_labels={'track': "TRACK_ID", 'time':
         if np.any(detection==detection):
             t_first = timeline[detection==detection][0]
+            x_first = positions_x[detection==detection][0]; y_first = positions_y[detection==detection][0];
+
+            edge_test = False
+            if img_shape is not None:
+                edge_test = (x_first < edge_threshold) or (y_first < edge_threshold) or (y_first > (img_shape[0] - edge_threshold)) or (x_first > (img_shape[1] - edge_threshold))
+
             cclass = 0
-            if t_first<=0:
+            if t_first<=0 or edge_test:
                 t_first = -1
                 cclass = 2
             else:
@@ -978,10 +1002,10 @@ def write_first_detection_class(tab, column_labels={'track': "TRACK_ID", 'time':
                 t_first = -1
                 cclass = 2

-        tab.loc[indices, 'class_firstdetection'] = cclass
-        tab.loc[indices, 't_firstdetection'] = t_first
+        df.loc[indices, 'class_firstdetection'] = cclass
+        df.loc[indices, 't_firstdetection'] = t_first

-    return tab
+    return df


celldetective/utils.py CHANGED
@@ -30,6 +30,15 @@ from skimage.morphology import disk
 from scipy.stats import ks_2samp
 from cliffs_delta import cliffs_delta

+
+def extract_cols_from_table_list(tables, nrows=1):
+    all_columns = []
+    for tab in tables:
+        cols = pd.read_csv(tab, nrows=1).columns.tolist()
+        all_columns.extend(cols)
+    all_columns = np.unique(all_columns)
+    return all_columns
+
 def safe_log(array):

     if isinstance(array,int) or isinstance(array,float):
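
The new helper collects the union of column names across many tables while reading only the header plus a single row of each CSV, so it stays cheap on large position tables. Hypothetical usage (paths are illustrative):

    cols = extract_cols_from_table_list([
        'W1/100/output/tables/trajectories_targets.csv',
        'W1/101/output/tables/trajectories_targets.csv',
    ])
    print(cols)  # sorted, de-duplicated union of both headers
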
@@ -547,8 +556,14 @@ def extract_cols_from_query(query: str):
         # Add the name to the globals dictionary with a dummy value.
         variables[name] = None

-    return list(variables.keys())
+    # Reverse mangling for special characters in column names.
+    def demangle_column_name(name):
+        if name.startswith("BACKTICK_QUOTED_STRING_"):
+            # Unquote backtick-quoted string.
+            return name[len("BACKTICK_QUOTED_STRING_"):].replace("_DOT_", ".").replace("_SLASH_", "/")
+        return name

+    return [demangle_column_name(name) for name in variables.keys()]

 def create_patch_mask(h, w, center=None, radius=None):
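
`demangle_column_name` undoes the renaming that pandas' expression parser applies to backtick-quoted identifiers, so queried column names containing '.' or '/' round-trip intact. A self-contained illustration of the mangling scheme handled here:

    # pandas rewrites `area/um2` into a valid Python identifier before parsing:
    mangled = 'BACKTICK_QUOTED_STRING_area_SLASH_um2'
    original = (mangled[len('BACKTICK_QUOTED_STRING_'):]
                .replace('_DOT_', '.').replace('_SLASH_', '/'))
    print(original)  # area/um2
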
@@ -646,15 +661,14 @@ def rename_intensity_column(df, channels):
     channel_names = np.array(channels)
     channel_indices = np.arange(len(channel_names),dtype=int)

-    if np.any(['intensity' in c for c in df.columns]):
+    if np.any(['intensity' in c for c in list(df.columns)]):

         intensity_indices = [s.startswith('intensity') for s in df.columns]
         intensity_columns = df.columns[intensity_indices]

-        if len(channel_names) > 1:
+        if len(channel_names) >= 1:
             to_rename = {}
             for k in range(len(intensity_columns)):
-                #print(intensity_columns[k])

                 sections = np.array(re.split('-|_', intensity_columns[k]))
                 test_digit = np.array([s.isdigit() for s in sections])
@@ -664,7 +678,11 @@ def rename_intensity_column(df, channels):
                 new_name = np.delete(sections, np.where(test_digit)[0]) #np.where(test_digit)[0]
                 new_name = '_'.join(list(new_name))
                 new_name = new_name.replace('intensity', channel_name)
-                to_rename.update({intensity_columns[k]: new_name.replace('-','_')})
+                new_name = new_name.replace('-','_')
+                new_name = new_name.replace('_nanmean','_mean')
+
+                to_rename.update({intensity_columns[k]: new_name})
+
                 if 'centre' in intensity_columns[k]:
                     # sections = np.array(re.split('-|_', intensity_columns[k]))
                     measure = np.array(re.split('-|_', new_name))