fucciphase 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fucciphase/phase.py CHANGED
@@ -1,12 +1,13 @@
1
+ import logging
1
2
  from enum import Enum
2
- from typing import List
3
+ from typing import Literal
3
4
 
4
5
  import dtaidistance.preprocessing
5
6
  import numpy as np
6
7
  import pandas as pd
7
8
  from dtaidistance.dtw import warping_amount
8
9
  from dtaidistance.subsequence.dtw import subsequence_alignment
9
- from scipy import interpolate, stats
10
+ from scipy import interpolate, signal, stats
10
11
 
11
12
  from .sensor import FUCCISensor
12
13
  from .utils import (
@@ -16,6 +17,11 @@ from .utils import (
16
17
  get_time_distortion_coefficient,
17
18
  )
18
19
 
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # Type alias for signal processing mode
23
+ SignalMode = Literal["signal", "derivative", "both"]
24
+
19
25
 
20
26
  class NewColumns(str, Enum):
21
27
  """Columns generated by the analysis.
@@ -99,45 +105,40 @@ class NewColumns(str, Enum):
99
105
 
100
106
  def generate_cycle_phases(
101
107
  df: pd.DataFrame,
102
- channels: List[str],
108
+ channels: list[str],
103
109
  sensor: FUCCISensor,
104
- thresholds: List[float],
110
+ thresholds: list[float],
105
111
  estimate_percentage: bool = False,
106
112
  ) -> None:
107
- """Add a column in place to the dataframe with the phase of the cell cycle.
108
-
109
- The phase is determined using a threshold on the channel intensities
110
- assuming a FUCCI sensor.
113
+ """Add cell-cycle phase (and optionally percentage) columns to the dataframe.
111
114
 
112
- The thresholds per channel must be between 0 and 1.
115
+ The phase is determined using thresholds on the normalized channel
116
+ intensities, assuming a FUCCI-like sensor. For each row (spot) in the
117
+ dataframe, this function:
113
118
 
114
- Example:
115
- channels = ["CH1", "CH2"]
116
- thresholds = [0.1, 0.1]
119
+ 1. Checks that normalized intensity columns exist for all requested channels.
120
+ 2. Uses :func:`estimate_cell_phase_from_max_intensity` to assign a
121
+ discrete phase label based on whether each channel is ON/OFF.
122
+ 3. Optionally calls :func:`estimate_cell_cycle_percentage` to estimate
123
+ a continuous cell-cycle percentage from the intensities and the
124
+ discrete phase.
117
125
 
118
- The sensor needs to be calibrated for each cell line.
119
- For that, record the FUCCI intensities of multiple cell cycles
120
- by live-cell fluorescence microscopy.
121
- See the examples for more details.
122
-
123
- The thresholds need to be chosen based on the expected noise of the background and
124
- uncertainty in intensity computation.
125
- They give the ratio to the maximum intensity.
126
- E.g., a threshold of 0.1 means that all intensities below 0.1 times the maximum
127
- intensity are considered background signal.
126
+ The thresholds per channel must be between 0 and 1 and are interpreted
127
+ as fractions of the maximum intensity in that channel (e.g. 0.1 means
128
+ “10% of max”).
128
129
 
129
130
  Parameters
130
131
  ----------
131
- df : pd.DataFrame
132
- Dataframe with columns holding normalized intensities
133
- sensor: FUCCISensor
134
- FUCCI sensor with phase specifics
135
- channels: List[str]
136
- Names of channels
137
- thresholds: List[float]
138
- Thresholds to separate phases
139
- estimate_percentage: bool
140
- Estimate cell cycle percentages
132
+ df : pandas.DataFrame
133
+ Dataframe with columns holding normalized intensities.
134
+ channels : list[str]
135
+ Names of normalized channels to use for phase estimation.
136
+ sensor : FUCCISensor
137
+ FUCCI sensor with phase-specific information.
138
+ thresholds : list[float]
139
+ Thresholds (0-1) used to separate phases.
140
+ estimate_percentage : bool, optional
141
+ If True, also estimate a continuous cell-cycle percentage.
141
142
 
142
143
 
143
144
  Raises
@@ -163,7 +164,7 @@ def generate_cycle_phases(
163
164
  # check that all channels are present
164
165
  check_channels(sensor.fluorophores, channels)
165
166
 
166
- # compute phases
167
+ # compute discrete phases based on normalized intensities
167
168
  estimate_cell_phase_from_max_intensity(
168
169
  df,
169
170
  norm_channel_names,
@@ -172,28 +173,33 @@ def generate_cycle_phases(
172
173
  thresholds=thresholds,
173
174
  )
174
175
 
175
- # name of phase_column
176
+ # name of phase column
176
177
  phase_column = NewColumns.discrete_phase_max()
177
- # compute percentages
178
+ # optionally compute continuous cell-cycle percentages
178
179
  if estimate_percentage:
179
180
  estimate_cell_cycle_percentage(df, norm_channel_names, sensor, phase_column)
180
181
 
181
182
 
182
183
  def estimate_cell_cycle_percentage(
183
- df: pd.DataFrame, channels: List[str], sensor: FUCCISensor, phase_column: str
184
+ df: pd.DataFrame, channels: list[str], sensor: FUCCISensor, phase_column: str
184
185
  ) -> None:
185
186
  """Estimate cell cycle percentage from intensity pairs.
186
187
 
188
+ For each row in the dataframe, this function reads the normalized
189
+ intensities in ``channels`` together with the discrete phase label in
190
+ ``phase_column`` and queries the sensor for an estimated cell-cycle
191
+ percentage. The result is stored in the ``CELL_CYCLE_PERC`` column.
192
+
187
193
  Parameters
188
194
  ----------
189
- df : pd.DataFrame
190
- Dataframe with columns holding normalized intensities
191
- sensor: FUCCISensor
192
- FUCCI sensor with phase specifics
193
- channels: List[str]
194
- Names of channels
195
- phase_column: str
196
- Name of phase column
195
+ df : pandas.DataFrame
196
+ Dataframe with normalized intensity columns and a phase column.
197
+ channels : list[str]
198
+ Names of normalized intensity columns for each fluorophore.
199
+ sensor : FUCCISensor
200
+ FUCCI sensor used to map intensities and phase to cycle percentage.
201
+ phase_column : str
202
+ Name of the column storing discrete phase labels.
197
203
  """
198
204
  percentages = []
199
205
  # iterate through data frame
@@ -210,39 +216,39 @@ def estimate_cell_cycle_percentage(
210
216
 
211
217
  def estimate_cell_phase_from_max_intensity(
212
218
  df: pd.DataFrame,
213
- channels: List[str],
219
+ channels: list[str],
214
220
  sensor: FUCCISensor,
215
- background: List[float],
216
- thresholds: List[float],
221
+ background: list[float],
222
+ thresholds: list[float],
217
223
  ) -> None:
218
- """Add a column in place to the dataframe with the estimated phase of the cell
219
- cycle, where the phase is determined by thresholding the channel intensities.
224
+ """Estimate discrete cell-cycle phase by thresholding normalized intensities.
220
225
 
221
- The provided thresholds are used to decide if a channel is switched on (ON).
222
- For that, the background is subtracted from the mean intensity.
223
- The obtained values are normalized w.r.t. the maximum mean intensity in the
224
- respective channel available in the DataFrame.
225
- Hence, the threshold values should be between 0 and 1.
226
- This method will not work reliably if not enough cells from different phases
227
- are contained in the DataFrame.
226
+ For each channel, the background value is subtracted from the mean
227
+ intensity. The resulting intensities are normalized by the maximum
228
+ mean intensity observed in that channel. A channel is considered ON
229
+ if its normalized intensity exceeds the corresponding threshold.
230
+
231
+ The ON/OFF pattern across channels is then mapped to a discrete phase
232
+ using the sensor model.
228
233
 
229
234
  Parameters
230
235
  ----------
231
- df: pd.DataFrame
232
- Dataframe with a CELL_CYCLE_PERC column
233
- channels: List[str]
234
- Names of channels
235
- sensor: FUCCISensor
236
- FUCCI sensor with specific phase analysis information
237
- background: List[float]
238
- Single value per channel representing background
239
- thresholds: List[float]
240
- Thresholds to separate phases
236
+ df : pandas.DataFrame
237
+ Dataframe containing the normalized intensity columns.
238
+ channels : list[str]
239
+ Names of normalized intensity columns.
240
+ sensor : FUCCISensor
241
+ FUCCI sensor with phase analysis information.
242
+ background : list[float]
243
+ Single background value per channel.
244
+ thresholds : list[float]
245
+ Thresholds (0-1) used to separate phases.
241
246
 
242
247
  Raises
243
248
  ------
244
249
  ValueError
245
- If the dataframe does not contain the normalized channels.
250
+ If required channels are missing or if background/threshold lists
251
+ are inconsistent with the number of channels.
246
252
  """
247
253
  # sanity check: check that channels are present
248
254
  for channel in channels:
@@ -257,8 +263,10 @@ def estimate_cell_phase_from_max_intensity(
257
263
  check_channels(sensor.fluorophores, channels)
258
264
  check_thresholds(sensor.fluorophores, thresholds)
259
265
 
260
- phase_markers_list: List[pd.Series[bool]] = []
261
- for channel, bg_value, threshold in zip(channels, background, thresholds):
266
+ phase_markers_list: list[pd.Series[bool]] = []
267
+ for channel, bg_value, threshold in zip(
268
+ channels, background, thresholds, strict=True
269
+ ):
262
270
  # get intensities and subtract background
263
271
  intensity = df[channel] - bg_value
264
272
  # threshold channels to decide if ON / OFF (data is in list per spot)
@@ -275,10 +283,10 @@ def estimate_cell_phase_from_max_intensity(
275
283
 
276
284
  def estimate_cell_phase_from_background(
277
285
  df: pd.DataFrame,
278
- channels: List[str],
286
+ channels: list[str],
279
287
  sensor: FUCCISensor,
280
- background: List[float],
281
- thresholds: List[float],
288
+ background: list[float],
289
+ thresholds: list[float],
282
290
  ) -> None:
283
291
  """Add a column in place to the dataframe with the estimated phase of the cell
284
292
  cycle, where the phase is determined by comparing the channel intensities to
@@ -319,8 +327,10 @@ def estimate_cell_phase_from_background(
319
327
 
320
328
  check_channels(sensor.fluorophores, channels)
321
329
 
322
- phase_markers_list: List[pd.Series[bool]] = []
323
- for channel, bg_value, threshold in zip(channels, background, thresholds):
330
+ phase_markers_list: list[pd.Series[bool]] = []
331
+ for channel, bg_value, threshold in zip(
332
+ channels, background, thresholds, strict=True
333
+ ):
324
334
  intensity = df[channel]
325
335
  # threshold channels to decide if ON / OFF (data is in list per spot)
326
336
  phase_markers_list.append(intensity > threshold * bg_value)
@@ -333,18 +343,159 @@ def estimate_cell_phase_from_background(
333
343
  df[NewColumns.discrete_phase_bg()] = pd.Series(phase_names, dtype=str) # add as str
334
344
 
335
345
 
346
+ def _process_channel(
347
+ series: np.ndarray,
348
+ signal_mode: SignalMode,
349
+ smooth: float,
350
+ channel_name: str = "",
351
+ signal_smooth: int = 0,
352
+ ) -> list[np.ndarray]:
353
+ """Process a single channel according to the signal mode.
354
+
355
+ Parameters
356
+ ----------
357
+ series : np.ndarray
358
+ The input signal array.
359
+ signal_mode : SignalMode
360
+ Processing mode: "signal", "derivative", or "both".
361
+ smooth : float
362
+ Smoothing factor for differencing (removes high frequencies).
363
+ channel_name : str, optional
364
+ Channel name for warning messages.
365
+ signal_smooth : int, optional
366
+ Window size for signal smoothing (Savitzky-Golay filter with polyorder=3).
367
+ 0 means no smoothing. Must be > 3 if used.
368
+ Only applies when signal_mode is "signal" or "both".
369
+
370
+ Returns
371
+ -------
372
+ list[np.ndarray]
373
+ List of processed arrays. Length 1 for "signal" or "derivative",
374
+ length 2 for "both" (signal first, then derivative).
375
+ """
376
+ results = []
377
+
378
+ if signal_mode in ("signal", "both"):
379
+ smoothed_signal = series.copy()
380
+ if signal_smooth > 3:
381
+ smoothed_signal = signal.savgol_filter(
382
+ series, window_length=signal_smooth, polyorder=3, mode="nearest"
383
+ )
384
+ elif signal_smooth > 0:
385
+ logger.warning(
386
+ "signal_smooth=%d is too small (must be > 3), skipping smoothing",
387
+ signal_smooth,
388
+ )
389
+ results.append(smoothed_signal)
390
+
391
+ if signal_mode in ("derivative", "both"):
392
+ try:
393
+ diff = dtaidistance.preprocessing.differencing(series, smooth=smooth)
394
+ except ValueError:
395
+ if channel_name:
396
+ logger.warning(
397
+ "Smoothing failed for channel %s, continuing without smoothing",
398
+ channel_name,
399
+ )
400
+ diff = dtaidistance.preprocessing.differencing(series)
401
+ results.append(diff)
402
+
403
+ return results
404
+
405
+
406
+ def _compute_both_mode_scale_factor(processed_series: list[np.ndarray]) -> float:
407
+ """Compute scale factor to balance signal and derivative contributions.
408
+
409
+ In "both" mode, signals and derivatives may have different magnitudes.
410
+ This function computes a scale factor to apply to signals so they
411
+ contribute equally to the DTW distance.
412
+
413
+ Parameters
414
+ ----------
415
+ processed_series : list[np.ndarray]
416
+ List of processed arrays in order:
417
+ [signal_ch1, deriv_ch1, signal_ch2, deriv_ch2, ...]
418
+
419
+ Returns
420
+ -------
421
+ float
422
+ Scale factor to multiply signals by. Returns 1.0 if derivatives have zero std.
423
+ """
424
+ # In "both" mode, signals are at even indices, derivatives at odd indices
425
+ signals = [processed_series[i] for i in range(0, len(processed_series), 2)]
426
+ derivatives = [processed_series[i] for i in range(1, len(processed_series), 2)]
427
+
428
+ signal_std = np.mean([np.std(s) for s in signals])
429
+ deriv_std = np.mean([np.std(d) for d in derivatives])
430
+
431
+ if signal_std == 0:
432
+ return 1.0
433
+ return deriv_std / signal_std # type: ignore[no-any-return]
434
+
435
+
436
+ def _apply_both_mode_scaling(
437
+ processed_series: list[np.ndarray], scale_factor: float
438
+ ) -> list[np.ndarray]:
439
+ """Apply scale factor to signal features in "both" mode.
440
+
441
+ Parameters
442
+ ----------
443
+ processed_series : list[np.ndarray]
444
+ List of processed arrays in order:
445
+ [signal_ch1, deriv_ch1, signal_ch2, deriv_ch2, ...]
446
+ scale_factor : float
447
+ Scale factor to multiply signals by.
448
+
449
+ Returns
450
+ -------
451
+ list[np.ndarray]
452
+ Scaled processed series with signals multiplied by scale_factor.
453
+ """
454
+ scaled = []
455
+ for i, arr in enumerate(processed_series):
456
+ if i % 2 == 0: # Signal (even index)
457
+ scaled.append(arr * scale_factor)
458
+ else: # Derivative (odd index)
459
+ scaled.append(arr)
460
+ return scaled
461
+
462
+
463
+ def _compute_output_length_offset(signal_mode: SignalMode) -> int:
464
+ """Return the offset to add to query length for output array size.
465
+
466
+ When using derivatives, the output is 1 element shorter, so we need
467
+ to add 1 to get back to the original track length.
468
+
469
+ Parameters
470
+ ----------
471
+ signal_mode : SignalMode
472
+ The signal processing mode.
473
+
474
+ Returns
475
+ -------
476
+ int
477
+ Offset to add: 1 if derivative is used, 0 otherwise.
478
+ """
479
+ if signal_mode in ("derivative", "both"):
480
+ return 1
481
+ return 0
482
+
483
+
336
484
  # flake8: noqa: C901
337
485
  def estimate_percentage_by_subsequence_alignment(
338
486
  df: pd.DataFrame,
339
487
  dt: float,
340
- channels: List[str],
488
+ channels: list[str],
341
489
  reference_data: pd.DataFrame,
342
490
  smooth: float = 0.1,
343
491
  penalty: float = 0.05,
344
492
  track_id_name: str = "TRACK_ID",
345
493
  minimum_track_length: int = 10,
346
494
  use_zscore_norm: bool = True,
347
- use_derivative: bool = True,
495
+ signal_mode: SignalMode = "derivative",
496
+ signal_weight: float = 1.0,
497
+ signal_smooth: int = 0,
498
+ use_derivative: bool | None = None,
348
499
  ) -> None:
349
500
  """Use subsequence alignment to estimate percentage.
350
501
 
@@ -359,7 +510,7 @@ def estimate_percentage_by_subsequence_alignment(
359
510
  reference_data: pd.DataFrame
360
511
  Containing reference intensities over time
361
512
  smooth: float
362
- Smoothing factor, see dtaidistance documentation
513
+ Smoothing factor for derivative (removes high frequencies, 0-0.5)
363
514
  penalty: float
364
515
  Penalty for DTW algorithm, enforces diagonal warping path
365
516
  track_id_name: str
@@ -370,10 +521,34 @@ def estimate_percentage_by_subsequence_alignment(
370
521
  Use z-score normalization before differencing curves
371
522
  Probably not needed if intensities of reference and measured
372
523
  curve are similar
373
- use_derivative: bool
374
- Take derivative to perform alignment independent of intensity
375
- baseline (in default mode also after normalization)
524
+ signal_mode: SignalMode
525
+ Signal processing mode:
526
+ - "signal": use raw signal only
527
+ - "derivative": use derivative only (default, for baseline independence)
528
+ - "both": use both signal and derivative as features
529
+ signal_weight: float
530
+ Weight for signal relative to derivative in "both" mode.
531
+ Default 1.0 means equal contribution. Values > 1.0 weight signal
532
+ higher, values < 1.0 weight derivative higher. Ignored for other modes.
533
+ signal_smooth: int
534
+ Window size for signal smoothing (Savitzky-Golay filter, polyorder=3).
535
+ 0 means no smoothing. Must be > 3 if used.
536
+ Only applies in "signal" or "both" modes.
537
+ use_derivative: bool | None
538
+ Deprecated. Use signal_mode instead. If provided, overrides signal_mode
539
+ for backward compatibility (True -> "derivative", False -> "signal").
376
540
  """
541
+ # Handle backward compatibility with use_derivative parameter
542
+ if use_derivative is not None:
543
+ import warnings
544
+
545
+ warnings.warn(
546
+ "use_derivative is deprecated, use signal_mode instead",
547
+ DeprecationWarning,
548
+ stacklevel=2,
549
+ )
550
+ signal_mode = "derivative" if use_derivative else "signal"
551
+
377
552
  if "time" not in reference_data:
378
553
  raise ValueError("Need to provide time column in reference_data.")
379
554
  if "percentage" not in reference_data:
@@ -395,33 +570,48 @@ def estimate_percentage_by_subsequence_alignment(
395
570
 
396
571
  num_time = int(time_scale[-1] / dt)
397
572
  new_time_scale = np.linspace(0, dt * num_time, num=num_time + 1)
398
- assert np.isclose(dt, new_time_scale[1] - new_time_scale[0])
573
+ actual_dt = new_time_scale[1] - new_time_scale[0]
574
+ if not np.isclose(dt, actual_dt):
575
+ raise ValueError(
576
+ f"Time scale mismatch: requested dt={dt}, but computed dt={actual_dt}. "
577
+ "Check that the reference data time scale is compatible with "
578
+ "the requested timestep."
579
+ )
399
580
 
400
581
  # reference curve in time scale of provided track
401
582
  percentage_ref = f_percentage(new_time_scale)
402
583
 
403
- series_diff = []
584
+ processed_series = []
404
585
  for channel in channels:
405
586
  series = interpolation_functions[channel](new_time_scale)
406
587
  if use_zscore_norm:
407
588
  series = stats.zscore(series)
408
- # if all values are the same, we zero to not numerical issues
589
+ # if all values are the same, we zero to avoid numerical issues
409
590
  if np.all(np.isnan(series)):
410
- series = 0.0
411
-
412
- if use_derivative:
413
- try:
414
- diff_ch = dtaidistance.preprocessing.differencing(series, smooth=smooth)
415
- except ValueError:
416
- print(
417
- "WARNING: The smoothing failed, continue without smoothing"
418
- f" for channel {channel}"
419
- )
420
- diff_ch = dtaidistance.preprocessing.differencing(series)
421
- else:
422
- diff_ch = series
423
- series_diff.append(diff_ch)
424
- series = np.array(series_diff)
591
+ series = np.zeros_like(series)
592
+
593
+ channel_features = _process_channel(
594
+ series, signal_mode, smooth, channel, signal_smooth
595
+ )
596
+ processed_series.extend(channel_features)
597
+
598
+ # For "both" mode, trim signal features to match derivative length and scale
599
+ both_mode_scale_factor = 1.0
600
+ if signal_mode == "both":
601
+ min_len = min(len(s) for s in processed_series)
602
+ processed_series = [s[-min_len:] for s in processed_series]
603
+ # Also trim the percentage reference to match
604
+ percentage_ref = percentage_ref[-min_len:]
605
+ # Compute and apply scaling to balance signal and derivative contributions
606
+ # signal_weight > 1.0 weights signal higher relative to derivative
607
+ both_mode_scale_factor = (
608
+ _compute_both_mode_scale_factor(processed_series) * signal_weight
609
+ )
610
+ processed_series = _apply_both_mode_scaling(
611
+ processed_series, both_mode_scale_factor
612
+ )
613
+
614
+ series = np.array(processed_series)
425
615
  series = np.swapaxes(series, 0, 1)
426
616
 
427
617
  df.loc[:, NewColumns.cell_cycle_dtw()] = np.nan
@@ -441,37 +631,48 @@ def estimate_percentage_by_subsequence_alignment(
441
631
  # find percentages if track is long enough
442
632
  queries = track_df[channels].to_numpy()
443
633
 
444
- queries_diff = []
634
+ processed_queries = []
445
635
  for idx in range(len(channels)):
636
+ query_series = queries[:, idx].copy()
446
637
  if use_zscore_norm:
447
- queries[:, idx] = stats.zscore(queries[:, idx])
448
- # if all values are the same, we zero to not numerical issues
449
- if np.all(np.isnan(queries[:, idx])):
450
- queries[:, idx] = 0.0
451
- if use_derivative:
452
- diff_ch = dtaidistance.preprocessing.differencing(
453
- queries[:, idx], smooth=smooth
454
- )
455
- else:
456
- diff_ch = queries[:, idx]
457
- queries_diff.append(diff_ch)
638
+ query_series = stats.zscore(query_series)
639
+ # if all values are the same, we zero to avoid numerical issues
640
+ if np.all(np.isnan(query_series)):
641
+ query_series = np.zeros_like(query_series)
458
642
 
459
- query = np.array(queries_diff)
643
+ channel_features = _process_channel(
644
+ query_series, signal_mode, smooth, signal_smooth=signal_smooth
645
+ )
646
+ processed_queries.extend(channel_features)
647
+
648
+ # For "both" mode, trim signal features to match derivative length and scale
649
+ if signal_mode == "both":
650
+ min_len = min(len(q) for q in processed_queries)
651
+ processed_queries = [q[-min_len:] for q in processed_queries]
652
+ # Apply same scale factor as reference to ensure consistent weighting
653
+ processed_queries = _apply_both_mode_scaling(
654
+ processed_queries, both_mode_scale_factor
655
+ )
656
+
657
+ query = np.array(processed_queries)
460
658
  query = np.swapaxes(query, 0, 1)
461
659
 
462
660
  sa = subsequence_alignment(query, series, penalty=penalty)
463
661
  best_match = sa.best_match()
464
- if use_derivative:
465
- new_percentage = np.zeros(query.shape[0] + 1)
466
- else:
467
- new_percentage = np.zeros(query.shape[0])
468
- for p in best_match.path:
469
- new_percentage[p[0]] = percentage_ref[p[1]]
470
- if p[1] + 1 < len(percentage_ref):
471
- last_percentage = p[1] + 1
662
+ length_offset = _compute_output_length_offset(signal_mode)
663
+ new_percentage = np.zeros(query.shape[0] + length_offset)
664
+
665
+ # Handle empty path case
666
+ if len(best_match.path) == 0:
667
+ new_percentage[:] = np.nan
472
668
  else:
473
- last_percentage = p[1]
474
- new_percentage[-1] = percentage_ref[last_percentage]
669
+ for p in best_match.path:
670
+ new_percentage[p[0]] = percentage_ref[p[1]]
671
+ if p[1] + 1 < len(percentage_ref):
672
+ last_percentage = p[1] + 1
673
+ else:
674
+ last_percentage = p[1]
675
+ new_percentage[-1] = percentage_ref[last_percentage]
475
676
  # save estimated cell cycle percentages
476
677
  df.loc[df[track_id_name] == track_id, NewColumns.cell_cycle_dtw()] = (
477
678
  new_percentage[:]
@@ -481,21 +682,33 @@ def estimate_percentage_by_subsequence_alignment(
481
682
  best_match.value
482
683
  )
483
684
 
484
- _, distortion_score, _, _ = get_time_distortion_coefficient(best_match.path)
485
- # save DTW distortion
486
- df.loc[df[track_id_name] == track_id, NewColumns.dtw_distortion()] = (
487
- distortion_score
488
- )
489
- df.loc[df[track_id_name] == track_id, NewColumns.dtw_distortion_norm()] = (
490
- distortion_score / len(track_df)
491
- )
685
+ # Handle empty path case for DTW metrics
686
+ if len(best_match.path) == 0:
687
+ df.loc[df[track_id_name] == track_id, NewColumns.dtw_distortion()] = np.nan
688
+ df.loc[df[track_id_name] == track_id, NewColumns.dtw_distortion_norm()] = (
689
+ np.nan
690
+ )
691
+ df.loc[df[track_id_name] == track_id, NewColumns.dtw_warping_amount()] = (
692
+ np.nan
693
+ )
694
+ df.loc[
695
+ df[track_id_name] == track_id, NewColumns.rel_dtw_warping_amount()
696
+ ] = np.nan
697
+ else:
698
+ _, distortion_score, _, _ = get_time_distortion_coefficient(best_match.path)
699
+ # save DTW distortion
700
+ df.loc[df[track_id_name] == track_id, NewColumns.dtw_distortion()] = (
701
+ distortion_score
702
+ )
703
+ df.loc[df[track_id_name] == track_id, NewColumns.dtw_distortion_norm()] = (
704
+ distortion_score / len(track_df)
705
+ )
492
706
 
493
- # save DTW warping amount
494
- df.loc[df[track_id_name] == track_id, NewColumns.dtw_warping_amount()] = (
495
- warping_amount(best_match.path)
496
- )
707
+ # save DTW warping amount
708
+ df.loc[df[track_id_name] == track_id, NewColumns.dtw_warping_amount()] = (
709
+ warping_amount(best_match.path)
710
+ )
497
711
 
498
- # save DTW warping amount
499
- df.loc[df[track_id_name] == track_id, NewColumns.rel_dtw_warping_amount()] = (
500
- warping_amount(best_match.path) / len(track_df)
501
- )
712
+ df.loc[
713
+ df[track_id_name] == track_id, NewColumns.rel_dtw_warping_amount()
714
+ ] = warping_amount(best_match.path) / len(track_df)