fucciphase 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fucciphase/__init__.py +7 -1
- fucciphase/__main__.py +12 -0
- fucciphase/fucci_phase.py +123 -53
- fucciphase/io.py +18 -17
- fucciphase/main_cli.py +151 -34
- fucciphase/napari/tracks_to_napari.py +20 -21
- fucciphase/phase.py +350 -137
- fucciphase/plot.py +240 -88
- fucciphase/sensor.py +47 -33
- fucciphase/tracking_utilities.py +70 -9
- fucciphase/utils/__init__.py +14 -1
- fucciphase/utils/checks.py +2 -5
- fucciphase/utils/dtw.py +2 -4
- fucciphase/utils/normalize.py +46 -12
- fucciphase/utils/phase_fit.py +11 -7
- fucciphase/utils/simulator.py +1 -1
- fucciphase/utils/track_postprocessing.py +16 -11
- fucciphase/utils/trackmate.py +30 -13
- fucciphase-0.0.4.dist-info/METADATA +238 -0
- fucciphase-0.0.4.dist-info/RECORD +25 -0
- {fucciphase-0.0.2.dist-info → fucciphase-0.0.4.dist-info}/WHEEL +1 -1
- fucciphase-0.0.2.dist-info/METADATA +0 -137
- fucciphase-0.0.2.dist-info/RECORD +0 -24
- {fucciphase-0.0.2.dist-info → fucciphase-0.0.4.dist-info}/entry_points.txt +0 -0
- {fucciphase-0.0.2.dist-info → fucciphase-0.0.4.dist-info}/licenses/LICENSE +0 -0
fucciphase/phase.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from enum import Enum
|
|
2
|
-
from typing import
|
|
3
|
+
from typing import Literal
|
|
3
4
|
|
|
4
5
|
import dtaidistance.preprocessing
|
|
5
6
|
import numpy as np
|
|
6
7
|
import pandas as pd
|
|
7
8
|
from dtaidistance.dtw import warping_amount
|
|
8
9
|
from dtaidistance.subsequence.dtw import subsequence_alignment
|
|
9
|
-
from scipy import interpolate, stats
|
|
10
|
+
from scipy import interpolate, signal, stats
|
|
10
11
|
|
|
11
12
|
from .sensor import FUCCISensor
|
|
12
13
|
from .utils import (
|
|
@@ -16,6 +17,11 @@ from .utils import (
|
|
|
16
17
|
get_time_distortion_coefficient,
|
|
17
18
|
)
|
|
18
19
|
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
# Type alias for signal processing mode
|
|
23
|
+
SignalMode = Literal["signal", "derivative", "both"]
|
|
24
|
+
|
|
19
25
|
|
|
20
26
|
class NewColumns(str, Enum):
|
|
21
27
|
"""Columns generated by the analysis.
|
|
@@ -99,45 +105,40 @@ class NewColumns(str, Enum):
|
|
|
99
105
|
|
|
100
106
|
def generate_cycle_phases(
|
|
101
107
|
df: pd.DataFrame,
|
|
102
|
-
channels:
|
|
108
|
+
channels: list[str],
|
|
103
109
|
sensor: FUCCISensor,
|
|
104
|
-
thresholds:
|
|
110
|
+
thresholds: list[float],
|
|
105
111
|
estimate_percentage: bool = False,
|
|
106
112
|
) -> None:
|
|
107
|
-
"""Add
|
|
108
|
-
|
|
109
|
-
The phase is determined using a threshold on the channel intensities
|
|
110
|
-
assuming a FUCCI sensor.
|
|
113
|
+
"""Add cell-cycle phase (and optionally percentage) columns to the dataframe.
|
|
111
114
|
|
|
112
|
-
The
|
|
115
|
+
The phase is determined using thresholds on the normalized channel
|
|
116
|
+
intensities, assuming a FUCCI-like sensor. For each row (spot) in the
|
|
117
|
+
dataframe, this function:
|
|
113
118
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
119
|
+
1. Checks that normalized intensity columns exist for all requested channels.
|
|
120
|
+
2. Uses :func:`estimate_cell_phase_from_max_intensity` to assign a
|
|
121
|
+
discrete phase label based on whether each channel is ON/OFF.
|
|
122
|
+
3. Optionally calls :func:`estimate_cell_cycle_percentage` to estimate
|
|
123
|
+
a continuous cell-cycle percentage from the intensities and the
|
|
124
|
+
discrete phase.
|
|
117
125
|
|
|
118
|
-
The
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
See the examples for more details.
|
|
122
|
-
|
|
123
|
-
The thresholds need to be chosen based on the expected noise of the background and
|
|
124
|
-
uncertainty in intensity computation.
|
|
125
|
-
They give the ratio to the maximum intensity.
|
|
126
|
-
E.g., a threshold of 0.1 means that all intensities below 0.1 times the maximum
|
|
127
|
-
intensity are considered background signal.
|
|
126
|
+
The thresholds per channel must be between 0 and 1 and are interpreted
|
|
127
|
+
as fractions of the maximum intensity in that channel (e.g. 0.1 means
|
|
128
|
+
“10% of max”).
|
|
128
129
|
|
|
129
130
|
Parameters
|
|
130
131
|
----------
|
|
131
|
-
df :
|
|
132
|
-
Dataframe with columns holding normalized intensities
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
thresholds: List[float]
|
|
138
|
-
Thresholds to separate phases
|
|
139
|
-
estimate_percentage: bool
|
|
140
|
-
|
|
132
|
+
df : pandas.DataFrame
|
|
133
|
+
Dataframe with columns holding normalized intensities.
|
|
134
|
+
channels : List[str]
|
|
135
|
+
Names of normalized channels to use for phase estimation.
|
|
136
|
+
sensor : FUCCISensor
|
|
137
|
+
FUCCI sensor with phase-specific information.
|
|
138
|
+
thresholds : List[float]
|
|
139
|
+
Thresholds (0-1) used to separate phases.
|
|
140
|
+
estimate_percentage : bool, optional
|
|
141
|
+
If True, also estimate a continuous cell-cycle percentage.
|
|
141
142
|
|
|
142
143
|
|
|
143
144
|
Raises
|
|
@@ -163,7 +164,7 @@ def generate_cycle_phases(
|
|
|
163
164
|
# check that all channels are present
|
|
164
165
|
check_channels(sensor.fluorophores, channels)
|
|
165
166
|
|
|
166
|
-
# compute phases
|
|
167
|
+
# compute discrete phases based on normalized intensities
|
|
167
168
|
estimate_cell_phase_from_max_intensity(
|
|
168
169
|
df,
|
|
169
170
|
norm_channel_names,
|
|
@@ -172,28 +173,33 @@ def generate_cycle_phases(
|
|
|
172
173
|
thresholds=thresholds,
|
|
173
174
|
)
|
|
174
175
|
|
|
175
|
-
# name of
|
|
176
|
+
# name of phase column
|
|
176
177
|
phase_column = NewColumns.discrete_phase_max()
|
|
177
|
-
# compute percentages
|
|
178
|
+
# optionally compute continuous cell-cycle percentages
|
|
178
179
|
if estimate_percentage:
|
|
179
180
|
estimate_cell_cycle_percentage(df, norm_channel_names, sensor, phase_column)
|
|
180
181
|
|
|
181
182
|
|
|
182
183
|
def estimate_cell_cycle_percentage(
|
|
183
|
-
df: pd.DataFrame, channels:
|
|
184
|
+
df: pd.DataFrame, channels: list[str], sensor: FUCCISensor, phase_column: str
|
|
184
185
|
) -> None:
|
|
185
186
|
"""Estimate cell cycle percentage from intensity pairs.
|
|
186
187
|
|
|
188
|
+
For each row in the dataframe, this function reads the normalized
|
|
189
|
+
intensities in ``channels`` together with the discrete phase label in
|
|
190
|
+
``phase_column`` and queries the sensor for an estimated cell-cycle
|
|
191
|
+
percentage. The result is stored in the ``CELL_CYCLE_PERC`` column.
|
|
192
|
+
|
|
187
193
|
Parameters
|
|
188
194
|
----------
|
|
189
|
-
df :
|
|
190
|
-
Dataframe with columns
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
phase_column: str
|
|
196
|
-
Name of phase
|
|
195
|
+
df : pandas.DataFrame
|
|
196
|
+
Dataframe with normalized intensity columns and a phase column.
|
|
197
|
+
channels : List[str]
|
|
198
|
+
Names of normalized intensity columns for each fluorophore.
|
|
199
|
+
sensor : FUCCISensor
|
|
200
|
+
FUCCI sensor used to map intensities and phase to cycle percentage.
|
|
201
|
+
phase_column : str
|
|
202
|
+
Name of the column storing discrete phase labels.
|
|
197
203
|
"""
|
|
198
204
|
percentages = []
|
|
199
205
|
# iterate through data frame
|
|
@@ -210,39 +216,39 @@ def estimate_cell_cycle_percentage(
|
|
|
210
216
|
|
|
211
217
|
def estimate_cell_phase_from_max_intensity(
|
|
212
218
|
df: pd.DataFrame,
|
|
213
|
-
channels:
|
|
219
|
+
channels: list[str],
|
|
214
220
|
sensor: FUCCISensor,
|
|
215
|
-
background:
|
|
216
|
-
thresholds:
|
|
221
|
+
background: list[float],
|
|
222
|
+
thresholds: list[float],
|
|
217
223
|
) -> None:
|
|
218
|
-
"""
|
|
219
|
-
cycle, where the phase is determined by thresholding the channel intensities.
|
|
224
|
+
"""Estimate discrete cell-cycle phase by thresholding normalized intensities.
|
|
220
225
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
226
|
+
For each channel, the background value is subtracted from the mean
|
|
227
|
+
intensity. The resulting intensities are normalized by the maximum
|
|
228
|
+
mean intensity observed in that channel. A channel is considered ON
|
|
229
|
+
if its normalized intensity exceeds the corresponding threshold.
|
|
230
|
+
|
|
231
|
+
The ON/OFF pattern across channels is then mapped to a discrete phase
|
|
232
|
+
using the sensor model.
|
|
228
233
|
|
|
229
234
|
Parameters
|
|
230
235
|
----------
|
|
231
|
-
df:
|
|
232
|
-
Dataframe
|
|
233
|
-
channels: List[str]
|
|
234
|
-
Names of
|
|
235
|
-
sensor: FUCCISensor
|
|
236
|
-
FUCCI sensor with
|
|
237
|
-
background: List[float]
|
|
238
|
-
Single value per channel
|
|
239
|
-
thresholds: List[float]
|
|
240
|
-
Thresholds to separate phases
|
|
236
|
+
df : pandas.DataFrame
|
|
237
|
+
Dataframe containing the normalized intensity columns.
|
|
238
|
+
channels : List[str]
|
|
239
|
+
Names of normalized intensity columns.
|
|
240
|
+
sensor : FUCCISensor
|
|
241
|
+
FUCCI sensor with phase analysis information.
|
|
242
|
+
background : List[float]
|
|
243
|
+
Single background value per channel.
|
|
244
|
+
thresholds : List[float]
|
|
245
|
+
Thresholds (0-1) used to separate phases.
|
|
241
246
|
|
|
242
247
|
Raises
|
|
243
248
|
------
|
|
244
249
|
ValueError
|
|
245
|
-
If
|
|
250
|
+
If required channels are missing or if background/threshold lists
|
|
251
|
+
are inconsistent with the number of channels.
|
|
246
252
|
"""
|
|
247
253
|
# sanity check: check that channels are present
|
|
248
254
|
for channel in channels:
|
|
@@ -257,8 +263,10 @@ def estimate_cell_phase_from_max_intensity(
|
|
|
257
263
|
check_channels(sensor.fluorophores, channels)
|
|
258
264
|
check_thresholds(sensor.fluorophores, thresholds)
|
|
259
265
|
|
|
260
|
-
phase_markers_list:
|
|
261
|
-
for channel, bg_value, threshold in zip(
|
|
266
|
+
phase_markers_list: list[pd.Series[bool]] = []
|
|
267
|
+
for channel, bg_value, threshold in zip(
|
|
268
|
+
channels, background, thresholds, strict=True
|
|
269
|
+
):
|
|
262
270
|
# get intensities and subtract background
|
|
263
271
|
intensity = df[channel] - bg_value
|
|
264
272
|
# threshold channels to decide if ON / OFF (data is in list per spot)
|
|
@@ -275,10 +283,10 @@ def estimate_cell_phase_from_max_intensity(
|
|
|
275
283
|
|
|
276
284
|
def estimate_cell_phase_from_background(
|
|
277
285
|
df: pd.DataFrame,
|
|
278
|
-
channels:
|
|
286
|
+
channels: list[str],
|
|
279
287
|
sensor: FUCCISensor,
|
|
280
|
-
background:
|
|
281
|
-
thresholds:
|
|
288
|
+
background: list[float],
|
|
289
|
+
thresholds: list[float],
|
|
282
290
|
) -> None:
|
|
283
291
|
"""Add a column in place to the dataframe with the estimated phase of the cell
|
|
284
292
|
cycle, where the phase is determined by comparing the channel intensities to
|
|
@@ -319,8 +327,10 @@ def estimate_cell_phase_from_background(
|
|
|
319
327
|
|
|
320
328
|
check_channels(sensor.fluorophores, channels)
|
|
321
329
|
|
|
322
|
-
phase_markers_list:
|
|
323
|
-
for channel, bg_value, threshold in zip(
|
|
330
|
+
phase_markers_list: list[pd.Series[bool]] = []
|
|
331
|
+
for channel, bg_value, threshold in zip(
|
|
332
|
+
channels, background, thresholds, strict=True
|
|
333
|
+
):
|
|
324
334
|
intensity = df[channel]
|
|
325
335
|
# threshold channels to decide if ON / OFF (data is in list per spot)
|
|
326
336
|
phase_markers_list.append(intensity > threshold * bg_value)
|
|
@@ -333,18 +343,159 @@ def estimate_cell_phase_from_background(
|
|
|
333
343
|
df[NewColumns.discrete_phase_bg()] = pd.Series(phase_names, dtype=str) # add as str
|
|
334
344
|
|
|
335
345
|
|
|
346
|
+
def _process_channel(
    series: np.ndarray,
    signal_mode: SignalMode,
    smooth: float,
    channel_name: str = "",
    signal_smooth: int = 0,
) -> list[np.ndarray]:
    """Turn one channel's time series into the feature arrays used for DTW.

    Depending on ``signal_mode`` the channel contributes its (optionally
    smoothed) signal, its derivative, or both.

    Parameters
    ----------
    series : np.ndarray
        The input signal array. It is never modified in place.
    signal_mode : SignalMode
        Processing mode: "signal", "derivative", or "both".
    smooth : float
        Smoothing factor passed to ``dtaidistance.preprocessing.differencing``
        when computing the derivative.
    channel_name : str, optional
        Channel name used in the warning emitted when smoothed differencing
        fails. If empty, the fallback to unsmoothed differencing is silent.
    signal_smooth : int, optional
        Window size for signal smoothing (Savitzky-Golay filter with
        polyorder=3). 0 means no smoothing. Must be > 3 to take effect;
        values 1-3 are ignored with a warning.
        Only applies when signal_mode is "signal" or "both".

    Returns
    -------
    list[np.ndarray]
        List of processed arrays. Length 1 for "signal" or "derivative",
        length 2 for "both" (signal first, then derivative).

    Raises
    ------
    ValueError
        If ``signal_mode`` is not one of the supported modes.
    """
    # Fail early: an unknown mode used to yield an empty feature list, which
    # only surfaced later as an obscure array-shape error in the caller.
    if signal_mode not in ("signal", "derivative", "both"):
        raise ValueError(
            f"Unknown signal_mode {signal_mode!r}; "
            "expected 'signal', 'derivative' or 'both'."
        )

    results: list[np.ndarray] = []

    if signal_mode in ("signal", "both"):
        if signal_smooth > 3:
            # savgol_filter returns a new array, so no defensive copy is needed
            smoothed_signal = signal.savgol_filter(
                series, window_length=signal_smooth, polyorder=3, mode="nearest"
            )
        else:
            if signal_smooth > 0:
                logger.warning(
                    "signal_smooth=%d is too small (must be > 3), skipping smoothing",
                    signal_smooth,
                )
            # copy so that later in-place edits of the features cannot
            # leak back into the caller's array
            smoothed_signal = series.copy()
        results.append(smoothed_signal)

    if signal_mode in ("derivative", "both"):
        try:
            diff = dtaidistance.preprocessing.differencing(series, smooth=smooth)
        except ValueError:
            # best effort: retry without the low-pass smoothing step
            if channel_name:
                logger.warning(
                    "Smoothing failed for channel %s, continuing without smoothing",
                    channel_name,
                )
            diff = dtaidistance.preprocessing.differencing(series)
        results.append(diff)

    return results
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def _compute_both_mode_scale_factor(processed_series: list[np.ndarray]) -> float:
    """Compute scale factor to balance signal and derivative contributions.

    In "both" mode, signals and derivatives may have different magnitudes.
    This function computes the factor by which signals must be multiplied
    so that their average spread (standard deviation) matches that of the
    derivatives.

    Parameters
    ----------
    processed_series : list[np.ndarray]
        List of processed arrays in order:
        [signal_ch1, deriv_ch1, signal_ch2, deriv_ch2, ...]

    Returns
    -------
    float
        Scale factor to multiply signals by (mean derivative std divided by
        mean signal std). Returns 1.0, i.e. "leave signals unchanged", when
        no balancing is possible: the list holds no signal/derivative pair,
        or either mean std is zero or not finite.
    """
    # In "both" mode, signals are at even indices, derivatives at odd indices
    signals = processed_series[0::2]
    derivatives = processed_series[1::2]

    # np.mean([]) would produce NaN (plus a RuntimeWarning)
    if not signals or not derivatives:
        return 1.0

    signal_std = float(np.mean([np.std(s) for s in signals]))
    deriv_std = float(np.mean([np.std(d) for d in derivatives]))

    if not (np.isfinite(signal_std) and np.isfinite(deriv_std)):
        return 1.0
    # signal_std == 0 would divide by zero; deriv_std == 0 would give a
    # factor of 0 that erases every signal feature.
    if signal_std == 0 or deriv_std == 0:
        return 1.0
    return deriv_std / signal_std
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def _apply_both_mode_scaling(
    processed_series: list[np.ndarray], scale_factor: float
) -> list[np.ndarray]:
    """Rescale the signal features of a "both"-mode feature list.

    Parameters
    ----------
    processed_series : list[np.ndarray]
        Feature arrays alternating signal and derivative:
        [signal_ch1, deriv_ch1, signal_ch2, deriv_ch2, ...]
    scale_factor : float
        Factor applied to every signal array.

    Returns
    -------
    list[np.ndarray]
        New list of the same length. Entries at even positions (signals) are
        multiplied by ``scale_factor``; entries at odd positions
        (derivatives) are passed through as the same array objects.
    """
    return [
        feature if position % 2 else feature * scale_factor
        for position, feature in enumerate(processed_series)
    ]
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def _compute_output_length_offset(signal_mode: SignalMode) -> int:
    """Tell how many elements the output array needs beyond the query length.

    Feature arrays built from derivatives are one element shorter than the
    track they come from, so one slot has to be added back to recover the
    original track length.

    Parameters
    ----------
    signal_mode : SignalMode
        The signal processing mode.

    Returns
    -------
    int
        1 for the "derivative" and "both" modes, 0 for anything else.
    """
    uses_derivative = signal_mode in ("derivative", "both")
    return 1 if uses_derivative else 0
|
|
482
|
+
|
|
483
|
+
|
|
336
484
|
# flake8: noqa: C901
|
|
337
485
|
def estimate_percentage_by_subsequence_alignment(
|
|
338
486
|
df: pd.DataFrame,
|
|
339
487
|
dt: float,
|
|
340
|
-
channels:
|
|
488
|
+
channels: list[str],
|
|
341
489
|
reference_data: pd.DataFrame,
|
|
342
490
|
smooth: float = 0.1,
|
|
343
491
|
penalty: float = 0.05,
|
|
344
492
|
track_id_name: str = "TRACK_ID",
|
|
345
493
|
minimum_track_length: int = 10,
|
|
346
494
|
use_zscore_norm: bool = True,
|
|
347
|
-
|
|
495
|
+
signal_mode: SignalMode = "derivative",
|
|
496
|
+
signal_weight: float = 1.0,
|
|
497
|
+
signal_smooth: int = 0,
|
|
498
|
+
use_derivative: bool | None = None,
|
|
348
499
|
) -> None:
|
|
349
500
|
"""Use subsequence alignment to estimate percentage.
|
|
350
501
|
|
|
@@ -359,7 +510,7 @@ def estimate_percentage_by_subsequence_alignment(
|
|
|
359
510
|
reference_data: pd.DataFrame
|
|
360
511
|
Containing reference intensities over time
|
|
361
512
|
smooth: float
|
|
362
|
-
Smoothing factor
|
|
513
|
+
Smoothing factor for derivative (removes high frequencies, 0-0.5)
|
|
363
514
|
penalty: float
|
|
364
515
|
Penalty for DTW algorithm, enforces diagonal warping path
|
|
365
516
|
track_id_name: str
|
|
@@ -370,10 +521,34 @@ def estimate_percentage_by_subsequence_alignment(
|
|
|
370
521
|
Use z-score normalization before differencing curves
|
|
371
522
|
Probably not needed if intensities of reference and measured
|
|
372
523
|
curve are similar
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
524
|
+
signal_mode: SignalMode
|
|
525
|
+
Signal processing mode:
|
|
526
|
+
- "signal": use raw signal only
|
|
527
|
+
- "derivative": use derivative only (default, for baseline independence)
|
|
528
|
+
- "both": use both signal and derivative as features
|
|
529
|
+
signal_weight: float
|
|
530
|
+
Weight for signal relative to derivative in "both" mode.
|
|
531
|
+
Default 1.0 means equal contribution. Values > 1.0 weight signal
|
|
532
|
+
higher, values < 1.0 weight derivative higher. Ignored for other modes.
|
|
533
|
+
signal_smooth: int
|
|
534
|
+
Window size for signal smoothing (Savitzky-Golay filter, polyorder=3).
|
|
535
|
+
0 means no smoothing. Must be > 3 if used.
|
|
536
|
+
Only applies in "signal" or "both" modes.
|
|
537
|
+
use_derivative: bool | None
|
|
538
|
+
Deprecated. Use signal_mode instead. If provided, overrides signal_mode
|
|
539
|
+
for backward compatibility (True -> "derivative", False -> "signal").
|
|
376
540
|
"""
|
|
541
|
+
# Handle backward compatibility with use_derivative parameter
|
|
542
|
+
if use_derivative is not None:
|
|
543
|
+
import warnings
|
|
544
|
+
|
|
545
|
+
warnings.warn(
|
|
546
|
+
"use_derivative is deprecated, use signal_mode instead",
|
|
547
|
+
DeprecationWarning,
|
|
548
|
+
stacklevel=2,
|
|
549
|
+
)
|
|
550
|
+
signal_mode = "derivative" if use_derivative else "signal"
|
|
551
|
+
|
|
377
552
|
if "time" not in reference_data:
|
|
378
553
|
raise ValueError("Need to provide time column in reference_data.")
|
|
379
554
|
if "percentage" not in reference_data:
|
|
@@ -395,33 +570,48 @@ def estimate_percentage_by_subsequence_alignment(
|
|
|
395
570
|
|
|
396
571
|
num_time = int(time_scale[-1] / dt)
|
|
397
572
|
new_time_scale = np.linspace(0, dt * num_time, num=num_time + 1)
|
|
398
|
-
|
|
573
|
+
actual_dt = new_time_scale[1] - new_time_scale[0]
|
|
574
|
+
if not np.isclose(dt, actual_dt):
|
|
575
|
+
raise ValueError(
|
|
576
|
+
f"Time scale mismatch: requested dt={dt}, but computed dt={actual_dt}. "
|
|
577
|
+
"Check that the reference data time scale is compatible with "
|
|
578
|
+
"the requested timestep."
|
|
579
|
+
)
|
|
399
580
|
|
|
400
581
|
# reference curve in time scale of provided track
|
|
401
582
|
percentage_ref = f_percentage(new_time_scale)
|
|
402
583
|
|
|
403
|
-
|
|
584
|
+
processed_series = []
|
|
404
585
|
for channel in channels:
|
|
405
586
|
series = interpolation_functions[channel](new_time_scale)
|
|
406
587
|
if use_zscore_norm:
|
|
407
588
|
series = stats.zscore(series)
|
|
408
|
-
# if all values are the same, we zero to
|
|
589
|
+
# if all values are the same, we zero to avoid numerical issues
|
|
409
590
|
if np.all(np.isnan(series)):
|
|
410
|
-
series =
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
591
|
+
series = np.zeros_like(series)
|
|
592
|
+
|
|
593
|
+
channel_features = _process_channel(
|
|
594
|
+
series, signal_mode, smooth, channel, signal_smooth
|
|
595
|
+
)
|
|
596
|
+
processed_series.extend(channel_features)
|
|
597
|
+
|
|
598
|
+
# For "both" mode, trim signal features to match derivative length and scale
|
|
599
|
+
both_mode_scale_factor = 1.0
|
|
600
|
+
if signal_mode == "both":
|
|
601
|
+
min_len = min(len(s) for s in processed_series)
|
|
602
|
+
processed_series = [s[-min_len:] for s in processed_series]
|
|
603
|
+
# Also trim the percentage reference to match
|
|
604
|
+
percentage_ref = percentage_ref[-min_len:]
|
|
605
|
+
# Compute and apply scaling to balance signal and derivative contributions
|
|
606
|
+
# signal_weight > 1.0 weights signal higher relative to derivative
|
|
607
|
+
both_mode_scale_factor = (
|
|
608
|
+
_compute_both_mode_scale_factor(processed_series) * signal_weight
|
|
609
|
+
)
|
|
610
|
+
processed_series = _apply_both_mode_scaling(
|
|
611
|
+
processed_series, both_mode_scale_factor
|
|
612
|
+
)
|
|
613
|
+
|
|
614
|
+
series = np.array(processed_series)
|
|
425
615
|
series = np.swapaxes(series, 0, 1)
|
|
426
616
|
|
|
427
617
|
df.loc[:, NewColumns.cell_cycle_dtw()] = np.nan
|
|
@@ -441,37 +631,48 @@ def estimate_percentage_by_subsequence_alignment(
|
|
|
441
631
|
# find percentages if track is long enough
|
|
442
632
|
queries = track_df[channels].to_numpy()
|
|
443
633
|
|
|
444
|
-
|
|
634
|
+
processed_queries = []
|
|
445
635
|
for idx in range(len(channels)):
|
|
636
|
+
query_series = queries[:, idx].copy()
|
|
446
637
|
if use_zscore_norm:
|
|
447
|
-
|
|
448
|
-
# if all values are the same, we zero to
|
|
449
|
-
if np.all(np.isnan(
|
|
450
|
-
|
|
451
|
-
if use_derivative:
|
|
452
|
-
diff_ch = dtaidistance.preprocessing.differencing(
|
|
453
|
-
queries[:, idx], smooth=smooth
|
|
454
|
-
)
|
|
455
|
-
else:
|
|
456
|
-
diff_ch = queries[:, idx]
|
|
457
|
-
queries_diff.append(diff_ch)
|
|
638
|
+
query_series = stats.zscore(query_series)
|
|
639
|
+
# if all values are the same, we zero to avoid numerical issues
|
|
640
|
+
if np.all(np.isnan(query_series)):
|
|
641
|
+
query_series = np.zeros_like(query_series)
|
|
458
642
|
|
|
459
|
-
|
|
643
|
+
channel_features = _process_channel(
|
|
644
|
+
query_series, signal_mode, smooth, signal_smooth=signal_smooth
|
|
645
|
+
)
|
|
646
|
+
processed_queries.extend(channel_features)
|
|
647
|
+
|
|
648
|
+
# For "both" mode, trim signal features to match derivative length and scale
|
|
649
|
+
if signal_mode == "both":
|
|
650
|
+
min_len = min(len(q) for q in processed_queries)
|
|
651
|
+
processed_queries = [q[-min_len:] for q in processed_queries]
|
|
652
|
+
# Apply same scale factor as reference to ensure consistent weighting
|
|
653
|
+
processed_queries = _apply_both_mode_scaling(
|
|
654
|
+
processed_queries, both_mode_scale_factor
|
|
655
|
+
)
|
|
656
|
+
|
|
657
|
+
query = np.array(processed_queries)
|
|
460
658
|
query = np.swapaxes(query, 0, 1)
|
|
461
659
|
|
|
462
660
|
sa = subsequence_alignment(query, series, penalty=penalty)
|
|
463
661
|
best_match = sa.best_match()
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
new_percentage[
|
|
470
|
-
if p[1] + 1 < len(percentage_ref):
|
|
471
|
-
last_percentage = p[1] + 1
|
|
662
|
+
length_offset = _compute_output_length_offset(signal_mode)
|
|
663
|
+
new_percentage = np.zeros(query.shape[0] + length_offset)
|
|
664
|
+
|
|
665
|
+
# Handle empty path case
|
|
666
|
+
if len(best_match.path) == 0:
|
|
667
|
+
new_percentage[:] = np.nan
|
|
472
668
|
else:
|
|
473
|
-
|
|
474
|
-
|
|
669
|
+
for p in best_match.path:
|
|
670
|
+
new_percentage[p[0]] = percentage_ref[p[1]]
|
|
671
|
+
if p[1] + 1 < len(percentage_ref):
|
|
672
|
+
last_percentage = p[1] + 1
|
|
673
|
+
else:
|
|
674
|
+
last_percentage = p[1]
|
|
675
|
+
new_percentage[-1] = percentage_ref[last_percentage]
|
|
475
676
|
# save estimated cell cycle percentages
|
|
476
677
|
df.loc[df[track_id_name] == track_id, NewColumns.cell_cycle_dtw()] = (
|
|
477
678
|
new_percentage[:]
|
|
@@ -481,21 +682,33 @@ def estimate_percentage_by_subsequence_alignment(
|
|
|
481
682
|
best_match.value
|
|
482
683
|
)
|
|
483
684
|
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
685
|
+
# Handle empty path case for DTW metrics
|
|
686
|
+
if len(best_match.path) == 0:
|
|
687
|
+
df.loc[df[track_id_name] == track_id, NewColumns.dtw_distortion()] = np.nan
|
|
688
|
+
df.loc[df[track_id_name] == track_id, NewColumns.dtw_distortion_norm()] = (
|
|
689
|
+
np.nan
|
|
690
|
+
)
|
|
691
|
+
df.loc[df[track_id_name] == track_id, NewColumns.dtw_warping_amount()] = (
|
|
692
|
+
np.nan
|
|
693
|
+
)
|
|
694
|
+
df.loc[
|
|
695
|
+
df[track_id_name] == track_id, NewColumns.rel_dtw_warping_amount()
|
|
696
|
+
] = np.nan
|
|
697
|
+
else:
|
|
698
|
+
_, distortion_score, _, _ = get_time_distortion_coefficient(best_match.path)
|
|
699
|
+
# save DTW distortion
|
|
700
|
+
df.loc[df[track_id_name] == track_id, NewColumns.dtw_distortion()] = (
|
|
701
|
+
distortion_score
|
|
702
|
+
)
|
|
703
|
+
df.loc[df[track_id_name] == track_id, NewColumns.dtw_distortion_norm()] = (
|
|
704
|
+
distortion_score / len(track_df)
|
|
705
|
+
)
|
|
492
706
|
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
707
|
+
# save DTW warping amount
|
|
708
|
+
df.loc[df[track_id_name] == track_id, NewColumns.dtw_warping_amount()] = (
|
|
709
|
+
warping_amount(best_match.path)
|
|
710
|
+
)
|
|
497
711
|
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
warping_amount(best_match.path) / len(track_df)
|
|
501
|
-
)
|
|
712
|
+
df.loc[
|
|
713
|
+
df[track_id_name] == track_id, NewColumns.rel_dtw_warping_amount()
|
|
714
|
+
] = warping_amount(best_match.path) / len(track_df)
|