PyPI - fucciphase - Versions diffs - 0.0.1__py3-none-any.whl - Mend

fucciphase 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

fucciphase/__init__.py +12 -0
fucciphase/fucci_phase.py +178 -0
fucciphase/io.py +67 -0
fucciphase/napari/__init__.py +5 -0
fucciphase/napari/tracks_to_napari.py +117 -0
fucciphase/phase.py +501 -0
fucciphase/plot.py +548 -0
fucciphase/py.typed +5 -0
fucciphase/sensor.py +454 -0
fucciphase/tracking_utilities.py +81 -0
fucciphase/utils/__init__.py +35 -0
fucciphase/utils/checks.py +16 -0
fucciphase/utils/dtw.py +59 -0
fucciphase/utils/normalize.py +202 -0
fucciphase/utils/phase_fit.py +47 -0
fucciphase/utils/simulator.py +85 -0
fucciphase/utils/track_postprocessing.py +454 -0
fucciphase/utils/trackmate.py +295 -0
fucciphase-0.0.1.dist-info/METADATA +137 -0
fucciphase-0.0.1.dist-info/RECORD +22 -0
fucciphase-0.0.1.dist-info/WHEEL +4 -0
fucciphase-0.0.1.dist-info/licenses/LICENSE +29 -0

fucciphase/phase.py ADDED Viewed

@@ -0,0 +1,501 @@
+from enum import Enum
+from typing import List
+import dtaidistance.preprocessing
+import numpy as np
+import pandas as pd
+from dtaidistance.dtw import warping_amount
+from dtaidistance.subsequence.dtw import subsequence_alignment
+from scipy import interpolate, stats
+from .sensor import FUCCISensor
+from .utils import (
+    check_channels,
+    check_thresholds,
+    get_norm_channel_name,
+    get_time_distortion_coefficient,
+)
+class NewColumns(str, Enum):
+    """Names of the dataframe columns generated by the analysis.
+    Each member's value is the literal column name written to the dataframe.
+    Note that some member names differ from their values (e.g. ``DTW_WARPING``
+    maps to ``"DTW_WARP"``).
+    Attributes
+    ----------
+    CELL_CYCLE_PERC_DTW : str
+        Cell cycle percentage estimated by DTW subsequence alignment
+    CELL_CYCLE_PERC : str
+        Cell cycle percentage estimated from intensities and discrete phase
+    PHASE : str
+        Phase of the cell cycle
+    DISCRETE_PHASE_MAX : str
+        Discrete phase obtained by thresholding against the maximum intensity
+    DISCRETE_PHASE_BG : str
+        Discrete phase obtained by comparing intensities to the background
+    DISCRETE_PHASE_DIFF : str
+        Discrete phase column (not written by any function shown here)
+    DTW_DISTORTION : str
+        Time distortion score of the DTW warping path
+    DTW_DISTORTION_REL : str
+        Time distortion score divided by the track length
+    DTW_DISTANCE : str
+        DTW distance of the best subsequence match
+    DTW_WARPING : str
+        Warping amount of the DTW path (column name ``"DTW_WARP"``)
+    REL_DTW_WARPING : str
+        Warping amount divided by the track length (column ``"DTW_WARP_REL"``)
+    """
+    CELL_CYCLE_PERC_DTW = "CELL_CYCLE_PERC_DTW"
+    CELL_CYCLE_PERC = "CELL_CYCLE_PERC"
+    PHASE = "PHASE"
+    DISCRETE_PHASE_MAX = "DISCRETE_PHASE_MAX"
+    DISCRETE_PHASE_BG = "DISCRETE_PHASE_BG"
+    DISCRETE_PHASE_DIFF = "DISCRETE_PHASE_DIFF"
+    DTW_DISTORTION = "DTW_DISTORTION"
+    DTW_DISTORTION_REL = "DTW_DISTORTION_REL"
+    DTW_DISTANCE = "DTW_DISTANCE"
+    DTW_WARPING = "DTW_WARP"
+    REL_DTW_WARPING = "DTW_WARP_REL"
+    # The static accessors below return the plain string value of a member so
+    # that callers can use it directly as a dataframe column label.
+    @staticmethod
+    def cell_cycle() -> str:
+        """Return the name of the cell cycle percentage column."""
+        return NewColumns.CELL_CYCLE_PERC.value
+    @staticmethod
+    def phase() -> str:
+        """Return the name of the phase column."""
+        return NewColumns.PHASE.value
+    @staticmethod
+    def cell_cycle_dtw() -> str:
+        """Return the name of the DTW-based cell cycle percentage column."""
+        return NewColumns.CELL_CYCLE_PERC_DTW.value
+    @staticmethod
+    def discrete_phase_max() -> str:
+        """Return the name of the discrete phase column (maximum-intensity method)."""
+        return NewColumns.DISCRETE_PHASE_MAX.value
+    @staticmethod
+    def discrete_phase_bg() -> str:
+        """Return the name of the discrete phase column (background method)."""
+        return NewColumns.DISCRETE_PHASE_BG.value
+    @staticmethod
+    def discrete_phase_diff() -> str:
+        """Return the name of the ``DISCRETE_PHASE_DIFF`` column."""
+        return NewColumns.DISCRETE_PHASE_DIFF.value
+    @staticmethod
+    def dtw_distortion() -> str:
+        """Return the name of the DTW distortion column."""
+        return NewColumns.DTW_DISTORTION.value
+    @staticmethod
+    def dtw_distortion_norm() -> str:
+        """Return the name of the relative DTW distortion column.
+        The accessor is called ``..._norm`` but maps to ``DTW_DISTORTION_REL``.
+        """
+        return NewColumns.DTW_DISTORTION_REL.value
+    @staticmethod
+    def dtw_distance() -> str:
+        """Return the name of the DTW distance column."""
+        return NewColumns.DTW_DISTANCE.value
+    @staticmethod
+    def dtw_warping_amount() -> str:
+        """Return the name of the DTW warping amount column."""
+        return NewColumns.DTW_WARPING.value
+    @staticmethod
+    def rel_dtw_warping_amount() -> str:
+        """Return the name of the relative DTW warping amount column."""
+        return NewColumns.REL_DTW_WARPING.value
+def generate_cycle_phases(
+    df: pd.DataFrame,
+    channels: List[str],
+    sensor: FUCCISensor,
+    thresholds: List[float],
+    estimate_percentage: bool = False,
+) -> None:
+    """Add a column in place to the dataframe with the phase of the cell cycle.
+    The phase is determined using a threshold on the channel intensities
+    assuming a FUCCI sensor.
+    The thresholds per channel must be between 0 and 1.
+    Example:
+        channels = ["CH1", "CH2"]
+        thresholds = [0.1, 0.1]
+    The sensor needs to be calibrated for each cell line.
+    For that, record the FUCCI intensities of multiple cell cycles
+    by live-cell fluorescence microscopy.
+    See the examples for more details.
+    The thresholds need to be chosen based on the expected noise of the background and
+    uncertainty in intensity computation.
+    They give the ratio to the maximum intensity.
+    E.g., a threshold of 0.1 means that all intensities below 0.1 times the maximum
+    intensity are considered background signal.
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Dataframe with columns holding normalized intensities
+    sensor: FUCCISensor
+        FUCCI sensor with phase specifics
+    channels: List[str]
+        Names of channels
+    thresholds: List[float]
+        Thresholds to separate phases
+    estimate_percentage: bool
+        Estimate cell cycle percentages
+    Raises
+    ------
+    ValueError
+        If the number of thresholds is not 2
+    ValueError
+        If the phases are not unique
+    ValueError
+        If the thresholds are not between 0 and 1, one excluded
+    """
+    # sanity check: check that the normalized channels are present
+    norm_channel_names = []
+    for channel in channels:
+        norm_channel_name = get_norm_channel_name(channel)
+        if norm_channel_name not in df.columns:
+            raise ValueError(
+                f"Column {get_norm_channel_name(channel)} not found, call "
+                f"normalize_channel({channel}) on the dataframe."
+            )
+        norm_channel_names.append(norm_channel_name)
+    # check that all channels are present
+    check_channels(sensor.fluorophores, channels)
+    # compute phases
+    estimate_cell_phase_from_max_intensity(
+        df,
+        norm_channel_names,
+        sensor,
+        background=[0] * sensor.fluorophores,
+        thresholds=thresholds,
+    )
+    # name of phase_column
+    phase_column = NewColumns.discrete_phase_max()
+    # compute percentages
+    if estimate_percentage:
+        estimate_cell_cycle_percentage(df, norm_channel_names, sensor, phase_column)
+def estimate_cell_cycle_percentage(
+    df: pd.DataFrame, channels: List[str], sensor: FUCCISensor, phase_column: str
+) -> None:
+    """Estimate cell cycle percentage from intensity pairs.
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Dataframe with columns holding normalized intensities
+    sensor: FUCCISensor
+        FUCCI sensor with phase specifics
+    channels: List[str]
+        Names of channels
+    phase_column: str
+        Name of phase column
+    """
+    percentages = []
+    # iterate through data frame
+    for _, row in df.iterrows():
+        intensities = [row[channel] for channel in channels]
+        phase = row[phase_column]
+        percentage = sensor.get_estimated_cycle_percentage(phase, intensities)
+        percentages.append(percentage)
+    # TODO add inplace to dataframe
+    # df[NewColumns.cell_cycle()] = pd.Series(percentages, dtype=float)
+    df[NewColumns.cell_cycle()] = percentages
+def estimate_cell_phase_from_max_intensity(
+    df: pd.DataFrame,
+    channels: List[str],
+    sensor: FUCCISensor,
+    background: List[float],
+    thresholds: List[float],
+) -> None:
+    """Add a column in place to the dataframe with the estimated phase of the cell
+    cycle, where the phase is determined by thresholding the channel intensities.
+    The provided thresholds are used to decide if a channel is switched on (ON).
+    For that, the background is subtracted from the mean intensity.
+    The obtained values are normalized w.r.t. the maximum mean intensity in the
+    respective channel available in the DataFrame.
+    Hence, the threshold values should be between 0 and 1.
+    This method will not work reliably if not enough cells from different phases
+    are contained in the DataFrame.
+    Parameters
+    ----------
+    df: pd.DataFrame
+        Dataframe with a CELL_CYCLE_PERC column
+    channels: List[str]
+        Names of channels
+    sensor: FUCCISensor
+        FUCCI sensor with specific phase analysis information
+    background: List[float]
+        Single value per channel representing background
+    thresholds: List[float]
+        Thresholds to separate phases
+    Raises
+    ------
+    ValueError
+        If the dataframe does not contain the normalized channels.
+    """
+    # sanity check: check that channels are present
+    for channel in channels:
+        if channel not in df.columns:
+            raise ValueError(
+                f"Column {channel} not found, provide correct input parameters."
+            )
+    if len(channels) != len(background):
+        raise ValueError("Provide one background value per channel.")
+    check_channels(sensor.fluorophores, channels)
+    check_thresholds(sensor.fluorophores, thresholds)
+    phase_markers_list: List[pd.Series[bool]] = []
+    for channel, bg_value, threshold in zip(channels, background, thresholds):
+        # get intensities and subtract background
+        intensity = df[channel] - bg_value
+        # threshold channels to decide if ON / OFF (data is in list per spot)
+        phase_markers_list.append(intensity > threshold * intensity.max())
+    phase_markers_list_tilted = np.array(phase_markers_list).T
+    # store phases
+    phase_names = []
+    for phase_markers in phase_markers_list_tilted:
+        phase_names.append(sensor.get_phase(phase_markers))
+    # TODO check pd.Series issue
+    df[NewColumns.discrete_phase_max()] = phase_names
+def estimate_cell_phase_from_background(
+    df: pd.DataFrame,
+    channels: List[str],
+    sensor: FUCCISensor,
+    background: List[float],
+    thresholds: List[float],
+) -> None:
+    """Add a column in place to the dataframe with the estimated phase of the cell
+    cycle, where the phase is determined by comparing the channel intensities to
+    the respective background intensities.
+    The provided factors are used to decide if a channel is switched on (ON).
+    If the intensity exceeds the background level times the factor, the channel
+    is ON. Hence, the factors should be greater than 0.
+    Parameters
+    ----------
+    df: pd.DataFrame
+        Dataframe with a CELL_CYCLE_PERC column
+    channels: List[str]
+        Names of channels
+    sensor: FUCCISensor
+        FUCCI sensor with specific phase analysis information
+    background: List[float]
+        Single value per channel representing background
+    thresholds: List[float]
+        Thresholds to separate phases
+    Raises
+    ------
+    ValueError
+        If the dataframe does not contain the normalized channels.
+    """
+    # sanity check: check that channels are present
+    for channel in channels:
+        if channel not in df.columns:
+            raise ValueError(
+                f"Column {channel} not found, provide correct input parameters."
+            )
+    if len(channels) != len(background):
+        raise ValueError("Provide one background value per channel.")
+    check_channels(sensor.fluorophores, channels)
+    phase_markers_list: List[pd.Series[bool]] = []
+    for channel, bg_value, threshold in zip(channels, background, thresholds):
+        intensity = df[channel]
+        # threshold channels to decide if ON / OFF (data is in list per spot)
+        phase_markers_list.append(intensity > threshold * bg_value)
+    phase_markers_list_tilted = np.array(phase_markers_list).T
+    # store phases
+    phase_names = []
+    for phase_markers in phase_markers_list_tilted:
+        phase_names.append(sensor.get_phase(phase_markers))
+    df[NewColumns.discrete_phase_bg()] = pd.Series(phase_names, dtype=str)  # add as str
+# flake8: noqa: C901
+def estimate_percentage_by_subsequence_alignment(
+    df: pd.DataFrame,
+    dt: float,
+    channels: List[str],
+    reference_data: pd.DataFrame,
+    smooth: float = 0.1,
+    penalty: float = 0.05,
+    track_id_name: str = "TRACK_ID",
+    minimum_track_length: int = 10,
+    use_zscore_norm: bool = True,
+    use_derivative: bool = True,
+) -> None:
+    """Use subsequence alignment to estimate percentage.
+    Parameters
+    ----------
+    df: pd.DataFrame
+        DataFrame with tracks
+    dt: float
+        Timestep between frames in hours
+    channels: List[str]
+        List of channels to be matched with reference data
+    reference_data: pd.DataFrame
+        Containing reference intensities over time
+    smooth: float
+        Smoothing factor, see dtaidistance documentation
+    penalty: float
+        Penalty for DTW algorithm, enforces diagonal warping path
+    track_id_name: str
+        Name of column with track IDs
+    minimum_track_length: int
+        Only estimate phase for tracks longer than this
+    use_zscore_norm: bool
+        Use z-score normalization before differencing curves
+        Probably not needed if intensities of reference and measured
+        curve are similar
+    use_derivative: bool
+        Take derivative to perform alignment independent of intensity
+        baseline (in default mode also after normalization)
+    """
+    if "time" not in reference_data:
+        raise ValueError("Need to provide time column in reference_data.")
+    if "percentage" not in reference_data:
+        raise ValueError("Need to provide percentage column in reference_data.")
+    if not set(channels).issubset(reference_data.columns):
+        raise ValueError("Provide channel names in reference_data.")
+    # interpolate reference curve
+    time_scale = reference_data["time"].to_numpy()
+    interpolation_functions = {}
+    for channel in channels:
+        interpolation_functions[channel] = interpolate.interp1d(
+            time_scale, reference_data[channel].to_numpy()
+        )
+    f_percentage = interpolate.interp1d(
+        time_scale, reference_data["percentage"].to_numpy()
+    )
+    num_time = int(time_scale[-1] / dt)
+    new_time_scale = np.linspace(0, dt * num_time, num=num_time + 1)
+    assert np.isclose(dt, new_time_scale[1] - new_time_scale[0])
+    # reference curve in time scale of provided track
+    percentage_ref = f_percentage(new_time_scale)
+    series_diff = []
+    for channel in channels:
+        series = interpolation_functions[channel](new_time_scale)
+        if use_zscore_norm:
+            series = stats.zscore(series)
+        # if all values are the same, we zero to not numerical issues
+        if np.all(np.isnan(series)):
+            series = 0.0
+        if use_derivative:
+            try:
+                diff_ch = dtaidistance.preprocessing.differencing(series, smooth=smooth)
+            except ValueError:
+                print(
+                    "WARNING: The smoothing failed, continue without smoothing"
+                    f" for channel {channel}"
+                )
+            diff_ch = dtaidistance.preprocessing.differencing(series)
+        else:
+            diff_ch = series
+        series_diff.append(diff_ch)
+    series = np.array(series_diff)
+    series = np.swapaxes(series, 0, 1)
+    df.loc[:, NewColumns.cell_cycle_dtw()] = np.nan
+    track_ids = df[track_id_name].unique()
+    for track_id in track_ids:
+        track_df = df.loc[df[track_id_name] == track_id]
+        # the algorithm does not work for short tracks
+        if len(track_df) < minimum_track_length:
+            # insert NaN
+            new_percentage = np.full(len(track_df), np.nan)
+            df.loc[df[track_id_name] == track_id, NewColumns.cell_cycle_dtw()] = (
+                new_percentage[:]
+            )
+            continue
+        # find percentages if track is long enough
+        queries = track_df[channels].to_numpy()
+        queries_diff = []
+        for idx in range(len(channels)):
+            if use_zscore_norm:
+                queries[:, idx] = stats.zscore(queries[:, idx])
+            # if all values are the same, we zero to not numerical issues
+            if np.all(np.isnan(queries[:, idx])):
+                queries[:, idx] = 0.0
+            if use_derivative:
+                diff_ch = dtaidistance.preprocessing.differencing(
+                    queries[:, idx], smooth=smooth
+                )
+            else:
+                diff_ch = queries[:, idx]
+            queries_diff.append(diff_ch)
+        query = np.array(queries_diff)
+        query = np.swapaxes(query, 0, 1)
+        sa = subsequence_alignment(query, series, penalty=penalty)
+        best_match = sa.best_match()
+        if use_derivative:
+            new_percentage = np.zeros(query.shape[0] + 1)
+        else:
+            new_percentage = np.zeros(query.shape[0])
+        for p in best_match.path:
+            new_percentage[p[0]] = percentage_ref[p[1]]
+        if p[1] + 1 < len(percentage_ref):
+            last_percentage = p[1] + 1
+        else:
+            last_percentage = p[1]
+        new_percentage[-1] = percentage_ref[last_percentage]
+        # save estimated cell cycle percentages
+        df.loc[df[track_id_name] == track_id, NewColumns.cell_cycle_dtw()] = (
+            new_percentage[:]
+        )
+        # save DTW distance
+        df.loc[df[track_id_name] == track_id, NewColumns.dtw_distance()] = (
+            best_match.value
+        )
+        _, distortion_score, _, _ = get_time_distortion_coefficient(best_match.path)
+        # save DTW distortion
+        df.loc[df[track_id_name] == track_id, NewColumns.dtw_distortion()] = (
+            distortion_score
+        )
+        df.loc[df[track_id_name] == track_id, NewColumns.dtw_distortion_norm()] = (
+            distortion_score / len(track_df)
+        )
+        # save DTW warping amount
+        df.loc[df[track_id_name] == track_id, NewColumns.dtw_warping_amount()] = (
+            warping_amount(best_match.path)
+        )
+        # save DTW warping amount
+        df.loc[df[track_id_name] == track_id, NewColumns.rel_dtw_warping_amount()] = (
+            warping_amount(best_match.path) / len(track_df)
+        )