PyPI - circaPy - Versions diffs - 0.1.5__py3-none-any.whl - Mend

circaPy 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

circaPy/.idea/actigraphy_analysis.iml +11 -0
circaPy/.idea/misc.xml +4 -0
circaPy/.idea/modules.xml +8 -0
circaPy/.idea/vcs.xml +6 -0
circaPy/.idea/workspace.xml +95 -0
circaPy/__init__.py +0 -0
circaPy/activity.py +391 -0
circaPy/episodes.py +505 -0
circaPy/periodogram.py +101 -0
circaPy/plots.py +351 -0
circaPy/preprocessing.py +261 -0
circaPy/sleep_process.py +96 -0
circapy-0.1.5.dist-info/METADATA +104 -0
circapy-0.1.5.dist-info/RECORD +16 -0
circapy-0.1.5.dist-info/WHEEL +4 -0
circapy-0.1.5.dist-info/licenses/LICENSE +674 -0

circaPy/plots.py ADDED Viewed

@@ -0,0 +1,351 @@
+import re
+import pdb
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gs
+from matplotlib.transforms import Bbox
+import circaPy.activity as act
+import circaPy.preprocessing as prep
+@prep.validate_input
+@prep.invert_light_values
+@prep.plot_kwarg_decorator
+def plot_actogram(data,
+                  subject_no=0,
+                  light_col=-1,
+                  ylim=None,
+                  fig=False,
+                  subplot=False,
+                  ldralpha=0.5,
+                  start_day=0,
+                  day_label_size=5,
+                  linewidth=0.5,
+                  **kwargs):
+    """
+    Plot a double-plotted actogram of activity data over several days,
+    with background shading taken from the light column.
+    Each row shows two consecutive calendar days (the row's own day
+    followed by the next one), so every day is drawn twice.
+    Parameters
+    ----------
+    data : pd.DataFrame
+        Time-indexed dataframe with activity values in one column per
+        subject and one column holding the light levels.
+    subject_no : int
+        Position of the activity column to plot, defaults to 0.
+    light_col : int
+        Position of the column holding light information, defaults to -1.
+    ylim : list of two numbers, optional
+        Minimum and maximum y values for every row. Defaults to
+        ``[0, 120]`` when not given.
+    fig : matplotlib figure object
+        Figure to draw on. If not passed (False) a new figure is created.
+    subplot : matplotlib axes object
+        Axes from a larger figure on which to draw the actogram; its
+        subplotspec is subdivided into one row per day. Required when
+        ``fig`` is passed.
+    ldralpha : float
+        Alpha level of the light shading, defaults to 0.5.
+    start_day : int
+        Number shown as the label of the first row, defaults to 0.
+    day_label_size : int
+        Font size of the day labels on the y axis, defaults to 5.
+    linewidth : float
+        Line width of the activity trace, defaults to 0.5.
+    **kwargs
+        ``timeaxis`` overrides the default in the returned parameters;
+        the remaining keys are not read here.
+    Returns
+    -------
+    matplotlib figure
+        Instance containing the overall figure.
+    sequence of matplotlib axes
+        One axes per plotted row (array for a new figure, list when
+        drawn inside ``subplot``).
+    dict
+        Default plotting parameters (labels, title, tick interval,
+        ``timeaxis`` and the ``subplot`` passed in).
+    Raises
+    ------
+    ValueError
+        If ``data`` is empty, if no regular sampling frequency can be
+        inferred from its index, or if ``fig`` is passed without
+        ``subplot``.
+    """
+    # resolve the default here so no mutable object is shared between calls
+    if ylim is None:
+        ylim = [0, 120]
+    # check if data is empty
+    if data.empty:
+        raise ValueError("Input Dataframe is empty. Cannot plot actogram")
+    # select the correct data to plot for activity and light
+    col_data = data.columns[subject_no]
+    ldr_col = data.columns[light_col]
+    data_plot = data.loc[:, col_data].copy()
+    data_light = data.loc[:, ldr_col].copy()
+    # grab the sampling step and the span of the current index
+    freq = pd.infer_freq(data_plot.index)
+    if freq is None:
+        raise ValueError(
+            "Cannot infer a regular frequency from the data index. "
+            "Cannot plot actogram")
+    start = data_plot.index.min()
+    end = data_plot.index.max()
+    # infer_freq may return a bare unit such as "h", which Timedelta
+    # rejects; retry with an explicit multiple of 1
+    try:
+        step = pd.Timedelta(freq)
+    except (ValueError, TypeError):
+        step = pd.Timedelta(f"1{freq}")
+    # pad by the number of samples found on the first date, before and
+    # after the data, keeping the new index aligned with the original one
+    day_length = len(data_plot.loc[str(data_plot.index[0].date())])
+    extended_start = start - (step * day_length)
+    extended_end = end + (step * day_length)
+    extended_index = pd.date_range(
+        start=extended_start, end=extended_end, freq=step)
+    # padded samples get -100 so they fall below the visible y range
+    data_plot = data_plot.reindex(extended_index, fill_value=-100)
+    # select just the days
+    days = data_plot.index.normalize().unique()
+    # set all 0 values to be very low so not showing on y index starting at 0
+    for mask in data_plot, data_light:
+        mask[mask == 0] = -100
+    # one row per day except the last, which only appears as a "next day"
+    n_rows = len(days) - 1
+    if not fig:
+        # create a new figure if not passed one when called
+        fig, ax = plt.subplots(nrows=n_rows)
+        # a single row comes back as a bare Axes; keep it iterable
+        ax = np.atleast_1d(ax)
+        fig.subplots_adjust(hspace=0)
+    else:
+        if subplot is None or subplot is False:
+            raise ValueError(
+                "A subplot must be provided when fig is passed")
+        # remove ticks so don't draw over when we add later
+        subplot.set(yticks=[], xticks=[])
+        # draw subplots for each day on the subplot given
+        subplot_spec = subplot.get_subplotspec()
+        subplot_grid = gs.GridSpecFromSubplotSpec(nrows=n_rows,
+                                                  ncols=1,
+                                                  subplot_spec=subplot_spec,
+                                                  wspace=0,
+                                                  hspace=0)
+        ax = []
+        for grid in subplot_grid:
+            sub_ax = plt.Subplot(fig, grid)
+            fig.add_subplot(sub_ax)
+            ax.append(sub_ax)
+    # select each day to then plot on separate axis
+    # plot two days on each row
+    for day_label, axis in zip(days, ax):
+        # get two days of data to plot
+        curr_day = str(day_label.date())
+        next_day = str(day_label.date() + pd.Timedelta("1d"))
+        curr_data = data_plot.loc[curr_day:next_day]
+        curr_data_light = data_light.loc[curr_day:next_day]
+        # create masked data for fill between to avoid horizontal lines
+        fill_data = curr_data.where(curr_data > 0)
+        fill_ldr = curr_data_light.where(curr_data_light > 0)
+        # plot the data and light_col
+        axis.fill_between(fill_ldr.index,
+                          fill_ldr,
+                          alpha=ldralpha,
+                          facecolor="grey")
+        axis.plot(curr_data, linewidth=linewidth)
+        axis.fill_between(fill_data.index,
+                          fill_data)
+        # hide ticks and spines so the stacked rows read as one plot
+        axis.set(xticks=[],
+                 xlim=[curr_data.index[0],
+                       curr_data.index[-1]],
+                 yticks=[],
+                 ylim=ylim)
+        for pos in ("left", "right", "top", "bottom"):
+            axis.spines[pos].set_visible(False)
+    # create the y labels for every 10th row
+    day_markers = np.arange(0, len(days), 10) + start_day
+    for axis, day in zip(ax[::10], day_markers):
+        axis.set_ylabel(day,
+                        rotation=0,
+                        va='center',
+                        ha='right',
+                        fontsize=day_label_size)
+    # create defaults dict
+    params_dict = {
+        "xlabel": "Time",
+        "ylabel": "Days",
+        "interval": 6,
+        "title": "Double Plotted Actogram",
+        "timeaxis": True,
+        "subplot": subplot
+    }
+    # put axis as a controllable parameter
+    if "timeaxis" in kwargs:
+        params_dict['timeaxis'] = kwargs["timeaxis"]
+    return fig, ax, params_dict
+@prep.validate_input
+@prep.invert_light_values
+@prep.plot_kwarg_decorator
+def plot_activity_profile(data,
+                          col=0,
+                          light_col=-1,
+                          subplot=None,
+                          resample=False,
+                          resample_freq="h",
+                          *args,
+                          **kwargs):
+    """
+    Plot the activity profile with mean and SEM (Standard Error of the Mean).
+    Optionally resample the data before plotting. The light column is
+    rescaled and drawn as a grey background region.
+    Parameters
+    ----------
+    data : pd.DataFrame
+        Activity data indexed by time. The column at position `col` is
+        plotted and the column at position `light_col` provides the
+        light levels.
+    col : int, optional
+        The position of the activity column to plot (default is 0).
+    light_col : int, optional
+        The position of the light column (default is -1).
+    subplot : matplotlib.axes._axes.Axes, optional
+        Subplot to plot on. If None, a new figure and axis are
+        created (default is None).
+    resample : bool, optional
+        Whether to resample the data before plotting.
+        If `True`, the data will be resampled to the frequency
+        specified by `resample_freq` (default is `False`).
+    resample_freq : str, optional
+        The frequency to resample the data to.
+        This can be any valid pandas offset string
+        (e.g., "h" for hourly, "min" for minutely).
+        The default is "h" (hourly).
+    *args, **kwargs : additional arguments
+        Only `timeaxis` is read here; it overrides the default in the
+        returned parameters.
+    Returns
+    -------
+    fig : matplotlib.figure.Figure
+        The figure containing the plot.
+    ax : matplotlib.axes._axes.Axes
+        The axis with the plot.
+    params_dict : dict
+        A dictionary containing the plot's parameters,
+        including labels, title, and xlim.
+    Raises
+    ------
+    ValueError
+        If no regular sampling frequency can be inferred from the index.
+    """
+    # ability to resample if required
+    if resample:
+        data = data.resample(resample_freq).mean()
+    # select just the subject
+    curr_data = data.iloc[:, col]
+    light_data = data.iloc[:, light_col]
+    # Calculate mean activity and SEM
+    mean, sem = act.calculate_mean_activity(curr_data, sem=True)
+    light_mean = act.calculate_mean_activity(light_data)
+    # Convert the index of mean and sem to a DatetimeIndex starting 2001-01-01
+    start_date = "2001-01-01"
+    freq = pd.infer_freq(data.index)
+    if freq is None:
+        raise ValueError(
+            "Cannot infer a regular frequency from the data index. "
+            "Cannot plot activity profile")
+    datetime_index = pd.date_range(
+        start=start_date, periods=len(mean), freq=freq)
+    mean.index = datetime_index
+    sem.index = datetime_index
+    light_mean.index = datetime_index
+    # Ensure freq has a numeric component
+    if not any(char.isdigit() for char in freq):
+        freq = pd.Timedelta('1' + freq)  # Prepend '1' if missing
+    step = pd.Timedelta(freq)
+    # Extend the light_mean data by one extra period and forward fill
+    # so the shading reaches the right-hand edge of the last bin
+    light_mean = pd.concat([light_mean, pd.Series(
+        [light_mean.iloc[-1]], index=[light_mean.index[-1] + step])])
+    light_mean.ffill(inplace=True)
+    # Offset the mean and sem data to plot in the middle of each bin
+    offset_time = 0.5 * step
+    mean.index += offset_time
+    sem.index += offset_time
+    light_mean.index += offset_time
+    # Create plot if no subplot is provided
+    if subplot is None:
+        fig, ax = plt.subplots(figsize=(10, 6))
+    else:
+        # use the figure that owns the axes; the current pyplot figure
+        # may be a different one
+        fig = subplot.figure
+        ax = subplot
+    # Plot the mean line
+    ax.plot(
+        mean.index, mean, label="Mean Activity", color="blue", linewidth=2)
+    # Add shaded SEM region
+    ax.fill_between(
+        mean.index,
+        mean - sem,
+        mean + sem,
+        color="blue",
+        alpha=0.3,
+        label="± SEM"
+    )
+    # get ylims to set at this level later
+    ylim = ax.get_ylim()
+    # Find the min and max of light_mean
+    min_light_mean = light_mean.min()
+    max_light_mean = light_mean.max()
+    # Define the target range: far beyond the visible y range, so the
+    # scaled light trace either fills the plot or sits below it
+    target_max = 1000 * ylim[1]
+    target_min = -1 * target_max
+    # Scale the light_mean values to the target range
+    # The formula to scale the values is:
+    # scaled_value = (value - min_value) / (max_value - min_value)
+    # * (target_max - target_min) + target_min
+    scaled_light_mean = (light_mean - min_light_mean
+                         ) / (max_light_mean - min_light_mean
+                              ) * (target_max - target_min) + target_min
+    # Add lights region
+    ax.fill_between(
+        scaled_light_mean.index,
+        scaled_light_mean,
+        color='grey',
+        alpha=0.2
+    )
+    # Add labels, legend, and title
+    ax.set_xlabel("Time")
+    ax.set_ylabel("Activity")
+    ax.set_ylim([0, ylim[1]])
+    ax.set_title("Activity Profile with Mean and SEM")
+    ax.legend()
+    # create defaults dict
+    xlim = [mean.index[0], (mean.index[0] + pd.Timedelta("24h"))]
+    params_dict = {
+        "xlabel": "Time",
+        "ylabel": "Activity",
+        "interval": 6,
+        "title": "Mean activity profile",
+        "timeaxis": True,
+        "xlim": xlim,
+    }
+    # put axis as a controllable parameter
+    if "timeaxis" in kwargs:
+        params_dict['timeaxis'] = kwargs["timeaxis"]
+    return fig, ax, params_dict

circaPy/preprocessing.py ADDED Viewed

@@ -0,0 +1,261 @@
+import pdb
+from functools import wraps
+import pingouin as pg
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+idx = pd.IndexSlice
+# This script contains functions which are useful for preprocessing of
+# actigraphy data
+#### Decorators ####
+def plot_kwarg_decorator(func):
+    """
+    Universal decorator for plot formatting and configuration.
+    Handles xlabels, ylabels, titles, legends, time formatting, saving,
+    and showing plots. Values passed as keyword arguments take
+    precedence over the defaults in the ``params_dict`` returned by the
+    plotting function.
+    Recognised keyword arguments: ``xlim``, ``xfmt``, ``interval``,
+    ``xlabel``, ``ylabel``, ``ylabelpos``, ``subplot``, ``title``,
+    ``legend``, ``legend_loc``, ``figsize``, ``savefig``, ``fname``
+    and ``showfig``.
+    :param func: The plotting function to decorate. It must return
+        ``(fig, ax, params_dict)``.
+    :return: A decorated function that applies plot configurations and
+        returns the same ``(fig, ax, params_dict)`` tuple.
+    """
+    @wraps(func)
+    def wrapper(data, *args, **kwargs):
+        # Call the original plotting function
+        fig, ax, params_dict = func(data, *args, **kwargs)
+        # with several subplots only the last (bottom) one gets x-axis
+        # formatting
+        if isinstance(ax, (np.ndarray, list)):
+            final_ax = ax[-1]
+        else:
+            final_ax = ax
+        # Configure x-axis limits
+        if "xlim" in kwargs or "xlim" in params_dict:
+            xlim = kwargs.get("xlim", params_dict.get("xlim", None))
+            if xlim:
+                final_ax.set_xlim(xlim)
+        # Configure x-axis time formatting
+        if "timeaxis" in params_dict and params_dict["timeaxis"]:
+            xfmt = kwargs.get("xfmt", mdates.DateFormatter("%H:%M"))
+            final_ax.xaxis.set_major_formatter(xfmt)
+            interval = kwargs.get("interval", params_dict.get("interval", 1))
+            final_ax.xaxis.set_major_locator(
+                mdates.HourLocator(interval=interval))
+            fig.autofmt_xdate()
+        # Set x-axis label
+        xlabel = kwargs.get("xlabel", params_dict.get("xlabel", ""))
+        if xlabel:
+            final_ax.set_xlabel(
+                xlabel,
+                labelpad=5,
+                ha='center',
+                va='center')
+        # Set y-axis label: on the enclosing subplot when one is known,
+        # otherwise as free text on the figure
+        ylabel = kwargs.get("ylabel", params_dict.get("ylabel", ""))
+        ylabelpos = kwargs.get("ylabelpos", (0.02, 0.5))
+        subplot = kwargs.get("subplot", params_dict.get("subplot", None))
+        if ylabel:
+            if subplot:
+                subplot.set_ylabel(
+                    ylabel,
+                    labelpad=5,
+                    ha='center',
+                    va='center',
+                    rotation='vertical')
+            else:
+                fig.text(
+                    ylabelpos[0],
+                    ylabelpos[1],
+                    ylabel,
+                    ha="center",
+                    va="center",
+                    rotation="vertical"
+                )
+        # Set plot title
+        title = kwargs.get("title", params_dict.get("title", ""))
+        if title:
+            fig.suptitle(title)
+        # Configure legend
+        if kwargs.get("legend", False):
+            legend_loc = kwargs.get("legend_loc", 1)
+            handles, labels = final_ax.get_legend_handles_labels()
+            fig.legend(handles, labels, loc=legend_loc)
+        # Configure figure size
+        if "figsize" in kwargs:
+            fig.set_size_inches(kwargs["figsize"])
+        # Save or show the plot. Operate on the returned figure itself:
+        # the current pyplot figure may be a different one when the
+        # plot was drawn on a caller-supplied figure or subplot.
+        if kwargs.get("savefig", False):
+            fname = kwargs.get("fname", "plot.png")
+            fig.savefig(fname)
+            plt.close(fig)
+        if kwargs.get("showfig", False):
+            plt.show()
+        return fig, ax, params_dict
+    return wrapper
+def validate_input(func):
+    """
+    Decorator to validate DataFrames or Series passed to the function.
+    Every positional and keyword argument that is a DataFrame or Series
+    is checked; other arguments are passed through untouched.
+    - Checks if any input is empty.
+    - Checks if any input consists only of zeros.
+    - Checks if the index of any DataFrame is a DatetimeIndex.
+    Raises a ValueError for empty or all-zero input and a TypeError
+    for a DataFrame without a DatetimeIndex.
+    """
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        # Helper function to validate a DataFrame or Series
+        def _validate(input_data, name):
+            if not isinstance(input_data, (pd.DataFrame, pd.Series)):
+                return
+            # Check emptiness first: an all-zeros test on empty data is
+            # vacuously true and would report the wrong problem
+            if input_data.empty:
+                raise ValueError(f"Input {name} is empty.")
+            # Check if consists only of zeros
+            if (input_data.values == 0).all():
+                raise ValueError(f"Input {name} consists only of zeros.")
+            # Check if index is a DatetimeIndex (only for DataFrames)
+            if isinstance(input_data, pd.DataFrame) and not isinstance(
+                    input_data.index, pd.DatetimeIndex):
+                raise TypeError(
+                    f"Input {name} does not have a DatetimeIndex.")
+        # Validate positional arguments
+        for i, arg in enumerate(args):
+            _validate(arg, f"arg[{i}]")
+        # Validate keyword arguments
+        for key, value in kwargs.items():
+            _validate(value, f"kwarg[{key}]")
+        # Call the original function
+        return func(*args, **kwargs)
+    return wrapper
+def invert_light_values(func):
+    """
+    Decorator to invert the light values in the given light column.
+    Used to ensure that on plots, darkness is shaded grey, not the lights.
+    The column is selected with the ``light_col`` keyword argument
+    (integer position or column name, default -1). When the wrapped
+    function itself declares a ``light_col`` parameter, an explicitly
+    passed value is forwarded to it so that the function reads the same
+    column that was inverted.
+    NOTE: ``light_col`` is only recognised as a keyword argument; a
+    value passed positionally is not seen by this decorator.
+    Parameters
+    ----------
+    func : function
+        The function to wrap. Its first argument must be the DataFrame.
+    Returns
+    -------
+    function
+        The wrapped function with inverted light values in the specified column.
+    """
+    import inspect
+    # only forward light_col to functions that declare it, so functions
+    # without that parameter keep working as before
+    try:
+        forwards_light_col = (
+            "light_col" in inspect.signature(func).parameters)
+    except (TypeError, ValueError):
+        forwards_light_col = False
+    @wraps(func)
+    def wrapper(data, *args, **kwargs):
+        light_col = kwargs.get("light_col", -1)
+        # Ensure light_col is a valid index
+        if isinstance(light_col, int):  # If specified as column index
+            light_col_name = data.columns[light_col]
+        elif isinstance(light_col, str):  # If specified as column name
+            light_col_name = light_col
+        else:
+            raise ValueError(
+                "light_col must be an integer index or a column name")
+        # Copy the data to avoid modifying the original DataFrame
+        data = data.copy()
+        # Invert the light values by reflecting them within [min, max]
+        max_value = data[light_col_name].max()
+        min_value = data[light_col_name].min()
+        data[light_col_name] = max_value - data[light_col_name] + min_value
+        if not forwards_light_col:
+            kwargs.pop("light_col", None)
+        # Call the original function with the modified data
+        return func(data, *args, **kwargs)
+    return wrapper
+#### Functions ####
+# function to set data by circadian period
+@validate_input
+def set_circadian_time(
+        data,
+        period='24h'):
+    """
+    Reindexes current data to 24 hours CT instead of ZT by setting
+    frequency to the ratio of 24hrs/new period
+    Parameters
+    ----------
+    data : pd.DataFrame
+        Dataframe with a regularly sampled pandas DatetimeIndex
+    period : str or pd.Timedelta
+        The new period to set the data to.
+        Timedelta string (e.g., '24h', '1d', '72h') or a Timedelta.
+    Returns
+    -------
+    pd.DataFrame
+        Original data but with a new datetimeindex, starting at the same time
+        as the original but now 24 hours is equal to the given period instead
+        of real time.
+    Raises
+    ------
+    TypeError
+        If `period` is neither a string nor a Timedelta.
+    ValueError
+        If no regular sampling frequency can be inferred from the index.
+    """
+    # Convert period to timedelta
+    if isinstance(period, (str, pd.Timedelta)):
+        period = pd.to_timedelta(period)
+    else:
+        raise TypeError("Period must be in timedelta string format")
+    # Calculate the frequency ratio based on the period
+    freq_ratio = 24 / (period.total_seconds() / 3600)
+    # get data frequency as timedelta
+    base_freq = pd.infer_freq(data.index)
+    if base_freq is None:
+        raise ValueError(
+            "Cannot infer a regular frequency from the data index.")
+    # Ensure base_freq has a numeric component
+    if not any(char.isdigit() for char in base_freq):
+        base_freq = '1' + base_freq  # Prepend '1' if missing
+    # convert to timedelta
+    base_timedelta = pd.to_timedelta(base_freq)
+    # scale the sampling step and express it in whole milliseconds
+    new_timedelta = base_timedelta * freq_ratio
+    new_freq_str = str(np.round(new_timedelta.total_seconds() * 1000)) + "ms"
+    # create new index based on this
+    start_time = data.index[0]
+    data_length = len(data)
+    new_index = pd.date_range(
+        start=start_time,
+        periods=data_length,
+        freq=new_freq_str
+    )
+    # reindex the data: same values and columns on the rescaled index
+    reindexed_data = pd.DataFrame(
+        data=data.values,
+        index=new_index,
+        columns=data.columns
+    )
+    return reindexed_data

circaPy/sleep_process.py ADDED Viewed

@@ -0,0 +1,96 @@
+# functions for sleep processing
+import os
+import numpy as np
+import circaPy.preprocessing as prep
+def sleep_process(data, window=4):
+    """
+    Score activity data as sleep.
+    A sample is scored 1 when the activity summed over the trailing
+    ``window`` samples (the sample itself included) is exactly zero,
+    and 0 otherwise. The first ``window - 1`` samples have no complete
+    window and are therefore scored 0.
+    Future development implement thresholds for breaking
+    sleep episodes.
+    :param data: pandas object holding the activity values
+    :param window: number of consecutive samples that must be inactive
+    :return: data of the same shape holding 0/1 integer scores
+    """
+    # a rolling sum of zero means no activity anywhere in the window
+    return data.rolling(window).sum().eq(0).astype(int)
+def create_scored_df(data, **kwargs):
+    """
+    Return the sleep-scored version of a dataframe, ready to be saved.
+    Thin wrapper: all of the work is done by ``_score_active_times``.
+    :param data: dataframe to score
+    :param kwargs: forwarded unchanged to ``_score_active_times``
+    :return: the scored dataframe
+    """
+    return _score_active_times(data, **kwargs)
+def _score_active_times(data,
+                        ldr_col=-1,
+                        test_col=0,
+                        threshold=1,
+                        drop_level=True):
+    """
+    Scores the data for sleep between the first and last time the test
+    column exceeds the threshold, and sets all other values to 0.
+    The input dataframe is left unmodified.
+    :param data: dataframe of activity columns plus one light (LDR)
+        column
+    :param ldr_col: position of the LDR column, which is excluded from
+        scoring and re-attached unchanged
+    :param test_col: position (after the LDR column is removed) of the
+        column used to find the start and end of activity
+    :param threshold: activity must be above this value to count
+    :param drop_level: if True, the first index level is moved out of
+        the index before scoring and put back in front of the remaining
+        index afterwards
+    :return: scored dataframe
+    """
+    if drop_level:
+        # reset_index returns a new frame, so the caller's data is safe
+        data = data.reset_index(0)
+        label_name = data.columns[0]
+        label_col = data.pop(label_name)
+    else:
+        # pop() below works in place; copy so the caller keeps its
+        # LDR column
+        data = data.copy()
+    # score the df minus the LDR
+    ldr_label = data.columns[ldr_col]
+    ldr_data = data.pop(ldr_label)
+    scored_df = sleep_process(data)
+    # find start and end of activity
+    mask = data.iloc[:, test_col] > threshold
+    start = data.where(mask).first_valid_index()
+    end = data.where(mask)[::-1].first_valid_index()
+    # set scored df times outside of start and end to be 0
+    scored_df.loc[:start] = 0
+    scored_df.loc[end:] = 0
+    scored_df[ldr_label] = ldr_data
+    if drop_level:
+        # rebuild the index with the label level in front
+        scored_df[label_name] = label_col
+        new_cols = [scored_df.columns[-1], scored_df.index]
+        scored_df.set_index(new_cols, inplace=True)
+    return scored_df
+def alter_file_name(file_name,
+                    suffix,
+                    remove_slice_after=-9):
+    """
+    Rename a file on disk: cut the end off its stem, append "suffix"
+    and keep the original extension and directory.
+    :param file_name: path of the file to rename (``pathlib.Path`` or
+        string)
+    :param suffix: text appended to the shortened stem
+    :param remove_slice_after: slice end applied to the stem; the
+        default of -9 drops its last nine characters
+    :return: path of the renamed file
+    """
+    from pathlib import Path
+    # accept plain strings as well as Path objects
+    file_name = Path(file_name)
+    new_file_name = file_name.stem[:remove_slice_after] + \
+        suffix + \
+        file_name.suffix
+    new_file_path = file_name.parent / new_file_name
+    os.rename(file_name, new_file_path)
+    return new_file_path