PyPI - paradigma - Versions diffs - 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl - Mend

paradigma 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

paradigma/classification.py +28 -11
paradigma/config.py +158 -101
paradigma/constants.py +39 -34
paradigma/feature_extraction.py +270 -211
paradigma/pipelines/gait_pipeline.py +286 -190
paradigma/pipelines/pulse_rate_pipeline.py +202 -133
paradigma/pipelines/pulse_rate_utils.py +144 -142
paradigma/pipelines/tremor_pipeline.py +139 -95
paradigma/preprocessing.py +179 -110
paradigma/segmenting.py +138 -113
paradigma/testing.py +359 -172
paradigma/util.py +171 -80
{paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info}/METADATA +39 -36
paradigma-1.0.4.dist-info/RECORD +23 -0
{paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info}/WHEEL +1 -1
paradigma-1.0.4.dist-info/entry_points.txt +4 -0
{paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info/licenses}/LICENSE +0 -1
paradigma-1.0.2.dist-info/RECORD +0 -22

paradigma/preprocessing.py CHANGED Viewed

@@ -1,25 +1,22 @@
-import json
+from datetime import datetime
+from typing import List, Tuple, Union
 import numpy as np
 import pandas as pd
-import tsdf
-from pathlib import Path
 from scipy import signal
 from scipy.interpolate import interp1d
-from typing import List, Tuple, Union
-from datetime import datetime
-from paradigma.constants import TimeUnit, DataColumns
-from paradigma.config import PPGConfig, IMUConfig
-from paradigma.util import write_df_data, read_metadata, invert_watch_side
+from paradigma.config import IMUConfig, PPGConfig
+from paradigma.util import invert_watch_side
 def resample_data(
     df: pd.DataFrame,
-    time_column : str,
+    time_column: str,
     values_column_names: List[str],
     sampling_frequency: int,
     resampling_frequency: int,
-    tolerance: float | None = None
+    tolerance: float | None = None,
 ) -> pd.DataFrame:
     """
     Resamples sensor data to a specified frequency using cubic interpolation.
@@ -37,9 +34,8 @@ def resample_data(
     resampling_frequency : int
         The frequency to which the data should be resampled (in Hz).
     tolerance : float, optional
-        The tolerance added to the expected difference when checking
-        for contiguous timestamps. If not provided, it defaults to
-        twice the expected interval.
+        The tolerance added to the expected difference when checking
+        for contiguous timestamps. If not provided, it defaults to the tolerance specified in IMUConfig.
     Returns
     -------
@@ -57,9 +53,10 @@ def resample_data(
     - Uses cubic interpolation for smooth resampling if there are enough points.
     - If only two timestamps are available, it falls back to linear interpolation.
     """
-    # Set default tolerance if not provided to twice the expected interval
+    # Set default tolerance if not provided to tolerance specified in IMUConfig
     if tolerance is None:
-        tolerance = 2 * 1 / sampling_frequency
+        tolerance = IMUConfig().tolerance
     # Extract time and values
     time_abs_array = np.array(df[time_column])
@@ -68,7 +65,7 @@ def resample_data(
     # Ensure the time array is strictly increasing
     if not np.all(np.diff(time_abs_array) > 0):
         raise ValueError("Time array is not strictly increasing")
     # Ensure the time array is contiguous
     expected_interval = 1 / sampling_frequency
     timestamp_diffs = np.diff(time_abs_array)
@@ -76,12 +73,20 @@ def resample_data(
         raise ValueError("Time array is not contiguous")
     # Resample the time data using the specified frequency
-    t_resampled = np.arange(time_abs_array[0], time_abs_array[-1], 1 / resampling_frequency)
+    t_resampled = np.arange(
+        time_abs_array[0], time_abs_array[-1], 1 / resampling_frequency
+    )
     # Choose interpolation method
     interpolation_kind = "cubic" if len(time_abs_array) > 3 else "linear"
-    interpolator = interp1d(time_abs_array, values_array, axis=0, kind=interpolation_kind, fill_value="extrapolate")
+    interpolator = interp1d(
+        time_abs_array,
+        values_array,
+        axis=0,
+        kind=interpolation_kind,
+        fill_value="extrapolate",
+    )
     # Interpolate
     resampled_values = interpolator(t_resampled)
@@ -103,20 +108,20 @@ def butterworth_filter(
     """
     Applies a Butterworth filter to 1D or 2D sensor data.
-    This function applies a low-pass, high-pass, or band-pass Butterworth filter to the
-    input data. The filter is designed using the specified order, cutoff frequency,
+    This function applies a low-pass, high-pass, or band-pass Butterworth filter to the
+    input data. The filter is designed using the specified order, cutoff frequency,
     and passband type. The function can handle both 1D and 2D data arrays.
     Parameters
     ----------
     data : np.ndarray
-        The sensor data to be filtered. Can be 1D (e.g., a single signal) or 2D
+        The sensor data to be filtered. Can be 1D (e.g., a single signal) or 2D
         (e.g., multi-axis sensor data).
     order : int
         The order of the Butterworth filter. Higher values result in a steeper roll-off.
     cutoff_frequency : float or List[float]
-        The cutoff frequency (or frequencies) for the filter. For a low-pass or high-pass filter,
-        this is a single float. For a band-pass filter, this should be a list of two floats,
+        The cutoff frequency (or frequencies) for the filter. For a low-pass or high-pass filter,
+        this is a single float. For a band-pass filter, this should be a list of two floats,
         specifying the lower and upper cutoff frequencies.
     passband : str
         The type of passband to apply. Options are:
@@ -159,7 +164,10 @@ def butterworth_filter(
     else:
         raise ValueError("Data must be either 1D or 2D.")
-def preprocess_imu_data(df: pd.DataFrame, config: IMUConfig, sensor: str, watch_side: str) -> pd.DataFrame:
+def preprocess_imu_data(
+    df: pd.DataFrame, config: IMUConfig, sensor: str, watch_side: str
+) -> pd.DataFrame:
     """
     Preprocesses IMU data by resampling and applying filters.
@@ -186,69 +194,88 @@ def preprocess_imu_data(df: pd.DataFrame, config: IMUConfig, sensor: str, watch_
         The preprocessed accelerometer and or gyroscope data with the following transformations:
         - Resampled data at the specified frequency.
         - Filtered accelerometer data with high-pass and low-pass filtering applied.
     Notes
     -----
     - The function applies Butterworth filters to accelerometer data, both high-pass and low-pass.
     """
     # Extract sensor column
-    if sensor == 'accelerometer':
-        values_colnames = config.accelerometer_cols
-    elif sensor == 'gyroscope':
-        values_colnames = config.gyroscope_cols
-    elif sensor == 'both':
-        values_colnames = config.accelerometer_cols + config.gyroscope_cols
+    if sensor == "accelerometer":
+        values_colnames = config.accelerometer_colnames
+    elif sensor == "gyroscope":
+        values_colnames = config.gyroscope_colnames
+    elif sensor == "both":
+        values_colnames = config.accelerometer_colnames + config.gyroscope_colnames
     else:
-        raise('Sensor should be either accelerometer, gyroscope, or both')
+        raise ("Sensor should be either accelerometer, gyroscope, or both")
     # Resample the data to the specified frequency
     df = resample_data(
         df=df,
-        time_column=DataColumns.TIME,
+        time_column=config.time_colname,
         values_column_names=values_colnames,
         sampling_frequency=config.sampling_frequency,
-        resampling_frequency=config.sampling_frequency
+        resampling_frequency=config.resampling_frequency,
+        tolerance=config.tolerance,
     )
     # Invert the IMU data if the watch was worn on the right wrist
     df = invert_watch_side(df, watch_side, sensor)
-    if sensor in ['accelerometer', 'both']:
+    if sensor in ["accelerometer", "both"]:
         # Extract accelerometer data for filtering
-        accel_data = df[config.accelerometer_cols].values
+        accel_data = df[config.accelerometer_colnames].values
         # Define filter configurations for high-pass and low-pass
         filter_renaming_configs = {
-        "hp": {"result_columns": config.accelerometer_cols, "replace_original": True},
-        "lp": {"result_columns": [f'{col}_grav' for col in config.accelerometer_cols], "replace_original": False},
+            "hp": {
+                "result_columns": config.accelerometer_colnames,
+                "replace_original": True,
+            },
+            "lp": {
+                "result_columns": [
+                    f"{col}_grav" for col in config.accelerometer_colnames
+                ],
+                "replace_original": False,
+            },
         }
         # Apply filters in a loop
         for passband, filter_config in filter_renaming_configs.items():
             filtered_data = butterworth_filter(
-            data=accel_data,
-            order=config.filter_order,
-            cutoff_frequency=config.lower_cutoff_frequency,
-            passband=passband,
-            sampling_frequency=config.sampling_frequency,
+                data=accel_data,
+                order=config.filter_order,
+                cutoff_frequency=config.lower_cutoff_frequency,
+                passband=passband,
+                sampling_frequency=config.sampling_frequency,
             )
             # Replace or add new columns based on configuration
             df[filter_config["result_columns"]] = filtered_data
-        values_colnames += config.gravity_cols
+        values_colnames += config.gravity_colnames
-    df = df[[DataColumns.TIME, *values_colnames]]
+    df = df[[config.time_colname, *values_colnames]]
     return df
-def preprocess_ppg_data(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config: PPGConfig,
-                        imu_config: IMUConfig, start_time_ppg: str, start_time_imu: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
+def preprocess_ppg_data(
+    df_ppg: pd.DataFrame,
+    ppg_config: PPGConfig,
+    start_time_ppg: str | None = None,
+    df_acc: pd.DataFrame | None = None,
+    imu_config: IMUConfig | None = None,
+    start_time_imu: str | None = None,
+) -> Tuple[pd.DataFrame, pd.DataFrame | None]:
     """
-    Preprocess PPG and IMU (accelerometer only) data by resampling, filtering, and aligning the data segments.
+    This function preprocesses PPG and accelerometer data by resampling, filtering and aligning the data segments of both sensors (if applicable).
+    Aligning is done using the extract_overlapping_segments function which is based on the provided start times of the PPG and IMU data and returns
+    only the data points where both signals overlap in time. The remaining data points are discarded.
+    After alignment, the function resamples the data to the specified frequency and applies Butterworth filters to both PPG and accelerometer data (if applicable).
+    The output is two DataFrames: one for the preprocessed PPG data and another for the preprocessed accelerometer data (if provided, otherwise return is None).
     Parameters
     ----------
@@ -267,79 +294,117 @@ def preprocess_ppg_data(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config:
     Returns
     -------
-    Tuple[pd.DataFrame, pd.DataFrame]
-        Preprocessed PPG and IMU data as DataFrames.
+    Tuple[pd.DataFrame, pd.DataFrame | None]
+        A tuple containing two DataFrames:
+        - Preprocessed PPG data with the following transformations:
+            - Resampled data at the specified frequency.
+            - Filtered PPG data with bandpass filtering applied.
+        - Preprocessed accelerometer data (if provided, otherwise return is None) with the following transformations:
+            - Resampled data at the specified frequency.
+            - Filtered accelerometer data with high-pass and low-pass filtering applied.
+    Notes
+    -----
+    - If accelerometer data or IMU configuration is not provided, the function only preprocesses PPG data.
+    - The function applies Butterworth filters to PPG and accelerometer (if applicable) data, both high-pass and low-pass.
     """
+    if df_acc is not None and imu_config is not None:
+        # Extract overlapping segments
+        df_ppg_overlapping, df_acc_overlapping = extract_overlapping_segments(
+            df_ppg=df_ppg,
+            df_acc=df_acc,
+            time_colname_ppg=ppg_config.time_colname,
+            time_colname_imu=imu_config.time_colname,
+            start_time_ppg=start_time_ppg,
+            start_time_acc=start_time_imu,
+        )
-    # Extract overlapping segments
-    df_ppg_overlapping, df_acc_overlapping = extract_overlapping_segments(df_ppg, df_acc, start_time_ppg, start_time_imu)
-    # Resample accelerometer data
-    df_acc_proc = resample_data(
-        df=df_acc_overlapping,
-        time_column=DataColumns.TIME,
-        values_column_names = list(imu_config.d_channels_accelerometer.keys()),
-        sampling_frequency=imu_config.sampling_frequency,
-        resampling_frequency=imu_config.sampling_frequency
-    )
+        # Resample accelerometer data
+        df_acc_proc = resample_data(
+            df=df_acc_overlapping,
+            time_column=imu_config.time_colname,
+            values_column_names=list(imu_config.d_channels_accelerometer.keys()),
+            sampling_frequency=imu_config.sampling_frequency,
+            resampling_frequency=imu_config.resampling_frequency,
+            tolerance=imu_config.tolerance,
+        )
+        # Extract accelerometer data for filtering
+        accel_data = df_acc_proc[imu_config.accelerometer_colnames].values
+        # Define filter configurations for high-pass and low-pass
+        filter_renaming_configs = {
+            "hp": {
+                "result_columns": imu_config.accelerometer_colnames,
+                "replace_original": True,
+            }
+        }
+        # Apply filters in a loop
+        for passband, filter_config in filter_renaming_configs.items():
+            filtered_data = butterworth_filter(
+                data=accel_data,
+                order=imu_config.filter_order,
+                cutoff_frequency=imu_config.lower_cutoff_frequency,
+                passband=passband,
+                sampling_frequency=imu_config.sampling_frequency,
+            )
+            # Replace or add new columns based on configuration
+            df_acc_proc[filter_config["result_columns"]] = filtered_data
+    else:
+        df_ppg_overlapping = df_ppg
     # Resample PPG data
     df_ppg_proc = resample_data(
         df=df_ppg_overlapping,
-        time_column=DataColumns.TIME,
-        values_column_names = list(ppg_config.d_channels_ppg.keys()),
+        time_column=ppg_config.time_colname,
+        values_column_names=list(ppg_config.d_channels_ppg.keys()),
         sampling_frequency=ppg_config.sampling_frequency,
-        resampling_frequency=ppg_config.sampling_frequency
+        resampling_frequency=ppg_config.resampling_frequency,
+        tolerance=ppg_config.tolerance,
     )
-    # Extract accelerometer data for filtering
-    accel_data = df_acc_proc[imu_config.accelerometer_cols].values
-    # Define filter configurations for high-pass and low-pass
-    filter_renaming_configs = {
-    "hp": {"result_columns": imu_config.accelerometer_cols, "replace_original": True}}
-    # Apply filters in a loop
-    for passband, filter_config in filter_renaming_configs.items():
-        filtered_data = butterworth_filter(
-        data=accel_data,
-        order=imu_config.filter_order,
-        cutoff_frequency=imu_config.lower_cutoff_frequency,
-        passband=passband,
-        sampling_frequency=imu_config.sampling_frequency,
-        )
-        # Replace or add new columns based on configuration
-        df_acc_proc[filter_config["result_columns"]] = filtered_data
     # Extract accelerometer data for filtering
     ppg_data = df_ppg_proc[ppg_config.ppg_colname].values
     # Define filter configurations for high-pass and low-pass
     filter_renaming_configs = {
-    "bandpass": {"result_columns": ppg_config.ppg_colname, "replace_original": True}}
+        "bandpass": {"result_columns": ppg_config.ppg_colname, "replace_original": True}
+    }
     # Apply filters in a loop
     for passband, filter_config in filter_renaming_configs.items():
         filtered_data = butterworth_filter(
-        data=ppg_data,
-        order=ppg_config.filter_order,
-        cutoff_frequency=[ppg_config.lower_cutoff_frequency, ppg_config.upper_cutoff_frequency],
-        passband=passband,
-        sampling_frequency=ppg_config.sampling_frequency,
+            data=ppg_data,
+            order=ppg_config.filter_order,
+            cutoff_frequency=[
+                ppg_config.lower_cutoff_frequency,
+                ppg_config.upper_cutoff_frequency,
+            ],
+            passband=passband,
+            sampling_frequency=ppg_config.sampling_frequency,
         )
         # Replace or add new columns based on configuration
         df_ppg_proc[filter_config["result_columns"]] = filtered_data
-    return df_ppg_proc, df_acc_proc
+    if df_acc is not None and imu_config is not None:
+        return df_ppg_proc, df_acc_proc
+    else:
+        return df_ppg_proc, None
-def extract_overlapping_segments(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, start_time_ppg: str, start_time_acc: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
+def extract_overlapping_segments(
+    df_ppg: pd.DataFrame,
+    df_acc: pd.DataFrame,
+    time_colname_ppg: str,
+    time_colname_imu: str,
+    start_time_ppg: str,
+    start_time_acc: str,
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
     """
     Extract DataFrames with overlapping data segments between accelerometer (from the IMU) and PPG datasets based on their timestamps.
@@ -349,6 +414,10 @@ def extract_overlapping_segments(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, sta
         DataFrame containing PPG data.
     df_acc : pd.DataFrame
         DataFrame containing accelerometer data from the IMU.
+    time_colname_ppg : str
+        The name of the column containing the time data in the PPG dataframe.
+    time_colname_imu : str
+        The name of the column containing the time data in the IMU dataframe.
     start_time_ppg : str
         iso8601 formatted start time of the PPG data.
     start_time_acc : str
@@ -366,21 +435,21 @@ def extract_overlapping_segments(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, sta
     start_acc_ppg = int(datetime_acc_start.timestamp())
     # Calculate the time in Unix timestamps for each dataset because the timestamps are relative to the start time
-    ppg_time = df_ppg[DataColumns.TIME] + start_unix_ppg
-    acc_time = df_acc[DataColumns.TIME] + start_acc_ppg
+    ppg_time = df_ppg[time_colname_ppg] + start_unix_ppg
+    acc_time = df_acc[time_colname_imu] + start_acc_ppg
     # Determine the overlapping time interval
     start_time = max(ppg_time.iloc[0], acc_time.iloc[0])
     end_time = min(ppg_time.iloc[-1], acc_time.iloc[-1])
     # Extract indices for overlapping segments
-    ppg_start_index = np.searchsorted(ppg_time, start_time, 'left')
-    ppg_end_index = np.searchsorted(ppg_time, end_time, 'right') - 1
-    acc_start_index = np.searchsorted(acc_time, start_time, 'left')
-    acc_end_index = np.searchsorted(acc_time, end_time, 'right') - 1
+    ppg_start_index = np.searchsorted(ppg_time, start_time, "left")
+    ppg_end_index = np.searchsorted(ppg_time, end_time, "right") - 1
+    acc_start_index = np.searchsorted(acc_time, start_time, "left")
+    acc_end_index = np.searchsorted(acc_time, end_time, "right") - 1
     # Extract overlapping segments from DataFrames
-    df_ppg_overlapping = df_ppg.iloc[ppg_start_index:ppg_end_index + 1]
-    df_acc_overlapping = df_acc.iloc[acc_start_index:acc_end_index + 1]
+    df_ppg_overlapping = df_ppg.iloc[ppg_start_index : ppg_end_index + 1]
+    df_acc_overlapping = df_acc.iloc[acc_start_index : acc_end_index + 1]
-    return df_ppg_overlapping, df_acc_overlapping
+    return df_ppg_overlapping, df_acc_overlapping

paradigma 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

paradigma 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl