PyPI - paradigma - Versions diffs - 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

paradigma 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

paradigma/__init__.py +10 -1
paradigma/classification.py +14 -14
paradigma/config.py +38 -29
paradigma/constants.py +10 -2
paradigma/feature_extraction.py +106 -75
paradigma/load.py +476 -0
paradigma/orchestrator.py +670 -0
paradigma/pipelines/gait_pipeline.py +488 -97
paradigma/pipelines/pulse_rate_pipeline.py +278 -46
paradigma/pipelines/pulse_rate_utils.py +176 -137
paradigma/pipelines/tremor_pipeline.py +292 -72
paradigma/prepare_data.py +409 -0
paradigma/preprocessing.py +345 -77
paradigma/segmenting.py +57 -42
paradigma/testing.py +14 -9
paradigma/util.py +36 -22
paradigma-1.1.0.dist-info/METADATA +229 -0
paradigma-1.1.0.dist-info/RECORD +26 -0
{paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/WHEEL +1 -1
paradigma-1.0.4.dist-info/METADATA +0 -140
paradigma-1.0.4.dist-info/RECORD +0 -23
{paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/entry_points.txt +0 -0
{paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/licenses/LICENSE +0 -0

paradigma/pipelines/pulse_rate_pipeline.py CHANGED Viewed

@@ -1,4 +1,7 @@
-from typing import List
+import json
+import logging
+from importlib.resources import files
+from pathlib import Path
 import numpy as np
 import pandas as pd
@@ -6,7 +9,7 @@ from scipy.signal import welch
 from scipy.signal.windows import hamming, hann
 from paradigma.classification import ClassifierPackage
-from paradigma.config import PulseRateConfig
+from paradigma.config import PPGConfig, PulseRateConfig
 from paradigma.constants import DataColumns
 from paradigma.feature_extraction import (
     compute_auto_correlation,
@@ -21,6 +24,7 @@ from paradigma.pipelines.pulse_rate_utils import (
     extract_pr_from_segment,
     extract_pr_segments,
 )
+from paradigma.preprocessing import preprocess_ppg_data
 from paradigma.segmenting import WindowedDataExtractor, tabulate_windows
 from paradigma.util import aggregate_parameter
@@ -33,9 +37,11 @@ def extract_signal_quality_features(
 ) -> pd.DataFrame:
     """
     Extract signal quality features from the PPG signal.
-    The features are extracted from the temporal and spectral domain of the PPG signal.
-    The temporal domain features include variance, mean, median, kurtosis, skewness, signal-to-noise ratio, and autocorrelation.
-    The spectral domain features include the dominant frequency, relative power, spectral entropy.
+    The features are extracted from the temporal and spectral domain of the
+    PPG signal. The temporal domain features include variance, mean, median,
+    kurtosis, skewness, signal-to-noise ratio, and autocorrelation. The
+    spectral domain features include the dominant frequency, relative power,
+    spectral entropy.
     Parameters
     ----------
@@ -44,9 +50,11 @@ def extract_signal_quality_features(
     df_acc : pd.DataFrame
         The DataFrame containing the accelerometer signal.
     ppg_config: PulseRateConfig
-        The configuration for the signal quality feature extraction of the PPG signal.
+        The configuration for the signal quality feature extraction of the PPG
+        signal.
     acc_config: PulseRateConfig
-        The configuration for the signal quality feature extraction of the accelerometer signal.
+        The configuration for the signal quality feature extraction of the
+        accelerometer signal.
     Returns
     -------
@@ -68,9 +76,9 @@ def extract_signal_quality_features(
     extractor = WindowedDataExtractor(ppg_windowed_colnames)
     idx_time = extractor.get_index(ppg_config.time_colname)
     idx_ppg = extractor.get_index(ppg_config.ppg_colname)
-    start_time_ppg = np.min(
-        ppg_windowed[:, :, idx_time], axis=1
-    )  # Start time of the window is relative to the first datapoint in the PPG data
+    # Start time of the window is relative to the first datapoint in the PPG
+    # data
+    start_time_ppg = np.min(ppg_windowed[:, :, idx_time], axis=1)
     ppg_values_windowed = ppg_windowed[:, :, idx_ppg]
     df_features = pd.DataFrame(start_time_ppg, columns=[ppg_config.time_colname])
@@ -125,15 +133,22 @@ def signal_quality_classification(
     df: pd.DataFrame, config: PulseRateConfig, clf_package: ClassifierPackage
 ) -> pd.DataFrame:
     """
-    Classify the signal quality of the PPG signal using a logistic regression classifier. A probability close to 1 indicates a high-quality signal, while a probability close to 0 indicates a low-quality signal.
-    The classifier is trained on features extracted from the PPG signal. The features are extracted using the extract_signal_quality_features function.
-    The accelerometer signal is used to determine the signal quality based on the power ratio of the accelerometer signal and returns a binary label based on a threshold.
-    A value of 1 on the indicates no/minor periodic motion influence of the accelerometer on the PPG signal, 0 indicates major periodic motion influence.
+    Classify the signal quality of the PPG signal using a logistic regression
+    classifier. A probability close to 1 indicates a high-quality signal,
+    while a probability close to 0 indicates a low-quality signal. The
+    classifier is trained on features extracted from the PPG signal. The
+    features are extracted using the extract_signal_quality_features
+    function. The accelerometer signal is used to determine the signal
+    quality based on the power ratio of the accelerometer signal and returns
+    a binary label based on a threshold. A value of 1 on the indicates
+    no/minor periodic motion influence of the accelerometer on the PPG
+    signal, 0 indicates major periodic motion influence.
     Parameters
     ----------
     df : pd.DataFrame
-        The DataFrame containing the PPG features and the accelerometer feature for signal quality classification.
+        The DataFrame containing the PPG features and the accelerometer
+        feature for signal quality classification.
     config : PulseRateConfig
         The configuration for the signal quality classification.
     clf_package : ClassifierPackage
@@ -142,7 +157,9 @@ def signal_quality_classification(
     Returns
     -------
     df_sqa pd.DataFrame
-        The DataFrame containing the PPG signal quality predictions (both probabilities of the PPG signal quality classification and the accelerometer label based on the threshold).
+        The DataFrame containing the PPG signal quality predictions (both
+        probabilities of the PPG signal quality classification and the
+        accelerometer label based on the threshold).
     """
     # Set classifier
     clf = clf_package.classifier  # Load the logistic regression classifier
@@ -152,16 +169,16 @@ def signal_quality_classification(
         df.loc[:, clf.feature_names_in]
     )  # Apply scaling to the features
-    # Make predictions for PPG signal quality assessment, and assign the probabilities to the DataFrame and drop the features
+    # Make predictions for PPG signal quality assessment, and assign the
+    # probabilities to the DataFrame and drop the features
     df[DataColumns.PRED_SQA_PROBA] = clf.predict_proba(scaled_features)[:, 0]
     keep_cols = [config.time_colname, DataColumns.PRED_SQA_PROBA]
     if DataColumns.ACC_POWER_RATIO in df.columns:
+        # Assign accelerometer label to the DataFrame based on the threshold
         df[DataColumns.PRED_SQA_ACC_LABEL] = (
             df[DataColumns.ACC_POWER_RATIO] < config.threshold_sqa_accelerometer
-        ).astype(
-            int
-        )  # Assign accelerometer label to the DataFrame based on the threshold
+        ).astype(int)
         keep_cols += [DataColumns.PRED_SQA_ACC_LABEL]
     return df[keep_cols]
@@ -252,9 +269,9 @@ def estimate_pulse_rate(
             config.kern_params,
         )
         n_pr = len(pr_est)  # Number of pulse rate estimates
-        end_idx_time = (
-            n_pr * step_size + start_idx
-        )  # Calculate end index for time, different from end_idx since it is always a multiple of step_size, while end_idx is not
+        # Calculate end index for time, different from end_idx since it is
+        # always a multiple of step_size, while end_idx is not
+        end_idx_time = n_pr * step_size + start_idx
         # Extract relative time for PR estimates
         pr_time = ppg_preprocessed[start_idx:end_idx_time:step_size, time_idx]
@@ -270,7 +287,7 @@ def estimate_pulse_rate(
 def aggregate_pulse_rate(
-    pr_values: np.ndarray, aggregates: List[str] = ["mode", "99p"]
+    pr_values: np.ndarray, aggregates: list[str] = ["mode", "99p"]
 ) -> dict:
     """
     Aggregate the pulse rate estimates using the specified aggregation methods.
@@ -280,7 +297,8 @@ def aggregate_pulse_rate(
     pr_values : np.ndarray
         The array containing the pulse rate estimates
     aggregates : List[str]
-        The list of aggregation methods to be used for the pulse rate estimates. The default is ['mode', '99p'].
+        The list of aggregation methods to be used for the pulse rate
+        estimates. The default is ['mode', '99p'].
     Returns
     -------
@@ -306,10 +324,12 @@ def aggregate_pulse_rate(
 def extract_temporal_domain_features(
     ppg_windowed: np.ndarray,
     config: PulseRateConfig,
-    quality_stats: List[str] = ["mean", "std"],
+    quality_stats: list[str] = ["mean", "std"],
 ) -> pd.DataFrame:
     """
-    Compute temporal domain features for the ppg signal. The features are added to the dataframe. Therefore the original dataframe is modified, and the modified dataframe is returned.
+    Compute temporal domain features for the ppg signal. The features are
+    added to the dataframe. Therefore the original dataframe is modified,
+    and the modified dataframe is returned.
     Parameters
     ----------
@@ -320,7 +340,8 @@ def extract_temporal_domain_features(
         The configuration object containing the parameters for the feature extraction
     quality_stats: list, optional
-        The statistics to be computed for the gravity component of the accelerometer signal (default: ['mean', 'std'])
+        The statistics to be computed for the gravity component of the
+        accelerometer signal (default: ['mean', 'std'])
     Returns
     -------
@@ -344,9 +365,11 @@ def extract_spectral_domain_features(
     config: PulseRateConfig,
 ) -> pd.DataFrame:
     """
-    Calculate the spectral features (dominant frequency, relative power, and spectral entropy)
-    for each segment of a PPG signal using a single Welch's method computation. The features are added to the dataframe.
-    Therefore the original dataframe is modified, and the modified dataframe is returned.
+    Calculate the spectral features (dominant frequency, relative power, and
+    spectral entropy) for each segment of a PPG signal using a single
+    Welch's method computation. The features are added to the dataframe.
+    Therefore the original dataframe is modified, and the modified dataframe
+    is returned.
     Parameters
     ----------
@@ -386,7 +409,10 @@ def extract_spectral_domain_features(
 def extract_acc_power_feature(
-    f1: np.ndarray, PSD_acc: np.ndarray, f2: np.ndarray, PSD_ppg: np.ndarray
+    f1: np.ndarray,
+    psd_acc: np.ndarray,
+    f2: np.ndarray,
+    psd_ppg: np.ndarray,
 ) -> np.ndarray:
     """
     Extract the accelerometer power feature in the PPG frequency range.
@@ -395,11 +421,11 @@ def extract_acc_power_feature(
     ----------
     f1: np.ndarray
         The frequency bins of the accelerometer signal.
-    PSD_acc: np.ndarray
+    psd_acc: np.ndarray
         The power spectral density of the accelerometer signal.
     f2: np.ndarray
         The frequency bins of the PPG signal.
-    PSD_ppg: np.ndarray
+    psd_ppg: np.ndarray
         The power spectral density of the PPG signal.
     Returns
@@ -409,32 +435,33 @@ def extract_acc_power_feature(
     """
     # Find the index of the maximum PSD value in the PPG signal
-    max_PPG_psd_idx = np.argmax(PSD_ppg, axis=1)
-    max_PPG_freq_psd = f2[max_PPG_psd_idx]
+    max_ppg_psd_idx = np.argmax(psd_ppg, axis=1)
+    max_ppg_freq_psd = f2[max_ppg_psd_idx]
     # Find the neighboring indices of the maximum PSD value in the PPG signal
     df_idx = np.column_stack(
-        (max_PPG_psd_idx - 1, max_PPG_psd_idx, max_PPG_psd_idx + 1)
+        (max_ppg_psd_idx - 1, max_ppg_psd_idx, max_ppg_psd_idx + 1)
     )
-    # Find the index of the closest frequency in the accelerometer signal to the first harmonic of the PPG frequency
-    corr_acc_psd_fh_idx = np.argmin(np.abs(f1[:, None] - max_PPG_freq_psd * 2), axis=0)
+    # Find the index of the closest frequency in the accelerometer signal
+    # to the first harmonic of the PPG frequency
+    corr_acc_psd_fh_idx = np.argmin(np.abs(f1[:, None] - max_ppg_freq_psd * 2), axis=0)
     fh_idx = np.column_stack(
         (corr_acc_psd_fh_idx - 1, corr_acc_psd_fh_idx, corr_acc_psd_fh_idx + 1)
     )
     # Compute the power in the ranges corresponding to the PPG frequency
-    acc_power_PPG_range = np.trapz(
-        PSD_acc[np.arange(PSD_acc.shape[0])[:, None], df_idx], f1[df_idx], axis=1
-    ) + np.trapz(
-        PSD_acc[np.arange(PSD_acc.shape[0])[:, None], fh_idx], f1[fh_idx], axis=1
+    acc_power_ppg_range = np.trapezoid(
+        psd_acc[np.arange(psd_acc.shape[0])[:, None], df_idx], f1[df_idx], axis=1
+    ) + np.trapezoid(
+        psd_acc[np.arange(psd_acc.shape[0])[:, None], fh_idx], f1[fh_idx], axis=1
     )
     # Compute the total power across the entire frequency range
-    acc_power_total = np.trapz(PSD_acc, f1)
+    acc_power_total = np.trapezoid(psd_acc, f1)
     # Compute the power ratio of the accelerometer signal in the PPG frequency range
-    acc_power_ratio = acc_power_PPG_range / acc_power_total
+    acc_power_ratio = acc_power_ppg_range / acc_power_total
     return acc_power_ratio
@@ -443,7 +470,8 @@ def extract_accelerometer_feature(
     acc_windowed: np.ndarray, ppg_windowed: np.ndarray, config: PulseRateConfig
 ) -> pd.DataFrame:
     """
-    Extract accelerometer features from the accelerometer signal in the PPG frequency range.
+    Extract accelerometer features from the accelerometer signal in the PPG
+    frequency range.
     Parameters
     ----------
@@ -493,3 +521,207 @@ def extract_accelerometer_feature(
     )
     return pd.DataFrame(acc_power_ratio, columns=["acc_power_ratio"])
+def run_pulse_rate_pipeline(
+    df_ppg_prepared: pd.DataFrame,
+    output_dir: str | Path,
+    store_intermediate: list[str] = [],
+    pulse_rate_config: PulseRateConfig | None = None,
+    ppg_config: PPGConfig | None = None,
+    logging_level: int = logging.INFO,
+    custom_logger: logging.Logger | None = None,
+) -> pd.DataFrame:
+    """
+    High-level pulse rate analysis pipeline for a single segment.
+    This function implements the complete pulse rate analysis workflow from the
+    pulse rate tutorial:
+    1. Preprocess PPG and accelerometer data (accelerometer is optional)
+    2. Extract signal quality features
+    3. Signal quality classification
+    4. Pulse rate estimation
+    5. Quantify pulse rate (select relevant columns)
+    Parameters
+    ----------
+    df_ppg_prepared : pd.DataFrame
+        Prepared sensor data with time and PPG column.
+    output_dir : str or Path
+        Output directory for intermediate results (required)
+    store_intermediate : list of str, default []
+        Which intermediate results to store.
+    pulse_rate_config : PulseRateConfig, optional
+        Pulse rate analysis configuration
+    ppg_config : PPGConfig, optional
+        PPG preprocessing configuration
+    logging_level : int, default logging.INFO
+        Logging level using standard logging constants
+    custom_logger : logging.Logger, optional
+        Custom logger instance
+    Returns
+    -------
+    pd.DataFrame
+        Quantified pulse rate data with columns:
+        - time: timestamp
+        - pulse_rate: pulse rate estimate
+        - signal_quality: quality assessment (if available)
+    """
+    # Setup logger
+    active_logger = (
+        custom_logger if custom_logger is not None else logging.getLogger(__name__)
+    )
+    if custom_logger is None:
+        active_logger.setLevel(logging_level)
+    if pulse_rate_config is None:
+        pulse_rate_config = PulseRateConfig()
+    if ppg_config is None:
+        ppg_config = PPGConfig()
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    # Validate input data columns (PPG is required, accelerometer is optional)
+    required_columns = [DataColumns.TIME, DataColumns.PPG]
+    missing_columns = [
+        col for col in required_columns if col not in df_ppg_prepared.columns
+    ]
+    if missing_columns:
+        active_logger.warning(
+            f"Missing required columns for pulse rate pipeline: {missing_columns}"
+        )
+        return pd.DataFrame()
+    # Step 1: Preprocess PPG and accelerometer data (following tutorial)
+    active_logger.info("Step 1: Preprocessing PPG and accelerometer data")
+    try:
+        # Separate PPG data (always available)
+        ppg_cols = [DataColumns.TIME, DataColumns.PPG]
+        df_ppg = df_ppg_prepared[ppg_cols].copy()
+        # Preprocess the data
+        df_ppg_proc, _ = preprocess_ppg_data(
+            df_ppg=df_ppg,
+            ppg_config=ppg_config,
+            verbose=1 if logging_level <= logging.INFO else 0,
+        )
+        if "preprocessing" in store_intermediate:
+            preprocessing_dir = output_dir / "preprocessing"
+            preprocessing_dir.mkdir(exist_ok=True)
+            df_ppg_proc.to_parquet(preprocessing_dir / "ppg_preprocessed.parquet")
+            active_logger.info(f"Saved preprocessed data to {preprocessing_dir}")
+    except Exception as e:
+        active_logger.error(f"Preprocessing failed: {e}")
+        return pd.DataFrame()
+    # Step 2: Extract signal quality features
+    active_logger.info("Step 2: Extracting signal quality features")
+    try:
+        df_features = extract_signal_quality_features(df_ppg_proc, pulse_rate_config)
+        if "pulse_rate" in store_intermediate:
+            pulse_rate_dir = output_dir / "pulse_rate"
+            pulse_rate_dir.mkdir(exist_ok=True)
+            df_features.to_parquet(pulse_rate_dir / "signal_quality_features.parquet")
+            active_logger.info(f"Saved signal quality features to {pulse_rate_dir}")
+    except Exception as e:
+        active_logger.error(f"Feature extraction failed: {e}")
+        return pd.DataFrame()
+    # Step 3: Signal quality classification
+    active_logger.info("Step 3: Signal quality classification")
+    try:
+        classifier_path = files("paradigma.assets") / "ppg_quality_clf_package.pkl"
+        classifier_package = ClassifierPackage.load(classifier_path)
+        df_classified = signal_quality_classification(
+            df_features, pulse_rate_config, classifier_package
+        )
+    except Exception as e:
+        active_logger.error(f"Signal quality classification failed: {e}")
+        return pd.DataFrame()
+    # Step 4: Pulse rate estimation
+    active_logger.info("Step 4: Pulse rate estimation")
+    try:
+        df_pulse_rates = estimate_pulse_rate(
+            df_sqa=df_classified,
+            df_ppg_preprocessed=df_ppg_proc,
+            config=pulse_rate_config,
+        )
+    except Exception as e:
+        active_logger.error(f"Pulse rate estimation failed: {e}")
+        return pd.DataFrame()
+    # Step 5: Quantify pulse rate (select relevant columns and apply quality filtering)
+    active_logger.info("Step 5: Quantifying pulse rate")
+    # Select quantification columns
+    quantification_columns = []
+    if DataColumns.TIME in df_pulse_rates.columns:
+        quantification_columns.append(DataColumns.TIME)
+    if DataColumns.PULSE_RATE in df_pulse_rates.columns:
+        quantification_columns.append(DataColumns.PULSE_RATE)
+    if "signal_quality" in df_pulse_rates.columns:
+        quantification_columns.append("signal_quality")
+    # Use available columns
+    available_columns = [
+        col for col in quantification_columns if col in df_pulse_rates.columns
+    ]
+    if not available_columns:
+        active_logger.warning("No valid quantification columns found")
+        return pd.DataFrame()
+    df_quantification = df_pulse_rates[available_columns].copy()
+    # Apply quality filtering if signal quality is available
+    if (
+        "signal_quality" in df_quantification.columns
+        and DataColumns.PULSE_RATE in df_quantification.columns
+    ):
+        quality_threshold = getattr(pulse_rate_config, "threshold_sqa", 0.5)
+        low_quality_mask = df_quantification["signal_quality"] < quality_threshold
+        df_quantification.loc[low_quality_mask, DataColumns.PULSE_RATE] = np.nan
+    if "quantification" in store_intermediate:
+        quantification_dir = output_dir / "quantification"
+        quantification_dir.mkdir(exist_ok=True)
+        df_quantification.to_parquet(
+            quantification_dir / "pulse_rate_quantification.parquet"
+        )
+        # Save quantification metadata
+        valid_pulse_rates = (
+            df_quantification[DataColumns.PULSE_RATE].dropna()
+            if DataColumns.PULSE_RATE in df_quantification.columns
+            else pd.Series(dtype=float)
+        )
+        quantification_meta = {
+            "total_windows": len(df_quantification),
+            "valid_pulse_rate_estimates": len(valid_pulse_rates),
+            "columns": list(df_quantification.columns),
+        }
+        with open(quantification_dir / "pulse_rate_quantification_meta.json", "w") as f:
+            json.dump(quantification_meta, f, indent=2)
+        active_logger.info(f"Saved pulse rate quantification to {quantification_dir}")
+    pulse_rate_estimates = (
+        len(df_quantification[DataColumns.PULSE_RATE].dropna())
+        if DataColumns.PULSE_RATE in df_quantification.columns
+        else 0
+    )
+    active_logger.info(
+        f"Pulse rate analysis completed: {pulse_rate_estimates} valid pulse "
+        f"rate estimates from {len(df_quantification)} total windows"
+    )
+    return df_quantification

paradigma 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

paradigma 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl