PyPI - paradigma - Versions diffs - 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl - Mend

paradigma 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

paradigma/classification.py +28 -11
paradigma/config.py +158 -101
paradigma/constants.py +39 -34
paradigma/feature_extraction.py +270 -211
paradigma/pipelines/gait_pipeline.py +286 -190
paradigma/pipelines/pulse_rate_pipeline.py +202 -133
paradigma/pipelines/pulse_rate_utils.py +144 -142
paradigma/pipelines/tremor_pipeline.py +139 -95
paradigma/preprocessing.py +179 -110
paradigma/segmenting.py +138 -113
paradigma/testing.py +359 -172
paradigma/util.py +171 -80
{paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info}/METADATA +39 -36
paradigma-1.0.4.dist-info/RECORD +23 -0
{paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info}/WHEEL +1 -1
paradigma-1.0.4.dist-info/entry_points.txt +4 -0
{paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info/licenses}/LICENSE +0 -1
paradigma-1.0.2.dist-info/RECORD +0 -22

paradigma/pipelines/tremor_pipeline.py CHANGED Viewed

@@ -1,21 +1,26 @@
-import pandas as pd
-import numpy as np
 from pathlib import Path
+import numpy as np
+import pandas as pd
 from scipy import signal
-from scipy.stats import gaussian_kde
 from paradigma.classification import ClassifierPackage
-from paradigma.constants import DataColumns
 from paradigma.config import TremorConfig
-from paradigma.feature_extraction import compute_mfccs, compute_power_in_bandwidth, compute_total_power, extract_frequency_peak, \
-    extract_tremor_power
-from paradigma.segmenting import tabulate_windows, WindowedDataExtractor
+from paradigma.constants import DataColumns
+from paradigma.feature_extraction import (
+    compute_mfccs,
+    compute_power_in_bandwidth,
+    compute_total_power,
+    extract_frequency_peak,
+    extract_tremor_power,
+)
+from paradigma.segmenting import WindowedDataExtractor, tabulate_windows
 from paradigma.util import aggregate_parameter
 def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFrame:
     """
-    This function groups sequences of timestamps into windows and subsequently extracts
+    This function groups sequences of timestamps into windows and subsequently extracts
     tremor features from windowed gyroscope data.
     Parameters
@@ -32,7 +37,7 @@ def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFr
     -------
     pd.DataFrame
         A DataFrame containing extracted tremor features and a column corresponding to time.
     Notes
     -----
     - This function groups the data into windows based on timestamps.
@@ -44,21 +49,27 @@ def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFr
         If the input DataFrame does not contain the required columns as specified in the configuration or if any step in the feature extraction fails.
     """
     # group sequences of timestamps into windows
-    windowed_cols = [DataColumns.TIME] + config.gyroscope_cols
-    windowed_data = tabulate_windows(df, windowed_cols, config.window_length_s, config.window_step_length_s, config.sampling_frequency)
+    windowed_colnames = [config.time_colname] + config.gyroscope_colnames
+    windowed_data = tabulate_windows(
+        df,
+        windowed_colnames,
+        config.window_length_s,
+        config.window_step_length_s,
+        config.sampling_frequency,
+    )
-    extractor = WindowedDataExtractor(windowed_cols)
+    extractor = WindowedDataExtractor(windowed_colnames)
     # Extract the start time and gyroscope data from the windowed data
-    idx_time = extractor.get_index(DataColumns.TIME)
-    idx_gyro = extractor.get_slice(config.gyroscope_cols)
+    idx_time = extractor.get_index(config.time_colname)
+    idx_gyro = extractor.get_slice(config.gyroscope_colnames)
     # Extract data
     start_time = np.min(windowed_data[:, :, idx_time], axis=1)
     windowed_gyro = windowed_data[:, :, idx_gyro]
-    df_features = pd.DataFrame(start_time, columns=[DataColumns.TIME])
+    df_features = pd.DataFrame(start_time, columns=[config.time_colname])
     # transform the signals from the temporal domain to the spectral domain and extract tremor features
     df_spectral_features = extract_spectral_domain_features(windowed_gyro, config)
@@ -68,7 +79,9 @@ def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFr
     return df_features
-def detect_tremor(df: pd.DataFrame, config: TremorConfig, full_path_to_classifier_package: str | Path) -> pd.DataFrame:
+def detect_tremor(
+    df: pd.DataFrame, config: TremorConfig, full_path_to_classifier_package: str | Path
+) -> pd.DataFrame:
     """
     Detects tremor in the input DataFrame using a pre-trained classifier and applies a threshold to the predicted probabilities.
@@ -130,26 +143,39 @@ def detect_tremor(df: pd.DataFrame, config: TremorConfig, full_path_to_classifie
     X = df.loc[:, feature_names_predictions].copy()
     X.loc[:, feature_names_scaling] = scaled_features
-    # Get the tremor probability
+    # Get the tremor probability
     df[DataColumns.PRED_TREMOR_PROBA] = clf_package.predict_proba(X)
     # Make prediction based on pre-defined threshold
-    df[DataColumns.PRED_TREMOR_LOGREG] = (df[DataColumns.PRED_TREMOR_PROBA] >= clf_package.threshold).astype(int)
+    df[DataColumns.PRED_TREMOR_LOGREG] = (
+        df[DataColumns.PRED_TREMOR_PROBA] >= clf_package.threshold
+    ).astype(int)
+    # Perform extra checks for rest tremor
+    peak_check = (df["freq_peak"] >= config.fmin_rest_tremor) & (
+        df["freq_peak"] <= config.fmax_rest_tremor
+    )  # peak within 3-7 Hz
+    df[DataColumns.PRED_ARM_AT_REST] = (
+        df["below_tremor_power"] <= config.movement_threshold
+    ).astype(
+        int
+    )  # arm at rest or in stable posture
+    df[DataColumns.PRED_TREMOR_CHECKED] = (
+        (df[DataColumns.PRED_TREMOR_LOGREG] == 1)
+        & peak_check
+        & df[DataColumns.PRED_ARM_AT_REST]
+    ).astype(int)
-    # Perform extra checks for rest tremor
-    peak_check = (df['freq_peak'] >= config.fmin_rest_tremor) & (df['freq_peak']<=config.fmax_rest_tremor) # peak within 3-7 Hz
-    df[DataColumns.PRED_ARM_AT_REST] = (df['below_tremor_power'] <= config.movement_threshold).astype(int) # arm at rest or in stable posture
-    df[DataColumns.PRED_TREMOR_CHECKED] = ((df[DataColumns.PRED_TREMOR_LOGREG]==1) & (peak_check==True) & (df[DataColumns.PRED_ARM_AT_REST] == True)).astype(int)
     return df
 def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
     """
     Quantifies the amount of tremor time and tremor power, aggregated over all windows in the input dataframe.
-    Tremor time is calculated as the number of the detected tremor windows, as percentage of the number of windows
+    Tremor time is calculated as the number of the detected tremor windows, as percentage of the number of windows
     without significant non-tremor movement (at rest). For tremor power the following aggregates are derived:
-    the median, mode and percentile of tremor power specified in the configuration object.
+    the median, mode and percentile of tremor power specified in the configuration object.
     Parameters
     ----------
     df : pd.DataFrame
@@ -170,61 +196,69 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
     - Tremor power is converted to log scale, after adding a constant of 1, so that zero tremor power
     corresponds to a value of 0 in log scale.
     - The modal tremor power is computed based on gaussian kernel density estimation.
     """
-    nr_valid_days = df['time_dt'].dt.date.unique().size # number of valid days in the input dataframe
-    nr_windows_total = df.shape[0] # number of windows in the input dataframe
+    nr_valid_days = (
+        df["time_dt"].dt.date.unique().size
+    )  # number of valid days in the input dataframe
+    nr_windows_total = df.shape[0]  # number of windows in the input dataframe
     # remove windows with detected non-tremor arm movements to control for the amount of arm activities performed
     df_filtered = df.loc[df.pred_arm_at_rest == 1]
-    nr_windows_rest = df_filtered.shape[0] # number of windows without non-tremor arm movement
+    nr_windows_rest = df_filtered.shape[
+        0
+    ]  # number of windows without non-tremor arm movement
-    if nr_windows_rest == 0: # if no windows without non-tremor arm movement are detected
-        raise Warning('No windows without non-tremor arm movement are detected.')
+    if (
+        nr_windows_rest == 0
+    ):  # if no windows without non-tremor arm movement are detected
+        raise Warning("No windows without non-tremor arm movement are detected.")
     # calculate tremor time
-    n_windows_tremor = np.sum(df_filtered['pred_tremor_checked'])
-    perc_windows_tremor = n_windows_tremor / nr_windows_rest * 100 # as percentage of total measured time without non-tremor arm movement
+    n_windows_tremor = np.sum(df_filtered["pred_tremor_checked"])
+    perc_windows_tremor = (
+        n_windows_tremor / nr_windows_rest * 100
+    )  # as percentage of total measured time without non-tremor arm movement
-    aggregated_tremor_power = {} # initialize dictionary to store aggregated tremor power measures
-    if n_windows_tremor == 0: # if no tremor is detected, the tremor power measures are set to NaN
+    aggregated_tremor_power = (
+        {}
+    )  # initialize dictionary to store aggregated tremor power measures
-        aggregated_tremor_power['median_tremor_power'] = np.nan
-        aggregated_tremor_power['modal_tremor_power'] = np.nan
-        aggregated_tremor_power['90p_tremor_power'] = np.nan
+    if (
+        n_windows_tremor == 0
+    ):  # if no tremor is detected, the tremor power measures are set to NaN
+        aggregated_tremor_power["median_tremor_power"] = np.nan
+        aggregated_tremor_power["mode_binned_tremor_power"] = np.nan
+        aggregated_tremor_power["90p_tremor_power"] = np.nan
     else:
         # calculate aggregated tremor power measures
-        tremor_power = df_filtered.loc[df_filtered['pred_tremor_checked'] == 1, 'tremor_power']
-        tremor_power = np.log10(tremor_power+1) # convert to log scale
+        tremor_power = df_filtered.loc[
+            df_filtered["pred_tremor_checked"] == 1, "tremor_power"
+        ]
+        tremor_power = np.log10(tremor_power + 1)  # convert to log scale
         for aggregate in config.aggregates_tremor_power:
             aggregate_name = f"{aggregate}_tremor_power"
-            if aggregate == 'mode':
-                # calculate modal tremor power
-                bin_edges = np.linspace(0, 6, 301)
-                kde = gaussian_kde(tremor_power)
-                kde_values = kde(bin_edges)
-                max_index = np.argmax(kde_values)
-                aggregated_tremor_power['modal_tremor_power'] = bin_edges[max_index]
-            else: # calculate te other aggregates (e.g. median and 90th percentile) of tremor power
-                aggregated_tremor_power[aggregate_name] = aggregate_parameter(tremor_power, aggregate)
+            aggregated_tremor_power[aggregate_name] = aggregate_parameter(
+                tremor_power, aggregate, config.evaluation_points_tremor_power
+            )
     # store aggregates in json format
     d_aggregates = {
-        'metadata': {
-            'nr_valid_days': nr_valid_days,
-            'nr_windows_total': nr_windows_total,
-            'nr_windows_rest': nr_windows_rest
+        "metadata": {
+            "nr_valid_days": nr_valid_days,
+            "nr_windows_total": nr_windows_total,
+            "nr_windows_rest": nr_windows_rest,
+        },
+        "aggregated_tremor_measures": {
+            "perc_windows_tremor": perc_windows_tremor,
+            "median_tremor_power": aggregated_tremor_power["median_tremor_power"],
+            "modal_tremor_power": aggregated_tremor_power["mode_binned_tremor_power"],
+            "90p_tremor_power": aggregated_tremor_power["90p_tremor_power"],
         },
-        'aggregated_tremor_measures': {
-            'perc_windows_tremor': perc_windows_tremor,
-            'median_tremor_power': aggregated_tremor_power['median_tremor_power'],
-            'modal_tremor_power': aggregated_tremor_power['modal_tremor_power'],
-            '90p_tremor_power': aggregated_tremor_power['90p_tremor_power']
-        }
     }
     return d_aggregates
@@ -234,7 +268,7 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
     """
     Compute spectral domain features from the gyroscope data.
-    This function computes Mel-frequency cepstral coefficients (MFCCs), the frequency of the peak,
+    This function computes Mel-frequency cepstral coefficients (MFCCs), the frequency of the peak,
     the tremor power, and the below tremor power based on the total power spectral density of the windowed gyroscope data.
     Parameters
@@ -242,15 +276,15 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
     data : numpy.ndarray
         A 2D numpy array where each row corresponds to a window of gyroscope data.
     config : object
-        Configuration object containing settings such as sampling frequency, window type,
+        Configuration object containing settings such as sampling frequency, window type,
         and MFCC parameters.
     Returns
     -------
     pd.DataFrame
-        The feature dataframe containing the extracted spectral features, including
+        The feature dataframe containing the extracted spectral features, including
         MFCCs, the frequency of the peak, the tremor power and below tremor power for each window.
     """
     # Initialize a dictionary to hold the results
@@ -262,7 +296,7 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
     segment_length_spectrogram_s = config.segment_length_spectrogram_s
     overlap_fraction = config.overlap_fraction
     spectral_resolution = config.spectral_resolution
-    window_type = 'hann'
+    window_type = "hann"
     # Compute the power spectral density
     segment_length_n = sampling_frequency * segment_length_psd_s
@@ -271,15 +305,15 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
     nfft = sampling_frequency / spectral_resolution
     freqs, psd = signal.welch(
-        x=data,
-        fs=sampling_frequency,
-        window=window,
+        x=data,
+        fs=sampling_frequency,
+        window=window,
         nperseg=segment_length_n,
-        noverlap=overlap_n,
-        nfft=nfft,
-        detrend=False,
-        scaling='density',
-        axis=1
+        noverlap=overlap_n,
+        nfft=nfft,
+        detrend=False,
+        scaling="density",
+        axis=1,
     )
     # Compute the spectrogram
@@ -288,18 +322,18 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
     window = signal.get_window(window_type, segment_length_n)
     f, t, S1 = signal.stft(
-        x=data,
-        fs=sampling_frequency,
-        window=window,
-        nperseg=segment_length_n,
+        x=data,
+        fs=sampling_frequency,
+        window=window,
+        nperseg=segment_length_n,
         noverlap=overlap_n,
         boundary=None,
-        axis=1
+        axis=1,
     )
     # Compute total power in the PSD and the total spectrogram (summed over the three axes)
     total_psd = compute_total_power(psd)
-    total_spectrogram = np.sum(np.abs(S1)*sampling_frequency, axis=2)
+    total_spectrogram = np.sum(np.abs(S1) * sampling_frequency, axis=2)
     # Compute the MFCC's
     config.mfcc_low_frequency = config.fmin_mfcc
@@ -310,21 +344,31 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
     mfccs = compute_mfccs(
         total_power_array=total_spectrogram,
         config=config,
-        total_power_type='spectrogram',
-        rounding_method='round',
-        multiplication_factor=1
+        total_power_type="spectrogram",
+        rounding_method="round",
+        multiplication_factor=1,
     )
     # Combine the MFCCs into the features DataFrame
-    mfcc_colnames = [f'mfcc_{x}' for x in range(1, config.mfcc_n_coefficients + 1)]
+    mfcc_colnames = [f"mfcc_{x}" for x in range(1, config.mfcc_n_coefficients + 1)]
     for i, colname in enumerate(mfcc_colnames):
         feature_dict[colname] = mfccs[:, i]
     # Compute the frequency of the peak, non-tremor power and tremor power
-    feature_dict['freq_peak'] = extract_frequency_peak(freqs, total_psd, config.fmin_peak_search, config.fmax_peak_search)
-    feature_dict['below_tremor_power'] = compute_power_in_bandwidth(freqs, total_psd, config.fmin_below_rest_tremor, config.fmax_below_rest_tremor,
-                                                                include_max=False, spectral_resolution=config.spectral_resolution,
-                                                                cumulative_sum_method='sum')
-    feature_dict['tremor_power'] = extract_tremor_power(freqs, total_psd, config.fmin_rest_tremor, config.fmax_rest_tremor)
+    feature_dict["freq_peak"] = extract_frequency_peak(
+        freqs, total_psd, config.fmin_peak_search, config.fmax_peak_search
+    )
+    feature_dict["below_tremor_power"] = compute_power_in_bandwidth(
+        freqs,
+        total_psd,
+        config.fmin_below_rest_tremor,
+        config.fmax_below_rest_tremor,
+        include_max=False,
+        spectral_resolution=config.spectral_resolution,
+        cumulative_sum_method="sum",
+    )
+    feature_dict["tremor_power"] = extract_tremor_power(
+        freqs, total_psd, config.fmin_rest_tremor, config.fmax_rest_tremor
+    )
-    return pd.DataFrame(feature_dict)
+    return pd.DataFrame(feature_dict)

paradigma 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

paradigma 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl