paradigma-1.0.3-py3-none-any.whl → paradigma-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/__init__.py +10 -1
- paradigma/classification.py +38 -21
- paradigma/config.py +187 -123
- paradigma/constants.py +48 -35
- paradigma/feature_extraction.py +345 -255
- paradigma/load.py +476 -0
- paradigma/orchestrator.py +670 -0
- paradigma/pipelines/gait_pipeline.py +685 -246
- paradigma/pipelines/pulse_rate_pipeline.py +456 -155
- paradigma/pipelines/pulse_rate_utils.py +289 -248
- paradigma/pipelines/tremor_pipeline.py +405 -132
- paradigma/prepare_data.py +409 -0
- paradigma/preprocessing.py +500 -163
- paradigma/segmenting.py +180 -140
- paradigma/testing.py +370 -178
- paradigma/util.py +190 -101
- paradigma-1.1.0.dist-info/METADATA +229 -0
- paradigma-1.1.0.dist-info/RECORD +26 -0
- {paradigma-1.0.3.dist-info → paradigma-1.1.0.dist-info}/WHEEL +1 -1
- paradigma-1.1.0.dist-info/entry_points.txt +4 -0
- {paradigma-1.0.3.dist-info → paradigma-1.1.0.dist-info/licenses}/LICENSE +0 -1
- paradigma-1.0.3.dist-info/METADATA +0 -138
- paradigma-1.0.3.dist-info/RECORD +0 -22
paradigma/util.py
CHANGED
@@ -1,17 +1,45 @@
+import functools
 import os
+import warnings
+from datetime import datetime, timedelta
+
 import numpy as np
 import pandas as pd
-
+import tsdf
 from dateutil import parser
-from typing import List, Tuple, Optional
 from scipy.stats import gaussian_kde
-
-import tsdf
 from tsdf import TSDFMetadata
 
 from paradigma.constants import DataColumns, TimeUnit
 
 
+def deprecated(reason: str = ""):
+    """
+    Decorator to mark functions as deprecated. It will show a warning when the
+    function is used.
+
+    Parameters
+    ----------
+    reason : str, optional
+        Additional message to explain why it is deprecated and what to use
+        instead.
+    """
+
+    def decorator(func):
+        message = f"Function {func.__name__} is deprecated."
+        if reason:
+            message += f" {reason}"
+
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            warnings.warn(message, category=DeprecationWarning, stacklevel=2)
+            return func(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
+
+
 def parse_iso8601_to_datetime(date_str):
     return parser.parse(date_str)
 
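The hunk above introduces a `deprecated` decorator. A minimal usage sketch (the decorated function `old_helper` and the suggested replacement are hypothetical; only the decorator itself comes from the diff):

import warnings

from paradigma.util import deprecated


@deprecated(reason="Use new_helper() instead.")
def old_helper(x):
    # Hypothetical function kept only for backwards compatibility.
    return x * 2


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    result = old_helper(3)      # still returns 6
    print(caught[0].category)   # <class 'DeprecationWarning'>
    print(caught[0].message)    # Function old_helper is deprecated. Use new_helper() instead.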
@@ -28,7 +56,7 @@ def get_end_iso8601(start_iso8601, window_length_seconds):
 
 def write_np_data(
     metadata_time: TSDFMetadata,
-    np_array_time: np.ndarray,
+    np_array_time: np.ndarray,
     metadata_values: TSDFMetadata,
     np_array_values: np.ndarray,
     output_path: str,
@@ -53,7 +81,7 @@ def write_np_data(
         The filename for the metadata.
 
     """
-
+
     if not os.path.exists(output_path):
         os.makedirs(output_path)
 
@@ -62,9 +90,19 @@ def write_np_data(
     metadata_values.file_dir_path = output_path
 
     # store binaries and metadata
-    time_tsdf = tsdf.write_binary_file(
+    time_tsdf = tsdf.write_binary_file(
+        file_dir=output_path,
+        file_name=metadata_time.file_name,
+        data=np_array_time,
+        metadata=metadata_time.get_plain_tsdf_dict_copy(),
+    )
 
-    samples_tsdf = tsdf.write_binary_file(
+    samples_tsdf = tsdf.write_binary_file(
+        file_dir=output_path,
+        file_name=metadata_values.file_name,
+        data=np_array_values,
+        metadata=metadata_values.get_plain_tsdf_dict_copy(),
+    )
 
     tsdf.write_metadata([time_tsdf, samples_tsdf], output_filename)
 
@@ -118,7 +156,7 @@ def write_df_data(
 
 def read_metadata(
     input_path: str, meta_filename: str, time_filename: str, values_filename: str
-) ->
+) -> tuple[TSDFMetadata, TSDFMetadata]:
     metadata_dict = tsdf.load_metadata_from_path(
         os.path.join(input_path, meta_filename)
     )
@@ -127,20 +165,30 @@ def read_metadata(
     return metadata_time, metadata_values
 
 
-def load_tsdf_dataframe(
+def load_tsdf_dataframe(
+    path_to_data,
+    prefix,
+    meta_suffix="meta.json",
+    time_suffix="time.bin",
+    values_suffix="values.bin",
+):
     meta_filename = f"{prefix}_{meta_suffix}"
     time_filename = f"{prefix}_{time_suffix}"
     values_filename = f"{prefix}_{values_suffix}"
 
-    metadata_time, metadata_values = read_metadata(
-
+    metadata_time, metadata_values = read_metadata(
+        path_to_data, meta_filename, time_filename, values_filename
+    )
+    df = tsdf.load_dataframe_from_binaries(
+        [metadata_time, metadata_values], tsdf.constants.ConcatenationType.columns
+    )
 
     return df, metadata_time, metadata_values
 
 
 def load_metadata_list(
-    dir_path: str, meta_filename: str, filenames:
-) ->
+    dir_path: str, meta_filename: str, filenames: list[str]
+) -> list[TSDFMetadata]:
     """
     Load the metadata objects from a metadata file according to the specified binaries.
 
@@ -152,11 +200,9 @@ def load_metadata_list(
         The filename of the metadata file.
     filenames : List[str]
         The list of binary files of which the metadata files need to be loaded
-
-    """
-    metadata_dict = tsdf.load_metadata_from_path(
-        os.path.join(dir_path, meta_filename)
-    )
+
+    """
+    metadata_dict = tsdf.load_metadata_from_path(os.path.join(dir_path, meta_filename))
     metadata_list = []
     for filename in filenames:
         metadata_list.append(metadata_dict[filename])
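The hunk above gives `load_tsdf_dataframe` an explicit signature with configurable filename suffixes. A usage sketch (the data folder and file prefix are made up; the suffix defaults come from the signature above):

from paradigma.util import load_tsdf_dataframe

# Expects <prefix>_meta.json, <prefix>_time.bin and <prefix>_values.bin in the folder.
df, metadata_time, metadata_values = load_tsdf_dataframe(
    path_to_data="data/imu",       # hypothetical folder
    prefix="IMU_segment0001",      # hypothetical file prefix
)
print(df.head())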
@@ -171,7 +217,8 @@ def transform_time_array(
     start_time: float = 0.0,
 ) -> np.ndarray:
     """
-    Transforms the time array to relative time (when defined in delta time)
+    Transforms the time array to relative time (when defined in delta time)
+    and scales the values.
 
     Parameters
     ----------
@@ -180,7 +227,8 @@ def transform_time_array(
     input_unit_type : str
         The time unit type of the input time array.
     output_unit_type : str
-        The time unit type of the output time array. ParaDigMa expects
+        The time unit type of the output time array. ParaDigMa expects
+        `TimeUnit.RELATIVE_S`.
     start_time : float, optional
         The start time of the time array in UNIX seconds (default is 0.0)
 
@@ -191,41 +239,65 @@ def transform_time_array(
 
     Notes
     -----
-    - The function handles different time units (`TimeUnit.RELATIVE_MS`,
-
-
+    - The function handles different time units (`TimeUnit.RELATIVE_MS`,
+      `TimeUnit.RELATIVE_S`, `TimeUnit.ABSOLUTE_MS`, `TimeUnit.ABSOLUTE_S`,
+      `TimeUnit.DIFFERENCE_MS`, `TimeUnit.DIFFERENCE_S`).
+    - The transformation allows for scaling of the time array, converting
+      between time unit types (e.g., relative, absolute, or difference).
+    - When converting to `TimeUnit.RELATIVE_MS`, the function calculates the
+      relative time starting from the provided or default start time.
     """
-    input_units = input_unit_type.split(
-    output_units = output_unit_type.split(
+    input_units = input_unit_type.split("_")[-1].lower()
+    output_units = output_unit_type.split("_")[-1].lower()
 
     if input_units == output_units:
         scale_factor = 1
-    elif input_units ==
+    elif input_units == "s" and output_units == "ms":
         scale_factor = 1e3
-    elif input_units ==
+    elif input_units == "ms" and output_units == "s":
         scale_factor = 1 / 1e3
     else:
-        raise ValueError(
-
-
-
-    #
+        raise ValueError(
+            f"Unsupported time units conversion: {input_units} to {output_units}"
+        )
+
+    # Transform to relative time (`TimeUnit.RELATIVE_MS`)
+    if (
+        input_unit_type == TimeUnit.DIFFERENCE_MS
+        or input_unit_type == TimeUnit.DIFFERENCE_S
+    ):
+        # Convert a series of differences into cumulative sum to
+        # reconstruct original time series.
         time_array = np.cumsum(np.double(time_array))
-    elif
+    elif (
+        input_unit_type == TimeUnit.ABSOLUTE_MS
+        or input_unit_type == TimeUnit.ABSOLUTE_S
+    ):
         # Set the start time if not provided.
         if np.isclose(start_time, 0.0, rtol=1e-09, atol=1e-09):
             start_time = time_array[0]
         # Convert absolute time stamps into a time series relative to start_time.
-        time_array =
-
-    # Transform the time array from `TimeUnit.RELATIVE_MS` to the
-
+        time_array = time_array - start_time
+
+    # Transform the time array from `TimeUnit.RELATIVE_MS` to the
+    # specified time unit type
+    if (
+        output_unit_type == TimeUnit.ABSOLUTE_MS
+        or output_unit_type == TimeUnit.ABSOLUTE_S
+    ):
         # Converts time array to absolute time by adding the start time to each element.
         time_array = time_array + start_time
-    elif
-
+    elif (
+        output_unit_type == TimeUnit.DIFFERENCE_MS
+        or output_unit_type == TimeUnit.DIFFERENCE_S
+    ):
+        # Creates a new array starting with 0, followed by the
+        # differences between consecutive elements.
         time_array = np.diff(np.insert(time_array, 0, start_time))
-    elif
+    elif (
+        output_unit_type == TimeUnit.RELATIVE_MS
+        or output_unit_type == TimeUnit.RELATIVE_S
+    ):
        # The array is already in relative format, do nothing.
        pass
 
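The rewritten branches above decide how the time axis is converted. The core arithmetic can be illustrated in isolation with plain NumPy (a sketch of the same logic, not a call into ParaDigMa; the sample values are made up):

import numpy as np

# Absolute UNIX timestamps in seconds -> time relative to the first sample.
absolute_s = np.array([1_700_000_000.00, 1_700_000_000.01, 1_700_000_000.02])
relative_s = absolute_s - absolute_s[0]              # [0.0, 0.01, 0.02]

# Per-sample differences -> relative time reconstructed via the cumulative sum.
difference_s = np.array([0.01, 0.01, 0.01])
reconstructed = np.cumsum(np.double(difference_s))   # [0.01, 0.02, 0.03]

# Scaling between seconds and milliseconds, as in the scale_factor branches.
relative_ms = relative_s * 1e3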
@@ -256,25 +328,25 @@ def convert_units_accelerometer(data: np.ndarray, units: str) -> np.ndarray:
         return data
     else:
         raise ValueError(f"Unsupported unit: {units}")
-
+
 
 def convert_units_gyroscope(data: np.ndarray, units: str) -> np.ndarray:
     """
     Convert gyroscope data to deg/s.
-
+
     Parameters
     ----------
     data : np.ndarray
         The gyroscope data.
-
+
     units : str
         The unit of the data (currently supports deg/s and rad/s).
-
+
     Returns
     -------
     np.ndarray
         The gyroscope data in deg/s.
-
+
     """
     if units == "deg/s":
         return data
@@ -282,9 +354,9 @@ def convert_units_gyroscope(data: np.ndarray, units: str) -> np.ndarray:
         return np.degrees(data)
     else:
         raise ValueError(f"Unsupported unit: {units}")
-
 
-
+
+def invert_watch_side(df: pd.DataFrame, side: str, sensor="both") -> np.ndarray:
     """
     Invert the data based on the watch side.
 
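A short usage sketch of the gyroscope unit conversion touched above (the input values are made up):

import numpy as np

from paradigma.util import convert_units_gyroscope

rad_per_s = np.array([0.0, np.pi / 2, np.pi])
deg_per_s = convert_units_gyroscope(rad_per_s, units="rad/s")  # [0., 90., 180.]
unchanged = convert_units_gyroscope(deg_per_s, units="deg/s")  # returned unchanged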
@@ -305,78 +377,88 @@ def invert_watch_side(df: pd.DataFrame, side: str, sensor='both') -> np.ndarray:
     """
     if side not in ["left", "right"]:
         raise ValueError(f"Unsupported side: {side}")
-    if sensor not in [
+    if sensor not in ["accelerometer", "gyroscope", "both"]:
         raise ValueError(f"Unsupported sensor: {sensor}")
 
     elif side == "right":
-        if sensor in [
+        if sensor in ["gyroscope", "both"]:
             df[DataColumns.GYROSCOPE_Y] *= -1
             df[DataColumns.GYROSCOPE_Z] *= -1
-        if sensor in [
+        if sensor in ["accelerometer", "both"]:
             df[DataColumns.ACCELEROMETER_X] *= -1
 
     return df
 
-
+
+def aggregate_parameter(
+    parameter: np.ndarray,
+    aggregate: str,
+    evaluation_points: np.ndarray | None = None,
+) -> np.ndarray | int:
     """
     Aggregate a parameter based on the specified method.
-
+
     Parameters
     ----------
     parameter : np.ndarray
         The parameter to aggregate.
-
+
     aggregate : str
         The aggregation method to apply.
 
     evaluation_points : np.ndarray, optional
-        Should be specified if the mode is derived for a continuous parameter.
-        Defines the evaluation points for the kernel density estimation
+        Should be specified if the mode is derived for a continuous parameter.
+        Defines the evaluation points for the kernel density estimation
+        function, from which the maximum is derived as the mode.
 
     Returns
     -------
     np.ndarray
         The aggregated parameter.
     """
-    if aggregate ==
+    if aggregate == "mean":
         return np.mean(parameter)
-    elif aggregate ==
+    elif aggregate == "median":
         return np.median(parameter)
-    elif aggregate ==
+    elif aggregate == "mode_binned":
         if evaluation_points is None:
-            raise ValueError(
+            raise ValueError(
+                "evaluation_points must be provided for 'mode_binned' aggregation."
+            )
         else:
             kde = gaussian_kde(parameter)
             kde_values = kde(evaluation_points)
             max_index = np.argmax(kde_values)
             return evaluation_points[max_index]
-    elif aggregate ==
+    elif aggregate == "mode":
         unique_values, counts = np.unique(parameter, return_counts=True)
         return unique_values[np.argmax(counts)]
-    elif aggregate ==
+    elif aggregate == "90p":
         return np.percentile(parameter, 90)
-    elif aggregate ==
+    elif aggregate == "95p":
         return np.percentile(parameter, 95)
-    elif aggregate ==
+    elif aggregate == "99p":
         return np.percentile(parameter, 99)
-    elif aggregate ==
+    elif aggregate == "std":
         return np.std(parameter)
-    elif aggregate ==
+    elif aggregate == "cov":
         mean_value = np.mean(parameter)
         return np.std(parameter) / mean_value if mean_value != 0 else 0
     else:
         raise ValueError(f"Invalid aggregation method: {aggregate}")
 
+
 def merge_predictions_with_timestamps(
-
-
-
-
-
-
+    df_ts: pd.DataFrame,
+    df_predictions: pd.DataFrame,
+    pred_proba_colname: str,
+    window_length_s: float,
+    fs: int,
+) -> pd.DataFrame:
     """
-    Merges prediction probabilities with timestamps by expanding overlapping
-    into individual timestamps and averaging probabilities per unique
+    Merges prediction probabilities with timestamps by expanding overlapping
+    windows into individual timestamps and averaging probabilities per unique
+    timestamp.
 
     Parameters:
     ----------
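The hunk above adds `aggregate_parameter`, which maps an aggregation name to a NumPy/SciPy reduction. A usage sketch (the input array is made up):

import numpy as np

from paradigma.util import aggregate_parameter

values = np.array([18.0, 22.0, 25.0, 30.0, 21.0])

print(aggregate_parameter(values, "median"))  # 22.0
print(aggregate_parameter(values, "95p"))     # 95th percentile
print(aggregate_parameter(values, "cov"))     # std / mean

# 'mode_binned' estimates the mode of a continuous parameter with a kernel
# density estimate evaluated on a user-supplied grid.
grid = np.linspace(15, 35, 201)
print(aggregate_parameter(values, "mode_binned", evaluation_points=grid))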
@@ -385,10 +467,11 @@ def merge_predictions_with_timestamps(
         Must include the timestamp column specified in `DataColumns.TIME`.
 
     df_predictions : pd.DataFrame
-        DataFrame containing prediction windows with start times and
-        Must include:
+        DataFrame containing prediction windows with start times and
+        probabilities. Must include:
         - A column for window start times (defined by `DataColumns.TIME`).
-        - A column for prediction probabilities (defined by
+        - A column for prediction probabilities (defined by
+          `DataColumns.PRED_GAIT_PROBA`).
 
     pred_proba_colname : str
         The column name for the prediction probabilities in `df_predictions`.
@@ -398,7 +481,7 @@
 
     fs : int
         The sampling frequency of the data.
-
+
     Returns:
     -------
     pd.DataFrame
@@ -419,22 +502,18 @@ def merge_predictions_with_timestamps(
     # Step 1: Generate all timestamps for prediction windows using NumPy broadcasting
     window_length = int(window_length_s * fs)
     timestamps = (
-        df_predictions[DataColumns.TIME].values[:, None]
-        np.arange(0, window_length) / fs
+        df_predictions[DataColumns.TIME].values[:, None]
+        + np.arange(0, window_length) / fs
     )
-
+
     # Flatten timestamps and probabilities into a single array for efficient processing
     flat_timestamps = timestamps.ravel()
-    flat_proba = np.repeat(
-        df_predictions[pred_proba_colname].values,
-        window_length
-    )
+    flat_proba = np.repeat(df_predictions[pred_proba_colname].values, window_length)
 
     # Step 2: Create a DataFrame for expanded data
-    expanded_df = pd.DataFrame(
-        DataColumns.TIME: flat_timestamps,
-
-    })
+    expanded_df = pd.DataFrame(
+        {DataColumns.TIME: flat_timestamps, pred_proba_colname: flat_proba}
+    )
 
     # Step 3: Round timestamps and aggregate probabilities
     expanded_df[DataColumns.TIME] = expanded_df[DataColumns.TIME].round(2)
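The broadcasting step above expands each prediction window into per-sample timestamps. The same operation in isolation (window starts, probabilities, and sampling settings are made up):

import numpy as np

fs = 100                     # samples per second
window_length_s = 0.04       # 4-sample windows, for illustration
window_length = int(window_length_s * fs)

window_starts = np.array([0.0, 0.02])   # start time of each prediction window
proba = np.array([0.9, 0.2])            # one probability per window

# Each window start expands into window_length per-sample timestamps.
timestamps = window_starts[:, None] + np.arange(0, window_length) / fs
flat_timestamps = timestamps.ravel()          # ~[0, 0.01, 0.02, 0.03, 0.02, 0.03, 0.04, 0.05]
flat_proba = np.repeat(proba, window_length)  # [0.9, 0.9, 0.9, 0.9, 0.2, 0.2, 0.2, 0.2]
# Overlapping timestamps (0.02, 0.03) are then rounded and averaged per unique timestamp.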
@@ -442,14 +521,15 @@ def merge_predictions_with_timestamps(
 
     # Step 4: Round timestamps in `df_ts` and merge
     df_ts[DataColumns.TIME] = df_ts[DataColumns.TIME].round(2)
-    df_ts = pd.merge(df_ts, mean_proba, how=
+    df_ts = pd.merge(df_ts, mean_proba, how="left", on=DataColumns.TIME)
     df_ts = df_ts.dropna(subset=[pred_proba_colname])
 
     return df_ts
 
 
-def select_hours(
-
+def select_hours(
+    df: pd.DataFrame, select_hours_start: str, select_hours_end: str
+) -> pd.DataFrame:
     """
     Select hours of interest from the data to include in the aggregation step.
 
@@ -460,7 +540,7 @@ def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: st
 
     select_hours_start: str
         The start time of the selected hours in "HH:MM" format.
-
+
     select_hours_end: str
        The end time of the selected hours in "HH:MM" format.
 
@@ -471,14 +551,18 @@ def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: st
 
     """
 
-    select_hours_start = datetime.strptime(
-
-
+    select_hours_start = datetime.strptime(
+        select_hours_start, "%H:%M"
+    ).time()  # convert to time object
+    select_hours_end = datetime.strptime(select_hours_end, "%H:%M").time()
+    df_subset = df[
+        df["time_dt"].dt.time.between(select_hours_start, select_hours_end)
+    ]  # select the hours of interest
 
     return df_subset
 
-def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
 
+def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
     """
     Select days of interest from the data to include in the aggregation step.
 
@@ -488,7 +572,8 @@ def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
         Input data with column 'time_dt' in which the date is stored.
 
     min_hours_per_day: int
-        The minimum number of hours per day required for including the day
+        The minimum number of hours per day required for including the day
+        in the aggregation step.
 
 
     Returns
@@ -499,8 +584,12 @@ def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
     """
 
     min_s_per_day = min_hours_per_day * 3600
-    window_length_s =
+    window_length_s = (
+        df["time_dt"].diff().dt.total_seconds().iloc[1]
+    )  # determine the length of the first window in seconds
     min_windows_per_day = min_s_per_day / window_length_s
-    df_subset = df.groupby(df[
+    df_subset = df.groupby(df["time_dt"].dt.date).filter(
+        lambda x: len(x) >= min_windows_per_day
+    )
 
-    return df_subset
+    return df_subset
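Finally, a usage sketch of the two selection helpers changed above (the DataFrame is made up; both functions expect a datetime column named 'time_dt'):

import pandas as pd

from paradigma.util import select_days, select_hours

# One row every 30 minutes over three days, for illustration.
df = pd.DataFrame(
    {"time_dt": pd.date_range("2024-01-01 00:00", periods=3 * 48, freq="30min")}
)

df_daytime = select_hours(df, select_hours_start="08:00", select_hours_end="22:00")
df_full_days = select_days(df_daytime, min_hours_per_day=10)
print(len(df), len(df_daytime), len(df_full_days))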