paradigma 0.4.7__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
paradigma/config.py CHANGED
@@ -244,7 +244,7 @@ class TremorConfig(IMUConfig):
  }


- class HeartRateConfig(PPGConfig):
+ class PulseRateConfig(PPGConfig):
  def __init__(self, sensor: str = 'ppg', min_window_length_s: int = 30) -> None:
  super().__init__()

@@ -265,14 +265,14 @@ class HeartRateConfig(PPGConfig):
  self.freq_bin_resolution = 0.05 # Hz

  # ---------------------
- # Heart rate estimation
+ # Pulse rate estimation
  # ---------------------
  self.set_tfd_length(min_window_length_s) # Set tfd length to default of 30 seconds
  self.threshold_sqa = 0.5
- self.threshold_sqa_accelerometer = 0.13
+ self.threshold_sqa_accelerometer = 0.10

- hr_est_length = 2
- self.hr_est_samples = hr_est_length * self.sampling_frequency
+ pr_est_length = 2 # pulse rate estimation length in seconds
+ self.pr_est_samples = pr_est_length * self.sampling_frequency

  # Time-frequency distribution parameters
  self.kern_type = 'sep'
@@ -297,7 +297,7 @@ class HeartRateConfig(PPGConfig):

  def set_tfd_length(self, tfd_length: int):
  self.tfd_length = tfd_length
- self.min_hr_samples = int(round(self.tfd_length * self.sampling_frequency))
+ self.min_pr_samples = int(round(self.tfd_length * self.sampling_frequency))

  def set_sensor(self, sensor):
  self.sensor = sensor
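Downstream code has to track the `HeartRateConfig` → `PulseRateConfig` class rename and the `hr_*` → `pr_*` attribute renames above. A minimal migration sketch against 1.0.1 (illustrative; the prints are for demonstration only):

```python
from paradigma.config import PulseRateConfig  # was: from paradigma.config import HeartRateConfig

config = PulseRateConfig()                 # was: HeartRateConfig()
print(config.pr_est_samples)               # was: config.hr_est_samples
print(config.min_pr_samples)               # was: config.min_hr_samples
print(config.threshold_sqa_accelerometer)  # default tightened from 0.13 to 0.10
```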
paradigma/constants.py CHANGED
@@ -58,8 +58,8 @@ class DataColumns():
  PRED_SQA_ACC_LABEL: str = "pred_sqa_acc_label"
  PRED_SQA: str = "pred_sqa"

- # Constants for heart rate
- HEART_RATE: str = "heart_rate"
+ # Constants for pulse rate
+ PULSE_RATE: str = "pulse_rate"

  @dataclass(frozen=True)
  class DataUnits():
paradigma/feature_extraction.py CHANGED
@@ -7,7 +7,7 @@ from scipy.signal import find_peaks, windows
  from scipy.stats import kurtosis, skew
  from sklearn.decomposition import PCA

- from paradigma.config import HeartRateConfig
+ from paradigma.config import PulseRateConfig


  def compute_statistics(data: np.ndarray, statistic: str, abs_stats: bool=False) -> np.ndarray:
@@ -353,7 +353,7 @@ def extract_frequency_peak(
  def compute_relative_power(
  freqs: np.ndarray,
  psd: np.ndarray,
- config: HeartRateConfig
+ config: PulseRateConfig
  ) -> list:
  """
  Calculate relative power within the dominant frequency band in the physiological range (0.75 - 3 Hz).
@@ -364,11 +364,11 @@ def compute_relative_power(
  The frequency bins of the power spectral density.
  psd: np.ndarray
  The power spectral density of the signal.
- config: HeartRateConfig
+ config: PulseRateConfig
  The configuration object containing the parameters for the feature extraction. The following
  attributes are used:
  - freq_band_physio: tuple
- The frequency band for physiological heart rate (default: (0.75, 3)).
+ The frequency band for physiological pulse rate (default: (0.75, 3)).
  - bandwidth: float
  The bandwidth around the peak frequency to consider for relative power calculation (default: 0.5).

@@ -597,11 +597,9 @@ def pca_transform_gyroscope(
  df: pd.DataFrame,
  y_gyro_colname: str,
  z_gyro_colname: str,
- pred_colname: str | None = None,
  ) -> np.ndarray:
  """
- Perform principal component analysis (PCA) on gyroscope data to estimate velocity. If pred_colname is provided,
- the PCA is fitted on the predicted gait data. Otherwise, the PCA is fitted on the entire dataset.
+ Perform principal component analysis (PCA) on gyroscope data to estimate velocity.

  Parameters
  ----------
@@ -611,8 +609,6 @@ def pca_transform_gyroscope(
  The column name for the y-axis gyroscope data.
  z_gyro_colname : str
  The column name for the z-axis gyroscope data.
- pred_colname : str, optional
- The column name for the predicted gait (default: None).

  Returns
  -------
@@ -623,19 +619,9 @@ def pca_transform_gyroscope(
  y_gyro_array = df[y_gyro_colname].to_numpy()
  z_gyro_array = df[z_gyro_colname].to_numpy()

- # Filter data based on predicted gait if pred_colname is provided
- if pred_colname is not None:
- pred_mask = df[pred_colname] == 1
- y_gyro_fit_array = y_gyro_array[pred_mask]
- z_gyro_fit_array = z_gyro_array[pred_mask]
-
- # Fit PCA on predicted gait data
- fit_data = np.column_stack((y_gyro_fit_array, z_gyro_fit_array))
- full_data = np.column_stack((y_gyro_array, z_gyro_array))
- else:
- # Fit PCA on entire dataset
- fit_data = np.column_stack((y_gyro_array, z_gyro_array))
- full_data = fit_data
+ # Fit PCA
+ fit_data = np.column_stack((y_gyro_array, z_gyro_array))
+ full_data = fit_data

  pca = PCA(n_components=2, svd_solver='auto', random_state=22)
  pca.fit(fit_data)
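With `pred_colname` gone, `pca_transform_gyroscope` always fits the PCA on exactly the rows it receives; callers pre-filter the DataFrame instead (see `quantify_arm_swing` below). A self-contained sketch of the retained fitting logic; the final `transform` line is an assumption, since the function's return statement sits outside the diff:

```python
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
df = pd.DataFrame({"gyroscope_y": rng.normal(size=500),
                   "gyroscope_z": rng.normal(size=500)})

# As of 1.0.1, fit_data and full_data are always the same (pre-filtered) array
fit_data = np.column_stack((df["gyroscope_y"].to_numpy(), df["gyroscope_z"].to_numpy()))
pca = PCA(n_components=2, svd_solver='auto', random_state=22)
pca.fit(fit_data)
velocity = pca.transform(fit_data)[:, 0]  # assumed: first component used as the velocity estimate
```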
paradigma/pipelines/gait_pipeline.py CHANGED
@@ -1,20 +1,17 @@
  import numpy as np
- import os
  import pandas as pd
- from pathlib import Path
  from scipy.signal import periodogram
  from typing import List, Tuple
- import tsdf

  from paradigma.classification import ClassifierPackage
- from paradigma.constants import DataColumns, TimeUnit
+ from paradigma.constants import DataColumns
  from paradigma.config import GaitConfig
  from paradigma.feature_extraction import pca_transform_gyroscope, compute_angle, remove_moving_average_angle, \
  extract_angle_extremes, compute_range_of_motion, compute_peak_angular_velocity, compute_statistics, \
  compute_std_euclidean_norm, compute_power_in_bandwidth, compute_dominant_frequency, compute_mfccs, \
  compute_total_power
  from paradigma.segmenting import tabulate_windows, create_segments, discard_segments, categorize_segments, WindowedDataExtractor
- from paradigma.util import aggregate_parameter, merge_predictions_with_timestamps, read_metadata, write_df_data, get_end_iso8601
+ from paradigma.util import aggregate_parameter


  def extract_gait_features(
@@ -160,66 +157,35 @@ def detect_gait(


  def extract_arm_activity_features(
+ df: pd.DataFrame,
  config: GaitConfig,
- df_timestamps: pd.DataFrame,
- df_predictions: pd.DataFrame,
- threshold: float
  ) -> pd.DataFrame:
  """
  Extract features related to arm activity from a time-series DataFrame.

  This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
  and extracts features related to arm activity by performing the following steps:
- 1. Merges the gait predictions with timestamps by expanding overlapping windows into individual timestamps.
- 2. Computes the angle and velocity from gyroscope data.
- 3. Filters the data to include only predicted gait segments.
- 4. Groups the data into segments based on consecutive timestamps and pre-specified gaps.
- 5. Removes segments that do not meet predefined criteria.
- 6. Creates fixed-length windows from the time series data.
- 7. Extracts angle-related features, temporal domain features, and spectral domain features.
+ 1. Computes the angle and velocity from gyroscope data.
+ 2. Filters the data to include only predicted gait segments.
+ 3. Groups the data into segments based on consecutive timestamps and pre-specified gaps.
+ 4. Removes segments that do not meet predefined criteria.
+ 5. Creates fixed-length windows from the time series data.
+ 6. Extracts angle-related features, temporal domain features, and spectral domain features.

  Parameters
  ----------
- config : GaitConfig
- Configuration object containing column names and parameters for feature extraction.
-
- df_timestamps : pd.DataFrame
- A DataFrame containing the raw sensor data, including accelerometer, gravity, and gyroscope columns.
-
- df_predictions : pd.DataFrame
- A DataFrame containing the predicted probabilities for gait activity per window.
+ df: pd.DataFrame
+ The input DataFrame containing accelerometer, gravity, and gyroscope data of predicted gait.

  config : ArmActivityFeatureExtractionConfig
  Configuration object containing column names and parameters for feature extraction.

- path_to_classifier_input : str | Path
- The path to the directory containing the classifier files and other necessary input files for feature extraction.
-
  Returns
  -------
  pd.DataFrame
  A DataFrame containing the extracted arm activity features, including angle, velocity,
  temporal, and spectral features.
  """
- if not any(df_predictions[DataColumns.PRED_GAIT_PROBA] >= threshold):
- raise ValueError("No gait detected in the input data.")
-
- # Merge gait predictions with timestamps
- gait_preprocessing_config = GaitConfig(step='gait')
- df = merge_predictions_with_timestamps(
- df_ts=df_timestamps,
- df_predictions=df_predictions,
- pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
- window_length_s=gait_preprocessing_config.window_length_s,
- fs=gait_preprocessing_config.sampling_frequency
- )
-
- # Add a column for predicted gait based on a fitted threshold
- df[DataColumns.PRED_GAIT] = (df[DataColumns.PRED_GAIT_PROBA] >= threshold).astype(int)
-
- # Filter the DataFrame to only include predicted gait (1)
- df = df.loc[df[DataColumns.PRED_GAIT]==1].reset_index(drop=True)
-
  # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap
  df[DataColumns.SEGMENT_NR] = create_segments(
  time_array=df[DataColumns.TIME],
@@ -315,8 +281,8 @@ def filter_gait(
  ----------
  df : pd.DataFrame
  The input DataFrame containing features extracted from gait data.
- full_path_to_classifier_package : str | Path
- The path to the pre-trained classifier file.
+ clf_package: ClassifierPackage
+ The pre-trained classifier package containing the classifier, threshold, and scaler.
  parallel : bool, optional, default=False
  If `True`, enables parallel processing.

@@ -351,10 +317,10 @@ def filter_gait(

  def quantify_arm_swing(
  df: pd.DataFrame,
- max_segment_gap_s: float,
- min_segment_length_s: float,
  fs: int,
  filtered: bool = False,
+ max_segment_gap_s: float = 1.5,
+ min_segment_length_s: float = 1.5
  ) -> Tuple[dict[str, pd.DataFrame], dict]:
  """
  Quantify arm swing parameters for segments of motion based on gyroscope data.
@@ -362,28 +328,27 @@ def quantify_arm_swing(
  Parameters
  ----------
  df : pd.DataFrame
- A DataFrame containing the raw sensor data, including gyroscope columns. Should include a column
+ A DataFrame containing the raw sensor data of predicted gait timestamps. Should include a column
  for predicted no other arm activity based on a fitted threshold if filtered is True.

- max_segment_gap_s : float
- The maximum gap allowed between segments.
-
- min_segment_length_s : float
- The minimum length required for a segment to be considered valid.
-
  fs : int
  The sampling frequency of the sensor data.

  filtered : bool, optional, default=True
  If `True`, the gyroscope data is filtered to only include predicted no other arm activity.

+ max_segment_gap_s : float, optional, default=1.5
+ The maximum gap in seconds between consecutive timestamps to group them into segments.
+
+ min_segment_length_s : float, optional, default=1.5
+ The minimum length in seconds for a segment to be considered valid.
+
  Returns
  -------
  Tuple[pd.DataFrame, dict]
  A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing
  metadata for each segment.
  """
-
  # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap.
  # Segments are made based on predicted gait
  df[DataColumns.SEGMENT_NR] = create_segments(
@@ -391,6 +356,10 @@ def quantify_arm_swing(
  max_segment_gap_s=max_segment_gap_s
  )

+ # Segment category is determined based on predicted gait, hence it is set
+ # before filtering the DataFrame to only include predicted no other arm activity
+ df[DataColumns.SEGMENT_CAT] = categorize_segments(df=df, fs=fs)
+
  # Remove segments that do not meet predetermined criteria
  df = discard_segments(
  df=df,
@@ -401,40 +370,51 @@ def quantify_arm_swing(
  )

  if df.empty:
- raise ValueError("No segments found in the input data.")
+ raise ValueError("No segments found in the input data after discarding segments of invalid shape.")

  # If no arm swing data is remaining, return an empty dictionary
  if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].empty:
  raise ValueError("No gait without other arm activities to quantify.")
-
- df[DataColumns.SEGMENT_CAT] = categorize_segments(df=df, fs=fs)
-
- # Group and process segments
- arm_swing_quantified = []
- segment_meta = {}
-
- if filtered:
+ elif filtered:
  # Filter the DataFrame to only include predicted no other arm activity (1)
  df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].reset_index(drop=True)

- # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap
- # Now segments are based on predicted gait without other arm activity for subsequent processes
+ # Group consecutive timestamps into segments of filtered gait
  df[DataColumns.SEGMENT_NR] = create_segments(
  time_array=df[DataColumns.TIME],
  max_segment_gap_s=max_segment_gap_s
  )

- pred_colname_pca = DataColumns.PRED_NO_OTHER_ARM_ACTIVITY
- else:
- pred_colname_pca = None
+ # Remove segments that do not meet predetermined criteria
+ df = discard_segments(
+ df=df,
+ segment_nr_colname=DataColumns.SEGMENT_NR,
+ min_segment_length_s=min_segment_length_s,
+ fs=fs,
+ )
+
+ if df.empty:
+ raise ValueError("No filtered gait segments found in the input data after discarding segments of invalid shape.")
+
+ arm_swing_quantified = []
+ segment_meta = {
+ 'aggregated': {
+ 'all': {
+ 'duration_s': len(df[DataColumns.TIME]) / fs
+ },
+ },
+ 'per_segment': {}
+ }

+ # PCA is fitted on only predicted gait without other arm activity if filtered, otherwise
+ # it is fitted on the entire gyroscope data
  df[DataColumns.VELOCITY] = pca_transform_gyroscope(
  df=df,
  y_gyro_colname=DataColumns.GYROSCOPE_Y,
  z_gyro_colname=DataColumns.GYROSCOPE_Z,
- pred_colname=pred_colname_pca
  )

+ # Group and process segments
  for segment_nr, group in df.groupby(DataColumns.SEGMENT_NR, sort=False):
  segment_cat = group[DataColumns.SEGMENT_CAT].iloc[0]
  time_array = group[DataColumns.TIME].to_numpy()
@@ -452,8 +432,10 @@ def quantify_arm_swing(
  fs=fs,
  )

- segment_meta[segment_nr] = {
- 'time_s': len(angle_array) / fs,
+ segment_meta['per_segment'][segment_nr] = {
+ 'start_time_s': time_array.min(),
+ 'end_time_s': time_array.max(),
+ 'duration_s': len(angle_array) / fs,
  DataColumns.SEGMENT_CAT: segment_cat
  }

@@ -487,12 +469,20 @@ def quantify_arm_swing(

  df_params_segment = pd.DataFrame({
  DataColumns.SEGMENT_NR: segment_nr,
+ DataColumns.SEGMENT_CAT: segment_cat,
  DataColumns.RANGE_OF_MOTION: rom,
  DataColumns.PEAK_VELOCITY: pav
  })

  arm_swing_quantified.append(df_params_segment)

+ # Combine segment categories
+ segment_categories = set([segment_meta['per_segment'][x][DataColumns.SEGMENT_CAT] for x in segment_meta['per_segment'].keys()])
+ for segment_cat in segment_categories:
+ segment_meta['aggregated'][segment_cat] = {
+ 'duration_s': sum([segment_meta['per_segment'][x]['duration_s'] for x in segment_meta['per_segment'].keys() if segment_meta['per_segment'][x][DataColumns.SEGMENT_CAT] == segment_cat])
+ }
+
  arm_swing_quantified = pd.concat(arm_swing_quantified, ignore_index=True)

  return arm_swing_quantified, segment_meta
@@ -527,7 +517,7 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
  cat_segments = [x for x in segment_meta.keys() if segment_meta[x][DataColumns.SEGMENT_CAT] == segment_cat]

  aggregated_results[segment_cat] = {
- 'time_s': sum([segment_meta[x]['time_s'] for x in cat_segments])
+ 'duration_s': sum([segment_meta[x]['duration_s'] for x in cat_segments])
  }

  df_arm_swing_params_cat = df_arm_swing_params[df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)]
@@ -537,7 +527,7 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
  aggregated_results[segment_cat][f'{aggregate}_{arm_swing_parameter}'] = aggregate_parameter(df_arm_swing_params_cat[arm_swing_parameter], aggregate)

  aggregated_results['all_segment_categories'] = {
- 'time_s': sum([segment_meta[x]['time_s'] for x in segment_meta.keys()])
+ 'duration_s': sum([segment_meta[x]['duration_s'] for x in segment_meta.keys()])
  }

  for arm_swing_parameter in arm_swing_parameters:
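Note the reshaped metadata: `quantify_arm_swing` now nests `segment_meta` under `'aggregated'` and `'per_segment'` instead of the flat 0.4.7 mapping, and per-segment `'time_s'` became `'duration_s'` with explicit start/end times. A synthetic sketch of the new shape (the category name and the segment-category key are illustrative, not taken from the diff):

```python
# Shape of segment_meta as returned by quantify_arm_swing in 1.0.1 (synthetic values)
segment_meta = {
    'aggregated': {
        'all': {'duration_s': 42.0},   # total duration across all gait segments
        'long': {'duration_s': 30.0},  # one entry per segment category; name illustrative
    },
    'per_segment': {
        1: {'start_time_s': 12.0, 'end_time_s': 26.0, 'duration_s': 14.0,
            'segment_category': 'long'},  # key name illustrative (DataColumns.SEGMENT_CAT)
    },
}

# 0.4.7 exposed a flat segment_meta[segment_nr]['time_s'] instead
for nr, meta in segment_meta['per_segment'].items():
    print(nr, meta['duration_s'], meta['segment_category'])
```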
paradigma/pipelines/heart_rate_pipeline.py → paradigma/pipelines/pulse_rate_pipeline.py RENAMED
@@ -10,14 +10,14 @@ from typing import List

  from paradigma.classification import ClassifierPackage
  from paradigma.constants import DataColumns
- from paradigma.config import HeartRateConfig
+ from paradigma.config import PulseRateConfig
  from paradigma.feature_extraction import compute_statistics, compute_signal_to_noise_ratio, compute_auto_correlation, \
  compute_dominant_frequency, compute_relative_power, compute_spectral_entropy
- from paradigma.pipelines.heart_rate_utils import assign_sqa_label, extract_hr_segments, extract_hr_from_segment
+ from paradigma.pipelines.pulse_rate_utils import assign_sqa_label, extract_pr_segments, extract_pr_from_segment
  from paradigma.segmenting import tabulate_windows, WindowedDataExtractor
- from paradigma.util import read_metadata, aggregate_parameter
+ from paradigma.util import aggregate_parameter

- def extract_signal_quality_features(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config: HeartRateConfig, acc_config: HeartRateConfig) -> pd.DataFrame:
+ def extract_signal_quality_features(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config: PulseRateConfig, acc_config: PulseRateConfig) -> pd.DataFrame:
  """
  Extract signal quality features from the PPG signal.
  The features are extracted from the temporal and spectral domain of the PPG signal.
@@ -30,9 +30,9 @@ def extract_signal_quality_features(df_ppg: pd.DataFrame, df_acc: pd.DataFrame,
  The DataFrame containing the PPG signal.
  df_acc : pd.DataFrame
  The DataFrame containing the accelerometer signal.
- ppg_config: HeartRateConfig
+ ppg_config: PulseRateConfig
  The configuration for the signal quality feature extraction of the PPG signal.
- acc_config: HeartRateConfig
+ acc_config: PulseRateConfig
  The configuration for the signal quality feature extraction of the accelerometer signal.

  Returns
@@ -94,7 +94,7 @@ def extract_signal_quality_features(df_ppg: pd.DataFrame, df_acc: pd.DataFrame,
  return df_features


- def signal_quality_classification(df: pd.DataFrame, config: HeartRateConfig, full_path_to_classifier_package: str | Path) -> pd.DataFrame:
+ def signal_quality_classification(df: pd.DataFrame, config: PulseRateConfig, full_path_to_classifier_package: str | Path) -> pd.DataFrame:
  """
  Classify the signal quality of the PPG signal using a logistic regression classifier. A probability close to 1 indicates a high-quality signal, while a probability close to 0 indicates a low-quality signal.
  The classifier is trained on features extracted from the PPG signal. The features are extracted using the extract_signal_quality_features function.
@@ -105,7 +105,7 @@ def signal_quality_classification(df: pd.DataFrame, config: HeartRateConfig, ful
  ----------
  df : pd.DataFrame
  The DataFrame containing the PPG features and the accelerometer feature for signal quality classification.
- config : HeartRateConfig
+ config : PulseRateConfig
  The configuration for the signal quality classification.
  full_path_to_classifier_package : str | Path
  The path to the directory containing the classifier.
@@ -128,9 +128,9 @@ def signal_quality_classification(df: pd.DataFrame, config: HeartRateConfig, ful
  return df[[DataColumns.TIME, DataColumns.PRED_SQA_PROBA, DataColumns.PRED_SQA_ACC_LABEL]] # Return only the relevant columns, namely the predicted probabilities for the PPG signal quality and the accelerometer label


- def estimate_heart_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame, config: HeartRateConfig) -> pd.DataFrame:
+ def estimate_pulse_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame, config: PulseRateConfig) -> pd.DataFrame:
  """
- Estimate the heart rate from the PPG signal using the time-frequency domain method.
+ Estimate the pulse rate from the PPG signal using the time-frequency domain method.

  Parameters
  ----------
@@ -138,13 +138,13 @@ def estimate_heart_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame,
  The DataFrame containing the signal quality assessment predictions.
  df_ppg_preprocessed : pd.DataFrame
  The DataFrame containing the preprocessed PPG signal.
- config : HeartRateConfig
- The configuration for the heart rate estimation.
+ config : PulseRateConfig
+ The configuration for the pulse rate estimation.

  Returns
  -------
- df_hr : pd.DataFrame
- The DataFrame containing the heart rate estimations.
+ df_pr : pd.DataFrame
+ The DataFrame containing the pulse rate estimations.
  """

  # Extract NumPy arrays for faster operations
@@ -156,13 +156,13 @@ def estimate_heart_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame,

  # Assign window-level probabilities to individual samples
  sqa_label = assign_sqa_label(ppg_post_prob, config, acc_label) # assigns a signal quality label to every individual data point
- v_start_idx, v_end_idx = extract_hr_segments(sqa_label, config.min_hr_samples) # extracts heart rate segments based on the SQA label
+ v_start_idx, v_end_idx = extract_pr_segments(sqa_label, config.min_pr_samples) # extracts pulse rate segments based on the SQA label

- v_hr_rel = np.array([])
- t_hr_rel = np.array([])
+ v_pr_rel = np.array([])
+ t_pr_rel = np.array([])

- edge_add = 2 * config.sampling_frequency # Add 2s on both sides of the segment for HR estimation
- step_size = config.hr_est_samples # Step size for HR estimation
+ edge_add = 2 * config.sampling_frequency # Add 2s on both sides of the segment for PR estimation
+ step_size = config.pr_est_samples # Step size for PR estimation

  # Estimate the maximum size for preallocation
  valid_segments = (v_start_idx >= edge_add) & (v_end_idx <= len(ppg_preprocessed) - edge_add) # check if the segments are valid, e.g. not too close to the edges (2s)
@@ -171,55 +171,55 @@ def estimate_heart_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame,
  max_size = np.sum((valid_end_idx - valid_start_idx) // step_size) # maximum size for preallocation

  # Preallocate arrays
- v_hr_rel = np.empty(max_size, dtype=float)
- t_hr_rel = np.empty(max_size, dtype=float)
+ v_pr_rel = np.empty(max_size, dtype=float)
+ t_pr_rel = np.empty(max_size, dtype=float)

  # Track current position
- hr_pos = 0
+ pr_pos = 0

  for start_idx, end_idx in zip(valid_start_idx, valid_end_idx):
  # Extract extended PPG segment
  extended_ppg_segment = ppg_preprocessed[start_idx - edge_add : end_idx + edge_add, ppg_idx]

- # Estimate heart rate
- hr_est = extract_hr_from_segment(
+ # Estimate pulse rate
+ pr_est = extract_pr_from_segment(
  extended_ppg_segment,
  config.tfd_length,
  config.sampling_frequency,
  config.kern_type,
  config.kern_params,
  )
- n_hr = len(hr_est) # Number of heart rate estimates
- end_idx_time = n_hr * step_size + start_idx # Calculate end index for time, different from end_idx since it is always a multiple of step_size, while end_idx is not
+ n_pr = len(pr_est) # Number of pulse rate estimates
+ end_idx_time = n_pr * step_size + start_idx # Calculate end index for time, different from end_idx since it is always a multiple of step_size, while end_idx is not

- # Extract relative time for HR estimates
- hr_time = ppg_preprocessed[start_idx : end_idx_time : step_size, time_idx]
+ # Extract relative time for PR estimates
+ pr_time = ppg_preprocessed[start_idx : end_idx_time : step_size, time_idx]

  # Insert into preallocated arrays
- v_hr_rel[hr_pos:hr_pos + n_hr] = hr_est
- t_hr_rel[hr_pos:hr_pos + n_hr] = hr_time
- hr_pos += n_hr
+ v_pr_rel[pr_pos:pr_pos + n_pr] = pr_est
+ t_pr_rel[pr_pos:pr_pos + n_pr] = pr_time
+ pr_pos += n_pr

- df_hr = pd.DataFrame({"time": t_hr_rel, "heart_rate": v_hr_rel})
+ df_pr = pd.DataFrame({"time": t_pr_rel, "pulse_rate": v_pr_rel})

- return df_hr
+ return df_pr


- def aggregate_heart_rate(hr_values: np.ndarray, aggregates: List[str] = ['mode', '99p']) -> dict:
+ def aggregate_pulse_rate(pr_values: np.ndarray, aggregates: List[str] = ['mode', '99p']) -> dict:
  """
- Aggregate the heart rate estimates using the specified aggregation methods.
+ Aggregate the pulse rate estimates using the specified aggregation methods.

  Parameters
  ----------
- hr_values : np.ndarray
- The array containing the heart rate estimates
+ pr_values : np.ndarray
+ The array containing the pulse rate estimates
  aggregates : List[str]
- The list of aggregation methods to be used for the heart rate estimates. The default is ['mode', '99p'].
+ The list of aggregation methods to be used for the pulse rate estimates. The default is ['mode', '99p'].

  Returns
  -------
  aggregated_results : dict
- The dictionary containing the aggregated results of the heart rate estimates.
+ The dictionary containing the aggregated results of the pulse rate estimates.
  """
  # Initialize the dictionary for the aggregated results
  aggregated_results = {}
@@ -227,19 +227,19 @@ def aggregate_heart_rate(hr_values: np.ndarray, aggregates: List[str] = ['mode',
  # Initialize the dictionary for the aggregated results with the metadata
  aggregated_results = {
  'metadata': {
- 'nr_hr_est': len(hr_values)
+ 'nr_pr_est': len(pr_values)
  },
- 'hr_aggregates': {}
+ 'pr_aggregates': {}
  }
  for aggregate in aggregates:
- aggregated_results['hr_aggregates'][f'{aggregate}_{DataColumns.HEART_RATE}'] = aggregate_parameter(hr_values, aggregate)
+ aggregated_results['pr_aggregates'][f'{aggregate}_{DataColumns.PULSE_RATE}'] = aggregate_parameter(pr_values, aggregate)

  return aggregated_results


  def extract_temporal_domain_features(
  ppg_windowed: np.ndarray,
- config: HeartRateConfig,
+ config: PulseRateConfig,
  quality_stats: List[str] = ['mean', 'std']
  ) -> pd.DataFrame:
  """
@@ -250,7 +250,7 @@ def extract_temporal_domain_features(
  ppg_windowed: np.ndarray
  The dataframe containing the windowed accelerometer signal

- config: HeartRateConfig
+ config: PulseRateConfig
  The configuration object containing the parameters for the feature extraction

  quality_stats: list, optional
@@ -273,7 +273,7 @@ def extract_temporal_domain_features(

  def extract_spectral_domain_features(
  ppg_windowed: np.ndarray,
- config: HeartRateConfig,
+ config: PulseRateConfig,
  ) -> pd.DataFrame:
  """
  Calculate the spectral features (dominant frequency, relative power, and spectral entropy)
@@ -285,7 +285,7 @@ def extract_spectral_domain_features(
  ppg_windowed: np.ndarray
  The dataframe containing the windowed ppg signal

- config: HeartRateConfig
+ config: PulseRateConfig
  The configuration object containing the parameters for the feature extraction

  Returns
@@ -371,7 +371,7 @@ def extract_acc_power_feature(
  def extract_accelerometer_feature(
  acc_windowed: np.ndarray,
  ppg_windowed: np.ndarray,
- config: HeartRateConfig
+ config: PulseRateConfig
  ) -> pd.DataFrame:
  """
  Extract accelerometer features from the accelerometer signal in the PPG frequency range.
@@ -384,7 +384,7 @@ def extract_accelerometer_feature(
  ppg_windowed: np.ndarray
  The dataframe containing the corresponding windowed ppg signal

- config: HeartRateConfig
+ config: PulseRateConfig
  The configuration object containing the parameters for the feature extraction

  Returns
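For callers, the pipeline module and its public functions were renamed wholesale. A runnable sketch of the renamed aggregation entry point (assuming paradigma 1.0.1 is installed; the result keys below are taken from the diff):

```python
import numpy as np
from paradigma.pipelines.pulse_rate_pipeline import aggregate_pulse_rate
# 0.4.7: from paradigma.pipelines.heart_rate_pipeline import aggregate_heart_rate

pr_values = np.array([62.0, 64.5, 63.0, 65.5])  # synthetic pulse rate estimates (BPM)
result = aggregate_pulse_rate(pr_values)        # default aggregates: ['mode', '99p']
print(result['metadata']['nr_pr_est'])          # 4
print(result['pr_aggregates'])                  # keys 'mode_pulse_rate' and '99p_pulse_rate'
```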
paradigma/pipelines/heart_rate_utils.py → paradigma/pipelines/pulse_rate_utils.py RENAMED
@@ -2,12 +2,12 @@ import numpy as np
  from scipy import signal
  from typing import Tuple

- from paradigma.config import HeartRateConfig
+ from paradigma.config import PulseRateConfig


  def assign_sqa_label(
  ppg_prob: np.ndarray,
- config: HeartRateConfig,
+ config: PulseRateConfig,
  acc_label=None
  ) -> np.ndarray:
  """
@@ -17,7 +17,7 @@ def assign_sqa_label(
  ----------
  ppg_prob : np.ndarray
  The probabilities for PPG.
- config : HeartRateConfig
+ config : PulseRateConfig
  The configuration parameters.
  acc_label : np.ndarray, optional
  The labels for the accelerometer.
@@ -61,23 +61,23 @@ def assign_sqa_label(
  return sqa_label


- def extract_hr_segments(sqa_label: np.ndarray, min_hr_samples: int) -> Tuple[np.ndarray, np.ndarray]:
+ def extract_pr_segments(sqa_label: np.ndarray, min_pr_samples: int) -> Tuple[np.ndarray, np.ndarray]:
  """
- Extracts heart rate segments based on the SQA label.
+ Extracts pulse rate segments based on the SQA label.

  Parameters
  ----------
  sqa_label : np.ndarray
  The signal quality assessment label.
- min_hr_samples : int
- The minimum number of samples required for a heart rate segment.
+ min_pr_samples : int
+ The minimum number of samples required for a pulse rate segment.

  Returns
  -------
  Tuple[v_start_idx_long, v_end_idx_long]
- The start and end indices of the heart rate segments.
+ The start and end indices of the pulse rate segments.
  """
- # Find the start and end indices of the heart rate segments
+ # Find the start and end indices of the pulse rate segments
  v_start_idx = np.where(np.diff(sqa_label.astype(int)) == 1)[0] + 1
  v_end_idx = np.where(np.diff(sqa_label.astype(int)) == -1)[0] + 1

@@ -88,13 +88,13 @@ def extract_hr_segments(sqa_label: np.ndarray, min_hr_samples: int) -> Tuple[np.
  v_end_idx = np.append(v_end_idx, len(sqa_label))

  # Check if the segments are long enough
- v_start_idx_long = v_start_idx[(v_end_idx - v_start_idx) >= min_hr_samples]
- v_end_idx_long = v_end_idx[(v_end_idx - v_start_idx) >= min_hr_samples]
+ v_start_idx_long = v_start_idx[(v_end_idx - v_start_idx) >= min_pr_samples]
+ v_end_idx_long = v_end_idx[(v_end_idx - v_start_idx) >= min_pr_samples]

  return v_start_idx_long, v_end_idx_long


- def extract_hr_from_segment(
+ def extract_pr_from_segment(
  ppg: np.ndarray,
  tfd_length: int,
  fs: int,
@@ -102,7 +102,7 @@
  kern_params: dict
  ) -> np.ndarray:
  """
- Extracts heart rate from the time-frequency distribution of the PPG signal.
+ Extracts pulse rate from the time-frequency distribution of the PPG signal.

  Parameters
  ----------
@@ -121,7 +121,7 @@
  Returns
  -------
  np.ndarray
- The estimated heart rate.
+ The estimated pulse rate.
  """

  # Constants to handle boundary effects
@@ -145,23 +145,23 @@
  end_idx = len(ppg)
  ppg_segments.append(ppg[start_idx:end_idx])

- hr_est_from_ppg = np.array([])
+ pr_est_from_ppg = np.array([])
  for segment in ppg_segments:
  # Calculate the time-frequency distribution
- hr_tfd = extract_hr_with_tfd(segment, fs, kern_type, kern_params)
- hr_est_from_ppg = np.concatenate((hr_est_from_ppg, hr_tfd))
+ pr_tfd = extract_pr_with_tfd(segment, fs, kern_type, kern_params)
+ pr_est_from_ppg = np.concatenate((pr_est_from_ppg, pr_tfd))

- return hr_est_from_ppg
+ return pr_est_from_ppg


- def extract_hr_with_tfd(
+ def extract_pr_with_tfd(
  ppg: np.ndarray,
  fs: int,
  kern_type: str,
  kern_params: dict
  ) -> np.ndarray:
  """
- Estimate heart rate (HR) from a PPG segment using a TFD method with optional
+ Estimate pulse rate (PR) from a PPG segment using a TFD method with optional
  moving average filtering.

  Parameters
@@ -177,8 +177,8 @@

  Returns
  -------
- hr_smooth_tfd : np.ndarray
- Estimated HR values (in beats per minute) for each 2-second segment of the PPG signal.
+ pr_smooth_tfd : np.ndarray
+ Estimated PR values (in beats per minute) for each 2-second segment of the PPG signal.
  """
  # Generate the TFD matrix using the specified kernel
  tfd_obj = TimeFreqDistr()
@@ -189,16 +189,16 @@
  time_axis = np.arange(num_time_samples) / fs
  freq_axis = np.linspace(0, 0.5, num_freq_bins) * fs

- # Estimate HR by identifying the max frequency in the TFD
+ # Estimate pulse rate by identifying the max frequency in the TFD
  max_freq_indices = np.argmax(tfd, axis=0)

- hr_smooth_tfd = np.array([])
+ pr_smooth_tfd = np.array([])
  for i in range(2, int(len(ppg) / fs) - 4 + 1, 2): # Skip the first and last 2 seconds, add 1 to include the last segment
  relevant_indices = (time_axis >= i) & (time_axis < i + 2)
  avg_frequency = np.mean(freq_axis[max_freq_indices[relevant_indices]])
- hr_smooth_tfd = np.concatenate((hr_smooth_tfd, [60 * avg_frequency])) # Convert frequency to BPM
+ pr_smooth_tfd = np.concatenate((pr_smooth_tfd, [60 * avg_frequency])) # Convert frequency to BPM

- return hr_smooth_tfd
+ return pr_smooth_tfd


  class TimeFreqDistr:
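The segment-extraction logic is untouched by the rename. A self-contained numpy sketch of how `extract_pr_segments` finds runs of high-quality samples (the library version additionally handles labels that start or end at 1):

```python
import numpy as np

sqa_label = np.array([0, 1, 1, 1, 1, 0, 0, 1, 1, 0])
min_pr_samples = 3

# Rising edges (0 -> 1) mark segment starts; falling edges (1 -> 0) mark segment ends
v_start_idx = np.where(np.diff(sqa_label.astype(int)) == 1)[0] + 1
v_end_idx = np.where(np.diff(sqa_label.astype(int)) == -1)[0] + 1

long_enough = (v_end_idx - v_start_idx) >= min_pr_samples
print(v_start_idx[long_enough], v_end_idx[long_enough])  # [1] [5]: only the 4-sample run survives
```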
paradigma/pipelines/tremor_pipeline.py CHANGED
@@ -143,7 +143,6 @@ def detect_tremor(df: pd.DataFrame, config: TremorConfig, full_path_to_classifie

  return df

-
  def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
  """
  Quantifies the amount of tremor time and tremor power, aggregated over all windows in the input dataframe.
@@ -154,8 +153,8 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
  Parameters
  ----------
  df : pd.DataFrame
- The input DataFrame containing extracted tremor features. The DataFrame must include
- the necessary columns as specified in the classifier's feature names.
+ The input DataFrame containing the tremor predictions and computed tremor power.
+ The DataFrame must also contain a datetime column ('time_dt').

  config : TremorConfig
  Configuration object containing the percentile for aggregating tremor power.
@@ -163,8 +162,8 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
  Returns
  -------
  dict
- A dictionary with the aggregated tremor time and tremor power measures, as well as the total number of windows
- available in the input dataframe, and the number of windows at rest.
+ A dictionary with the aggregated tremor time and tremor power measures, as well as the number of valid days,
+ the total number of windows, and the number of windows at rest available in the input dataframe.

  Notes
  -----
@@ -173,7 +172,7 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
  - The modal tremor power is computed based on gaussian kernel density estimation.

  """
-
+ nr_valid_days = df['time_dt'].dt.date.unique().size # number of valid days in the input dataframe
  nr_windows_total = df.shape[0] # number of windows in the input dataframe

  # remove windows with detected non-tremor arm movements to control for the amount of arm activities performed
@@ -216,6 +215,7 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
  # store aggregates in json format
  d_aggregates = {
  'metadata': {
+ 'nr_valid_days': nr_valid_days,
  'nr_windows_total': nr_windows_total,
  'nr_windows_rest': nr_windows_rest
  },
@@ -250,6 +250,7 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
  pd.DataFrame
  The feature dataframe containing the extracted spectral features, including
  MFCCs, the frequency of the peak, the tremor power and below tremor power for each window.
+
  """

  # Initialize a dictionary to hold the results
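The new `nr_valid_days` metadata field is a single pandas idiom over the window timestamps; a self-contained sketch with synthetic data:

```python
import pandas as pd

df = pd.DataFrame({'time_dt': pd.to_datetime(
    ['2024-01-01 09:00', '2024-01-01 15:30', '2024-01-02 10:00'])})

nr_valid_days = df['time_dt'].dt.date.unique().size
print(nr_valid_days)  # 2 distinct calendar dates
```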
paradigma/preprocessing.py CHANGED
@@ -17,7 +17,9 @@ def resample_data(
  df: pd.DataFrame,
  time_column : str,
  values_column_names: List[str],
+ sampling_frequency: int,
  resampling_frequency: int,
+ tolerance: float | None = None
  ) -> pd.DataFrame:
  """
  Resamples sensor data to a specified frequency using cubic interpolation.
@@ -30,8 +32,14 @@
  The name of the column containing the time data.
  values_column_names : List[str]
  A list of column names that should be resampled.
+ sampling_frequency : int
+ The original sampling frequency of the data (in Hz).
  resampling_frequency : int
  The frequency to which the data should be resampled (in Hz).
+ tolerance : float, optional
+ The tolerance added to the expected difference when checking
+ for contiguous timestamps. If not provided, it defaults to
+ twice the expected interval.

  Returns
  -------
@@ -46,23 +54,35 @@

  Notes
  -----
- The function uses cubic interpolation to resample the data to the specified frequency.
- It requires the input time array to be strictly increasing.
+ - Uses cubic interpolation for smooth resampling if there are enough points.
+ - If only two timestamps are available, it falls back to linear interpolation.
  """
+ # Set default tolerance if not provided to twice the expected interval
+ if tolerance is None:
+ tolerance = 2 * 1 / sampling_frequency

- # Extract time and values from DataFrame
+ # Extract time and values
  time_abs_array = np.array(df[time_column])
  values_array = np.array(df[values_column_names])

  # Ensure the time array is strictly increasing
  if not np.all(np.diff(time_abs_array) > 0):
- raise ValueError("time_abs_array is not strictly increasing")
+ raise ValueError("Time array is not strictly increasing")
+
+ # Ensure the time array is contiguous
+ expected_interval = 1 / sampling_frequency
+ timestamp_diffs = np.diff(time_abs_array)
+ if np.any(np.abs(timestamp_diffs - expected_interval) > tolerance):
+ raise ValueError("Time array is not contiguous")

  # Resample the time data using the specified frequency
  t_resampled = np.arange(time_abs_array[0], time_abs_array[-1], 1 / resampling_frequency)

- # Interpolate the data using cubic interpolation
- interpolator = interp1d(time_abs_array, values_array, axis=0, kind="cubic")
+ # Choose interpolation method
+ interpolation_kind = "cubic" if len(time_abs_array) > 3 else "linear"
+ interpolator = interp1d(time_abs_array, values_array, axis=0, kind=interpolation_kind, fill_value="extrapolate")
+
+ # Interpolate
  resampled_values = interpolator(t_resampled)

  # Create a DataFrame with the resampled data
@@ -186,7 +206,8 @@ def preprocess_imu_data(df: pd.DataFrame, config: IMUConfig, sensor: str, watch_
  df = resample_data(
  df=df,
  time_column=DataColumns.TIME,
- values_column_names = values_colnames,
+ values_column_names=values_colnames,
+ sampling_frequency=config.sampling_frequency,
  resampling_frequency=config.sampling_frequency
  )

@@ -259,6 +280,7 @@ def preprocess_ppg_data(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config:
  df=df_acc_overlapping,
  time_column=DataColumns.TIME,
  values_column_names = list(imu_config.d_channels_accelerometer.keys()),
+ sampling_frequency=imu_config.sampling_frequency,
  resampling_frequency=imu_config.sampling_frequency
  )

@@ -267,6 +289,7 @@ def preprocess_ppg_data(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config:
  df=df_ppg_overlapping,
  time_column=DataColumns.TIME,
  values_column_names = list(ppg_config.d_channels_ppg.keys()),
+ sampling_frequency=ppg_config.sampling_frequency,
  resampling_frequency=ppg_config.sampling_frequency
  )

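Callers of `resample_data` must now pass the original `sampling_frequency` so the contiguity check can run before interpolation. A minimal sketch with synthetic data (column names are illustrative; assumes paradigma 1.0.1):

```python
import numpy as np
import pandas as pd
from paradigma.preprocessing import resample_data

fs = 100  # original sampling frequency in Hz
df = pd.DataFrame({'time': np.arange(0, 1, 1 / fs)})
df['accelerometer_x'] = np.sin(2 * np.pi * df['time'])

df_resampled = resample_data(
    df=df,
    time_column='time',
    values_column_names=['accelerometer_x'],
    sampling_frequency=fs,   # new in 1.0.1: enables the contiguity check (tolerance defaults to 2/fs)
    resampling_frequency=fs,
)
```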
paradigma/segmenting.py CHANGED
@@ -168,7 +168,7 @@ def create_segments(
  gap_exceeds = time_diff > max_segment_gap_s

  # Create the segment number based on the cumulative sum of the gap_exceeds mask
- segments = gap_exceeds.cumsum() + 1 # +1 to start enumeration from 1
+ segments = gap_exceeds.cumsum()

  return segments

@@ -236,6 +236,9 @@ def discard_segments(

  df = df[valid_segment_mask].copy()

+ if df.empty:
+ raise ValueError("All segments were removed.")
+
  # Reset segment numbers in a single step
  unique_segments = pd.factorize(df[segment_nr_colname])[0] + 1
  df[segment_nr_colname] = unique_segments
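`create_segments` now enumerates segments from 0 rather than 1; `discard_segments` renumbers from 1 via `pd.factorize` afterwards, so end results are unaffected. A self-contained sketch of the cumulative-sum segmentation (the `time_diff` computation is an assumption; only the `cumsum` line appears in the diff):

```python
import pandas as pd

time_array = pd.Series([0.00, 0.01, 0.02, 5.00, 5.01, 12.00])  # two gaps > 1.5 s
max_segment_gap_s = 1.5

time_diff = time_array.diff()                 # assumed; not shown in the diff
gap_exceeds = time_diff > max_segment_gap_s
segments = gap_exceeds.cumsum()               # 1.0.1: starts at 0 (0.4.7 used cumsum() + 1)
print(segments.tolist())                      # [0, 0, 0, 1, 1, 2]
```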
paradigma/testing.py CHANGED
@@ -7,16 +7,16 @@ import tsdf
  from typing import List

  from paradigma.classification import ClassifierPackage
- from paradigma.config import IMUConfig, PPGConfig, GaitConfig, TremorConfig, HeartRateConfig
+ from paradigma.config import IMUConfig, PPGConfig, GaitConfig, TremorConfig, PulseRateConfig
  from paradigma.constants import DataColumns, TimeUnit
  from paradigma.pipelines.gait_pipeline import extract_gait_features, detect_gait, \
  extract_arm_activity_features, filter_gait
  from paradigma.pipelines.tremor_pipeline import extract_tremor_features, detect_tremor, \
  aggregate_tremor
- from paradigma.pipelines.heart_rate_pipeline import extract_signal_quality_features, signal_quality_classification, \
- aggregate_heart_rate
+ from paradigma.pipelines.pulse_rate_pipeline import extract_signal_quality_features, signal_quality_classification, \
+ aggregate_pulse_rate
  from paradigma.preprocessing import preprocess_imu_data, preprocess_ppg_data
- from paradigma.util import read_metadata, write_df_data, get_end_iso8601
+ from paradigma.util import read_metadata, write_df_data, get_end_iso8601, merge_predictions_with_timestamps


  def preprocess_imu_data_io(path_to_input: str | Path, path_to_output: str | Path,
@@ -208,13 +208,27 @@ def extract_arm_activity_features_io(

  clf_package = ClassifierPackage.load(full_path_to_classifier_package)

+ gait_preprocessing_config = GaitConfig(step='gait')
+
+ df = merge_predictions_with_timestamps(
+ df_ts=df_ts,
+ df_predictions=df_pred_gait,
+ pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
+ window_length_s=gait_preprocessing_config.window_length_s,
+ fs=gait_preprocessing_config.sampling_frequency
+ )
+
+ # Add a column for predicted gait based on a fitted threshold
+ df[DataColumns.PRED_GAIT] = (df[DataColumns.PRED_GAIT_PROBA] >= clf_package.threshold).astype(int)
+
+ # Filter the DataFrame to only include predicted gait (1)
+ df = df.loc[df[DataColumns.PRED_GAIT]==1].reset_index(drop=True)
+
  # Extract arm activity features
  config = GaitConfig(step='arm_activity')
  df_features = extract_arm_activity_features(
+ df=df,
  config=config,
- df_timestamps=df_ts,
- df_predictions=df_pred_gait,
- threshold=clf_package.threshold
  )

  end_iso8601 = get_end_iso8601(metadata_ts_values.start_iso8601, df_features[DataColumns.TIME][-1:].values[0] + config.window_length_s)
@@ -339,7 +353,7 @@ def aggregate_tremor_io(path_to_feature_input: str | Path, path_to_prediction_in
  json.dump(d_aggregates, json_file, indent=4)


- def extract_signal_quality_features_io(input_path: str | Path, output_path: str | Path, ppg_config: HeartRateConfig, acc_config: HeartRateConfig) -> pd.DataFrame:
+ def extract_signal_quality_features_io(input_path: str | Path, output_path: str | Path, ppg_config: PulseRateConfig, acc_config: PulseRateConfig) -> pd.DataFrame:
  """
  Extract signal quality features from the PPG signal and save them to a file.

@@ -349,9 +363,9 @@ def extract_signal_quality_features_io(input_path: str | Path, output_path: str
  The path to the directory containing the preprocessed PPG and accelerometer data.
  output_path : str | Path
  The path to the directory where the extracted features will be saved.
- ppg_config: HeartRateConfig
+ ppg_config: PulseRateConfig
  The configuration for the signal quality feature extraction of the ppg signal.
- acc_config: HeartRateConfig
+ acc_config: PulseRateConfig
  The configuration for the signal quality feature extraction of the accelerometer signal.

  Returns
@@ -376,7 +390,7 @@ def extract_signal_quality_features_io(input_path: str | Path, output_path: str
  return df_windowed


- def signal_quality_classification_io(input_path: str | Path, output_path: str | Path, path_to_classifier_input: str | Path, config: HeartRateConfig) -> None:
+ def signal_quality_classification_io(input_path: str | Path, output_path: str | Path, path_to_classifier_input: str | Path, config: PulseRateConfig) -> None:

  # Load the data
  metadata_time, metadata_values = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
@@ -385,32 +399,32 @@ def signal_quality_classification_io(input_path: str | Path, output_path: str |
  df_sqa = signal_quality_classification(df_windowed, config, path_to_classifier_input)


- def aggregate_heart_rate_io(
+ def aggregate_pulse_rate_io(
  full_path_to_input: str | Path,
  full_path_to_output: str | Path,
  aggregates: List[str] = ['mode', '99p']
  ) -> None:
  """
- Extract heart rate from the PPG signal and save the aggregated heart rate estimates to a file.
+ Extract pulse rate from the PPG signal and save the aggregated pulse rate estimates to a file.

  Parameters
  ----------
  input_path : str | Path
- The path to the directory containing the heart rate estimates.
+ The path to the directory containing the pulse rate estimates.
  output_path : str | Path
- The path to the directory where the aggregated heart rate estimates will be saved.
+ The path to the directory where the aggregated pulse rate estimates will be saved.
  aggregates : List[str]
- The list of aggregation methods to be used for the heart rate estimates. The default is ['mode', '99p'].
+ The list of aggregation methods to be used for the pulse rate estimates. The default is ['mode', '99p'].
  """

- # Load the heart rate estimates
+ # Load the pulse rate estimates
  with open(full_path_to_input, 'r') as f:
- df_hr = json.load(f)
+ df_pr = json.load(f)

- # Aggregate the heart rate estimates
- hr_values = df_hr['heart_rate'].values
- df_hr_aggregates = aggregate_heart_rate(hr_values, aggregates)
+ # Aggregate the pulse rate estimates
+ pr_values = df_pr['pulse_rate'].values
+ df_pr_aggregates = aggregate_pulse_rate(pr_values, aggregates)

- # Save the aggregated heart rate estimates
+ # Save the aggregated pulse rate estimates
  with open(full_path_to_output, 'w') as json_file:
- json.dump(df_hr_aggregates, json_file, indent=4)
+ json.dump(df_pr_aggregates, json_file, indent=4)
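Because the merge-and-threshold step moved from `extract_arm_activity_features` into this I/O wrapper, users calling the pipeline functions directly must now perform it themselves. A hedged sketch of the new call order, mirroring the wrapper above (`df_ts`, `df_pred_gait`, and the classifier path are placeholders):

```python
from paradigma.classification import ClassifierPackage
from paradigma.config import GaitConfig
from paradigma.constants import DataColumns
from paradigma.pipelines.gait_pipeline import extract_arm_activity_features
from paradigma.util import merge_predictions_with_timestamps

clf_package = ClassifierPackage.load('gait_detection_clf_package.pkl')  # placeholder path
gait_config = GaitConfig(step='gait')

# Expand window-level gait probabilities onto individual timestamps
df = merge_predictions_with_timestamps(
    df_ts=df_ts,                  # placeholder: preprocessed sensor data
    df_predictions=df_pred_gait,  # placeholder: window-level gait predictions
    pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
    window_length_s=gait_config.window_length_s,
    fs=gait_config.sampling_frequency,
)

# Threshold, keep predicted gait only, then extract features
df[DataColumns.PRED_GAIT] = (df[DataColumns.PRED_GAIT_PROBA] >= clf_package.threshold).astype(int)
df = df.loc[df[DataColumns.PRED_GAIT] == 1].reset_index(drop=True)
df_features = extract_arm_activity_features(df=df, config=GaitConfig(step='arm_activity'))
```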
paradigma/util.py CHANGED
@@ -1,9 +1,7 @@
- import json
  import os
  import numpy as np
  import pandas as pd
- from pathlib import Path
- from datetime import timedelta
+ from datetime import datetime, timedelta
  from dateutil import parser
  from typing import List, Tuple

@@ -432,3 +430,61 @@ def merge_predictions_with_timestamps(
  df_ts = df_ts.dropna(subset=[pred_proba_colname])

  return df_ts
+
+
+ def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: str) -> pd.DataFrame:
+
+ """
+ Select hours of interest from the data to include in the aggregation step.
+
+ Parameters
+ ----------
+ df : pd.DataFrame
+ Input data.
+
+ select_hours_start: str
+ The start time of the selected hours in "HH:MM" format.
+
+ select_hours_end: str
+ The end time of the selected hours in "HH:MM" format.
+
+ Returns
+ -------
+ pd.DataFrame
+ The selected data.
+
+ """
+
+ select_hours_start = datetime.strptime(select_hours_start, '%H:%M').time() # convert to time object
+ select_hours_end = datetime.strptime(select_hours_end, '%H:%M').time()
+ df_subset = df[df['time_dt'].dt.time.between(select_hours_start, select_hours_end)] # select the hours of interest
+
+ return df_subset
+
+ def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
+
+ """
+ Select days of interest from the data to include in the aggregation step.
+
+ Parameters
+ ----------
+ df : pd.DataFrame
+ Input data with column 'time_dt' in which the date is stored.
+
+ min_hours_per_day: int
+ The minimum number of hours per day required for including the day in the aggregation step.
+
+
+ Returns
+ -------
+ pd.DataFrame
+ The selected data.
+
+ """
+
+ min_s_per_day = min_hours_per_day * 3600
+ window_length_s = df['time_dt'].diff().dt.total_seconds()[1] # determine the length of the first window in seconds
+ min_windows_per_day = min_s_per_day / window_length_s
+ df_subset = df.groupby(df['time_dt'].dt.date).filter(lambda x: len(x) >= min_windows_per_day)
+
+ return df_subset
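A runnable sketch of the two new compliance helpers with synthetic window timestamps (assuming paradigma 1.0.1; the `reset_index` is a precaution because `select_days` reads the first timestamp difference by positional label):

```python
import pandas as pd
from paradigma.util import select_hours, select_days

# Synthetic 4 s windows covering one full day and a partial second day
df = pd.DataFrame({'time_dt': pd.date_range('2024-01-01 07:00', periods=20000, freq='4s')})

df = select_hours(df, '08:00', '22:00').reset_index(drop=True)  # keep daytime windows
df = select_days(df, min_hours_per_day=10)                      # drop days with < 10 h of data
print(df['time_dt'].dt.date.unique())                           # [datetime.date(2024, 1, 1)]
```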
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: paradigma
3
- Version: 0.4.7
3
+ Version: 1.0.1
4
4
  Summary: ParaDigMa - A toolbox for deriving Parkinson's disease Digital Markers from real-life wrist sensor data
5
5
  License: Apache-2.0
6
6
  Author: Erik Post
@@ -26,7 +26,7 @@ Description-Content-Type: text/markdown
26
26
  |:----:|----|
27
27
  | **Packages and Releases** | [![Latest release](https://img.shields.io/github/release/biomarkersparkinson/paradigma.svg)](https://github.com/biomarkersparkinson/paradigma/releases/latest) [![PyPI](https://img.shields.io/pypi/v/paradigma.svg)](https://pypi.python.org/pypi/paradigma/) [![Static Badge](https://img.shields.io/badge/RSD-paradigma-lib)](https://research-software-directory.org/software/paradigma) |
28
28
  | **DOI** | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.13838392.svg)](https://doi.org/10.5281/zenodo.13838392) |
29
- | **Build Status** | [![](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/) [![Build and test](https://github.com/biomarkersParkinson/paradigma/actions/workflows/build-and-test.yml/badge.svg)](https://github.com/biomarkersParkinson/paradigma/actions/workflows/build-and-test.yml) [![pages-build-deployment](https://github.com/biomarkersParkinson/paradigma/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/biomarkersParkinson/paradigma/actions/workflows/pages/pages-build-deployment) |
29
+ | **Build Status** | [![](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/) [![Build and test](https://github.com/biomarkersParkinson/paradigma/actions/workflows/build-and-test.yml/badge.svg)](https://github.com/biomarkersParkinson/paradigma/actions/workflows/build-and-test.yml) [![pages-build-deployment](https://github.com/biomarkersParkinson/paradigma/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/biomarkersParkinson/paradigma/actions/workflows/pages/pages-build-deployment) |
30
30
  | **License** | [![GitHub license](https://img.shields.io/github/license/biomarkersParkinson/paradigma)](https://github.com/biomarkersparkinson/paradigma/blob/main/LICENSE) |
31
31
  <!-- | **Fairness** | [![fair-software.eu](https://img.shields.io/badge/fair--software.eu-%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F-green)](https://fair-software.eu) [![OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/8083/badge)](https://www.bestpractices.dev/projects/8083) | -->
32
32
 
@@ -95,7 +95,7 @@ The ParaDigMa toolbox is designed for the analysis of passive monitoring data co
  Specific requirements include:
  | Pipeline | Sensor Configuration | Context of Use |
  |------------------------|--------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------|
- | **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but different sensitivity for measuring disease progression for tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
+ | **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but with different sensitivity for measuring disease progression of tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). <br> - Timeframe: contiguous, strictly increasing timestamps. | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
  | **Arm swing during gait** | - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. <br> - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Population: no walking aid, no severe dyskinesia in the watch-sided arm. <br> - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm), and at least 2 minutes of arm swing. |
  | **Tremor** | - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm). |
  | **Pulse rate** | - PPG*: minimum sampling rate of 30 Hz, green LED. <br> - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. | - Population: no rhythm disorders (e.g. atrial fibrillation, atrial flutter). <br> - Compliance: for weekly measures: minimum average of 12 hours of data per day. |
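The **All** row above adds a timeframe requirement in this release: contiguous, strictly increasing timestamps. Below is a minimal sketch of what such a pre-flight check could look like; the function name and tolerance are illustrative assumptions, not part of the ParaDigMa API.

```python
# Hypothetical input check for the "contiguous, strictly increasing
# timestamps" requirement; not part of the ParaDigMa API.
import numpy as np

def check_timestamps(t: np.ndarray, fs: float, rel_tol: float = 0.01) -> None:
    """Raise if timestamps (in seconds) are not strictly increasing
    and contiguous at the expected sampling frequency fs (Hz)."""
    dt = np.diff(t)
    if np.any(dt <= 0):
        raise ValueError("Timestamps must be strictly increasing.")
    expected = 1.0 / fs
    if np.any(np.abs(dt - expected) > rel_tol * expected):
        raise ValueError("Timestamps are not contiguous (gap or jitter detected).")

t = np.arange(0, 10, 0.01)   # 10 s of 100 Hz accelerometer timestamps
check_timestamps(t, fs=100)  # passes; a dropped sample would raise
```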
@@ -111,8 +111,10 @@ We have included support for [TSDF](https://biomarkersparkinson.github.io/tsdf/)

  ## Scientific validation

- The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. 2020]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/)
- and the Personalized Parkinson Project [[Bloem et al. 2019]](https://pubmed.ncbi.nlm.nih.gov/31315608/). Details and validation of the different pipelines shall be shared in upcoming scientific publications.
+ The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. 2020]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/) and the Personalized Parkinson Project [[Bloem et al. 2019]](https://pubmed.ncbi.nlm.nih.gov/31315608/). The following publication contains the details and validation of the arm swing during gait pipeline:
+ * [Post, E. et al. - Quantifying arm swing in Parkinson's disease: a method accounting for arm activities during free-living gait](https://doi.org/10.1186/s12984-025-01578-z)
+
+ Details and validation of the other pipelines will be shared in upcoming scientific publications.

  ## Contributing

@@ -0,0 +1,22 @@
+ paradigma/__init__.py,sha256=vCLqo7vOEgcnYs10gUVYvEFfi8y-jBi7w1YKRoqn95k,127
+ paradigma/assets/gait_detection_clf_package.pkl,sha256=8jCbuM_4dkilSjOEk9ss7bJbSppgzXe72y0X4BCnzCU,11497247
+ paradigma/assets/gait_filtering_clf_package.pkl,sha256=lAaLyhmXdV4X_drmYt0EM6wGwSo80yhpxtncWGq4RfQ,3915
+ paradigma/assets/ppg_quality_clf_package.pkl,sha256=vUcM4v8gZwWAmDVK7E4UcHhVnhlEg27RSB71oPGloSc,1292
+ paradigma/assets/tremor_detection_clf_package.pkl,sha256=S-KsK1EcUBJX6oGGBo8GqU0AhNZThA6Qe-cs0QPcWw4,1475
+ paradigma/classification.py,sha256=sBJSePvwHZNPUQuLdx-pncfnDzMq-1naomsCxSJneWY,2921
+ paradigma/config.py,sha256=hGmWpK1sjwjlmCn43bBa6DEWBTDZjRXlGANIp6X42mY,11206
+ paradigma/constants.py,sha256=gR--OzxaZqS5nJnYlWLqnJ9xN05_GMNtd6ec3upsfms,3543
+ paradigma/feature_extraction.py,sha256=zgu_fW1zpPvHxpgsPVpJILUiyWH44b9n1bGG7lV2HwE,35323
+ paradigma/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ paradigma/pipelines/gait_pipeline.py,sha256=ZhAc2RZbBX52SJ8hvSRjb5THM47WCfY50iEdImlszJM,26231
+ paradigma/pipelines/pulse_rate_pipeline.py,sha256=aBDopwWvfabLCQM6De9PHNKKzL03xD_29jWcsElnjCw,17711
+ paradigma/pipelines/pulse_rate_utils.py,sha256=rlXze04meLFlyPaxMBYhvz3_vu3SM77RF-7mLPegTm0,26772
+ paradigma/pipelines/tremor_pipeline.py,sha256=qsKEV3QFPQ4bsTGdEX0nXHVMjVlBUpcEHbWEDcGsmVw,14758
+ paradigma/preprocessing.py,sha256=OcrwiyNjZpw41IKCf9QRY75A-532kU4gSSSXjqWuTeE,14556
+ paradigma/segmenting.py,sha256=hgT4dtg23eyvjUraEXCzX8u0kSRx4vArjQgF10r61P8,13909
+ paradigma/testing.py,sha256=zWPBj7Q1Td6rgeMGoAWi6rIVLB8M6_FNUxlZSbpWqEM,18547
+ paradigma/util.py,sha256=E1keTX7vMDowSUG1AGx3juUMIXD7znaGwuqWDfQTpXo,16424
+ paradigma-1.0.1.dist-info/LICENSE,sha256=Lda8kIVC2kbmlSeYaUWwUwV75Q-q31idYvo18HUTfiw,9807
+ paradigma-1.0.1.dist-info/METADATA,sha256=F1mFpZV1uyGepjtWJ4U6FNSLmsrWSplmS0IJKdA4dh8,11654
+ paradigma-1.0.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+ paradigma-1.0.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 2.1.1
+ Generator: poetry-core 2.1.3
  Root-Is-Purelib: true
  Tag: py3-none-any
@@ -1,22 +0,0 @@
- paradigma/__init__.py,sha256=vCLqo7vOEgcnYs10gUVYvEFfi8y-jBi7w1YKRoqn95k,127
- paradigma/assets/gait_detection_clf_package.pkl,sha256=8jCbuM_4dkilSjOEk9ss7bJbSppgzXe72y0X4BCnzCU,11497247
- paradigma/assets/gait_filtering_clf_package.pkl,sha256=lAaLyhmXdV4X_drmYt0EM6wGwSo80yhpxtncWGq4RfQ,3915
- paradigma/assets/ppg_quality_clf_package.pkl,sha256=vUcM4v8gZwWAmDVK7E4UcHhVnhlEg27RSB71oPGloSc,1292
- paradigma/assets/tremor_detection_clf_package.pkl,sha256=S-KsK1EcUBJX6oGGBo8GqU0AhNZThA6Qe-cs0QPcWw4,1475
- paradigma/classification.py,sha256=sBJSePvwHZNPUQuLdx-pncfnDzMq-1naomsCxSJneWY,2921
- paradigma/config.py,sha256=72KkIEVV1v5dD9ZJDPI-mFNvorA8nBADEcA0A-jviHU,11163
- paradigma/constants.py,sha256=JlrD4Zx66g7myQALYAc4Gw_y6yW5EipZuvwj9_fjjpI,3543
- paradigma/feature_extraction.py,sha256=v_AwbBmvYo21XbULkOV6Ob_sZ1iboyXdDRRAsmCBh-Q,36061
- paradigma/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- paradigma/pipelines/gait_pipeline.py,sha256=guz6RZlM0muarxG_GtOMf117XqV0YMNPrK2KlyIP4Jg,26426
- paradigma/pipelines/heart_rate_pipeline.py,sha256=0-D9KcW9nwE5sgXsWHONkeKrsX6qZ5BYqjDttoffwL8,17726
- paradigma/pipelines/heart_rate_utils.py,sha256=aV2mTMWrFWHZD0KpHqy3IIC1onZykbppyp7_OUWxFTU,26764
- paradigma/pipelines/tremor_pipeline.py,sha256=B5uZB3IP5pwb30PE4xztRbdYmZt4JQj193BRksC9N94,14590
- paradigma/preprocessing.py,sha256=-Vt_awvJe8MGqXACqWp7R6LWq6XFOcAVUyd0anNaytc,13506
- paradigma/segmenting.py,sha256=Jrz2JQX5eSfR9jBfpBhc6QV0SFmPVT5O6T8MyL0sdSw,13874
- paradigma/testing.py,sha256=DSbWeYl5HuZ-bNyOKwgwMHQGG8KlTabvGTR1Yzd-9CY,17955
- paradigma/util.py,sha256=MEoe0zWigxwqy6aVd8zKdHifiuUTc9Mqyrh4xsy1oHY,14759
- paradigma-0.4.7.dist-info/LICENSE,sha256=Lda8kIVC2kbmlSeYaUWwUwV75Q-q31idYvo18HUTfiw,9807
- paradigma-0.4.7.dist-info/METADATA,sha256=QIxavCCDzE1qxtls69w5vED0xgq8_tZB--qe1cXVfTE,11323
- paradigma-0.4.7.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
- paradigma-0.4.7.dist-info/RECORD,,