paradigma 0.4.7__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
paradigma/feature_extraction.py CHANGED
@@ -597,11 +597,9 @@ def pca_transform_gyroscope(
     df: pd.DataFrame,
     y_gyro_colname: str,
     z_gyro_colname: str,
-    pred_colname: str | None = None,
 ) -> np.ndarray:
     """
-    Perform principal component analysis (PCA) on gyroscope data to estimate velocity. If pred_colname is provided,
-    the PCA is fitted on the predicted gait data. Otherwise, the PCA is fitted on the entire dataset.
+    Perform principal component analysis (PCA) on gyroscope data to estimate velocity.
 
     Parameters
     ----------
@@ -611,8 +609,6 @@ def pca_transform_gyroscope(
         The column name for the y-axis gyroscope data.
     z_gyro_colname : str
         The column name for the z-axis gyroscope data.
-    pred_colname : str, optional
-        The column name for the predicted gait (default: None).
 
     Returns
     -------
@@ -623,19 +619,9 @@ def pca_transform_gyroscope(
     y_gyro_array = df[y_gyro_colname].to_numpy()
     z_gyro_array = df[z_gyro_colname].to_numpy()
 
-    # Filter data based on predicted gait if pred_colname is provided
-    if pred_colname is not None:
-        pred_mask = df[pred_colname] == 1
-        y_gyro_fit_array = y_gyro_array[pred_mask]
-        z_gyro_fit_array = z_gyro_array[pred_mask]
-
-        # Fit PCA on predicted gait data
-        fit_data = np.column_stack((y_gyro_fit_array, z_gyro_fit_array))
-        full_data = np.column_stack((y_gyro_array, z_gyro_array))
-    else:
-        # Fit PCA on entire dataset
-        fit_data = np.column_stack((y_gyro_array, z_gyro_array))
-        full_data = fit_data
+    # Fit PCA
+    fit_data = np.column_stack((y_gyro_array, z_gyro_array))
+    full_data = fit_data
 
     pca = PCA(n_components=2, svd_solver='auto', random_state=22)
     pca.fit(fit_data)
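
For illustration only: a minimal, self-contained sketch of the simplified behaviour, in which PCA is fitted on the full y/z gyroscope signal and a principal component serves as the velocity estimate. The column names and the choice of the first component are assumptions, not confirmed by this diff.

    import numpy as np
    import pandas as pd
    from sklearn.decomposition import PCA

    # Synthetic gyroscope signal; in the package this comes from the preprocessed IMU dataframe
    df = pd.DataFrame({
        "gyroscope_y": np.random.randn(1000),
        "gyroscope_z": np.random.randn(1000),
    })

    # As of 1.0.0 the PCA is fitted on the entire dataset (the pred_colname branch was removed)
    fit_data = np.column_stack((df["gyroscope_y"].to_numpy(), df["gyroscope_z"].to_numpy()))
    pca = PCA(n_components=2, svd_solver="auto", random_state=22)
    pca.fit(fit_data)

    # Project the full signal; taking the first component as the velocity proxy is an assumption
    velocity = pca.transform(fit_data)[:, 0]
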
paradigma/pipelines/gait_pipeline.py CHANGED
@@ -1,20 +1,17 @@
 import numpy as np
-import os
 import pandas as pd
-from pathlib import Path
 from scipy.signal import periodogram
 from typing import List, Tuple
-import tsdf
 
 from paradigma.classification import ClassifierPackage
-from paradigma.constants import DataColumns, TimeUnit
+from paradigma.constants import DataColumns
 from paradigma.config import GaitConfig
 from paradigma.feature_extraction import pca_transform_gyroscope, compute_angle, remove_moving_average_angle, \
     extract_angle_extremes, compute_range_of_motion, compute_peak_angular_velocity, compute_statistics, \
     compute_std_euclidean_norm, compute_power_in_bandwidth, compute_dominant_frequency, compute_mfccs, \
     compute_total_power
 from paradigma.segmenting import tabulate_windows, create_segments, discard_segments, categorize_segments, WindowedDataExtractor
-from paradigma.util import aggregate_parameter, merge_predictions_with_timestamps, read_metadata, write_df_data, get_end_iso8601
+from paradigma.util import aggregate_parameter
 
 
 def extract_gait_features(
@@ -160,66 +157,35 @@ def detect_gait(
 
 
 def extract_arm_activity_features(
+    df: pd.DataFrame,
     config: GaitConfig,
-    df_timestamps: pd.DataFrame,
-    df_predictions: pd.DataFrame,
-    threshold: float
 ) -> pd.DataFrame:
     """
     Extract features related to arm activity from a time-series DataFrame.
 
     This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
     and extracts features related to arm activity by performing the following steps:
-    1. Merges the gait predictions with timestamps by expanding overlapping windows into individual timestamps.
-    2. Computes the angle and velocity from gyroscope data.
-    3. Filters the data to include only predicted gait segments.
-    4. Groups the data into segments based on consecutive timestamps and pre-specified gaps.
-    5. Removes segments that do not meet predefined criteria.
-    6. Creates fixed-length windows from the time series data.
-    7. Extracts angle-related features, temporal domain features, and spectral domain features.
+    1. Computes the angle and velocity from gyroscope data.
+    2. Filters the data to include only predicted gait segments.
+    3. Groups the data into segments based on consecutive timestamps and pre-specified gaps.
+    4. Removes segments that do not meet predefined criteria.
+    5. Creates fixed-length windows from the time series data.
+    6. Extracts angle-related features, temporal domain features, and spectral domain features.
 
     Parameters
     ----------
-    config : GaitConfig
-        Configuration object containing column names and parameters for feature extraction.
-
-    df_timestamps : pd.DataFrame
-        A DataFrame containing the raw sensor data, including accelerometer, gravity, and gyroscope columns.
-
-    df_predictions : pd.DataFrame
-        A DataFrame containing the predicted probabilities for gait activity per window.
+    df: pd.DataFrame
+        The input DataFrame containing accelerometer, gravity, and gyroscope data of predicted gait.
 
     config : ArmActivityFeatureExtractionConfig
        Configuration object containing column names and parameters for feature extraction.
 
-    path_to_classifier_input : str | Path
-        The path to the directory containing the classifier files and other necessary input files for feature extraction.
-
     Returns
     -------
     pd.DataFrame
         A DataFrame containing the extracted arm activity features, including angle, velocity,
         temporal, and spectral features.
     """
-    if not any(df_predictions[DataColumns.PRED_GAIT_PROBA] >= threshold):
-        raise ValueError("No gait detected in the input data.")
-
-    # Merge gait predictions with timestamps
-    gait_preprocessing_config = GaitConfig(step='gait')
-    df = merge_predictions_with_timestamps(
-        df_ts=df_timestamps,
-        df_predictions=df_predictions,
-        pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
-        window_length_s=gait_preprocessing_config.window_length_s,
-        fs=gait_preprocessing_config.sampling_frequency
-    )
-
-    # Add a column for predicted gait based on a fitted threshold
-    df[DataColumns.PRED_GAIT] = (df[DataColumns.PRED_GAIT_PROBA] >= threshold).astype(int)
-
-    # Filter the DataFrame to only include predicted gait (1)
-    df = df.loc[df[DataColumns.PRED_GAIT]==1].reset_index(drop=True)
-
     # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap
     df[DataColumns.SEGMENT_NR] = create_segments(
         time_array=df[DataColumns.TIME],
@@ -315,8 +281,8 @@ def filter_gait(
     ----------
     df : pd.DataFrame
         The input DataFrame containing features extracted from gait data.
-    full_path_to_classifier_package : str | Path
-        The path to the pre-trained classifier file.
+    clf_package: ClassifierPackage
+        The pre-trained classifier package containing the classifier, threshold, and scaler.
     parallel : bool, optional, default=False
         If `True`, enables parallel processing.
 
@@ -351,10 +317,10 @@ def filter_gait(
 
 def quantify_arm_swing(
     df: pd.DataFrame,
-    max_segment_gap_s: float,
-    min_segment_length_s: float,
     fs: int,
     filtered: bool = False,
+    max_segment_gap_s: float = 1.5,
+    min_segment_length_s: float = 1.5
 ) -> Tuple[dict[str, pd.DataFrame], dict]:
     """
     Quantify arm swing parameters for segments of motion based on gyroscope data.
@@ -362,28 +328,27 @@ def quantify_arm_swing(
     Parameters
     ----------
     df : pd.DataFrame
-        A DataFrame containing the raw sensor data, including gyroscope columns. Should include a column
+        A DataFrame containing the raw sensor data of predicted gait timestamps. Should include a column
         for predicted no other arm activity based on a fitted threshold if filtered is True.
 
-    max_segment_gap_s : float
-        The maximum gap allowed between segments.
-
-    min_segment_length_s : float
-        The minimum length required for a segment to be considered valid.
-
     fs : int
         The sampling frequency of the sensor data.
 
     filtered : bool, optional, default=True
         If `True`, the gyroscope data is filtered to only include predicted no other arm activity.
 
+    max_segment_gap_s : float, optional, default=1.5
+        The maximum gap in seconds between consecutive timestamps to group them into segments.
+
+    min_segment_length_s : float, optional, default=1.5
+        The minimum length in seconds for a segment to be considered valid.
+
     Returns
     -------
     Tuple[pd.DataFrame, dict]
         A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing
         metadata for each segment.
     """
-
     # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap.
     # Segments are made based on predicted gait
     df[DataColumns.SEGMENT_NR] = create_segments(
@@ -391,6 +356,10 @@ def quantify_arm_swing(
         max_segment_gap_s=max_segment_gap_s
     )
 
+    # Segment category is determined based on predicted gait, hence it is set
+    # before filtering the DataFrame to only include predicted no other arm activity
+    df[DataColumns.SEGMENT_CAT] = categorize_segments(df=df, fs=fs)
+
     # Remove segments that do not meet predetermined criteria
     df = discard_segments(
         df=df,
@@ -401,40 +370,51 @@ def quantify_arm_swing(
     )
 
     if df.empty:
-        raise ValueError("No segments found in the input data.")
+        raise ValueError("No segments found in the input data after discarding segments of invalid shape.")
 
     # If no arm swing data is remaining, return an empty dictionary
     if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].empty:
         raise ValueError("No gait without other arm activities to quantify.")
-
-    df[DataColumns.SEGMENT_CAT] = categorize_segments(df=df, fs=fs)
-
-    # Group and process segments
-    arm_swing_quantified = []
-    segment_meta = {}
-
-    if filtered:
+    elif filtered:
         # Filter the DataFrame to only include predicted no other arm activity (1)
         df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].reset_index(drop=True)
 
-        # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap
-        # Now segments are based on predicted gait without other arm activity for subsequent processes
+        # Group consecutive timestamps into segments of filtered gait
         df[DataColumns.SEGMENT_NR] = create_segments(
             time_array=df[DataColumns.TIME],
             max_segment_gap_s=max_segment_gap_s
         )
 
-        pred_colname_pca = DataColumns.PRED_NO_OTHER_ARM_ACTIVITY
-    else:
-        pred_colname_pca = None
+        # Remove segments that do not meet predetermined criteria
+        df = discard_segments(
+            df=df,
+            segment_nr_colname=DataColumns.SEGMENT_NR,
+            min_segment_length_s=min_segment_length_s,
+            fs=fs,
+        )
+
+        if df.empty:
+            raise ValueError("No filtered gait segments found in the input data after discarding segments of invalid shape.")
+
+    arm_swing_quantified = []
+    segment_meta = {
+        'aggregated': {
+            'all': {
+                'duration_s': len(df[DataColumns.TIME]) / fs
+            },
+        },
+        'per_segment': {}
+    }
 
+    # PCA is fitted on only predicted gait without other arm activity if filtered, otherwise
+    # it is fitted on the entire gyroscope data
     df[DataColumns.VELOCITY] = pca_transform_gyroscope(
         df=df,
         y_gyro_colname=DataColumns.GYROSCOPE_Y,
         z_gyro_colname=DataColumns.GYROSCOPE_Z,
-        pred_colname=pred_colname_pca
     )
 
+    # Group and process segments
     for segment_nr, group in df.groupby(DataColumns.SEGMENT_NR, sort=False):
         segment_cat = group[DataColumns.SEGMENT_CAT].iloc[0]
         time_array = group[DataColumns.TIME].to_numpy()
@@ -452,8 +432,10 @@ def quantify_arm_swing(
             fs=fs,
         )
 
-        segment_meta[segment_nr] = {
-            'time_s': len(angle_array) / fs,
+        segment_meta['per_segment'][segment_nr] = {
+            'start_time_s': time_array.min(),
+            'end_time_s': time_array.max(),
+            'duration_s': len(angle_array) / fs,
             DataColumns.SEGMENT_CAT: segment_cat
         }
 
@@ -487,12 +469,20 @@ def quantify_arm_swing(
 
         df_params_segment = pd.DataFrame({
             DataColumns.SEGMENT_NR: segment_nr,
+            DataColumns.SEGMENT_CAT: segment_cat,
             DataColumns.RANGE_OF_MOTION: rom,
             DataColumns.PEAK_VELOCITY: pav
         })
 
         arm_swing_quantified.append(df_params_segment)
 
+    # Combine segment categories
+    segment_categories = set([segment_meta['per_segment'][x][DataColumns.SEGMENT_CAT] for x in segment_meta['per_segment'].keys()])
+    for segment_cat in segment_categories:
+        segment_meta['aggregated'][segment_cat] = {
+            'duration_s': sum([segment_meta['per_segment'][x]['duration_s'] for x in segment_meta['per_segment'].keys() if segment_meta['per_segment'][x][DataColumns.SEGMENT_CAT] == segment_cat])
+        }
+
     arm_swing_quantified = pd.concat(arm_swing_quantified, ignore_index=True)
 
     return arm_swing_quantified, segment_meta
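
For illustration only: the segment_meta dictionary returned by quantify_arm_swing is now nested into 'aggregated' and 'per_segment' parts, as the added lines above show. A sketch of its shape with made-up values; the category labels and the exact key produced by DataColumns.SEGMENT_CAT are assumptions.

    segment_meta = {
        "aggregated": {
            "all": {"duration_s": 412.5},
            # one entry per observed segment category, durations summed over its segments
            "short": {"duration_s": 120.0},
            "long": {"duration_s": 292.5},
        },
        "per_segment": {
            1: {"start_time_s": 0.0, "end_time_s": 15.0, "duration_s": 15.04, "segment_category": "long"},
            2: {"start_time_s": 60.0, "end_time_s": 65.0, "duration_s": 5.04, "segment_category": "short"},
        },
    }
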
@@ -527,7 +517,7 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
         cat_segments = [x for x in segment_meta.keys() if segment_meta[x][DataColumns.SEGMENT_CAT] == segment_cat]
 
         aggregated_results[segment_cat] = {
-            'time_s': sum([segment_meta[x]['time_s'] for x in cat_segments])
+            'duration_s': sum([segment_meta[x]['duration_s'] for x in cat_segments])
         }
 
         df_arm_swing_params_cat = df_arm_swing_params[df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)]
@@ -537,7 +527,7 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
             aggregated_results[segment_cat][f'{aggregate}_{arm_swing_parameter}'] = aggregate_parameter(df_arm_swing_params_cat[arm_swing_parameter], aggregate)
 
     aggregated_results['all_segment_categories'] = {
-        'time_s': sum([segment_meta[x]['time_s'] for x in segment_meta.keys()])
+        'duration_s': sum([segment_meta[x]['duration_s'] for x in segment_meta.keys()])
     }
 
     for arm_swing_parameter in arm_swing_parameters:
paradigma/pipelines/tremor_pipeline.py CHANGED
@@ -163,8 +163,8 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
     Returns
     -------
     dict
-        A dictionary with the aggregated tremor time and tremor power measures, as well as the total number of windows
-        available in the input dataframe, and the number of windows at rest.
+        A dictionary with the aggregated tremor time and tremor power measures, as well as the number of valid days,
+        the total number of windows, and the number of windows at rest available in the input dataframe.
 
     Notes
     -----
@@ -173,7 +173,7 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
     - The modal tremor power is computed based on gaussian kernel density estimation.
 
     """
-
+    nr_valid_days = df['time_dt'].dt.date.unique().size # number of valid days in the input dataframe
     nr_windows_total = df.shape[0] # number of windows in the input dataframe
 
     # remove windows with detected non-tremor arm movements to control for the amount of arm activities performed
@@ -216,6 +216,7 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
     # store aggregates in json format
     d_aggregates = {
         'metadata': {
+            'nr_valid_days': nr_valid_days,
             'nr_windows_total': nr_windows_total,
             'nr_windows_rest': nr_windows_rest
         },
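
The valid-day count added above is derived directly from the window timestamps. A small sketch of the computation, assuming 'time_dt' holds one datetime per window:

    import pandas as pd

    df = pd.DataFrame({"time_dt": pd.to_datetime([
        "2024-01-01 10:00", "2024-01-01 11:00", "2024-01-02 09:00",
    ])})
    nr_valid_days = df["time_dt"].dt.date.unique().size  # 2 distinct days
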
paradigma/preprocessing.py CHANGED
@@ -17,7 +17,9 @@ def resample_data(
     df: pd.DataFrame,
     time_column : str,
     values_column_names: List[str],
+    sampling_frequency: int,
     resampling_frequency: int,
+    tolerance: float | None = None
 ) -> pd.DataFrame:
     """
     Resamples sensor data to a specified frequency using cubic interpolation.
@@ -30,8 +32,14 @@ def resample_data(
         The name of the column containing the time data.
     values_column_names : List[str]
         A list of column names that should be resampled.
+    sampling_frequency : int
+        The original sampling frequency of the data (in Hz).
     resampling_frequency : int
         The frequency to which the data should be resampled (in Hz).
+    tolerance : float, optional
+        The tolerance added to the expected difference when checking
+        for contiguous timestamps. If not provided, it defaults to
+        twice the expected interval.
 
     Returns
     -------
@@ -46,23 +54,35 @@ def resample_data(
 
     Notes
     -----
-    The function uses cubic interpolation to resample the data to the specified frequency.
-    It requires the input time array to be strictly increasing.
+    - Uses cubic interpolation for smooth resampling if there are enough points.
+    - If only two timestamps are available, it falls back to linear interpolation.
     """
+    # Set default tolerance if not provided to twice the expected interval
+    if tolerance is None:
+        tolerance = 2 * 1 / sampling_frequency
 
-    # Extract time and values from DataFrame
+    # Extract time and values
     time_abs_array = np.array(df[time_column])
     values_array = np.array(df[values_column_names])
 
     # Ensure the time array is strictly increasing
     if not np.all(np.diff(time_abs_array) > 0):
-        raise ValueError("time_abs_array is not strictly increasing")
+        raise ValueError("Time array is not strictly increasing")
+
+    # Ensure the time array is contiguous
+    expected_interval = 1 / sampling_frequency
+    timestamp_diffs = np.diff(time_abs_array)
+    if np.any(np.abs(timestamp_diffs - expected_interval) > tolerance):
+        raise ValueError("Time array is not contiguous")
 
     # Resample the time data using the specified frequency
     t_resampled = np.arange(time_abs_array[0], time_abs_array[-1], 1 / resampling_frequency)
 
-    # Interpolate the data using cubic interpolation
-    interpolator = interp1d(time_abs_array, values_array, axis=0, kind="cubic")
+    # Choose interpolation method
+    interpolation_kind = "cubic" if len(time_abs_array) > 3 else "linear"
+    interpolator = interp1d(time_abs_array, values_array, axis=0, kind=interpolation_kind, fill_value="extrapolate")
+
+    # Interpolate
     resampled_values = interpolator(t_resampled)
 
     # Create a DataFrame with the resampled data
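
Taken together, the new code rejects non-contiguous input and falls back to linear interpolation when too few points exist for a cubic fit. A standalone sketch of the added logic, assuming a 100 Hz signal resampled to 50 Hz:

    import numpy as np
    from scipy.interpolate import interp1d

    sampling_frequency = 100
    resampling_frequency = 50
    tolerance = 2 * 1 / sampling_frequency  # default: twice the expected interval

    time_abs_array = np.arange(0, 1, 1 / sampling_frequency)
    values_array = np.sin(2 * np.pi * time_abs_array)[:, None]

    if not np.all(np.diff(time_abs_array) > 0):
        raise ValueError("Time array is not strictly increasing")
    if np.any(np.abs(np.diff(time_abs_array) - 1 / sampling_frequency) > tolerance):
        raise ValueError("Time array is not contiguous")

    # Cubic interpolation needs more than three support points; otherwise fall back to linear
    kind = "cubic" if len(time_abs_array) > 3 else "linear"
    interpolator = interp1d(time_abs_array, values_array, axis=0, kind=kind, fill_value="extrapolate")
    t_resampled = np.arange(time_abs_array[0], time_abs_array[-1], 1 / resampling_frequency)
    resampled_values = interpolator(t_resampled)
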
@@ -186,7 +206,8 @@ def preprocess_imu_data(df: pd.DataFrame, config: IMUConfig, sensor: str, watch_
     df = resample_data(
         df=df,
         time_column=DataColumns.TIME,
-        values_column_names = values_colnames,
+        values_column_names=values_colnames,
+        sampling_frequency=config.sampling_frequency,
         resampling_frequency=config.sampling_frequency
     )
 
@@ -259,6 +280,7 @@ def preprocess_ppg_data(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config:
         df=df_acc_overlapping,
         time_column=DataColumns.TIME,
         values_column_names = list(imu_config.d_channels_accelerometer.keys()),
+        sampling_frequency=imu_config.sampling_frequency,
         resampling_frequency=imu_config.sampling_frequency
     )
 
@@ -267,6 +289,7 @@ def preprocess_ppg_data(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config:
         df=df_ppg_overlapping,
         time_column=DataColumns.TIME,
         values_column_names = list(ppg_config.d_channels_ppg.keys()),
+        sampling_frequency=ppg_config.sampling_frequency,
         resampling_frequency=ppg_config.sampling_frequency
     )
 
paradigma/segmenting.py CHANGED
@@ -168,7 +168,7 @@ def create_segments(
     gap_exceeds = time_diff > max_segment_gap_s
 
     # Create the segment number based on the cumulative sum of the gap_exceeds mask
-    segments = gap_exceeds.cumsum() + 1 # +1 to start enumeration from 1
+    segments = gap_exceeds.cumsum()
 
     return segments
 
@@ -236,6 +236,9 @@ def discard_segments(
 
     df = df[valid_segment_mask].copy()
 
+    if df.empty:
+        raise ValueError("All segments were removed.")
+
     # Reset segment numbers in a single step
     unique_segments = pd.factorize(df[segment_nr_colname])[0] + 1
     df[segment_nr_colname] = unique_segments
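
Note the behavioural consequence of the create_segments change: segment numbering now starts at 0 rather than 1, and segments that survive discard_segments are renumbered from 1 via pd.factorize, as the context lines above show. A quick illustration of the new enumeration:

    import pandas as pd

    time_diff = pd.Series([0.01, 0.01, 2.0, 0.01])  # one gap exceeding the threshold
    max_segment_gap_s = 1.5

    gap_exceeds = time_diff > max_segment_gap_s
    segments = gap_exceeds.cumsum()
    print(segments.tolist())  # [0, 0, 1, 1] -- previously [1, 1, 2, 2]
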
paradigma/testing.py CHANGED
@@ -16,7 +16,7 @@ from paradigma.pipelines.tremor_pipeline import extract_tremor_features, detect_
 from paradigma.pipelines.heart_rate_pipeline import extract_signal_quality_features, signal_quality_classification, \
     aggregate_heart_rate
 from paradigma.preprocessing import preprocess_imu_data, preprocess_ppg_data
-from paradigma.util import read_metadata, write_df_data, get_end_iso8601
+from paradigma.util import read_metadata, write_df_data, get_end_iso8601, merge_predictions_with_timestamps
 
 
 def preprocess_imu_data_io(path_to_input: str | Path, path_to_output: str | Path,
@@ -208,13 +208,27 @@ def extract_arm_activity_features_io(
 
     clf_package = ClassifierPackage.load(full_path_to_classifier_package)
 
+    gait_preprocessing_config = GaitConfig(step='gait')
+
+    df = merge_predictions_with_timestamps(
+        df_ts=df_ts,
+        df_predictions=df_pred_gait,
+        pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
+        window_length_s=gait_preprocessing_config.window_length_s,
+        fs=gait_preprocessing_config.sampling_frequency
+    )
+
+    # Add a column for predicted gait based on a fitted threshold
+    df[DataColumns.PRED_GAIT] = (df[DataColumns.PRED_GAIT_PROBA] >= clf_package.threshold).astype(int)
+
+    # Filter the DataFrame to only include predicted gait (1)
+    df = df.loc[df[DataColumns.PRED_GAIT]==1].reset_index(drop=True)
+
     # Extract arm activity features
     config = GaitConfig(step='arm_activity')
     df_features = extract_arm_activity_features(
+        df=df,
         config=config,
-        df_timestamps=df_ts,
-        df_predictions=df_pred_gait,
-        threshold=clf_package.threshold
     )
 
     end_iso8601 = get_end_iso8601(metadata_ts_values.start_iso8601, df_features[DataColumns.TIME][-1:].values[0] + config.window_length_s)
paradigma/util.py CHANGED
@@ -1,9 +1,7 @@
-import json
 import os
 import numpy as np
 import pandas as pd
-from pathlib import Path
-from datetime import timedelta
+from datetime import datetime, timedelta
 from dateutil import parser
 from typing import List, Tuple
 
@@ -432,3 +430,61 @@ def merge_predictions_with_timestamps(
     df_ts = df_ts.dropna(subset=[pred_proba_colname])
 
     return df_ts
+
+
+def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: str) -> pd.DataFrame:
+
+    """
+    Select hours of interest from the data to include in the aggregation step.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Input data.
+
+    select_hours_start: str
+        The start time of the selected hours in "HH:MM" format.
+
+    select_hours_end: str
+        The end time of the selected hours in "HH:MM" format.
+
+    Returns
+    -------
+    pd.DataFrame
+        The selected data.
+
+    """
+
+    select_hours_start = datetime.strptime(select_hours_start, '%H:%M').time() # convert to time object
+    select_hours_end = datetime.strptime(select_hours_end, '%H:%M').time()
+    df_subset = df[df['time_dt'].dt.time.between(select_hours_start, select_hours_end)] # select the hours of interest
+
+    return df_subset
+
+def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
+
+    """
+    Select days of interest from the data to include in the aggregation step.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Input data with column 'time_dt' in which the date is stored.
+
+    min_hours_per_day: int
+        The minimum number of hours per day required for including the day in the aggregation step.
+
+
+    Returns
+    -------
+    pd.DataFrame
+        The selected data.
+
+    """
+
+    min_s_per_day = min_hours_per_day * 3600
+    window_length_s = df['time_dt'].diff().dt.total_seconds()[1] # determine the length of the first window in seconds
+    min_windows_per_day = min_s_per_day / window_length_s
+    df_subset = df.groupby(df['time_dt'].dt.date).filter(lambda x: len(x) >= min_windows_per_day)
+
+    return df_subset
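
A usage sketch for the two new helpers, assuming a windowed dataframe with one row per 4-second window and a 'time_dt' datetime column; the data below is a synthetic placeholder.

    import pandas as pd
    from paradigma.util import select_hours, select_days

    df = pd.DataFrame({
        "time_dt": pd.date_range("2024-01-01 08:00", periods=3 * 21600, freq="4s"),
        "tremor_power": 0.0,
    })

    df_hours = select_hours(df, "08:00", "22:00")           # keep windows between 8 am and 10 pm
    df_days = select_days(df_hours, min_hours_per_day=10)   # keep days with at least 10 hours of data
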
paradigma-1.0.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: paradigma
-Version: 0.4.7
+Version: 1.0.0
 Summary: ParaDigMa - A toolbox for deriving Parkinson's disease Digital Markers from real-life wrist sensor data
 License: Apache-2.0
 Author: Erik Post
@@ -95,7 +95,7 @@ The ParaDigMa toolbox is designed for the analysis of passive monitoring data co
 Specific requirements include:
 | Pipeline | Sensor Configuration | Context of Use |
 |------------------------|--------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------|
-| **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but different sensitivity for measuring disease progression for tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
+| **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but different sensitivity for measuring disease progression for tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). <br> - Timeframe: contiguous, strictly increasing timestamps. | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
 | **Arm swing during gait** | - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. <br> - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Population: no walking aid, no severe dyskinesia in the watch-sided arm. <br> - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm), and at least 2 minutes of arm swing. |
 | **Tremor** | - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm). |
 | **Pulse rate** | - PPG*: minimum sampling rate of 30 Hz, green LED. <br> - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. | - Population: no rhythm disorders (e.g. atrial fibrillation, atrial flutter). <br> - Compliance: for weekly measures: minimum average of 12 hours of data per day. |
@@ -111,8 +111,10 @@ We have included support for [TSDF](https://biomarkersparkinson.github.io/tsdf/)
 
 ## Scientific validation
 
-The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. 2020]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/)
-and the Personalized Parkinson Project [[Bloem et al. 2019]](https://pubmed.ncbi.nlm.nih.gov/31315608/). Details and validation of the different pipelines shall be shared in upcoming scientific publications.
+The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. 2020]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/) and the Personalized Parkinson Project [[Bloem et al. 2019]](https://pubmed.ncbi.nlm.nih.gov/31315608/). The following publication contains the details and validation of the arm swing during gait pipeline:
+* [Post, E. et al. - Quantifying arm swing in Parkinson's disease: a method account for arm activities during free-living gait](https://doi.org/10.1186/s12984-025-01578-z)
+
+Details and validation of the other pipelines shall be shared in upcoming scientific publications.
 
 ## Contributing
 
paradigma-1.0.0.dist-info/RECORD CHANGED
@@ -6,17 +6,17 @@ paradigma/assets/tremor_detection_clf_package.pkl,sha256=S-KsK1EcUBJX6oGGBo8GqU0
 paradigma/classification.py,sha256=sBJSePvwHZNPUQuLdx-pncfnDzMq-1naomsCxSJneWY,2921
 paradigma/config.py,sha256=72KkIEVV1v5dD9ZJDPI-mFNvorA8nBADEcA0A-jviHU,11163
 paradigma/constants.py,sha256=JlrD4Zx66g7myQALYAc4Gw_y6yW5EipZuvwj9_fjjpI,3543
-paradigma/feature_extraction.py,sha256=v_AwbBmvYo21XbULkOV6Ob_sZ1iboyXdDRRAsmCBh-Q,36061
+paradigma/feature_extraction.py,sha256=PAl9DgjTljxtifjPLpM1L_7xRs2fI6eUcybkaJ9kxAA,35323
 paradigma/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-paradigma/pipelines/gait_pipeline.py,sha256=guz6RZlM0muarxG_GtOMf117XqV0YMNPrK2KlyIP4Jg,26426
+paradigma/pipelines/gait_pipeline.py,sha256=ZhAc2RZbBX52SJ8hvSRjb5THM47WCfY50iEdImlszJM,26231
 paradigma/pipelines/heart_rate_pipeline.py,sha256=0-D9KcW9nwE5sgXsWHONkeKrsX6qZ5BYqjDttoffwL8,17726
 paradigma/pipelines/heart_rate_utils.py,sha256=aV2mTMWrFWHZD0KpHqy3IIC1onZykbppyp7_OUWxFTU,26764
-paradigma/pipelines/tremor_pipeline.py,sha256=B5uZB3IP5pwb30PE4xztRbdYmZt4JQj193BRksC9N94,14590
-paradigma/preprocessing.py,sha256=-Vt_awvJe8MGqXACqWp7R6LWq6XFOcAVUyd0anNaytc,13506
-paradigma/segmenting.py,sha256=Jrz2JQX5eSfR9jBfpBhc6QV0SFmPVT5O6T8MyL0sdSw,13874
-paradigma/testing.py,sha256=DSbWeYl5HuZ-bNyOKwgwMHQGG8KlTabvGTR1Yzd-9CY,17955
-paradigma/util.py,sha256=MEoe0zWigxwqy6aVd8zKdHifiuUTc9Mqyrh4xsy1oHY,14759
-paradigma-0.4.7.dist-info/LICENSE,sha256=Lda8kIVC2kbmlSeYaUWwUwV75Q-q31idYvo18HUTfiw,9807
-paradigma-0.4.7.dist-info/METADATA,sha256=QIxavCCDzE1qxtls69w5vED0xgq8_tZB--qe1cXVfTE,11323
-paradigma-0.4.7.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
-paradigma-0.4.7.dist-info/RECORD,,
+paradigma/pipelines/tremor_pipeline.py,sha256=ihe6QeTk4MT_tXBE8PUN5rWNh2r5n5HCC1k4xyJjtUw,14761
+paradigma/preprocessing.py,sha256=OcrwiyNjZpw41IKCf9QRY75A-532kU4gSSSXjqWuTeE,14556
+paradigma/segmenting.py,sha256=hgT4dtg23eyvjUraEXCzX8u0kSRx4vArjQgF10r61P8,13909
+paradigma/testing.py,sha256=Ni68clfyHz_mQWBD8cDmls_uadd0rpKPq3-IKzolDZc,18547
+paradigma/util.py,sha256=E1keTX7vMDowSUG1AGx3juUMIXD7znaGwuqWDfQTpXo,16424
+paradigma-1.0.0.dist-info/LICENSE,sha256=Lda8kIVC2kbmlSeYaUWwUwV75Q-q31idYvo18HUTfiw,9807
+paradigma-1.0.0.dist-info/METADATA,sha256=rnOYOPZkoDVOMF0z9dDLfEulaCspPPjmGWjDtBgUDPI,11654
+paradigma-1.0.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+paradigma-1.0.0.dist-info/RECORD,,
paradigma-1.0.0.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 2.1.1
+Generator: poetry-core 2.1.2
 Root-Is-Purelib: true
 Tag: py3-none-any