paradigma-1.0.4-py3-none-any.whl → paradigma-1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,14 @@
+import json
 import logging
-from typing import List, Tuple
+from importlib.resources import files
+from pathlib import Path
 
 import numpy as np
 import pandas as pd
 from scipy.signal import periodogram
 
 from paradigma.classification import ClassifierPackage
-from paradigma.config import GaitConfig
+from paradigma.config import GaitConfig, IMUConfig
 from paradigma.constants import DataColumns
 from paradigma.feature_extraction import (
     compute_angle,
@@ -22,13 +24,14 @@ from paradigma.feature_extraction import (
     pca_transform_gyroscope,
     remove_moving_average_angle,
 )
+from paradigma.preprocessing import preprocess_imu_data
 from paradigma.segmenting import (
     WindowedDataExtractor,
     create_segments,
     discard_segments,
     tabulate_windows,
 )
-from paradigma.util import aggregate_parameter
+from paradigma.util import aggregate_parameter, merge_predictions_with_timestamps
 
 logger = logging.getLogger(__name__)
 
@@ -39,42 +42,54 @@ if not logger.hasHandlers():
 
 def extract_gait_features(df: pd.DataFrame, config: GaitConfig) -> pd.DataFrame:
     """
-    Extracts gait features from accelerometer and gravity sensor data in the input DataFrame by computing temporal and spectral features.
+    Extracts gait features from accelerometer and gravity sensor data in the
+    input DataFrame by computing temporal and spectral features.
 
     This function performs the following steps:
-    1. Groups sequences of timestamps into windows, using accelerometer and gravity data.
-    2. Computes temporal domain features such as mean and standard deviation for accelerometer and gravity data.
-    3. Transforms the signals from the temporal domain to the spectral domain using the Fast Fourier Transform (FFT).
+    1. Groups sequences of timestamps into windows, using accelerometer and
+       gravity data.
+    2. Computes temporal domain features such as mean and standard deviation
+       for accelerometer and gravity data.
+    3. Transforms the signals from the temporal domain to the spectral
+       domain using the Fast Fourier Transform (FFT).
     4. Computes spectral domain features for the accelerometer data.
     5. Combines both temporal and spectral features into a final DataFrame.
 
     Parameters
     ----------
     df : pd.DataFrame
-        The input DataFrame containing gait data, which includes time, accelerometer, and gravity sensor data. The data should be
+        The input DataFrame containing gait data, which includes time,
+        accelerometer, and gravity sensor data. The data should be
         structured with the necessary columns as specified in the `config`.
 
     config : GaitConfig
-        Configuration object containing parameters for feature extraction, including column names for time, accelerometer data, and
-        gravity data, as well as settings for windowing, and feature computation.
+        Configuration object containing parameters for feature extraction,
+        including column names for time, accelerometer data, and gravity
+        data, as well as settings for windowing and feature computation.
 
     Returns
     -------
     pd.DataFrame
-        A DataFrame containing extracted gait features, including temporal and spectral domain features. The DataFrame will have
-        columns corresponding to time, statistical features of the accelerometer and gravity data, and spectral features of the
-        accelerometer data.
+        A DataFrame containing extracted gait features, including temporal
+        and spectral domain features. The DataFrame will have columns
+        corresponding to time, statistical features of the accelerometer and
+        gravity data, and spectral features of the accelerometer data.
 
     Notes
     -----
-    - This function groups the data into windows based on timestamps and applies Fast Fourier Transform to compute spectral features.
-    - The temporal features are extracted from the accelerometer and gravity data, and include statistics like mean and standard deviation.
-    - The input DataFrame must include columns as specified in the `config` object for proper feature extraction.
+    - This function groups the data into windows based on timestamps and
+      applies Fast Fourier Transform to compute spectral features.
+    - The temporal features are extracted from the accelerometer and gravity
+      data, and include statistics like mean and standard deviation.
+    - The input DataFrame must include columns as specified in the `config`
+      object for proper feature extraction.
 
     Raises
     ------
     ValueError
-        If the input DataFrame does not contain the required columns as specified in the configuration or if any step in the feature extraction fails.
+        If the input DataFrame does not contain the required columns as
+        specified in the configuration or if any step in the feature
+        extraction fails.
     """
     # Group sequences of timestamps into windows
     windowed_colnames = (
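
For orientation, a minimal usage sketch of extract_gait_features. The module path paradigma.pipelines.gait_pipeline and the parquet file name are assumptions, not shown in this diff; GaitConfig(step="gait") mirrors the default named in run_gait_pipeline further down:

    import pandas as pd

    from paradigma.config import GaitConfig
    from paradigma.pipelines.gait_pipeline import extract_gait_features  # assumed module path

    # Hypothetical preprocessed IMU data with time, accelerometer, and gravity columns
    df_preprocessed = pd.read_parquet("preprocessed_imu.parquet")

    config = GaitConfig(step="gait")
    df_gait_features = extract_gait_features(df=df_preprocessed, config=config)
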
@@ -101,7 +116,8 @@ def extract_gait_features(df: pd.DataFrame, config: GaitConfig) -> pd.DataFrame:
 
     df_features = pd.DataFrame(start_time, columns=[config.time_colname])
 
-    # Compute statistics of the temporal domain signals (mean, std) for accelerometer and gravity
+    # Compute statistics of the temporal domain signals (mean, std) for
+    # accelerometer and gravity
     df_temporal_features = extract_temporal_domain_features(
         config=config,
         windowed_acc=windowed_acc,
@@ -112,7 +128,8 @@
     # Combine temporal features with the start time
     df_features = pd.concat([df_features, df_temporal_features], axis=1)
 
-    # Transform the accelerometer data to the spectral domain using FFT and extract spectral features
+    # Transform the accelerometer data to the spectral domain using FFT and
+    # extract spectral features
     df_spectral_features = extract_spectral_domain_features(
         config=config, sensor="accelerometer", windowed_data=windowed_acc
     )
@@ -127,31 +144,40 @@ def detect_gait(
     df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
 ) -> pd.Series:
     """
-    Detects gait activity in the input DataFrame using a pre-trained classifier and applies a threshold to classify results.
+    Detects gait activity in the input DataFrame using a pre-trained
+    classifier and applies a threshold to classify results.
 
     This function performs the following steps:
-    1. Loads the pre-trained classifier and scaling parameters from the specified directory.
-    2. Scales the relevant features in the input DataFrame (`df`) using the loaded scaling parameters.
-    3. Predicts the probability of gait activity for each sample in the DataFrame using the classifier.
-    4. Applies a threshold to the predicted probabilities to determine whether gait activity is present.
+    1. Loads the pre-trained classifier and scaling parameters from the
+       specified directory.
+    2. Scales the relevant features in the input DataFrame (`df`) using the
+       loaded scaling parameters.
+    3. Predicts the probability of gait activity for each sample in the
+       DataFrame using the classifier.
+    4. Applies a threshold to the predicted probabilities to determine
+       whether gait activity is present.
     5. Returns the predicted probabilities.
 
     Parameters
     ----------
     df : pd.DataFrame
-        The input DataFrame containing features extracted from gait data. It must include the necessary columns
-        as specified in the classifier's feature names.
+        The input DataFrame containing features extracted from gait data. It
+        must include the necessary columns as specified in the classifier's
+        feature names.
 
     clf_package : ClassifierPackage
-        The pre-trained classifier package containing the classifier, threshold, and scaler.
+        The pre-trained classifier package containing the classifier,
+        threshold, and scaler.
 
     parallel : bool, optional, default=False
-        If `True`, enables parallel processing during classification. If `False`, the classifier uses a single core.
+        If `True`, enables parallel processing during classification. If
+        `False`, the classifier uses a single core.
 
     Returns
     -------
     pd.Series
-        A Series containing the predicted probabilities of gait activity for each sample in the input DataFrame.
+        A Series containing the predicted probabilities of gait activity for
+        each sample in the input DataFrame.
     """
     # Set classifier
     clf = clf_package.classifier
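
A hedged sketch of calling detect_gait with the packaged classifier; the asset path and the threshold attribute mirror what run_gait_pipeline uses at the end of this diff, while df_gait_features is the hypothetical output of the previous step:

    from importlib.resources import files

    from paradigma.classification import ClassifierPackage

    # Bundled classifier, loaded the same way run_gait_pipeline does below
    clf_path = files("paradigma.assets") / "gait_detection_clf_package.pkl"
    clf_package_gait = ClassifierPackage.load(clf_path)

    gait_proba = detect_gait(df=df_gait_features, clf_package=clf_package_gait)
    pred_gait = (gait_proba >= clf_package_gait.threshold).astype(int)  # binary labels
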
@@ -165,11 +191,11 @@ def detect_gait(
     scaled_features = clf_package.transform_features(df.loc[:, feature_names_scaling])
 
     # Replace scaled features in a copy of the relevant features for prediction
-    X = df.loc[:, feature_names_predictions].copy()
-    X.loc[:, feature_names_scaling] = scaled_features
+    x_features = df.loc[:, feature_names_predictions].copy()
+    x_features.loc[:, feature_names_scaling] = scaled_features
 
     # Make prediction and add the probability of gait activity to the DataFrame
-    pred_gait_proba_series = clf_package.predict_proba(X)
+    pred_gait_proba_series = clf_package.predict_proba(x_features)
 
     return pred_gait_proba_series
 
@@ -181,38 +207,69 @@ def extract_arm_activity_features(
     """
     Extract features related to arm activity from a time-series DataFrame.
 
-    This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
-    and extracts features related to arm activity by performing the following steps:
+    This function processes a DataFrame containing accelerometer, gravity,
+    and gyroscope signals, and extracts features related to arm activity by
+    performing the following steps:
     1. Computes the angle and velocity from gyroscope data.
     2. Filters the data to include only predicted gait segments.
-    3. Groups the data into segments based on consecutive timestamps and pre-specified gaps.
+    3. Groups the data into segments based on consecutive timestamps and
+       pre-specified gaps.
     4. Removes segments that do not meet predefined criteria.
     5. Creates fixed-length windows from the time series data.
-    6. Extracts angle-related features, temporal domain features, and spectral domain features.
+    6. Extracts angle-related features, temporal domain features, and
+       spectral domain features.
 
     Parameters
     ----------
     df : pd.DataFrame
-        The input DataFrame containing accelerometer, gravity, and gyroscope data of predicted gait.
+        The input DataFrame containing accelerometer, gravity, and
+        gyroscope data of predicted gait.
 
     config : ArmActivityFeatureExtractionConfig
-        Configuration object containing column names and parameters for feature extraction.
+        Configuration object containing column names and parameters
+        for feature extraction.
 
     Returns
     -------
     pd.DataFrame
-        A DataFrame containing the extracted arm activity features, including angle, velocity,
-        temporal, and spectral features.
+        A DataFrame containing the extracted arm activity features,
+        including angle, velocity, temporal, and spectral features.
     """
-    # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap
-    df[DataColumns.SEGMENT_NR] = create_segments(
-        time_array=df[DataColumns.TIME], max_segment_gap_s=config.max_segment_gap_s
-    )
+    # Group consecutive timestamps into segments, with new segments
+    # starting after a pre-specified gap. If data_segment_nr exists,
+    # create gait segments per data segment to preserve both
+    has_data_segments = DataColumns.DATA_SEGMENT_NR in df.columns
+
+    if has_data_segments:
+        df_list = []
+        gait_segment_offset = 0
+
+        for data_seg_nr in sorted(df[DataColumns.DATA_SEGMENT_NR].unique()):
+            df_seg = df[df[DataColumns.DATA_SEGMENT_NR] == data_seg_nr].copy()
+
+            # Create gait segments within this data segment
+            df_seg[DataColumns.GAIT_SEGMENT_NR] = create_segments(
+                time_array=df_seg[DataColumns.TIME].values,
+                max_segment_gap_s=config.max_segment_gap_s,
+            )
+
+            # Offset gait segment numbers to be unique across data segments
+            if gait_segment_offset > 0:
+                df_seg[DataColumns.GAIT_SEGMENT_NR] += gait_segment_offset
+            gait_segment_offset = df_seg[DataColumns.GAIT_SEGMENT_NR].max() + 1
+
+            df_list.append(df_seg)
+
+        df = pd.concat(df_list, ignore_index=True)
+    else:
+        df[DataColumns.GAIT_SEGMENT_NR] = create_segments(
+            time_array=df[DataColumns.TIME], max_segment_gap_s=config.max_segment_gap_s
+        )
 
     # Remove segments that do not meet predetermined criteria
     df = discard_segments(
         df=df,
-        segment_nr_colname=DataColumns.SEGMENT_NR,
+        segment_nr_colname=DataColumns.GAIT_SEGMENT_NR,
         min_segment_length_s=config.min_segment_length_s,
         fs=config.sampling_frequency,
         format="timestamps",
@@ -220,7 +277,7 @@ def extract_arm_activity_features(
 
     # Create windows of fixed length and step size from the time series per segment
     windowed_data = []
-    df_grouped = df.groupby(DataColumns.SEGMENT_NR)
+    df_grouped = df.groupby(DataColumns.GAIT_SEGMENT_NR)
     windowed_colnames = (
         [config.time_colname]
         + config.accelerometer_colnames
@@ -290,14 +347,16 @@ def filter_gait(
     df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
 ) -> pd.Series:
     """
-    Filters gait data to identify windows with no other arm activity using a pre-trained classifier.
+    Filters gait data to identify windows with no other arm activity using
+    a pre-trained classifier.
 
     Parameters
     ----------
     df : pd.DataFrame
         The input DataFrame containing features extracted from gait data.
     clf_package : ClassifierPackage
-        The pre-trained classifier package containing the classifier, threshold, and scaler.
+        The pre-trained classifier package containing the classifier,
+        threshold, and scaler.
     parallel : bool, optional, default=False
         If `True`, enables parallel processing.
 
@@ -321,11 +380,11 @@ def filter_gait(
     scaled_features = clf_package.transform_features(df.loc[:, feature_names_scaling])
 
     # Replace scaled features in a copy of the relevant features for prediction
-    X = df.loc[:, feature_names_predictions].copy()
-    X.loc[:, feature_names_scaling] = scaled_features
+    x_features = df.loc[:, feature_names_predictions].copy()
+    x_features.loc[:, feature_names_scaling] = scaled_features
 
     # Make predictions
-    pred_no_other_arm_activity_proba_series = clf_package.predict_proba(X)
+    pred_no_other_arm_activity_proba_series = clf_package.predict_proba(x_features)
 
     return pred_no_other_arm_activity_proba_series
 
@@ -336,24 +395,27 @@ def quantify_arm_swing(
     filtered: bool = False,
     max_segment_gap_s: float = 1.5,
     min_segment_length_s: float = 1.5,
-) -> Tuple[dict[str, pd.DataFrame], dict]:
+) -> tuple[dict[str, pd.DataFrame], dict]:
     """
     Quantify arm swing parameters for segments of motion based on gyroscope data.
 
     Parameters
     ----------
     df : pd.DataFrame
-        A DataFrame containing the raw sensor data of predicted gait timestamps. Should include a column
-        for predicted no other arm activity based on a fitted threshold if filtered is True.
+        A DataFrame containing the raw sensor data of predicted gait
+        timestamps. Should include a column for predicted no other arm
+        activity based on a fitted threshold if filtered is True.
 
     fs : int
         The sampling frequency of the sensor data.
 
     filtered : bool, optional, default=False
-        If `True`, the gyroscope data is filtered to only include predicted no other arm activity.
+        If `True`, the gyroscope data is filtered to only include predicted
+        no other arm activity.
 
     max_segment_gap_s : float, optional, default=1.5
-        The maximum gap in seconds between consecutive timestamps to group them into segments.
+        The maximum gap in seconds between consecutive timestamps to group
+        them into segments.
 
     min_segment_length_s : float, optional, default=1.5
         The minimum length in seconds for a segment to be considered valid.
@@ -361,11 +423,11 @@ def quantify_arm_swing(
     Returns
     -------
     Tuple[pd.DataFrame, dict]
-        A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing
-        metadata for each segment.
+        A tuple containing a dataframe with quantified arm swing parameters
+        and a dictionary containing metadata for each segment.
     """
-    # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap.
-    # Segments are made based on predicted gait
+    # Group consecutive timestamps into segments, with new segments starting
+    # after a pre-specified gap. Segments are made based on predicted gait
    df["unfiltered_segment_nr"] = create_segments(
        time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
    )
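
For orientation, a minimal call sketch for quantify_arm_swing. The keyword values mirror the defaults and run_gait_pipeline's usage at the end of this diff; df_filtered and the 100 Hz sampling rate are assumptions:

    quantified, segment_meta = quantify_arm_swing(
        df=df_filtered,        # predicted gait with arm activity predictions (from filter_gait)
        fs=100,                # sampling frequency in Hz; 100 is an assumption
        filtered=True,
        max_segment_gap_s=1.5,
        min_segment_length_s=1.5,
    )
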
@@ -381,7 +443,8 @@
 
     if df.empty:
         raise ValueError(
-            "No segments found in the input data after discarding segments of invalid shape."
+            "No segments found in the input data after discarding segments "
+            "of invalid shape."
         )
 
     # Create dictionary of gait segment number and duration
@@ -414,7 +477,8 @@
 
     if df.empty:
         raise ValueError(
-            "No filtered gait segments found in the input data after discarding segments of invalid shape."
+            "No filtered gait segments found in the input data after "
+            "discarding segments of invalid shape."
         )
 
     grouping_colname = "filtered_segment_nr" if filtered else "unfiltered_segment_nr"
@@ -425,8 +489,8 @@
         "per_segment": {},
     }
 
-    # PCA is fitted on only predicted gait without other arm activity if filtered, otherwise
-    # it is fitted on the entire gyroscope data
+    # PCA is fitted on only predicted gait without other arm activity if
+    # filtered, otherwise it is fitted on the entire gyroscope data
     df[DataColumns.VELOCITY] = pca_transform_gyroscope(
         df=df,
         y_gyro_colname=DataColumns.GYROSCOPE_Y,
@@ -446,7 +510,8 @@
             gait_segment_duration_s = gait_segment_duration_dict[gait_segment_nr]
         except KeyError:
             logger.warning(
-                "Segment %s (filtered = %s) not found in gait segment duration dictionary. Skipping this segment.",
+                "Segment %s (filtered = %s) not found in gait segment "
+                "duration dictionary. Skipping this segment.",
                 gait_segment_nr,
                 filtered,
             )
@@ -471,8 +536,8 @@
         )
 
         segment_meta["per_segment"][segment_nr] = {
-            "start_time_s": time_array.min(),
-            "end_time_s": time_array.max(),
+            "start_time_s": float(time_array.min()),
+            "end_time_s": float(time_array.max()),
             "duration_unfiltered_segment_s": gait_segment_duration_s,
         }
 
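
The float() casts introduced above matter because run_gait_pipeline (added at the end of this diff) serializes this metadata with json.dump, and NumPy scalar types such as np.float32 are not JSON-serializable. A quick standalone demonstration:

    import json

    import numpy as np

    t_start = np.float32(12.25)              # e.g. a value taken from a float32 time array
    # json.dumps({"start_time_s": t_start}) would raise TypeError:
    # Object of type float32 is not JSON serializable
    json.dumps({"start_time_s": float(t_start)})  # '{"start_time_s": 12.25}'
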
@@ -497,7 +562,8 @@
         except Exception as e:
             # Handle the error, set RoM to NaN, and log the error
             print(
-                f"Error computing range of motion for segment {segment_nr}: {e}"
+                f"Error computing range of motion for segment "
+                f"{segment_nr}: {e}"
             )
             rom = np.array([np.nan])
 
@@ -509,17 +575,24 @@
         except Exception as e:
             # Handle the error, set pav to NaN, and log the error
             print(
-                f"Error computing peak angular velocity for segment {segment_nr}: {e}"
+                f"Error computing peak angular velocity for segment "
+                f"{segment_nr}: {e}"
             )
             pav = np.array([np.nan])
 
-        df_params_segment = pd.DataFrame(
-            {
-                DataColumns.SEGMENT_NR: segment_nr,
-                DataColumns.RANGE_OF_MOTION: rom,
-                DataColumns.PEAK_VELOCITY: pav,
-            }
-        )
+        params_dict = {
+            DataColumns.GAIT_SEGMENT_NR: segment_nr,
+            DataColumns.RANGE_OF_MOTION: rom,
+            DataColumns.PEAK_VELOCITY: pav,
+        }
+
+        # Add data_segment_nr if it exists in the input data
+        if DataColumns.DATA_SEGMENT_NR in group.columns:
+            params_dict[DataColumns.DATA_SEGMENT_NR] = group[
+                DataColumns.DATA_SEGMENT_NR
+            ].iloc[0]
+
+        df_params_segment = pd.DataFrame(params_dict)
 
         arm_swing_quantified.append(df_params_segment)
 
@@ -531,8 +604,8 @@
 def aggregate_arm_swing_params(
     df_arm_swing_params: pd.DataFrame,
     segment_meta: dict,
-    segment_cats: List[tuple],
-    aggregates: List[str] = ["median"],
+    segment_cats: list[tuple],
+    aggregates: list[str] = ["median"],
 ) -> dict:
     """
     Aggregate the quantification results for arm swing parameters.
@@ -546,14 +619,17 @@
         A dictionary containing metadata for each segment.
 
     segment_cats : List[tuple]
-        A list of tuples defining the segment categories, where each tuple contains the lower and upper bounds for the segment duration.
+        A list of tuples defining the segment categories, where each tuple
+        contains the lower and upper bounds for the segment duration.
     aggregates : List[str], optional
-        A list of aggregation methods to apply to the quantification results.
+        A list of aggregation methods to apply to the quantification
+        results.
 
     Returns
     -------
     dict
-        A dictionary containing the aggregated quantification results for arm swing parameters.
+        A dictionary containing the aggregated quantification results for
+        arm swing parameters.
     """
     arm_swing_parameters = [DataColumns.RANGE_OF_MOTION, DataColumns.PEAK_VELOCITY]
 
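
A hedged call sketch for aggregate_arm_swing_params; the duration bins are illustrative, and passing the per_segment sub-dictionary returned by quantify_arm_swing is an assumption based on the metadata layout above:

    segment_cats = [(0, 5), (5, 10), (10, float("inf"))]  # duration bounds in seconds (assumed bins)

    aggregated = aggregate_arm_swing_params(
        df_arm_swing_params=quantified,
        segment_meta=segment_meta["per_segment"],  # per-segment metadata from quantify_arm_swing
        segment_cats=segment_cats,
        aggregates=["median", "cov"],
    )
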
@@ -568,7 +644,8 @@
         ]
 
         if len(cat_segments) > 0:
-            # For each segment, use 'duration_filtered_segment_s' if present, else 'duration_unfiltered_segment_s'
+            # For each segment, use 'duration_filtered_segment_s' if present,
+            # else 'duration_unfiltered_segment_s'
             aggregated_results[segment_cat_str] = {
                 "duration_s": sum(
                     [
@@ -583,7 +660,7 @@
             }
 
             df_arm_swing_params_cat = df_arm_swing_params.loc[
-                df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)
+                df_arm_swing_params[DataColumns.GAIT_SEGMENT_NR].isin(cat_segments)
             ]
 
             # Aggregate across all segments
@@ -593,10 +670,14 @@
             for aggregate in aggregates:
                 if aggregate in ["std", "cov"]:
                     per_segment_agg = []
-                    # If the aggregate is 'cov' (coefficient of variation), we also compute the mean and standard deviation per segment
+                    # If the aggregate is 'cov' (coefficient of variation),
+                    # we also compute the mean and standard deviation per
+                    # segment
                     segment_groups = dict(
                         tuple(
-                            df_arm_swing_params_cat.groupby(DataColumns.SEGMENT_NR)
+                            df_arm_swing_params_cat.groupby(
+                                DataColumns.GAIT_SEGMENT_NR
+                            )
                         )
                     )
                     for segment_nr in cat_segments:
@@ -613,10 +694,14 @@
                     per_segment_agg = per_segment_agg[~np.isnan(per_segment_agg)]
 
                     for segment_level_aggregate in aggregates_per_segment:
-                        aggregated_results[segment_cat_str][
-                            f"{segment_level_aggregate}_{aggregate}_{arm_swing_parameter}"
-                        ] = aggregate_parameter(
-                            per_segment_agg, segment_level_aggregate
+                        key = (
+                            f"{segment_level_aggregate}_{aggregate}_"
+                            f"{arm_swing_parameter}"
+                        )
+                        aggregated_results[segment_cat_str][key] = (
+                            aggregate_parameter(
+                                per_segment_agg, segment_level_aggregate
+                            )
                         )
                 else:
                     aggregated_results[segment_cat_str][
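
For the 'cov' aggregate referenced above, the coefficient of variation is conventionally the standard deviation divided by the mean; a standalone illustration with made-up values (the exact computation in paradigma is not shown in this hunk):

    import numpy as np

    rom_per_swing = np.array([38.0, 42.0, 40.0])          # hypothetical range of motion per swing
    cov = np.std(rom_per_swing) / np.mean(rom_per_swing)  # ~0.04: low within-segment variability
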
@@ -638,7 +723,7 @@ def extract_temporal_domain_features(
     config,
     windowed_acc: np.ndarray,
     windowed_grav: np.ndarray,
-    grav_stats: List[str] = ["mean"],
+    grav_stats: list[str] = ["mean"],
 ) -> pd.DataFrame:
     """
     Compute temporal domain features for the accelerometer signal.
@@ -688,9 +773,10 @@ def extract_spectral_domain_features(
     """
     Compute spectral domain features for a sensor's data.
 
-    This function computes the periodogram, extracts power in specific frequency bands,
-    calculates the dominant frequency, and computes Mel-frequency cepstral coefficients (MFCCs)
-    for a given sensor's windowed data.
+    This function computes the periodogram, extracts power in specific
+    frequency bands, calculates the dominant frequency, and computes
+    Mel-frequency cepstral coefficients (MFCCs) for a given sensor's
+    windowed data.
 
     Parameters
     ----------
@@ -698,8 +784,8 @@
         A 2D numpy array where each row corresponds to a window of sensor data.
 
     config : object
-        Configuration object containing settings such as sampling frequency, window type,
-        frequency bands, and MFCC parameters.
+        Configuration object containing settings such as sampling frequency,
+        window type, frequency bands, and MFCC parameters.
 
     sensor : str
         The name of the sensor (e.g., 'accelerometer', 'gyroscope').
@@ -707,8 +793,9 @@
     Returns
     -------
     pd.DataFrame
-        A DataFrame containing the computed spectral features, with each row corresponding
-        to a window and each column representing a specific feature.
+        A DataFrame containing the computed spectral features, with each row
+        corresponding to a window and each column representing a specific
+        feature.
     """
     # Initialize a dictionary to hold the results
     feature_dict = {}
@@ -758,3 +845,307 @@
         feature_dict[colname] = mfccs[:, i]
 
     return pd.DataFrame(feature_dict)
+
+
+def run_gait_pipeline(
+    df_prepared: pd.DataFrame,
+    watch_side: str,
+    output_dir: str | Path,
+    imu_config: IMUConfig | None = None,
+    gait_config: GaitConfig | None = None,
+    arm_activity_config: GaitConfig | None = None,
+    store_intermediate: list[str] = [],
+    segment_number_offset: int = 0,
+    logging_level: int = logging.INFO,
+    custom_logger: logging.Logger | None = None,
+) -> tuple[pd.DataFrame, dict]:
+    """
+    Run the complete gait analysis pipeline on prepared data (steps 1-6).
+
+    This function implements the gait analysis workflow as described in the tutorials:
+    1. Preprocessing
+    2. Gait feature extraction
+    3. Gait detection
+    4. Arm activity feature extraction
+    5. Filtering gait
+    6. Arm swing quantification
+
+    Step 7 (aggregation) should be done after processing all segments.
+
+    Parameters
+    ----------
+    df_prepared : pd.DataFrame
+        Prepared IMU data with time, accelerometer, and gyroscope columns.
+        Should contain columns: time, accelerometer_x/y/z, gyroscope_x/y/z.
+        Will be preprocessed as step 1 of the pipeline.
+    watch_side : str
+        Side of the watch ('left' or 'right') to configure preprocessing accordingly.
+    output_dir : str or Path
+        Directory to save intermediate results (required).
+    imu_config : IMUConfig, optional
+        Configuration for IMU data preprocessing.
+        If None, uses default IMUConfig.
+    gait_config : GaitConfig, optional
+        Configuration for gait feature extraction and detection.
+        If None, uses default GaitConfig(step="gait").
+    arm_activity_config : GaitConfig, optional
+        Configuration for arm activity feature extraction and filtering.
+        If None, uses default GaitConfig(step="arm_activity").
+    store_intermediate : List[str]
+        Steps of which intermediate results should be stored:
+        - 'preprocessing': Store preprocessed data after step 1
+        - 'gait': Store gait features and predictions after step 3
+        - 'arm_activity': Store arm activity features and predictions after step 5
+        - 'quantification': Store arm swing quantification results after step 6
+        If empty, only returns the final quantified results.
+    segment_number_offset : int, optional, default=0
+        Offset to add to all segment numbers to avoid conflicts when concatenating
+        multiple data segments. Used for proper segment numbering across multiple files.
+    logging_level : int, default logging.INFO
+        Logging level using standard logging constants (logging.DEBUG, logging.INFO,
+        etc.)
+    custom_logger : logging.Logger, optional
+        Custom logger instance. If provided, logging_level is ignored.
+
+    Returns
+    -------
+    tuple[pd.DataFrame, dict]
+        A tuple containing:
+        - pd.DataFrame: Quantified arm swing parameters with the following columns:
+            - gait_segment_nr: Gait segment number within this data segment
+            - Various arm swing metrics (range of motion, peak angular velocity, etc.)
+            - Additional metadata columns
+        - dict: Gait segment metadata containing information about each detected
+          gait segment
+
+    Notes
+    -----
+    This function processes a single contiguous data segment. For multiple segments,
+    call this function for each segment, then use aggregate_arm_swing_params()
+    on the concatenated results.
+
+    The function follows the exact workflow from the gait analysis tutorial:
+    https://github.com/biomarkersParkinson/paradigma/blob/main/docs/
+    tutorials/gait_analysis.ipynb
+    """
+    # Setup logger
+    active_logger = custom_logger if custom_logger is not None else logger
+    if custom_logger is None:
+        active_logger.setLevel(logging_level)
+
+    # Set default configurations
+    if imu_config is None:
+        imu_config = IMUConfig()
+    if gait_config is None:
+        gait_config = GaitConfig(step="gait")
+    if arm_activity_config is None:
+        arm_activity_config = GaitConfig(step="arm_activity")
+
+    output_dir = Path(output_dir)
+
+    # Validate input data has required columns
+    required_columns = [
+        DataColumns.TIME,
+        DataColumns.ACCELEROMETER_X,
+        DataColumns.ACCELEROMETER_Y,
+        DataColumns.ACCELEROMETER_Z,
+        DataColumns.GYROSCOPE_X,
+        DataColumns.GYROSCOPE_Y,
+        DataColumns.GYROSCOPE_Z,
+    ]
+    missing_columns = [
+        col for col in required_columns if col not in df_prepared.columns
+    ]
+    if missing_columns:
+        raise ValueError(f"Missing required columns: {missing_columns}")
+
+    # Step 1: Preprocess data
+    active_logger.info("Step 1: Preprocessing IMU data")
+
+    df_preprocessed = preprocess_imu_data(
+        df=df_prepared,
+        config=imu_config,
+        sensor="both",
+        watch_side=watch_side,
+        verbose=1 if logging_level <= logging.INFO else 0,
+    )
+
+    if "preprocessing" in store_intermediate:
+        preprocessing_dir = output_dir / "preprocessing"
+        preprocessing_dir.mkdir(parents=True, exist_ok=True)
+        df_preprocessed.to_parquet(
+            preprocessing_dir / "preprocessed_data.parquet", index=False
+        )
+        active_logger.debug(
+            f"Saved preprocessed data to "
+            f"{preprocessing_dir / 'preprocessed_data.parquet'}"
+        )
+
+    # Step 2: Extract gait features
+    active_logger.info("Step 2: Extracting gait features")
+    df_gait = extract_gait_features(df_preprocessed, gait_config)
+
+    if "gait" in store_intermediate:
+        gait_dir = output_dir / "gait"
+        gait_dir.mkdir(parents=True, exist_ok=True)
+        df_gait.to_parquet(gait_dir / "gait_features.parquet", index=False)
+        active_logger.debug(
+            f"Saved gait features to {gait_dir / 'gait_features.parquet'}"
+        )
+
+    # Step 3: Detect gait
+    active_logger.info("Step 3: Detecting gait")
+    try:
+        classifier_path = files("paradigma.assets") / "gait_detection_clf_package.pkl"
+        classifier_package_gait = ClassifierPackage.load(classifier_path)
+    except Exception as e:
+        active_logger.error(f"Could not load gait detection classifier: {e}")
+        raise RuntimeError("Gait detection classifier not available")
+
+    gait_proba = detect_gait(df_gait, classifier_package_gait, parallel=False)
+    df_gait[DataColumns.PRED_GAIT_PROBA] = gait_proba
+
+    # Merge predictions back with timestamps
+    df_gait_with_time = merge_predictions_with_timestamps(
+        df_ts=df_preprocessed,
+        df_predictions=df_gait,
+        pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
+        window_length_s=gait_config.window_length_s,
+        fs=gait_config.sampling_frequency,
+    )
+
+    # Add binary prediction column
+    df_gait_with_time[DataColumns.PRED_GAIT] = (
+        df_gait_with_time[DataColumns.PRED_GAIT_PROBA]
+        >= classifier_package_gait.threshold
+    ).astype(int)
+
+    if "gait" in store_intermediate:
+        gait_dir = output_dir / "gait"
+        gait_dir.mkdir(parents=True, exist_ok=True)
+        df_gait_with_time.to_parquet(gait_dir / "gait_predictions.parquet", index=False)
+        active_logger.info(
+            f"Saved gait predictions to {gait_dir / 'gait_predictions.parquet'}"
+        )
+
+    # Filter to only gait periods
+    df_gait_only = df_gait_with_time.loc[
+        df_gait_with_time[DataColumns.PRED_GAIT] == 1
+    ].reset_index(drop=True)
+
+    if len(df_gait_only) == 0:
+        active_logger.warning("No gait detected in this segment")
+        return pd.DataFrame(), {}
+
+    # Step 4: Extract arm activity features
+    active_logger.info("Step 4: Extracting arm activity features")
+    df_arm_activity = extract_arm_activity_features(df_gait_only, arm_activity_config)
+
+    if "arm_activity" in store_intermediate:
+        arm_activity_dir = output_dir / "arm_activity"
+        arm_activity_dir.mkdir(parents=True, exist_ok=True)
+        df_arm_activity.to_parquet(
+            arm_activity_dir / "arm_activity_features.parquet", index=False
+        )
+        active_logger.debug(
+            f"Saved arm activity features to "
+            f"{arm_activity_dir / 'arm_activity_features.parquet'}"
+        )
+
+    # Step 5: Filter gait (remove other arm activities)
+    active_logger.info("Step 5: Filtering gait")
+    try:
+        classifier_path = files("paradigma.assets") / "gait_filtering_clf_package.pkl"
+        classifier_package_arm_activity = ClassifierPackage.load(classifier_path)
+    except Exception as e:
+        active_logger.error(f"Could not load arm activity classifier: {e}")
+        raise RuntimeError("Arm activity classifier not available")
+
+    # Filter gait returns probabilities which we add to the arm activity features
+    arm_activity_probabilities = filter_gait(
+        df_arm_activity, classifier_package_arm_activity, parallel=False
+    )
+
+    df_arm_activity[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA] = (
+        arm_activity_probabilities
+    )
+
+    # Merge predictions back with timestamps
+    df_filtered = merge_predictions_with_timestamps(
+        df_ts=df_gait_only,
+        df_predictions=df_arm_activity,
+        pred_proba_colname=DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA,
+        window_length_s=arm_activity_config.window_length_s,
+        fs=arm_activity_config.sampling_frequency,
+    )
+
+    # Add binary prediction column
+    filt_threshold = classifier_package_arm_activity.threshold
+    df_filtered[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] = (
+        df_filtered[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA] >= filt_threshold
+    ).astype(int)
+
+    if "arm_activity" in store_intermediate:
+        arm_activity_dir = output_dir / "arm_activity"
+        arm_activity_dir.mkdir(parents=True, exist_ok=True)
+        df_filtered.to_parquet(arm_activity_dir / "filtered_gait.parquet", index=False)
+        active_logger.debug(
+            f"Saved filtered gait to {arm_activity_dir / 'filtered_gait.parquet'}"
+        )
+
+    if (
+        len(df_filtered.loc[df_filtered[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1])
+        == 0
+    ):
+        active_logger.warning("No clean gait data remaining after filtering")
+        return pd.DataFrame(), {}
+
+    # Step 6: Quantify arm swing
+    active_logger.info("Step 6: Quantifying arm swing")
+    quantified_arm_swing, gait_segment_meta = quantify_arm_swing(
+        df=df_filtered,
+        fs=arm_activity_config.sampling_frequency,
+        filtered=True,
+        max_segment_gap_s=arm_activity_config.max_segment_gap_s,
+        min_segment_length_s=arm_activity_config.min_segment_length_s,
+    )
+
+    if "quantification" in store_intermediate:
+        quantification_dir = output_dir / "quantification"
+        quantification_dir.mkdir(parents=True, exist_ok=True)
+        quantified_arm_swing.to_parquet(
+            quantification_dir / "arm_swing_quantified.parquet", index=False
+        )
+
+        # Save gait segment metadata as JSON
+        with open(quantification_dir / "gait_segment_meta.json", "w") as f:
+            json.dump(gait_segment_meta, f, indent=2)
+
+        active_logger.debug(
+            f"Saved arm swing quantification to "
+            f"{quantification_dir / 'arm_swing_quantified.parquet'}"
+        )
+        active_logger.debug(
+            f"Saved gait segment metadata to "
+            f"{quantification_dir / 'gait_segment_meta.json'}"
+        )
+
+    active_logger.info(
+        f"Gait analysis pipeline completed. Found "
+        f"{len(quantified_arm_swing)} windows of gait "
+        f"without other arm activities."
+    )
+
+    # Apply segment number offset if specified (for multi-segment concatenation)
+    if segment_number_offset > 0 and len(quantified_arm_swing) > 0:
+        quantified_arm_swing = quantified_arm_swing.copy()
+        quantified_arm_swing["gait_segment_nr"] += segment_number_offset
+
+        # Also update the metadata with the new segment numbers
+        if gait_segment_meta and "per_segment" in gait_segment_meta:
+            updated_per_segment_meta = {}
+            for seg_id, meta in gait_segment_meta["per_segment"].items():
+                updated_per_segment_meta[seg_id + segment_number_offset] = meta
+            gait_segment_meta["per_segment"] = updated_per_segment_meta
+
+    return quantified_arm_swing, gait_segment_meta
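
To close, a hedged end-to-end sketch of the new public entry point, followed by the step-7 aggregation the docstring defers to the caller. The module path, file names, and duration bins are assumptions, not part of this diff:

    import logging
    from pathlib import Path

    import pandas as pd

    from paradigma.pipelines.gait_pipeline import (  # assumed module path
        aggregate_arm_swing_params,
        run_gait_pipeline,
    )

    df_prepared = pd.read_parquet("prepared_imu.parquet")  # hypothetical prepared input

    quantified, meta = run_gait_pipeline(
        df_prepared=df_prepared,
        watch_side="left",
        output_dir=Path("./results"),
        store_intermediate=["gait", "quantification"],
        logging_level=logging.INFO,
    )

    # Step 7 (not part of run_gait_pipeline): aggregate across processed segments
    segment_cats = [(0, 10), (10, float("inf"))]  # assumed duration bins in seconds
    aggregated = aggregate_arm_swing_params(
        df_arm_swing_params=quantified,
        segment_meta=meta["per_segment"],
        segment_cats=segment_cats,
    )
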