paradigma-1.0.3-py3-none-any.whl → paradigma-1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,107 +1,137 @@
+ import json
  import logging
+ from importlib.resources import files
+ from pathlib import Path
+
  import numpy as np
  import pandas as pd
  from scipy.signal import periodogram
- from typing import List, Tuple
 
  from paradigma.classification import ClassifierPackage
+ from paradigma.config import GaitConfig, IMUConfig
  from paradigma.constants import DataColumns
- from paradigma.config import GaitConfig
- from paradigma.feature_extraction import pca_transform_gyroscope, compute_angle, remove_moving_average_angle, \
-     extract_angle_extremes, compute_range_of_motion, compute_peak_angular_velocity, compute_statistics, \
-     compute_std_euclidean_norm, compute_power_in_bandwidth, compute_dominant_frequency, compute_mfccs, \
-     compute_total_power
- from paradigma.segmenting import tabulate_windows, create_segments, discard_segments, WindowedDataExtractor
- from paradigma.util import aggregate_parameter
-
+ from paradigma.feature_extraction import (
+     compute_angle,
+     compute_dominant_frequency,
+     compute_mfccs,
+     compute_peak_angular_velocity,
+     compute_power_in_bandwidth,
+     compute_range_of_motion,
+     compute_statistics,
+     compute_std_euclidean_norm,
+     compute_total_power,
+     extract_angle_extremes,
+     pca_transform_gyroscope,
+     remove_moving_average_angle,
+ )
+ from paradigma.preprocessing import preprocess_imu_data
+ from paradigma.segmenting import (
+     WindowedDataExtractor,
+     create_segments,
+     discard_segments,
+     tabulate_windows,
+ )
+ from paradigma.util import aggregate_parameter, merge_predictions_with_timestamps
 
  logger = logging.getLogger(__name__)
 
  # Only configure basic logging if no handlers exist
  if not logger.hasHandlers():
-     logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
+     logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
 
- def extract_gait_features(
-     df: pd.DataFrame,
-     config: GaitConfig
- ) -> pd.DataFrame:
+
+ def extract_gait_features(df: pd.DataFrame, config: GaitConfig) -> pd.DataFrame:
      """
-     Extracts gait features from accelerometer and gravity sensor data in the input DataFrame by computing temporal and spectral features.
+     Extracts gait features from accelerometer and gravity sensor data in the
+     input DataFrame by computing temporal and spectral features.
 
      This function performs the following steps:
-     1. Groups sequences of timestamps into windows, using accelerometer and gravity data.
-     2. Computes temporal domain features such as mean and standard deviation for accelerometer and gravity data.
-     3. Transforms the signals from the temporal domain to the spectral domain using the Fast Fourier Transform (FFT).
+     1. Groups sequences of timestamps into windows, using accelerometer and
+        gravity data.
+     2. Computes temporal domain features such as mean and standard deviation
+        for accelerometer and gravity data.
+     3. Transforms the signals from the temporal domain to the spectral
+        domain using the Fast Fourier Transform (FFT).
      4. Computes spectral domain features for the accelerometer data.
      5. Combines both temporal and spectral features into a final DataFrame.
 
      Parameters
      ----------
      df : pd.DataFrame
-         The input DataFrame containing gait data, which includes time, accelerometer, and gravity sensor data. The data should be
+         The input DataFrame containing gait data, which includes time,
+         accelerometer, and gravity sensor data. The data should be
          structured with the necessary columns as specified in the `config`.
 
      config : GaitConfig
-         Configuration object containing parameters for feature extraction, including column names for time, accelerometer data, and
-         gravity data, as well as settings for windowing, and feature computation.
+         Configuration object containing parameters for feature extraction,
+         including column names for time, accelerometer data, and gravity
+         data, as well as settings for windowing, and feature computation.
 
      Returns
      -------
      pd.DataFrame
-         A DataFrame containing extracted gait features, including temporal and spectral domain features. The DataFrame will have
-         columns corresponding to time, statistical features of the accelerometer and gravity data, and spectral features of the
-         accelerometer data.
-
+         A DataFrame containing extracted gait features, including temporal
+         and spectral domain features. The DataFrame will have columns
+         corresponding to time, statistical features of the accelerometer and
+         gravity data, and spectral features of the accelerometer data.
+
      Notes
      -----
-     - This function groups the data into windows based on timestamps and applies Fast Fourier Transform to compute spectral features.
-     - The temporal features are extracted from the accelerometer and gravity data, and include statistics like mean and standard deviation.
-     - The input DataFrame must include columns as specified in the `config` object for proper feature extraction.
+     - This function groups the data into windows based on timestamps and
+       applies Fast Fourier Transform to compute spectral features.
+     - The temporal features are extracted from the accelerometer and gravity
+       data, and include statistics like mean and standard deviation.
+     - The input DataFrame must include columns as specified in the `config`
+       object for proper feature extraction.
 
      Raises
      ------
      ValueError
-         If the input DataFrame does not contain the required columns as specified in the configuration or if any step in the feature extraction fails.
+         If the input DataFrame does not contain the required columns as
+         specified in the configuration or if any step in the feature
+         extraction fails.
      """
      # Group sequences of timestamps into windows
-     windowed_cols = [DataColumns.TIME] + config.accelerometer_cols + config.gravity_cols
+     windowed_colnames = (
+         [config.time_colname] + config.accelerometer_colnames + config.gravity_colnames
+     )
      windowed_data = tabulate_windows(
-         df=df,
-         columns=windowed_cols,
+         df=df,
+         columns=windowed_colnames,
          window_length_s=config.window_length_s,
          window_step_length_s=config.window_step_length_s,
-         fs=config.sampling_frequency
+         fs=config.sampling_frequency,
      )
 
-     extractor = WindowedDataExtractor(windowed_cols)
+     extractor = WindowedDataExtractor(windowed_colnames)
 
-     idx_time = extractor.get_index(DataColumns.TIME)
-     idx_acc = extractor.get_slice(config.accelerometer_cols)
-     idx_grav = extractor.get_slice(config.gravity_cols)
+     idx_time = extractor.get_index(config.time_colname)
+     idx_acc = extractor.get_slice(config.accelerometer_colnames)
+     idx_grav = extractor.get_slice(config.gravity_colnames)
 
      # Extract data
      start_time = np.min(windowed_data[:, :, idx_time], axis=1)
      windowed_acc = windowed_data[:, :, idx_acc]
      windowed_grav = windowed_data[:, :, idx_grav]
 
-     df_features = pd.DataFrame(start_time, columns=[DataColumns.TIME])
-
-     # Compute statistics of the temporal domain signals (mean, std) for accelerometer and gravity
+     df_features = pd.DataFrame(start_time, columns=[config.time_colname])
+
+     # Compute statistics of the temporal domain signals (mean, std) for
+     # accelerometer and gravity
      df_temporal_features = extract_temporal_domain_features(
-         config=config,
+         config=config,
          windowed_acc=windowed_acc,
          windowed_grav=windowed_grav,
-         grav_stats=['mean', 'std']
+         grav_stats=["mean", "std"],
      )
 
      # Combine temporal features with the start time
      df_features = pd.concat([df_features, df_temporal_features], axis=1)
 
-     # Transform the accelerometer data to the spectral domain using FFT and extract spectral features
+     # Transform the accelerometer data to the spectral domain using FFT and
+     # extract spectral features
      df_spectral_features = extract_spectral_domain_features(
-         config=config,
-         sensor='accelerometer',
-         windowed_data=windowed_acc
+         config=config, sensor="accelerometer", windowed_data=windowed_acc
      )
 
      # Combine the spectral features with the previously computed temporal features
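For orientation, a minimal usage sketch of the reworked extract_gait_features. The module path and the input file are assumptions (the diff does not name the file); GaitConfig(step="gait") mirrors its use in run_gait_pipeline further down.

import pandas as pd

from paradigma.config import GaitConfig
from paradigma.pipelines.gait_pipeline import extract_gait_features  # assumed module path

config = GaitConfig(step="gait")
df_preprocessed = pd.read_parquet("preprocessed_data.parquet")  # hypothetical input
df_gait_features = extract_gait_features(df=df_preprocessed, config=config)
print(df_gait_features.head())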
@@ -111,40 +141,47 @@ def extract_gait_features(
 
 
  def detect_gait(
-     df: pd.DataFrame,
-     clf_package: ClassifierPackage,
-     parallel: bool=False
- ) -> pd.Series:
+     df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
+ ) -> pd.Series:
      """
-     Detects gait activity in the input DataFrame using a pre-trained classifier and applies a threshold to classify results.
+     Detects gait activity in the input DataFrame using a pre-trained
+     classifier and applies a threshold to classify results.
 
      This function performs the following steps:
-     1. Loads the pre-trained classifier and scaling parameters from the specified directory.
-     2. Scales the relevant features in the input DataFrame (`df`) using the loaded scaling parameters.
-     3. Predicts the probability of gait activity for each sample in the DataFrame using the classifier.
-     4. Applies a threshold to the predicted probabilities to determine whether gait activity is present.
+     1. Loads the pre-trained classifier and scaling parameters from the
+        specified directory.
+     2. Scales the relevant features in the input DataFrame (`df`) using the
+        loaded scaling parameters.
+     3. Predicts the probability of gait activity for each sample in the
+        DataFrame using the classifier.
+     4. Applies a threshold to the predicted probabilities to determine
+        whether gait activity is present.
      5. Returns predicted probabilities
 
      Parameters
      ----------
      df : pd.DataFrame
-         The input DataFrame containing features extracted from gait data. It must include the necessary columns
-         as specified in the classifier's feature names.
+         The input DataFrame containing features extracted from gait data. It
+         must include the necessary columns as specified in the classifier's
+         feature names.
 
      clf_package : ClassifierPackage
-         The pre-trained classifier package containing the classifier, threshold, and scaler.
+         The pre-trained classifier package containing the classifier,
+         threshold, and scaler.
 
      parallel : bool, optional, default=False
-         If `True`, enables parallel processing during classification. If `False`, the classifier uses a single core.
+         If `True`, enables parallel processing during classification. If
+         `False`, the classifier uses a single core.
 
      Returns
      -------
      pd.Series
-         A Series containing the predicted probabilities of gait activity for each sample in the input DataFrame.
+         A Series containing the predicted probabilities of gait activity for
+         each sample in the input DataFrame.
      """
      # Set classifier
      clf = clf_package.classifier
-     if not parallel and hasattr(clf, 'n_jobs'):
+     if not parallel and hasattr(clf, "n_jobs"):
          clf.n_jobs = 1
 
      feature_names_scaling = clf_package.scaler.feature_names_in_
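The returned probabilities become binary calls by comparison against the classifier package's stored threshold; this is exactly how run_gait_pipeline, added later in this diff, consumes detect_gait:

# Sketch: df_gait is the output of extract_gait_features; clf_package is a
# loaded ClassifierPackage (see run_gait_pipeline below for how it is loaded).
gait_proba = detect_gait(df_gait, clf_package, parallel=False)
df_gait[DataColumns.PRED_GAIT_PROBA] = gait_proba
df_gait[DataColumns.PRED_GAIT] = (gait_proba >= clf_package.threshold).astype(int)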
@@ -154,78 +191,108 @@ def detect_gait(
      scaled_features = clf_package.transform_features(df.loc[:, feature_names_scaling])
 
      # Replace scaled features in a copy of the relevant features for prediction
-     X = df.loc[:, feature_names_predictions].copy()
-     X.loc[:, feature_names_scaling] = scaled_features
+     x_features = df.loc[:, feature_names_predictions].copy()
+     x_features.loc[:, feature_names_scaling] = scaled_features
 
      # Make prediction and add the probability of gait activity to the DataFrame
-     pred_gait_proba_series = clf_package.predict_proba(X)
+     pred_gait_proba_series = clf_package.predict_proba(x_features)
 
      return pred_gait_proba_series
 
 
  def extract_arm_activity_features(
-     df: pd.DataFrame,
-     config: GaitConfig,
- ) -> pd.DataFrame:
+     df: pd.DataFrame,
+     config: GaitConfig,
+ ) -> pd.DataFrame:
      """
      Extract features related to arm activity from a time-series DataFrame.
 
-     This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
-     and extracts features related to arm activity by performing the following steps:
+     This function processes a DataFrame containing accelerometer, gravity,
+     and gyroscope signals, and extracts features related to arm activity by
+     performing the following steps:
      1. Computes the angle and velocity from gyroscope data.
      2. Filters the data to include only predicted gait segments.
-     3. Groups the data into segments based on consecutive timestamps and pre-specified gaps.
+     3. Groups the data into segments based on consecutive timestamps and
+        pre-specified gaps.
      4. Removes segments that do not meet predefined criteria.
      5. Creates fixed-length windows from the time series data.
-     6. Extracts angle-related features, temporal domain features, and spectral domain features.
+     6. Extracts angle-related features, temporal domain features, and
+        spectral domain features.
 
      Parameters
      ----------
      df: pd.DataFrame
-         The input DataFrame containing accelerometer, gravity, and gyroscope data of predicted gait.
+         The input DataFrame containing accelerometer, gravity, and
+         gyroscope data of predicted gait.
 
      config : ArmActivityFeatureExtractionConfig
-         Configuration object containing column names and parameters for feature extraction.
+         Configuration object containing column names and parameters
+         for feature extraction.
 
      Returns
      -------
      pd.DataFrame
-         A DataFrame containing the extracted arm activity features, including angle, velocity,
-         temporal, and spectral features.
+         A DataFrame containing the extracted arm activity features,
+         including angle, velocity, temporal, and spectral features.
      """
-     # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap
-     df[DataColumns.SEGMENT_NR] = create_segments(
-         time_array=df[DataColumns.TIME],
-         max_segment_gap_s=config.max_segment_gap_s
-     )
+     # Group consecutive timestamps into segments, with new segments
+     # starting after a pre-specified gap. If data_segment_nr exists,
+     # create gait segments per data segment to preserve both
+     has_data_segments = DataColumns.DATA_SEGMENT_NR in df.columns
+
+     if has_data_segments:
+         df_list = []
+         gait_segment_offset = 0
+
+         for data_seg_nr in sorted(df[DataColumns.DATA_SEGMENT_NR].unique()):
+             df_seg = df[df[DataColumns.DATA_SEGMENT_NR] == data_seg_nr].copy()
+
+             # Create gait segments within this data segment
+             df_seg[DataColumns.GAIT_SEGMENT_NR] = create_segments(
+                 time_array=df_seg[DataColumns.TIME].values,
+                 max_segment_gap_s=config.max_segment_gap_s,
+             )
+
+             # Offset gait segment numbers to be unique across data segments
+             if gait_segment_offset > 0:
+                 df_seg[DataColumns.GAIT_SEGMENT_NR] += gait_segment_offset
+             gait_segment_offset = df_seg[DataColumns.GAIT_SEGMENT_NR].max() + 1
+
+             df_list.append(df_seg)
+
+         df = pd.concat(df_list, ignore_index=True)
+     else:
+         df[DataColumns.GAIT_SEGMENT_NR] = create_segments(
+             time_array=df[DataColumns.TIME], max_segment_gap_s=config.max_segment_gap_s
+         )
 
      # Remove segments that do not meet predetermined criteria
      df = discard_segments(
          df=df,
-         segment_nr_colname=DataColumns.SEGMENT_NR,
+         segment_nr_colname=DataColumns.GAIT_SEGMENT_NR,
          min_segment_length_s=config.min_segment_length_s,
          fs=config.sampling_frequency,
-         format='timestamps'
+         format="timestamps",
      )
 
      # Create windows of fixed length and step size from the time series per segment
      windowed_data = []
-     df_grouped = df.groupby(DataColumns.SEGMENT_NR)
-     windowed_cols = (
-         [DataColumns.TIME] +
-         config.accelerometer_cols +
-         config.gravity_cols +
-         config.gyroscope_cols
+     df_grouped = df.groupby(DataColumns.GAIT_SEGMENT_NR)
+     windowed_colnames = (
+         [config.time_colname]
+         + config.accelerometer_colnames
+         + config.gravity_colnames
+         + config.gyroscope_colnames
      )
 
      # Collect windows from all segments in a list for faster concatenation
      for _, group in df_grouped:
          windows = tabulate_windows(
-             df=group,
-             columns=windowed_cols,
+             df=group,
+             columns=windowed_colnames,
              window_length_s=config.window_length_s,
              window_step_length_s=config.window_step_length_s,
-             fs=config.sampling_frequency
+             fs=config.sampling_frequency,
          )
          if len(windows) > 0:  # Skip if no windows are created
              windowed_data.append(windows)
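The new per-data-segment numbering above can be illustrated with a self-contained toy example of the same offset arithmetic (literal column names stand in for the DataColumns constants used by the package):

import pandas as pd

df_toy = pd.DataFrame({
    "data_segment_nr": [1, 1, 2, 2],
    "gait_segment_nr": [1, 2, 1, 2],  # numbered independently per data segment
})
offset = 0
parts = []
for _, part in df_toy.groupby("data_segment_nr"):
    part = part.copy()
    if offset > 0:
        part["gait_segment_nr"] += offset
    offset = part["gait_segment_nr"].max() + 1
    parts.append(part)
# max() + 1 keeps numbers unique across data segments (3 is deliberately skipped)
print(pd.concat(parts)["gait_segment_nr"].tolist())  # [1, 2, 4, 5]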
@@ -239,12 +306,12 @@ def extract_arm_activity_features(
      windowed_data = np.concatenate(windowed_data, axis=0)
 
      # Slice columns for accelerometer, gravity, gyroscope, angle, and velocity
-     extractor = WindowedDataExtractor(windowed_cols)
+     extractor = WindowedDataExtractor(windowed_colnames)
 
-     idx_time = extractor.get_index(DataColumns.TIME)
-     idx_acc = extractor.get_slice(config.accelerometer_cols)
-     idx_grav = extractor.get_slice(config.gravity_cols)
-     idx_gyro = extractor.get_slice(config.gyroscope_cols)
+     idx_time = extractor.get_index(config.time_colname)
+     idx_acc = extractor.get_slice(config.accelerometer_colnames)
+     idx_grav = extractor.get_slice(config.gravity_colnames)
+     idx_gyro = extractor.get_slice(config.gyroscope_colnames)
 
      # Extract data
      start_time = np.min(windowed_data[:, :, idx_time], axis=1)
@@ -253,23 +320,23 @@ def extract_arm_activity_features(
      windowed_gyro = windowed_data[:, :, idx_gyro]
 
      # Initialize DataFrame for features
-     df_features = pd.DataFrame(start_time, columns=[DataColumns.TIME])
+     df_features = pd.DataFrame(start_time, columns=[config.time_colname])
 
      # Extract temporal domain features (e.g., mean, std for accelerometer and gravity)
      df_temporal_features = extract_temporal_domain_features(
-         config=config,
-         windowed_acc=windowed_acc,
-         windowed_grav=windowed_grav,
-         grav_stats=['mean', 'std']
+         config=config,
+         windowed_acc=windowed_acc,
+         windowed_grav=windowed_grav,
+         grav_stats=["mean", "std"],
      )
      df_features = pd.concat([df_features, df_temporal_features], axis=1)
 
      # Extract spectral domain features for accelerometer and gyroscope signals
-     for sensor_name, windowed_sensor in zip(['accelerometer', 'gyroscope'], [windowed_acc, windowed_gyro]):
+     for sensor_name, windowed_sensor in zip(
+         ["accelerometer", "gyroscope"], [windowed_acc, windowed_gyro]
+     ):
          df_spectral_features = extract_spectral_domain_features(
-             config=config,
-             sensor=sensor_name,
-             windowed_data=windowed_sensor
+             config=config, sensor=sensor_name, windowed_data=windowed_sensor
          )
          df_features = pd.concat([df_features, df_spectral_features], axis=1)
 
@@ -277,19 +344,19 @@ def extract_arm_activity_features(
 
 
  def filter_gait(
-     df: pd.DataFrame,
-     clf_package: ClassifierPackage,
-     parallel: bool=False
- ) -> pd.Series:
+     df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
+ ) -> pd.Series:
      """
-     Filters gait data to identify windows with no other arm activity using a pre-trained classifier.
+     Filters gait data to identify windows with no other arm activity using
+     a pre-trained classifier.
 
      Parameters
      ----------
      df : pd.DataFrame
          The input DataFrame containing features extracted from gait data.
      clf_package: ClassifierPackage
-         The pre-trained classifier package containing the classifier, threshold, and scaler.
+         The pre-trained classifier package containing the classifier,
+         threshold, and scaler.
      parallel : bool, optional, default=False
          If `True`, enables parallel processing.
 
@@ -300,10 +367,10 @@ def filter_gait(
      """
      if df.shape[0] == 0:
          raise ValueError("No data found in the input DataFrame.")
-
+
      # Set classifier
      clf = clf_package.classifier
-     if not parallel and hasattr(clf, 'n_jobs'):
+     if not parallel and hasattr(clf, "n_jobs"):
          clf.n_jobs = 1
 
      feature_names_scaling = clf_package.scaler.feature_names_in_
@@ -313,109 +380,117 @@ def filter_gait(
      scaled_features = clf_package.transform_features(df.loc[:, feature_names_scaling])
 
      # Replace scaled features in a copy of the relevant features for prediction
-     X = df.loc[:, feature_names_predictions].copy()
-     X.loc[:, feature_names_scaling] = scaled_features
+     x_features = df.loc[:, feature_names_predictions].copy()
+     x_features.loc[:, feature_names_scaling] = scaled_features
 
      # Make predictions
-     pred_no_other_arm_activity_proba_series = clf_package.predict_proba(X)
+     pred_no_other_arm_activity_proba_series = clf_package.predict_proba(x_features)
 
      return pred_no_other_arm_activity_proba_series
 
 
  def quantify_arm_swing(
-     df: pd.DataFrame,
-     fs: int,
-     filtered: bool = False,
-     max_segment_gap_s: float = 1.5,
-     min_segment_length_s: float = 1.5
- ) -> Tuple[dict[str, pd.DataFrame], dict]:
+     df: pd.DataFrame,
+     fs: int,
+     filtered: bool = False,
+     max_segment_gap_s: float = 1.5,
+     min_segment_length_s: float = 1.5,
+ ) -> tuple[dict[str, pd.DataFrame], dict]:
      """
      Quantify arm swing parameters for segments of motion based on gyroscope data.
 
      Parameters
      ----------
      df : pd.DataFrame
-         A DataFrame containing the raw sensor data of predicted gait timestamps. Should include a column
-         for predicted no other arm activity based on a fitted threshold if filtered is True.
+         A DataFrame containing the raw sensor data of predicted gait
+         timestamps. Should include a column for predicted no other arm
+         activity based on a fitted threshold if filtered is True.
 
      fs : int
          The sampling frequency of the sensor data.
 
      filtered : bool, optional, default=True
-         If `True`, the gyroscope data is filtered to only include predicted no other arm activity.
+         If `True`, the gyroscope data is filtered to only include predicted
+         no other arm activity.
 
      max_segment_gap_s : float, optional, default=1.5
-         The maximum gap in seconds between consecutive timestamps to group them into segments.
-
+         The maximum gap in seconds between consecutive timestamps to group
+         them into segments.
+
      min_segment_length_s : float, optional, default=1.5
          The minimum length in seconds for a segment to be considered valid.
 
      Returns
      -------
      Tuple[pd.DataFrame, dict]
-         A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing
-         metadata for each segment.
+         A tuple containing a dataframe with quantified arm swing parameters
+         and a dictionary containing metadata for each segment.
      """
-     # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap.
-     # Segments are made based on predicted gait
-     df['unfiltered_segment_nr'] = create_segments(
-         time_array=df[DataColumns.TIME],
-         max_segment_gap_s=max_segment_gap_s
+     # Group consecutive timestamps into segments, with new segments starting
+     # after a pre-specified gap. Segments are made based on predicted gait
+     df["unfiltered_segment_nr"] = create_segments(
+         time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
      )
 
      # Remove segments that do not meet predetermined criteria
      df = discard_segments(
          df=df,
-         segment_nr_colname='unfiltered_segment_nr',
+         segment_nr_colname="unfiltered_segment_nr",
          min_segment_length_s=min_segment_length_s,
          fs=fs,
-         format='timestamps'
+         format="timestamps",
      )
 
      if df.empty:
-         raise ValueError("No segments found in the input data after discarding segments of invalid shape.")
-
+         raise ValueError(
+             "No segments found in the input data after discarding segments "
+             "of invalid shape."
+         )
+
      # Create dictionary of gait segment number and duration
-     gait_segment_duration_dict = {segment_nr: len(group[DataColumns.TIME]) / fs for segment_nr, group in df.groupby('unfiltered_segment_nr', sort=False)}
-
+     gait_segment_duration_dict = {
+         segment_nr: len(group[DataColumns.TIME]) / fs
+         for segment_nr, group in df.groupby("unfiltered_segment_nr", sort=False)
+     }
+
      # If no arm swing data is remaining, return an empty dictionary
-     if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].empty:
+     if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1].empty:
          raise ValueError("No gait without other arm activities to quantify.")
      elif filtered:
          # Filter the DataFrame to only include predicted no other arm activity (1)
-         df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].reset_index(drop=True)
+         df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1].reset_index(
+             drop=True
+         )
 
          # Group consecutive timestamps into segments of filtered gait
-         df['filtered_segment_nr'] = create_segments(
-             time_array=df[DataColumns.TIME],
-             max_segment_gap_s=max_segment_gap_s
+         df["filtered_segment_nr"] = create_segments(
+             time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
          )
 
          # Remove segments that do not meet predetermined criteria
          df = discard_segments(
              df=df,
-             segment_nr_colname='filtered_segment_nr',
+             segment_nr_colname="filtered_segment_nr",
              min_segment_length_s=min_segment_length_s,
              fs=fs,
          )
 
          if df.empty:
-             raise ValueError("No filtered gait segments found in the input data after discarding segments of invalid shape.")
-
-     grouping_colname = 'filtered_segment_nr' if filtered else 'unfiltered_segment_nr'
+             raise ValueError(
+                 "No filtered gait segments found in the input data after "
+                 "discarding segments of invalid shape."
+             )
+
+     grouping_colname = "filtered_segment_nr" if filtered else "unfiltered_segment_nr"
 
      arm_swing_quantified = []
      segment_meta = {
-         'aggregated': {
-             'all': {
-                 'duration_s': len(df[DataColumns.TIME]) / fs
-             },
-         },
-         'per_segment': {}
+         "all": {"duration_s": len(df[DataColumns.TIME]) / fs},
+         "per_segment": {},
      }
 
-     # PCA is fitted on only predicted gait without other arm activity if filtered, otherwise
-     # it is fitted on the entire gyroscope data
+     # PCA is fitted on only predicted gait without other arm activity if
+     # filtered, otherwise it is fitted on the entire gyroscope data
      df[DataColumns.VELOCITY] = pca_transform_gyroscope(
          df=df,
          y_gyro_colname=DataColumns.GYROSCOPE_Y,
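Both segmentation passes above rely on create_segments from paradigma.segmenting. A rough stand-in for its gap rule, for readers without the package at hand (an illustration under assumed semantics, not the packaged implementation):

import numpy as np

def segment_by_gap(time_array: np.ndarray, max_segment_gap_s: float) -> np.ndarray:
    # A new segment starts wherever the step between consecutive timestamps
    # exceeds max_segment_gap_s; segments are numbered from 1.
    gaps = np.diff(time_array) > max_segment_gap_s
    return np.concatenate(([1], 1 + np.cumsum(gaps)))

t = np.array([0.0, 0.01, 0.02, 5.0, 5.01])
print(segment_by_gap(t, max_segment_gap_s=1.5))  # [1 1 1 2 2]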
@@ -425,7 +500,9 @@ def quantify_arm_swing(
      # Group and process segments
      for segment_nr, group in df.groupby(grouping_colname, sort=False):
          if filtered:
-             gait_segment_nr = group['unfiltered_segment_nr'].iloc[0]  # Each filtered segment is contained within an unfiltered segment
+             gait_segment_nr = group["unfiltered_segment_nr"].iloc[
+                 0
+             ]  # Each filtered segment is contained within an unfiltered segment
          else:
              gait_segment_nr = segment_nr
 
@@ -433,10 +510,14 @@
              gait_segment_duration_s = gait_segment_duration_dict[gait_segment_nr]
          except KeyError:
              logger.warning(
-                 "Segment %s (filtered = %s) not found in gait segment duration dictionary. Skipping this segment.",
-                 gait_segment_nr, filtered
+                 "Segment %s (filtered = %s) not found in gait segment "
+                 "duration dictionary. Skipping this segment.",
+                 gait_segment_nr,
+                 filtered,
+             )
+             logger.debug(
+                 "Available segments: %s", list(gait_segment_duration_dict.keys())
              )
-             logger.debug("Available segments: %s", list(gait_segment_duration_dict.keys()))
              continue
 
          time_array = group[DataColumns.TIME].to_numpy()
@@ -454,20 +535,22 @@
              fs=fs,
          )
 
-         segment_meta['per_segment'][segment_nr] = {
-             'start_time_s': time_array.min(),
-             'end_time_s': time_array.max(),
-             'duration_unfiltered_segment_s': gait_segment_duration_s,
+         segment_meta["per_segment"][segment_nr] = {
+             "start_time_s": float(time_array.min()),
+             "end_time_s": float(time_array.max()),
+             "duration_unfiltered_segment_s": gait_segment_duration_s,
          }
 
          if filtered:
-             segment_meta['per_segment'][segment_nr]['duration_filtered_segment_s'] = len(time_array) / fs
+             segment_meta["per_segment"][segment_nr]["duration_filtered_segment_s"] = (
+                 len(time_array) / fs
+             )
 
-         if angle_array.size > 0:
+         if angle_array.size > 0:
              angle_extrema_indices, _, _ = extract_angle_extremes(
                  angle_array=angle_array,
                  sampling_frequency=fs,
-                 max_frequency_activity=1.75
+                 max_frequency_activity=1.75,
              )
 
              if len(angle_extrema_indices) > 1:  # Requires at minimum 2 peaks
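After this restructuring, segment_meta is flattened from the old {'aggregated': {'all': ...}} nesting to top-level keys. For a filtered run it takes roughly this shape (values illustrative):

segment_meta = {
    "all": {"duration_s": 42.3},
    "per_segment": {
        1: {
            "start_time_s": 0.0,
            "end_time_s": 12.5,
            "duration_unfiltered_segment_s": 12.5,
            "duration_filtered_segment_s": 9.8,  # only present when filtered=True
        },
    },
}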
@@ -478,36 +561,55 @@
                  )
              except Exception as e:
                  # Handle the error, set RoM to NaN, and log the error
-                 print(f"Error computing range of motion for segment {segment_nr}: {e}")
+                 print(
+                     f"Error computing range of motion for segment "
+                     f"{segment_nr}: {e}"
+                 )
                  rom = np.array([np.nan])
 
              try:
                  pav = compute_peak_angular_velocity(
                      velocity_array=velocity_array,
-                     angle_extrema_indices=angle_extrema_indices
+                     angle_extrema_indices=angle_extrema_indices,
                  )
              except Exception as e:
                  # Handle the error, set pav to NaN, and log the error
-                 print(f"Error computing peak angular velocity for segment {segment_nr}: {e}")
+                 print(
+                     f"Error computing peak angular velocity for segment "
+                     f"{segment_nr}: {e}"
+                 )
                  pav = np.array([np.nan])
 
-             df_params_segment = pd.DataFrame({
-                 DataColumns.SEGMENT_NR: segment_nr,
+             params_dict = {
+                 DataColumns.GAIT_SEGMENT_NR: segment_nr,
                  DataColumns.RANGE_OF_MOTION: rom,
-                 DataColumns.PEAK_VELOCITY: pav
-             })
+                 DataColumns.PEAK_VELOCITY: pav,
+             }
+
+             # Add data_segment_nr if it exists in the input data
+             if DataColumns.DATA_SEGMENT_NR in group.columns:
+                 params_dict[DataColumns.DATA_SEGMENT_NR] = group[
+                     DataColumns.DATA_SEGMENT_NR
+                 ].iloc[0]
+
+             df_params_segment = pd.DataFrame(params_dict)
 
              arm_swing_quantified.append(df_params_segment)
 
      arm_swing_quantified = pd.concat(arm_swing_quantified, ignore_index=True)
-
+
      return arm_swing_quantified, segment_meta
 
 
- def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta: dict, segment_cats: List[tuple], aggregates: List[str] = ['median']) -> dict:
+ def aggregate_arm_swing_params(
+     df_arm_swing_params: pd.DataFrame,
+     segment_meta: dict,
+     segment_cats: list[tuple],
+     aggregates: list[str] = ["median"],
+ ) -> dict:
      """
      Aggregate the quantification results for arm swing parameters.
-
+
      Parameters
      ----------
      df_arm_swing_params : pd.DataFrame
@@ -517,84 +619,116 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
          A dictionary containing metadata for each segment.
 
      segment_cats : List[tuple]
-         A list of tuples defining the segment categories, where each tuple contains the lower and upper bounds for the segment duration.
-
+         A list of tuples defining the segment categories, where each tuple
+         contains the lower and upper bounds for the segment duration.
      aggregates : List[str], optional
-         A list of aggregation methods to apply to the quantification results.
-
+         A list of aggregation methods to apply to the quantification
+         results.
+
      Returns
      -------
      dict
-         A dictionary containing the aggregated quantification results for arm swing parameters.
+         A dictionary containing the aggregated quantification results for
+         arm swing parameters.
      """
      arm_swing_parameters = [DataColumns.RANGE_OF_MOTION, DataColumns.PEAK_VELOCITY]
 
      aggregated_results = {}
      for segment_cat_range in segment_cats:
-         segment_cat_str = f'{segment_cat_range[0]}_{segment_cat_range[1]}'
+         segment_cat_str = f"{segment_cat_range[0]}_{segment_cat_range[1]}"
          cat_segments = [
-             x for x in segment_meta.keys()
-             if segment_meta[x]['duration_unfiltered_segment_s'] >= segment_cat_range[0]
-             and segment_meta[x]['duration_unfiltered_segment_s'] < segment_cat_range[1]
+             x
+             for x in segment_meta.keys()
+             if segment_meta[x]["duration_unfiltered_segment_s"] >= segment_cat_range[0]
+             and segment_meta[x]["duration_unfiltered_segment_s"] < segment_cat_range[1]
          ]
 
-         if len(cat_segments) > 0:
-             # For each segment, use 'duration_filtered_segment_s' if present, else 'duration_unfiltered_segment_s'
+         if len(cat_segments) > 0:
+             # For each segment, use 'duration_filtered_segment_s' if present,
+             # else 'duration_unfiltered_segment_s'
              aggregated_results[segment_cat_str] = {
-                 'duration_s': sum(
+                 "duration_s": sum(
                      [
-                         segment_meta[x]['duration_filtered_segment_s']
-                         if 'duration_filtered_segment_s' in segment_meta[x]
-                         else segment_meta[x]['duration_unfiltered_segment_s']
+                         (
+                             segment_meta[x]["duration_filtered_segment_s"]
+                             if "duration_filtered_segment_s" in segment_meta[x]
+                             else segment_meta[x]["duration_unfiltered_segment_s"]
+                         )
                          for x in cat_segments
                      ]
-                 )}
+                 )
+             }
+
+             df_arm_swing_params_cat = df_arm_swing_params.loc[
+                 df_arm_swing_params[DataColumns.GAIT_SEGMENT_NR].isin(cat_segments)
+             ]
 
-             df_arm_swing_params_cat = df_arm_swing_params.loc[df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)]
-
              # Aggregate across all segments
-             aggregates_per_segment = ['median', 'mean']
+             aggregates_per_segment = ["median", "mean"]
 
              for arm_swing_parameter in arm_swing_parameters:
                  for aggregate in aggregates:
-                     if aggregate in ['std', 'cov']:
+                     if aggregate in ["std", "cov"]:
                          per_segment_agg = []
-                         # If the aggregate is 'cov' (coefficient of variation), we also compute the mean and standard deviation per segment
-                         segment_groups = dict(tuple(df_arm_swing_params_cat.groupby(DataColumns.SEGMENT_NR)))
+                         # If the aggregate is 'cov' (coefficient of variation),
+                         # we also compute the mean and standard deviation per
+                         # segment
+                         segment_groups = dict(
+                             tuple(
+                                 df_arm_swing_params_cat.groupby(
+                                     DataColumns.GAIT_SEGMENT_NR
+                                 )
+                             )
+                         )
                          for segment_nr in cat_segments:
                              segment_df = segment_groups.get(segment_nr)
                              if segment_df is not None:
-                                 per_segment_agg.append(aggregate_parameter(segment_df[arm_swing_parameter], aggregate))
+                                 per_segment_agg.append(
+                                     aggregate_parameter(
+                                         segment_df[arm_swing_parameter], aggregate
+                                     )
+                                 )
 
                          # Drop nans
                          per_segment_agg = np.array(per_segment_agg)
                          per_segment_agg = per_segment_agg[~np.isnan(per_segment_agg)]
 
-
                          for segment_level_aggregate in aggregates_per_segment:
-                             aggregated_results[segment_cat_str][f'{segment_level_aggregate}_{aggregate}_{arm_swing_parameter}'] = aggregate_parameter(per_segment_agg, segment_level_aggregate)
+                             key = (
+                                 f"{segment_level_aggregate}_{aggregate}_"
+                                 f"{arm_swing_parameter}"
+                             )
+                             aggregated_results[segment_cat_str][key] = (
+                                 aggregate_parameter(
+                                     per_segment_agg, segment_level_aggregate
+                                 )
+                             )
                      else:
-                         aggregated_results[segment_cat_str][f'{aggregate}_{arm_swing_parameter}'] = aggregate_parameter(df_arm_swing_params_cat[arm_swing_parameter], aggregate)
+                         aggregated_results[segment_cat_str][
+                             f"{aggregate}_{arm_swing_parameter}"
+                         ] = aggregate_parameter(
+                             df_arm_swing_params_cat[arm_swing_parameter], aggregate
+                         )
 
          else:
              # If no segments are found for this category, initialize with NaN
              aggregated_results[segment_cat_str] = {
-                 'duration_s': 0,
+                 "duration_s": 0,
              }
 
      return aggregated_results
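A hedged usage sketch of the reworked aggregation. The duration categories are illustrative, and the bounds are half-open (lower <= duration < upper), as the comprehension above shows; since the code indexes segment metadata directly by segment number, the per_segment sub-dict from quantify_arm_swing is passed:

# quantified_arm_swing and gait_segment_meta as returned by quantify_arm_swing
segment_cats = [(0, 10), (10, float("inf"))]  # illustrative duration bins, in seconds
aggregated = aggregate_arm_swing_params(
    df_arm_swing_params=quantified_arm_swing,
    segment_meta=gait_segment_meta["per_segment"],
    segment_cats=segment_cats,
    aggregates=["median", "std"],
)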
 
 
  def extract_temporal_domain_features(
-     config,
-     windowed_acc: np.ndarray,
-     windowed_grav: np.ndarray,
-     grav_stats: List[str] = ['mean']
- ) -> pd.DataFrame:
+     config,
+     windowed_acc: np.ndarray,
+     windowed_grav: np.ndarray,
+     grav_stats: list[str] = ["mean"],
+ ) -> pd.DataFrame:
      """
      Compute temporal domain features for the accelerometer signal.
 
-     This function calculates various statistical features for the gravity signal
+     This function calculates various statistical features for the gravity signal
      and computes the standard deviation of the accelerometer's Euclidean norm.
 
      Parameters
@@ -602,10 +736,10 @@ def extract_temporal_domain_features(
      config : object
          Configuration object containing the accelerometer and gravity column names.
      windowed_acc : numpy.ndarray
-         A 2D numpy array of shape (N, M) where N is the number of windows and M is
+         A 2D numpy array of shape (N, M) where N is the number of windows and M is
          the number of accelerometer values per window.
      windowed_grav : numpy.ndarray
-         A 2D numpy array of shape (N, M) where N is the number of windows and M is
+         A 2D numpy array of shape (N, M) where N is the number of windows and M is
          the number of gravity signal values per window.
      grav_stats : list of str, optional
          A list of statistics to compute for the gravity signal (default is ['mean']).
@@ -613,33 +747,36 @@ def extract_temporal_domain_features(
      Returns
      -------
      pd.DataFrame
-         A DataFrame containing the computed features, with each row corresponding
+         A DataFrame containing the computed features, with each row corresponding
          to a window and each column representing a specific feature.
      """
      # Compute gravity statistics (e.g., mean, std, etc.)
      feature_dict = {}
      for stat in grav_stats:
          stats_result = compute_statistics(data=windowed_grav, statistic=stat)
-         for i, col in enumerate(config.gravity_cols):
-             feature_dict[f'{col}_{stat}'] = stats_result[:, i]
+         for i, col in enumerate(config.gravity_colnames):
+             feature_dict[f"{col}_{stat}"] = stats_result[:, i]
 
      # Compute standard deviation of the Euclidean norm of the accelerometer signal
-     feature_dict['accelerometer_std_norm'] = compute_std_euclidean_norm(data=windowed_acc)
+     feature_dict["accelerometer_std_norm"] = compute_std_euclidean_norm(
+         data=windowed_acc
+     )
 
      return pd.DataFrame(feature_dict)
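An illustrative stand-in for compute_std_euclidean_norm (the packaged version is imported from paradigma.feature_extraction, and its exact behavior may differ): per window, the standard deviation of the per-sample Euclidean norm across the axes.

import numpy as np

def std_euclidean_norm(windowed: np.ndarray) -> np.ndarray:
    # windowed: (n_windows, n_samples, n_axes)
    norms = np.linalg.norm(windowed, axis=2)
    return norms.std(axis=1)

x = np.random.default_rng(0).normal(size=(4, 100, 3))
print(std_euclidean_norm(x).shape)  # (4,)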
 
 
  def extract_spectral_domain_features(
-     windowed_data: np.ndarray,
-     config,
-     sensor: str,
- ) -> pd.DataFrame:
+     windowed_data: np.ndarray,
+     config,
+     sensor: str,
+ ) -> pd.DataFrame:
      """
      Compute spectral domain features for a sensor's data.
 
-     This function computes the periodogram, extracts power in specific frequency bands,
-     calculates the dominant frequency, and computes Mel-frequency cepstral coefficients (MFCCs)
-     for a given sensor's windowed data.
+     This function computes the periodogram, extracts power in specific
+     frequency bands, calculates the dominant frequency, and computes
+     Mel-frequency cepstral coefficients (MFCCs) for a given sensor's
+     windowed data.
 
      Parameters
      ----------
@@ -647,66 +784,368 @@ def extract_spectral_domain_features(
      A 2D numpy array where each row corresponds to a window of sensor data.
 
      config : object
-         Configuration object containing settings such as sampling frequency, window type,
-         frequency bands, and MFCC parameters.
+         Configuration object containing settings such as sampling frequency,
+         window type, frequency bands, and MFCC parameters.
 
      sensor : str
          The name of the sensor (e.g., 'accelerometer', 'gyroscope').
-
+
      Returns
      -------
      pd.DataFrame
-         A DataFrame containing the computed spectral features, with each row corresponding
-         to a window and each column representing a specific feature.
+         A DataFrame containing the computed spectral features, with each row
+         corresponding to a window and each column representing a specific
+         feature.
      """
      # Initialize a dictionary to hold the results
      feature_dict = {}
 
      # Compute periodogram (power spectral density)
      freqs, psd = periodogram(
-         x=windowed_data,
-         fs=config.sampling_frequency,
-         window=config.window_type,
-         axis=1
+         x=windowed_data, fs=config.sampling_frequency, window=config.window_type, axis=1
      )
 
      # Compute power in specified frequency bands
      for band_name, band_freqs in config.d_frequency_bandwidths.items():
          band_powers = compute_power_in_bandwidth(
              freqs=freqs,
-             psd=psd,
+             psd=psd,
              fmin=band_freqs[0],
              fmax=band_freqs[1],
-             include_max=False
+             include_max=False,
          )
          for i, col in enumerate(config.axes):
-             feature_dict[f'{sensor}_{col}_{band_name}'] = band_powers[:, i]
+             feature_dict[f"{sensor}_{col}_{band_name}"] = band_powers[:, i]
 
      # Compute dominant frequency for each axis
      dominant_frequencies = compute_dominant_frequency(
-         freqs=freqs,
-         psd=psd,
-         fmin=config.spectrum_low_frequency,
-         fmax=config.spectrum_high_frequency
+         freqs=freqs,
+         psd=psd,
+         fmin=config.spectrum_low_frequency,
+         fmax=config.spectrum_high_frequency,
      )
 
      # Add dominant frequency features to the feature_dict
      for axis, freq in zip(config.axes, dominant_frequencies.T):
-         feature_dict[f'{sensor}_{axis}_dominant_frequency'] = freq
+         feature_dict[f"{sensor}_{axis}_dominant_frequency"] = freq
 
      # Compute total power in the PSD
      total_power_psd = compute_total_power(psd)
 
      # Compute MFCCs
      mfccs = compute_mfccs(
-         total_power_array=total_power_psd,
-         config=config,
-         multiplication_factor=4
+         total_power_array=total_power_psd, config=config, multiplication_factor=4
      )
 
      # Combine the MFCCs into the features DataFrame
-     mfcc_colnames = [f'{sensor}_mfcc_{x}' for x in range(1, config.mfcc_n_coefficients + 1)]
+     mfcc_colnames = [
+         f"{sensor}_mfcc_{x}" for x in range(1, config.mfcc_n_coefficients + 1)
+     ]
      for i, colname in enumerate(mfcc_colnames):
          feature_dict[colname] = mfccs[:, i]
 
-     return pd.DataFrame(feature_dict)
+     return pd.DataFrame(feature_dict)
+
+
+ def run_gait_pipeline(
+     df_prepared: pd.DataFrame,
+     watch_side: str,
+     output_dir: str | Path,
+     imu_config: IMUConfig | None = None,
+     gait_config: GaitConfig | None = None,
+     arm_activity_config: GaitConfig | None = None,
+     store_intermediate: list[str] = [],
+     segment_number_offset: int = 0,
+     logging_level: int = logging.INFO,
+     custom_logger: logging.Logger | None = None,
+ ) -> tuple[pd.DataFrame, dict]:
+     """
+     Run the complete gait analysis pipeline on prepared data (steps 1-6).
+
+     This function implements the gait analysis workflow as described in the tutorials:
+     1. Preprocessing
+     2. Gait feature extraction
+     3. Gait detection
+     4. Arm activity feature extraction
+     5. Filtering gait
+     6. Arm swing quantification
+
+     Step 7 (aggregation) should be done after processing all segments.
+
+     Parameters
+     ----------
+     df_prepared : pd.DataFrame
+         Prepared IMU data with time, accelerometer, and gyroscope columns.
+         Should contain columns: time, accelerometer_x/y/z, gyroscope_x/y/z.
+         Will be preprocessed as step 1 of the pipeline.
+     watch_side : str
+         Side of the watch ('left' or 'right') to configure preprocessing accordingly.
+     output_dir : str or Path
+         Directory to save intermediate results (required).
+     imu_config : IMUConfig, optional
+         Configuration for IMU data preprocessing.
+         If None, uses default IMUConfig.
+     gait_config : GaitConfig, optional
+         Configuration for gait feature extraction and detection.
+         If None, uses default GaitConfig(step="gait").
+     arm_activity_config : GaitConfig, optional
+         Configuration for arm activity feature extraction and filtering.
+         If None, uses default GaitConfig(step="arm_activity").
+     store_intermediate : List[str]
+         Steps of which intermediate results should be stored:
+         - 'preprocessing': Store preprocessed data after step 1
+         - 'gait': Store gait features and predictions after step 3
+         - 'arm_activity': Store arm activity features and predictions after step 5
+         - 'quantification': Store arm swing quantification results after step 6
+         If empty, only returns the final quantified results.
+     segment_number_offset : int, optional, default=0
+         Offset to add to all segment numbers to avoid conflicts when concatenating
+         multiple data segments. Used for proper segment numbering across multiple files.
+     logging_level : int, default logging.INFO
+         Logging level using standard logging constants (logging.DEBUG, logging.INFO,
+         etc.)
+     custom_logger : logging.Logger, optional
+         Custom logger instance. If provided, logging_level is ignored.
+
+     Returns
+     -------
+     tuple[pd.DataFrame, dict]
+         A tuple containing:
+         - pd.DataFrame: Quantified arm swing parameters with the following columns:
+             - gait_segment_nr: Gait segment number within this data segment
+             - Various arm swing metrics (range of motion, peak angular velocity, etc.)
+             - Additional metadata columns
+         - dict: Gait segment metadata containing information about each detected
+           gait segment
+
+     Notes
+     -----
+     This function processes a single contiguous data segment. For multiple segments,
+     call this function for each segment, then use aggregate_arm_swing_params()
+     on the concatenated results.
+
+     The function follows the exact workflow from the gait analysis tutorial:
+     https://github.com/biomarkersParkinson/paradigma/blob/main/docs/
+     tutorials/gait_analysis.ipynb
+     """
+     # Setup logger
+     active_logger = custom_logger if custom_logger is not None else logger
+     if custom_logger is None:
+         active_logger.setLevel(logging_level)
+
+     # Set default configurations
+     if imu_config is None:
+         imu_config = IMUConfig()
+     if gait_config is None:
+         gait_config = GaitConfig(step="gait")
+     if arm_activity_config is None:
+         arm_activity_config = GaitConfig(step="arm_activity")
+
+     output_dir = Path(output_dir)
+
+     # Validate input data has required columns
+     required_columns = [
+         DataColumns.TIME,
+         DataColumns.ACCELEROMETER_X,
+         DataColumns.ACCELEROMETER_Y,
+         DataColumns.ACCELEROMETER_Z,
+         DataColumns.GYROSCOPE_X,
+         DataColumns.GYROSCOPE_Y,
+         DataColumns.GYROSCOPE_Z,
+     ]
+     missing_columns = [
+         col for col in required_columns if col not in df_prepared.columns
+     ]
+     if missing_columns:
+         raise ValueError(f"Missing required columns: {missing_columns}")
+
+     # Step 1: Preprocess data
+     active_logger.info("Step 1: Preprocessing IMU data")
+
+     df_preprocessed = preprocess_imu_data(
+         df=df_prepared,
+         config=imu_config,
+         sensor="both",
+         watch_side=watch_side,
+         verbose=1 if logging_level <= logging.INFO else 0,
+     )
+
+     if "preprocessing" in store_intermediate:
+         preprocessing_dir = output_dir / "preprocessing"
+         preprocessing_dir.mkdir(parents=True, exist_ok=True)
+         df_preprocessed.to_parquet(
+             preprocessing_dir / "preprocessed_data.parquet", index=False
+         )
+         active_logger.debug(
+             f"Saved preprocessed data to "
+             f"{preprocessing_dir / 'preprocessed_data.parquet'}"
+         )
+
+     # Step 2: Extract gait features
+     active_logger.info("Step 2: Extracting gait features")
+     df_gait = extract_gait_features(df_preprocessed, gait_config)
+
+     if "gait" in store_intermediate:
+         gait_dir = output_dir / "gait"
+         gait_dir.mkdir(parents=True, exist_ok=True)
+         df_gait.to_parquet(gait_dir / "gait_features.parquet", index=False)
+         active_logger.debug(
+             f"Saved gait features to {gait_dir / 'gait_features.parquet'}"
+         )
+
+     # Step 3: Detect gait
+     active_logger.info("Step 3: Detecting gait")
+     try:
+         classifier_path = files("paradigma.assets") / "gait_detection_clf_package.pkl"
+         classifier_package_gait = ClassifierPackage.load(classifier_path)
+     except Exception as e:
+         active_logger.error(f"Could not load gait detection classifier: {e}")
+         raise RuntimeError("Gait detection classifier not available")
+
+     gait_proba = detect_gait(df_gait, classifier_package_gait, parallel=False)
+     df_gait[DataColumns.PRED_GAIT_PROBA] = gait_proba
+
+     # Merge predictions back with timestamps
+     df_gait_with_time = merge_predictions_with_timestamps(
+         df_ts=df_preprocessed,
+         df_predictions=df_gait,
+         pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
+         window_length_s=gait_config.window_length_s,
+         fs=gait_config.sampling_frequency,
+     )
+
+     # Add binary prediction column
+     df_gait_with_time[DataColumns.PRED_GAIT] = (
+         df_gait_with_time[DataColumns.PRED_GAIT_PROBA]
+         >= classifier_package_gait.threshold
+     ).astype(int)
+
+     if "gait" in store_intermediate:
+         gait_dir = output_dir / "gait"
+         gait_dir.mkdir(parents=True, exist_ok=True)
+         df_gait_with_time.to_parquet(gait_dir / "gait_predictions.parquet", index=False)
+         active_logger.info(
+             f"Saved gait predictions to {gait_dir / 'gait_predictions.parquet'}"
+         )
+
+     # Filter to only gait periods
+     df_gait_only = df_gait_with_time.loc[
+         df_gait_with_time[DataColumns.PRED_GAIT] == 1
+     ].reset_index(drop=True)
+
+     if len(df_gait_only) == 0:
+         active_logger.warning("No gait detected in this segment")
+         return pd.DataFrame(), {}
+
+     # Step 4: Extract arm activity features
+     active_logger.info("Step 4: Extracting arm activity features")
+     df_arm_activity = extract_arm_activity_features(df_gait_only, arm_activity_config)
+
+     if "arm_activity" in store_intermediate:
+         arm_activity_dir = output_dir / "arm_activity"
+         arm_activity_dir.mkdir(parents=True, exist_ok=True)
+         df_arm_activity.to_parquet(
+             arm_activity_dir / "arm_activity_features.parquet", index=False
+         )
+         active_logger.debug(
+             f"Saved arm activity features to "
+             f"{arm_activity_dir / 'arm_activity_features.parquet'}"
+         )
+
+     # Step 5: Filter gait (remove other arm activities)
+     active_logger.info("Step 5: Filtering gait")
+     try:
+         classifier_path = files("paradigma.assets") / "gait_filtering_clf_package.pkl"
+         classifier_package_arm_activity = ClassifierPackage.load(classifier_path)
+     except Exception as e:
+         active_logger.error(f"Could not load arm activity classifier: {e}")
+         raise RuntimeError("Arm activity classifier not available")
+
+     # Filter gait returns probabilities which we add to the arm activity features
+     arm_activity_probabilities = filter_gait(
+         df_arm_activity, classifier_package_arm_activity, parallel=False
+     )
+
+     df_arm_activity[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA] = (
+         arm_activity_probabilities
+     )
+
+     # Merge predictions back with timestamps
+     df_filtered = merge_predictions_with_timestamps(
+         df_ts=df_gait_only,
+         df_predictions=df_arm_activity,
+         pred_proba_colname=DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA,
+         window_length_s=arm_activity_config.window_length_s,
+         fs=arm_activity_config.sampling_frequency,
+     )
+
+     # Add binary prediction column
+     filt_threshold = classifier_package_arm_activity.threshold
+     df_filtered[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] = (
+         df_filtered[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA] >= filt_threshold
+     ).astype(int)
+
+     if "arm_activity" in store_intermediate:
+         arm_activity_dir = output_dir / "arm_activity"
+         arm_activity_dir.mkdir(parents=True, exist_ok=True)
+         df_filtered.to_parquet(arm_activity_dir / "filtered_gait.parquet", index=False)
+         active_logger.debug(
+             f"Saved filtered gait to {arm_activity_dir / 'filtered_gait.parquet'}"
+         )
+
+     if (
+         len(df_filtered.loc[df_filtered[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1])
+         == 0
+     ):
+         active_logger.warning("No clean gait data remaining after filtering")
+         return pd.DataFrame(), {}
+
+     # Step 6: Quantify arm swing
+     active_logger.info("Step 6: Quantifying arm swing")
+     quantified_arm_swing, gait_segment_meta = quantify_arm_swing(
+         df=df_filtered,
+         fs=arm_activity_config.sampling_frequency,
+         filtered=True,
+         max_segment_gap_s=arm_activity_config.max_segment_gap_s,
+         min_segment_length_s=arm_activity_config.min_segment_length_s,
+     )
+
+     if "quantification" in store_intermediate:
+         quantification_dir = output_dir / "quantification"
+         quantification_dir.mkdir(parents=True, exist_ok=True)
+         quantified_arm_swing.to_parquet(
+             quantification_dir / "arm_swing_quantified.parquet", index=False
+         )
+
+         # Save gait segment metadata as JSON
+         with open(quantification_dir / "gait_segment_meta.json", "w") as f:
+             json.dump(gait_segment_meta, f, indent=2)
+
+         active_logger.debug(
+             f"Saved arm swing quantification to "
+             f"{quantification_dir / 'arm_swing_quantified.parquet'}"
+         )
+         active_logger.debug(
+             f"Saved gait segment metadata to "
+             f"{quantification_dir / 'gait_segment_meta.json'}"
+         )
+
+     active_logger.info(
+         f"Gait analysis pipeline completed. Found "
+         f"{len(quantified_arm_swing)} windows of gait "
+         f"without other arm activities."
+     )
+
+     # Apply segment number offset if specified (for multi-segment concatenation)
+     if segment_number_offset > 0 and len(quantified_arm_swing) > 0:
+         quantified_arm_swing = quantified_arm_swing.copy()
+         quantified_arm_swing["gait_segment_nr"] += segment_number_offset
+
+         # Also update the metadata with the new segment numbers
+         if gait_segment_meta and "per_segment" in gait_segment_meta:
+             updated_per_segment_meta = {}
+             for seg_id, meta in gait_segment_meta["per_segment"].items():
+                 updated_per_segment_meta[seg_id + segment_number_offset] = meta
+             gait_segment_meta["per_segment"] = updated_per_segment_meta
+
+     return quantified_arm_swing, gait_segment_meta
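Finally, a hedged end-to-end sketch of the new entry point; the module path and input file are assumptions, while the parameters follow the signature above:

from pathlib import Path

import pandas as pd

from paradigma.pipelines.gait_pipeline import run_gait_pipeline  # assumed module path

df_prepared = pd.read_parquet("prepared_imu.parquet")  # hypothetical input
quantified, meta = run_gait_pipeline(
    df_prepared=df_prepared,
    watch_side="left",
    output_dir=Path("./output"),
    store_intermediate=["gait", "quantification"],
)
print(len(quantified), list(meta.get("per_segment", {}).keys()))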