paradigma-1.0.2-py3-none-any.whl → paradigma-1.0.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,43 @@
+ import logging
+ from typing import List, Tuple
+
  import numpy as np
  import pandas as pd
  from scipy.signal import periodogram
- from typing import List, Tuple
 
  from paradigma.classification import ClassifierPackage
- from paradigma.constants import DataColumns
  from paradigma.config import GaitConfig
- from paradigma.feature_extraction import pca_transform_gyroscope, compute_angle, remove_moving_average_angle, \
-     extract_angle_extremes, compute_range_of_motion, compute_peak_angular_velocity, compute_statistics, \
-     compute_std_euclidean_norm, compute_power_in_bandwidth, compute_dominant_frequency, compute_mfccs, \
-     compute_total_power
- from paradigma.segmenting import tabulate_windows, create_segments, discard_segments, categorize_segments, WindowedDataExtractor
+ from paradigma.constants import DataColumns
+ from paradigma.feature_extraction import (
+     compute_angle,
+     compute_dominant_frequency,
+     compute_mfccs,
+     compute_peak_angular_velocity,
+     compute_power_in_bandwidth,
+     compute_range_of_motion,
+     compute_statistics,
+     compute_std_euclidean_norm,
+     compute_total_power,
+     extract_angle_extremes,
+     pca_transform_gyroscope,
+     remove_moving_average_angle,
+ )
+ from paradigma.segmenting import (
+     WindowedDataExtractor,
+     create_segments,
+     discard_segments,
+     tabulate_windows,
+ )
  from paradigma.util import aggregate_parameter
 
+ logger = logging.getLogger(__name__)
+
+ # Only configure basic logging if no handlers exist
+ if not logger.hasHandlers():
+     logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
+
 
- def extract_gait_features(
-     df: pd.DataFrame,
-     config: GaitConfig
- ) -> pd.DataFrame:
+ def extract_gait_features(df: pd.DataFrame, config: GaitConfig) -> pd.DataFrame:
      """
      Extracts gait features from accelerometer and gravity sensor data in the input DataFrame by computing temporal and spectral features.
 
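
Note on the new logging setup: logging.basicConfig is only called when the package logger has no handlers, directly or via an ancestor, so an application that configures logging before importing paradigma keeps its own configuration. A minimal sketch of that interaction (the import name is assumed for illustration):

    import logging

    # Configuring the root logger first attaches a root handler, so
    # logger.hasHandlers() inside the paradigma module returns True and its
    # fallback basicConfig call is skipped.
    logging.basicConfig(level=logging.WARNING, format="%(asctime)s %(levelname)s %(message)s")

    import paradigma  # assumed entry point; importing the module triggers the guard above
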
@@ -44,7 +64,7 @@ def extract_gait_features(
      A DataFrame containing extracted gait features, including temporal and spectral domain features. The DataFrame will have
      columns corresponding to time, statistical features of the accelerometer and gravity data, and spectral features of the
      accelerometer data.
-
+
      Notes
      -----
      - This function groups the data into windows based on timestamps and applies Fast Fourier Transform to compute spectral features.
@@ -57,34 +77,36 @@ def extract_gait_features(
      If the input DataFrame does not contain the required columns as specified in the configuration or if any step in the feature extraction fails.
      """
      # Group sequences of timestamps into windows
-     windowed_cols = [DataColumns.TIME] + config.accelerometer_cols + config.gravity_cols
+     windowed_colnames = (
+         [config.time_colname] + config.accelerometer_colnames + config.gravity_colnames
+     )
      windowed_data = tabulate_windows(
-         df=df,
-         columns=windowed_cols,
+         df=df,
+         columns=windowed_colnames,
          window_length_s=config.window_length_s,
          window_step_length_s=config.window_step_length_s,
-         fs=config.sampling_frequency
+         fs=config.sampling_frequency,
      )
 
-     extractor = WindowedDataExtractor(windowed_cols)
+     extractor = WindowedDataExtractor(windowed_colnames)
 
-     idx_time = extractor.get_index(DataColumns.TIME)
-     idx_acc = extractor.get_slice(config.accelerometer_cols)
-     idx_grav = extractor.get_slice(config.gravity_cols)
+     idx_time = extractor.get_index(config.time_colname)
+     idx_acc = extractor.get_slice(config.accelerometer_colnames)
+     idx_grav = extractor.get_slice(config.gravity_colnames)
 
      # Extract data
      start_time = np.min(windowed_data[:, :, idx_time], axis=1)
      windowed_acc = windowed_data[:, :, idx_acc]
      windowed_grav = windowed_data[:, :, idx_grav]
 
-     df_features = pd.DataFrame(start_time, columns=[DataColumns.TIME])
-
+     df_features = pd.DataFrame(start_time, columns=[config.time_colname])
+
      # Compute statistics of the temporal domain signals (mean, std) for accelerometer and gravity
      df_temporal_features = extract_temporal_domain_features(
-         config=config,
+         config=config,
          windowed_acc=windowed_acc,
          windowed_grav=windowed_grav,
-         grav_stats=['mean', 'std']
+         grav_stats=["mean", "std"],
      )
 
      # Combine temporal features with the start time
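
For orientation on the windowing step above: tabulate_windows returns a 3D array of shape (n_windows, n_samples_per_window, n_columns), which is why time, accelerometer, and gravity are sliced out along the last axis. A back-of-the-envelope sketch of the shapes, with sampling rate and window settings chosen purely for illustration rather than taken from GaitConfig defaults:

    # Illustrative values only; the real ones come from the GaitConfig instance.
    fs = 100                      # samples per second
    window_length_s = 6
    window_step_length_s = 1
    n_samples = 3000              # 30 s of input

    samples_per_window = int(window_length_s * fs)                        # 600
    samples_per_step = int(window_step_length_s * fs)                     # 100
    n_windows = (n_samples - samples_per_window) // samples_per_step + 1  # 25
    # windowed_data then has shape (25, 600, 1 + n_acc_cols + n_grav_cols)
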
@@ -92,9 +114,7 @@ def extract_gait_features(
 
      # Transform the accelerometer data to the spectral domain using FFT and extract spectral features
      df_spectral_features = extract_spectral_domain_features(
-         config=config,
-         sensor='accelerometer',
-         windowed_data=windowed_acc
+         config=config, sensor="accelerometer", windowed_data=windowed_acc
      )
 
      # Combine the spectral features with the previously computed temporal features
@@ -104,10 +124,8 @@ def extract_gait_features(
 
 
  def detect_gait(
-     df: pd.DataFrame,
-     clf_package: ClassifierPackage,
-     parallel: bool=False
- ) -> pd.Series:
+     df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
+ ) -> pd.Series:
      """
      Detects gait activity in the input DataFrame using a pre-trained classifier and applies a threshold to classify results.
 
@@ -121,7 +139,7 @@ def detect_gait(
      Parameters
      ----------
      df : pd.DataFrame
-         The input DataFrame containing features extracted from gait data. It must include the necessary columns 
+         The input DataFrame containing features extracted from gait data. It must include the necessary columns
          as specified in the classifier's feature names.
 
      clf_package : ClassifierPackage
@@ -137,7 +155,7 @@ def detect_gait(
      """
      # Set classifier
      clf = clf_package.classifier
-     if not parallel and hasattr(clf, 'n_jobs'):
+     if not parallel and hasattr(clf, "n_jobs"):
          clf.n_jobs = 1
 
      feature_names_scaling = clf_package.scaler.feature_names_in_
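
The n_jobs guard above (also used in filter_gait below) pins scikit-learn-style estimators to a single worker when parallel=False, which avoids oversubscription when the caller already parallelizes across recordings. A minimal sketch, assuming an sklearn-like classifier; estimators without an n_jobs attribute are left untouched:

    from sklearn.ensemble import RandomForestClassifier

    clf = RandomForestClassifier(n_jobs=-1)  # configured to use all cores
    parallel = False
    if not parallel and hasattr(clf, "n_jobs"):
        clf.n_jobs = 1  # predict single-threaded instead
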
@@ -157,13 +175,13 @@ def detect_gait(
 
 
  def extract_arm_activity_features(
-     df: pd.DataFrame,
-     config: GaitConfig,
- ) -> pd.DataFrame:
+     df: pd.DataFrame,
+     config: GaitConfig,
+ ) -> pd.DataFrame:
      """
      Extract features related to arm activity from a time-series DataFrame.
 
-     This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals, 
+     This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
      and extracts features related to arm activity by performing the following steps:
      1. Computes the angle and velocity from gyroscope data.
      2. Filters the data to include only predicted gait segments.
@@ -183,13 +201,12 @@ def extract_arm_activity_features(
      Returns
      -------
      pd.DataFrame
-         A DataFrame containing the extracted arm activity features, including angle, velocity, 
+         A DataFrame containing the extracted arm activity features, including angle, velocity,
          temporal, and spectral features.
      """
      # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap
      df[DataColumns.SEGMENT_NR] = create_segments(
-         time_array=df[DataColumns.TIME],
-         max_segment_gap_s=config.max_segment_gap_s
+         time_array=df[DataColumns.TIME], max_segment_gap_s=config.max_segment_gap_s
      )
 
      # Remove segments that do not meet predetermined criteria
@@ -198,27 +215,27 @@ def extract_arm_activity_features(
          segment_nr_colname=DataColumns.SEGMENT_NR,
          min_segment_length_s=config.min_segment_length_s,
          fs=config.sampling_frequency,
-         format='timestamps'
+         format="timestamps",
      )
 
      # Create windows of fixed length and step size from the time series per segment
      windowed_data = []
      df_grouped = df.groupby(DataColumns.SEGMENT_NR)
-     windowed_cols = (
-         [DataColumns.TIME] +
-         config.accelerometer_cols +
-         config.gravity_cols +
-         config.gyroscope_cols
+     windowed_colnames = (
+         [config.time_colname]
+         + config.accelerometer_colnames
+         + config.gravity_colnames
+         + config.gyroscope_colnames
      )
 
      # Collect windows from all segments in a list for faster concatenation
      for _, group in df_grouped:
          windows = tabulate_windows(
-             df=group,
-             columns=windowed_cols,
+             df=group,
+             columns=windowed_colnames,
              window_length_s=config.window_length_s,
              window_step_length_s=config.window_step_length_s,
-             fs=config.sampling_frequency
+             fs=config.sampling_frequency,
          )
          if len(windows) > 0:  # Skip if no windows are created
              windowed_data.append(windows)
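
The list-then-concatenate pattern above is deliberate: appending each segment's windows to a Python list and calling np.concatenate once copies every element a single time, whereas concatenating inside the loop would recopy the growing array on each iteration. A small sketch of the pattern, with invented shapes:

    import numpy as np

    # Two segments' windows with identical trailing dimensions (values illustrative)
    chunks = [np.zeros((5, 600, 7)), np.zeros((3, 600, 7))]
    stacked = np.concatenate(chunks, axis=0)
    print(stacked.shape)  # (8, 600, 7)
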
@@ -232,12 +249,12 @@ def extract_arm_activity_features(
      windowed_data = np.concatenate(windowed_data, axis=0)
 
      # Slice columns for accelerometer, gravity, gyroscope, angle, and velocity
-     extractor = WindowedDataExtractor(windowed_cols)
+     extractor = WindowedDataExtractor(windowed_colnames)
 
-     idx_time = extractor.get_index(DataColumns.TIME)
-     idx_acc = extractor.get_slice(config.accelerometer_cols)
-     idx_grav = extractor.get_slice(config.gravity_cols)
-     idx_gyro = extractor.get_slice(config.gyroscope_cols)
+     idx_time = extractor.get_index(config.time_colname)
+     idx_acc = extractor.get_slice(config.accelerometer_colnames)
+     idx_grav = extractor.get_slice(config.gravity_colnames)
+     idx_gyro = extractor.get_slice(config.gyroscope_colnames)
 
      # Extract data
      start_time = np.min(windowed_data[:, :, idx_time], axis=1)
@@ -246,23 +263,23 @@ def extract_arm_activity_features(
      windowed_gyro = windowed_data[:, :, idx_gyro]
 
      # Initialize DataFrame for features
-     df_features = pd.DataFrame(start_time, columns=[DataColumns.TIME])
+     df_features = pd.DataFrame(start_time, columns=[config.time_colname])
 
      # Extract temporal domain features (e.g., mean, std for accelerometer and gravity)
      df_temporal_features = extract_temporal_domain_features(
-         config=config,
-         windowed_acc=windowed_acc,
-         windowed_grav=windowed_grav,
-         grav_stats=['mean', 'std']
+         config=config,
+         windowed_acc=windowed_acc,
+         windowed_grav=windowed_grav,
+         grav_stats=["mean", "std"],
      )
      df_features = pd.concat([df_features, df_temporal_features], axis=1)
 
      # Extract spectral domain features for accelerometer and gyroscope signals
-     for sensor_name, windowed_sensor in zip(['accelerometer', 'gyroscope'], [windowed_acc, windowed_gyro]):
+     for sensor_name, windowed_sensor in zip(
+         ["accelerometer", "gyroscope"], [windowed_acc, windowed_gyro]
+     ):
          df_spectral_features = extract_spectral_domain_features(
-             config=config,
-             sensor=sensor_name,
-             windowed_data=windowed_sensor
+             config=config, sensor=sensor_name, windowed_data=windowed_sensor
          )
          df_features = pd.concat([df_features, df_spectral_features], axis=1)
 
@@ -270,10 +287,8 @@ def extract_arm_activity_features(
 
 
  def filter_gait(
-     df: pd.DataFrame,
-     clf_package: ClassifierPackage,
-     parallel: bool=False
- ) -> pd.Series:
+     df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
+ ) -> pd.Series:
      """
      Filters gait data to identify windows with no other arm activity using a pre-trained classifier.
 
@@ -293,10 +308,10 @@ def filter_gait(
      """
      if df.shape[0] == 0:
          raise ValueError("No data found in the input DataFrame.")
-
+
      # Set classifier
      clf = clf_package.classifier
-     if not parallel and hasattr(clf, 'n_jobs'):
+     if not parallel and hasattr(clf, "n_jobs"):
          clf.n_jobs = 1
 
      feature_names_scaling = clf_package.scaler.feature_names_in_
@@ -316,12 +331,12 @@ def filter_gait(
 
 
  def quantify_arm_swing(
-     df: pd.DataFrame,
-     fs: int,
-     filtered: bool = False,
-     max_segment_gap_s: float = 1.5,
-     min_segment_length_s: float = 1.5
- ) -> Tuple[dict[str, pd.DataFrame], dict]:
+     df: pd.DataFrame,
+     fs: int,
+     filtered: bool = False,
+     max_segment_gap_s: float = 1.5,
+     min_segment_length_s: float = 1.5,
+ ) -> Tuple[dict[str, pd.DataFrame], dict]:
      """
      Quantify arm swing parameters for segments of motion based on gyroscope data.
 
@@ -339,71 +354,75 @@ def quantify_arm_swing(
 
      max_segment_gap_s : float, optional, default=1.5
          The maximum gap in seconds between consecutive timestamps to group them into segments.
-
+
      min_segment_length_s : float, optional, default=1.5
          The minimum length in seconds for a segment to be considered valid.
 
      Returns
      -------
      Tuple[pd.DataFrame, dict]
-         A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing 
+         A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing
          metadata for each segment.
      """
      # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap.
      # Segments are made based on predicted gait
-     df[DataColumns.SEGMENT_NR] = create_segments(
-         time_array=df[DataColumns.TIME],
-         max_segment_gap_s=max_segment_gap_s
+     df["unfiltered_segment_nr"] = create_segments(
+         time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
      )
 
-     # Segment category is determined based on predicted gait, hence it is set
-     # before filtering the DataFrame to only include predicted no other arm activity
-     df[DataColumns.SEGMENT_CAT] = categorize_segments(df=df, fs=fs)
-
      # Remove segments that do not meet predetermined criteria
      df = discard_segments(
          df=df,
-         segment_nr_colname=DataColumns.SEGMENT_NR,
+         segment_nr_colname="unfiltered_segment_nr",
          min_segment_length_s=min_segment_length_s,
          fs=fs,
-         format='timestamps'
+         format="timestamps",
      )
 
      if df.empty:
-         raise ValueError("No segments found in the input data after discarding segments of invalid shape.")
+         raise ValueError(
+             "No segments found in the input data after discarding segments of invalid shape."
+         )
+
+     # Create dictionary of gait segment number and duration
+     gait_segment_duration_dict = {
+         segment_nr: len(group[DataColumns.TIME]) / fs
+         for segment_nr, group in df.groupby("unfiltered_segment_nr", sort=False)
+     }
 
      # If no arm swing data is remaining, return an empty dictionary
-     if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].empty:
+     if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1].empty:
          raise ValueError("No gait without other arm activities to quantify.")
      elif filtered:
          # Filter the DataFrame to only include predicted no other arm activity (1)
-         df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].reset_index(drop=True)
+         df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1].reset_index(
+             drop=True
+         )
 
          # Group consecutive timestamps into segments of filtered gait
-         df[DataColumns.SEGMENT_NR] = create_segments(
-             time_array=df[DataColumns.TIME],
-             max_segment_gap_s=max_segment_gap_s
+         df["filtered_segment_nr"] = create_segments(
+             time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
          )
 
          # Remove segments that do not meet predetermined criteria
          df = discard_segments(
              df=df,
-             segment_nr_colname=DataColumns.SEGMENT_NR,
+             segment_nr_colname="filtered_segment_nr",
              min_segment_length_s=min_segment_length_s,
              fs=fs,
          )
 
          if df.empty:
-             raise ValueError("No filtered gait segments found in the input data after discarding segments of invalid shape.")
+             raise ValueError(
+                 "No filtered gait segments found in the input data after discarding segments of invalid shape."
+             )
+
+     grouping_colname = "filtered_segment_nr" if filtered else "unfiltered_segment_nr"
 
      arm_swing_quantified = []
      segment_meta = {
-         'aggregated': {
-             'all': {
-                 'duration_s': len(df[DataColumns.TIME]) / fs
-             },
-         },
-         'per_segment': {}
+         "all": {"duration_s": len(df[DataColumns.TIME]) / fs},
+         "per_segment": {},
      }
 
      # PCA is fitted on only predicted gait without other arm activity if filtered, otherwise
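
The bookkeeping introduced above decouples gait segments from filtered-gait segments: durations are recorded per unfiltered_segment_nr before any filtering, so each filtered segment can later be traced back to the gait segment that contains it. A toy illustration of the duration dictionary (column names as in the diff, data invented):

    import pandas as pd

    fs = 100
    df = pd.DataFrame({
        "time": [i / fs for i in range(300)],
        "unfiltered_segment_nr": [1] * 200 + [2] * 100,
    })
    durations = {
        nr: len(group["time"]) / fs
        for nr, group in df.groupby("unfiltered_segment_nr", sort=False)
    }
    print(durations)  # {1: 2.0, 2: 1.0}
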
@@ -415,8 +434,27 @@ def quantify_arm_swing(
      )
 
      # Group and process segments
-     for segment_nr, group in df.groupby(DataColumns.SEGMENT_NR, sort=False):
-         segment_cat = group[DataColumns.SEGMENT_CAT].iloc[0]
+     for segment_nr, group in df.groupby(grouping_colname, sort=False):
+         if filtered:
+             gait_segment_nr = group["unfiltered_segment_nr"].iloc[
+                 0
+             ]  # Each filtered segment is contained within an unfiltered segment
+         else:
+             gait_segment_nr = segment_nr
+
+         try:
+             gait_segment_duration_s = gait_segment_duration_dict[gait_segment_nr]
+         except KeyError:
+             logger.warning(
+                 "Segment %s (filtered = %s) not found in gait segment duration dictionary. Skipping this segment.",
+                 gait_segment_nr,
+                 filtered,
+             )
+             logger.debug(
+                 "Available segments: %s", list(gait_segment_duration_dict.keys())
+             )
+             continue
+
          time_array = group[DataColumns.TIME].to_numpy()
          velocity_array = group[DataColumns.VELOCITY].to_numpy()
 
@@ -432,18 +470,22 @@ def quantify_arm_swing(
              fs=fs,
          )
 
-         segment_meta['per_segment'][segment_nr] = {
-             'start_time_s': time_array.min(),
-             'end_time_s': time_array.max(),
-             'duration_s': len(angle_array) / fs,
-             DataColumns.SEGMENT_CAT: segment_cat
+         segment_meta["per_segment"][segment_nr] = {
+             "start_time_s": time_array.min(),
+             "end_time_s": time_array.max(),
+             "duration_unfiltered_segment_s": gait_segment_duration_s,
          }
 
-         if angle_array.size > 0:
+         if filtered:
+             segment_meta["per_segment"][segment_nr]["duration_filtered_segment_s"] = (
+                 len(time_array) / fs
+             )
+
+         if angle_array.size > 0:
              angle_extrema_indices, _, _ = extract_angle_extremes(
                  angle_array=angle_array,
                  sampling_frequency=fs,
-                 max_frequency_activity=1.75
+                 max_frequency_activity=1.75,
              )
 
              if len(angle_extrema_indices) > 1:  # Requires at minimum 2 peaks
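
For reference, the per-segment metadata written above now looks roughly like this (values invented; duration_filtered_segment_s is only present when filtered=True):

    segment_meta = {
        "all": {"duration_s": 84.2},
        "per_segment": {
            3: {
                "start_time_s": 12.0,
                "end_time_s": 15.2,
                "duration_unfiltered_segment_s": 6.5,  # parent gait segment
                "duration_filtered_segment_s": 3.2,    # filtered portion only
            },
        },
    }

This replaces the 1.0.2 layout, which stored a per-segment duration_s plus a segment category alongside an 'aggregated' key.
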
@@ -454,44 +496,47 @@ def quantify_arm_swing(
                      )
                  except Exception as e:
                      # Handle the error, set RoM to NaN, and log the error
-                     print(f"Error computing range of motion for segment {segment_nr}: {e}")
+                     print(
+                         f"Error computing range of motion for segment {segment_nr}: {e}"
+                     )
                      rom = np.array([np.nan])
 
                  try:
                      pav = compute_peak_angular_velocity(
                          velocity_array=velocity_array,
-                         angle_extrema_indices=angle_extrema_indices
+                         angle_extrema_indices=angle_extrema_indices,
                      )
                  except Exception as e:
                      # Handle the error, set pav to NaN, and log the error
-                     print(f"Error computing peak angular velocity for segment {segment_nr}: {e}")
+                     print(
+                         f"Error computing peak angular velocity for segment {segment_nr}: {e}"
+                     )
                      pav = np.array([np.nan])
 
-                 df_params_segment = pd.DataFrame({
-                     DataColumns.SEGMENT_NR: segment_nr,
-                     DataColumns.SEGMENT_CAT: segment_cat,
-                     DataColumns.RANGE_OF_MOTION: rom,
-                     DataColumns.PEAK_VELOCITY: pav
-                 })
+                 df_params_segment = pd.DataFrame(
+                     {
+                         DataColumns.SEGMENT_NR: segment_nr,
+                         DataColumns.RANGE_OF_MOTION: rom,
+                         DataColumns.PEAK_VELOCITY: pav,
+                     }
+                 )
 
                  arm_swing_quantified.append(df_params_segment)
 
-     # Combine segment categories
-     segment_categories = set([segment_meta['per_segment'][x][DataColumns.SEGMENT_CAT] for x in segment_meta['per_segment'].keys()])
-     for segment_cat in segment_categories:
-         segment_meta['aggregated'][segment_cat] = {
-             'duration_s': sum([segment_meta['per_segment'][x]['duration_s'] for x in segment_meta['per_segment'].keys() if segment_meta['per_segment'][x][DataColumns.SEGMENT_CAT] == segment_cat])
-         }
-
      arm_swing_quantified = pd.concat(arm_swing_quantified, ignore_index=True)
-
+
      return arm_swing_quantified, segment_meta
 
 
- def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta: dict, aggregates: List[str] = ['median']) -> dict:
+ def aggregate_arm_swing_params(
+     df_arm_swing_params: pd.DataFrame,
+     segment_meta: dict,
+     segment_cats: List[tuple],
+     aggregates: List[str] = ["median"],
+ ) -> dict:
      """
      Aggregate the quantification results for arm swing parameters.
-
+
      Parameters
      ----------
      df_arm_swing_params : pd.DataFrame
@@ -499,10 +544,12 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
 
      segment_meta : dict
          A dictionary containing metadata for each segment.
-
+
+     segment_cats : List[tuple]
+         A list of tuples defining the segment categories, where each tuple contains the lower and upper bounds for the segment duration.
      aggregates : List[str], optional
          A list of aggregation methods to apply to the quantification results.
-
+
      Returns
      -------
      dict
@@ -510,43 +557,93 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
      """
      arm_swing_parameters = [DataColumns.RANGE_OF_MOTION, DataColumns.PEAK_VELOCITY]
 
-     uq_segment_cats = set([segment_meta[x][DataColumns.SEGMENT_CAT] for x in df_arm_swing_params[DataColumns.SEGMENT_NR].unique()])
-
      aggregated_results = {}
-     for segment_cat in uq_segment_cats:
-         cat_segments = [x for x in segment_meta.keys() if segment_meta[x][DataColumns.SEGMENT_CAT] == segment_cat]
-
-         aggregated_results[segment_cat] = {
-             'duration_s': sum([segment_meta[x]['duration_s'] for x in cat_segments])
-         }
-
-         df_arm_swing_params_cat = df_arm_swing_params[df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)]
-
-         for arm_swing_parameter in arm_swing_parameters:
-             for aggregate in aggregates:
-                 aggregated_results[segment_cat][f'{aggregate}_{arm_swing_parameter}'] = aggregate_parameter(df_arm_swing_params_cat[arm_swing_parameter], aggregate)
-
-     aggregated_results['all_segment_categories'] = {
-         'duration_s': sum([segment_meta[x]['duration_s'] for x in segment_meta.keys()])
-     }
-
-     for arm_swing_parameter in arm_swing_parameters:
-         for aggregate in aggregates:
-             aggregated_results['all_segment_categories'][f'{aggregate}_{arm_swing_parameter}'] = aggregate_parameter(df_arm_swing_params[arm_swing_parameter], aggregate)
+     for segment_cat_range in segment_cats:
+         segment_cat_str = f"{segment_cat_range[0]}_{segment_cat_range[1]}"
+         cat_segments = [
+             x
+             for x in segment_meta.keys()
+             if segment_meta[x]["duration_unfiltered_segment_s"] >= segment_cat_range[0]
+             and segment_meta[x]["duration_unfiltered_segment_s"] < segment_cat_range[1]
+         ]
+
+         if len(cat_segments) > 0:
+             # For each segment, use 'duration_filtered_segment_s' if present, else 'duration_unfiltered_segment_s'
+             aggregated_results[segment_cat_str] = {
+                 "duration_s": sum(
+                     [
+                         (
+                             segment_meta[x]["duration_filtered_segment_s"]
+                             if "duration_filtered_segment_s" in segment_meta[x]
+                             else segment_meta[x]["duration_unfiltered_segment_s"]
+                         )
+                         for x in cat_segments
+                     ]
+                 )
+             }
+
+             df_arm_swing_params_cat = df_arm_swing_params.loc[
+                 df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)
+             ]
+
+             # Aggregate across all segments
+             aggregates_per_segment = ["median", "mean"]
+
+             for arm_swing_parameter in arm_swing_parameters:
+                 for aggregate in aggregates:
+                     if aggregate in ["std", "cov"]:
+                         per_segment_agg = []
+                         # If the aggregate is 'cov' (coefficient of variation), we also compute the mean and standard deviation per segment
+                         segment_groups = dict(
+                             tuple(
+                                 df_arm_swing_params_cat.groupby(DataColumns.SEGMENT_NR)
+                             )
+                         )
+                         for segment_nr in cat_segments:
+                             segment_df = segment_groups.get(segment_nr)
+                             if segment_df is not None:
+                                 per_segment_agg.append(
+                                     aggregate_parameter(
+                                         segment_df[arm_swing_parameter], aggregate
+                                     )
+                                 )
+
+                         # Drop nans
+                         per_segment_agg = np.array(per_segment_agg)
+                         per_segment_agg = per_segment_agg[~np.isnan(per_segment_agg)]
+
+                         for segment_level_aggregate in aggregates_per_segment:
+                             aggregated_results[segment_cat_str][
+                                 f"{segment_level_aggregate}_{aggregate}_{arm_swing_parameter}"
+                             ] = aggregate_parameter(
+                                 per_segment_agg, segment_level_aggregate
+                             )
+                     else:
+                         aggregated_results[segment_cat_str][
+                             f"{aggregate}_{arm_swing_parameter}"
+                         ] = aggregate_parameter(
+                             df_arm_swing_params_cat[arm_swing_parameter], aggregate
+                         )
+
+         else:
+             # If no segments are found for this category, initialize with NaN
+             aggregated_results[segment_cat_str] = {
+                 "duration_s": 0,
+             }
 
      return aggregated_results
 
 
  def extract_temporal_domain_features(
-     config,
-     windowed_acc: np.ndarray,
-     windowed_grav: np.ndarray,
-     grav_stats: List[str] = ['mean']
- ) -> pd.DataFrame:
+     config,
+     windowed_acc: np.ndarray,
+     windowed_grav: np.ndarray,
+     grav_stats: List[str] = ["mean"],
+ ) -> pd.DataFrame:
      """
      Compute temporal domain features for the accelerometer signal.
 
-     This function calculates various statistical features for the gravity signal 
+     This function calculates various statistical features for the gravity signal
      and computes the standard deviation of the accelerometer's Euclidean norm.
 
      Parameters
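
Since segment categories are no longer computed internally (categorize_segments is gone from the imports), callers now pass duration bins explicitly. Each (lower, upper) tuple is matched as lower <= duration_unfiltered_segment_s < upper, so an open-ended last bin needs float("inf"). A hypothetical call, with bin edges and variable names chosen for illustration only:

    segment_cats = [(0, 5), (5, 10), (10, 20), (20, float("inf"))]

    aggregated = aggregate_arm_swing_params(
        df_arm_swing_params=df_params,             # first output of quantify_arm_swing
        segment_meta=segment_meta["per_segment"],  # per-segment dict from its second output
        segment_cats=segment_cats,
        aggregates=["median", "std"],
    )
    # Result keys look like "0_5", "5_10", ...; empty bins get {"duration_s": 0}.
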
@@ -554,10 +651,10 @@ def extract_temporal_domain_features(
      config : object
          Configuration object containing the accelerometer and gravity column names.
      windowed_acc : numpy.ndarray
-         A 2D numpy array of shape (N, M) where N is the number of windows and M is 
+         A 2D numpy array of shape (N, M) where N is the number of windows and M is
          the number of accelerometer values per window.
      windowed_grav : numpy.ndarray
-         A 2D numpy array of shape (N, M) where N is the number of windows and M is 
+         A 2D numpy array of shape (N, M) where N is the number of windows and M is
          the number of gravity signal values per window.
      grav_stats : list of str, optional
          A list of statistics to compute for the gravity signal (default is ['mean']).
@@ -565,32 +662,34 @@ def extract_temporal_domain_features(
      Returns
      -------
      pd.DataFrame
-         A DataFrame containing the computed features, with each row corresponding 
+         A DataFrame containing the computed features, with each row corresponding
          to a window and each column representing a specific feature.
      """
      # Compute gravity statistics (e.g., mean, std, etc.)
      feature_dict = {}
      for stat in grav_stats:
          stats_result = compute_statistics(data=windowed_grav, statistic=stat)
-         for i, col in enumerate(config.gravity_cols):
-             feature_dict[f'{col}_{stat}'] = stats_result[:, i]
+         for i, col in enumerate(config.gravity_colnames):
+             feature_dict[f"{col}_{stat}"] = stats_result[:, i]
 
      # Compute standard deviation of the Euclidean norm of the accelerometer signal
-     feature_dict['accelerometer_std_norm'] = compute_std_euclidean_norm(data=windowed_acc)
+     feature_dict["accelerometer_std_norm"] = compute_std_euclidean_norm(
+         data=windowed_acc
+     )
 
      return pd.DataFrame(feature_dict)
 
 
  def extract_spectral_domain_features(
-     windowed_data: np.ndarray,
-     config,
-     sensor: str,
- ) -> pd.DataFrame:
+     windowed_data: np.ndarray,
+     config,
+     sensor: str,
+ ) -> pd.DataFrame:
      """
      Compute spectral domain features for a sensor's data.
 
-     This function computes the periodogram, extracts power in specific frequency bands, 
-     calculates the dominant frequency, and computes Mel-frequency cepstral coefficients (MFCCs) 
+     This function computes the periodogram, extracts power in specific frequency bands,
+     calculates the dominant frequency, and computes Mel-frequency cepstral coefficients (MFCCs)
      for a given sensor's windowed data.
 
      Parameters
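
The accelerometer_std_norm feature computed above has a simple numpy equivalent, shown here as a sketch rather than paradigma's actual implementation: per window, take the standard deviation of the sample-wise Euclidean norm across the axes:

    import numpy as np

    windowed_acc = np.random.randn(25, 600, 3)    # (windows, samples, axes), illustrative
    norms = np.linalg.norm(windowed_acc, axis=2)  # (25, 600) per-sample magnitudes
    std_norm = norms.std(axis=1)                  # (25,) one feature value per window
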
@@ -599,16 +698,16 @@ def extract_spectral_domain_features(
          A 2D numpy array where each row corresponds to a window of sensor data.
 
      config : object
-         Configuration object containing settings such as sampling frequency, window type, 
+         Configuration object containing settings such as sampling frequency, window type,
          frequency bands, and MFCC parameters.
 
      sensor : str
          The name of the sensor (e.g., 'accelerometer', 'gyroscope').
-
+
      Returns
      -------
      pd.DataFrame
-         A DataFrame containing the computed spectral features, with each row corresponding 
+         A DataFrame containing the computed spectral features, with each row corresponding
          to a window and each column representing a specific feature.
      """
      # Initialize a dictionary to hold the results
@@ -616,49 +715,46 @@ def extract_spectral_domain_features(
 
      # Compute periodogram (power spectral density)
      freqs, psd = periodogram(
-         x=windowed_data,
-         fs=config.sampling_frequency,
-         window=config.window_type,
-         axis=1
+         x=windowed_data, fs=config.sampling_frequency, window=config.window_type, axis=1
      )
 
      # Compute power in specified frequency bands
      for band_name, band_freqs in config.d_frequency_bandwidths.items():
          band_powers = compute_power_in_bandwidth(
              freqs=freqs,
-             psd=psd,
+             psd=psd,
              fmin=band_freqs[0],
              fmax=band_freqs[1],
-             include_max=False
+             include_max=False,
          )
          for i, col in enumerate(config.axes):
-             feature_dict[f'{sensor}_{col}_{band_name}'] = band_powers[:, i]
+             feature_dict[f"{sensor}_{col}_{band_name}"] = band_powers[:, i]
 
      # Compute dominant frequency for each axis
      dominant_frequencies = compute_dominant_frequency(
-         freqs=freqs,
-         psd=psd,
-         fmin=config.spectrum_low_frequency,
-         fmax=config.spectrum_high_frequency
+         freqs=freqs,
+         psd=psd,
+         fmin=config.spectrum_low_frequency,
+         fmax=config.spectrum_high_frequency,
      )
 
      # Add dominant frequency features to the feature_dict
      for axis, freq in zip(config.axes, dominant_frequencies.T):
-         feature_dict[f'{sensor}_{axis}_dominant_frequency'] = freq
+         feature_dict[f"{sensor}_{axis}_dominant_frequency"] = freq
 
      # Compute total power in the PSD
      total_power_psd = compute_total_power(psd)
 
      # Compute MFCCs
      mfccs = compute_mfccs(
-         total_power_array=total_power_psd,
-         config=config,
-         multiplication_factor=4
+         total_power_array=total_power_psd, config=config, multiplication_factor=4
      )
 
      # Combine the MFCCs into the features DataFrame
-     mfcc_colnames = [f'{sensor}_mfcc_{x}' for x in range(1, config.mfcc_n_coefficients + 1)]
+     mfcc_colnames = [
+         f"{sensor}_mfcc_{x}" for x in range(1, config.mfcc_n_coefficients + 1)
+     ]
      for i, colname in enumerate(mfcc_colnames):
          feature_dict[colname] = mfccs[:, i]
 
-     return pd.DataFrame(feature_dict)
+     return pd.DataFrame(feature_dict)
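
A quick shape check for the spectral step above (parameters illustrative): scipy's periodogram applied along axis=1 yields one PSD row per window, with len(freqs) = n_samples // 2 + 1 for real input:

    import numpy as np
    from scipy.signal import periodogram

    fs = 100
    windowed_data = np.random.randn(25, 600, 3)  # (windows, samples, axes), illustrative
    freqs, psd = periodogram(x=windowed_data, fs=fs, window="hann", axis=1)
    print(freqs.shape, psd.shape)                # (301,) (25, 301, 3)
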