paradigma-1.0.3-py3-none-any.whl → paradigma-1.0.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,30 +1,43 @@
  import logging
+ from typing import List, Tuple
+
  import numpy as np
  import pandas as pd
  from scipy.signal import periodogram
- from typing import List, Tuple

  from paradigma.classification import ClassifierPackage
- from paradigma.constants import DataColumns
  from paradigma.config import GaitConfig
- from paradigma.feature_extraction import pca_transform_gyroscope, compute_angle, remove_moving_average_angle, \
-     extract_angle_extremes, compute_range_of_motion, compute_peak_angular_velocity, compute_statistics, \
-     compute_std_euclidean_norm, compute_power_in_bandwidth, compute_dominant_frequency, compute_mfccs, \
-     compute_total_power
- from paradigma.segmenting import tabulate_windows, create_segments, discard_segments, WindowedDataExtractor
+ from paradigma.constants import DataColumns
+ from paradigma.feature_extraction import (
+     compute_angle,
+     compute_dominant_frequency,
+     compute_mfccs,
+     compute_peak_angular_velocity,
+     compute_power_in_bandwidth,
+     compute_range_of_motion,
+     compute_statistics,
+     compute_std_euclidean_norm,
+     compute_total_power,
+     extract_angle_extremes,
+     pca_transform_gyroscope,
+     remove_moving_average_angle,
+ )
+ from paradigma.segmenting import (
+     WindowedDataExtractor,
+     create_segments,
+     discard_segments,
+     tabulate_windows,
+ )
  from paradigma.util import aggregate_parameter

-
  logger = logging.getLogger(__name__)

  # Only configure basic logging if no handlers exist
  if not logger.hasHandlers():
-     logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
+     logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
+

- def extract_gait_features(
-     df: pd.DataFrame,
-     config: GaitConfig
- ) -> pd.DataFrame:
+ def extract_gait_features(df: pd.DataFrame, config: GaitConfig) -> pd.DataFrame:
      """
      Extracts gait features from accelerometer and gravity sensor data in the input DataFrame by computing temporal and spectral features.

@@ -51,7 +64,7 @@ def extract_gait_features(
          A DataFrame containing extracted gait features, including temporal and spectral domain features. The DataFrame will have
          columns corresponding to time, statistical features of the accelerometer and gravity data, and spectral features of the
          accelerometer data.
-
+
      Notes
      -----
      - This function groups the data into windows based on timestamps and applies Fast Fourier Transform to compute spectral features.
@@ -64,34 +77,36 @@ def extract_gait_features(
          If the input DataFrame does not contain the required columns as specified in the configuration or if any step in the feature extraction fails.
      """
      # Group sequences of timestamps into windows
-     windowed_cols = [DataColumns.TIME] + config.accelerometer_cols + config.gravity_cols
+     windowed_colnames = (
+         [config.time_colname] + config.accelerometer_colnames + config.gravity_colnames
+     )
      windowed_data = tabulate_windows(
-         df=df,
-         columns=windowed_cols,
+         df=df,
+         columns=windowed_colnames,
          window_length_s=config.window_length_s,
          window_step_length_s=config.window_step_length_s,
-         fs=config.sampling_frequency
+         fs=config.sampling_frequency,
      )

-     extractor = WindowedDataExtractor(windowed_cols)
+     extractor = WindowedDataExtractor(windowed_colnames)

-     idx_time = extractor.get_index(DataColumns.TIME)
-     idx_acc = extractor.get_slice(config.accelerometer_cols)
-     idx_grav = extractor.get_slice(config.gravity_cols)
+     idx_time = extractor.get_index(config.time_colname)
+     idx_acc = extractor.get_slice(config.accelerometer_colnames)
+     idx_grav = extractor.get_slice(config.gravity_colnames)

      # Extract data
      start_time = np.min(windowed_data[:, :, idx_time], axis=1)
      windowed_acc = windowed_data[:, :, idx_acc]
      windowed_grav = windowed_data[:, :, idx_grav]

-     df_features = pd.DataFrame(start_time, columns=[DataColumns.TIME])
-
+     df_features = pd.DataFrame(start_time, columns=[config.time_colname])
+
      # Compute statistics of the temporal domain signals (mean, std) for accelerometer and gravity
      df_temporal_features = extract_temporal_domain_features(
-         config=config,
+         config=config,
          windowed_acc=windowed_acc,
          windowed_grav=windowed_grav,
-         grav_stats=['mean', 'std']
+         grav_stats=["mean", "std"],
      )

      # Combine temporal features with the start time
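For orientation: the hunk above migrates column references from the DataColumns constants to config attributes (config.time_colname, config.accelerometer_colnames, config.gravity_colnames), while the shape contract stays the same. tabulate_windows returns an array of shape (n_windows, n_samples, n_columns), and WindowedDataExtractor resolves column names to positions on the last axis. A minimal NumPy sketch of that contract, with illustrative column names (not the package's actual values):

    import numpy as np

    # Stand-ins for config.time_colname etc.; the real names come from GaitConfig.
    windowed_colnames = ["time", "acc_x", "acc_y", "acc_z", "grav_x", "grav_y", "grav_z"]

    # Fake windowed data: 5 windows of 6 s at 100 Hz over the 7 columns above.
    windowed_data = np.random.default_rng(0).normal(size=(5, 600, 7))

    # get_index / get_slice boil down to positions on the last axis.
    idx_time = windowed_colnames.index("time")
    windowed_acc = windowed_data[:, :, 1:4]                     # accelerometer block
    start_time = np.min(windowed_data[:, :, idx_time], axis=1)  # one start time per window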
@@ -99,9 +114,7 @@ def extract_gait_features(

      # Transform the accelerometer data to the spectral domain using FFT and extract spectral features
      df_spectral_features = extract_spectral_domain_features(
-         config=config,
-         sensor='accelerometer',
-         windowed_data=windowed_acc
+         config=config, sensor="accelerometer", windowed_data=windowed_acc
      )

      # Combine the spectral features with the previously computed temporal features
@@ -111,10 +124,8 @@ def extract_gait_features(


  def detect_gait(
-     df: pd.DataFrame,
-     clf_package: ClassifierPackage,
-     parallel: bool=False
-     ) -> pd.Series:
+     df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
+ ) -> pd.Series:
      """
      Detects gait activity in the input DataFrame using a pre-trained classifier and applies a threshold to classify results.

@@ -128,7 +139,7 @@ def detect_gait(
      Parameters
      ----------
      df : pd.DataFrame
-         The input DataFrame containing features extracted from gait data. It must include the necessary columns
+         The input DataFrame containing features extracted from gait data. It must include the necessary columns
          as specified in the classifier's feature names.

      clf_package : ClassifierPackage
@@ -144,7 +155,7 @@ def detect_gait(
      """
      # Set classifier
      clf = clf_package.classifier
-     if not parallel and hasattr(clf, 'n_jobs'):
+     if not parallel and hasattr(clf, "n_jobs"):
          clf.n_jobs = 1

      feature_names_scaling = clf_package.scaler.feature_names_in_
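Taken together, the two functions above form the first stage of the gait pipeline: feature extraction, then classification. A usage sketch under stated assumptions (the module import path, the GaitConfig constructor usage, ClassifierPackage.load, and the clf_package.threshold attribute are taken from the package's documented usage, not from this diff; df_sensor and the file path are placeholders):

    from paradigma.classification import ClassifierPackage
    from paradigma.config import GaitConfig
    from paradigma.pipelines.gait_pipeline import detect_gait, extract_gait_features  # assumed path

    config = GaitConfig(step="gait")                                 # assumed constructor usage
    clf_package = ClassifierPackage.load("gait_detection_package.pkl")  # assumed loader; path illustrative

    # df_sensor: a DataFrame with time, accelerometer, and gravity columns
    # named per the config (construction not shown in this diff).
    df_features = extract_gait_features(df=df_sensor, config=config)

    # parallel=False pins clf.n_jobs to 1 when the classifier exposes n_jobs (see hunk above).
    gait_proba = detect_gait(df=df_features, clf_package=clf_package, parallel=False)
    df_features["pred_gait"] = (gait_proba >= clf_package.threshold).astype(int)  # assumed attribute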
@@ -164,13 +175,13 @@ def detect_gait(


  def extract_arm_activity_features(
-     df: pd.DataFrame,
-     config: GaitConfig,
-     ) -> pd.DataFrame:
+     df: pd.DataFrame,
+     config: GaitConfig,
+ ) -> pd.DataFrame:
      """
      Extract features related to arm activity from a time-series DataFrame.

-     This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
+     This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
      and extracts features related to arm activity by performing the following steps:
      1. Computes the angle and velocity from gyroscope data.
      2. Filters the data to include only predicted gait segments.
@@ -190,13 +201,12 @@ def extract_arm_activity_features(
      Returns
      -------
      pd.DataFrame
-         A DataFrame containing the extracted arm activity features, including angle, velocity,
+         A DataFrame containing the extracted arm activity features, including angle, velocity,
          temporal, and spectral features.
      """
      # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap
      df[DataColumns.SEGMENT_NR] = create_segments(
-         time_array=df[DataColumns.TIME],
-         max_segment_gap_s=config.max_segment_gap_s
+         time_array=df[DataColumns.TIME], max_segment_gap_s=config.max_segment_gap_s
      )

      # Remove segments that do not meet predetermined criteria
@@ -205,27 +215,27 @@ def extract_arm_activity_features(
          segment_nr_colname=DataColumns.SEGMENT_NR,
          min_segment_length_s=config.min_segment_length_s,
          fs=config.sampling_frequency,
-         format='timestamps'
+         format="timestamps",
      )

      # Create windows of fixed length and step size from the time series per segment
      windowed_data = []
      df_grouped = df.groupby(DataColumns.SEGMENT_NR)
-     windowed_cols = (
-         [DataColumns.TIME] +
-         config.accelerometer_cols +
-         config.gravity_cols +
-         config.gyroscope_cols
+     windowed_colnames = (
+         [config.time_colname]
+         + config.accelerometer_colnames
+         + config.gravity_colnames
+         + config.gyroscope_colnames
      )

      # Collect windows from all segments in a list for faster concatenation
      for _, group in df_grouped:
          windows = tabulate_windows(
-             df=group,
-             columns=windowed_cols,
+             df=group,
+             columns=windowed_colnames,
              window_length_s=config.window_length_s,
              window_step_length_s=config.window_step_length_s,
-             fs=config.sampling_frequency
+             fs=config.sampling_frequency,
          )
          if len(windows) > 0:  # Skip if no windows are created
              windowed_data.append(windows)
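The segmenting helpers used above are easiest to read through their contract: create_segments assigns a segment number that increments whenever the gap between consecutive timestamps exceeds max_segment_gap_s, and discard_segments drops segments shorter than min_segment_length_s. An illustration of that behavior in plain NumPy (not the library's implementation):

    import numpy as np

    t = np.array([0.00, 0.01, 0.02, 5.00, 5.01])  # 100 Hz samples with a ~5 s gap
    max_segment_gap_s = 1.5

    new_segment_starts = np.diff(t) > max_segment_gap_s
    segment_nr = np.concatenate(([0], np.cumsum(new_segment_starts))) + 1
    # -> array([1, 1, 1, 2, 2]): a new segment begins after the large gap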
@@ -239,12 +249,12 @@ def extract_arm_activity_features(
      windowed_data = np.concatenate(windowed_data, axis=0)

      # Slice columns for accelerometer, gravity, gyroscope, angle, and velocity
-     extractor = WindowedDataExtractor(windowed_cols)
+     extractor = WindowedDataExtractor(windowed_colnames)

-     idx_time = extractor.get_index(DataColumns.TIME)
-     idx_acc = extractor.get_slice(config.accelerometer_cols)
-     idx_grav = extractor.get_slice(config.gravity_cols)
-     idx_gyro = extractor.get_slice(config.gyroscope_cols)
+     idx_time = extractor.get_index(config.time_colname)
+     idx_acc = extractor.get_slice(config.accelerometer_colnames)
+     idx_grav = extractor.get_slice(config.gravity_colnames)
+     idx_gyro = extractor.get_slice(config.gyroscope_colnames)

      # Extract data
      start_time = np.min(windowed_data[:, :, idx_time], axis=1)
@@ -253,23 +263,23 @@ def extract_arm_activity_features(
      windowed_gyro = windowed_data[:, :, idx_gyro]

      # Initialize DataFrame for features
-     df_features = pd.DataFrame(start_time, columns=[DataColumns.TIME])
+     df_features = pd.DataFrame(start_time, columns=[config.time_colname])

      # Extract temporal domain features (e.g., mean, std for accelerometer and gravity)
      df_temporal_features = extract_temporal_domain_features(
-         config=config,
-         windowed_acc=windowed_acc,
-         windowed_grav=windowed_grav,
-         grav_stats=['mean', 'std']
+         config=config,
+         windowed_acc=windowed_acc,
+         windowed_grav=windowed_grav,
+         grav_stats=["mean", "std"],
      )
      df_features = pd.concat([df_features, df_temporal_features], axis=1)

      # Extract spectral domain features for accelerometer and gyroscope signals
-     for sensor_name, windowed_sensor in zip(['accelerometer', 'gyroscope'], [windowed_acc, windowed_gyro]):
+     for sensor_name, windowed_sensor in zip(
+         ["accelerometer", "gyroscope"], [windowed_acc, windowed_gyro]
+     ):
          df_spectral_features = extract_spectral_domain_features(
-             config=config,
-             sensor=sensor_name,
-             windowed_data=windowed_sensor
+             config=config, sensor=sensor_name, windowed_data=windowed_sensor
          )
          df_features = pd.concat([df_features, df_spectral_features], axis=1)

@@ -277,10 +287,8 @@ def extract_arm_activity_features(


  def filter_gait(
-     df: pd.DataFrame,
-     clf_package: ClassifierPackage,
-     parallel: bool=False
-     ) -> pd.Series:
+     df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
+ ) -> pd.Series:
      """
      Filters gait data to identify windows with no other arm activity using a pre-trained classifier.

@@ -300,10 +308,10 @@ def filter_gait(
      """
      if df.shape[0] == 0:
          raise ValueError("No data found in the input DataFrame.")
-
+
      # Set classifier
      clf = clf_package.classifier
-     if not parallel and hasattr(clf, 'n_jobs'):
+     if not parallel and hasattr(clf, "n_jobs"):
          clf.n_jobs = 1

      feature_names_scaling = clf_package.scaler.feature_names_in_
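filter_gait mirrors detect_gait but targets the arm-activity classifier, and its output feeds quantify_arm_swing below. A sketch of that hand-off (the DataColumns constant appears in this diff; the threshold attribute and the input DataFrames are assumptions):

    from paradigma.constants import DataColumns

    proba = filter_gait(df=df_arm_features, clf_package=clf_package_filter, parallel=False)
    df_arm_features[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] = (
        proba >= clf_package_filter.threshold  # assumed attribute
    ).astype(int)

    df_params, segment_meta = quantify_arm_swing(
        df=df_arm_features,
        fs=100,                    # sampling frequency in Hz
        filtered=True,             # keep only gait without other arm activity
        max_segment_gap_s=1.5,
        min_segment_length_s=1.5,
    )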
@@ -323,12 +331,12 @@ def filter_gait(


  def quantify_arm_swing(
-     df: pd.DataFrame,
-     fs: int,
-     filtered: bool = False,
-     max_segment_gap_s: float = 1.5,
-     min_segment_length_s: float = 1.5
-     ) -> Tuple[dict[str, pd.DataFrame], dict]:
+     df: pd.DataFrame,
+     fs: int,
+     filtered: bool = False,
+     max_segment_gap_s: float = 1.5,
+     min_segment_length_s: float = 1.5,
+ ) -> Tuple[dict[str, pd.DataFrame], dict]:
      """
      Quantify arm swing parameters for segments of motion based on gyroscope data.

@@ -346,72 +354,75 @@ def quantify_arm_swing(

      max_segment_gap_s : float, optional, default=1.5
          The maximum gap in seconds between consecutive timestamps to group them into segments.
-
+
      min_segment_length_s : float, optional, default=1.5
          The minimum length in seconds for a segment to be considered valid.

      Returns
      -------
      Tuple[pd.DataFrame, dict]
-         A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing
+         A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing
          metadata for each segment.
      """
      # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap.
      # Segments are made based on predicted gait
-     df['unfiltered_segment_nr'] = create_segments(
-         time_array=df[DataColumns.TIME],
-         max_segment_gap_s=max_segment_gap_s
+     df["unfiltered_segment_nr"] = create_segments(
+         time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
      )

      # Remove segments that do not meet predetermined criteria
      df = discard_segments(
          df=df,
-         segment_nr_colname='unfiltered_segment_nr',
+         segment_nr_colname="unfiltered_segment_nr",
          min_segment_length_s=min_segment_length_s,
          fs=fs,
-         format='timestamps'
+         format="timestamps",
      )

      if df.empty:
-         raise ValueError("No segments found in the input data after discarding segments of invalid shape.")
-
+         raise ValueError(
+             "No segments found in the input data after discarding segments of invalid shape."
+         )
+
      # Create dictionary of gait segment number and duration
-     gait_segment_duration_dict = {segment_nr: len(group[DataColumns.TIME]) / fs for segment_nr, group in df.groupby('unfiltered_segment_nr', sort=False)}
-
+     gait_segment_duration_dict = {
+         segment_nr: len(group[DataColumns.TIME]) / fs
+         for segment_nr, group in df.groupby("unfiltered_segment_nr", sort=False)
+     }
+
      # If no arm swing data is remaining, return an empty dictionary
-     if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].empty:
+     if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1].empty:
          raise ValueError("No gait without other arm activities to quantify.")
      elif filtered:
          # Filter the DataFrame to only include predicted no other arm activity (1)
-         df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].reset_index(drop=True)
+         df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1].reset_index(
+             drop=True
+         )

          # Group consecutive timestamps into segments of filtered gait
-         df['filtered_segment_nr'] = create_segments(
-             time_array=df[DataColumns.TIME],
-             max_segment_gap_s=max_segment_gap_s
+         df["filtered_segment_nr"] = create_segments(
+             time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
          )

          # Remove segments that do not meet predetermined criteria
          df = discard_segments(
              df=df,
-             segment_nr_colname='filtered_segment_nr',
+             segment_nr_colname="filtered_segment_nr",
              min_segment_length_s=min_segment_length_s,
              fs=fs,
          )

          if df.empty:
-             raise ValueError("No filtered gait segments found in the input data after discarding segments of invalid shape.")
-
-     grouping_colname = 'filtered_segment_nr' if filtered else 'unfiltered_segment_nr'
+             raise ValueError(
+                 "No filtered gait segments found in the input data after discarding segments of invalid shape."
+             )
+
+     grouping_colname = "filtered_segment_nr" if filtered else "unfiltered_segment_nr"

      arm_swing_quantified = []
      segment_meta = {
-         'aggregated': {
-             'all': {
-                 'duration_s': len(df[DataColumns.TIME]) / fs
-             },
-         },
-         'per_segment': {}
+         "all": {"duration_s": len(df[DataColumns.TIME]) / fs},
+         "per_segment": {},
      }

      # PCA is fitted on only predicted gait without other arm activity if filtered, otherwise
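Note that the segment_meta change in this hunk is behavioral, not just reformatting: the 'aggregated' nesting level is dropped. Callers indexing the old layout need updating:

    # 1.0.3 layout
    total_duration_s = segment_meta["aggregated"]["all"]["duration_s"]

    # 1.0.4 layout (per the hunk above)
    total_duration_s = segment_meta["all"]["duration_s"]
    per_segment_meta = segment_meta["per_segment"]  # keyed by segment number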
@@ -425,7 +436,9 @@ def quantify_arm_swing(
      # Group and process segments
      for segment_nr, group in df.groupby(grouping_colname, sort=False):
          if filtered:
-             gait_segment_nr = group['unfiltered_segment_nr'].iloc[0]  # Each filtered segment is contained within an unfiltered segment
+             gait_segment_nr = group["unfiltered_segment_nr"].iloc[
+                 0
+             ]  # Each filtered segment is contained within an unfiltered segment
          else:
              gait_segment_nr = segment_nr

@@ -434,9 +447,12 @@ def quantify_arm_swing(
          except KeyError:
              logger.warning(
                  "Segment %s (filtered = %s) not found in gait segment duration dictionary. Skipping this segment.",
-                 gait_segment_nr, filtered
+                 gait_segment_nr,
+                 filtered,
+             )
+             logger.debug(
+                 "Available segments: %s", list(gait_segment_duration_dict.keys())
              )
-             logger.debug("Available segments: %s", list(gait_segment_duration_dict.keys()))
              continue

          time_array = group[DataColumns.TIME].to_numpy()
@@ -454,20 +470,22 @@ def quantify_arm_swing(
              fs=fs,
          )

-         segment_meta['per_segment'][segment_nr] = {
-             'start_time_s': time_array.min(),
-             'end_time_s': time_array.max(),
-             'duration_unfiltered_segment_s': gait_segment_duration_s,
+         segment_meta["per_segment"][segment_nr] = {
+             "start_time_s": time_array.min(),
+             "end_time_s": time_array.max(),
+             "duration_unfiltered_segment_s": gait_segment_duration_s,
          }

          if filtered:
-             segment_meta['per_segment'][segment_nr]['duration_filtered_segment_s'] = len(time_array) / fs
+             segment_meta["per_segment"][segment_nr]["duration_filtered_segment_s"] = (
+                 len(time_array) / fs
+             )

-         if angle_array.size > 0:
+         if angle_array.size > 0:
              angle_extrema_indices, _, _ = extract_angle_extremes(
                  angle_array=angle_array,
                  sampling_frequency=fs,
-                 max_frequency_activity=1.75
+                 max_frequency_activity=1.75,
              )

              if len(angle_extrema_indices) > 1:  # Requires at minimum 2 peaks
@@ -478,36 +496,47 @@ def quantify_arm_swing(
                      )
                  except Exception as e:
                      # Handle the error, set RoM to NaN, and log the error
-                     print(f"Error computing range of motion for segment {segment_nr}: {e}")
+                     print(
+                         f"Error computing range of motion for segment {segment_nr}: {e}"
+                     )
                      rom = np.array([np.nan])

                  try:
                      pav = compute_peak_angular_velocity(
                          velocity_array=velocity_array,
-                         angle_extrema_indices=angle_extrema_indices
+                         angle_extrema_indices=angle_extrema_indices,
                      )
                  except Exception as e:
                      # Handle the error, set pav to NaN, and log the error
-                     print(f"Error computing peak angular velocity for segment {segment_nr}: {e}")
+                     print(
+                         f"Error computing peak angular velocity for segment {segment_nr}: {e}"
+                     )
                      pav = np.array([np.nan])

-                 df_params_segment = pd.DataFrame({
-                     DataColumns.SEGMENT_NR: segment_nr,
-                     DataColumns.RANGE_OF_MOTION: rom,
-                     DataColumns.PEAK_VELOCITY: pav
-                 })
+                 df_params_segment = pd.DataFrame(
+                     {
+                         DataColumns.SEGMENT_NR: segment_nr,
+                         DataColumns.RANGE_OF_MOTION: rom,
+                         DataColumns.PEAK_VELOCITY: pav,
+                     }
+                 )

                  arm_swing_quantified.append(df_params_segment)

      arm_swing_quantified = pd.concat(arm_swing_quantified, ignore_index=True)
-
+
      return arm_swing_quantified, segment_meta


- def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta: dict, segment_cats: List[tuple], aggregates: List[str] = ['median']) -> dict:
+ def aggregate_arm_swing_params(
+     df_arm_swing_params: pd.DataFrame,
+     segment_meta: dict,
+     segment_cats: List[tuple],
+     aggregates: List[str] = ["median"],
+ ) -> dict:
      """
      Aggregate the quantification results for arm swing parameters.
-
+
      Parameters
      ----------
      df_arm_swing_params : pd.DataFrame
@@ -518,10 +547,9 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:

      segment_cats : List[tuple]
          A list of tuples defining the segment categories, where each tuple contains the lower and upper bounds for the segment duration.
-
      aggregates : List[str], optional
          A list of aggregation methods to apply to the quantification results.
-
+
      Returns
      -------
      dict
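A usage sketch for the reformatted signature above. The category bounds are hypothetical; per the docstring, each tuple is a [lower, upper) duration range in seconds, and the segment_meta argument is the per-segment dictionary returned by quantify_arm_swing:

    segment_cats = [(1.5, 5.0), (5.0, 10.0), (10.0, float("inf"))]

    aggregated = aggregate_arm_swing_params(
        df_arm_swing_params=df_params,           # from quantify_arm_swing
        segment_meta=segment_meta["per_segment"],
        segment_cats=segment_cats,
        aggregates=["median", "std"],
    )

    # Keys follow f"{lower}_{upper}", e.g. aggregated["5.0_10.0"]["duration_s"] and
    # aggregated["5.0_10.0"]["median_range_of_motion"] (assuming
    # DataColumns.RANGE_OF_MOTION == "range_of_motion").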
@@ -531,70 +559,91 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:

      aggregated_results = {}
      for segment_cat_range in segment_cats:
-         segment_cat_str = f'{segment_cat_range[0]}_{segment_cat_range[1]}'
+         segment_cat_str = f"{segment_cat_range[0]}_{segment_cat_range[1]}"
          cat_segments = [
-             x for x in segment_meta.keys()
-             if segment_meta[x]['duration_unfiltered_segment_s'] >= segment_cat_range[0]
-             and segment_meta[x]['duration_unfiltered_segment_s'] < segment_cat_range[1]
+             x
+             for x in segment_meta.keys()
+             if segment_meta[x]["duration_unfiltered_segment_s"] >= segment_cat_range[0]
+             and segment_meta[x]["duration_unfiltered_segment_s"] < segment_cat_range[1]
          ]

-         if len(cat_segments) > 0:
+         if len(cat_segments) > 0:
              # For each segment, use 'duration_filtered_segment_s' if present, else 'duration_unfiltered_segment_s'
              aggregated_results[segment_cat_str] = {
-                 'duration_s': sum(
+                 "duration_s": sum(
                      [
-                         segment_meta[x]['duration_filtered_segment_s']
-                         if 'duration_filtered_segment_s' in segment_meta[x]
-                         else segment_meta[x]['duration_unfiltered_segment_s']
+                         (
+                             segment_meta[x]["duration_filtered_segment_s"]
+                             if "duration_filtered_segment_s" in segment_meta[x]
+                             else segment_meta[x]["duration_unfiltered_segment_s"]
+                         )
                          for x in cat_segments
                      ]
-             )}
+                 )
+             }
+
+             df_arm_swing_params_cat = df_arm_swing_params.loc[
+                 df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)
+             ]

-             df_arm_swing_params_cat = df_arm_swing_params.loc[df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)]
-
              # Aggregate across all segments
-             aggregates_per_segment = ['median', 'mean']
+             aggregates_per_segment = ["median", "mean"]

              for arm_swing_parameter in arm_swing_parameters:
                  for aggregate in aggregates:
-                     if aggregate in ['std', 'cov']:
+                     if aggregate in ["std", "cov"]:
                          per_segment_agg = []
                          # If the aggregate is 'cov' (coefficient of variation), we also compute the mean and standard deviation per segment
-                         segment_groups = dict(tuple(df_arm_swing_params_cat.groupby(DataColumns.SEGMENT_NR)))
+                         segment_groups = dict(
+                             tuple(
+                                 df_arm_swing_params_cat.groupby(DataColumns.SEGMENT_NR)
+                             )
+                         )
                          for segment_nr in cat_segments:
                              segment_df = segment_groups.get(segment_nr)
                              if segment_df is not None:
-                                 per_segment_agg.append(aggregate_parameter(segment_df[arm_swing_parameter], aggregate))
+                                 per_segment_agg.append(
+                                     aggregate_parameter(
+                                         segment_df[arm_swing_parameter], aggregate
+                                     )
+                                 )

                          # Drop nans
                          per_segment_agg = np.array(per_segment_agg)
                          per_segment_agg = per_segment_agg[~np.isnan(per_segment_agg)]

-
                          for segment_level_aggregate in aggregates_per_segment:
-                             aggregated_results[segment_cat_str][f'{segment_level_aggregate}_{aggregate}_{arm_swing_parameter}'] = aggregate_parameter(per_segment_agg, segment_level_aggregate)
+                             aggregated_results[segment_cat_str][
+                                 f"{segment_level_aggregate}_{aggregate}_{arm_swing_parameter}"
+                             ] = aggregate_parameter(
+                                 per_segment_agg, segment_level_aggregate
+                             )
                      else:
-                         aggregated_results[segment_cat_str][f'{aggregate}_{arm_swing_parameter}'] = aggregate_parameter(df_arm_swing_params_cat[arm_swing_parameter], aggregate)
+                         aggregated_results[segment_cat_str][
+                             f"{aggregate}_{arm_swing_parameter}"
+                         ] = aggregate_parameter(
+                             df_arm_swing_params_cat[arm_swing_parameter], aggregate
+                         )

          else:
              # If no segments are found for this category, initialize with NaN
              aggregated_results[segment_cat_str] = {
-                 'duration_s': 0,
+                 "duration_s": 0,
              }

      return aggregated_results


  def extract_temporal_domain_features(
-     config,
-     windowed_acc: np.ndarray,
-     windowed_grav: np.ndarray,
-     grav_stats: List[str] = ['mean']
-     ) -> pd.DataFrame:
+     config,
+     windowed_acc: np.ndarray,
+     windowed_grav: np.ndarray,
+     grav_stats: List[str] = ["mean"],
+ ) -> pd.DataFrame:
      """
      Compute temporal domain features for the accelerometer signal.

-     This function calculates various statistical features for the gravity signal
+     This function calculates various statistical features for the gravity signal
      and computes the standard deviation of the accelerometer's Euclidean norm.

      Parameters
@@ -602,10 +651,10 @@ def extract_temporal_domain_features(
      config : object
          Configuration object containing the accelerometer and gravity column names.
      windowed_acc : numpy.ndarray
-         A 2D numpy array of shape (N, M) where N is the number of windows and M is
+         A 2D numpy array of shape (N, M) where N is the number of windows and M is
          the number of accelerometer values per window.
      windowed_grav : numpy.ndarray
-         A 2D numpy array of shape (N, M) where N is the number of windows and M is
+         A 2D numpy array of shape (N, M) where N is the number of windows and M is
          the number of gravity signal values per window.
      grav_stats : list of str, optional
          A list of statistics to compute for the gravity signal (default is ['mean']).
@@ -613,32 +662,34 @@ def extract_temporal_domain_features(
      Returns
      -------
      pd.DataFrame
-         A DataFrame containing the computed features, with each row corresponding
+         A DataFrame containing the computed features, with each row corresponding
          to a window and each column representing a specific feature.
      """
      # Compute gravity statistics (e.g., mean, std, etc.)
      feature_dict = {}
      for stat in grav_stats:
          stats_result = compute_statistics(data=windowed_grav, statistic=stat)
-         for i, col in enumerate(config.gravity_cols):
-             feature_dict[f'{col}_{stat}'] = stats_result[:, i]
+         for i, col in enumerate(config.gravity_colnames):
+             feature_dict[f"{col}_{stat}"] = stats_result[:, i]

      # Compute standard deviation of the Euclidean norm of the accelerometer signal
-     feature_dict['accelerometer_std_norm'] = compute_std_euclidean_norm(data=windowed_acc)
+     feature_dict["accelerometer_std_norm"] = compute_std_euclidean_norm(
+         data=windowed_acc
+     )

      return pd.DataFrame(feature_dict)


  def extract_spectral_domain_features(
-     windowed_data: np.ndarray,
-     config,
-     sensor: str,
-     ) -> pd.DataFrame:
+     windowed_data: np.ndarray,
+     config,
+     sensor: str,
+ ) -> pd.DataFrame:
      """
      Compute spectral domain features for a sensor's data.

-     This function computes the periodogram, extracts power in specific frequency bands,
-     calculates the dominant frequency, and computes Mel-frequency cepstral coefficients (MFCCs)
+     This function computes the periodogram, extracts power in specific frequency bands,
+     calculates the dominant frequency, and computes Mel-frequency cepstral coefficients (MFCCs)
      for a given sensor's windowed data.

      Parameters
@@ -647,16 +698,16 @@ def extract_spectral_domain_features(
          A 2D numpy array where each row corresponds to a window of sensor data.

      config : object
-         Configuration object containing settings such as sampling frequency, window type,
+         Configuration object containing settings such as sampling frequency, window type,
          frequency bands, and MFCC parameters.

      sensor : str
          The name of the sensor (e.g., 'accelerometer', 'gyroscope').
-
+
      Returns
      -------
      pd.DataFrame
-         A DataFrame containing the computed spectral features, with each row corresponding
+         A DataFrame containing the computed spectral features, with each row corresponding
          to a window and each column representing a specific feature.
      """
      # Initialize a dictionary to hold the results
@@ -664,49 +715,46 @@ def extract_spectral_domain_features(

      # Compute periodogram (power spectral density)
      freqs, psd = periodogram(
-         x=windowed_data,
-         fs=config.sampling_frequency,
-         window=config.window_type,
-         axis=1
+         x=windowed_data, fs=config.sampling_frequency, window=config.window_type, axis=1
      )

      # Compute power in specified frequency bands
      for band_name, band_freqs in config.d_frequency_bandwidths.items():
          band_powers = compute_power_in_bandwidth(
              freqs=freqs,
-             psd=psd,
+             psd=psd,
              fmin=band_freqs[0],
              fmax=band_freqs[1],
-             include_max=False
+             include_max=False,
          )
          for i, col in enumerate(config.axes):
-             feature_dict[f'{sensor}_{col}_{band_name}'] = band_powers[:, i]
+             feature_dict[f"{sensor}_{col}_{band_name}"] = band_powers[:, i]

      # Compute dominant frequency for each axis
      dominant_frequencies = compute_dominant_frequency(
-         freqs=freqs,
-         psd=psd,
-         fmin=config.spectrum_low_frequency,
-         fmax=config.spectrum_high_frequency
+         freqs=freqs,
+         psd=psd,
+         fmin=config.spectrum_low_frequency,
+         fmax=config.spectrum_high_frequency,
      )

      # Add dominant frequency features to the feature_dict
      for axis, freq in zip(config.axes, dominant_frequencies.T):
-         feature_dict[f'{sensor}_{axis}_dominant_frequency'] = freq
+         feature_dict[f"{sensor}_{axis}_dominant_frequency"] = freq

      # Compute total power in the PSD
      total_power_psd = compute_total_power(psd)

      # Compute MFCCs
      mfccs = compute_mfccs(
-         total_power_array=total_power_psd,
-         config=config,
-         multiplication_factor=4
+         total_power_array=total_power_psd, config=config, multiplication_factor=4
      )

      # Combine the MFCCs into the features DataFrame
-     mfcc_colnames = [f'{sensor}_mfcc_{x}' for x in range(1, config.mfcc_n_coefficients + 1)]
+     mfcc_colnames = [
+         f"{sensor}_mfcc_{x}" for x in range(1, config.mfcc_n_coefficients + 1)
+     ]
      for i, colname in enumerate(mfcc_colnames):
          feature_dict[colname] = mfccs[:, i]

-     return pd.DataFrame(feature_dict)
+     return pd.DataFrame(feature_dict)
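The spectral hunks above rely on scipy.signal.periodogram applied along the sample axis of the windowed array. A self-contained sketch of the band-power idea on synthetic data (the trapezoidal integration step is an assumption about what compute_power_in_bandwidth does internally):

    import numpy as np
    from scipy.signal import periodogram

    fs = 100.0
    windows = np.random.default_rng(1).normal(size=(4, 600, 3))  # (n_windows, n_samples, n_axes)

    # PSD per window and axis, computed along the sample axis (axis=1), as above.
    freqs, psd = periodogram(x=windows, fs=fs, window="hann", axis=1)

    # Power in [fmin, fmax) via integration of the PSD over the band.
    fmin, fmax = 0.5, 3.0
    band = (freqs >= fmin) & (freqs < fmax)
    band_power = np.trapz(psd[:, band, :], freqs[band], axis=1)  # -> (4, 3)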