paradigma-0.3.1-py3-none-any.whl → paradigma-0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. paradigma/assets/gait_detection_clf_package.pkl +0 -0
  2. paradigma/assets/gait_filtering_clf_package.pkl +0 -0
  3. paradigma/assets/ppg_quality_clf_package.pkl +0 -0
  4. paradigma/assets/tremor_detection_clf_package.pkl +0 -0
  5. paradigma/classification.py +115 -0
  6. paradigma/config.py +314 -0
  7. paradigma/constants.py +48 -7
  8. paradigma/feature_extraction.py +811 -547
  9. paradigma/pipelines/__init__.py +0 -0
  10. paradigma/pipelines/gait_pipeline.py +727 -0
  11. paradigma/pipelines/heart_rate_pipeline.py +426 -0
  12. paradigma/pipelines/heart_rate_utils.py +780 -0
  13. paradigma/pipelines/tremor_pipeline.py +299 -0
  14. paradigma/preprocessing.py +363 -0
  15. paradigma/segmenting.py +396 -0
  16. paradigma/testing.py +416 -0
  17. paradigma/util.py +393 -16
  18. {paradigma-0.3.1.dist-info → paradigma-0.4.0.dist-info}/METADATA +58 -14
  19. paradigma-0.4.0.dist-info/RECORD +22 -0
  20. {paradigma-0.3.1.dist-info → paradigma-0.4.0.dist-info}/WHEEL +1 -1
  21. paradigma/gait_analysis.py +0 -415
  22. paradigma/gait_analysis_config.py +0 -266
  23. paradigma/heart_rate_analysis.py +0 -127
  24. paradigma/heart_rate_analysis_config.py +0 -9
  25. paradigma/heart_rate_util.py +0 -173
  26. paradigma/imu_preprocessing.py +0 -232
  27. paradigma/ppg/classifier/LR_PPG_quality.pkl +0 -0
  28. paradigma/ppg/classifier/LR_model.mat +0 -0
  29. paradigma/ppg/feat_extraction/acc_feature.m +0 -20
  30. paradigma/ppg/feat_extraction/peakdet.m +0 -64
  31. paradigma/ppg/feat_extraction/ppg_features.m +0 -53
  32. paradigma/ppg/glob_functions/extract_hr_segments.m +0 -37
  33. paradigma/ppg/glob_functions/extract_overlapping_segments.m +0 -23
  34. paradigma/ppg/glob_functions/jsonlab/AUTHORS.txt +0 -41
  35. paradigma/ppg/glob_functions/jsonlab/ChangeLog.txt +0 -74
  36. paradigma/ppg/glob_functions/jsonlab/LICENSE_BSD.txt +0 -25
  37. paradigma/ppg/glob_functions/jsonlab/LICENSE_GPLv3.txt +0 -699
  38. paradigma/ppg/glob_functions/jsonlab/README.txt +0 -394
  39. paradigma/ppg/glob_functions/jsonlab/examples/.svn/entries +0 -368
  40. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_jsonlab_basic.m.svn-base +0 -180
  41. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_ubjson_basic.m.svn-base +0 -180
  42. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example1.json.svn-base +0 -23
  43. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example2.json.svn-base +0 -22
  44. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example3.json.svn-base +0 -11
  45. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example4.json.svn-base +0 -34
  46. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_basictest.matlab.svn-base +0 -662
  47. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.m.svn-base +0 -27
  48. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.matlab.svn-base +0 -144
  49. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_speedtest.m.svn-base +0 -21
  50. paradigma/ppg/glob_functions/jsonlab/examples/demo_jsonlab_basic.m +0 -180
  51. paradigma/ppg/glob_functions/jsonlab/examples/demo_ubjson_basic.m +0 -180
  52. paradigma/ppg/glob_functions/jsonlab/examples/example1.json +0 -23
  53. paradigma/ppg/glob_functions/jsonlab/examples/example2.json +0 -22
  54. paradigma/ppg/glob_functions/jsonlab/examples/example3.json +0 -11
  55. paradigma/ppg/glob_functions/jsonlab/examples/example4.json +0 -34
  56. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_basictest.matlab +0 -662
  57. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.m +0 -27
  58. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.matlab +0 -144
  59. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_speedtest.m +0 -21
  60. paradigma/ppg/glob_functions/jsonlab/jsonopt.m +0 -32
  61. paradigma/ppg/glob_functions/jsonlab/loadjson.m +0 -566
  62. paradigma/ppg/glob_functions/jsonlab/loadubjson.m +0 -528
  63. paradigma/ppg/glob_functions/jsonlab/mergestruct.m +0 -33
  64. paradigma/ppg/glob_functions/jsonlab/savejson.m +0 -475
  65. paradigma/ppg/glob_functions/jsonlab/saveubjson.m +0 -504
  66. paradigma/ppg/glob_functions/jsonlab/varargin2struct.m +0 -40
  67. paradigma/ppg/glob_functions/sample_prob_final.m +0 -49
  68. paradigma/ppg/glob_functions/synchronization.m +0 -76
  69. paradigma/ppg/glob_functions/tsdf_scan_meta.m +0 -22
  70. paradigma/ppg/hr_functions/Long_TFD_JOT.m +0 -37
  71. paradigma/ppg/hr_functions/PPG_TFD_HR.m +0 -59
  72. paradigma/ppg/hr_functions/TFD toolbox JOT/.gitignore +0 -4
  73. paradigma/ppg/hr_functions/TFD toolbox JOT/CHANGELOG.md +0 -23
  74. paradigma/ppg/hr_functions/TFD toolbox JOT/LICENCE.md +0 -27
  75. paradigma/ppg/hr_functions/TFD toolbox JOT/README.md +0 -251
  76. paradigma/ppg/hr_functions/TFD toolbox JOT/README.pdf +0 -0
  77. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_kern.m +0 -142
  78. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_lag_kern.m +0 -314
  79. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_lag_kern.m +0 -123
  80. paradigma/ppg/hr_functions/TFD toolbox JOT/dec_tfd.m +0 -154
  81. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_di_gdtfd.m +0 -194
  82. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_li_gdtfd.m +0 -200
  83. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_nonsep_gdtfd.m +0 -229
  84. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_sep_gdtfd.m +0 -241
  85. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/di_gdtfd.m +0 -157
  86. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/li_gdtfd.m +0 -190
  87. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/nonsep_gdtfd.m +0 -196
  88. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/sep_gdtfd.m +0 -199
  89. paradigma/ppg/hr_functions/TFD toolbox JOT/full_tfd.m +0 -144
  90. paradigma/ppg/hr_functions/TFD toolbox JOT/load_curdir.m +0 -13
  91. paradigma/ppg/hr_functions/TFD toolbox JOT/pics/decimated_TFDs_examples.png +0 -0
  92. paradigma/ppg/hr_functions/TFD toolbox JOT/pics/full_TFDs_examples.png +0 -0
  93. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/check_dec_params_seq.m +0 -79
  94. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispEE.m +0 -9
  95. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispVars.m +0 -26
  96. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/disp_bytes.m +0 -25
  97. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_full.m +0 -40
  98. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_half.m +0 -34
  99. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/gen_LFM.m +0 -29
  100. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_analytic_signal.m +0 -76
  101. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_window.m +0 -176
  102. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/isreal_fn.m +0 -11
  103. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/padWin.m +0 -97
  104. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/vtfd.m +0 -149
  105. paradigma/ppg/preprocessing/preprocessing_imu.m +0 -15
  106. paradigma/ppg/preprocessing/preprocessing_ppg.m +0 -13
  107. paradigma/ppg_preprocessing.py +0 -313
  108. paradigma/preprocessing_config.py +0 -69
  109. paradigma/quantification.py +0 -58
  110. paradigma/tremor/TremorFeaturesAndClassification.m +0 -345
  111. paradigma/tremor/feat_extraction/DerivativesExtract.m +0 -22
  112. paradigma/tremor/feat_extraction/ExtractBandSignalsRMS.m +0 -72
  113. paradigma/tremor/feat_extraction/MFCCExtract.m +0 -100
  114. paradigma/tremor/feat_extraction/PSDBandPower.m +0 -52
  115. paradigma/tremor/feat_extraction/PSDEst.m +0 -63
  116. paradigma/tremor/feat_extraction/PSDExtrAxis.m +0 -88
  117. paradigma/tremor/feat_extraction/PSDExtrOpt.m +0 -95
  118. paradigma/tremor/preprocessing/InterpData.m +0 -32
  119. paradigma/tremor/weekly_aggregates/WeeklyAggregates.m +0 -295
  120. paradigma/windowing.py +0 -219
  121. paradigma-0.3.1.dist-info/RECORD +0 -108
  122. {paradigma-0.3.1.dist-info → paradigma-0.4.0.dist-info}/LICENSE +0 -0
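
The list above reflects the 0.4.0 restructuring: the flat analysis modules (gait_analysis.py, imu_preprocessing.py, windowing.py, and friends) and the vendored MATLAB sources under paradigma/ppg and paradigma/tremor are removed, while a paradigma.pipelines subpackage plus new classification, config, preprocessing, segmenting, and testing modules are added. A minimal sketch of how downstream imports move, using module paths taken from the file list (the commented 0.3.1 paths are the removed modules):

    # 0.3.1 layout (modules removed in this release)
    # from paradigma.gait_analysis import ...
    # from paradigma.imu_preprocessing import ...

    # 0.4.0 layout (files added in this release)
    from paradigma.config import GaitConfig
    from paradigma.classification import ClassifierPackage
    from paradigma.pipelines.gait_pipeline import extract_gait_features, detect_gait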
paradigma/pipelines/gait_pipeline.py
@@ -0,0 +1,727 @@
+ import numpy as np
+ import os
+ import pandas as pd
+ from pathlib import Path
+ from scipy.signal import periodogram
+ from typing import List, Tuple
+ import tsdf
+
+ from paradigma.classification import ClassifierPackage
+ from paradigma.constants import DataColumns, TimeUnit
+ from paradigma.config import GaitConfig
+ from paradigma.feature_extraction import pca_transform_gyroscope, compute_angle, remove_moving_average_angle, \
+     extract_angle_extremes, compute_range_of_motion, compute_peak_angular_velocity, compute_statistics, \
+     compute_std_euclidean_norm, compute_power_in_bandwidth, compute_dominant_frequency, compute_mfccs, \
+     compute_total_power
+ from paradigma.segmenting import tabulate_windows, create_segments, discard_segments, categorize_segments, WindowedDataExtractor
+ from paradigma.util import aggregate_parameter, merge_predictions_with_timestamps, read_metadata, write_df_data, get_end_iso8601
+
+
+ def extract_gait_features(
+     df: pd.DataFrame,
+     config: GaitConfig
+ ) -> pd.DataFrame:
+     """
+     Extracts gait features from accelerometer and gravity sensor data in the input DataFrame by computing temporal and spectral features.
+
+     This function performs the following steps:
+     1. Groups sequences of timestamps into windows, using accelerometer and gravity data.
+     2. Computes temporal domain features such as mean and standard deviation for accelerometer and gravity data.
+     3. Transforms the signals from the temporal domain to the spectral domain using the Fast Fourier Transform (FFT).
+     4. Computes spectral domain features for the accelerometer data.
+     5. Combines both temporal and spectral features into a final DataFrame.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The input DataFrame containing gait data, which includes time, accelerometer, and gravity sensor data. The data should be
+         structured with the necessary columns as specified in the `config`.
+
+     config : GaitConfig
+         Configuration object containing parameters for feature extraction, including column names for time, accelerometer data, and
+         gravity data, as well as settings for windowing and feature computation.
+
+     Returns
+     -------
+     pd.DataFrame
+         A DataFrame containing extracted gait features, including temporal and spectral domain features. The DataFrame will have
+         columns corresponding to time, statistical features of the accelerometer and gravity data, and spectral features of the
+         accelerometer data.
+
+     Notes
+     -----
+     - This function groups the data into windows based on timestamps and applies the Fast Fourier Transform to compute spectral features.
+     - The temporal features are extracted from the accelerometer and gravity data, and include statistics like mean and standard deviation.
+     - The input DataFrame must include columns as specified in the `config` object for proper feature extraction.
+
+     Raises
+     ------
+     ValueError
+         If the input DataFrame does not contain the required columns as specified in the configuration or if any step in the feature extraction fails.
+     """
+     # Group sequences of timestamps into windows
+     windowed_cols = [DataColumns.TIME] + config.accelerometer_cols + config.gravity_cols
+     windowed_data = tabulate_windows(
+         df=df,
+         columns=windowed_cols,
+         window_length_s=config.window_length_s,
+         window_step_length_s=config.window_step_length_s,
+         fs=config.sampling_frequency
+     )
+
+     extractor = WindowedDataExtractor(windowed_cols)
+
+     idx_time = extractor.get_index(DataColumns.TIME)
+     idx_acc = extractor.get_slice(config.accelerometer_cols)
+     idx_grav = extractor.get_slice(config.gravity_cols)
+
+     # Extract data
+     start_time = np.min(windowed_data[:, :, idx_time], axis=1)
+     windowed_acc = windowed_data[:, :, idx_acc]
+     windowed_grav = windowed_data[:, :, idx_grav]
+
+     df_features = pd.DataFrame(start_time, columns=[DataColumns.TIME])
+
+     # Compute statistics of the temporal domain signals (mean, std) for accelerometer and gravity
+     df_temporal_features = extract_temporal_domain_features(
+         config=config,
+         windowed_acc=windowed_acc,
+         windowed_grav=windowed_grav,
+         grav_stats=['mean', 'std']
+     )
+
+     # Combine temporal features with the start time
+     df_features = pd.concat([df_features, df_temporal_features], axis=1)
+
+     # Transform the accelerometer data to the spectral domain using FFT and extract spectral features
+     df_spectral_features = extract_spectral_domain_features(
+         config=config,
+         sensor='accelerometer',
+         windowed_data=windowed_acc
+     )
+
+     # Combine the spectral features with the previously computed temporal features
+     df_features = pd.concat([df_features, df_spectral_features], axis=1)
+
+     return df_features
+
+
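
A minimal usage sketch for extract_gait_features, assuming the package is installed and taking column names and the sampling rate from GaitConfig; the 60 s of random data below is illustrative only:

    import numpy as np
    import pandas as pd

    from paradigma.config import GaitConfig
    from paradigma.constants import DataColumns
    from paradigma.pipelines.gait_pipeline import extract_gait_features

    config = GaitConfig(step='gait')
    fs = config.sampling_frequency

    # Hypothetical 60 s of preprocessed sensor data
    n = int(60 * fs)
    df = pd.DataFrame({DataColumns.TIME: np.arange(n) / fs})
    for col in config.accelerometer_cols + config.gravity_cols:
        df[col] = np.random.randn(n)

    df_features = extract_gait_features(df=df, config=config)  # one row per window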
+ def detect_gait(
+     df: pd.DataFrame,
+     clf_package: ClassifierPackage,
+     parallel: bool = False
+ ) -> pd.Series:
+     """
+     Detects gait activity in the input DataFrame using a pre-trained classifier.
+
+     This function performs the following steps:
+     1. Takes the pre-trained classifier and scaler from the provided classifier package.
+     2. Scales the relevant features in the input DataFrame (`df`) using the scaler.
+     3. Predicts the probability of gait activity for each window in the DataFrame using the classifier.
+     4. Returns the predicted probabilities.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The input DataFrame containing features extracted from gait data. It must include the necessary columns
+         as specified in the classifier's feature names.
+
+     clf_package : ClassifierPackage
+         The pre-trained classifier package containing the classifier, threshold, and scaler.
+
+     parallel : bool, optional, default=False
+         If `True`, enables parallel processing during classification. If `False`, the classifier uses a single core.
+
+     Returns
+     -------
+     pd.Series
+         A Series containing the predicted probabilities of gait activity for each window in the input DataFrame.
+     """
+     # Set classifier
+     clf = clf_package.classifier
+     if not parallel and hasattr(clf, 'n_jobs'):
+         clf.n_jobs = 1
+
+     feature_names_scaling = clf_package.scaler.feature_names_in_
+     feature_names_predictions = clf.feature_names_in_
+
+     # Apply scaling to relevant columns
+     scaled_features = clf_package.transform_features(df.loc[:, feature_names_scaling])
+
+     # Replace scaled features in a copy of the relevant features for prediction
+     X = df.loc[:, feature_names_predictions].copy()
+     X.loc[:, feature_names_scaling] = scaled_features
+
+     # Predict the probability of gait activity per window
+     pred_gait_proba_series = clf_package.predict_proba(X)
+
+     return pred_gait_proba_series
+
+
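
A hedged sketch of scoring those windows. The wheel ships gait_detection_clf_package.pkl under paradigma/assets (see the file list); the ClassifierPackage.load constructor shown here is an assumption, so check paradigma.classification for the actual loader:

    from importlib.resources import files

    from paradigma.classification import ClassifierPackage
    from paradigma.constants import DataColumns
    from paradigma.pipelines.gait_pipeline import detect_gait

    # Hypothetical loader name; paradigma.classification defines the real API
    package_path = files('paradigma') / 'assets' / 'gait_detection_clf_package.pkl'
    gait_package = ClassifierPackage.load(package_path)

    # Probability of gait per window; thresholding happens downstream
    df_features[DataColumns.PRED_GAIT_PROBA] = detect_gait(
        df=df_features, clf_package=gait_package, parallel=False
    )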
+ def extract_arm_activity_features(
+     config: GaitConfig,
+     df_timestamps: pd.DataFrame,
+     df_predictions: pd.DataFrame,
+     threshold: float
+ ) -> pd.DataFrame:
+     """
+     Extract features related to arm activity from a time-series DataFrame.
+
+     This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
+     and extracts features related to arm activity by performing the following steps:
+     1. Merges the gait predictions with timestamps by expanding overlapping windows into individual timestamps.
+     2. Filters the data to include only predicted gait segments.
+     3. Groups the data into segments based on consecutive timestamps and pre-specified gaps.
+     4. Removes segments that do not meet predefined criteria.
+     5. Creates fixed-length windows from the time series data.
+     6. Extracts temporal domain and spectral domain features per window.
+
+     Parameters
+     ----------
+     config : GaitConfig
+         Configuration object containing column names and parameters for feature extraction.
+
+     df_timestamps : pd.DataFrame
+         A DataFrame containing the raw sensor data, including accelerometer, gravity, and gyroscope columns.
+
+     df_predictions : pd.DataFrame
+         A DataFrame containing the predicted probabilities for gait activity per window.
+
+     threshold : float
+         The threshold applied to the predicted gait probabilities to determine which samples are included as gait.
+
+     Returns
+     -------
+     pd.DataFrame
+         A DataFrame containing the extracted arm activity features, including temporal and spectral
+         features per window.
+     """
+     if not any(df_predictions[DataColumns.PRED_GAIT_PROBA] >= threshold):
+         raise ValueError("No gait detected in the input data.")
+
+     # Merge gait predictions with timestamps
+     gait_preprocessing_config = GaitConfig(step='gait')
+     df = merge_predictions_with_timestamps(
+         df_ts=df_timestamps,
+         df_predictions=df_predictions,
+         pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
+         window_length_s=gait_preprocessing_config.window_length_s,
+         fs=gait_preprocessing_config.sampling_frequency
+     )
+
+     # Add a column for predicted gait based on a fitted threshold
+     df[DataColumns.PRED_GAIT] = (df[DataColumns.PRED_GAIT_PROBA] >= threshold).astype(int)
+
+     # Filter the DataFrame to only include predicted gait (1)
+     df = df.loc[df[DataColumns.PRED_GAIT]==1].reset_index(drop=True)
+
+     # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap
+     df[DataColumns.SEGMENT_NR] = create_segments(
+         time_array=df[DataColumns.TIME],
+         max_segment_gap_s=config.max_segment_gap_s
+     )
+
+     # Remove segments that do not meet predetermined criteria
+     df = discard_segments(
+         df=df,
+         segment_nr_colname=DataColumns.SEGMENT_NR,
+         min_segment_length_s=config.min_segment_length_s,
+         fs=config.sampling_frequency,
+         format='timestamps'
+     )
+
+     # Create windows of fixed length and step size from the time series per segment
+     windowed_data = []
+     df_grouped = df.groupby(DataColumns.SEGMENT_NR)
+     windowed_cols = (
+         [DataColumns.TIME] +
+         config.accelerometer_cols +
+         config.gravity_cols +
+         config.gyroscope_cols
+     )
+
+     # Collect windows from all segments in a list for faster concatenation
+     for _, group in df_grouped:
+         windows = tabulate_windows(
+             df=group,
+             columns=windowed_cols,
+             window_length_s=config.window_length_s,
+             window_step_length_s=config.window_step_length_s,
+             fs=config.sampling_frequency
+         )
+         if len(windows) > 0:  # Skip if no windows are created
+             windowed_data.append(windows)
+
+     # If no windows were created, return an empty DataFrame
+     if not windowed_data:
+         print("No windows were created from the given data.")
+         return pd.DataFrame()
+
+     # Concatenate the windows into one array at the end
+     windowed_data = np.concatenate(windowed_data, axis=0)
+
+     # Slice columns for accelerometer, gravity, and gyroscope
+     extractor = WindowedDataExtractor(windowed_cols)
+
+     idx_time = extractor.get_index(DataColumns.TIME)
+     idx_acc = extractor.get_slice(config.accelerometer_cols)
+     idx_grav = extractor.get_slice(config.gravity_cols)
+     idx_gyro = extractor.get_slice(config.gyroscope_cols)
+
+     # Extract data
+     start_time = np.min(windowed_data[:, :, idx_time], axis=1)
+     windowed_acc = windowed_data[:, :, idx_acc]
+     windowed_grav = windowed_data[:, :, idx_grav]
+     windowed_gyro = windowed_data[:, :, idx_gyro]
+
+     # Initialize DataFrame for features
+     df_features = pd.DataFrame(start_time, columns=[DataColumns.TIME])
+
+     # Extract temporal domain features (e.g., mean, std for accelerometer and gravity)
+     df_temporal_features = extract_temporal_domain_features(
+         config=config,
+         windowed_acc=windowed_acc,
+         windowed_grav=windowed_grav,
+         grav_stats=['mean', 'std']
+     )
+     df_features = pd.concat([df_features, df_temporal_features], axis=1)
+
+     # Extract spectral domain features for accelerometer and gyroscope signals
+     for sensor_name, windowed_sensor in zip(['accelerometer', 'gyroscope'], [windowed_acc, windowed_gyro]):
+         df_spectral_features = extract_spectral_domain_features(
+             config=config,
+             sensor=sensor_name,
+             windowed_data=windowed_sensor
+         )
+         df_features = pd.concat([df_features, df_spectral_features], axis=1)
+
+     return df_features
+
+
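
Continuing the sketch, the window-level gait probabilities feed back in alongside the raw timestamped data. The step name 'arm_activity' passed to GaitConfig is an assumption modeled on the 'gait' step used inside the function:

    from paradigma.config import GaitConfig
    from paradigma.pipelines.gait_pipeline import extract_arm_activity_features

    # df_raw: preprocessed time + accelerometer + gravity + gyroscope columns;
    # df_features: the windows scored by detect_gait above
    arm_config = GaitConfig(step='arm_activity')  # step name assumed
    df_arm_features = extract_arm_activity_features(
        config=arm_config,
        df_timestamps=df_raw,
        df_predictions=df_features,
        threshold=gait_package.threshold,  # fitted threshold stored in the package
    )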
+ def filter_gait(
+     df: pd.DataFrame,
+     clf_package: ClassifierPackage,
+     parallel: bool = False
+ ) -> pd.Series:
+     """
+     Filters gait data to identify windows with no other arm activity using a pre-trained classifier.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The input DataFrame containing features extracted from gait data.
+     clf_package : ClassifierPackage
+         The pre-trained classifier package containing the classifier, threshold, and scaler.
+     parallel : bool, optional, default=False
+         If `True`, enables parallel processing.
+
+     Returns
+     -------
+     pd.Series
+         A Series containing the predicted probabilities.
+     """
+     if df.shape[0] == 0:
+         raise ValueError("No data found in the input DataFrame.")
+
+     # Set classifier
+     clf = clf_package.classifier
+     if not parallel and hasattr(clf, 'n_jobs'):
+         clf.n_jobs = 1
+
+     feature_names_scaling = clf_package.scaler.feature_names_in_
+     feature_names_predictions = clf.feature_names_in_
+
+     # Apply scaling to relevant columns
+     scaled_features = clf_package.transform_features(df.loc[:, feature_names_scaling])
+
+     # Replace scaled features in a copy of the relevant features for prediction
+     X = df.loc[:, feature_names_predictions].copy()
+     X.loc[:, feature_names_scaling] = scaled_features
+
+     # Make predictions
+     pred_no_other_arm_activity_proba_series = clf_package.predict_proba(X)
+
+     return pred_no_other_arm_activity_proba_series
+
+
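
filter_gait mirrors detect_gait but uses the arm-activity filtering classifier (gait_filtering_clf_package.pkl in the asset list); the same loader assumption applies:

    from importlib.resources import files

    from paradigma.classification import ClassifierPackage
    from paradigma.constants import DataColumns
    from paradigma.pipelines.gait_pipeline import filter_gait

    filtering_package = ClassifierPackage.load(  # hypothetical loader name
        files('paradigma') / 'assets' / 'gait_filtering_clf_package.pkl'
    )
    df_arm_features[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA] = filter_gait(
        df=df_arm_features, clf_package=filtering_package
    )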
+ def quantify_arm_swing(
+     df_timestamps: pd.DataFrame,
+     df_predictions: pd.DataFrame,
+     classification_threshold: float,
+     window_length_s: float,
+     max_segment_gap_s: float,
+     min_segment_length_s: float,
+     fs: int,
+     dfs_to_quantify: List[str] | str = ['unfiltered', 'filtered'],
+ ) -> Tuple[dict[str, pd.DataFrame], dict]:
+     """
+     Quantify arm swing parameters for segments of motion based on gyroscope data.
+
+     Parameters
+     ----------
+     df_timestamps : pd.DataFrame
+         A DataFrame containing the raw sensor data, including gyroscope columns.
+
+     df_predictions : pd.DataFrame
+         A DataFrame containing the predicted probabilities for no other arm activity per window.
+
+     classification_threshold : float
+         The threshold used to classify no other arm activity based on the predicted probabilities.
+
+     window_length_s : float
+         The length of the window used for feature extraction.
+
+     max_segment_gap_s : float
+         The maximum gap allowed between segments.
+
+     min_segment_length_s : float
+         The minimum length required for a segment to be considered valid.
+
+     fs : int
+         The sampling frequency of the sensor data.
+
+     dfs_to_quantify : List[str] | str, optional
+         The DataFrames to quantify arm swing parameters for. Options are 'unfiltered' and 'filtered', with 'unfiltered' being predicted gait, and
+         'filtered' being predicted gait without other arm activities.
+
+     Returns
+     -------
+     Tuple[dict, dict]
+         A tuple containing a dictionary with quantified arm swing parameters for dfs_to_quantify,
+         and a dictionary containing metadata for each segment.
+     """
+     if not any(df_predictions[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA] >= classification_threshold):
+         raise ValueError("No gait without other arm activity detected in the input data.")
+
+     if isinstance(dfs_to_quantify, str):
+         dfs_to_quantify = [dfs_to_quantify]
+     elif not isinstance(dfs_to_quantify, list):
+         raise ValueError("dfs_to_quantify must be either 'unfiltered', 'filtered', or a list containing both.")
+
+     valid_values = {'unfiltered', 'filtered'}
+     if set(dfs_to_quantify) - valid_values:
+         raise ValueError(
+             f"Invalid value in dfs_to_quantify: {dfs_to_quantify}. "
+             f"Valid options are 'unfiltered', 'filtered', or both in a list."
+         )
+
+     # Merge arm activity predictions with timestamps
+     df = merge_predictions_with_timestamps(
+         df_ts=df_timestamps,
+         df_predictions=df_predictions,
+         pred_proba_colname=DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA,
+         window_length_s=window_length_s,
+         fs=fs
+     )
+
+     # Add a column for predicted no other arm activity based on a fitted threshold
+     df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] = (
+         df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA] >= classification_threshold
+     ).astype(int)
+
+     # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap.
+     # Segments are made based on predicted gait
+     df[DataColumns.SEGMENT_NR] = create_segments(
+         time_array=df[DataColumns.TIME],
+         max_segment_gap_s=max_segment_gap_s
+     )
+
+     # Remove segments that do not meet predetermined criteria
+     df = discard_segments(
+         df=df,
+         segment_nr_colname=DataColumns.SEGMENT_NR,
+         min_segment_length_s=min_segment_length_s,
+         fs=fs,
+         format='timestamps'
+     )
+
+     if df.empty:
+         raise ValueError("No segments found in the input data.")
+
+     # If no gait without other arm activity remains, drop 'filtered' from the outputs,
+     # or raise if it was the only output requested
+     if df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].empty:
+
+         if 'filtered' in dfs_to_quantify and len(dfs_to_quantify) == 1:
+             raise ValueError("No gait without other arm activities to quantify.")
+
+         dfs_to_quantify = [x for x in dfs_to_quantify if x != 'filtered']
+
+     df[DataColumns.SEGMENT_CAT] = categorize_segments(
+         df=df,
+         fs=fs
+     )
+
+     df[DataColumns.VELOCITY] = pca_transform_gyroscope(
+         df=df,
+         y_gyro_colname=DataColumns.GYROSCOPE_Y,
+         z_gyro_colname=DataColumns.GYROSCOPE_Z,
+         pred_colname=DataColumns.PRED_NO_OTHER_ARM_ACTIVITY
+     )
+
+     # Group and process segments
+     arm_swing_quantified = {}
+     segment_meta = {}
+
+     # If both unfiltered and filtered gait are to be quantified, start with the unfiltered data
+     # and subset to get filtered data afterwards.
+     dfs_to_quantify = sorted(dfs_to_quantify, reverse=True)
+
+     for df_name in dfs_to_quantify:
+         if df_name == 'filtered':
+             # Filter the DataFrame to only include predicted no other arm activity (1)
+             df_focus = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].copy().reset_index(drop=True)
+
+             # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap.
+             # Now segments are based on predicted gait without other arm activity for subsequent processes
+             df_focus[DataColumns.SEGMENT_NR] = create_segments(
+                 time_array=df_focus[DataColumns.TIME],
+                 max_segment_gap_s=max_segment_gap_s
+             )
+         else:
+             df_focus = df.copy()
+
+         arm_swing_quantified[df_name] = []
+         segment_meta[df_name] = {}
+
+         for segment_nr, group in df_focus.groupby(DataColumns.SEGMENT_NR, sort=False):
+             segment_cat = group[DataColumns.SEGMENT_CAT].iloc[0]
+             time_array = group[DataColumns.TIME].to_numpy()
+             velocity_array = group[DataColumns.VELOCITY].to_numpy()
+
+             # Integrate the angular velocity to obtain an estimation of the angle
+             angle_array = compute_angle(
+                 time_array=time_array,
+                 velocity_array=velocity_array,
+             )
+
+             # Detrend angle using moving average
+             angle_array = remove_moving_average_angle(
+                 angle_array=angle_array,
+                 fs=fs,
+             )
+
+             segment_meta[df_name][segment_nr] = {
+                 'time_s': len(angle_array) / fs,
+                 DataColumns.SEGMENT_CAT: segment_cat
+             }
+
+             if angle_array.size > 0:
+                 angle_extrema_indices, _, _ = extract_angle_extremes(
+                     angle_array=angle_array,
+                     sampling_frequency=fs,
+                     max_frequency_activity=1.75
+                 )
+
+                 if len(angle_extrema_indices) > 1:  # Requires at minimum 2 peaks
+                     try:
+                         rom = compute_range_of_motion(
+                             angle_array=angle_array,
+                             extrema_indices=angle_extrema_indices,
+                         )
+                     except Exception as e:
+                         # Handle the error, set RoM to NaN, and log the error
+                         print(f"Error computing range of motion for segment {segment_nr}: {e}")
+                         rom = np.array([np.nan])
+
+                     try:
+                         pav = compute_peak_angular_velocity(
+                             velocity_array=velocity_array,
+                             angle_extrema_indices=angle_extrema_indices
+                         )
+                     except Exception as e:
+                         # Handle the error, set pav to NaN, and log the error
+                         print(f"Error computing peak angular velocity for segment {segment_nr}: {e}")
+                         pav = np.array([np.nan])
+
+                     df_params_segment = pd.DataFrame({
+                         DataColumns.SEGMENT_NR: segment_nr,
+                         DataColumns.RANGE_OF_MOTION: rom,
+                         DataColumns.PEAK_VELOCITY: pav
+                     })
+
+                     arm_swing_quantified[df_name].append(df_params_segment)
+
+         arm_swing_quantified[df_name] = pd.concat(arm_swing_quantified[df_name], ignore_index=True)
+
+     return {df_name: arm_swing_quantified[df_name] for df_name in dfs_to_quantify}, segment_meta
+
+
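
A sketch of the quantification call; the window and segment parameters are illustrative and would normally be taken from the same GaitConfig used during feature extraction:

    from paradigma.pipelines.gait_pipeline import quantify_arm_swing

    quantified, segment_meta = quantify_arm_swing(
        df_timestamps=df_raw,
        df_predictions=df_arm_features,
        classification_threshold=filtering_package.threshold,
        window_length_s=3.0,       # illustrative; take from GaitConfig
        max_segment_gap_s=1.5,     # illustrative
        min_segment_length_s=1.5,  # illustrative
        fs=100,                    # illustrative sampling frequency
        dfs_to_quantify=['unfiltered', 'filtered'],
    )
    df_filtered = quantified['filtered']  # range of motion and peak velocity per swing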
+ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta: dict, aggregates: List[str] = ['median']) -> dict:
+     """
+     Aggregate the quantification results for arm swing parameters.
+
+     Parameters
+     ----------
+     df_arm_swing_params : pd.DataFrame
+         A DataFrame containing the arm swing parameters to be aggregated.
+
+     segment_meta : dict
+         A dictionary containing metadata for each segment.
+
+     aggregates : List[str], optional
+         A list of aggregation methods to apply to the quantification results.
+
+     Returns
+     -------
+     dict
+         A dictionary containing the aggregated quantification results for arm swing parameters.
+     """
+     arm_swing_parameters = [DataColumns.RANGE_OF_MOTION, DataColumns.PEAK_VELOCITY]
+
+     uq_segment_cats = set([segment_meta[x][DataColumns.SEGMENT_CAT] for x in df_arm_swing_params[DataColumns.SEGMENT_NR].unique()])
+
+     aggregated_results = {}
+     for segment_cat in uq_segment_cats:
+         cat_segments = [x for x in segment_meta.keys() if segment_meta[x][DataColumns.SEGMENT_CAT] == segment_cat]
+
+         aggregated_results[segment_cat] = {
+             'time_s': sum([segment_meta[x]['time_s'] for x in cat_segments])
+         }
+
+         df_arm_swing_params_cat = df_arm_swing_params[df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)]
+
+         for arm_swing_parameter in arm_swing_parameters:
+             for aggregate in aggregates:
+                 aggregated_results[segment_cat][f'{aggregate}_{arm_swing_parameter}'] = aggregate_parameter(df_arm_swing_params_cat[arm_swing_parameter], aggregate)
+
+     aggregated_results['all_segment_categories'] = {
+         'time_s': sum([segment_meta[x]['time_s'] for x in segment_meta.keys()])
+     }
+
+     for arm_swing_parameter in arm_swing_parameters:
+         for aggregate in aggregates:
+             aggregated_results['all_segment_categories'][f'{aggregate}_{arm_swing_parameter}'] = aggregate_parameter(df_arm_swing_params[arm_swing_parameter], aggregate)
+
+     return aggregated_results
+
+
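
The final aggregation step. segment_meta as returned by quantify_arm_swing is keyed by variant name first, so the per-variant sub-dictionary is what this function expects:

    from paradigma.pipelines.gait_pipeline import aggregate_arm_swing_params

    aggregated = aggregate_arm_swing_params(
        df_arm_swing_params=quantified['filtered'],
        segment_meta=segment_meta['filtered'],
        aggregates=['median'],  # 'median' is the documented default
    )
    # -> {<segment_cat>: {'time_s': ..., 'median_<param>': ...},
    #     'all_segment_categories': {...}}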
+ def extract_temporal_domain_features(
+     config,
+     windowed_acc: np.ndarray,
+     windowed_grav: np.ndarray,
+     grav_stats: List[str] = ['mean']
+ ) -> pd.DataFrame:
+     """
+     Compute temporal domain features for the accelerometer signal.
+
+     This function calculates various statistical features for the gravity signal
+     and computes the standard deviation of the accelerometer's Euclidean norm.
+
+     Parameters
+     ----------
+     config : object
+         Configuration object containing the accelerometer and gravity column names.
+     windowed_acc : numpy.ndarray
+         A 3D numpy array of shape (N, M, A) where N is the number of windows, M is
+         the number of accelerometer samples per window, and A is the number of axes.
+     windowed_grav : numpy.ndarray
+         A 3D numpy array of shape (N, M, A) where N is the number of windows, M is
+         the number of gravity signal samples per window, and A is the number of axes.
+     grav_stats : list of str, optional
+         A list of statistics to compute for the gravity signal (default is ['mean']).
+
+     Returns
+     -------
+     pd.DataFrame
+         A DataFrame containing the computed features, with each row corresponding
+         to a window and each column representing a specific feature.
+     """
+     # Compute gravity statistics (e.g., mean, std, etc.)
+     feature_dict = {}
+     for stat in grav_stats:
+         stats_result = compute_statistics(data=windowed_grav, statistic=stat)
+         for i, col in enumerate(config.gravity_cols):
+             feature_dict[f'{col}_{stat}'] = stats_result[:, i]
+
+     # Compute standard deviation of the Euclidean norm of the accelerometer signal
+     feature_dict['accelerometer_std_norm'] = compute_std_euclidean_norm(data=windowed_acc)
+
+     return pd.DataFrame(feature_dict)
+
+
+ def extract_spectral_domain_features(
+     windowed_data: np.ndarray,
+     config,
+     sensor: str,
+ ) -> pd.DataFrame:
+     """
+     Compute spectral domain features for a sensor's data.
+
+     This function computes the periodogram, extracts power in specific frequency bands,
+     calculates the dominant frequency, and computes Mel-frequency cepstral coefficients (MFCCs)
+     for a given sensor's windowed data.
+
+     Parameters
+     ----------
+     windowed_data : numpy.ndarray
+         A 3D numpy array of windowed sensor data of shape (N, M, A), where N is the number
+         of windows, M is the number of samples per window, and A is the number of axes.
+
+     config : object
+         Configuration object containing settings such as sampling frequency, window type,
+         frequency bands, and MFCC parameters.
+
+     sensor : str
+         The name of the sensor (e.g., 'accelerometer', 'gyroscope').
+
+     Returns
+     -------
+     pd.DataFrame
+         A DataFrame containing the computed spectral features, with each row corresponding
+         to a window and each column representing a specific feature.
+     """
+     # Initialize a dictionary to hold the results
+     feature_dict = {}
+
+     # Compute periodogram (power spectral density)
+     freqs, psd = periodogram(
+         x=windowed_data,
+         fs=config.sampling_frequency,
+         window=config.window_type,
+         axis=1
+     )
+
+     # Compute power in specified frequency bands
+     for band_name, band_freqs in config.d_frequency_bandwidths.items():
+         band_powers = compute_power_in_bandwidth(
+             freqs=freqs,
+             psd=psd,
+             fmin=band_freqs[0],
+             fmax=band_freqs[1],
+             include_max=False
+         )
+         for i, col in enumerate(config.axes):
+             feature_dict[f'{sensor}_{col}_{band_name}'] = band_powers[:, i]
+
+     # Compute dominant frequency for each axis
+     dominant_frequencies = compute_dominant_frequency(
+         freqs=freqs,
+         psd=psd,
+         fmin=config.spectrum_low_frequency,
+         fmax=config.spectrum_high_frequency
+     )
+
+     # Add dominant frequency features to the feature_dict
+     for axis, freq in zip(config.axes, dominant_frequencies.T):
+         feature_dict[f'{sensor}_{axis}_dominant_frequency'] = freq
+
+     # Compute total power in the PSD
+     total_power_psd = compute_total_power(psd)
+
+     # Compute MFCCs
+     mfccs = compute_mfccs(
+         total_power_array=total_power_psd,
+         config=config,
+         multiplication_factor=4
+     )
+
+     # Combine the MFCCs into the features DataFrame
+     mfcc_colnames = [f'{sensor}_mfcc_{x}' for x in range(1, config.mfcc_n_coefficients + 1)]
+     for i, colname in enumerate(mfcc_colnames):
+         feature_dict[colname] = mfccs[:, i]
+
+     return pd.DataFrame(feature_dict)
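
For orientation, a small shape check of the two helpers above, assuming the 3D (windows × samples × axes) layout used throughout the module and three sensor axes:

    import numpy as np

    from paradigma.config import GaitConfig
    from paradigma.pipelines.gait_pipeline import (
        extract_spectral_domain_features,
        extract_temporal_domain_features,
    )

    config = GaitConfig(step='gait')
    n_windows = 10
    n_samples = int(config.window_length_s * config.sampling_frequency)
    windowed = np.random.randn(n_windows, n_samples, 3)  # 3 axes assumed

    df_temporal = extract_temporal_domain_features(
        config=config, windowed_acc=windowed, windowed_grav=windowed,
        grav_stats=['mean', 'std']
    )
    df_spectral = extract_spectral_domain_features(
        windowed_data=windowed, config=config, sensor='accelerometer'
    )
    assert len(df_temporal) == len(df_spectral) == n_windows  # one row per window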