paradigma 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/classification.py +28 -11
- paradigma/config.py +158 -101
- paradigma/constants.py +39 -34
- paradigma/feature_extraction.py +270 -211
- paradigma/pipelines/gait_pipeline.py +286 -190
- paradigma/pipelines/pulse_rate_pipeline.py +202 -133
- paradigma/pipelines/pulse_rate_utils.py +144 -142
- paradigma/pipelines/tremor_pipeline.py +139 -95
- paradigma/preprocessing.py +179 -110
- paradigma/segmenting.py +138 -113
- paradigma/testing.py +359 -172
- paradigma/util.py +171 -80
- {paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info}/METADATA +39 -36
- paradigma-1.0.4.dist-info/RECORD +23 -0
- {paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info}/WHEEL +1 -1
- paradigma-1.0.4.dist-info/entry_points.txt +4 -0
- {paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info/licenses}/LICENSE +0 -1
- paradigma-1.0.2.dist-info/RECORD +0 -22
|
@@ -1,23 +1,43 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import List, Tuple
|
|
3
|
+
|
|
1
4
|
import numpy as np
|
|
2
5
|
import pandas as pd
|
|
3
6
|
from scipy.signal import periodogram
|
|
4
|
-
from typing import List, Tuple
|
|
5
7
|
|
|
6
8
|
from paradigma.classification import ClassifierPackage
|
|
7
|
-
from paradigma.constants import DataColumns
|
|
8
9
|
from paradigma.config import GaitConfig
|
|
9
|
-
from paradigma.
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
from paradigma.constants import DataColumns
|
|
11
|
+
from paradigma.feature_extraction import (
|
|
12
|
+
compute_angle,
|
|
13
|
+
compute_dominant_frequency,
|
|
14
|
+
compute_mfccs,
|
|
15
|
+
compute_peak_angular_velocity,
|
|
16
|
+
compute_power_in_bandwidth,
|
|
17
|
+
compute_range_of_motion,
|
|
18
|
+
compute_statistics,
|
|
19
|
+
compute_std_euclidean_norm,
|
|
20
|
+
compute_total_power,
|
|
21
|
+
extract_angle_extremes,
|
|
22
|
+
pca_transform_gyroscope,
|
|
23
|
+
remove_moving_average_angle,
|
|
24
|
+
)
|
|
25
|
+
from paradigma.segmenting import (
|
|
26
|
+
WindowedDataExtractor,
|
|
27
|
+
create_segments,
|
|
28
|
+
discard_segments,
|
|
29
|
+
tabulate_windows,
|
|
30
|
+
)
|
|
14
31
|
from paradigma.util import aggregate_parameter
|
|
15
32
|
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
# Only configure basic logging if no handlers exist
|
|
36
|
+
if not logger.hasHandlers():
|
|
37
|
+
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
|
|
38
|
+
|
|
16
39
|
|
|
17
|
-
def extract_gait_features(
|
|
18
|
-
df: pd.DataFrame,
|
|
19
|
-
config: GaitConfig
|
|
20
|
-
) -> pd.DataFrame:
|
|
40
|
+
def extract_gait_features(df: pd.DataFrame, config: GaitConfig) -> pd.DataFrame:
|
|
21
41
|
"""
|
|
22
42
|
Extracts gait features from accelerometer and gravity sensor data in the input DataFrame by computing temporal and spectral features.
|
|
23
43
|
|
|
@@ -44,7 +64,7 @@ def extract_gait_features(
|
|
|
44
64
|
A DataFrame containing extracted gait features, including temporal and spectral domain features. The DataFrame will have
|
|
45
65
|
columns corresponding to time, statistical features of the accelerometer and gravity data, and spectral features of the
|
|
46
66
|
accelerometer data.
|
|
47
|
-
|
|
67
|
+
|
|
48
68
|
Notes
|
|
49
69
|
-----
|
|
50
70
|
- This function groups the data into windows based on timestamps and applies Fast Fourier Transform to compute spectral features.
|
|
@@ -57,34 +77,36 @@ def extract_gait_features(
|
|
|
57
77
|
If the input DataFrame does not contain the required columns as specified in the configuration or if any step in the feature extraction fails.
|
|
58
78
|
"""
|
|
59
79
|
# Group sequences of timestamps into windows
|
|
60
|
-
|
|
80
|
+
windowed_colnames = (
|
|
81
|
+
[config.time_colname] + config.accelerometer_colnames + config.gravity_colnames
|
|
82
|
+
)
|
|
61
83
|
windowed_data = tabulate_windows(
|
|
62
|
-
df=df,
|
|
63
|
-
columns=
|
|
84
|
+
df=df,
|
|
85
|
+
columns=windowed_colnames,
|
|
64
86
|
window_length_s=config.window_length_s,
|
|
65
87
|
window_step_length_s=config.window_step_length_s,
|
|
66
|
-
fs=config.sampling_frequency
|
|
88
|
+
fs=config.sampling_frequency,
|
|
67
89
|
)
|
|
68
90
|
|
|
69
|
-
extractor = WindowedDataExtractor(
|
|
91
|
+
extractor = WindowedDataExtractor(windowed_colnames)
|
|
70
92
|
|
|
71
|
-
idx_time = extractor.get_index(
|
|
72
|
-
idx_acc = extractor.get_slice(config.
|
|
73
|
-
idx_grav = extractor.get_slice(config.
|
|
93
|
+
idx_time = extractor.get_index(config.time_colname)
|
|
94
|
+
idx_acc = extractor.get_slice(config.accelerometer_colnames)
|
|
95
|
+
idx_grav = extractor.get_slice(config.gravity_colnames)
|
|
74
96
|
|
|
75
97
|
# Extract data
|
|
76
98
|
start_time = np.min(windowed_data[:, :, idx_time], axis=1)
|
|
77
99
|
windowed_acc = windowed_data[:, :, idx_acc]
|
|
78
100
|
windowed_grav = windowed_data[:, :, idx_grav]
|
|
79
101
|
|
|
80
|
-
df_features = pd.DataFrame(start_time, columns=[
|
|
81
|
-
|
|
102
|
+
df_features = pd.DataFrame(start_time, columns=[config.time_colname])
|
|
103
|
+
|
|
82
104
|
# Compute statistics of the temporal domain signals (mean, std) for accelerometer and gravity
|
|
83
105
|
df_temporal_features = extract_temporal_domain_features(
|
|
84
|
-
config=config,
|
|
106
|
+
config=config,
|
|
85
107
|
windowed_acc=windowed_acc,
|
|
86
108
|
windowed_grav=windowed_grav,
|
|
87
|
-
grav_stats=[
|
|
109
|
+
grav_stats=["mean", "std"],
|
|
88
110
|
)
|
|
89
111
|
|
|
90
112
|
# Combine temporal features with the start time
|
|
@@ -92,9 +114,7 @@ def extract_gait_features(
|
|
|
92
114
|
|
|
93
115
|
# Transform the accelerometer data to the spectral domain using FFT and extract spectral features
|
|
94
116
|
df_spectral_features = extract_spectral_domain_features(
|
|
95
|
-
config=config,
|
|
96
|
-
sensor='accelerometer',
|
|
97
|
-
windowed_data=windowed_acc
|
|
117
|
+
config=config, sensor="accelerometer", windowed_data=windowed_acc
|
|
98
118
|
)
|
|
99
119
|
|
|
100
120
|
# Combine the spectral features with the previously computed temporal features
|
|
@@ -104,10 +124,8 @@ def extract_gait_features(
|
|
|
104
124
|
|
|
105
125
|
|
|
106
126
|
def detect_gait(
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
parallel: bool=False
|
|
110
|
-
) -> pd.Series:
|
|
127
|
+
df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
|
|
128
|
+
) -> pd.Series:
|
|
111
129
|
"""
|
|
112
130
|
Detects gait activity in the input DataFrame using a pre-trained classifier and applies a threshold to classify results.
|
|
113
131
|
|
|
@@ -121,7 +139,7 @@ def detect_gait(
|
|
|
121
139
|
Parameters
|
|
122
140
|
----------
|
|
123
141
|
df : pd.DataFrame
|
|
124
|
-
The input DataFrame containing features extracted from gait data. It must include the necessary columns
|
|
142
|
+
The input DataFrame containing features extracted from gait data. It must include the necessary columns
|
|
125
143
|
as specified in the classifier's feature names.
|
|
126
144
|
|
|
127
145
|
clf_package : ClassifierPackage
|
|
@@ -137,7 +155,7 @@ def detect_gait(
|
|
|
137
155
|
"""
|
|
138
156
|
# Set classifier
|
|
139
157
|
clf = clf_package.classifier
|
|
140
|
-
if not parallel and hasattr(clf,
|
|
158
|
+
if not parallel and hasattr(clf, "n_jobs"):
|
|
141
159
|
clf.n_jobs = 1
|
|
142
160
|
|
|
143
161
|
feature_names_scaling = clf_package.scaler.feature_names_in_
|
|
@@ -157,13 +175,13 @@ def detect_gait(
|
|
|
157
175
|
|
|
158
176
|
|
|
159
177
|
def extract_arm_activity_features(
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
178
|
+
df: pd.DataFrame,
|
|
179
|
+
config: GaitConfig,
|
|
180
|
+
) -> pd.DataFrame:
|
|
163
181
|
"""
|
|
164
182
|
Extract features related to arm activity from a time-series DataFrame.
|
|
165
183
|
|
|
166
|
-
This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
|
|
184
|
+
This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
|
|
167
185
|
and extracts features related to arm activity by performing the following steps:
|
|
168
186
|
1. Computes the angle and velocity from gyroscope data.
|
|
169
187
|
2. Filters the data to include only predicted gait segments.
|
|
@@ -183,13 +201,12 @@ def extract_arm_activity_features(
|
|
|
183
201
|
Returns
|
|
184
202
|
-------
|
|
185
203
|
pd.DataFrame
|
|
186
|
-
A DataFrame containing the extracted arm activity features, including angle, velocity,
|
|
204
|
+
A DataFrame containing the extracted arm activity features, including angle, velocity,
|
|
187
205
|
temporal, and spectral features.
|
|
188
206
|
"""
|
|
189
207
|
# Group consecutive timestamps into segments, with new segments starting after a pre-specified gap
|
|
190
208
|
df[DataColumns.SEGMENT_NR] = create_segments(
|
|
191
|
-
time_array=df[DataColumns.TIME],
|
|
192
|
-
max_segment_gap_s=config.max_segment_gap_s
|
|
209
|
+
time_array=df[DataColumns.TIME], max_segment_gap_s=config.max_segment_gap_s
|
|
193
210
|
)
|
|
194
211
|
|
|
195
212
|
# Remove segments that do not meet predetermined criteria
|
|
@@ -198,27 +215,27 @@ def extract_arm_activity_features(
|
|
|
198
215
|
segment_nr_colname=DataColumns.SEGMENT_NR,
|
|
199
216
|
min_segment_length_s=config.min_segment_length_s,
|
|
200
217
|
fs=config.sampling_frequency,
|
|
201
|
-
format=
|
|
218
|
+
format="timestamps",
|
|
202
219
|
)
|
|
203
220
|
|
|
204
221
|
# Create windows of fixed length and step size from the time series per segment
|
|
205
222
|
windowed_data = []
|
|
206
223
|
df_grouped = df.groupby(DataColumns.SEGMENT_NR)
|
|
207
|
-
|
|
208
|
-
[
|
|
209
|
-
config.
|
|
210
|
-
config.
|
|
211
|
-
config.
|
|
224
|
+
windowed_colnames = (
|
|
225
|
+
[config.time_colname]
|
|
226
|
+
+ config.accelerometer_colnames
|
|
227
|
+
+ config.gravity_colnames
|
|
228
|
+
+ config.gyroscope_colnames
|
|
212
229
|
)
|
|
213
230
|
|
|
214
231
|
# Collect windows from all segments in a list for faster concatenation
|
|
215
232
|
for _, group in df_grouped:
|
|
216
233
|
windows = tabulate_windows(
|
|
217
|
-
df=group,
|
|
218
|
-
columns=
|
|
234
|
+
df=group,
|
|
235
|
+
columns=windowed_colnames,
|
|
219
236
|
window_length_s=config.window_length_s,
|
|
220
237
|
window_step_length_s=config.window_step_length_s,
|
|
221
|
-
fs=config.sampling_frequency
|
|
238
|
+
fs=config.sampling_frequency,
|
|
222
239
|
)
|
|
223
240
|
if len(windows) > 0: # Skip if no windows are created
|
|
224
241
|
windowed_data.append(windows)
|
|
@@ -232,12 +249,12 @@ def extract_arm_activity_features(
|
|
|
232
249
|
windowed_data = np.concatenate(windowed_data, axis=0)
|
|
233
250
|
|
|
234
251
|
# Slice columns for accelerometer, gravity, gyroscope, angle, and velocity
|
|
235
|
-
extractor = WindowedDataExtractor(
|
|
252
|
+
extractor = WindowedDataExtractor(windowed_colnames)
|
|
236
253
|
|
|
237
|
-
idx_time = extractor.get_index(
|
|
238
|
-
idx_acc = extractor.get_slice(config.
|
|
239
|
-
idx_grav = extractor.get_slice(config.
|
|
240
|
-
idx_gyro = extractor.get_slice(config.
|
|
254
|
+
idx_time = extractor.get_index(config.time_colname)
|
|
255
|
+
idx_acc = extractor.get_slice(config.accelerometer_colnames)
|
|
256
|
+
idx_grav = extractor.get_slice(config.gravity_colnames)
|
|
257
|
+
idx_gyro = extractor.get_slice(config.gyroscope_colnames)
|
|
241
258
|
|
|
242
259
|
# Extract data
|
|
243
260
|
start_time = np.min(windowed_data[:, :, idx_time], axis=1)
|
|
@@ -246,23 +263,23 @@ def extract_arm_activity_features(
|
|
|
246
263
|
windowed_gyro = windowed_data[:, :, idx_gyro]
|
|
247
264
|
|
|
248
265
|
# Initialize DataFrame for features
|
|
249
|
-
df_features = pd.DataFrame(start_time, columns=[
|
|
266
|
+
df_features = pd.DataFrame(start_time, columns=[config.time_colname])
|
|
250
267
|
|
|
251
268
|
# Extract temporal domain features (e.g., mean, std for accelerometer and gravity)
|
|
252
269
|
df_temporal_features = extract_temporal_domain_features(
|
|
253
|
-
config=config,
|
|
254
|
-
windowed_acc=windowed_acc,
|
|
255
|
-
windowed_grav=windowed_grav,
|
|
256
|
-
grav_stats=[
|
|
270
|
+
config=config,
|
|
271
|
+
windowed_acc=windowed_acc,
|
|
272
|
+
windowed_grav=windowed_grav,
|
|
273
|
+
grav_stats=["mean", "std"],
|
|
257
274
|
)
|
|
258
275
|
df_features = pd.concat([df_features, df_temporal_features], axis=1)
|
|
259
276
|
|
|
260
277
|
# Extract spectral domain features for accelerometer and gyroscope signals
|
|
261
|
-
for sensor_name, windowed_sensor in zip(
|
|
278
|
+
for sensor_name, windowed_sensor in zip(
|
|
279
|
+
["accelerometer", "gyroscope"], [windowed_acc, windowed_gyro]
|
|
280
|
+
):
|
|
262
281
|
df_spectral_features = extract_spectral_domain_features(
|
|
263
|
-
config=config,
|
|
264
|
-
sensor=sensor_name,
|
|
265
|
-
windowed_data=windowed_sensor
|
|
282
|
+
config=config, sensor=sensor_name, windowed_data=windowed_sensor
|
|
266
283
|
)
|
|
267
284
|
df_features = pd.concat([df_features, df_spectral_features], axis=1)
|
|
268
285
|
|
|
@@ -270,10 +287,8 @@ def extract_arm_activity_features(
|
|
|
270
287
|
|
|
271
288
|
|
|
272
289
|
def filter_gait(
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
parallel: bool=False
|
|
276
|
-
) -> pd.Series:
|
|
290
|
+
df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
|
|
291
|
+
) -> pd.Series:
|
|
277
292
|
"""
|
|
278
293
|
Filters gait data to identify windows with no other arm activity using a pre-trained classifier.
|
|
279
294
|
|
|
@@ -293,10 +308,10 @@ def filter_gait(
|
|
|
293
308
|
"""
|
|
294
309
|
if df.shape[0] == 0:
|
|
295
310
|
raise ValueError("No data found in the input DataFrame.")
|
|
296
|
-
|
|
311
|
+
|
|
297
312
|
# Set classifier
|
|
298
313
|
clf = clf_package.classifier
|
|
299
|
-
if not parallel and hasattr(clf,
|
|
314
|
+
if not parallel and hasattr(clf, "n_jobs"):
|
|
300
315
|
clf.n_jobs = 1
|
|
301
316
|
|
|
302
317
|
feature_names_scaling = clf_package.scaler.feature_names_in_
|
|
@@ -316,12 +331,12 @@ def filter_gait(
|
|
|
316
331
|
|
|
317
332
|
|
|
318
333
|
def quantify_arm_swing(
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
334
|
+
df: pd.DataFrame,
|
|
335
|
+
fs: int,
|
|
336
|
+
filtered: bool = False,
|
|
337
|
+
max_segment_gap_s: float = 1.5,
|
|
338
|
+
min_segment_length_s: float = 1.5,
|
|
339
|
+
) -> Tuple[dict[str, pd.DataFrame], dict]:
|
|
325
340
|
"""
|
|
326
341
|
Quantify arm swing parameters for segments of motion based on gyroscope data.
|
|
327
342
|
|
|
@@ -339,71 +354,75 @@ def quantify_arm_swing(
|
|
|
339
354
|
|
|
340
355
|
max_segment_gap_s : float, optional, default=1.5
|
|
341
356
|
The maximum gap in seconds between consecutive timestamps to group them into segments.
|
|
342
|
-
|
|
357
|
+
|
|
343
358
|
min_segment_length_s : float, optional, default=1.5
|
|
344
359
|
The minimum length in seconds for a segment to be considered valid.
|
|
345
360
|
|
|
346
361
|
Returns
|
|
347
362
|
-------
|
|
348
363
|
Tuple[pd.DataFrame, dict]
|
|
349
|
-
A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing
|
|
364
|
+
A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing
|
|
350
365
|
metadata for each segment.
|
|
351
366
|
"""
|
|
352
367
|
# Group consecutive timestamps into segments, with new segments starting after a pre-specified gap.
|
|
353
368
|
# Segments are made based on predicted gait
|
|
354
|
-
df[
|
|
355
|
-
time_array=df[DataColumns.TIME],
|
|
356
|
-
max_segment_gap_s=max_segment_gap_s
|
|
369
|
+
df["unfiltered_segment_nr"] = create_segments(
|
|
370
|
+
time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
|
|
357
371
|
)
|
|
358
372
|
|
|
359
|
-
# Segment category is determined based on predicted gait, hence it is set
|
|
360
|
-
# before filtering the DataFrame to only include predicted no other arm activity
|
|
361
|
-
df[DataColumns.SEGMENT_CAT] = categorize_segments(df=df, fs=fs)
|
|
362
|
-
|
|
363
373
|
# Remove segments that do not meet predetermined criteria
|
|
364
374
|
df = discard_segments(
|
|
365
375
|
df=df,
|
|
366
|
-
segment_nr_colname=
|
|
376
|
+
segment_nr_colname="unfiltered_segment_nr",
|
|
367
377
|
min_segment_length_s=min_segment_length_s,
|
|
368
378
|
fs=fs,
|
|
369
|
-
format=
|
|
379
|
+
format="timestamps",
|
|
370
380
|
)
|
|
371
381
|
|
|
372
382
|
if df.empty:
|
|
373
|
-
raise ValueError(
|
|
383
|
+
raise ValueError(
|
|
384
|
+
"No segments found in the input data after discarding segments of invalid shape."
|
|
385
|
+
)
|
|
386
|
+
|
|
387
|
+
# Create dictionary of gait segment number and duration
|
|
388
|
+
gait_segment_duration_dict = {
|
|
389
|
+
segment_nr: len(group[DataColumns.TIME]) / fs
|
|
390
|
+
for segment_nr, group in df.groupby("unfiltered_segment_nr", sort=False)
|
|
391
|
+
}
|
|
374
392
|
|
|
375
393
|
# If no arm swing data is remaining, return an empty dictionary
|
|
376
|
-
if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].empty:
|
|
394
|
+
if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1].empty:
|
|
377
395
|
raise ValueError("No gait without other arm activities to quantify.")
|
|
378
396
|
elif filtered:
|
|
379
397
|
# Filter the DataFrame to only include predicted no other arm activity (1)
|
|
380
|
-
df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].reset_index(
|
|
398
|
+
df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1].reset_index(
|
|
399
|
+
drop=True
|
|
400
|
+
)
|
|
381
401
|
|
|
382
402
|
# Group consecutive timestamps into segments of filtered gait
|
|
383
|
-
df[
|
|
384
|
-
time_array=df[DataColumns.TIME],
|
|
385
|
-
max_segment_gap_s=max_segment_gap_s
|
|
403
|
+
df["filtered_segment_nr"] = create_segments(
|
|
404
|
+
time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
|
|
386
405
|
)
|
|
387
406
|
|
|
388
407
|
# Remove segments that do not meet predetermined criteria
|
|
389
408
|
df = discard_segments(
|
|
390
409
|
df=df,
|
|
391
|
-
segment_nr_colname=
|
|
410
|
+
segment_nr_colname="filtered_segment_nr",
|
|
392
411
|
min_segment_length_s=min_segment_length_s,
|
|
393
412
|
fs=fs,
|
|
394
413
|
)
|
|
395
414
|
|
|
396
415
|
if df.empty:
|
|
397
|
-
raise ValueError(
|
|
416
|
+
raise ValueError(
|
|
417
|
+
"No filtered gait segments found in the input data after discarding segments of invalid shape."
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
grouping_colname = "filtered_segment_nr" if filtered else "unfiltered_segment_nr"
|
|
398
421
|
|
|
399
422
|
arm_swing_quantified = []
|
|
400
423
|
segment_meta = {
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
'duration_s': len(df[DataColumns.TIME]) / fs
|
|
404
|
-
},
|
|
405
|
-
},
|
|
406
|
-
'per_segment': {}
|
|
424
|
+
"all": {"duration_s": len(df[DataColumns.TIME]) / fs},
|
|
425
|
+
"per_segment": {},
|
|
407
426
|
}
|
|
408
427
|
|
|
409
428
|
# PCA is fitted on only predicted gait without other arm activity if filtered, otherwise
|
|
@@ -415,8 +434,27 @@ def quantify_arm_swing(
|
|
|
415
434
|
)
|
|
416
435
|
|
|
417
436
|
# Group and process segments
|
|
418
|
-
for segment_nr, group in df.groupby(
|
|
419
|
-
|
|
437
|
+
for segment_nr, group in df.groupby(grouping_colname, sort=False):
|
|
438
|
+
if filtered:
|
|
439
|
+
gait_segment_nr = group["unfiltered_segment_nr"].iloc[
|
|
440
|
+
0
|
|
441
|
+
] # Each filtered segment is contained within an unfiltered segment
|
|
442
|
+
else:
|
|
443
|
+
gait_segment_nr = segment_nr
|
|
444
|
+
|
|
445
|
+
try:
|
|
446
|
+
gait_segment_duration_s = gait_segment_duration_dict[gait_segment_nr]
|
|
447
|
+
except KeyError:
|
|
448
|
+
logger.warning(
|
|
449
|
+
"Segment %s (filtered = %s) not found in gait segment duration dictionary. Skipping this segment.",
|
|
450
|
+
gait_segment_nr,
|
|
451
|
+
filtered,
|
|
452
|
+
)
|
|
453
|
+
logger.debug(
|
|
454
|
+
"Available segments: %s", list(gait_segment_duration_dict.keys())
|
|
455
|
+
)
|
|
456
|
+
continue
|
|
457
|
+
|
|
420
458
|
time_array = group[DataColumns.TIME].to_numpy()
|
|
421
459
|
velocity_array = group[DataColumns.VELOCITY].to_numpy()
|
|
422
460
|
|
|
@@ -432,18 +470,22 @@ def quantify_arm_swing(
|
|
|
432
470
|
fs=fs,
|
|
433
471
|
)
|
|
434
472
|
|
|
435
|
-
segment_meta[
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
DataColumns.SEGMENT_CAT: segment_cat
|
|
473
|
+
segment_meta["per_segment"][segment_nr] = {
|
|
474
|
+
"start_time_s": time_array.min(),
|
|
475
|
+
"end_time_s": time_array.max(),
|
|
476
|
+
"duration_unfiltered_segment_s": gait_segment_duration_s,
|
|
440
477
|
}
|
|
441
478
|
|
|
442
|
-
if
|
|
479
|
+
if filtered:
|
|
480
|
+
segment_meta["per_segment"][segment_nr]["duration_filtered_segment_s"] = (
|
|
481
|
+
len(time_array) / fs
|
|
482
|
+
)
|
|
483
|
+
|
|
484
|
+
if angle_array.size > 0:
|
|
443
485
|
angle_extrema_indices, _, _ = extract_angle_extremes(
|
|
444
486
|
angle_array=angle_array,
|
|
445
487
|
sampling_frequency=fs,
|
|
446
|
-
max_frequency_activity=1.75
|
|
488
|
+
max_frequency_activity=1.75,
|
|
447
489
|
)
|
|
448
490
|
|
|
449
491
|
if len(angle_extrema_indices) > 1: # Requires at minimum 2 peaks
|
|
@@ -454,44 +496,47 @@ def quantify_arm_swing(
|
|
|
454
496
|
)
|
|
455
497
|
except Exception as e:
|
|
456
498
|
# Handle the error, set RoM to NaN, and log the error
|
|
457
|
-
print(
|
|
499
|
+
print(
|
|
500
|
+
f"Error computing range of motion for segment {segment_nr}: {e}"
|
|
501
|
+
)
|
|
458
502
|
rom = np.array([np.nan])
|
|
459
503
|
|
|
460
504
|
try:
|
|
461
505
|
pav = compute_peak_angular_velocity(
|
|
462
506
|
velocity_array=velocity_array,
|
|
463
|
-
angle_extrema_indices=angle_extrema_indices
|
|
507
|
+
angle_extrema_indices=angle_extrema_indices,
|
|
464
508
|
)
|
|
465
509
|
except Exception as e:
|
|
466
510
|
# Handle the error, set pav to NaN, and log the error
|
|
467
|
-
print(
|
|
511
|
+
print(
|
|
512
|
+
f"Error computing peak angular velocity for segment {segment_nr}: {e}"
|
|
513
|
+
)
|
|
468
514
|
pav = np.array([np.nan])
|
|
469
515
|
|
|
470
|
-
df_params_segment = pd.DataFrame(
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
516
|
+
df_params_segment = pd.DataFrame(
|
|
517
|
+
{
|
|
518
|
+
DataColumns.SEGMENT_NR: segment_nr,
|
|
519
|
+
DataColumns.RANGE_OF_MOTION: rom,
|
|
520
|
+
DataColumns.PEAK_VELOCITY: pav,
|
|
521
|
+
}
|
|
522
|
+
)
|
|
476
523
|
|
|
477
524
|
arm_swing_quantified.append(df_params_segment)
|
|
478
525
|
|
|
479
|
-
# Combine segment categories
|
|
480
|
-
segment_categories = set([segment_meta['per_segment'][x][DataColumns.SEGMENT_CAT] for x in segment_meta['per_segment'].keys()])
|
|
481
|
-
for segment_cat in segment_categories:
|
|
482
|
-
segment_meta['aggregated'][segment_cat] = {
|
|
483
|
-
'duration_s': sum([segment_meta['per_segment'][x]['duration_s'] for x in segment_meta['per_segment'].keys() if segment_meta['per_segment'][x][DataColumns.SEGMENT_CAT] == segment_cat])
|
|
484
|
-
}
|
|
485
|
-
|
|
486
526
|
arm_swing_quantified = pd.concat(arm_swing_quantified, ignore_index=True)
|
|
487
|
-
|
|
527
|
+
|
|
488
528
|
return arm_swing_quantified, segment_meta
|
|
489
529
|
|
|
490
530
|
|
|
491
|
-
def aggregate_arm_swing_params(
|
|
531
|
+
def aggregate_arm_swing_params(
|
|
532
|
+
df_arm_swing_params: pd.DataFrame,
|
|
533
|
+
segment_meta: dict,
|
|
534
|
+
segment_cats: List[tuple],
|
|
535
|
+
aggregates: List[str] = ["median"],
|
|
536
|
+
) -> dict:
|
|
492
537
|
"""
|
|
493
538
|
Aggregate the quantification results for arm swing parameters.
|
|
494
|
-
|
|
539
|
+
|
|
495
540
|
Parameters
|
|
496
541
|
----------
|
|
497
542
|
df_arm_swing_params : pd.DataFrame
|
|
@@ -499,10 +544,12 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
|
|
|
499
544
|
|
|
500
545
|
segment_meta : dict
|
|
501
546
|
A dictionary containing metadata for each segment.
|
|
502
|
-
|
|
547
|
+
|
|
548
|
+
segment_cats : List[tuple]
|
|
549
|
+
A list of tuples defining the segment categories, where each tuple contains the lower and upper bounds for the segment duration.
|
|
503
550
|
aggregates : List[str], optional
|
|
504
551
|
A list of aggregation methods to apply to the quantification results.
|
|
505
|
-
|
|
552
|
+
|
|
506
553
|
Returns
|
|
507
554
|
-------
|
|
508
555
|
dict
|
|
@@ -510,43 +557,93 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
|
|
|
510
557
|
"""
|
|
511
558
|
arm_swing_parameters = [DataColumns.RANGE_OF_MOTION, DataColumns.PEAK_VELOCITY]
|
|
512
559
|
|
|
513
|
-
uq_segment_cats = set([segment_meta[x][DataColumns.SEGMENT_CAT] for x in df_arm_swing_params[DataColumns.SEGMENT_NR].unique()])
|
|
514
|
-
|
|
515
560
|
aggregated_results = {}
|
|
516
|
-
for
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
561
|
+
for segment_cat_range in segment_cats:
|
|
562
|
+
segment_cat_str = f"{segment_cat_range[0]}_{segment_cat_range[1]}"
|
|
563
|
+
cat_segments = [
|
|
564
|
+
x
|
|
565
|
+
for x in segment_meta.keys()
|
|
566
|
+
if segment_meta[x]["duration_unfiltered_segment_s"] >= segment_cat_range[0]
|
|
567
|
+
and segment_meta[x]["duration_unfiltered_segment_s"] < segment_cat_range[1]
|
|
568
|
+
]
|
|
569
|
+
|
|
570
|
+
if len(cat_segments) > 0:
|
|
571
|
+
# For each segment, use 'duration_filtered_segment_s' if present, else 'duration_unfiltered_segment_s'
|
|
572
|
+
aggregated_results[segment_cat_str] = {
|
|
573
|
+
"duration_s": sum(
|
|
574
|
+
[
|
|
575
|
+
(
|
|
576
|
+
segment_meta[x]["duration_filtered_segment_s"]
|
|
577
|
+
if "duration_filtered_segment_s" in segment_meta[x]
|
|
578
|
+
else segment_meta[x]["duration_unfiltered_segment_s"]
|
|
579
|
+
)
|
|
580
|
+
for x in cat_segments
|
|
581
|
+
]
|
|
582
|
+
)
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
df_arm_swing_params_cat = df_arm_swing_params.loc[
|
|
586
|
+
df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)
|
|
587
|
+
]
|
|
588
|
+
|
|
589
|
+
# Aggregate across all segments
|
|
590
|
+
aggregates_per_segment = ["median", "mean"]
|
|
591
|
+
|
|
592
|
+
for arm_swing_parameter in arm_swing_parameters:
|
|
593
|
+
for aggregate in aggregates:
|
|
594
|
+
if aggregate in ["std", "cov"]:
|
|
595
|
+
per_segment_agg = []
|
|
596
|
+
# If the aggregate is 'cov' (coefficient of variation), we also compute the mean and standard deviation per segment
|
|
597
|
+
segment_groups = dict(
|
|
598
|
+
tuple(
|
|
599
|
+
df_arm_swing_params_cat.groupby(DataColumns.SEGMENT_NR)
|
|
600
|
+
)
|
|
601
|
+
)
|
|
602
|
+
for segment_nr in cat_segments:
|
|
603
|
+
segment_df = segment_groups.get(segment_nr)
|
|
604
|
+
if segment_df is not None:
|
|
605
|
+
per_segment_agg.append(
|
|
606
|
+
aggregate_parameter(
|
|
607
|
+
segment_df[arm_swing_parameter], aggregate
|
|
608
|
+
)
|
|
609
|
+
)
|
|
610
|
+
|
|
611
|
+
# Drop nans
|
|
612
|
+
per_segment_agg = np.array(per_segment_agg)
|
|
613
|
+
per_segment_agg = per_segment_agg[~np.isnan(per_segment_agg)]
|
|
614
|
+
|
|
615
|
+
for segment_level_aggregate in aggregates_per_segment:
|
|
616
|
+
aggregated_results[segment_cat_str][
|
|
617
|
+
f"{segment_level_aggregate}_{aggregate}_{arm_swing_parameter}"
|
|
618
|
+
] = aggregate_parameter(
|
|
619
|
+
per_segment_agg, segment_level_aggregate
|
|
620
|
+
)
|
|
621
|
+
else:
|
|
622
|
+
aggregated_results[segment_cat_str][
|
|
623
|
+
f"{aggregate}_{arm_swing_parameter}"
|
|
624
|
+
] = aggregate_parameter(
|
|
625
|
+
df_arm_swing_params_cat[arm_swing_parameter], aggregate
|
|
626
|
+
)
|
|
627
|
+
|
|
628
|
+
else:
|
|
629
|
+
# If no segments are found for this category, initialize with NaN
|
|
630
|
+
aggregated_results[segment_cat_str] = {
|
|
631
|
+
"duration_s": 0,
|
|
632
|
+
}
|
|
536
633
|
|
|
537
634
|
return aggregated_results
|
|
538
635
|
|
|
539
636
|
|
|
540
637
|
def extract_temporal_domain_features(
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
638
|
+
config,
|
|
639
|
+
windowed_acc: np.ndarray,
|
|
640
|
+
windowed_grav: np.ndarray,
|
|
641
|
+
grav_stats: List[str] = ["mean"],
|
|
642
|
+
) -> pd.DataFrame:
|
|
546
643
|
"""
|
|
547
644
|
Compute temporal domain features for the accelerometer signal.
|
|
548
645
|
|
|
549
|
-
This function calculates various statistical features for the gravity signal
|
|
646
|
+
This function calculates various statistical features for the gravity signal
|
|
550
647
|
and computes the standard deviation of the accelerometer's Euclidean norm.
|
|
551
648
|
|
|
552
649
|
Parameters
|
|
@@ -554,10 +651,10 @@ def extract_temporal_domain_features(
|
|
|
554
651
|
config : object
|
|
555
652
|
Configuration object containing the accelerometer and gravity column names.
|
|
556
653
|
windowed_acc : numpy.ndarray
|
|
557
|
-
A 2D numpy array of shape (N, M) where N is the number of windows and M is
|
|
654
|
+
A 2D numpy array of shape (N, M) where N is the number of windows and M is
|
|
558
655
|
the number of accelerometer values per window.
|
|
559
656
|
windowed_grav : numpy.ndarray
|
|
560
|
-
A 2D numpy array of shape (N, M) where N is the number of windows and M is
|
|
657
|
+
A 2D numpy array of shape (N, M) where N is the number of windows and M is
|
|
561
658
|
the number of gravity signal values per window.
|
|
562
659
|
grav_stats : list of str, optional
|
|
563
660
|
A list of statistics to compute for the gravity signal (default is ['mean']).
|
|
@@ -565,32 +662,34 @@ def extract_temporal_domain_features(
|
|
|
565
662
|
Returns
|
|
566
663
|
-------
|
|
567
664
|
pd.DataFrame
|
|
568
|
-
A DataFrame containing the computed features, with each row corresponding
|
|
665
|
+
A DataFrame containing the computed features, with each row corresponding
|
|
569
666
|
to a window and each column representing a specific feature.
|
|
570
667
|
"""
|
|
571
668
|
# Compute gravity statistics (e.g., mean, std, etc.)
|
|
572
669
|
feature_dict = {}
|
|
573
670
|
for stat in grav_stats:
|
|
574
671
|
stats_result = compute_statistics(data=windowed_grav, statistic=stat)
|
|
575
|
-
for i, col in enumerate(config.
|
|
576
|
-
feature_dict[f
|
|
672
|
+
for i, col in enumerate(config.gravity_colnames):
|
|
673
|
+
feature_dict[f"{col}_{stat}"] = stats_result[:, i]
|
|
577
674
|
|
|
578
675
|
# Compute standard deviation of the Euclidean norm of the accelerometer signal
|
|
579
|
-
feature_dict[
|
|
676
|
+
feature_dict["accelerometer_std_norm"] = compute_std_euclidean_norm(
|
|
677
|
+
data=windowed_acc
|
|
678
|
+
)
|
|
580
679
|
|
|
581
680
|
return pd.DataFrame(feature_dict)
|
|
582
681
|
|
|
583
682
|
|
|
584
683
|
def extract_spectral_domain_features(
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
684
|
+
windowed_data: np.ndarray,
|
|
685
|
+
config,
|
|
686
|
+
sensor: str,
|
|
687
|
+
) -> pd.DataFrame:
|
|
589
688
|
"""
|
|
590
689
|
Compute spectral domain features for a sensor's data.
|
|
591
690
|
|
|
592
|
-
This function computes the periodogram, extracts power in specific frequency bands,
|
|
593
|
-
calculates the dominant frequency, and computes Mel-frequency cepstral coefficients (MFCCs)
|
|
691
|
+
This function computes the periodogram, extracts power in specific frequency bands,
|
|
692
|
+
calculates the dominant frequency, and computes Mel-frequency cepstral coefficients (MFCCs)
|
|
594
693
|
for a given sensor's windowed data.
|
|
595
694
|
|
|
596
695
|
Parameters
|
|
@@ -599,16 +698,16 @@ def extract_spectral_domain_features(
|
|
|
599
698
|
A 2D numpy array where each row corresponds to a window of sensor data.
|
|
600
699
|
|
|
601
700
|
config : object
|
|
602
|
-
Configuration object containing settings such as sampling frequency, window type,
|
|
701
|
+
Configuration object containing settings such as sampling frequency, window type,
|
|
603
702
|
frequency bands, and MFCC parameters.
|
|
604
703
|
|
|
605
704
|
sensor : str
|
|
606
705
|
The name of the sensor (e.g., 'accelerometer', 'gyroscope').
|
|
607
|
-
|
|
706
|
+
|
|
608
707
|
Returns
|
|
609
708
|
-------
|
|
610
709
|
pd.DataFrame
|
|
611
|
-
A DataFrame containing the computed spectral features, with each row corresponding
|
|
710
|
+
A DataFrame containing the computed spectral features, with each row corresponding
|
|
612
711
|
to a window and each column representing a specific feature.
|
|
613
712
|
"""
|
|
614
713
|
# Initialize a dictionary to hold the results
|
|
@@ -616,49 +715,46 @@ def extract_spectral_domain_features(
|
|
|
616
715
|
|
|
617
716
|
# Compute periodogram (power spectral density)
|
|
618
717
|
freqs, psd = periodogram(
|
|
619
|
-
x=windowed_data,
|
|
620
|
-
fs=config.sampling_frequency,
|
|
621
|
-
window=config.window_type,
|
|
622
|
-
axis=1
|
|
718
|
+
x=windowed_data, fs=config.sampling_frequency, window=config.window_type, axis=1
|
|
623
719
|
)
|
|
624
720
|
|
|
625
721
|
# Compute power in specified frequency bands
|
|
626
722
|
for band_name, band_freqs in config.d_frequency_bandwidths.items():
|
|
627
723
|
band_powers = compute_power_in_bandwidth(
|
|
628
724
|
freqs=freqs,
|
|
629
|
-
psd=psd,
|
|
725
|
+
psd=psd,
|
|
630
726
|
fmin=band_freqs[0],
|
|
631
727
|
fmax=band_freqs[1],
|
|
632
|
-
include_max=False
|
|
728
|
+
include_max=False,
|
|
633
729
|
)
|
|
634
730
|
for i, col in enumerate(config.axes):
|
|
635
|
-
feature_dict[f
|
|
731
|
+
feature_dict[f"{sensor}_{col}_{band_name}"] = band_powers[:, i]
|
|
636
732
|
|
|
637
733
|
# Compute dominant frequency for each axis
|
|
638
734
|
dominant_frequencies = compute_dominant_frequency(
|
|
639
|
-
freqs=freqs,
|
|
640
|
-
psd=psd,
|
|
641
|
-
fmin=config.spectrum_low_frequency,
|
|
642
|
-
fmax=config.spectrum_high_frequency
|
|
735
|
+
freqs=freqs,
|
|
736
|
+
psd=psd,
|
|
737
|
+
fmin=config.spectrum_low_frequency,
|
|
738
|
+
fmax=config.spectrum_high_frequency,
|
|
643
739
|
)
|
|
644
740
|
|
|
645
741
|
# Add dominant frequency features to the feature_dict
|
|
646
742
|
for axis, freq in zip(config.axes, dominant_frequencies.T):
|
|
647
|
-
feature_dict[f
|
|
743
|
+
feature_dict[f"{sensor}_{axis}_dominant_frequency"] = freq
|
|
648
744
|
|
|
649
745
|
# Compute total power in the PSD
|
|
650
746
|
total_power_psd = compute_total_power(psd)
|
|
651
747
|
|
|
652
748
|
# Compute MFCCs
|
|
653
749
|
mfccs = compute_mfccs(
|
|
654
|
-
total_power_array=total_power_psd,
|
|
655
|
-
config=config,
|
|
656
|
-
multiplication_factor=4
|
|
750
|
+
total_power_array=total_power_psd, config=config, multiplication_factor=4
|
|
657
751
|
)
|
|
658
752
|
|
|
659
753
|
# Combine the MFCCs into the features DataFrame
|
|
660
|
-
mfcc_colnames = [
|
|
754
|
+
mfcc_colnames = [
|
|
755
|
+
f"{sensor}_mfcc_{x}" for x in range(1, config.mfcc_n_coefficients + 1)
|
|
756
|
+
]
|
|
661
757
|
for i, colname in enumerate(mfcc_colnames):
|
|
662
758
|
feature_dict[colname] = mfccs[:, i]
|
|
663
759
|
|
|
664
|
-
return pd.DataFrame(feature_dict)
|
|
760
|
+
return pd.DataFrame(feature_dict)
|