paradigma 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/classification.py +28 -11
- paradigma/config.py +157 -102
- paradigma/constants.py +39 -34
- paradigma/feature_extraction.py +270 -211
- paradigma/pipelines/gait_pipeline.py +232 -184
- paradigma/pipelines/pulse_rate_pipeline.py +202 -133
- paradigma/pipelines/pulse_rate_utils.py +144 -142
- paradigma/pipelines/tremor_pipeline.py +138 -85
- paradigma/preprocessing.py +179 -110
- paradigma/segmenting.py +138 -113
- paradigma/testing.py +359 -172
- paradigma/util.py +158 -83
- {paradigma-1.0.3.dist-info → paradigma-1.0.4.dist-info}/METADATA +31 -29
- paradigma-1.0.4.dist-info/RECORD +23 -0
- {paradigma-1.0.3.dist-info → paradigma-1.0.4.dist-info}/WHEEL +1 -1
- paradigma-1.0.4.dist-info/entry_points.txt +4 -0
- {paradigma-1.0.3.dist-info → paradigma-1.0.4.dist-info/licenses}/LICENSE +0 -1
- paradigma-1.0.3.dist-info/RECORD +0 -22
|
@@ -1,30 +1,43 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from typing import List, Tuple
|
|
3
|
+
|
|
2
4
|
import numpy as np
|
|
3
5
|
import pandas as pd
|
|
4
6
|
from scipy.signal import periodogram
|
|
5
|
-
from typing import List, Tuple
|
|
6
7
|
|
|
7
8
|
from paradigma.classification import ClassifierPackage
|
|
8
|
-
from paradigma.constants import DataColumns
|
|
9
9
|
from paradigma.config import GaitConfig
|
|
10
|
-
from paradigma.
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
10
|
+
from paradigma.constants import DataColumns
|
|
11
|
+
from paradigma.feature_extraction import (
|
|
12
|
+
compute_angle,
|
|
13
|
+
compute_dominant_frequency,
|
|
14
|
+
compute_mfccs,
|
|
15
|
+
compute_peak_angular_velocity,
|
|
16
|
+
compute_power_in_bandwidth,
|
|
17
|
+
compute_range_of_motion,
|
|
18
|
+
compute_statistics,
|
|
19
|
+
compute_std_euclidean_norm,
|
|
20
|
+
compute_total_power,
|
|
21
|
+
extract_angle_extremes,
|
|
22
|
+
pca_transform_gyroscope,
|
|
23
|
+
remove_moving_average_angle,
|
|
24
|
+
)
|
|
25
|
+
from paradigma.segmenting import (
|
|
26
|
+
WindowedDataExtractor,
|
|
27
|
+
create_segments,
|
|
28
|
+
discard_segments,
|
|
29
|
+
tabulate_windows,
|
|
30
|
+
)
|
|
15
31
|
from paradigma.util import aggregate_parameter
|
|
16
32
|
|
|
17
|
-
|
|
18
33
|
logger = logging.getLogger(__name__)
|
|
19
34
|
|
|
20
35
|
# Only configure basic logging if no handlers exist
|
|
21
36
|
if not logger.hasHandlers():
|
|
22
|
-
logging.basicConfig(level=logging.INFO, format=
|
|
37
|
+
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
|
|
38
|
+
|
|
23
39
|
|
|
24
|
-
def extract_gait_features(
|
|
25
|
-
df: pd.DataFrame,
|
|
26
|
-
config: GaitConfig
|
|
27
|
-
) -> pd.DataFrame:
|
|
40
|
+
def extract_gait_features(df: pd.DataFrame, config: GaitConfig) -> pd.DataFrame:
|
|
28
41
|
"""
|
|
29
42
|
Extracts gait features from accelerometer and gravity sensor data in the input DataFrame by computing temporal and spectral features.
|
|
30
43
|
|
|
@@ -51,7 +64,7 @@ def extract_gait_features(
|
|
|
51
64
|
A DataFrame containing extracted gait features, including temporal and spectral domain features. The DataFrame will have
|
|
52
65
|
columns corresponding to time, statistical features of the accelerometer and gravity data, and spectral features of the
|
|
53
66
|
accelerometer data.
|
|
54
|
-
|
|
67
|
+
|
|
55
68
|
Notes
|
|
56
69
|
-----
|
|
57
70
|
- This function groups the data into windows based on timestamps and applies Fast Fourier Transform to compute spectral features.
|
|
@@ -64,34 +77,36 @@ def extract_gait_features(
|
|
|
64
77
|
If the input DataFrame does not contain the required columns as specified in the configuration or if any step in the feature extraction fails.
|
|
65
78
|
"""
|
|
66
79
|
# Group sequences of timestamps into windows
|
|
67
|
-
|
|
80
|
+
windowed_colnames = (
|
|
81
|
+
[config.time_colname] + config.accelerometer_colnames + config.gravity_colnames
|
|
82
|
+
)
|
|
68
83
|
windowed_data = tabulate_windows(
|
|
69
|
-
df=df,
|
|
70
|
-
columns=
|
|
84
|
+
df=df,
|
|
85
|
+
columns=windowed_colnames,
|
|
71
86
|
window_length_s=config.window_length_s,
|
|
72
87
|
window_step_length_s=config.window_step_length_s,
|
|
73
|
-
fs=config.sampling_frequency
|
|
88
|
+
fs=config.sampling_frequency,
|
|
74
89
|
)
|
|
75
90
|
|
|
76
|
-
extractor = WindowedDataExtractor(
|
|
91
|
+
extractor = WindowedDataExtractor(windowed_colnames)
|
|
77
92
|
|
|
78
|
-
idx_time = extractor.get_index(
|
|
79
|
-
idx_acc = extractor.get_slice(config.
|
|
80
|
-
idx_grav = extractor.get_slice(config.
|
|
93
|
+
idx_time = extractor.get_index(config.time_colname)
|
|
94
|
+
idx_acc = extractor.get_slice(config.accelerometer_colnames)
|
|
95
|
+
idx_grav = extractor.get_slice(config.gravity_colnames)
|
|
81
96
|
|
|
82
97
|
# Extract data
|
|
83
98
|
start_time = np.min(windowed_data[:, :, idx_time], axis=1)
|
|
84
99
|
windowed_acc = windowed_data[:, :, idx_acc]
|
|
85
100
|
windowed_grav = windowed_data[:, :, idx_grav]
|
|
86
101
|
|
|
87
|
-
df_features = pd.DataFrame(start_time, columns=[
|
|
88
|
-
|
|
102
|
+
df_features = pd.DataFrame(start_time, columns=[config.time_colname])
|
|
103
|
+
|
|
89
104
|
# Compute statistics of the temporal domain signals (mean, std) for accelerometer and gravity
|
|
90
105
|
df_temporal_features = extract_temporal_domain_features(
|
|
91
|
-
config=config,
|
|
106
|
+
config=config,
|
|
92
107
|
windowed_acc=windowed_acc,
|
|
93
108
|
windowed_grav=windowed_grav,
|
|
94
|
-
grav_stats=[
|
|
109
|
+
grav_stats=["mean", "std"],
|
|
95
110
|
)
|
|
96
111
|
|
|
97
112
|
# Combine temporal features with the start time
|
|
@@ -99,9 +114,7 @@ def extract_gait_features(
|
|
|
99
114
|
|
|
100
115
|
# Transform the accelerometer data to the spectral domain using FFT and extract spectral features
|
|
101
116
|
df_spectral_features = extract_spectral_domain_features(
|
|
102
|
-
config=config,
|
|
103
|
-
sensor='accelerometer',
|
|
104
|
-
windowed_data=windowed_acc
|
|
117
|
+
config=config, sensor="accelerometer", windowed_data=windowed_acc
|
|
105
118
|
)
|
|
106
119
|
|
|
107
120
|
# Combine the spectral features with the previously computed temporal features
|
|
@@ -111,10 +124,8 @@ def extract_gait_features(
|
|
|
111
124
|
|
|
112
125
|
|
|
113
126
|
def detect_gait(
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
parallel: bool=False
|
|
117
|
-
) -> pd.Series:
|
|
127
|
+
df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
|
|
128
|
+
) -> pd.Series:
|
|
118
129
|
"""
|
|
119
130
|
Detects gait activity in the input DataFrame using a pre-trained classifier and applies a threshold to classify results.
|
|
120
131
|
|
|
@@ -128,7 +139,7 @@ def detect_gait(
|
|
|
128
139
|
Parameters
|
|
129
140
|
----------
|
|
130
141
|
df : pd.DataFrame
|
|
131
|
-
The input DataFrame containing features extracted from gait data. It must include the necessary columns
|
|
142
|
+
The input DataFrame containing features extracted from gait data. It must include the necessary columns
|
|
132
143
|
as specified in the classifier's feature names.
|
|
133
144
|
|
|
134
145
|
clf_package : ClassifierPackage
|
|
@@ -144,7 +155,7 @@ def detect_gait(
|
|
|
144
155
|
"""
|
|
145
156
|
# Set classifier
|
|
146
157
|
clf = clf_package.classifier
|
|
147
|
-
if not parallel and hasattr(clf,
|
|
158
|
+
if not parallel and hasattr(clf, "n_jobs"):
|
|
148
159
|
clf.n_jobs = 1
|
|
149
160
|
|
|
150
161
|
feature_names_scaling = clf_package.scaler.feature_names_in_
|
|
@@ -164,13 +175,13 @@ def detect_gait(
|
|
|
164
175
|
|
|
165
176
|
|
|
166
177
|
def extract_arm_activity_features(
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
178
|
+
df: pd.DataFrame,
|
|
179
|
+
config: GaitConfig,
|
|
180
|
+
) -> pd.DataFrame:
|
|
170
181
|
"""
|
|
171
182
|
Extract features related to arm activity from a time-series DataFrame.
|
|
172
183
|
|
|
173
|
-
This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
|
|
184
|
+
This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
|
|
174
185
|
and extracts features related to arm activity by performing the following steps:
|
|
175
186
|
1. Computes the angle and velocity from gyroscope data.
|
|
176
187
|
2. Filters the data to include only predicted gait segments.
|
|
@@ -190,13 +201,12 @@ def extract_arm_activity_features(
|
|
|
190
201
|
Returns
|
|
191
202
|
-------
|
|
192
203
|
pd.DataFrame
|
|
193
|
-
A DataFrame containing the extracted arm activity features, including angle, velocity,
|
|
204
|
+
A DataFrame containing the extracted arm activity features, including angle, velocity,
|
|
194
205
|
temporal, and spectral features.
|
|
195
206
|
"""
|
|
196
207
|
# Group consecutive timestamps into segments, with new segments starting after a pre-specified gap
|
|
197
208
|
df[DataColumns.SEGMENT_NR] = create_segments(
|
|
198
|
-
time_array=df[DataColumns.TIME],
|
|
199
|
-
max_segment_gap_s=config.max_segment_gap_s
|
|
209
|
+
time_array=df[DataColumns.TIME], max_segment_gap_s=config.max_segment_gap_s
|
|
200
210
|
)
|
|
201
211
|
|
|
202
212
|
# Remove segments that do not meet predetermined criteria
|
|
@@ -205,27 +215,27 @@ def extract_arm_activity_features(
|
|
|
205
215
|
segment_nr_colname=DataColumns.SEGMENT_NR,
|
|
206
216
|
min_segment_length_s=config.min_segment_length_s,
|
|
207
217
|
fs=config.sampling_frequency,
|
|
208
|
-
format=
|
|
218
|
+
format="timestamps",
|
|
209
219
|
)
|
|
210
220
|
|
|
211
221
|
# Create windows of fixed length and step size from the time series per segment
|
|
212
222
|
windowed_data = []
|
|
213
223
|
df_grouped = df.groupby(DataColumns.SEGMENT_NR)
|
|
214
|
-
|
|
215
|
-
[
|
|
216
|
-
config.
|
|
217
|
-
config.
|
|
218
|
-
config.
|
|
224
|
+
windowed_colnames = (
|
|
225
|
+
[config.time_colname]
|
|
226
|
+
+ config.accelerometer_colnames
|
|
227
|
+
+ config.gravity_colnames
|
|
228
|
+
+ config.gyroscope_colnames
|
|
219
229
|
)
|
|
220
230
|
|
|
221
231
|
# Collect windows from all segments in a list for faster concatenation
|
|
222
232
|
for _, group in df_grouped:
|
|
223
233
|
windows = tabulate_windows(
|
|
224
|
-
df=group,
|
|
225
|
-
columns=
|
|
234
|
+
df=group,
|
|
235
|
+
columns=windowed_colnames,
|
|
226
236
|
window_length_s=config.window_length_s,
|
|
227
237
|
window_step_length_s=config.window_step_length_s,
|
|
228
|
-
fs=config.sampling_frequency
|
|
238
|
+
fs=config.sampling_frequency,
|
|
229
239
|
)
|
|
230
240
|
if len(windows) > 0: # Skip if no windows are created
|
|
231
241
|
windowed_data.append(windows)
|
|
@@ -239,12 +249,12 @@ def extract_arm_activity_features(
|
|
|
239
249
|
windowed_data = np.concatenate(windowed_data, axis=0)
|
|
240
250
|
|
|
241
251
|
# Slice columns for accelerometer, gravity, gyroscope, angle, and velocity
|
|
242
|
-
extractor = WindowedDataExtractor(
|
|
252
|
+
extractor = WindowedDataExtractor(windowed_colnames)
|
|
243
253
|
|
|
244
|
-
idx_time = extractor.get_index(
|
|
245
|
-
idx_acc = extractor.get_slice(config.
|
|
246
|
-
idx_grav = extractor.get_slice(config.
|
|
247
|
-
idx_gyro = extractor.get_slice(config.
|
|
254
|
+
idx_time = extractor.get_index(config.time_colname)
|
|
255
|
+
idx_acc = extractor.get_slice(config.accelerometer_colnames)
|
|
256
|
+
idx_grav = extractor.get_slice(config.gravity_colnames)
|
|
257
|
+
idx_gyro = extractor.get_slice(config.gyroscope_colnames)
|
|
248
258
|
|
|
249
259
|
# Extract data
|
|
250
260
|
start_time = np.min(windowed_data[:, :, idx_time], axis=1)
|
|
@@ -253,23 +263,23 @@ def extract_arm_activity_features(
|
|
|
253
263
|
windowed_gyro = windowed_data[:, :, idx_gyro]
|
|
254
264
|
|
|
255
265
|
# Initialize DataFrame for features
|
|
256
|
-
df_features = pd.DataFrame(start_time, columns=[
|
|
266
|
+
df_features = pd.DataFrame(start_time, columns=[config.time_colname])
|
|
257
267
|
|
|
258
268
|
# Extract temporal domain features (e.g., mean, std for accelerometer and gravity)
|
|
259
269
|
df_temporal_features = extract_temporal_domain_features(
|
|
260
|
-
config=config,
|
|
261
|
-
windowed_acc=windowed_acc,
|
|
262
|
-
windowed_grav=windowed_grav,
|
|
263
|
-
grav_stats=[
|
|
270
|
+
config=config,
|
|
271
|
+
windowed_acc=windowed_acc,
|
|
272
|
+
windowed_grav=windowed_grav,
|
|
273
|
+
grav_stats=["mean", "std"],
|
|
264
274
|
)
|
|
265
275
|
df_features = pd.concat([df_features, df_temporal_features], axis=1)
|
|
266
276
|
|
|
267
277
|
# Extract spectral domain features for accelerometer and gyroscope signals
|
|
268
|
-
for sensor_name, windowed_sensor in zip(
|
|
278
|
+
for sensor_name, windowed_sensor in zip(
|
|
279
|
+
["accelerometer", "gyroscope"], [windowed_acc, windowed_gyro]
|
|
280
|
+
):
|
|
269
281
|
df_spectral_features = extract_spectral_domain_features(
|
|
270
|
-
config=config,
|
|
271
|
-
sensor=sensor_name,
|
|
272
|
-
windowed_data=windowed_sensor
|
|
282
|
+
config=config, sensor=sensor_name, windowed_data=windowed_sensor
|
|
273
283
|
)
|
|
274
284
|
df_features = pd.concat([df_features, df_spectral_features], axis=1)
|
|
275
285
|
|
|
@@ -277,10 +287,8 @@ def extract_arm_activity_features(
|
|
|
277
287
|
|
|
278
288
|
|
|
279
289
|
def filter_gait(
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
parallel: bool=False
|
|
283
|
-
) -> pd.Series:
|
|
290
|
+
df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
|
|
291
|
+
) -> pd.Series:
|
|
284
292
|
"""
|
|
285
293
|
Filters gait data to identify windows with no other arm activity using a pre-trained classifier.
|
|
286
294
|
|
|
@@ -300,10 +308,10 @@ def filter_gait(
|
|
|
300
308
|
"""
|
|
301
309
|
if df.shape[0] == 0:
|
|
302
310
|
raise ValueError("No data found in the input DataFrame.")
|
|
303
|
-
|
|
311
|
+
|
|
304
312
|
# Set classifier
|
|
305
313
|
clf = clf_package.classifier
|
|
306
|
-
if not parallel and hasattr(clf,
|
|
314
|
+
if not parallel and hasattr(clf, "n_jobs"):
|
|
307
315
|
clf.n_jobs = 1
|
|
308
316
|
|
|
309
317
|
feature_names_scaling = clf_package.scaler.feature_names_in_
|
|
@@ -323,12 +331,12 @@ def filter_gait(
|
|
|
323
331
|
|
|
324
332
|
|
|
325
333
|
def quantify_arm_swing(
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
334
|
+
df: pd.DataFrame,
|
|
335
|
+
fs: int,
|
|
336
|
+
filtered: bool = False,
|
|
337
|
+
max_segment_gap_s: float = 1.5,
|
|
338
|
+
min_segment_length_s: float = 1.5,
|
|
339
|
+
) -> Tuple[dict[str, pd.DataFrame], dict]:
|
|
332
340
|
"""
|
|
333
341
|
Quantify arm swing parameters for segments of motion based on gyroscope data.
|
|
334
342
|
|
|
@@ -346,72 +354,75 @@ def quantify_arm_swing(
|
|
|
346
354
|
|
|
347
355
|
max_segment_gap_s : float, optional, default=1.5
|
|
348
356
|
The maximum gap in seconds between consecutive timestamps to group them into segments.
|
|
349
|
-
|
|
357
|
+
|
|
350
358
|
min_segment_length_s : float, optional, default=1.5
|
|
351
359
|
The minimum length in seconds for a segment to be considered valid.
|
|
352
360
|
|
|
353
361
|
Returns
|
|
354
362
|
-------
|
|
355
363
|
Tuple[pd.DataFrame, dict]
|
|
356
|
-
A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing
|
|
364
|
+
A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing
|
|
357
365
|
metadata for each segment.
|
|
358
366
|
"""
|
|
359
367
|
# Group consecutive timestamps into segments, with new segments starting after a pre-specified gap.
|
|
360
368
|
# Segments are made based on predicted gait
|
|
361
|
-
df[
|
|
362
|
-
time_array=df[DataColumns.TIME],
|
|
363
|
-
max_segment_gap_s=max_segment_gap_s
|
|
369
|
+
df["unfiltered_segment_nr"] = create_segments(
|
|
370
|
+
time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
|
|
364
371
|
)
|
|
365
372
|
|
|
366
373
|
# Remove segments that do not meet predetermined criteria
|
|
367
374
|
df = discard_segments(
|
|
368
375
|
df=df,
|
|
369
|
-
segment_nr_colname=
|
|
376
|
+
segment_nr_colname="unfiltered_segment_nr",
|
|
370
377
|
min_segment_length_s=min_segment_length_s,
|
|
371
378
|
fs=fs,
|
|
372
|
-
format=
|
|
379
|
+
format="timestamps",
|
|
373
380
|
)
|
|
374
381
|
|
|
375
382
|
if df.empty:
|
|
376
|
-
raise ValueError(
|
|
377
|
-
|
|
383
|
+
raise ValueError(
|
|
384
|
+
"No segments found in the input data after discarding segments of invalid shape."
|
|
385
|
+
)
|
|
386
|
+
|
|
378
387
|
# Create dictionary of gait segment number and duration
|
|
379
|
-
gait_segment_duration_dict = {
|
|
380
|
-
|
|
388
|
+
gait_segment_duration_dict = {
|
|
389
|
+
segment_nr: len(group[DataColumns.TIME]) / fs
|
|
390
|
+
for segment_nr, group in df.groupby("unfiltered_segment_nr", sort=False)
|
|
391
|
+
}
|
|
392
|
+
|
|
381
393
|
# If no arm swing data is remaining, return an empty dictionary
|
|
382
|
-
if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].empty:
|
|
394
|
+
if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1].empty:
|
|
383
395
|
raise ValueError("No gait without other arm activities to quantify.")
|
|
384
396
|
elif filtered:
|
|
385
397
|
# Filter the DataFrame to only include predicted no other arm activity (1)
|
|
386
|
-
df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].reset_index(
|
|
398
|
+
df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1].reset_index(
|
|
399
|
+
drop=True
|
|
400
|
+
)
|
|
387
401
|
|
|
388
402
|
# Group consecutive timestamps into segments of filtered gait
|
|
389
|
-
df[
|
|
390
|
-
time_array=df[DataColumns.TIME],
|
|
391
|
-
max_segment_gap_s=max_segment_gap_s
|
|
403
|
+
df["filtered_segment_nr"] = create_segments(
|
|
404
|
+
time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
|
|
392
405
|
)
|
|
393
406
|
|
|
394
407
|
# Remove segments that do not meet predetermined criteria
|
|
395
408
|
df = discard_segments(
|
|
396
409
|
df=df,
|
|
397
|
-
segment_nr_colname=
|
|
410
|
+
segment_nr_colname="filtered_segment_nr",
|
|
398
411
|
min_segment_length_s=min_segment_length_s,
|
|
399
412
|
fs=fs,
|
|
400
413
|
)
|
|
401
414
|
|
|
402
415
|
if df.empty:
|
|
403
|
-
raise ValueError(
|
|
404
|
-
|
|
405
|
-
|
|
416
|
+
raise ValueError(
|
|
417
|
+
"No filtered gait segments found in the input data after discarding segments of invalid shape."
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
grouping_colname = "filtered_segment_nr" if filtered else "unfiltered_segment_nr"
|
|
406
421
|
|
|
407
422
|
arm_swing_quantified = []
|
|
408
423
|
segment_meta = {
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
'duration_s': len(df[DataColumns.TIME]) / fs
|
|
412
|
-
},
|
|
413
|
-
},
|
|
414
|
-
'per_segment': {}
|
|
424
|
+
"all": {"duration_s": len(df[DataColumns.TIME]) / fs},
|
|
425
|
+
"per_segment": {},
|
|
415
426
|
}
|
|
416
427
|
|
|
417
428
|
# PCA is fitted on only predicted gait without other arm activity if filtered, otherwise
|
|
@@ -425,7 +436,9 @@ def quantify_arm_swing(
|
|
|
425
436
|
# Group and process segments
|
|
426
437
|
for segment_nr, group in df.groupby(grouping_colname, sort=False):
|
|
427
438
|
if filtered:
|
|
428
|
-
gait_segment_nr = group[
|
|
439
|
+
gait_segment_nr = group["unfiltered_segment_nr"].iloc[
|
|
440
|
+
0
|
|
441
|
+
] # Each filtered segment is contained within an unfiltered segment
|
|
429
442
|
else:
|
|
430
443
|
gait_segment_nr = segment_nr
|
|
431
444
|
|
|
@@ -434,9 +447,12 @@ def quantify_arm_swing(
|
|
|
434
447
|
except KeyError:
|
|
435
448
|
logger.warning(
|
|
436
449
|
"Segment %s (filtered = %s) not found in gait segment duration dictionary. Skipping this segment.",
|
|
437
|
-
gait_segment_nr,
|
|
450
|
+
gait_segment_nr,
|
|
451
|
+
filtered,
|
|
452
|
+
)
|
|
453
|
+
logger.debug(
|
|
454
|
+
"Available segments: %s", list(gait_segment_duration_dict.keys())
|
|
438
455
|
)
|
|
439
|
-
logger.debug("Available segments: %s", list(gait_segment_duration_dict.keys()))
|
|
440
456
|
continue
|
|
441
457
|
|
|
442
458
|
time_array = group[DataColumns.TIME].to_numpy()
|
|
@@ -454,20 +470,22 @@ def quantify_arm_swing(
|
|
|
454
470
|
fs=fs,
|
|
455
471
|
)
|
|
456
472
|
|
|
457
|
-
segment_meta[
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
473
|
+
segment_meta["per_segment"][segment_nr] = {
|
|
474
|
+
"start_time_s": time_array.min(),
|
|
475
|
+
"end_time_s": time_array.max(),
|
|
476
|
+
"duration_unfiltered_segment_s": gait_segment_duration_s,
|
|
461
477
|
}
|
|
462
478
|
|
|
463
479
|
if filtered:
|
|
464
|
-
segment_meta[
|
|
480
|
+
segment_meta["per_segment"][segment_nr]["duration_filtered_segment_s"] = (
|
|
481
|
+
len(time_array) / fs
|
|
482
|
+
)
|
|
465
483
|
|
|
466
|
-
if angle_array.size > 0:
|
|
484
|
+
if angle_array.size > 0:
|
|
467
485
|
angle_extrema_indices, _, _ = extract_angle_extremes(
|
|
468
486
|
angle_array=angle_array,
|
|
469
487
|
sampling_frequency=fs,
|
|
470
|
-
max_frequency_activity=1.75
|
|
488
|
+
max_frequency_activity=1.75,
|
|
471
489
|
)
|
|
472
490
|
|
|
473
491
|
if len(angle_extrema_indices) > 1: # Requires at minimum 2 peaks
|
|
@@ -478,36 +496,47 @@ def quantify_arm_swing(
|
|
|
478
496
|
)
|
|
479
497
|
except Exception as e:
|
|
480
498
|
# Handle the error, set RoM to NaN, and log the error
|
|
481
|
-
print(
|
|
499
|
+
print(
|
|
500
|
+
f"Error computing range of motion for segment {segment_nr}: {e}"
|
|
501
|
+
)
|
|
482
502
|
rom = np.array([np.nan])
|
|
483
503
|
|
|
484
504
|
try:
|
|
485
505
|
pav = compute_peak_angular_velocity(
|
|
486
506
|
velocity_array=velocity_array,
|
|
487
|
-
angle_extrema_indices=angle_extrema_indices
|
|
507
|
+
angle_extrema_indices=angle_extrema_indices,
|
|
488
508
|
)
|
|
489
509
|
except Exception as e:
|
|
490
510
|
# Handle the error, set pav to NaN, and log the error
|
|
491
|
-
print(
|
|
511
|
+
print(
|
|
512
|
+
f"Error computing peak angular velocity for segment {segment_nr}: {e}"
|
|
513
|
+
)
|
|
492
514
|
pav = np.array([np.nan])
|
|
493
515
|
|
|
494
|
-
df_params_segment = pd.DataFrame(
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
516
|
+
df_params_segment = pd.DataFrame(
|
|
517
|
+
{
|
|
518
|
+
DataColumns.SEGMENT_NR: segment_nr,
|
|
519
|
+
DataColumns.RANGE_OF_MOTION: rom,
|
|
520
|
+
DataColumns.PEAK_VELOCITY: pav,
|
|
521
|
+
}
|
|
522
|
+
)
|
|
499
523
|
|
|
500
524
|
arm_swing_quantified.append(df_params_segment)
|
|
501
525
|
|
|
502
526
|
arm_swing_quantified = pd.concat(arm_swing_quantified, ignore_index=True)
|
|
503
|
-
|
|
527
|
+
|
|
504
528
|
return arm_swing_quantified, segment_meta
|
|
505
529
|
|
|
506
530
|
|
|
507
|
-
def aggregate_arm_swing_params(
|
|
531
|
+
def aggregate_arm_swing_params(
|
|
532
|
+
df_arm_swing_params: pd.DataFrame,
|
|
533
|
+
segment_meta: dict,
|
|
534
|
+
segment_cats: List[tuple],
|
|
535
|
+
aggregates: List[str] = ["median"],
|
|
536
|
+
) -> dict:
|
|
508
537
|
"""
|
|
509
538
|
Aggregate the quantification results for arm swing parameters.
|
|
510
|
-
|
|
539
|
+
|
|
511
540
|
Parameters
|
|
512
541
|
----------
|
|
513
542
|
df_arm_swing_params : pd.DataFrame
|
|
@@ -518,10 +547,9 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
|
|
|
518
547
|
|
|
519
548
|
segment_cats : List[tuple]
|
|
520
549
|
A list of tuples defining the segment categories, where each tuple contains the lower and upper bounds for the segment duration.
|
|
521
|
-
|
|
522
550
|
aggregates : List[str], optional
|
|
523
551
|
A list of aggregation methods to apply to the quantification results.
|
|
524
|
-
|
|
552
|
+
|
|
525
553
|
Returns
|
|
526
554
|
-------
|
|
527
555
|
dict
|
|
@@ -531,70 +559,91 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
|
|
|
531
559
|
|
|
532
560
|
aggregated_results = {}
|
|
533
561
|
for segment_cat_range in segment_cats:
|
|
534
|
-
segment_cat_str = f
|
|
562
|
+
segment_cat_str = f"{segment_cat_range[0]}_{segment_cat_range[1]}"
|
|
535
563
|
cat_segments = [
|
|
536
|
-
x
|
|
537
|
-
|
|
538
|
-
|
|
564
|
+
x
|
|
565
|
+
for x in segment_meta.keys()
|
|
566
|
+
if segment_meta[x]["duration_unfiltered_segment_s"] >= segment_cat_range[0]
|
|
567
|
+
and segment_meta[x]["duration_unfiltered_segment_s"] < segment_cat_range[1]
|
|
539
568
|
]
|
|
540
569
|
|
|
541
|
-
if len(cat_segments) > 0:
|
|
570
|
+
if len(cat_segments) > 0:
|
|
542
571
|
# For each segment, use 'duration_filtered_segment_s' if present, else 'duration_unfiltered_segment_s'
|
|
543
572
|
aggregated_results[segment_cat_str] = {
|
|
544
|
-
|
|
573
|
+
"duration_s": sum(
|
|
545
574
|
[
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
575
|
+
(
|
|
576
|
+
segment_meta[x]["duration_filtered_segment_s"]
|
|
577
|
+
if "duration_filtered_segment_s" in segment_meta[x]
|
|
578
|
+
else segment_meta[x]["duration_unfiltered_segment_s"]
|
|
579
|
+
)
|
|
549
580
|
for x in cat_segments
|
|
550
581
|
]
|
|
551
|
-
)
|
|
582
|
+
)
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
df_arm_swing_params_cat = df_arm_swing_params.loc[
|
|
586
|
+
df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)
|
|
587
|
+
]
|
|
552
588
|
|
|
553
|
-
df_arm_swing_params_cat = df_arm_swing_params.loc[df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)]
|
|
554
|
-
|
|
555
589
|
# Aggregate across all segments
|
|
556
|
-
aggregates_per_segment = [
|
|
590
|
+
aggregates_per_segment = ["median", "mean"]
|
|
557
591
|
|
|
558
592
|
for arm_swing_parameter in arm_swing_parameters:
|
|
559
593
|
for aggregate in aggregates:
|
|
560
|
-
if aggregate in [
|
|
594
|
+
if aggregate in ["std", "cov"]:
|
|
561
595
|
per_segment_agg = []
|
|
562
596
|
# If the aggregate is 'cov' (coefficient of variation), we also compute the mean and standard deviation per segment
|
|
563
|
-
segment_groups = dict(
|
|
597
|
+
segment_groups = dict(
|
|
598
|
+
tuple(
|
|
599
|
+
df_arm_swing_params_cat.groupby(DataColumns.SEGMENT_NR)
|
|
600
|
+
)
|
|
601
|
+
)
|
|
564
602
|
for segment_nr in cat_segments:
|
|
565
603
|
segment_df = segment_groups.get(segment_nr)
|
|
566
604
|
if segment_df is not None:
|
|
567
|
-
per_segment_agg.append(
|
|
605
|
+
per_segment_agg.append(
|
|
606
|
+
aggregate_parameter(
|
|
607
|
+
segment_df[arm_swing_parameter], aggregate
|
|
608
|
+
)
|
|
609
|
+
)
|
|
568
610
|
|
|
569
611
|
# Drop nans
|
|
570
612
|
per_segment_agg = np.array(per_segment_agg)
|
|
571
613
|
per_segment_agg = per_segment_agg[~np.isnan(per_segment_agg)]
|
|
572
614
|
|
|
573
|
-
|
|
574
615
|
for segment_level_aggregate in aggregates_per_segment:
|
|
575
|
-
aggregated_results[segment_cat_str][
|
|
616
|
+
aggregated_results[segment_cat_str][
|
|
617
|
+
f"{segment_level_aggregate}_{aggregate}_{arm_swing_parameter}"
|
|
618
|
+
] = aggregate_parameter(
|
|
619
|
+
per_segment_agg, segment_level_aggregate
|
|
620
|
+
)
|
|
576
621
|
else:
|
|
577
|
-
aggregated_results[segment_cat_str][
|
|
622
|
+
aggregated_results[segment_cat_str][
|
|
623
|
+
f"{aggregate}_{arm_swing_parameter}"
|
|
624
|
+
] = aggregate_parameter(
|
|
625
|
+
df_arm_swing_params_cat[arm_swing_parameter], aggregate
|
|
626
|
+
)
|
|
578
627
|
|
|
579
628
|
else:
|
|
580
629
|
# If no segments are found for this category, initialize with NaN
|
|
581
630
|
aggregated_results[segment_cat_str] = {
|
|
582
|
-
|
|
631
|
+
"duration_s": 0,
|
|
583
632
|
}
|
|
584
633
|
|
|
585
634
|
return aggregated_results
|
|
586
635
|
|
|
587
636
|
|
|
588
637
|
def extract_temporal_domain_features(
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
638
|
+
config,
|
|
639
|
+
windowed_acc: np.ndarray,
|
|
640
|
+
windowed_grav: np.ndarray,
|
|
641
|
+
grav_stats: List[str] = ["mean"],
|
|
642
|
+
) -> pd.DataFrame:
|
|
594
643
|
"""
|
|
595
644
|
Compute temporal domain features for the accelerometer signal.
|
|
596
645
|
|
|
597
|
-
This function calculates various statistical features for the gravity signal
|
|
646
|
+
This function calculates various statistical features for the gravity signal
|
|
598
647
|
and computes the standard deviation of the accelerometer's Euclidean norm.
|
|
599
648
|
|
|
600
649
|
Parameters
|
|
@@ -602,10 +651,10 @@ def extract_temporal_domain_features(
|
|
|
602
651
|
config : object
|
|
603
652
|
Configuration object containing the accelerometer and gravity column names.
|
|
604
653
|
windowed_acc : numpy.ndarray
|
|
605
|
-
A 2D numpy array of shape (N, M) where N is the number of windows and M is
|
|
654
|
+
A 2D numpy array of shape (N, M) where N is the number of windows and M is
|
|
606
655
|
the number of accelerometer values per window.
|
|
607
656
|
windowed_grav : numpy.ndarray
|
|
608
|
-
A 2D numpy array of shape (N, M) where N is the number of windows and M is
|
|
657
|
+
A 2D numpy array of shape (N, M) where N is the number of windows and M is
|
|
609
658
|
the number of gravity signal values per window.
|
|
610
659
|
grav_stats : list of str, optional
|
|
611
660
|
A list of statistics to compute for the gravity signal (default is ['mean']).
|
|
@@ -613,32 +662,34 @@ def extract_temporal_domain_features(
|
|
|
613
662
|
Returns
|
|
614
663
|
-------
|
|
615
664
|
pd.DataFrame
|
|
616
|
-
A DataFrame containing the computed features, with each row corresponding
|
|
665
|
+
A DataFrame containing the computed features, with each row corresponding
|
|
617
666
|
to a window and each column representing a specific feature.
|
|
618
667
|
"""
|
|
619
668
|
# Compute gravity statistics (e.g., mean, std, etc.)
|
|
620
669
|
feature_dict = {}
|
|
621
670
|
for stat in grav_stats:
|
|
622
671
|
stats_result = compute_statistics(data=windowed_grav, statistic=stat)
|
|
623
|
-
for i, col in enumerate(config.
|
|
624
|
-
feature_dict[f
|
|
672
|
+
for i, col in enumerate(config.gravity_colnames):
|
|
673
|
+
feature_dict[f"{col}_{stat}"] = stats_result[:, i]
|
|
625
674
|
|
|
626
675
|
# Compute standard deviation of the Euclidean norm of the accelerometer signal
|
|
627
|
-
feature_dict[
|
|
676
|
+
feature_dict["accelerometer_std_norm"] = compute_std_euclidean_norm(
|
|
677
|
+
data=windowed_acc
|
|
678
|
+
)
|
|
628
679
|
|
|
629
680
|
return pd.DataFrame(feature_dict)
|
|
630
681
|
|
|
631
682
|
|
|
632
683
|
def extract_spectral_domain_features(
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
684
|
+
windowed_data: np.ndarray,
|
|
685
|
+
config,
|
|
686
|
+
sensor: str,
|
|
687
|
+
) -> pd.DataFrame:
|
|
637
688
|
"""
|
|
638
689
|
Compute spectral domain features for a sensor's data.
|
|
639
690
|
|
|
640
|
-
This function computes the periodogram, extracts power in specific frequency bands,
|
|
641
|
-
calculates the dominant frequency, and computes Mel-frequency cepstral coefficients (MFCCs)
|
|
691
|
+
This function computes the periodogram, extracts power in specific frequency bands,
|
|
692
|
+
calculates the dominant frequency, and computes Mel-frequency cepstral coefficients (MFCCs)
|
|
642
693
|
for a given sensor's windowed data.
|
|
643
694
|
|
|
644
695
|
Parameters
|
|
@@ -647,16 +698,16 @@ def extract_spectral_domain_features(
|
|
|
647
698
|
A 2D numpy array where each row corresponds to a window of sensor data.
|
|
648
699
|
|
|
649
700
|
config : object
|
|
650
|
-
Configuration object containing settings such as sampling frequency, window type,
|
|
701
|
+
Configuration object containing settings such as sampling frequency, window type,
|
|
651
702
|
frequency bands, and MFCC parameters.
|
|
652
703
|
|
|
653
704
|
sensor : str
|
|
654
705
|
The name of the sensor (e.g., 'accelerometer', 'gyroscope').
|
|
655
|
-
|
|
706
|
+
|
|
656
707
|
Returns
|
|
657
708
|
-------
|
|
658
709
|
pd.DataFrame
|
|
659
|
-
A DataFrame containing the computed spectral features, with each row corresponding
|
|
710
|
+
A DataFrame containing the computed spectral features, with each row corresponding
|
|
660
711
|
to a window and each column representing a specific feature.
|
|
661
712
|
"""
|
|
662
713
|
# Initialize a dictionary to hold the results
|
|
@@ -664,49 +715,46 @@ def extract_spectral_domain_features(
|
|
|
664
715
|
|
|
665
716
|
# Compute periodogram (power spectral density)
|
|
666
717
|
freqs, psd = periodogram(
|
|
667
|
-
x=windowed_data,
|
|
668
|
-
fs=config.sampling_frequency,
|
|
669
|
-
window=config.window_type,
|
|
670
|
-
axis=1
|
|
718
|
+
x=windowed_data, fs=config.sampling_frequency, window=config.window_type, axis=1
|
|
671
719
|
)
|
|
672
720
|
|
|
673
721
|
# Compute power in specified frequency bands
|
|
674
722
|
for band_name, band_freqs in config.d_frequency_bandwidths.items():
|
|
675
723
|
band_powers = compute_power_in_bandwidth(
|
|
676
724
|
freqs=freqs,
|
|
677
|
-
psd=psd,
|
|
725
|
+
psd=psd,
|
|
678
726
|
fmin=band_freqs[0],
|
|
679
727
|
fmax=band_freqs[1],
|
|
680
|
-
include_max=False
|
|
728
|
+
include_max=False,
|
|
681
729
|
)
|
|
682
730
|
for i, col in enumerate(config.axes):
|
|
683
|
-
feature_dict[f
|
|
731
|
+
feature_dict[f"{sensor}_{col}_{band_name}"] = band_powers[:, i]
|
|
684
732
|
|
|
685
733
|
# Compute dominant frequency for each axis
|
|
686
734
|
dominant_frequencies = compute_dominant_frequency(
|
|
687
|
-
freqs=freqs,
|
|
688
|
-
psd=psd,
|
|
689
|
-
fmin=config.spectrum_low_frequency,
|
|
690
|
-
fmax=config.spectrum_high_frequency
|
|
735
|
+
freqs=freqs,
|
|
736
|
+
psd=psd,
|
|
737
|
+
fmin=config.spectrum_low_frequency,
|
|
738
|
+
fmax=config.spectrum_high_frequency,
|
|
691
739
|
)
|
|
692
740
|
|
|
693
741
|
# Add dominant frequency features to the feature_dict
|
|
694
742
|
for axis, freq in zip(config.axes, dominant_frequencies.T):
|
|
695
|
-
feature_dict[f
|
|
743
|
+
feature_dict[f"{sensor}_{axis}_dominant_frequency"] = freq
|
|
696
744
|
|
|
697
745
|
# Compute total power in the PSD
|
|
698
746
|
total_power_psd = compute_total_power(psd)
|
|
699
747
|
|
|
700
748
|
# Compute MFCCs
|
|
701
749
|
mfccs = compute_mfccs(
|
|
702
|
-
total_power_array=total_power_psd,
|
|
703
|
-
config=config,
|
|
704
|
-
multiplication_factor=4
|
|
750
|
+
total_power_array=total_power_psd, config=config, multiplication_factor=4
|
|
705
751
|
)
|
|
706
752
|
|
|
707
753
|
# Combine the MFCCs into the features DataFrame
|
|
708
|
-
mfcc_colnames = [
|
|
754
|
+
mfcc_colnames = [
|
|
755
|
+
f"{sensor}_mfcc_{x}" for x in range(1, config.mfcc_n_coefficients + 1)
|
|
756
|
+
]
|
|
709
757
|
for i, colname in enumerate(mfcc_colnames):
|
|
710
758
|
feature_dict[colname] = mfccs[:, i]
|
|
711
759
|
|
|
712
|
-
return pd.DataFrame(feature_dict)
|
|
760
|
+
return pd.DataFrame(feature_dict)
|