paradigma-1.0.4-py3-none-any.whl → paradigma-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/__init__.py +10 -1
- paradigma/classification.py +14 -14
- paradigma/config.py +38 -29
- paradigma/constants.py +10 -2
- paradigma/feature_extraction.py +106 -75
- paradigma/load.py +476 -0
- paradigma/orchestrator.py +670 -0
- paradigma/pipelines/gait_pipeline.py +488 -97
- paradigma/pipelines/pulse_rate_pipeline.py +278 -46
- paradigma/pipelines/pulse_rate_utils.py +176 -137
- paradigma/pipelines/tremor_pipeline.py +292 -72
- paradigma/prepare_data.py +409 -0
- paradigma/preprocessing.py +345 -77
- paradigma/segmenting.py +57 -42
- paradigma/testing.py +14 -9
- paradigma/util.py +36 -22
- paradigma-1.1.0.dist-info/METADATA +229 -0
- paradigma-1.1.0.dist-info/RECORD +26 -0
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/WHEEL +1 -1
- paradigma-1.0.4.dist-info/METADATA +0 -140
- paradigma-1.0.4.dist-info/RECORD +0 -23
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/entry_points.txt +0 -0
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/licenses/LICENSE +0 -0
paradigma/pipelines/gait_pipeline.py

@@ -1,12 +1,14 @@
+import json
 import logging
-from
+from importlib.resources import files
+from pathlib import Path
 
 import numpy as np
 import pandas as pd
 from scipy.signal import periodogram
 
 from paradigma.classification import ClassifierPackage
-from paradigma.config import GaitConfig
+from paradigma.config import GaitConfig, IMUConfig
 from paradigma.constants import DataColumns
 from paradigma.feature_extraction import (
     compute_angle,
@@ -22,13 +24,14 @@ from paradigma.feature_extraction import (
     pca_transform_gyroscope,
     remove_moving_average_angle,
 )
+from paradigma.preprocessing import preprocess_imu_data
 from paradigma.segmenting import (
     WindowedDataExtractor,
     create_segments,
     discard_segments,
     tabulate_windows,
 )
-from paradigma.util import aggregate_parameter
+from paradigma.util import aggregate_parameter, merge_predictions_with_timestamps
 
 logger = logging.getLogger(__name__)
 
@@ -39,42 +42,54 @@ if not logger.hasHandlers():
 
 def extract_gait_features(df: pd.DataFrame, config: GaitConfig) -> pd.DataFrame:
     """
-    Extracts gait features from accelerometer and gravity sensor data in the
+    Extracts gait features from accelerometer and gravity sensor data in the
+    input DataFrame by computing temporal and spectral features.
 
     This function performs the following steps:
-    1. Groups sequences of timestamps into windows, using accelerometer and
-
-
+    1. Groups sequences of timestamps into windows, using accelerometer and
+       gravity data.
+    2. Computes temporal domain features such as mean and standard deviation
+       for accelerometer and gravity data.
+    3. Transforms the signals from the temporal domain to the spectral
+       domain using the Fast Fourier Transform (FFT).
     4. Computes spectral domain features for the accelerometer data.
     5. Combines both temporal and spectral features into a final DataFrame.
 
     Parameters
     ----------
     df : pd.DataFrame
-        The input DataFrame containing gait data, which includes time,
+        The input DataFrame containing gait data, which includes time,
+        accelerometer, and gravity sensor data. The data should be
        structured with the necessary columns as specified in the `config`.
 
     config : GaitConfig
-        Configuration object containing parameters for feature extraction,
-
+        Configuration object containing parameters for feature extraction,
+        including column names for time, accelerometer data, and gravity
+        data, as well as settings for windowing, and feature computation.
 
     Returns
     -------
     pd.DataFrame
-        A DataFrame containing extracted gait features, including temporal
-
-        accelerometer
+        A DataFrame containing extracted gait features, including temporal
+        and spectral domain features. The DataFrame will have columns
+        corresponding to time, statistical features of the accelerometer and
+        gravity data, and spectral features of the accelerometer data.
 
     Notes
     -----
-    - This function groups the data into windows based on timestamps and
-
-    - The
+    - This function groups the data into windows based on timestamps and
+      applies Fast Fourier Transform to compute spectral features.
+    - The temporal features are extracted from the accelerometer and gravity
+      data, and include statistics like mean and standard deviation.
+    - The input DataFrame must include columns as specified in the `config`
+      object for proper feature extraction.
 
     Raises
     ------
     ValueError
-        If the input DataFrame does not contain the required columns as
+        If the input DataFrame does not contain the required columns as
+        specified in the configuration or if any step in the feature
+        extraction fails.
     """
     # Group sequences of timestamps into windows
     windowed_colnames = (
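For orientation, here is a minimal usage sketch of the function documented above; it assumes `df_preprocessed` is the output of `preprocess_imu_data` (imported elsewhere in this diff) and uses the `GaitConfig(step="gait")` constructor that `run_gait_pipeline` at the end of this diff relies on:

    from paradigma.config import GaitConfig
    from paradigma.pipelines.gait_pipeline import extract_gait_features

    # Assumed: df_preprocessed holds time, accelerometer, and gravity columns
    gait_config = GaitConfig(step="gait")
    df_gait_features = extract_gait_features(df=df_preprocessed, config=gait_config)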
@@ -101,7 +116,8 @@ def extract_gait_features(df: pd.DataFrame, config: GaitConfig) -> pd.DataFrame:
 
     df_features = pd.DataFrame(start_time, columns=[config.time_colname])
 
-    # Compute statistics of the temporal domain signals (mean, std) for
+    # Compute statistics of the temporal domain signals (mean, std) for
+    # accelerometer and gravity
     df_temporal_features = extract_temporal_domain_features(
         config=config,
         windowed_acc=windowed_acc,
@@ -112,7 +128,8 @@ def extract_gait_features(df: pd.DataFrame, config: GaitConfig) -> pd.DataFrame:
     # Combine temporal features with the start time
     df_features = pd.concat([df_features, df_temporal_features], axis=1)
 
-    # Transform the accelerometer data to the spectral domain using FFT and
+    # Transform the accelerometer data to the spectral domain using FFT and
+    # extract spectral features
     df_spectral_features = extract_spectral_domain_features(
         config=config, sensor="accelerometer", windowed_data=windowed_acc
     )
@@ -127,31 +144,40 @@ def detect_gait(
     df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
 ) -> pd.Series:
     """
-    Detects gait activity in the input DataFrame using a pre-trained
+    Detects gait activity in the input DataFrame using a pre-trained
+    classifier and applies a threshold to classify results.
 
     This function performs the following steps:
-    1. Loads the pre-trained classifier and scaling parameters from the
-
-
-
+    1. Loads the pre-trained classifier and scaling parameters from the
+       specified directory.
+    2. Scales the relevant features in the input DataFrame (`df`) using the
+       loaded scaling parameters.
+    3. Predicts the probability of gait activity for each sample in the
+       DataFrame using the classifier.
+    4. Applies a threshold to the predicted probabilities to determine
+       whether gait activity is present.
     5. Returns predicted probabilities
 
     Parameters
     ----------
     df : pd.DataFrame
-        The input DataFrame containing features extracted from gait data. It
-        as specified in the classifier's
+        The input DataFrame containing features extracted from gait data. It
+        must include the necessary columns as specified in the classifier's
+        feature names.
 
     clf_package : ClassifierPackage
-        The pre-trained classifier package containing the classifier,
+        The pre-trained classifier package containing the classifier,
+        threshold, and scaler.
 
     parallel : bool, optional, default=False
-        If `True`, enables parallel processing during classification. If
+        If `True`, enables parallel processing during classification. If
+        `False`, the classifier uses a single core.
 
     Returns
     -------
     pd.Series
-        A Series containing the predicted probabilities of gait activity for
+        A Series containing the predicted probabilities of gait activity for
+        each sample in the input DataFrame.
     """
     # Set classifier
     clf = clf_package.classifier
@@ -165,11 +191,11 @@ def detect_gait(
     scaled_features = clf_package.transform_features(df.loc[:, feature_names_scaling])
 
     # Replace scaled features in a copy of the relevant features for prediction
-
-
+    x_features = df.loc[:, feature_names_predictions].copy()
+    x_features.loc[:, feature_names_scaling] = scaled_features
 
     # Make prediction and add the probability of gait activity to the DataFrame
-    pred_gait_proba_series = clf_package.predict_proba(
+    pred_gait_proba_series = clf_package.predict_proba(x_features)
 
     return pred_gait_proba_series
 
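Standalone usage of `detect_gait` mirrors what the new `run_gait_pipeline` (added at the end of this diff) does internally; a condensed sketch, with the classifier asset name taken from that function:

    from importlib.resources import files
    from paradigma.classification import ClassifierPackage
    from paradigma.pipelines.gait_pipeline import detect_gait

    # Load the classifier package bundled with the wheel
    clf_path = files("paradigma.assets") / "gait_detection_clf_package.pkl"
    clf_package = ClassifierPackage.load(clf_path)

    # Probability of gait per windowed feature row; threshold to binarize
    proba = detect_gait(df_gait_features, clf_package, parallel=False)
    is_gait = (proba >= clf_package.threshold).astype(int)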
@@ -181,38 +207,69 @@ def extract_arm_activity_features(
     """
     Extract features related to arm activity from a time-series DataFrame.
 
-    This function processes a DataFrame containing accelerometer, gravity,
-    and extracts features related to arm activity by
+    This function processes a DataFrame containing accelerometer, gravity,
+    and gyroscope signals, and extracts features related to arm activity by
+    performing the following steps:
     1. Computes the angle and velocity from gyroscope data.
     2. Filters the data to include only predicted gait segments.
-    3. Groups the data into segments based on consecutive timestamps and
+    3. Groups the data into segments based on consecutive timestamps and
+       pre-specified gaps.
     4. Removes segments that do not meet predefined criteria.
     5. Creates fixed-length windows from the time series data.
-    6. Extracts angle-related features, temporal domain features, and
+    6. Extracts angle-related features, temporal domain features, and
+       spectral domain features.
 
     Parameters
     ----------
     df: pd.DataFrame
-        The input DataFrame containing accelerometer, gravity, and
+        The input DataFrame containing accelerometer, gravity, and
+        gyroscope data of predicted gait.
 
     config : ArmActivityFeatureExtractionConfig
-        Configuration object containing column names and parameters
+        Configuration object containing column names and parameters
+        for feature extraction.
 
     Returns
     -------
     pd.DataFrame
-        A DataFrame containing the extracted arm activity features,
-        temporal, and spectral features.
+        A DataFrame containing the extracted arm activity features,
+        including angle, velocity, temporal, and spectral features.
     """
-    # Group consecutive timestamps into segments, with new segments
-
-
-
+    # Group consecutive timestamps into segments, with new segments
+    # starting after a pre-specified gap. If data_segment_nr exists,
+    # create gait segments per data segment to preserve both
+    has_data_segments = DataColumns.DATA_SEGMENT_NR in df.columns
+
+    if has_data_segments:
+        df_list = []
+        gait_segment_offset = 0
+
+        for data_seg_nr in sorted(df[DataColumns.DATA_SEGMENT_NR].unique()):
+            df_seg = df[df[DataColumns.DATA_SEGMENT_NR] == data_seg_nr].copy()
+
+            # Create gait segments within this data segment
+            df_seg[DataColumns.GAIT_SEGMENT_NR] = create_segments(
+                time_array=df_seg[DataColumns.TIME].values,
+                max_segment_gap_s=config.max_segment_gap_s,
+            )
+
+            # Offset gait segment numbers to be unique across data segments
+            if gait_segment_offset > 0:
+                df_seg[DataColumns.GAIT_SEGMENT_NR] += gait_segment_offset
+            gait_segment_offset = df_seg[DataColumns.GAIT_SEGMENT_NR].max() + 1
+
+            df_list.append(df_seg)
+
+        df = pd.concat(df_list, ignore_index=True)
+    else:
+        df[DataColumns.GAIT_SEGMENT_NR] = create_segments(
+            time_array=df[DataColumns.TIME], max_segment_gap_s=config.max_segment_gap_s
+        )
 
     # Remove segments that do not meet predetermined criteria
     df = discard_segments(
         df=df,
-        segment_nr_colname=DataColumns.
+        segment_nr_colname=DataColumns.GAIT_SEGMENT_NR,
         min_segment_length_s=config.min_segment_length_s,
         fs=config.sampling_frequency,
         format="timestamps",
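A short sketch of calling this function, matching the invocation in `run_gait_pipeline` below (assumes `df_gait_only` contains the preprocessed samples classified as gait):

    from paradigma.config import GaitConfig
    from paradigma.pipelines.gait_pipeline import extract_arm_activity_features

    arm_activity_config = GaitConfig(step="arm_activity")
    df_arm_activity = extract_arm_activity_features(df_gait_only, arm_activity_config)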
@@ -220,7 +277,7 @@ def extract_arm_activity_features(
 
     # Create windows of fixed length and step size from the time series per segment
     windowed_data = []
-    df_grouped = df.groupby(DataColumns.
+    df_grouped = df.groupby(DataColumns.GAIT_SEGMENT_NR)
     windowed_colnames = (
         [config.time_colname]
         + config.accelerometer_colnames
@@ -290,14 +347,16 @@ def filter_gait(
     df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
 ) -> pd.Series:
     """
-    Filters gait data to identify windows with no other arm activity using
+    Filters gait data to identify windows with no other arm activity using
+    a pre-trained classifier.
 
     Parameters
     ----------
     df : pd.DataFrame
         The input DataFrame containing features extracted from gait data.
     clf_package: ClassifierPackage
-        The pre-trained classifier package containing the classifier,
+        The pre-trained classifier package containing the classifier,
+        threshold, and scaler.
     parallel : bool, optional, default=False
         If `True`, enables parallel processing.
 
@@ -321,11 +380,11 @@ def filter_gait(
     scaled_features = clf_package.transform_features(df.loc[:, feature_names_scaling])
 
     # Replace scaled features in a copy of the relevant features for prediction
-
-
+    x_features = df.loc[:, feature_names_predictions].copy()
+    x_features.loc[:, feature_names_scaling] = scaled_features
 
     # Make predictions
-    pred_no_other_arm_activity_proba_series = clf_package.predict_proba(
+    pred_no_other_arm_activity_proba_series = clf_package.predict_proba(x_features)
 
     return pred_no_other_arm_activity_proba_series
 
@@ -336,24 +395,27 @@ def quantify_arm_swing(
     filtered: bool = False,
     max_segment_gap_s: float = 1.5,
     min_segment_length_s: float = 1.5,
-) ->
+) -> tuple[dict[str, pd.DataFrame], dict]:
     """
     Quantify arm swing parameters for segments of motion based on gyroscope data.
 
     Parameters
     ----------
     df : pd.DataFrame
-        A DataFrame containing the raw sensor data of predicted gait
-
+        A DataFrame containing the raw sensor data of predicted gait
+        timestamps. Should include a column for predicted no other arm
+        activity based on a fitted threshold if filtered is True.
 
     fs : int
         The sampling frequency of the sensor data.
 
     filtered : bool, optional, default=True
-        If `True`, the gyroscope data is filtered to only include predicted
+        If `True`, the gyroscope data is filtered to only include predicted
+        no other arm activity.
 
     max_segment_gap_s : float, optional, default=1.5
-        The maximum gap in seconds between consecutive timestamps to group
+        The maximum gap in seconds between consecutive timestamps to group
+        them into segments.
 
     min_segment_length_s : float, optional, default=1.5
         The minimum length in seconds for a segment to be considered valid.
@@ -361,11 +423,11 @@ def quantify_arm_swing(
     Returns
     -------
     Tuple[pd.DataFrame, dict]
-        A tuple containing a dataframe with quantified arm swing parameters
-        metadata for each segment.
+        A tuple containing a dataframe with quantified arm swing parameters
+        and a dictionary containing metadata for each segment.
     """
-    # Group consecutive timestamps into segments, with new segments starting
-    # Segments are made based on predicted gait
+    # Group consecutive timestamps into segments, with new segments starting
+    # after a pre-specified gap. Segments are made based on predicted gait
     df["unfiltered_segment_nr"] = create_segments(
         time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
     )
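A minimal call sketch, using the same arguments `run_gait_pipeline` below passes (assumes `df_filtered` carries the filtered-gait prediction column):

    quantified_arm_swing, gait_segment_meta = quantify_arm_swing(
        df=df_filtered,
        fs=arm_activity_config.sampling_frequency,
        filtered=True,
        max_segment_gap_s=1.5,
        min_segment_length_s=1.5,
    )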
@@ -381,7 +443,8 @@ def quantify_arm_swing(
 
     if df.empty:
         raise ValueError(
-            "No segments found in the input data after discarding segments
+            "No segments found in the input data after discarding segments "
+            "of invalid shape."
         )
 
     # Create dictionary of gait segment number and duration
@@ -414,7 +477,8 @@ def quantify_arm_swing(
 
     if df.empty:
         raise ValueError(
-            "No filtered gait segments found in the input data after
+            "No filtered gait segments found in the input data after "
+            "discarding segments of invalid shape."
        )
 
     grouping_colname = "filtered_segment_nr" if filtered else "unfiltered_segment_nr"
@@ -425,8 +489,8 @@ def quantify_arm_swing(
         "per_segment": {},
     }
 
-    # PCA is fitted on only predicted gait without other arm activity if
-    # it is fitted on the entire gyroscope data
+    # PCA is fitted on only predicted gait without other arm activity if
+    # filtered, otherwise it is fitted on the entire gyroscope data
     df[DataColumns.VELOCITY] = pca_transform_gyroscope(
         df=df,
         y_gyro_colname=DataColumns.GYROSCOPE_Y,
@@ -446,7 +510,8 @@ def quantify_arm_swing(
             gait_segment_duration_s = gait_segment_duration_dict[gait_segment_nr]
         except KeyError:
             logger.warning(
-                "Segment %s (filtered = %s) not found in gait segment
+                "Segment %s (filtered = %s) not found in gait segment "
+                "duration dictionary. Skipping this segment.",
                 gait_segment_nr,
                 filtered,
             )
@@ -471,8 +536,8 @@ def quantify_arm_swing(
         )
 
         segment_meta["per_segment"][segment_nr] = {
-            "start_time_s": time_array.min(),
-            "end_time_s": time_array.max(),
+            "start_time_s": float(time_array.min()),
+            "end_time_s": float(time_array.max()),
             "duration_unfiltered_segment_s": gait_segment_duration_s,
         }
 
@@ -497,7 +562,8 @@ def quantify_arm_swing(
         except Exception as e:
             # Handle the error, set RoM to NaN, and log the error
             print(
-                f"Error computing range of motion for segment
+                f"Error computing range of motion for segment "
+                f"{segment_nr}: {e}"
             )
             rom = np.array([np.nan])
 
@@ -509,17 +575,24 @@ def quantify_arm_swing(
         except Exception as e:
             # Handle the error, set pav to NaN, and log the error
             print(
-                f"Error computing peak angular velocity for segment
+                f"Error computing peak angular velocity for segment "
+                f"{segment_nr}: {e}"
             )
             pav = np.array([np.nan])
 
-
-
-
-
-
-
-
+        params_dict = {
+            DataColumns.GAIT_SEGMENT_NR: segment_nr,
+            DataColumns.RANGE_OF_MOTION: rom,
+            DataColumns.PEAK_VELOCITY: pav,
+        }
+
+        # Add data_segment_nr if it exists in the input data
+        if DataColumns.DATA_SEGMENT_NR in group.columns:
+            params_dict[DataColumns.DATA_SEGMENT_NR] = group[
+                DataColumns.DATA_SEGMENT_NR
+            ].iloc[0]
+
+        df_params_segment = pd.DataFrame(params_dict)
 
         arm_swing_quantified.append(df_params_segment)
 
@@ -531,8 +604,8 @@ def quantify_arm_swing(
 def aggregate_arm_swing_params(
     df_arm_swing_params: pd.DataFrame,
     segment_meta: dict,
-    segment_cats:
-    aggregates:
+    segment_cats: list[tuple],
+    aggregates: list[str] = ["median"],
 ) -> dict:
     """
     Aggregate the quantification results for arm swing parameters.
@@ -546,14 +619,17 @@ def aggregate_arm_swing_params(
         A dictionary containing metadata for each segment.
 
     segment_cats : List[tuple]
-        A list of tuples defining the segment categories, where each tuple
+        A list of tuples defining the segment categories, where each tuple
+        contains the lower and upper bounds for the segment duration.
     aggregates : List[str], optional
-        A list of aggregation methods to apply to the quantification
+        A list of aggregation methods to apply to the quantification
+        results.
 
     Returns
     -------
     dict
-        A dictionary containing the aggregated quantification results for
+        A dictionary containing the aggregated quantification results for
+        arm swing parameters.
     """
     arm_swing_parameters = [DataColumns.RANGE_OF_MOTION, DataColumns.PEAK_VELOCITY]
 
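A hypothetical aggregation sketch based on the signature above; the duration-category bounds are illustrative values, not defaults shipped with the package:

    segment_cats = [(0, 10), (10, 30), (30, float("inf"))]  # duration bounds in seconds (illustrative)
    aggregated = aggregate_arm_swing_params(
        df_arm_swing_params=quantified_arm_swing,
        segment_meta=gait_segment_meta,
        segment_cats=segment_cats,
        aggregates=["median"],
    )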
@@ -568,7 +644,8 @@ def aggregate_arm_swing_params(
         ]
 
         if len(cat_segments) > 0:
-            # For each segment, use 'duration_filtered_segment_s' if present,
+            # For each segment, use 'duration_filtered_segment_s' if present,
+            # else 'duration_unfiltered_segment_s'
             aggregated_results[segment_cat_str] = {
                 "duration_s": sum(
                     [
@@ -583,7 +660,7 @@ def aggregate_arm_swing_params(
             }
 
             df_arm_swing_params_cat = df_arm_swing_params.loc[
-                df_arm_swing_params[DataColumns.
+                df_arm_swing_params[DataColumns.GAIT_SEGMENT_NR].isin(cat_segments)
             ]
 
             # Aggregate across all segments
@@ -593,10 +670,14 @@ def aggregate_arm_swing_params(
             for aggregate in aggregates:
                 if aggregate in ["std", "cov"]:
                     per_segment_agg = []
-                    # If the aggregate is 'cov' (coefficient of variation),
+                    # If the aggregate is 'cov' (coefficient of variation),
+                    # we also compute the mean and standard deviation per
+                    # segment
                     segment_groups = dict(
                         tuple(
-                            df_arm_swing_params_cat.groupby(
+                            df_arm_swing_params_cat.groupby(
+                                DataColumns.GAIT_SEGMENT_NR
+                            )
                         )
                     )
                     for segment_nr in cat_segments:
@@ -613,10 +694,14 @@ def aggregate_arm_swing_params(
                     per_segment_agg = per_segment_agg[~np.isnan(per_segment_agg)]
 
                     for segment_level_aggregate in aggregates_per_segment:
-
-                        f"{segment_level_aggregate}_{aggregate}_
-
-
+                        key = (
+                            f"{segment_level_aggregate}_{aggregate}_"
+                            f"{arm_swing_parameter}"
+                        )
+                        aggregated_results[segment_cat_str][key] = (
+                            aggregate_parameter(
+                                per_segment_agg, segment_level_aggregate
+                            )
                         )
                 else:
                     aggregated_results[segment_cat_str][
@@ -638,7 +723,7 @@ def extract_temporal_domain_features(
     config,
     windowed_acc: np.ndarray,
     windowed_grav: np.ndarray,
-    grav_stats:
+    grav_stats: list[str] = ["mean"],
 ) -> pd.DataFrame:
     """
     Compute temporal domain features for the accelerometer signal.
@@ -688,9 +773,10 @@ def extract_spectral_domain_features(
     """
     Compute spectral domain features for a sensor's data.
 
-    This function computes the periodogram, extracts power in specific
-    calculates the dominant frequency, and computes
-    for a given sensor's
+    This function computes the periodogram, extracts power in specific
+    frequency bands, calculates the dominant frequency, and computes
+    Mel-frequency cepstral coefficients (MFCCs) for a given sensor's
+    windowed data.
 
     Parameters
     ----------
@@ -698,8 +784,8 @@ def extract_spectral_domain_features(
         A 2D numpy array where each row corresponds to a window of sensor data.
 
     config : object
-        Configuration object containing settings such as sampling frequency,
-        frequency bands, and MFCC parameters.
+        Configuration object containing settings such as sampling frequency,
+        window type, frequency bands, and MFCC parameters.
 
     sensor : str
         The name of the sensor (e.g., 'accelerometer', 'gyroscope').
@@ -707,8 +793,9 @@ def extract_spectral_domain_features(
     Returns
     -------
     pd.DataFrame
-        A DataFrame containing the computed spectral features, with each row
-        to a window and each column representing a specific
+        A DataFrame containing the computed spectral features, with each row
+        corresponding to a window and each column representing a specific
+        feature.
     """
     # Initialize a dictionary to hold the results
     feature_dict = {}
@@ -758,3 +845,307 @@ def extract_spectral_domain_features(
         feature_dict[colname] = mfccs[:, i]
 
     return pd.DataFrame(feature_dict)
+
+
+def run_gait_pipeline(
+    df_prepared: pd.DataFrame,
+    watch_side: str,
+    output_dir: str | Path,
+    imu_config: IMUConfig | None = None,
+    gait_config: GaitConfig | None = None,
+    arm_activity_config: GaitConfig | None = None,
+    store_intermediate: list[str] = [],
+    segment_number_offset: int = 0,
+    logging_level: int = logging.INFO,
+    custom_logger: logging.Logger | None = None,
+) -> tuple[pd.DataFrame, dict]:
+    """
+    Run the complete gait analysis pipeline on prepared data (steps 1-6).
+
+    This function implements the gait analysis workflow as described in the tutorials:
+    1. Preprocessing
+    2. Gait feature extraction
+    3. Gait detection
+    4. Arm activity feature extraction
+    5. Filtering gait
+    6. Arm swing quantification
+
+    Step 7 (aggregation) should be done after processing all segments.
+
+    Parameters
+    ----------
+    df_prepared : pd.DataFrame
+        Prepared IMU data with time, accelerometer, and gyroscope columns.
+        Should contain columns: time, accelerometer_x/y/z, gyroscope_x/y/z.
+        Will be preprocessed as step 1 of the pipeline.
+    watch_side : str
+        Side of the watch ('left' or 'right') to configure preprocessing accordingly.
+    output_dir : str or Path
+        Directory to save intermediate results (required)
+    imu_config : IMUConfig, optional
+        Configuration for IMU data preprocessing.
+        If None, uses default IMUConfig.
+    gait_config : GaitConfig, optional
+        Configuration for gait feature extraction and detection.
+        If None, uses default GaitConfig(step="gait").
+    arm_activity_config : GaitConfig, optional
+        Configuration for arm activity feature extraction and filtering.
+        If None, uses default GaitConfig(step="arm_activity").
+    store_intermediate : List[str]
+        Steps of which intermediate results should be stored:
+        - 'preprocessing': Store preprocessed data after step 1
+        - 'gait': Store gait features and predictions after step 3
+        - 'arm_activity': Store arm activity features and predictions after step 5
+        - 'quantification': Store arm swing quantification results after step 6
+        If empty, only returns the final quantified results.
+    segment_number_offset : int, optional, default=0
+        Offset to add to all segment numbers to avoid conflicts when concatenating
+        multiple data segments. Used for proper segment numbering across multiple files.
+    logging_level : int, default logging.INFO
+        Logging level using standard logging constants (logging.DEBUG, logging.INFO,
+        etc.)
+    custom_logger : logging.Logger, optional
+        Custom logger instance. If provided, logging_level is ignored.
+
+    Returns
+    -------
+    tuple[pd.DataFrame, dict]
+        A tuple containing:
+        - pd.DataFrame: Quantified arm swing parameters with the following columns:
+            - gait_segment_nr: Gait segment number within this data segment
+            - Various arm swing metrics (range of motion, peak angular velocity, etc.)
+            - Additional metadata columns
+        - dict: Gait segment metadata containing information about each detected
+          gait segment
+
+    Notes
+    -----
+    This function processes a single contiguous data segment. For multiple segments,
+    call this function for each segment, then use aggregate_arm_swing_params()
+    on the concatenated results.
+
+    The function follows the exact workflow from the gait analysis tutorial:
+    https://github.com/biomarkersParkinson/paradigma/blob/main/docs/
+    tutorials/gait_analysis.ipynb
+    """
+    # Setup logger
+    active_logger = custom_logger if custom_logger is not None else logger
+    if custom_logger is None:
+        active_logger.setLevel(logging_level)
+
+    # Set default configurations
+    if imu_config is None:
+        imu_config = IMUConfig()
+    if gait_config is None:
+        gait_config = GaitConfig(step="gait")
+    if arm_activity_config is None:
+        arm_activity_config = GaitConfig(step="arm_activity")
+
+    output_dir = Path(output_dir)
+
+    # Validate input data has required columns
+    required_columns = [
+        DataColumns.TIME,
+        DataColumns.ACCELEROMETER_X,
+        DataColumns.ACCELEROMETER_Y,
+        DataColumns.ACCELEROMETER_Z,
+        DataColumns.GYROSCOPE_X,
+        DataColumns.GYROSCOPE_Y,
+        DataColumns.GYROSCOPE_Z,
+    ]
+    missing_columns = [
+        col for col in required_columns if col not in df_prepared.columns
+    ]
+    if missing_columns:
+        raise ValueError(f"Missing required columns: {missing_columns}")
+
+    # Step 1: Preprocess data
+    active_logger.info("Step 1: Preprocessing IMU data")
+
+    df_preprocessed = preprocess_imu_data(
+        df=df_prepared,
+        config=imu_config,
+        sensor="both",
+        watch_side=watch_side,
+        verbose=1 if logging_level <= logging.INFO else 0,
+    )
+
+    if "preprocessing" in store_intermediate:
+        preprocessing_dir = output_dir / "preprocessing"
+        preprocessing_dir.mkdir(parents=True, exist_ok=True)
+        df_preprocessed.to_parquet(
+            preprocessing_dir / "preprocessed_data.parquet", index=False
+        )
+        active_logger.debug(
+            f"Saved preprocessed data to "
+            f"{preprocessing_dir / 'preprocessed_data.parquet'}"
+        )
+
+    # Step 2: Extract gait features
+    active_logger.info("Step 2: Extracting gait features")
+    df_gait = extract_gait_features(df_preprocessed, gait_config)
+
+    if "gait" in store_intermediate:
+        gait_dir = output_dir / "gait"
+        gait_dir.mkdir(parents=True, exist_ok=True)
+        df_gait.to_parquet(gait_dir / "gait_features.parquet", index=False)
+        active_logger.debug(
+            f"Saved gait features to {gait_dir / 'gait_features.parquet'}"
+        )
+
+    # Step 3: Detect gait
+    active_logger.info("Step 3: Detecting gait")
+    try:
+        classifier_path = files("paradigma.assets") / "gait_detection_clf_package.pkl"
+        classifier_package_gait = ClassifierPackage.load(classifier_path)
+    except Exception as e:
+        active_logger.error(f"Could not load gait detection classifier: {e}")
+        raise RuntimeError("Gait detection classifier not available")
+
+    gait_proba = detect_gait(df_gait, classifier_package_gait, parallel=False)
+    df_gait[DataColumns.PRED_GAIT_PROBA] = gait_proba
+
+    # Merge predictions back with timestamps
+    df_gait_with_time = merge_predictions_with_timestamps(
+        df_ts=df_preprocessed,
+        df_predictions=df_gait,
+        pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
+        window_length_s=gait_config.window_length_s,
+        fs=gait_config.sampling_frequency,
+    )
+
+    # Add binary prediction column
+    df_gait_with_time[DataColumns.PRED_GAIT] = (
+        df_gait_with_time[DataColumns.PRED_GAIT_PROBA]
+        >= classifier_package_gait.threshold
+    ).astype(int)
+
+    if "gait" in store_intermediate:
+        gait_dir = output_dir / "gait"
+        gait_dir.mkdir(parents=True, exist_ok=True)
+        df_gait_with_time.to_parquet(gait_dir / "gait_predictions.parquet", index=False)
+        active_logger.info(
+            f"Saved gait predictions to {gait_dir / 'gait_predictions.parquet'}"
+        )
+
+    # Filter to only gait periods
+    df_gait_only = df_gait_with_time.loc[
+        df_gait_with_time[DataColumns.PRED_GAIT] == 1
+    ].reset_index(drop=True)
+
+    if len(df_gait_only) == 0:
+        active_logger.warning("No gait detected in this segment")
+        return pd.DataFrame(), {}
+
+    # Step 4: Extract arm activity features
+    active_logger.info("Step 4: Extracting arm activity features")
+    df_arm_activity = extract_arm_activity_features(df_gait_only, arm_activity_config)
+
+    if "arm_activity" in store_intermediate:
+        arm_activity_dir = output_dir / "arm_activity"
+        arm_activity_dir.mkdir(parents=True, exist_ok=True)
+        df_arm_activity.to_parquet(
+            arm_activity_dir / "arm_activity_features.parquet", index=False
+        )
+        active_logger.debug(
+            f"Saved arm activity features to "
+            f"{arm_activity_dir / 'arm_activity_features.parquet'}"
+        )
+
+    # Step 5: Filter gait (remove other arm activities)
+    active_logger.info("Step 5: Filtering gait")
+    try:
+        classifier_path = files("paradigma.assets") / "gait_filtering_clf_package.pkl"
+        classifier_package_arm_activity = ClassifierPackage.load(classifier_path)
+    except Exception as e:
+        active_logger.error(f"Could not load arm activity classifier: {e}")
+        raise RuntimeError("Arm activity classifier not available")
+
+    # Filter gait returns probabilities which we add to the arm activity features
+    arm_activity_probabilities = filter_gait(
+        df_arm_activity, classifier_package_arm_activity, parallel=False
+    )
+
+    df_arm_activity[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA] = (
+        arm_activity_probabilities
+    )
+
+    # Merge predictions back with timestamps
+    df_filtered = merge_predictions_with_timestamps(
+        df_ts=df_gait_only,
+        df_predictions=df_arm_activity,
+        pred_proba_colname=DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA,
+        window_length_s=arm_activity_config.window_length_s,
+        fs=arm_activity_config.sampling_frequency,
+    )
+
+    # Add binary prediction column
+    filt_threshold = classifier_package_arm_activity.threshold
+    df_filtered[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] = (
+        df_filtered[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA] >= filt_threshold
+    ).astype(int)
+
+    if "arm_activity" in store_intermediate:
+        arm_activity_dir = output_dir / "arm_activity"
+        arm_activity_dir.mkdir(parents=True, exist_ok=True)
+        df_filtered.to_parquet(arm_activity_dir / "filtered_gait.parquet", index=False)
+        active_logger.debug(
+            f"Saved filtered gait to {arm_activity_dir / 'filtered_gait.parquet'}"
+        )
+
+    if (
+        len(df_filtered.loc[df_filtered[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1])
+        == 0
+    ):
+        active_logger.warning("No clean gait data remaining after filtering")
+        return pd.DataFrame(), {}
+
+    # Step 6: Quantify arm swing
+    active_logger.info("Step 6: Quantifying arm swing")
+    quantified_arm_swing, gait_segment_meta = quantify_arm_swing(
+        df=df_filtered,
+        fs=arm_activity_config.sampling_frequency,
+        filtered=True,
+        max_segment_gap_s=arm_activity_config.max_segment_gap_s,
+        min_segment_length_s=arm_activity_config.min_segment_length_s,
+    )
+
+    if "quantification" in store_intermediate:
+        quantification_dir = output_dir / "quantification"
+        quantification_dir.mkdir(parents=True, exist_ok=True)
+        quantified_arm_swing.to_parquet(
+            quantification_dir / "arm_swing_quantified.parquet", index=False
+        )
+
+        # Save gait segment metadata as JSON
+        with open(quantification_dir / "gait_segment_meta.json", "w") as f:
+            json.dump(gait_segment_meta, f, indent=2)
+
+        active_logger.debug(
+            f"Saved arm swing quantification to "
+            f"{quantification_dir / 'arm_swing_quantified.parquet'}"
+        )
+        active_logger.debug(
+            f"Saved gait segment metadata to "
+            f"{quantification_dir / 'gait_segment_meta.json'}"
+        )
+
+    active_logger.info(
+        f"Gait analysis pipeline completed. Found "
+        f"{len(quantified_arm_swing)} windows of gait "
+        f"without other arm activities."
+    )
+
+    # Apply segment number offset if specified (for multi-segment concatenation)
+    if segment_number_offset > 0 and len(quantified_arm_swing) > 0:
+        quantified_arm_swing = quantified_arm_swing.copy()
+        quantified_arm_swing["gait_segment_nr"] += segment_number_offset
+
+        # Also update the metadata with the new segment numbers
+        if gait_segment_meta and "per_segment" in gait_segment_meta:
+            updated_per_segment_meta = {}
+            for seg_id, meta in gait_segment_meta["per_segment"].items():
+                updated_per_segment_meta[seg_id + segment_number_offset] = meta
+            gait_segment_meta["per_segment"] = updated_per_segment_meta
+
+    return quantified_arm_swing, gait_segment_meta
|