paradigma 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries, as they appear in those registries. It is provided for informational purposes only.
- paradigma/__init__.py +10 -1
- paradigma/classification.py +38 -21
- paradigma/config.py +187 -123
- paradigma/constants.py +48 -35
- paradigma/feature_extraction.py +345 -255
- paradigma/load.py +476 -0
- paradigma/orchestrator.py +670 -0
- paradigma/pipelines/gait_pipeline.py +685 -246
- paradigma/pipelines/pulse_rate_pipeline.py +456 -155
- paradigma/pipelines/pulse_rate_utils.py +289 -248
- paradigma/pipelines/tremor_pipeline.py +405 -132
- paradigma/prepare_data.py +409 -0
- paradigma/preprocessing.py +500 -163
- paradigma/segmenting.py +180 -140
- paradigma/testing.py +370 -178
- paradigma/util.py +190 -101
- paradigma-1.1.0.dist-info/METADATA +229 -0
- paradigma-1.1.0.dist-info/RECORD +26 -0
- {paradigma-1.0.3.dist-info → paradigma-1.1.0.dist-info}/WHEEL +1 -1
- paradigma-1.1.0.dist-info/entry_points.txt +4 -0
- {paradigma-1.0.3.dist-info → paradigma-1.1.0.dist-info/licenses}/LICENSE +0 -1
- paradigma-1.0.3.dist-info/METADATA +0 -138
- paradigma-1.0.3.dist-info/RECORD +0 -22
paradigma/pipelines/gait_pipeline.py

@@ -1,107 +1,137 @@
+import json
 import logging
+from importlib.resources import files
+from pathlib import Path
+
 import numpy as np
 import pandas as pd
 from scipy.signal import periodogram
-from typing import List, Tuple
 
 from paradigma.classification import ClassifierPackage
+from paradigma.config import GaitConfig, IMUConfig
 from paradigma.constants import DataColumns
-from paradigma.
-
-
-
-
-
-
-
+from paradigma.feature_extraction import (
+    compute_angle,
+    compute_dominant_frequency,
+    compute_mfccs,
+    compute_peak_angular_velocity,
+    compute_power_in_bandwidth,
+    compute_range_of_motion,
+    compute_statistics,
+    compute_std_euclidean_norm,
+    compute_total_power,
+    extract_angle_extremes,
+    pca_transform_gyroscope,
+    remove_moving_average_angle,
+)
+from paradigma.preprocessing import preprocess_imu_data
+from paradigma.segmenting import (
+    WindowedDataExtractor,
+    create_segments,
+    discard_segments,
+    tabulate_windows,
+)
+from paradigma.util import aggregate_parameter, merge_predictions_with_timestamps
 
 logger = logging.getLogger(__name__)
 
 # Only configure basic logging if no handlers exist
 if not logger.hasHandlers():
-    logging.basicConfig(level=logging.INFO, format=
+    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
 
-
-
-    config: GaitConfig
-) -> pd.DataFrame:
+
+def extract_gait_features(df: pd.DataFrame, config: GaitConfig) -> pd.DataFrame:
     """
-    Extracts gait features from accelerometer and gravity sensor data in the
+    Extracts gait features from accelerometer and gravity sensor data in the
+    input DataFrame by computing temporal and spectral features.
 
     This function performs the following steps:
-    1. Groups sequences of timestamps into windows, using accelerometer and
-
-
+    1. Groups sequences of timestamps into windows, using accelerometer and
+       gravity data.
+    2. Computes temporal domain features such as mean and standard deviation
+       for accelerometer and gravity data.
+    3. Transforms the signals from the temporal domain to the spectral
+       domain using the Fast Fourier Transform (FFT).
     4. Computes spectral domain features for the accelerometer data.
     5. Combines both temporal and spectral features into a final DataFrame.
 
     Parameters
     ----------
     df : pd.DataFrame
-        The input DataFrame containing gait data, which includes time,
+        The input DataFrame containing gait data, which includes time,
+        accelerometer, and gravity sensor data. The data should be
        structured with the necessary columns as specified in the `config`.
 
     config : GaitConfig
-        Configuration object containing parameters for feature extraction,
-
+        Configuration object containing parameters for feature extraction,
+        including column names for time, accelerometer data, and gravity
+        data, as well as settings for windowing, and feature computation.
 
     Returns
     -------
     pd.DataFrame
-        A DataFrame containing extracted gait features, including temporal
-
-        accelerometer
-
+        A DataFrame containing extracted gait features, including temporal
+        and spectral domain features. The DataFrame will have columns
+        corresponding to time, statistical features of the accelerometer and
+        gravity data, and spectral features of the accelerometer data.
+
     Notes
     -----
-    - This function groups the data into windows based on timestamps and
-
-    - The
+    - This function groups the data into windows based on timestamps and
+      applies Fast Fourier Transform to compute spectral features.
+    - The temporal features are extracted from the accelerometer and gravity
+      data, and include statistics like mean and standard deviation.
+    - The input DataFrame must include columns as specified in the `config`
+      object for proper feature extraction.
 
     Raises
     ------
     ValueError
-        If the input DataFrame does not contain the required columns as
+        If the input DataFrame does not contain the required columns as
+        specified in the configuration or if any step in the feature
+        extraction fails.
     """
     # Group sequences of timestamps into windows
-
+    windowed_colnames = (
+        [config.time_colname] + config.accelerometer_colnames + config.gravity_colnames
+    )
     windowed_data = tabulate_windows(
-        df=df,
-        columns=
+        df=df,
+        columns=windowed_colnames,
         window_length_s=config.window_length_s,
         window_step_length_s=config.window_step_length_s,
-        fs=config.sampling_frequency
+        fs=config.sampling_frequency,
     )
 
-    extractor = WindowedDataExtractor(
+    extractor = WindowedDataExtractor(windowed_colnames)
 
-    idx_time = extractor.get_index(
-    idx_acc = extractor.get_slice(config.
-    idx_grav = extractor.get_slice(config.
+    idx_time = extractor.get_index(config.time_colname)
+    idx_acc = extractor.get_slice(config.accelerometer_colnames)
+    idx_grav = extractor.get_slice(config.gravity_colnames)
 
     # Extract data
     start_time = np.min(windowed_data[:, :, idx_time], axis=1)
     windowed_acc = windowed_data[:, :, idx_acc]
     windowed_grav = windowed_data[:, :, idx_grav]
 
-    df_features = pd.DataFrame(start_time, columns=[
-
-    # Compute statistics of the temporal domain signals (mean, std) for
+    df_features = pd.DataFrame(start_time, columns=[config.time_colname])
+
+    # Compute statistics of the temporal domain signals (mean, std) for
+    # accelerometer and gravity
     df_temporal_features = extract_temporal_domain_features(
-        config=config,
+        config=config,
         windowed_acc=windowed_acc,
         windowed_grav=windowed_grav,
-        grav_stats=[
+        grav_stats=["mean", "std"],
     )
 
     # Combine temporal features with the start time
     df_features = pd.concat([df_features, df_temporal_features], axis=1)
 
-    # Transform the accelerometer data to the spectral domain using FFT and
+    # Transform the accelerometer data to the spectral domain using FFT and
+    # extract spectral features
     df_spectral_features = extract_spectral_domain_features(
-        config=config,
-        sensor='accelerometer',
-        windowed_data=windowed_acc
+        config=config, sensor="accelerometer", windowed_data=windowed_acc
     )
 
     # Combine the spectral features with the previously computed temporal features
@@ -111,40 +141,47 @@ def extract_gait_features(
 
 
 def detect_gait(
-
-
-    parallel: bool=False
-) -> pd.Series:
+    df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
+) -> pd.Series:
     """
-    Detects gait activity in the input DataFrame using a pre-trained
+    Detects gait activity in the input DataFrame using a pre-trained
+    classifier and applies a threshold to classify results.
 
     This function performs the following steps:
-    1. Loads the pre-trained classifier and scaling parameters from the
-
-
-
+    1. Loads the pre-trained classifier and scaling parameters from the
+       specified directory.
+    2. Scales the relevant features in the input DataFrame (`df`) using the
+       loaded scaling parameters.
+    3. Predicts the probability of gait activity for each sample in the
+       DataFrame using the classifier.
+    4. Applies a threshold to the predicted probabilities to determine
+       whether gait activity is present.
     5. Returns predicted probabilities
 
     Parameters
     ----------
     df : pd.DataFrame
-        The input DataFrame containing features extracted from gait data. It
-        as specified in the classifier's
+        The input DataFrame containing features extracted from gait data. It
+        must include the necessary columns as specified in the classifier's
+        feature names.
 
     clf_package : ClassifierPackage
-        The pre-trained classifier package containing the classifier,
+        The pre-trained classifier package containing the classifier,
+        threshold, and scaler.
 
     parallel : bool, optional, default=False
-        If `True`, enables parallel processing during classification. If
+        If `True`, enables parallel processing during classification. If
+        `False`, the classifier uses a single core.
 
     Returns
     -------
     pd.Series
-        A Series containing the predicted probabilities of gait activity for
+        A Series containing the predicted probabilities of gait activity for
+        each sample in the input DataFrame.
     """
     # Set classifier
     clf = clf_package.classifier
-    if not parallel and hasattr(clf,
+    if not parallel and hasattr(clf, "n_jobs"):
         clf.n_jobs = 1
 
     feature_names_scaling = clf_package.scaler.feature_names_in_
@@ -154,78 +191,108 @@ def detect_gait(
     scaled_features = clf_package.transform_features(df.loc[:, feature_names_scaling])
 
     # Replace scaled features in a copy of the relevant features for prediction
-
-
+    x_features = df.loc[:, feature_names_predictions].copy()
+    x_features.loc[:, feature_names_scaling] = scaled_features
 
     # Make prediction and add the probability of gait activity to the DataFrame
-    pred_gait_proba_series = clf_package.predict_proba(
+    pred_gait_proba_series = clf_package.predict_proba(x_features)
 
     return pred_gait_proba_series
 
 
 def extract_arm_activity_features(
-
-
-
+    df: pd.DataFrame,
+    config: GaitConfig,
+) -> pd.DataFrame:
     """
     Extract features related to arm activity from a time-series DataFrame.
 
-    This function processes a DataFrame containing accelerometer, gravity,
-    and extracts features related to arm activity by
+    This function processes a DataFrame containing accelerometer, gravity,
+    and gyroscope signals, and extracts features related to arm activity by
+    performing the following steps:
     1. Computes the angle and velocity from gyroscope data.
     2. Filters the data to include only predicted gait segments.
-    3. Groups the data into segments based on consecutive timestamps and
+    3. Groups the data into segments based on consecutive timestamps and
+       pre-specified gaps.
     4. Removes segments that do not meet predefined criteria.
     5. Creates fixed-length windows from the time series data.
-    6. Extracts angle-related features, temporal domain features, and
+    6. Extracts angle-related features, temporal domain features, and
+       spectral domain features.
 
     Parameters
     ----------
     df: pd.DataFrame
-        The input DataFrame containing accelerometer, gravity, and
+        The input DataFrame containing accelerometer, gravity, and
+        gyroscope data of predicted gait.
 
     config : ArmActivityFeatureExtractionConfig
-        Configuration object containing column names and parameters
+        Configuration object containing column names and parameters
+        for feature extraction.
 
     Returns
     -------
     pd.DataFrame
-        A DataFrame containing the extracted arm activity features,
-        temporal, and spectral features.
+        A DataFrame containing the extracted arm activity features,
+        including angle, velocity, temporal, and spectral features.
     """
-    # Group consecutive timestamps into segments, with new segments
-
-
-
-
+    # Group consecutive timestamps into segments, with new segments
+    # starting after a pre-specified gap. If data_segment_nr exists,
+    # create gait segments per data segment to preserve both
+    has_data_segments = DataColumns.DATA_SEGMENT_NR in df.columns
+
+    if has_data_segments:
+        df_list = []
+        gait_segment_offset = 0
+
+        for data_seg_nr in sorted(df[DataColumns.DATA_SEGMENT_NR].unique()):
+            df_seg = df[df[DataColumns.DATA_SEGMENT_NR] == data_seg_nr].copy()
+
+            # Create gait segments within this data segment
+            df_seg[DataColumns.GAIT_SEGMENT_NR] = create_segments(
+                time_array=df_seg[DataColumns.TIME].values,
+                max_segment_gap_s=config.max_segment_gap_s,
+            )
+
+            # Offset gait segment numbers to be unique across data segments
+            if gait_segment_offset > 0:
+                df_seg[DataColumns.GAIT_SEGMENT_NR] += gait_segment_offset
+            gait_segment_offset = df_seg[DataColumns.GAIT_SEGMENT_NR].max() + 1
+
+            df_list.append(df_seg)
+
+        df = pd.concat(df_list, ignore_index=True)
+    else:
+        df[DataColumns.GAIT_SEGMENT_NR] = create_segments(
+            time_array=df[DataColumns.TIME], max_segment_gap_s=config.max_segment_gap_s
+        )
 
     # Remove segments that do not meet predetermined criteria
     df = discard_segments(
         df=df,
-        segment_nr_colname=DataColumns.
+        segment_nr_colname=DataColumns.GAIT_SEGMENT_NR,
         min_segment_length_s=config.min_segment_length_s,
         fs=config.sampling_frequency,
-        format=
+        format="timestamps",
     )
 
     # Create windows of fixed length and step size from the time series per segment
     windowed_data = []
-    df_grouped = df.groupby(DataColumns.
-
-    [
-    config.
-    config.
-    config.
+    df_grouped = df.groupby(DataColumns.GAIT_SEGMENT_NR)
+    windowed_colnames = (
+        [config.time_colname]
+        + config.accelerometer_colnames
+        + config.gravity_colnames
+        + config.gyroscope_colnames
     )
 
     # Collect windows from all segments in a list for faster concatenation
     for _, group in df_grouped:
         windows = tabulate_windows(
-            df=group,
-            columns=
+            df=group,
+            columns=windowed_colnames,
             window_length_s=config.window_length_s,
             window_step_length_s=config.window_step_length_s,
-            fs=config.sampling_frequency
+            fs=config.sampling_frequency,
         )
         if len(windows) > 0:  # Skip if no windows are created
            windowed_data.append(windows)
@@ -239,12 +306,12 @@ def extract_arm_activity_features(
     windowed_data = np.concatenate(windowed_data, axis=0)
 
     # Slice columns for accelerometer, gravity, gyroscope, angle, and velocity
-    extractor = WindowedDataExtractor(
+    extractor = WindowedDataExtractor(windowed_colnames)
 
-    idx_time = extractor.get_index(
-    idx_acc = extractor.get_slice(config.
-    idx_grav = extractor.get_slice(config.
-    idx_gyro = extractor.get_slice(config.
+    idx_time = extractor.get_index(config.time_colname)
+    idx_acc = extractor.get_slice(config.accelerometer_colnames)
+    idx_grav = extractor.get_slice(config.gravity_colnames)
+    idx_gyro = extractor.get_slice(config.gyroscope_colnames)
 
     # Extract data
     start_time = np.min(windowed_data[:, :, idx_time], axis=1)
@@ -253,23 +320,23 @@
     windowed_gyro = windowed_data[:, :, idx_gyro]
 
     # Initialize DataFrame for features
-    df_features = pd.DataFrame(start_time, columns=[
+    df_features = pd.DataFrame(start_time, columns=[config.time_colname])
 
     # Extract temporal domain features (e.g., mean, std for accelerometer and gravity)
     df_temporal_features = extract_temporal_domain_features(
-        config=config,
-        windowed_acc=windowed_acc,
-        windowed_grav=windowed_grav,
-        grav_stats=[
+        config=config,
+        windowed_acc=windowed_acc,
+        windowed_grav=windowed_grav,
+        grav_stats=["mean", "std"],
     )
     df_features = pd.concat([df_features, df_temporal_features], axis=1)
 
     # Extract spectral domain features for accelerometer and gyroscope signals
-    for sensor_name, windowed_sensor in zip(
+    for sensor_name, windowed_sensor in zip(
+        ["accelerometer", "gyroscope"], [windowed_acc, windowed_gyro]
+    ):
         df_spectral_features = extract_spectral_domain_features(
-            config=config,
-            sensor=sensor_name,
-            windowed_data=windowed_sensor
+            config=config, sensor=sensor_name, windowed_data=windowed_sensor
         )
         df_features = pd.concat([df_features, df_spectral_features], axis=1)
 
@@ -277,19 +344,19 @@ def extract_arm_activity_features(
 
 
 def filter_gait(
-
-
-    parallel: bool=False
-) -> pd.Series:
+    df: pd.DataFrame, clf_package: ClassifierPackage, parallel: bool = False
+) -> pd.Series:
     """
-    Filters gait data to identify windows with no other arm activity using
+    Filters gait data to identify windows with no other arm activity using
+    a pre-trained classifier.
 
     Parameters
     ----------
     df : pd.DataFrame
         The input DataFrame containing features extracted from gait data.
     clf_package: ClassifierPackage
-        The pre-trained classifier package containing the classifier,
+        The pre-trained classifier package containing the classifier,
+        threshold, and scaler.
     parallel : bool, optional, default=False
         If `True`, enables parallel processing.
 
@@ -300,10 +367,10 @@ def filter_gait(
     """
     if df.shape[0] == 0:
         raise ValueError("No data found in the input DataFrame.")
-
+
     # Set classifier
     clf = clf_package.classifier
-    if not parallel and hasattr(clf,
+    if not parallel and hasattr(clf, "n_jobs"):
         clf.n_jobs = 1
 
     feature_names_scaling = clf_package.scaler.feature_names_in_
@@ -313,109 +380,117 @@ def filter_gait(
     scaled_features = clf_package.transform_features(df.loc[:, feature_names_scaling])
 
     # Replace scaled features in a copy of the relevant features for prediction
-
-
+    x_features = df.loc[:, feature_names_predictions].copy()
+    x_features.loc[:, feature_names_scaling] = scaled_features
 
     # Make predictions
-    pred_no_other_arm_activity_proba_series = clf_package.predict_proba(
+    pred_no_other_arm_activity_proba_series = clf_package.predict_proba(x_features)
 
     return pred_no_other_arm_activity_proba_series
 
 
 def quantify_arm_swing(
-
-
-
-
-
-
+    df: pd.DataFrame,
+    fs: int,
+    filtered: bool = False,
+    max_segment_gap_s: float = 1.5,
+    min_segment_length_s: float = 1.5,
+) -> tuple[dict[str, pd.DataFrame], dict]:
     """
     Quantify arm swing parameters for segments of motion based on gyroscope data.
 
     Parameters
     ----------
     df : pd.DataFrame
-        A DataFrame containing the raw sensor data of predicted gait
-
+        A DataFrame containing the raw sensor data of predicted gait
+        timestamps. Should include a column for predicted no other arm
+        activity based on a fitted threshold if filtered is True.
 
     fs : int
         The sampling frequency of the sensor data.
 
     filtered : bool, optional, default=True
-        If `True`, the gyroscope data is filtered to only include predicted
+        If `True`, the gyroscope data is filtered to only include predicted
+        no other arm activity.
 
     max_segment_gap_s : float, optional, default=1.5
-        The maximum gap in seconds between consecutive timestamps to group
-
+        The maximum gap in seconds between consecutive timestamps to group
+        them into segments.
+
     min_segment_length_s : float, optional, default=1.5
         The minimum length in seconds for a segment to be considered valid.
 
     Returns
     -------
     Tuple[pd.DataFrame, dict]
-        A tuple containing a dataframe with quantified arm swing parameters
-        metadata for each segment.
+        A tuple containing a dataframe with quantified arm swing parameters
+        and a dictionary containing metadata for each segment.
     """
-    # Group consecutive timestamps into segments, with new segments starting
-    # Segments are made based on predicted gait
-    df[
-        time_array=df[DataColumns.TIME],
-        max_segment_gap_s=max_segment_gap_s
+    # Group consecutive timestamps into segments, with new segments starting
+    # after a pre-specified gap. Segments are made based on predicted gait
+    df["unfiltered_segment_nr"] = create_segments(
+        time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
     )
 
     # Remove segments that do not meet predetermined criteria
     df = discard_segments(
         df=df,
-        segment_nr_colname=
+        segment_nr_colname="unfiltered_segment_nr",
         min_segment_length_s=min_segment_length_s,
         fs=fs,
-        format=
+        format="timestamps",
     )
 
     if df.empty:
-        raise ValueError(
-
+        raise ValueError(
+            "No segments found in the input data after discarding segments "
+            "of invalid shape."
+        )
+
     # Create dictionary of gait segment number and duration
-    gait_segment_duration_dict = {
-
+    gait_segment_duration_dict = {
+        segment_nr: len(group[DataColumns.TIME]) / fs
+        for segment_nr, group in df.groupby("unfiltered_segment_nr", sort=False)
+    }
+
     # If no arm swing data is remaining, return an empty dictionary
-    if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].empty:
+    if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1].empty:
         raise ValueError("No gait without other arm activities to quantify.")
     elif filtered:
         # Filter the DataFrame to only include predicted no other arm activity (1)
-        df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].reset_index(
+        df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1].reset_index(
+            drop=True
+        )
 
         # Group consecutive timestamps into segments of filtered gait
-        df[
-            time_array=df[DataColumns.TIME],
-            max_segment_gap_s=max_segment_gap_s
+        df["filtered_segment_nr"] = create_segments(
+            time_array=df[DataColumns.TIME], max_segment_gap_s=max_segment_gap_s
        )
 
         # Remove segments that do not meet predetermined criteria
         df = discard_segments(
             df=df,
-            segment_nr_colname=
+            segment_nr_colname="filtered_segment_nr",
             min_segment_length_s=min_segment_length_s,
             fs=fs,
         )
 
         if df.empty:
-            raise ValueError(
-
-
+            raise ValueError(
+                "No filtered gait segments found in the input data after "
+                "discarding segments of invalid shape."
+            )
+
+    grouping_colname = "filtered_segment_nr" if filtered else "unfiltered_segment_nr"
 
     arm_swing_quantified = []
     segment_meta = {
-
-
-        'duration_s': len(df[DataColumns.TIME]) / fs
-        },
-        },
-        'per_segment': {}
+        "all": {"duration_s": len(df[DataColumns.TIME]) / fs},
+        "per_segment": {},
     }
 
-    # PCA is fitted on only predicted gait without other arm activity if
-    # it is fitted on the entire gyroscope data
+    # PCA is fitted on only predicted gait without other arm activity if
+    # filtered, otherwise it is fitted on the entire gyroscope data
     df[DataColumns.VELOCITY] = pca_transform_gyroscope(
         df=df,
         y_gyro_colname=DataColumns.GYROSCOPE_Y,
@@ -425,7 +500,9 @@ def quantify_arm_swing(
     # Group and process segments
     for segment_nr, group in df.groupby(grouping_colname, sort=False):
         if filtered:
-            gait_segment_nr = group[
+            gait_segment_nr = group["unfiltered_segment_nr"].iloc[
+                0
+            ]  # Each filtered segment is contained within an unfiltered segment
         else:
             gait_segment_nr = segment_nr
 
@@ -433,10 +510,14 @@
             gait_segment_duration_s = gait_segment_duration_dict[gait_segment_nr]
         except KeyError:
             logger.warning(
-                "Segment %s (filtered = %s) not found in gait segment
-
+                "Segment %s (filtered = %s) not found in gait segment "
+                "duration dictionary. Skipping this segment.",
+                gait_segment_nr,
+                filtered,
+            )
+            logger.debug(
+                "Available segments: %s", list(gait_segment_duration_dict.keys())
             )
-            logger.debug("Available segments: %s", list(gait_segment_duration_dict.keys()))
             continue
 
         time_array = group[DataColumns.TIME].to_numpy()
@@ -454,20 +535,22 @@
             fs=fs,
         )
 
-        segment_meta[
-
-
-
+        segment_meta["per_segment"][segment_nr] = {
+            "start_time_s": float(time_array.min()),
+            "end_time_s": float(time_array.max()),
+            "duration_unfiltered_segment_s": gait_segment_duration_s,
         }
 
         if filtered:
-            segment_meta[
+            segment_meta["per_segment"][segment_nr]["duration_filtered_segment_s"] = (
+                len(time_array) / fs
+            )
 
-        if angle_array.size > 0:
+        if angle_array.size > 0:
             angle_extrema_indices, _, _ = extract_angle_extremes(
                 angle_array=angle_array,
                 sampling_frequency=fs,
-                max_frequency_activity=1.75
+                max_frequency_activity=1.75,
             )
 
             if len(angle_extrema_indices) > 1:  # Requires at minimum 2 peaks
@@ -478,36 +561,55 @@
                 )
             except Exception as e:
                 # Handle the error, set RoM to NaN, and log the error
-                print(
+                print(
+                    f"Error computing range of motion for segment "
+                    f"{segment_nr}: {e}"
+                )
                 rom = np.array([np.nan])
 
             try:
                 pav = compute_peak_angular_velocity(
                     velocity_array=velocity_array,
-                    angle_extrema_indices=angle_extrema_indices
+                    angle_extrema_indices=angle_extrema_indices,
                )
             except Exception as e:
                 # Handle the error, set pav to NaN, and log the error
-                print(
+                print(
+                    f"Error computing peak angular velocity for segment "
+                    f"{segment_nr}: {e}"
+                )
                 pav = np.array([np.nan])
 
-
-            DataColumns.
+        params_dict = {
+            DataColumns.GAIT_SEGMENT_NR: segment_nr,
             DataColumns.RANGE_OF_MOTION: rom,
-            DataColumns.PEAK_VELOCITY: pav
-        }
+            DataColumns.PEAK_VELOCITY: pav,
+        }
+
+        # Add data_segment_nr if it exists in the input data
+        if DataColumns.DATA_SEGMENT_NR in group.columns:
+            params_dict[DataColumns.DATA_SEGMENT_NR] = group[
+                DataColumns.DATA_SEGMENT_NR
+            ].iloc[0]
+
+        df_params_segment = pd.DataFrame(params_dict)
 
         arm_swing_quantified.append(df_params_segment)
 
     arm_swing_quantified = pd.concat(arm_swing_quantified, ignore_index=True)
-
+
    return arm_swing_quantified, segment_meta
 
 
-def aggregate_arm_swing_params(
+def aggregate_arm_swing_params(
+    df_arm_swing_params: pd.DataFrame,
+    segment_meta: dict,
+    segment_cats: list[tuple],
+    aggregates: list[str] = ["median"],
+) -> dict:
     """
     Aggregate the quantification results for arm swing parameters.
-
+
     Parameters
     ----------
     df_arm_swing_params : pd.DataFrame
@@ -517,84 +619,116 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
         A dictionary containing metadata for each segment.
 
     segment_cats : List[tuple]
-        A list of tuples defining the segment categories, where each tuple
-
+        A list of tuples defining the segment categories, where each tuple
+        contains the lower and upper bounds for the segment duration.
     aggregates : List[str], optional
-        A list of aggregation methods to apply to the quantification
-
+        A list of aggregation methods to apply to the quantification
+        results.
+
     Returns
     -------
     dict
-        A dictionary containing the aggregated quantification results for
+        A dictionary containing the aggregated quantification results for
+        arm swing parameters.
     """
     arm_swing_parameters = [DataColumns.RANGE_OF_MOTION, DataColumns.PEAK_VELOCITY]
 
     aggregated_results = {}
     for segment_cat_range in segment_cats:
-        segment_cat_str = f
+        segment_cat_str = f"{segment_cat_range[0]}_{segment_cat_range[1]}"
         cat_segments = [
-            x
-
-
+            x
+            for x in segment_meta.keys()
+            if segment_meta[x]["duration_unfiltered_segment_s"] >= segment_cat_range[0]
+            and segment_meta[x]["duration_unfiltered_segment_s"] < segment_cat_range[1]
        ]
 
-        if len(cat_segments) > 0:
-            # For each segment, use 'duration_filtered_segment_s' if present,
+        if len(cat_segments) > 0:
+            # For each segment, use 'duration_filtered_segment_s' if present,
+            # else 'duration_unfiltered_segment_s'
             aggregated_results[segment_cat_str] = {
-
+                "duration_s": sum(
                    [
-
-
-
+                        (
+                            segment_meta[x]["duration_filtered_segment_s"]
+                            if "duration_filtered_segment_s" in segment_meta[x]
+                            else segment_meta[x]["duration_unfiltered_segment_s"]
+                        )
                        for x in cat_segments
                    ]
-                )
+                )
+            }
+
+            df_arm_swing_params_cat = df_arm_swing_params.loc[
+                df_arm_swing_params[DataColumns.GAIT_SEGMENT_NR].isin(cat_segments)
+            ]
 
-            df_arm_swing_params_cat = df_arm_swing_params.loc[df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)]
-
             # Aggregate across all segments
-            aggregates_per_segment = [
+            aggregates_per_segment = ["median", "mean"]
 
             for arm_swing_parameter in arm_swing_parameters:
                 for aggregate in aggregates:
-                    if aggregate in [
+                    if aggregate in ["std", "cov"]:
                         per_segment_agg = []
-                        # If the aggregate is 'cov' (coefficient of variation),
-
+                        # If the aggregate is 'cov' (coefficient of variation),
+                        # we also compute the mean and standard deviation per
+                        # segment
+                        segment_groups = dict(
+                            tuple(
+                                df_arm_swing_params_cat.groupby(
+                                    DataColumns.GAIT_SEGMENT_NR
+                                )
+                            )
+                        )
                        for segment_nr in cat_segments:
                            segment_df = segment_groups.get(segment_nr)
                            if segment_df is not None:
-                                per_segment_agg.append(
+                                per_segment_agg.append(
+                                    aggregate_parameter(
+                                        segment_df[arm_swing_parameter], aggregate
+                                    )
+                                )
 
                        # Drop nans
                        per_segment_agg = np.array(per_segment_agg)
                        per_segment_agg = per_segment_agg[~np.isnan(per_segment_agg)]
 
-
                        for segment_level_aggregate in aggregates_per_segment:
-
+                            key = (
+                                f"{segment_level_aggregate}_{aggregate}_"
+                                f"{arm_swing_parameter}"
+                            )
+                            aggregated_results[segment_cat_str][key] = (
+                                aggregate_parameter(
+                                    per_segment_agg, segment_level_aggregate
+                                )
+                            )
                    else:
-                        aggregated_results[segment_cat_str][
+                        aggregated_results[segment_cat_str][
+                            f"{aggregate}_{arm_swing_parameter}"
+                        ] = aggregate_parameter(
+                            df_arm_swing_params_cat[arm_swing_parameter], aggregate
+                        )
 
        else:
            # If no segments are found for this category, initialize with NaN
            aggregated_results[segment_cat_str] = {
-
+                "duration_s": 0,
            }
 
    return aggregated_results
 
 
 def extract_temporal_domain_features(
-
-
-
-
-
+    config,
+    windowed_acc: np.ndarray,
+    windowed_grav: np.ndarray,
+    grav_stats: list[str] = ["mean"],
+) -> pd.DataFrame:
     """
     Compute temporal domain features for the accelerometer signal.
 
-    This function calculates various statistical features for the gravity signal
+    This function calculates various statistical features for the gravity signal
     and computes the standard deviation of the accelerometer's Euclidean norm.
 
     Parameters
@@ -602,10 +736,10 @@ def extract_temporal_domain_features(
     config : object
         Configuration object containing the accelerometer and gravity column names.
     windowed_acc : numpy.ndarray
-        A 2D numpy array of shape (N, M) where N is the number of windows and M is
+        A 2D numpy array of shape (N, M) where N is the number of windows and M is
         the number of accelerometer values per window.
     windowed_grav : numpy.ndarray
-        A 2D numpy array of shape (N, M) where N is the number of windows and M is
+        A 2D numpy array of shape (N, M) where N is the number of windows and M is
         the number of gravity signal values per window.
     grav_stats : list of str, optional
         A list of statistics to compute for the gravity signal (default is ['mean']).
@@ -613,33 +747,36 @@
     Returns
     -------
     pd.DataFrame
-        A DataFrame containing the computed features, with each row corresponding
+        A DataFrame containing the computed features, with each row corresponding
         to a window and each column representing a specific feature.
     """
     # Compute gravity statistics (e.g., mean, std, etc.)
     feature_dict = {}
     for stat in grav_stats:
         stats_result = compute_statistics(data=windowed_grav, statistic=stat)
-        for i, col in enumerate(config.
-            feature_dict[f
+        for i, col in enumerate(config.gravity_colnames):
+            feature_dict[f"{col}_{stat}"] = stats_result[:, i]
 
     # Compute standard deviation of the Euclidean norm of the accelerometer signal
-    feature_dict[
+    feature_dict["accelerometer_std_norm"] = compute_std_euclidean_norm(
+        data=windowed_acc
+    )
 
     return pd.DataFrame(feature_dict)
 
 
 def extract_spectral_domain_features(
-
-
-
-
+    windowed_data: np.ndarray,
+    config,
+    sensor: str,
+) -> pd.DataFrame:
     """
     Compute spectral domain features for a sensor's data.
 
-    This function computes the periodogram, extracts power in specific
-    calculates the dominant frequency, and computes
-    for a given sensor's
+    This function computes the periodogram, extracts power in specific
+    frequency bands, calculates the dominant frequency, and computes
+    Mel-frequency cepstral coefficients (MFCCs) for a given sensor's
+    windowed data.
 
     Parameters
     ----------
@@ -647,66 +784,368 @@ def extract_spectral_domain_features(
         A 2D numpy array where each row corresponds to a window of sensor data.
 
     config : object
-        Configuration object containing settings such as sampling frequency,
-        frequency bands, and MFCC parameters.
+        Configuration object containing settings such as sampling frequency,
+        window type, frequency bands, and MFCC parameters.
 
     sensor : str
         The name of the sensor (e.g., 'accelerometer', 'gyroscope').
-
+
     Returns
     -------
     pd.DataFrame
-        A DataFrame containing the computed spectral features, with each row
-        to a window and each column representing a specific
+        A DataFrame containing the computed spectral features, with each row
+        corresponding to a window and each column representing a specific
+        feature.
     """
     # Initialize a dictionary to hold the results
     feature_dict = {}
 
     # Compute periodogram (power spectral density)
     freqs, psd = periodogram(
-        x=windowed_data,
-        fs=config.sampling_frequency,
-        window=config.window_type,
-        axis=1
+        x=windowed_data, fs=config.sampling_frequency, window=config.window_type, axis=1
     )
 
     # Compute power in specified frequency bands
     for band_name, band_freqs in config.d_frequency_bandwidths.items():
         band_powers = compute_power_in_bandwidth(
             freqs=freqs,
-            psd=psd,
+            psd=psd,
             fmin=band_freqs[0],
             fmax=band_freqs[1],
-            include_max=False
+            include_max=False,
         )
         for i, col in enumerate(config.axes):
-            feature_dict[f
+            feature_dict[f"{sensor}_{col}_{band_name}"] = band_powers[:, i]
 
     # Compute dominant frequency for each axis
     dominant_frequencies = compute_dominant_frequency(
-        freqs=freqs,
-        psd=psd,
-        fmin=config.spectrum_low_frequency,
-        fmax=config.spectrum_high_frequency
+        freqs=freqs,
+        psd=psd,
+        fmin=config.spectrum_low_frequency,
+        fmax=config.spectrum_high_frequency,
    )
 
     # Add dominant frequency features to the feature_dict
     for axis, freq in zip(config.axes, dominant_frequencies.T):
-        feature_dict[f
+        feature_dict[f"{sensor}_{axis}_dominant_frequency"] = freq
 
     # Compute total power in the PSD
     total_power_psd = compute_total_power(psd)
 
     # Compute MFCCs
     mfccs = compute_mfccs(
-        total_power_array=total_power_psd,
-        config=config,
-        multiplication_factor=4
+        total_power_array=total_power_psd, config=config, multiplication_factor=4
     )
 
     # Combine the MFCCs into the features DataFrame
-    mfcc_colnames = [
+    mfcc_colnames = [
+        f"{sensor}_mfcc_{x}" for x in range(1, config.mfcc_n_coefficients + 1)
+    ]
     for i, colname in enumerate(mfcc_colnames):
         feature_dict[colname] = mfccs[:, i]
 
-    return pd.DataFrame(feature_dict)
+    return pd.DataFrame(feature_dict)
+
+
+def run_gait_pipeline(
+    df_prepared: pd.DataFrame,
+    watch_side: str,
+    output_dir: str | Path,
+    imu_config: IMUConfig | None = None,
+    gait_config: GaitConfig | None = None,
+    arm_activity_config: GaitConfig | None = None,
+    store_intermediate: list[str] = [],
+    segment_number_offset: int = 0,
+    logging_level: int = logging.INFO,
+    custom_logger: logging.Logger | None = None,
+) -> tuple[pd.DataFrame, dict]:
+    """
+    Run the complete gait analysis pipeline on prepared data (steps 1-6).
+
+    This function implements the gait analysis workflow as described in the tutorials:
+    1. Preprocessing
+    2. Gait feature extraction
+    3. Gait detection
+    4. Arm activity feature extraction
+    5. Filtering gait
+    6. Arm swing quantification
+
+    Step 7 (aggregation) should be done after processing all segments.
+
+    Parameters
+    ----------
+    df_prepared : pd.DataFrame
+        Prepared IMU data with time, accelerometer, and gyroscope columns.
+        Should contain columns: time, accelerometer_x/y/z, gyroscope_x/y/z.
+        Will be preprocessed as step 1 of the pipeline.
+    watch_side : str
+        Side of the watch ('left' or 'right') to configure preprocessing accordingly.
+    output_dir : str or Path
+        Directory to save intermediate results (required)
+    imu_config : IMUConfig, optional
+        Configuration for IMU data preprocessing.
+        If None, uses default IMUConfig.
+    gait_config : GaitConfig, optional
+        Configuration for gait feature extraction and detection.
+        If None, uses default GaitConfig(step="gait").
+    arm_activity_config : GaitConfig, optional
+        Configuration for arm activity feature extraction and filtering.
+        If None, uses default GaitConfig(step="arm_activity").
+    store_intermediate : List[str]
+        Steps of which intermediate results should be stored:
+        - 'preprocessing': Store preprocessed data after step 1
+        - 'gait': Store gait features and predictions after step 3
+        - 'arm_activity': Store arm activity features and predictions after step 5
+        - 'quantification': Store arm swing quantification results after step 6
+        If empty, only returns the final quantified results.
+    segment_number_offset : int, optional, default=0
+        Offset to add to all segment numbers to avoid conflicts when concatenating
+        multiple data segments. Used for proper segment numbering across multiple files.
+    logging_level : int, default logging.INFO
+        Logging level using standard logging constants (logging.DEBUG, logging.INFO,
+        etc.)
+    custom_logger : logging.Logger, optional
+        Custom logger instance. If provided, logging_level is ignored.
+
+    Returns
+    -------
+    tuple[pd.DataFrame, dict]
+        A tuple containing:
+        - pd.DataFrame: Quantified arm swing parameters with the following columns:
+            - gait_segment_nr: Gait segment number within this data segment
+            - Various arm swing metrics (range of motion, peak angular velocity, etc.)
+            - Additional metadata columns
+        - dict: Gait segment metadata containing information about each detected
+          gait segment
+
+    Notes
+    -----
+    This function processes a single contiguous data segment. For multiple segments,
+    call this function for each segment, then use aggregate_arm_swing_params()
+    on the concatenated results.
+
+    The function follows the exact workflow from the gait analysis tutorial:
+    https://github.com/biomarkersParkinson/paradigma/blob/main/docs/
+    tutorials/gait_analysis.ipynb
+    """
+    # Setup logger
+    active_logger = custom_logger if custom_logger is not None else logger
+    if custom_logger is None:
+        active_logger.setLevel(logging_level)
+
+    # Set default configurations
+    if imu_config is None:
+        imu_config = IMUConfig()
+    if gait_config is None:
+        gait_config = GaitConfig(step="gait")
+    if arm_activity_config is None:
+        arm_activity_config = GaitConfig(step="arm_activity")
+
+    output_dir = Path(output_dir)
+
+    # Validate input data has required columns
+    required_columns = [
+        DataColumns.TIME,
+        DataColumns.ACCELEROMETER_X,
+        DataColumns.ACCELEROMETER_Y,
+        DataColumns.ACCELEROMETER_Z,
+        DataColumns.GYROSCOPE_X,
+        DataColumns.GYROSCOPE_Y,
+        DataColumns.GYROSCOPE_Z,
+    ]
+    missing_columns = [
+        col for col in required_columns if col not in df_prepared.columns
+    ]
+    if missing_columns:
+        raise ValueError(f"Missing required columns: {missing_columns}")
+
+    # Step 1: Preprocess data
+    active_logger.info("Step 1: Preprocessing IMU data")
+
+    df_preprocessed = preprocess_imu_data(
+        df=df_prepared,
+        config=imu_config,
+        sensor="both",
+        watch_side=watch_side,
+        verbose=1 if logging_level <= logging.INFO else 0,
+    )
+
+    if "preprocessing" in store_intermediate:
+        preprocessing_dir = output_dir / "preprocessing"
+        preprocessing_dir.mkdir(parents=True, exist_ok=True)
+        df_preprocessed.to_parquet(
+            preprocessing_dir / "preprocessed_data.parquet", index=False
+        )
+        active_logger.debug(
+            f"Saved preprocessed data to "
+            f"{preprocessing_dir / 'preprocessed_data.parquet'}"
+        )
+
+    # Step 2: Extract gait features
+    active_logger.info("Step 2: Extracting gait features")
+    df_gait = extract_gait_features(df_preprocessed, gait_config)
+
+    if "gait" in store_intermediate:
+        gait_dir = output_dir / "gait"
+        gait_dir.mkdir(parents=True, exist_ok=True)
+        df_gait.to_parquet(gait_dir / "gait_features.parquet", index=False)
+        active_logger.debug(
+            f"Saved gait features to {gait_dir / 'gait_features.parquet'}"
+        )
+
+    # Step 3: Detect gait
+    active_logger.info("Step 3: Detecting gait")
+    try:
+        classifier_path = files("paradigma.assets") / "gait_detection_clf_package.pkl"
+        classifier_package_gait = ClassifierPackage.load(classifier_path)
+    except Exception as e:
+        active_logger.error(f"Could not load gait detection classifier: {e}")
+        raise RuntimeError("Gait detection classifier not available")
+
+    gait_proba = detect_gait(df_gait, classifier_package_gait, parallel=False)
+    df_gait[DataColumns.PRED_GAIT_PROBA] = gait_proba
+
+    # Merge predictions back with timestamps
+    df_gait_with_time = merge_predictions_with_timestamps(
+        df_ts=df_preprocessed,
+        df_predictions=df_gait,
+        pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
+        window_length_s=gait_config.window_length_s,
+        fs=gait_config.sampling_frequency,
+    )
+
+    # Add binary prediction column
+    df_gait_with_time[DataColumns.PRED_GAIT] = (
+        df_gait_with_time[DataColumns.PRED_GAIT_PROBA]
+        >= classifier_package_gait.threshold
+    ).astype(int)
+
+    if "gait" in store_intermediate:
+        gait_dir = output_dir / "gait"
+        gait_dir.mkdir(parents=True, exist_ok=True)
+        df_gait_with_time.to_parquet(gait_dir / "gait_predictions.parquet", index=False)
+        active_logger.info(
+            f"Saved gait predictions to {gait_dir / 'gait_predictions.parquet'}"
+        )
+
+    # Filter to only gait periods
+    df_gait_only = df_gait_with_time.loc[
+        df_gait_with_time[DataColumns.PRED_GAIT] == 1
+    ].reset_index(drop=True)
+
+    if len(df_gait_only) == 0:
+        active_logger.warning("No gait detected in this segment")
+        return pd.DataFrame(), {}
+
+    # Step 4: Extract arm activity features
+    active_logger.info("Step 4: Extracting arm activity features")
+    df_arm_activity = extract_arm_activity_features(df_gait_only, arm_activity_config)
+
+    if "arm_activity" in store_intermediate:
+        arm_activity_dir = output_dir / "arm_activity"
+        arm_activity_dir.mkdir(parents=True, exist_ok=True)
+        df_arm_activity.to_parquet(
+            arm_activity_dir / "arm_activity_features.parquet", index=False
+        )
+        active_logger.debug(
+            f"Saved arm activity features to "
+            f"{arm_activity_dir / 'arm_activity_features.parquet'}"
+        )
+
+    # Step 5: Filter gait (remove other arm activities)
+    active_logger.info("Step 5: Filtering gait")
+    try:
+        classifier_path = files("paradigma.assets") / "gait_filtering_clf_package.pkl"
+        classifier_package_arm_activity = ClassifierPackage.load(classifier_path)
+    except Exception as e:
+        active_logger.error(f"Could not load arm activity classifier: {e}")
+        raise RuntimeError("Arm activity classifier not available")
+
+    # Filter gait returns probabilities which we add to the arm activity features
+    arm_activity_probabilities = filter_gait(
+        df_arm_activity, classifier_package_arm_activity, parallel=False
+    )
+
+    df_arm_activity[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA] = (
+        arm_activity_probabilities
+    )
+
+    # Merge predictions back with timestamps
+    df_filtered = merge_predictions_with_timestamps(
+        df_ts=df_gait_only,
+        df_predictions=df_arm_activity,
+        pred_proba_colname=DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA,
+        window_length_s=arm_activity_config.window_length_s,
+        fs=arm_activity_config.sampling_frequency,
+    )
+
+    # Add binary prediction column
+    filt_threshold = classifier_package_arm_activity.threshold
+    df_filtered[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] = (
+        df_filtered[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA] >= filt_threshold
+    ).astype(int)
+
+    if "arm_activity" in store_intermediate:
+        arm_activity_dir = output_dir / "arm_activity"
+        arm_activity_dir.mkdir(parents=True, exist_ok=True)
+        df_filtered.to_parquet(arm_activity_dir / "filtered_gait.parquet", index=False)
+        active_logger.debug(
+            f"Saved filtered gait to {arm_activity_dir / 'filtered_gait.parquet'}"
+        )
+
+    if (
+        len(df_filtered.loc[df_filtered[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY] == 1])
+        == 0
+    ):
+        active_logger.warning("No clean gait data remaining after filtering")
+        return pd.DataFrame(), {}
+
+    # Step 6: Quantify arm swing
+    active_logger.info("Step 6: Quantifying arm swing")
+    quantified_arm_swing, gait_segment_meta = quantify_arm_swing(
+        df=df_filtered,
+        fs=arm_activity_config.sampling_frequency,
+        filtered=True,
+        max_segment_gap_s=arm_activity_config.max_segment_gap_s,
+        min_segment_length_s=arm_activity_config.min_segment_length_s,
+    )
+
+    if "quantification" in store_intermediate:
+        quantification_dir = output_dir / "quantification"
+        quantification_dir.mkdir(parents=True, exist_ok=True)
+        quantified_arm_swing.to_parquet(
+            quantification_dir / "arm_swing_quantified.parquet", index=False
+        )
+
+        # Save gait segment metadata as JSON
+        with open(quantification_dir / "gait_segment_meta.json", "w") as f:
+            json.dump(gait_segment_meta, f, indent=2)
+
+        active_logger.debug(
+            f"Saved arm swing quantification to "
+            f"{quantification_dir / 'arm_swing_quantified.parquet'}"
+        )
+        active_logger.debug(
+            f"Saved gait segment metadata to "
+            f"{quantification_dir / 'gait_segment_meta.json'}"
+        )
+
+    active_logger.info(
+        f"Gait analysis pipeline completed. Found "
+        f"{len(quantified_arm_swing)} windows of gait "
+        f"without other arm activities."
+    )
+
+    # Apply segment number offset if specified (for multi-segment concatenation)
+    if segment_number_offset > 0 and len(quantified_arm_swing) > 0:
+        quantified_arm_swing = quantified_arm_swing.copy()
+        quantified_arm_swing["gait_segment_nr"] += segment_number_offset
+
+        # Also update the metadata with the new segment numbers
+        if gait_segment_meta and "per_segment" in gait_segment_meta:
+            updated_per_segment_meta = {}
+            for seg_id, meta in gait_segment_meta["per_segment"].items():
+                updated_per_segment_meta[seg_id + segment_number_offset] = meta
+            gait_segment_meta["per_segment"] = updated_per_segment_meta
+
+    return quantified_arm_swing, gait_segment_meta
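
For orientation, below is a minimal sketch of how the new run_gait_pipeline entry point (steps 1-6) and the updated aggregate_arm_swing_params (step 7) shown in this diff might be chained. It assumes only the signatures and docstrings visible above; the input path, watch side, output directory, duration categories, and aggregate list are hypothetical placeholders, and the metadata is handed over using the "per_segment" structure built by quantify_arm_swing.

import pandas as pd

from paradigma.pipelines.gait_pipeline import (
    aggregate_arm_swing_params,
    run_gait_pipeline,
)

# Hypothetical prepared recording with the required columns:
# time, accelerometer_x/y/z, gyroscope_x/y/z
df_prepared = pd.read_parquet("data/prepared_imu.parquet")

# Steps 1-6: preprocess, extract features, detect and filter gait,
# and quantify arm swing for one contiguous data segment
quantified, segment_meta = run_gait_pipeline(
    df_prepared=df_prepared,
    watch_side="left",  # or "right"
    output_dir="output",
    store_intermediate=["gait", "quantification"],
)

# Step 7: aggregate per duration category; each tuple is an illustrative
# (lower, upper) bound in seconds on the unfiltered gait segment duration
aggregated = aggregate_arm_swing_params(
    df_arm_swing_params=quantified,
    segment_meta=segment_meta["per_segment"],
    segment_cats=[(1.5, 10.0), (10.0, 30.0), (30.0, float("inf"))],
    aggregates=["median", "cov"],
)

When processing multiple recordings, the docstring suggests calling run_gait_pipeline once per data segment with an increasing segment_number_offset, concatenating the quantified DataFrames and merging the per-segment metadata before the single aggregation call.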