paradigma 0.4.7.tar.gz → 1.0.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {paradigma-0.4.7 → paradigma-1.0.0}/PKG-INFO +6 -4
- {paradigma-0.4.7 → paradigma-1.0.0}/README.md +5 -3
- {paradigma-0.4.7 → paradigma-1.0.0}/pyproject.toml +1 -1
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/feature_extraction.py +4 -18
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/pipelines/gait_pipeline.py +66 -76
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/pipelines/tremor_pipeline.py +4 -3
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/preprocessing.py +30 -7
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/segmenting.py +4 -1
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/testing.py +18 -4
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/util.py +59 -3
- {paradigma-0.4.7 → paradigma-1.0.0}/LICENSE +0 -0
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/__init__.py +0 -0
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/assets/gait_detection_clf_package.pkl +0 -0
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/assets/gait_filtering_clf_package.pkl +0 -0
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/assets/ppg_quality_clf_package.pkl +0 -0
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/assets/tremor_detection_clf_package.pkl +0 -0
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/classification.py +0 -0
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/config.py +0 -0
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/constants.py +0 -0
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/pipelines/__init__.py +0 -0
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/pipelines/heart_rate_pipeline.py +0 -0
- {paradigma-0.4.7 → paradigma-1.0.0}/src/paradigma/pipelines/heart_rate_utils.py +0 -0
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: paradigma
-Version: 0.4.7
+Version: 1.0.0
 Summary: ParaDigMa - A toolbox for deriving Parkinson's disease Digital Markers from real-life wrist sensor data
 License: Apache-2.0
 Author: Erik Post
@@ -95,7 +95,7 @@ The ParaDigMa toolbox is designed for the analysis of passive monitoring data co
 Specific requirements include:
 | Pipeline | Sensor Configuration | Context of Use |
 |------------------------|--------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------|
-| **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but different sensitivity for measuring disease progression for tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
+| **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but different sensitivity for measuring disease progression for tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). <br> - Timeframe: contiguous, strictly increasing timestamps. | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
 | **Arm swing during gait** | - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. <br> - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Population: no walking aid, no severe dyskinesia in the watch-sided arm. <br> - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm), and at least 2 minutes of arm swing. |
 | **Tremor** | - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm). |
 | **Pulse rate** | - PPG*: minimum sampling rate of 30 Hz, green LED. <br> - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. | - Population: no rhythm disorders (e.g. atrial fibrillation, atrial flutter). <br> - Compliance: for weekly measures: minimum average of 12 hours of data per day. |
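The new "Timeframe" requirement is enforced programmatically later in this diff (see the `resample_data` changes in `preprocessing.py`). As a rough sketch of what "contiguous, strictly increasing timestamps" means in practice — the helper name, the `time` column name, and the call at the end are illustrative, not part of the package API:

```python
import numpy as np
import pandas as pd

def check_timeframe(df: pd.DataFrame, fs: float, time_colname: str = "time") -> None:
    """Hypothetical pre-flight check mirroring the checks 1.0.0 adds internally."""
    diffs = np.diff(df[time_colname].to_numpy())
    if not np.all(diffs > 0):
        raise ValueError("Time array is not strictly increasing")
    # Tolerance of twice the expected interval, matching the new default in resample_data
    if np.any(np.abs(diffs - 1 / fs) > 2 / fs):
        raise ValueError("Time array is not contiguous")

check_timeframe(pd.DataFrame({"time": np.arange(0, 1, 0.01)}), fs=100)
```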
@@ -111,8 +111,10 @@ We have included support for [TSDF](https://biomarkersparkinson.github.io/tsdf/)
 
 ## Scientific validation
 
-The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. 2020]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/)
-
+The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. 2020]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/) and the Personalized Parkinson Project [[Bloem et al. 2019]](https://pubmed.ncbi.nlm.nih.gov/31315608/). The following publication contains the details and validation of the arm swing during gait pipeline:
+* [Post, E. et al. - Quantifying arm swing in Parkinson's disease: a method account for arm activities during free-living gait](https://doi.org/10.1186/s12984-025-01578-z)
+
+Details and validation of the other pipelines shall be shared in upcoming scientific publications.
 
 ## Contributing
--- a/README.md
+++ b/README.md
@@ -75,7 +75,7 @@ The ParaDigMa toolbox is designed for the analysis of passive monitoring data co
 Specific requirements include:
 | Pipeline | Sensor Configuration | Context of Use |
 |------------------------|--------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------|
-| **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but different sensitivity for measuring disease progression for tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
+| **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but different sensitivity for measuring disease progression for tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). <br> - Timeframe: contiguous, strictly increasing timestamps. | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
 | **Arm swing during gait** | - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. <br> - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Population: no walking aid, no severe dyskinesia in the watch-sided arm. <br> - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm), and at least 2 minutes of arm swing. |
 | **Tremor** | - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm). |
 | **Pulse rate** | - PPG*: minimum sampling rate of 30 Hz, green LED. <br> - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. | - Population: no rhythm disorders (e.g. atrial fibrillation, atrial flutter). <br> - Compliance: for weekly measures: minimum average of 12 hours of data per day. |
@@ -91,8 +91,10 @@ We have included support for [TSDF](https://biomarkersparkinson.github.io/tsdf/)
 
 ## Scientific validation
 
-The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. 2020]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/)
-
+The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. 2020]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/) and the Personalized Parkinson Project [[Bloem et al. 2019]](https://pubmed.ncbi.nlm.nih.gov/31315608/). The following publication contains the details and validation of the arm swing during gait pipeline:
+* [Post, E. et al. - Quantifying arm swing in Parkinson's disease: a method account for arm activities during free-living gait](https://doi.org/10.1186/s12984-025-01578-z)
+
+Details and validation of the other pipelines shall be shared in upcoming scientific publications.
 
 ## Contributing
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "paradigma"
-version = "0.4.7"
+version = "1.0.0"
 description = "ParaDigMa - A toolbox for deriving Parkinson's disease Digital Markers from real-life wrist sensor data"
 authors = [ "Erik Post <erik.post@radboudumc.nl>",
     "Kars Veldkamp <kars.veldkamp@radboudumc.nl>",
--- a/src/paradigma/feature_extraction.py
+++ b/src/paradigma/feature_extraction.py
@@ -597,11 +597,9 @@ def pca_transform_gyroscope(
     df: pd.DataFrame,
     y_gyro_colname: str,
     z_gyro_colname: str,
-    pred_colname: str | None = None,
 ) -> np.ndarray:
     """
-    Perform principal component analysis (PCA) on gyroscope data to estimate velocity.
-    the PCA is fitted on the predicted gait data. Otherwise, the PCA is fitted on the entire dataset.
+    Perform principal component analysis (PCA) on gyroscope data to estimate velocity.
 
     Parameters
     ----------
@@ -611,8 +609,6 @@ def pca_transform_gyroscope(
         The column name for the y-axis gyroscope data.
     z_gyro_colname : str
         The column name for the z-axis gyroscope data.
-    pred_colname : str, optional
-        The column name for the predicted gait (default: None).
 
     Returns
     -------
@@ -623,19 +619,9 @@ def pca_transform_gyroscope(
     y_gyro_array = df[y_gyro_colname].to_numpy()
     z_gyro_array = df[z_gyro_colname].to_numpy()
 
-    #
-
-
-        y_gyro_fit_array = y_gyro_array[pred_mask]
-        z_gyro_fit_array = z_gyro_array[pred_mask]
-
-        # Fit PCA on predicted gait data
-        fit_data = np.column_stack((y_gyro_fit_array, z_gyro_fit_array))
-        full_data = np.column_stack((y_gyro_array, z_gyro_array))
-    else:
-        # Fit PCA on entire dataset
-        fit_data = np.column_stack((y_gyro_array, z_gyro_array))
-        full_data = fit_data
+    # Fit PCA
+    fit_data = np.column_stack((y_gyro_array, z_gyro_array))
+    full_data = fit_data
 
     pca = PCA(n_components=2, svd_solver='auto', random_state=22)
     pca.fit(fit_data)
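For readers unfamiliar with this step: the function stacks the y- and z-axis gyroscope signals, fits a two-component PCA, and uses the result as a velocity estimate. A minimal sketch of the simplified 1.0.0 behavior on synthetic signals — the final transform/first-component step is an assumption based on the visible `pca.fit(fit_data)` call, since the remainder of the function body is not shown in this hunk:

```python
import numpy as np
from sklearn.decomposition import PCA

# Synthetic y/z gyroscope traces (deg/s); the real input comes from df[y_gyro_colname] etc.
rng = np.random.default_rng(22)
t = np.arange(0, 10, 0.01)
y_gyro_array = 80 * np.sin(2 * np.pi * t) + rng.normal(0, 5, t.size)
z_gyro_array = 40 * np.sin(2 * np.pi * t) + rng.normal(0, 5, t.size)

# As in 1.0.0: fit on the full stacked data, with no prediction mask
fit_data = np.column_stack((y_gyro_array, z_gyro_array))
full_data = fit_data

pca = PCA(n_components=2, svd_solver='auto', random_state=22)
pca.fit(fit_data)

# Assumed: the first principal component serves as the angular-velocity estimate
velocity = pca.transform(full_data)[:, 0]
print(velocity.shape)  # (1000,)
```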
--- a/src/paradigma/pipelines/gait_pipeline.py
+++ b/src/paradigma/pipelines/gait_pipeline.py
@@ -1,20 +1,17 @@
 import numpy as np
-import os
 import pandas as pd
-from pathlib import Path
 from scipy.signal import periodogram
 from typing import List, Tuple
-import tsdf
 
 from paradigma.classification import ClassifierPackage
-from paradigma.constants import DataColumns
+from paradigma.constants import DataColumns
 from paradigma.config import GaitConfig
 from paradigma.feature_extraction import pca_transform_gyroscope, compute_angle, remove_moving_average_angle, \
     extract_angle_extremes, compute_range_of_motion, compute_peak_angular_velocity, compute_statistics, \
     compute_std_euclidean_norm, compute_power_in_bandwidth, compute_dominant_frequency, compute_mfccs, \
     compute_total_power
 from paradigma.segmenting import tabulate_windows, create_segments, discard_segments, categorize_segments, WindowedDataExtractor
-from paradigma.util import aggregate_parameter
+from paradigma.util import aggregate_parameter
@@ -160,66 +157,35 @@ def detect_gait(
 
 
 def extract_arm_activity_features(
+    df: pd.DataFrame,
     config: GaitConfig,
-    df_timestamps: pd.DataFrame,
-    df_predictions: pd.DataFrame,
-    threshold: float
 ) -> pd.DataFrame:
     """
     Extract features related to arm activity from a time-series DataFrame.
 
     This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
     and extracts features related to arm activity by performing the following steps:
-    1.
-    2.
-    3.
-    4.
-    5.
-    6.
-    7. Extracts angle-related features, temporal domain features, and spectral domain features.
+    1. Computes the angle and velocity from gyroscope data.
+    2. Filters the data to include only predicted gait segments.
+    3. Groups the data into segments based on consecutive timestamps and pre-specified gaps.
+    4. Removes segments that do not meet predefined criteria.
+    5. Creates fixed-length windows from the time series data.
+    6. Extracts angle-related features, temporal domain features, and spectral domain features.
 
     Parameters
     ----------
-
-
-
-    df_timestamps : pd.DataFrame
-        A DataFrame containing the raw sensor data, including accelerometer, gravity, and gyroscope columns.
-
-    df_predictions : pd.DataFrame
-        A DataFrame containing the predicted probabilities for gait activity per window.
+    df: pd.DataFrame
+        The input DataFrame containing accelerometer, gravity, and gyroscope data of predicted gait.
 
     config : ArmActivityFeatureExtractionConfig
         Configuration object containing column names and parameters for feature extraction.
 
-    path_to_classifier_input : str | Path
-        The path to the directory containing the classifier files and other necessary input files for feature extraction.
-
     Returns
     -------
     pd.DataFrame
         A DataFrame containing the extracted arm activity features, including angle, velocity,
         temporal, and spectral features.
     """
-    if not any(df_predictions[DataColumns.PRED_GAIT_PROBA] >= threshold):
-        raise ValueError("No gait detected in the input data.")
-
-    # Merge gait predictions with timestamps
-    gait_preprocessing_config = GaitConfig(step='gait')
-    df = merge_predictions_with_timestamps(
-        df_ts=df_timestamps,
-        df_predictions=df_predictions,
-        pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
-        window_length_s=gait_preprocessing_config.window_length_s,
-        fs=gait_preprocessing_config.sampling_frequency
-    )
-
-    # Add a column for predicted gait based on a fitted threshold
-    df[DataColumns.PRED_GAIT] = (df[DataColumns.PRED_GAIT_PROBA] >= threshold).astype(int)
-
-    # Filter the DataFrame to only include predicted gait (1)
-    df = df.loc[df[DataColumns.PRED_GAIT]==1].reset_index(drop=True)
-
     # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap
     df[DataColumns.SEGMENT_NR] = create_segments(
         time_array=df[DataColumns.TIME],
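Net effect of this hunk: `extract_arm_activity_features` no longer merges predictions or applies the gait threshold itself; callers now pass in a DataFrame already restricted to predicted gait. A sketch of the caller-side preparation, mirroring the code added to `extract_arm_activity_features_io` further down in this diff (it assumes `df_ts`, `df_pred_gait`, and `clf_package` are already loaded):

```python
from paradigma.config import GaitConfig
from paradigma.constants import DataColumns
from paradigma.pipelines.gait_pipeline import extract_arm_activity_features
from paradigma.util import merge_predictions_with_timestamps

# Merge window-level gait probabilities onto the raw timestamps
gait_config = GaitConfig(step='gait')
df = merge_predictions_with_timestamps(
    df_ts=df_ts,
    df_predictions=df_pred_gait,
    pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
    window_length_s=gait_config.window_length_s,
    fs=gait_config.sampling_frequency,
)

# Threshold and keep only predicted gait before calling the feature extractor
df[DataColumns.PRED_GAIT] = (df[DataColumns.PRED_GAIT_PROBA] >= clf_package.threshold).astype(int)
df = df.loc[df[DataColumns.PRED_GAIT] == 1].reset_index(drop=True)

df_features = extract_arm_activity_features(df=df, config=GaitConfig(step='arm_activity'))
```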
@@ -315,8 +281,8 @@ def filter_gait(
     ----------
     df : pd.DataFrame
         The input DataFrame containing features extracted from gait data.
-
-        The
+    clf_package: ClassifierPackage
+        The pre-trained classifier package containing the classifier, threshold, and scaler.
     parallel : bool, optional, default=False
         If `True`, enables parallel processing.
@@ -351,10 +317,10 @@ def filter_gait(
 
 def quantify_arm_swing(
     df: pd.DataFrame,
-    max_segment_gap_s: float,
-    min_segment_length_s: float,
     fs: int,
     filtered: bool = False,
+    max_segment_gap_s: float = 1.5,
+    min_segment_length_s: float = 1.5
 ) -> Tuple[dict[str, pd.DataFrame], dict]:
     """
     Quantify arm swing parameters for segments of motion based on gyroscope data.
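A minimal call under the new signature — the segment parameters can now be omitted (the `df_gait` frame and the 100 Hz rate below are illustrative):

```python
from paradigma.pipelines.gait_pipeline import quantify_arm_swing

# Segment parameters now default to 1.5 s; override them only when needed
df_params, segment_meta = quantify_arm_swing(
    df=df_gait,      # hypothetical DataFrame of predicted gait timestamps
    fs=100,
    filtered=True,   # requires the predicted-no-other-arm-activity column
)
```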
@@ -362,28 +328,27 @@ def quantify_arm_swing(
     Parameters
     ----------
     df : pd.DataFrame
-        A DataFrame containing the raw sensor data
+        A DataFrame containing the raw sensor data of predicted gait timestamps. Should include a column
         for predicted no other arm activity based on a fitted threshold if filtered is True.
 
-    max_segment_gap_s : float
-        The maximum gap allowed between segments.
-
-    min_segment_length_s : float
-        The minimum length required for a segment to be considered valid.
-
     fs : int
         The sampling frequency of the sensor data.
 
     filtered : bool, optional, default=True
         If `True`, the gyroscope data is filtered to only include predicted no other arm activity.
 
+    max_segment_gap_s : float, optional, default=1.5
+        The maximum gap in seconds between consecutive timestamps to group them into segments.
+
+    min_segment_length_s : float, optional, default=1.5
+        The minimum length in seconds for a segment to be considered valid.
+
     Returns
     -------
     Tuple[pd.DataFrame, dict]
         A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing
         metadata for each segment.
     """
-
     # Group consecutive timestamps into segments, with new segments starting after a pre-specified gap.
     # Segments are made based on predicted gait
     df[DataColumns.SEGMENT_NR] = create_segments(
@@ -391,6 +356,10 @@ def quantify_arm_swing(
         max_segment_gap_s=max_segment_gap_s
     )
 
+    # Segment category is determined based on predicted gait, hence it is set
+    # before filtering the DataFrame to only include predicted no other arm activity
+    df[DataColumns.SEGMENT_CAT] = categorize_segments(df=df, fs=fs)
+
     # Remove segments that do not meet predetermined criteria
     df = discard_segments(
         df=df,
@@ -401,40 +370,51 @@ def quantify_arm_swing(
     )
 
     if df.empty:
-        raise ValueError("No segments found in the input data.")
+        raise ValueError("No segments found in the input data after discarding segments of invalid shape.")
 
     # If no arm swing data is remaining, return an empty dictionary
     if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].empty:
         raise ValueError("No gait without other arm activities to quantify.")
-
-    df[DataColumns.SEGMENT_CAT] = categorize_segments(df=df, fs=fs)
-
-    # Group and process segments
-    arm_swing_quantified = []
-    segment_meta = {}
-
-    if filtered:
+    elif filtered:
         # Filter the DataFrame to only include predicted no other arm activity (1)
         df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].reset_index(drop=True)
 
-        # Group consecutive timestamps into segments
-        # Now segments are based on predicted gait without other arm activity for subsequent processes
+        # Group consecutive timestamps into segments of filtered gait
         df[DataColumns.SEGMENT_NR] = create_segments(
             time_array=df[DataColumns.TIME],
             max_segment_gap_s=max_segment_gap_s
         )
 
-
-
-
+        # Remove segments that do not meet predetermined criteria
+        df = discard_segments(
+            df=df,
+            segment_nr_colname=DataColumns.SEGMENT_NR,
+            min_segment_length_s=min_segment_length_s,
+            fs=fs,
+        )
+
+        if df.empty:
+            raise ValueError("No filtered gait segments found in the input data after discarding segments of invalid shape.")
+
+    arm_swing_quantified = []
+    segment_meta = {
+        'aggregated': {
+            'all': {
+                'duration_s': len(df[DataColumns.TIME]) / fs
+            },
+        },
+        'per_segment': {}
+    }
 
+    # PCA is fitted on only predicted gait without other arm activity if filtered, otherwise
+    # it is fitted on the entire gyroscope data
     df[DataColumns.VELOCITY] = pca_transform_gyroscope(
         df=df,
         y_gyro_colname=DataColumns.GYROSCOPE_Y,
         z_gyro_colname=DataColumns.GYROSCOPE_Z,
-        pred_colname=pred_colname_pca
     )
 
+    # Group and process segments
     for segment_nr, group in df.groupby(DataColumns.SEGMENT_NR, sort=False):
         segment_cat = group[DataColumns.SEGMENT_CAT].iloc[0]
         time_array = group[DataColumns.TIME].to_numpy()
@@ -452,8 +432,10 @@ def quantify_arm_swing(
             fs=fs,
         )
 
-        segment_meta[segment_nr] = {
-            '
+        segment_meta['per_segment'][segment_nr] = {
+            'start_time_s': time_array.min(),
+            'end_time_s': time_array.max(),
+            'duration_s': len(angle_array) / fs,
             DataColumns.SEGMENT_CAT: segment_cat
         }
@@ -487,12 +469,20 @@ def quantify_arm_swing(
 
         df_params_segment = pd.DataFrame({
             DataColumns.SEGMENT_NR: segment_nr,
+            DataColumns.SEGMENT_CAT: segment_cat,
             DataColumns.RANGE_OF_MOTION: rom,
             DataColumns.PEAK_VELOCITY: pav
         })
 
         arm_swing_quantified.append(df_params_segment)
 
+    # Combine segment categories
+    segment_categories = set([segment_meta['per_segment'][x][DataColumns.SEGMENT_CAT] for x in segment_meta['per_segment'].keys()])
+    for segment_cat in segment_categories:
+        segment_meta['aggregated'][segment_cat] = {
+            'duration_s': sum([segment_meta['per_segment'][x]['duration_s'] for x in segment_meta['per_segment'].keys() if segment_meta['per_segment'][x][DataColumns.SEGMENT_CAT] == segment_cat])
+        }
+
     arm_swing_quantified = pd.concat(arm_swing_quantified, ignore_index=True)
 
     return arm_swing_quantified, segment_meta
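Taken together with the earlier `segment_meta` initialization, the metadata returned by `quantify_arm_swing` in 1.0.0 has roughly this shape. All values and the category label below are illustrative; the actual labels come from `categorize_segments`, and the per-segment category key is the literal value of `DataColumns.SEGMENT_CAT`:

```python
segment_meta = {
    'aggregated': {
        'all': {'duration_s': 512.3},            # total duration across all segments
        'example_category': {'duration_s': 311.2},  # one entry per observed category
    },
    'per_segment': {
        1: {
            'start_time_s': 12.0,
            'end_time_s': 19.5,
            'duration_s': 7.5,
            'segment_category': 'example_category',  # keyed by DataColumns.SEGMENT_CAT
        },
    },
}
```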
@@ -527,7 +517,7 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
         cat_segments = [x for x in segment_meta.keys() if segment_meta[x][DataColumns.SEGMENT_CAT] == segment_cat]
 
         aggregated_results[segment_cat] = {
-            '
+            'duration_s': sum([segment_meta[x]['duration_s'] for x in cat_segments])
         }
 
         df_arm_swing_params_cat = df_arm_swing_params[df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)]
@@ -537,7 +527,7 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
             aggregated_results[segment_cat][f'{aggregate}_{arm_swing_parameter}'] = aggregate_parameter(df_arm_swing_params_cat[arm_swing_parameter], aggregate)
 
     aggregated_results['all_segment_categories'] = {
-        '
+        'duration_s': sum([segment_meta[x]['duration_s'] for x in segment_meta.keys()])
     }
 
     for arm_swing_parameter in arm_swing_parameters:
--- a/src/paradigma/pipelines/tremor_pipeline.py
+++ b/src/paradigma/pipelines/tremor_pipeline.py
@@ -163,8 +163,8 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
     Returns
     -------
     dict
-        A dictionary with the aggregated tremor time and tremor power measures, as well as the
-
+        A dictionary with the aggregated tremor time and tremor power measures, as well as the number of valid days,
+        the total number of windows, and the number of windows at rest available in the input dataframe.
 
     Notes
     -----
@@ -173,7 +173,7 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
     - The modal tremor power is computed based on gaussian kernel density estimation.
 
     """
-
+    nr_valid_days = df['time_dt'].dt.date.unique().size # number of valid days in the input dataframe
     nr_windows_total = df.shape[0] # number of windows in the input dataframe
 
     # remove windows with detected non-tremor arm movements to control for the amount of arm activities performed
@@ -216,6 +216,7 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
     # store aggregates in json format
     d_aggregates = {
         'metadata': {
+            'nr_valid_days': nr_valid_days,
             'nr_windows_total': nr_windows_total,
             'nr_windows_rest': nr_windows_rest
         },
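The `nr_valid_days` computation counts distinct calendar dates in the window-level `time_dt` column. A minimal illustration on synthetic data:

```python
import pandas as pd

# Three windows spread over two calendar days
df = pd.DataFrame({
    'time_dt': pd.to_datetime([
        '2024-01-01 09:00', '2024-01-01 10:00', '2024-01-02 09:00',
    ]),
})
nr_valid_days = df['time_dt'].dt.date.unique().size
print(nr_valid_days)  # 2
```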
--- a/src/paradigma/preprocessing.py
+++ b/src/paradigma/preprocessing.py
@@ -17,7 +17,9 @@ def resample_data(
     df: pd.DataFrame,
     time_column : str,
     values_column_names: List[str],
+    sampling_frequency: int,
     resampling_frequency: int,
+    tolerance: float | None = None
 ) -> pd.DataFrame:
     """
     Resamples sensor data to a specified frequency using cubic interpolation.
@@ -30,8 +32,14 @@ def resample_data(
         The name of the column containing the time data.
     values_column_names : List[str]
         A list of column names that should be resampled.
+    sampling_frequency : int
+        The original sampling frequency of the data (in Hz).
     resampling_frequency : int
         The frequency to which the data should be resampled (in Hz).
+    tolerance : float, optional
+        The tolerance added to the expected difference when checking
+        for contiguous timestamps. If not provided, it defaults to
+        twice the expected interval.
 
     Returns
     -------
@@ -46,23 +54,35 @@ def resample_data(
 
     Notes
     -----
-
-
+    - Uses cubic interpolation for smooth resampling if there are enough points.
+    - If only two timestamps are available, it falls back to linear interpolation.
     """
+    # Set default tolerance if not provided to twice the expected interval
+    if tolerance is None:
+        tolerance = 2 * 1 / sampling_frequency
 
-    # Extract time and values
+    # Extract time and values
     time_abs_array = np.array(df[time_column])
     values_array = np.array(df[values_column_names])
 
     # Ensure the time array is strictly increasing
     if not np.all(np.diff(time_abs_array) > 0):
-        raise ValueError("
+        raise ValueError("Time array is not strictly increasing")
+
+    # Ensure the time array is contiguous
+    expected_interval = 1 / sampling_frequency
+    timestamp_diffs = np.diff(time_abs_array)
+    if np.any(np.abs(timestamp_diffs - expected_interval) > tolerance):
+        raise ValueError("Time array is not contiguous")
 
     # Resample the time data using the specified frequency
     t_resampled = np.arange(time_abs_array[0], time_abs_array[-1], 1 / resampling_frequency)
 
-    #
-
+    # Choose interpolation method
+    interpolation_kind = "cubic" if len(time_abs_array) > 3 else "linear"
+    interpolator = interp1d(time_abs_array, values_array, axis=0, kind=interpolation_kind, fill_value="extrapolate")
+
+    # Interpolate
     resampled_values = interpolator(t_resampled)
 
     # Create a DataFrame with the resampled data
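A usage sketch of the updated function, assuming it is importable from `paradigma.preprocessing` as the file path suggests; the DataFrame and the `accelerometer_x` column are synthetic:

```python
import numpy as np
import pandas as pd
from paradigma.preprocessing import resample_data

# Hypothetical 50 Hz accelerometer data, resampled to 100 Hz
fs_original = 50
df = pd.DataFrame({
    'time': np.arange(0, 2, 1 / fs_original),  # contiguous, strictly increasing
    'accelerometer_x': np.random.default_rng(0).normal(size=100),
})
df_resampled = resample_data(
    df=df,
    time_column='time',
    values_column_names=['accelerometer_x'],
    sampling_frequency=fs_original,   # new required argument in 1.0.0
    resampling_frequency=100,
    tolerance=None,                   # defaults to twice the expected interval
)
```

If the timestamps have gaps larger than the tolerance, the function now raises `ValueError("Time array is not contiguous")` instead of silently interpolating across the gap.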
@@ -186,7 +206,8 @@ def preprocess_imu_data(df: pd.DataFrame, config: IMUConfig, sensor: str, watch_
     df = resample_data(
         df=df,
         time_column=DataColumns.TIME,
-        values_column_names
+        values_column_names=values_colnames,
+        sampling_frequency=config.sampling_frequency,
         resampling_frequency=config.sampling_frequency
     )
@@ -259,6 +280,7 @@ def preprocess_ppg_data(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config:
         df=df_acc_overlapping,
         time_column=DataColumns.TIME,
         values_column_names = list(imu_config.d_channels_accelerometer.keys()),
+        sampling_frequency=imu_config.sampling_frequency,
         resampling_frequency=imu_config.sampling_frequency
     )
@@ -267,6 +289,7 @@ def preprocess_ppg_data(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config:
         df=df_ppg_overlapping,
         time_column=DataColumns.TIME,
         values_column_names = list(ppg_config.d_channels_ppg.keys()),
+        sampling_frequency=ppg_config.sampling_frequency,
         resampling_frequency=ppg_config.sampling_frequency
     )
--- a/src/paradigma/segmenting.py
+++ b/src/paradigma/segmenting.py
@@ -168,7 +168,7 @@ def create_segments(
     gap_exceeds = time_diff > max_segment_gap_s
 
     # Create the segment number based on the cumulative sum of the gap_exceeds mask
-    segments = gap_exceeds.cumsum()
+    segments = gap_exceeds.cumsum()
 
     return segments
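The cumulative-sum trick used here is worth spelling out: each timestamp gap larger than `max_segment_gap_s` contributes a `True` that bumps the running segment counter for all subsequent rows. A standalone demonstration with a toy time series:

```python
import pandas as pd

# Gap-based segmentation via cumulative sum, as in create_segments
time = pd.Series([0.0, 0.01, 0.02, 5.0, 5.01, 12.0])
max_segment_gap_s = 1.5

time_diff = time.diff()                       # first element is NaN (compares False)
gap_exceeds = time_diff > max_segment_gap_s   # True where a new segment starts
segments = gap_exceeds.cumsum()
print(segments.tolist())  # [0, 0, 0, 1, 1, 2]
```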
@@ -236,6 +236,9 @@ def discard_segments(
 
     df = df[valid_segment_mask].copy()
 
+    if df.empty:
+        raise ValueError("All segments were removed.")
+
     # Reset segment numbers in a single step
     unique_segments = pd.factorize(df[segment_nr_colname])[0] + 1
     df[segment_nr_colname] = unique_segments
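Callers that previously checked for an empty frame after `discard_segments` can now catch the error instead. A hedged sketch, assuming a pre-segmented `df` and the keyword arguments visible in the `gait_pipeline.py` hunks above:

```python
from paradigma.constants import DataColumns
from paradigma.segmenting import discard_segments

# 1.0.0 raises instead of silently returning an empty frame
try:
    df = discard_segments(
        df=df,                                    # assumed pre-segmented DataFrame
        segment_nr_colname=DataColumns.SEGMENT_NR,
        min_segment_length_s=1.5,
        fs=100,
    )
except ValueError as e:
    print(f"Skipping recording: {e}")             # e.g. "All segments were removed."
```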
--- a/src/paradigma/testing.py
+++ b/src/paradigma/testing.py
@@ -16,7 +16,7 @@ from paradigma.pipelines.tremor_pipeline import extract_tremor_features, detect_
 from paradigma.pipelines.heart_rate_pipeline import extract_signal_quality_features, signal_quality_classification, \
     aggregate_heart_rate
 from paradigma.preprocessing import preprocess_imu_data, preprocess_ppg_data
-from paradigma.util import read_metadata, write_df_data, get_end_iso8601
+from paradigma.util import read_metadata, write_df_data, get_end_iso8601, merge_predictions_with_timestamps
 
 
 def preprocess_imu_data_io(path_to_input: str | Path, path_to_output: str | Path,
@@ -208,13 +208,27 @@ def extract_arm_activity_features_io(
 
     clf_package = ClassifierPackage.load(full_path_to_classifier_package)
 
+    gait_preprocessing_config = GaitConfig(step='gait')
+
+    df = merge_predictions_with_timestamps(
+        df_ts=df_ts,
+        df_predictions=df_pred_gait,
+        pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
+        window_length_s=gait_preprocessing_config.window_length_s,
+        fs=gait_preprocessing_config.sampling_frequency
+    )
+
+    # Add a column for predicted gait based on a fitted threshold
+    df[DataColumns.PRED_GAIT] = (df[DataColumns.PRED_GAIT_PROBA] >= clf_package.threshold).astype(int)
+
+    # Filter the DataFrame to only include predicted gait (1)
+    df = df.loc[df[DataColumns.PRED_GAIT]==1].reset_index(drop=True)
+
     # Extract arm activity features
     config = GaitConfig(step='arm_activity')
     df_features = extract_arm_activity_features(
+        df=df,
         config=config,
-        df_timestamps=df_ts,
-        df_predictions=df_pred_gait,
-        threshold=clf_package.threshold
     )
 
     end_iso8601 = get_end_iso8601(metadata_ts_values.start_iso8601, df_features[DataColumns.TIME][-1:].values[0] + config.window_length_s)
--- a/src/paradigma/util.py
+++ b/src/paradigma/util.py
@@ -1,9 +1,7 @@
-import json
 import os
 import numpy as np
 import pandas as pd
-from
-from datetime import timedelta
+from datetime import datetime, timedelta
 from dateutil import parser
 from typing import List, Tuple
@@ -432,3 +430,61 @@ def merge_predictions_with_timestamps(
     df_ts = df_ts.dropna(subset=[pred_proba_colname])
 
     return df_ts
+
+
+def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: str) -> pd.DataFrame:
+
+    """
+    Select hours of interest from the data to include in the aggregation step.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Input data.
+
+    select_hours_start: str
+        The start time of the selected hours in "HH:MM" format.
+
+    select_hours_end: str
+        The end time of the selected hours in "HH:MM" format.
+
+    Returns
+    -------
+    pd.DataFrame
+        The selected data.
+
+    """
+
+    select_hours_start = datetime.strptime(select_hours_start, '%H:%M').time() # convert to time object
+    select_hours_end = datetime.strptime(select_hours_end, '%H:%M').time()
+    df_subset = df[df['time_dt'].dt.time.between(select_hours_start, select_hours_end)] # select the hours of interest
+
+    return df_subset
+
+def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
+
+    """
+    Select days of interest from the data to include in the aggregation step.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Input data with column 'time_dt' in which the date is stored.
+
+    min_hours_per_day: int
+        The minimum number of hours per day required for including the day in the aggregation step.
+
+
+    Returns
+    -------
+    pd.DataFrame
+        The selected data.
+
+    """
+
+    min_s_per_day = min_hours_per_day * 3600
+    window_length_s = df['time_dt'].diff().dt.total_seconds()[1] # determine the length of the first window in seconds
+    min_windows_per_day = min_s_per_day / window_length_s
+    df_subset = df.groupby(df['time_dt'].dt.date).filter(lambda x: len(x) >= min_windows_per_day)
+
+    return df_subset
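A usage sketch for these two new helpers on a synthetic window-level frame. Note that `select_days` reads the window length from the gap between the first two rows by positional label, so it is applied here before `select_hours` disturbs the index; the data and thresholds are illustrative:

```python
import pandas as pd
from paradigma.util import select_days, select_hours

# Hypothetical window-level aggregates, one row per 30-minute window over 7 days
df = pd.DataFrame({'time_dt': pd.date_range('2024-01-01 00:00', periods=7 * 48, freq='30min')})

df = select_days(df, min_hours_per_day=10)   # keep days with >= 10 h of windows
df = select_hours(df, select_hours_start='08:00', select_hours_end='22:00')
print(len(df))  # 7 days x 29 half-hour windows between 08:00 and 22:00 inclusive
```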
The remaining files listed above with +0 -0 (LICENSE, `src/paradigma/__init__.py`, the four classifier package assets, `classification.py`, `config.py`, `constants.py`, `pipelines/__init__.py`, `pipelines/heart_rate_pipeline.py`, and `pipelines/heart_rate_utils.py`) are unchanged between 0.4.7 and 1.0.0.