paradigma 0.4.7__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {paradigma-0.4.7 → paradigma-1.0.1}/PKG-INFO +7 -5
- {paradigma-0.4.7 → paradigma-1.0.1}/README.md +6 -4
- {paradigma-0.4.7 → paradigma-1.0.1}/pyproject.toml +1 -1
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/config.py +6 -6
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/constants.py +2 -2
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/feature_extraction.py +8 -22
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/pipelines/gait_pipeline.py +66 -76
- paradigma-0.4.7/src/paradigma/pipelines/heart_rate_pipeline.py → paradigma-1.0.1/src/paradigma/pipelines/pulse_rate_pipeline.py +48 -48
- paradigma-0.4.7/src/paradigma/pipelines/heart_rate_utils.py → paradigma-1.0.1/src/paradigma/pipelines/pulse_rate_utils.py +26 -26
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/pipelines/tremor_pipeline.py +7 -6
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/preprocessing.py +30 -7
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/segmenting.py +4 -1
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/testing.py +37 -23
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/util.py +59 -3
- {paradigma-0.4.7 → paradigma-1.0.1}/LICENSE +0 -0
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/__init__.py +0 -0
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/assets/gait_detection_clf_package.pkl +0 -0
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/assets/gait_filtering_clf_package.pkl +0 -0
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/assets/ppg_quality_clf_package.pkl +0 -0
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/assets/tremor_detection_clf_package.pkl +0 -0
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/classification.py +0 -0
- {paradigma-0.4.7 → paradigma-1.0.1}/src/paradigma/pipelines/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: paradigma
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: ParaDigMa - A toolbox for deriving Parkinson's disease Digital Markers from real-life wrist sensor data
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Erik Post
|
|
@@ -26,7 +26,7 @@ Description-Content-Type: text/markdown
|
|
|
26
26
|
|:----:|----|
|
|
27
27
|
| **Packages and Releases** | [](https://github.com/biomarkersparkinson/paradigma/releases/latest) [](https://pypi.python.org/pypi/paradigma/) [](https://research-software-directory.org/software/paradigma) |
|
|
28
28
|
| **DOI** | [](https://doi.org/10.5281/zenodo.13838392) |
|
|
29
|
-
| **Build Status** | [](https://www.python.org/downloads/) [](https://github.com/biomarkersParkinson/paradigma/actions/workflows/build-and-test.yml) [](https://github.com/biomarkersParkinson/paradigma/actions/workflows/pages/pages-build-deployment) |
|
|
30
30
|
| **License** | [](https://github.com/biomarkersparkinson/paradigma/blob/main/LICENSE) |
|
|
31
31
|
<!-- | **Fairness** | [](https://fair-software.eu) [](https://www.bestpractices.dev/projects/8083) | -->
|
|
32
32
|
|
|
@@ -95,7 +95,7 @@ The ParaDigMa toolbox is designed for the analysis of passive monitoring data co
|
|
|
95
95
|
Specific requirements include:
|
|
96
96
|
| Pipeline | Sensor Configuration | Context of Use |
|
|
97
97
|
|------------------------|--------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------|
|
|
98
|
-
| **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but different sensitivity for measuring disease progression for tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
|
|
98
|
+
| **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but different sensitivity for measuring disease progression for tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). <br> - Timeframe: contiguous, strictly increasing timestamps. | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
|
|
99
99
|
| **Arm swing during gait** | - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. <br> - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Population: no walking aid, no severe dyskinesia in the watch-sided arm. <br> - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm), and at least 2 minutes of arm swing. |
|
|
100
100
|
| **Tremor** | - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm). |
|
|
101
101
|
| **Pulse rate** | - PPG*: minimum sampling rate of 30 Hz, green LED. <br> - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. | - Population: no rhythm disorders (e.g. atrial fibrillation, atrial flutter). <br> - Compliance: for weekly measures: minimum average of 12 hours of data per day. |
|
|
@@ -111,8 +111,10 @@ We have included support for [TSDF](https://biomarkersparkinson.github.io/tsdf/)
|
|
|
111
111
|
|
|
112
112
|
## Scientific validation
|
|
113
113
|
|
|
114
|
-
The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. 2020]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/)
|
|
115
|
-
|
|
114
|
+
The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. 2020]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/) and the Personalized Parkinson Project [[Bloem et al. 2019]](https://pubmed.ncbi.nlm.nih.gov/31315608/). The following publication contains the details and validation of the arm swing during gait pipeline:
|
|
115
|
+
* [Post, E. et al. - Quantifying arm swing in Parkinson's disease: a method accounting for arm activities during free-living gait](https://doi.org/10.1186/s12984-025-01578-z)
|
|
116
|
+
|
|
117
|
+
Details and validation of the other pipelines shall be shared in upcoming scientific publications.
|
|
116
118
|
|
|
117
119
|
## Contributing
|
|
118
120
|
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|:----:|----|
|
|
7
7
|
| **Packages and Releases** | [](https://github.com/biomarkersparkinson/paradigma/releases/latest) [](https://pypi.python.org/pypi/paradigma/) [](https://research-software-directory.org/software/paradigma) |
|
|
8
8
|
| **DOI** | [](https://doi.org/10.5281/zenodo.13838392) |
|
|
9
|
-
| **Build Status** | [](https://www.python.org/downloads/) [](https://github.com/biomarkersParkinson/paradigma/actions/workflows/build-and-test.yml) [](https://github.com/biomarkersParkinson/paradigma/actions/workflows/pages/pages-build-deployment) |
|
|
10
10
|
| **License** | [](https://github.com/biomarkersparkinson/paradigma/blob/main/LICENSE) |
|
|
11
11
|
<!-- | **Fairness** | [](https://fair-software.eu) [](https://www.bestpractices.dev/projects/8083) | -->
|
|
12
12
|
|
|
@@ -75,7 +75,7 @@ The ParaDigMa toolbox is designed for the analysis of passive monitoring data co
|
|
|
75
75
|
Specific requirements include:
|
|
76
76
|
| Pipeline | Sensor Configuration | Context of Use |
|
|
77
77
|
|------------------------|--------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------|
|
|
78
|
-
| **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but different sensitivity for measuring disease progression for tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
|
|
78
|
+
| **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but different sensitivity for measuring disease progression for tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). <br> - Timeframe: contiguous, strictly increasing timestamps. | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
|
|
79
79
|
| **Arm swing during gait** | - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. <br> - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Population: no walking aid, no severe dyskinesia in the watch-sided arm. <br> - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm), and at least 2 minutes of arm swing. |
|
|
80
80
|
| **Tremor** | - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm). |
|
|
81
81
|
| **Pulse rate** | - PPG*: minimum sampling rate of 30 Hz, green LED. <br> - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. | - Population: no rhythm disorders (e.g. atrial fibrillation, atrial flutter). <br> - Compliance: for weekly measures: minimum average of 12 hours of data per day. |
|
|
@@ -91,8 +91,10 @@ We have included support for [TSDF](https://biomarkersparkinson.github.io/tsdf/)
|
|
|
91
91
|
|
|
92
92
|
## Scientific validation
|
|
93
93
|
|
|
94
|
-
The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. 2020]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/)
|
|
95
|
-
|
|
94
|
+
The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. 2020]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/) and the Personalized Parkinson Project [[Bloem et al. 2019]](https://pubmed.ncbi.nlm.nih.gov/31315608/). The following publication contains the details and validation of the arm swing during gait pipeline:
|
|
95
|
+
* [Post, E. et al. - Quantifying arm swing in Parkinson's disease: a method accounting for arm activities during free-living gait](https://doi.org/10.1186/s12984-025-01578-z)
|
|
96
|
+
|
|
97
|
+
Details and validation of the other pipelines shall be shared in upcoming scientific publications.
|
|
96
98
|
|
|
97
99
|
## Contributing
|
|
98
100
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "paradigma"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "1.0.1"
|
|
4
4
|
description = "ParaDigMa - A toolbox for deriving Parkinson's disease Digital Markers from real-life wrist sensor data"
|
|
5
5
|
authors = [ "Erik Post <erik.post@radboudumc.nl>",
|
|
6
6
|
"Kars Veldkamp <kars.veldkamp@radboudumc.nl>",
|
|
@@ -244,7 +244,7 @@ class TremorConfig(IMUConfig):
|
|
|
244
244
|
}
|
|
245
245
|
|
|
246
246
|
|
|
247
|
-
class
|
|
247
|
+
class PulseRateConfig(PPGConfig):
|
|
248
248
|
def __init__(self, sensor: str = 'ppg', min_window_length_s: int = 30) -> None:
|
|
249
249
|
super().__init__()
|
|
250
250
|
|
|
@@ -265,14 +265,14 @@ class HeartRateConfig(PPGConfig):
|
|
|
265
265
|
self.freq_bin_resolution = 0.05 # Hz
|
|
266
266
|
|
|
267
267
|
# ---------------------
|
|
268
|
-
#
|
|
268
|
+
# Pulse rate estimation
|
|
269
269
|
# ---------------------
|
|
270
270
|
self.set_tfd_length(min_window_length_s) # Set tfd length to default of 30 seconds
|
|
271
271
|
self.threshold_sqa = 0.5
|
|
272
|
-
self.threshold_sqa_accelerometer = 0.
|
|
272
|
+
self.threshold_sqa_accelerometer = 0.10
|
|
273
273
|
|
|
274
|
-
|
|
275
|
-
self.
|
|
274
|
+
pr_est_length = 2 # pulse rate estimation length in seconds
|
|
275
|
+
self.pr_est_samples = pr_est_length * self.sampling_frequency
|
|
276
276
|
|
|
277
277
|
# Time-frequency distribution parameters
|
|
278
278
|
self.kern_type = 'sep'
|
|
@@ -297,7 +297,7 @@ class HeartRateConfig(PPGConfig):
|
|
|
297
297
|
|
|
298
298
|
def set_tfd_length(self, tfd_length: int):
|
|
299
299
|
self.tfd_length = tfd_length
|
|
300
|
-
self.
|
|
300
|
+
self.min_pr_samples = int(round(self.tfd_length * self.sampling_frequency))
|
|
301
301
|
|
|
302
302
|
def set_sensor(self, sensor):
|
|
303
303
|
self.sensor = sensor
|
|
@@ -58,8 +58,8 @@ class DataColumns():
|
|
|
58
58
|
PRED_SQA_ACC_LABEL: str = "pred_sqa_acc_label"
|
|
59
59
|
PRED_SQA: str = "pred_sqa"
|
|
60
60
|
|
|
61
|
-
# Constants for
|
|
62
|
-
|
|
61
|
+
# Constants for pulse rate
|
|
62
|
+
PULSE_RATE: str = "pulse_rate"
|
|
63
63
|
|
|
64
64
|
@dataclass(frozen=True)
|
|
65
65
|
class DataUnits():
|
|
@@ -7,7 +7,7 @@ from scipy.signal import find_peaks, windows
|
|
|
7
7
|
from scipy.stats import kurtosis, skew
|
|
8
8
|
from sklearn.decomposition import PCA
|
|
9
9
|
|
|
10
|
-
from paradigma.config import
|
|
10
|
+
from paradigma.config import PulseRateConfig
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def compute_statistics(data: np.ndarray, statistic: str, abs_stats: bool=False) -> np.ndarray:
|
|
@@ -353,7 +353,7 @@ def extract_frequency_peak(
|
|
|
353
353
|
def compute_relative_power(
|
|
354
354
|
freqs: np.ndarray,
|
|
355
355
|
psd: np.ndarray,
|
|
356
|
-
config:
|
|
356
|
+
config: PulseRateConfig
|
|
357
357
|
) -> list:
|
|
358
358
|
"""
|
|
359
359
|
Calculate relative power within the dominant frequency band in the physiological range (0.75 - 3 Hz).
|
|
@@ -364,11 +364,11 @@ def compute_relative_power(
|
|
|
364
364
|
The frequency bins of the power spectral density.
|
|
365
365
|
psd: np.ndarray
|
|
366
366
|
The power spectral density of the signal.
|
|
367
|
-
config:
|
|
367
|
+
config: PulseRateConfig
|
|
368
368
|
The configuration object containing the parameters for the feature extraction. The following
|
|
369
369
|
attributes are used:
|
|
370
370
|
- freq_band_physio: tuple
|
|
371
|
-
The frequency band for physiological
|
|
371
|
+
The frequency band for physiological pulse rate (default: (0.75, 3)).
|
|
372
372
|
- bandwidth: float
|
|
373
373
|
The bandwidth around the peak frequency to consider for relative power calculation (default: 0.5).
|
|
374
374
|
|
|
@@ -597,11 +597,9 @@ def pca_transform_gyroscope(
|
|
|
597
597
|
df: pd.DataFrame,
|
|
598
598
|
y_gyro_colname: str,
|
|
599
599
|
z_gyro_colname: str,
|
|
600
|
-
pred_colname: str | None = None,
|
|
601
600
|
) -> np.ndarray:
|
|
602
601
|
"""
|
|
603
|
-
Perform principal component analysis (PCA) on gyroscope data to estimate velocity.
|
|
604
|
-
the PCA is fitted on the predicted gait data. Otherwise, the PCA is fitted on the entire dataset.
|
|
602
|
+
Perform principal component analysis (PCA) on gyroscope data to estimate velocity.
|
|
605
603
|
|
|
606
604
|
Parameters
|
|
607
605
|
----------
|
|
@@ -611,8 +609,6 @@ def pca_transform_gyroscope(
|
|
|
611
609
|
The column name for the y-axis gyroscope data.
|
|
612
610
|
z_gyro_colname : str
|
|
613
611
|
The column name for the z-axis gyroscope data.
|
|
614
|
-
pred_colname : str, optional
|
|
615
|
-
The column name for the predicted gait (default: None).
|
|
616
612
|
|
|
617
613
|
Returns
|
|
618
614
|
-------
|
|
@@ -623,19 +619,9 @@ def pca_transform_gyroscope(
|
|
|
623
619
|
y_gyro_array = df[y_gyro_colname].to_numpy()
|
|
624
620
|
z_gyro_array = df[z_gyro_colname].to_numpy()
|
|
625
621
|
|
|
626
|
-
#
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
y_gyro_fit_array = y_gyro_array[pred_mask]
|
|
630
|
-
z_gyro_fit_array = z_gyro_array[pred_mask]
|
|
631
|
-
|
|
632
|
-
# Fit PCA on predicted gait data
|
|
633
|
-
fit_data = np.column_stack((y_gyro_fit_array, z_gyro_fit_array))
|
|
634
|
-
full_data = np.column_stack((y_gyro_array, z_gyro_array))
|
|
635
|
-
else:
|
|
636
|
-
# Fit PCA on entire dataset
|
|
637
|
-
fit_data = np.column_stack((y_gyro_array, z_gyro_array))
|
|
638
|
-
full_data = fit_data
|
|
622
|
+
# Fit PCA
|
|
623
|
+
fit_data = np.column_stack((y_gyro_array, z_gyro_array))
|
|
624
|
+
full_data = fit_data
|
|
639
625
|
|
|
640
626
|
pca = PCA(n_components=2, svd_solver='auto', random_state=22)
|
|
641
627
|
pca.fit(fit_data)
|
|
@@ -1,20 +1,17 @@
|
|
|
1
1
|
import numpy as np
|
|
2
|
-
import os
|
|
3
2
|
import pandas as pd
|
|
4
|
-
from pathlib import Path
|
|
5
3
|
from scipy.signal import periodogram
|
|
6
4
|
from typing import List, Tuple
|
|
7
|
-
import tsdf
|
|
8
5
|
|
|
9
6
|
from paradigma.classification import ClassifierPackage
|
|
10
|
-
from paradigma.constants import DataColumns
|
|
7
|
+
from paradigma.constants import DataColumns
|
|
11
8
|
from paradigma.config import GaitConfig
|
|
12
9
|
from paradigma.feature_extraction import pca_transform_gyroscope, compute_angle, remove_moving_average_angle, \
|
|
13
10
|
extract_angle_extremes, compute_range_of_motion, compute_peak_angular_velocity, compute_statistics, \
|
|
14
11
|
compute_std_euclidean_norm, compute_power_in_bandwidth, compute_dominant_frequency, compute_mfccs, \
|
|
15
12
|
compute_total_power
|
|
16
13
|
from paradigma.segmenting import tabulate_windows, create_segments, discard_segments, categorize_segments, WindowedDataExtractor
|
|
17
|
-
from paradigma.util import aggregate_parameter
|
|
14
|
+
from paradigma.util import aggregate_parameter
|
|
18
15
|
|
|
19
16
|
|
|
20
17
|
def extract_gait_features(
|
|
@@ -160,66 +157,35 @@ def detect_gait(
|
|
|
160
157
|
|
|
161
158
|
|
|
162
159
|
def extract_arm_activity_features(
|
|
160
|
+
df: pd.DataFrame,
|
|
163
161
|
config: GaitConfig,
|
|
164
|
-
df_timestamps: pd.DataFrame,
|
|
165
|
-
df_predictions: pd.DataFrame,
|
|
166
|
-
threshold: float
|
|
167
162
|
) -> pd.DataFrame:
|
|
168
163
|
"""
|
|
169
164
|
Extract features related to arm activity from a time-series DataFrame.
|
|
170
165
|
|
|
171
166
|
This function processes a DataFrame containing accelerometer, gravity, and gyroscope signals,
|
|
172
167
|
and extracts features related to arm activity by performing the following steps:
|
|
173
|
-
1.
|
|
174
|
-
2.
|
|
175
|
-
3.
|
|
176
|
-
4.
|
|
177
|
-
5.
|
|
178
|
-
6.
|
|
179
|
-
7. Extracts angle-related features, temporal domain features, and spectral domain features.
|
|
168
|
+
1. Computes the angle and velocity from gyroscope data.
|
|
169
|
+
2. Filters the data to include only predicted gait segments.
|
|
170
|
+
3. Groups the data into segments based on consecutive timestamps and pre-specified gaps.
|
|
171
|
+
4. Removes segments that do not meet predefined criteria.
|
|
172
|
+
5. Creates fixed-length windows from the time series data.
|
|
173
|
+
6. Extracts angle-related features, temporal domain features, and spectral domain features.
|
|
180
174
|
|
|
181
175
|
Parameters
|
|
182
176
|
----------
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
df_timestamps : pd.DataFrame
|
|
187
|
-
A DataFrame containing the raw sensor data, including accelerometer, gravity, and gyroscope columns.
|
|
188
|
-
|
|
189
|
-
df_predictions : pd.DataFrame
|
|
190
|
-
A DataFrame containing the predicted probabilities for gait activity per window.
|
|
177
|
+
df: pd.DataFrame
|
|
178
|
+
The input DataFrame containing accelerometer, gravity, and gyroscope data of predicted gait.
|
|
191
179
|
|
|
192
180
|
config : GaitConfig
|
|
193
181
|
Configuration object containing column names and parameters for feature extraction.
|
|
194
182
|
|
|
195
|
-
path_to_classifier_input : str | Path
|
|
196
|
-
The path to the directory containing the classifier files and other necessary input files for feature extraction.
|
|
197
|
-
|
|
198
183
|
Returns
|
|
199
184
|
-------
|
|
200
185
|
pd.DataFrame
|
|
201
186
|
A DataFrame containing the extracted arm activity features, including angle, velocity,
|
|
202
187
|
temporal, and spectral features.
|
|
203
188
|
"""
|
|
204
|
-
if not any(df_predictions[DataColumns.PRED_GAIT_PROBA] >= threshold):
|
|
205
|
-
raise ValueError("No gait detected in the input data.")
|
|
206
|
-
|
|
207
|
-
# Merge gait predictions with timestamps
|
|
208
|
-
gait_preprocessing_config = GaitConfig(step='gait')
|
|
209
|
-
df = merge_predictions_with_timestamps(
|
|
210
|
-
df_ts=df_timestamps,
|
|
211
|
-
df_predictions=df_predictions,
|
|
212
|
-
pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
|
|
213
|
-
window_length_s=gait_preprocessing_config.window_length_s,
|
|
214
|
-
fs=gait_preprocessing_config.sampling_frequency
|
|
215
|
-
)
|
|
216
|
-
|
|
217
|
-
# Add a column for predicted gait based on a fitted threshold
|
|
218
|
-
df[DataColumns.PRED_GAIT] = (df[DataColumns.PRED_GAIT_PROBA] >= threshold).astype(int)
|
|
219
|
-
|
|
220
|
-
# Filter the DataFrame to only include predicted gait (1)
|
|
221
|
-
df = df.loc[df[DataColumns.PRED_GAIT]==1].reset_index(drop=True)
|
|
222
|
-
|
|
223
189
|
# Group consecutive timestamps into segments, with new segments starting after a pre-specified gap
|
|
224
190
|
df[DataColumns.SEGMENT_NR] = create_segments(
|
|
225
191
|
time_array=df[DataColumns.TIME],
|
|
@@ -315,8 +281,8 @@ def filter_gait(
|
|
|
315
281
|
----------
|
|
316
282
|
df : pd.DataFrame
|
|
317
283
|
The input DataFrame containing features extracted from gait data.
|
|
318
|
-
|
|
319
|
-
The
|
|
284
|
+
clf_package: ClassifierPackage
|
|
285
|
+
The pre-trained classifier package containing the classifier, threshold, and scaler.
|
|
320
286
|
parallel : bool, optional, default=False
|
|
321
287
|
If `True`, enables parallel processing.
|
|
322
288
|
|
|
@@ -351,10 +317,10 @@ def filter_gait(
|
|
|
351
317
|
|
|
352
318
|
def quantify_arm_swing(
|
|
353
319
|
df: pd.DataFrame,
|
|
354
|
-
max_segment_gap_s: float,
|
|
355
|
-
min_segment_length_s: float,
|
|
356
320
|
fs: int,
|
|
357
321
|
filtered: bool = False,
|
|
322
|
+
max_segment_gap_s: float = 1.5,
|
|
323
|
+
min_segment_length_s: float = 1.5
|
|
358
324
|
) -> Tuple[dict[str, pd.DataFrame], dict]:
|
|
359
325
|
"""
|
|
360
326
|
Quantify arm swing parameters for segments of motion based on gyroscope data.
|
|
@@ -362,28 +328,27 @@ def quantify_arm_swing(
|
|
|
362
328
|
Parameters
|
|
363
329
|
----------
|
|
364
330
|
df : pd.DataFrame
|
|
365
|
-
A DataFrame containing the raw sensor data
|
|
331
|
+
A DataFrame containing the raw sensor data of predicted gait timestamps. Should include a column
|
|
366
332
|
for predicted no other arm activity based on a fitted threshold if filtered is True.
|
|
367
333
|
|
|
368
|
-
max_segment_gap_s : float
|
|
369
|
-
The maximum gap allowed between segments.
|
|
370
|
-
|
|
371
|
-
min_segment_length_s : float
|
|
372
|
-
The minimum length required for a segment to be considered valid.
|
|
373
|
-
|
|
374
334
|
fs : int
|
|
375
335
|
The sampling frequency of the sensor data.
|
|
376
336
|
|
|
377
337
|
filtered : bool, optional, default=False
|
|
378
338
|
If `True`, the gyroscope data is filtered to only include predicted no other arm activity.
|
|
379
339
|
|
|
340
|
+
max_segment_gap_s : float, optional, default=1.5
|
|
341
|
+
The maximum gap in seconds between consecutive timestamps to group them into segments.
|
|
342
|
+
|
|
343
|
+
min_segment_length_s : float, optional, default=1.5
|
|
344
|
+
The minimum length in seconds for a segment to be considered valid.
|
|
345
|
+
|
|
380
346
|
Returns
|
|
381
347
|
-------
|
|
382
348
|
Tuple[pd.DataFrame, dict]
|
|
383
349
|
A tuple containing a dataframe with quantified arm swing parameters and a dictionary containing
|
|
384
350
|
metadata for each segment.
|
|
385
351
|
"""
|
|
386
|
-
|
|
387
352
|
# Group consecutive timestamps into segments, with new segments starting after a pre-specified gap.
|
|
388
353
|
# Segments are made based on predicted gait
|
|
389
354
|
df[DataColumns.SEGMENT_NR] = create_segments(
|
|
@@ -391,6 +356,10 @@ def quantify_arm_swing(
|
|
|
391
356
|
max_segment_gap_s=max_segment_gap_s
|
|
392
357
|
)
|
|
393
358
|
|
|
359
|
+
# Segment category is determined based on predicted gait, hence it is set
|
|
360
|
+
# before filtering the DataFrame to only include predicted no other arm activity
|
|
361
|
+
df[DataColumns.SEGMENT_CAT] = categorize_segments(df=df, fs=fs)
|
|
362
|
+
|
|
394
363
|
# Remove segments that do not meet predetermined criteria
|
|
395
364
|
df = discard_segments(
|
|
396
365
|
df=df,
|
|
@@ -401,40 +370,51 @@ def quantify_arm_swing(
|
|
|
401
370
|
)
|
|
402
371
|
|
|
403
372
|
if df.empty:
|
|
404
|
-
raise ValueError("No segments found in the input data.")
|
|
373
|
+
raise ValueError("No segments found in the input data after discarding segments of invalid shape.")
|
|
405
374
|
|
|
406
375
|
# If no arm swing data is remaining after filtering, raise an error
|
|
407
376
|
if filtered and df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].empty:
|
|
408
377
|
raise ValueError("No gait without other arm activities to quantify.")
|
|
409
|
-
|
|
410
|
-
df[DataColumns.SEGMENT_CAT] = categorize_segments(df=df, fs=fs)
|
|
411
|
-
|
|
412
|
-
# Group and process segments
|
|
413
|
-
arm_swing_quantified = []
|
|
414
|
-
segment_meta = {}
|
|
415
|
-
|
|
416
|
-
if filtered:
|
|
378
|
+
elif filtered:
|
|
417
379
|
# Filter the DataFrame to only include predicted no other arm activity (1)
|
|
418
380
|
df = df.loc[df[DataColumns.PRED_NO_OTHER_ARM_ACTIVITY]==1].reset_index(drop=True)
|
|
419
381
|
|
|
420
|
-
# Group consecutive timestamps into segments
|
|
421
|
-
# Now segments are based on predicted gait without other arm activity for subsequent processes
|
|
382
|
+
# Group consecutive timestamps into segments of filtered gait
|
|
422
383
|
df[DataColumns.SEGMENT_NR] = create_segments(
|
|
423
384
|
time_array=df[DataColumns.TIME],
|
|
424
385
|
max_segment_gap_s=max_segment_gap_s
|
|
425
386
|
)
|
|
426
387
|
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
388
|
+
# Remove segments that do not meet predetermined criteria
|
|
389
|
+
df = discard_segments(
|
|
390
|
+
df=df,
|
|
391
|
+
segment_nr_colname=DataColumns.SEGMENT_NR,
|
|
392
|
+
min_segment_length_s=min_segment_length_s,
|
|
393
|
+
fs=fs,
|
|
394
|
+
)
|
|
395
|
+
|
|
396
|
+
if df.empty:
|
|
397
|
+
raise ValueError("No filtered gait segments found in the input data after discarding segments of invalid shape.")
|
|
398
|
+
|
|
399
|
+
arm_swing_quantified = []
|
|
400
|
+
segment_meta = {
|
|
401
|
+
'aggregated': {
|
|
402
|
+
'all': {
|
|
403
|
+
'duration_s': len(df[DataColumns.TIME]) / fs
|
|
404
|
+
},
|
|
405
|
+
},
|
|
406
|
+
'per_segment': {}
|
|
407
|
+
}
|
|
430
408
|
|
|
409
|
+
# PCA is fitted on only predicted gait without other arm activity if filtered, otherwise
|
|
410
|
+
# it is fitted on the entire gyroscope data
|
|
431
411
|
df[DataColumns.VELOCITY] = pca_transform_gyroscope(
|
|
432
412
|
df=df,
|
|
433
413
|
y_gyro_colname=DataColumns.GYROSCOPE_Y,
|
|
434
414
|
z_gyro_colname=DataColumns.GYROSCOPE_Z,
|
|
435
|
-
pred_colname=pred_colname_pca
|
|
436
415
|
)
|
|
437
416
|
|
|
417
|
+
# Group and process segments
|
|
438
418
|
for segment_nr, group in df.groupby(DataColumns.SEGMENT_NR, sort=False):
|
|
439
419
|
segment_cat = group[DataColumns.SEGMENT_CAT].iloc[0]
|
|
440
420
|
time_array = group[DataColumns.TIME].to_numpy()
|
|
@@ -452,8 +432,10 @@ def quantify_arm_swing(
|
|
|
452
432
|
fs=fs,
|
|
453
433
|
)
|
|
454
434
|
|
|
455
|
-
segment_meta[segment_nr] = {
|
|
456
|
-
'
|
|
435
|
+
segment_meta['per_segment'][segment_nr] = {
|
|
436
|
+
'start_time_s': time_array.min(),
|
|
437
|
+
'end_time_s': time_array.max(),
|
|
438
|
+
'duration_s': len(angle_array) / fs,
|
|
457
439
|
DataColumns.SEGMENT_CAT: segment_cat
|
|
458
440
|
}
|
|
459
441
|
|
|
@@ -487,12 +469,20 @@ def quantify_arm_swing(
|
|
|
487
469
|
|
|
488
470
|
df_params_segment = pd.DataFrame({
|
|
489
471
|
DataColumns.SEGMENT_NR: segment_nr,
|
|
472
|
+
DataColumns.SEGMENT_CAT: segment_cat,
|
|
490
473
|
DataColumns.RANGE_OF_MOTION: rom,
|
|
491
474
|
DataColumns.PEAK_VELOCITY: pav
|
|
492
475
|
})
|
|
493
476
|
|
|
494
477
|
arm_swing_quantified.append(df_params_segment)
|
|
495
478
|
|
|
479
|
+
# Combine segment categories
|
|
480
|
+
segment_categories = set([segment_meta['per_segment'][x][DataColumns.SEGMENT_CAT] for x in segment_meta['per_segment'].keys()])
|
|
481
|
+
for segment_cat in segment_categories:
|
|
482
|
+
segment_meta['aggregated'][segment_cat] = {
|
|
483
|
+
'duration_s': sum([segment_meta['per_segment'][x]['duration_s'] for x in segment_meta['per_segment'].keys() if segment_meta['per_segment'][x][DataColumns.SEGMENT_CAT] == segment_cat])
|
|
484
|
+
}
|
|
485
|
+
|
|
496
486
|
arm_swing_quantified = pd.concat(arm_swing_quantified, ignore_index=True)
|
|
497
487
|
|
|
498
488
|
return arm_swing_quantified, segment_meta
|
|
@@ -527,7 +517,7 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
|
|
|
527
517
|
cat_segments = [x for x in segment_meta.keys() if segment_meta[x][DataColumns.SEGMENT_CAT] == segment_cat]
|
|
528
518
|
|
|
529
519
|
aggregated_results[segment_cat] = {
|
|
530
|
-
'
|
|
520
|
+
'duration_s': sum([segment_meta[x]['duration_s'] for x in cat_segments])
|
|
531
521
|
}
|
|
532
522
|
|
|
533
523
|
df_arm_swing_params_cat = df_arm_swing_params[df_arm_swing_params[DataColumns.SEGMENT_NR].isin(cat_segments)]
|
|
@@ -537,7 +527,7 @@ def aggregate_arm_swing_params(df_arm_swing_params: pd.DataFrame, segment_meta:
|
|
|
537
527
|
aggregated_results[segment_cat][f'{aggregate}_{arm_swing_parameter}'] = aggregate_parameter(df_arm_swing_params_cat[arm_swing_parameter], aggregate)
|
|
538
528
|
|
|
539
529
|
aggregated_results['all_segment_categories'] = {
|
|
540
|
-
'
|
|
530
|
+
'duration_s': sum([segment_meta[x]['duration_s'] for x in segment_meta.keys()])
|
|
541
531
|
}
|
|
542
532
|
|
|
543
533
|
for arm_swing_parameter in arm_swing_parameters:
|
|
@@ -10,14 +10,14 @@ from typing import List
|
|
|
10
10
|
|
|
11
11
|
from paradigma.classification import ClassifierPackage
|
|
12
12
|
from paradigma.constants import DataColumns
|
|
13
|
-
from paradigma.config import
|
|
13
|
+
from paradigma.config import PulseRateConfig
|
|
14
14
|
from paradigma.feature_extraction import compute_statistics, compute_signal_to_noise_ratio, compute_auto_correlation, \
|
|
15
15
|
compute_dominant_frequency, compute_relative_power, compute_spectral_entropy
|
|
16
|
-
from paradigma.pipelines.
|
|
16
|
+
from paradigma.pipelines.pulse_rate_utils import assign_sqa_label, extract_pr_segments, extract_pr_from_segment
|
|
17
17
|
from paradigma.segmenting import tabulate_windows, WindowedDataExtractor
|
|
18
|
-
from paradigma.util import
|
|
18
|
+
from paradigma.util import aggregate_parameter
|
|
19
19
|
|
|
20
|
-
def extract_signal_quality_features(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config:
|
|
20
|
+
def extract_signal_quality_features(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config: PulseRateConfig, acc_config: PulseRateConfig) -> pd.DataFrame:
|
|
21
21
|
"""
|
|
22
22
|
Extract signal quality features from the PPG signal.
|
|
23
23
|
The features are extracted from the temporal and spectral domain of the PPG signal.
|
|
@@ -30,9 +30,9 @@ def extract_signal_quality_features(df_ppg: pd.DataFrame, df_acc: pd.DataFrame,
|
|
|
30
30
|
The DataFrame containing the PPG signal.
|
|
31
31
|
df_acc : pd.DataFrame
|
|
32
32
|
The DataFrame containing the accelerometer signal.
|
|
33
|
-
ppg_config:
|
|
33
|
+
ppg_config: PulseRateConfig
|
|
34
34
|
The configuration for the signal quality feature extraction of the PPG signal.
|
|
35
|
-
acc_config:
|
|
35
|
+
acc_config: PulseRateConfig
|
|
36
36
|
The configuration for the signal quality feature extraction of the accelerometer signal.
|
|
37
37
|
|
|
38
38
|
Returns
|
|
@@ -94,7 +94,7 @@ def extract_signal_quality_features(df_ppg: pd.DataFrame, df_acc: pd.DataFrame,
|
|
|
94
94
|
return df_features
|
|
95
95
|
|
|
96
96
|
|
|
97
|
-
def signal_quality_classification(df: pd.DataFrame, config:
|
|
97
|
+
def signal_quality_classification(df: pd.DataFrame, config: PulseRateConfig, full_path_to_classifier_package: str | Path) -> pd.DataFrame:
|
|
98
98
|
"""
|
|
99
99
|
Classify the signal quality of the PPG signal using a logistic regression classifier. A probability close to 1 indicates a high-quality signal, while a probability close to 0 indicates a low-quality signal.
|
|
100
100
|
The classifier is trained on features extracted from the PPG signal. The features are extracted using the extract_signal_quality_features function.
|
|
@@ -105,7 +105,7 @@ def signal_quality_classification(df: pd.DataFrame, config: HeartRateConfig, ful
|
|
|
105
105
|
----------
|
|
106
106
|
df : pd.DataFrame
|
|
107
107
|
The DataFrame containing the PPG features and the accelerometer feature for signal quality classification.
|
|
108
|
-
config :
|
|
108
|
+
config : PulseRateConfig
|
|
109
109
|
The configuration for the signal quality classification.
|
|
110
110
|
full_path_to_classifier_package : str | Path
|
|
111
111
|
The path to the directory containing the classifier.
|
|
@@ -128,9 +128,9 @@ def signal_quality_classification(df: pd.DataFrame, config: HeartRateConfig, ful
|
|
|
128
128
|
return df[[DataColumns.TIME, DataColumns.PRED_SQA_PROBA, DataColumns.PRED_SQA_ACC_LABEL]] # Return only the relevant columns, namely the predicted probabilities for the PPG signal quality and the accelerometer label
|
|
129
129
|
|
|
130
130
|
|
|
131
|
-
def
|
|
131
|
+
def estimate_pulse_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame, config: PulseRateConfig) -> pd.DataFrame:
|
|
132
132
|
"""
|
|
133
|
-
Estimate the
|
|
133
|
+
Estimate the pulse rate from the PPG signal using the time-frequency domain method.
|
|
134
134
|
|
|
135
135
|
Parameters
|
|
136
136
|
----------
|
|
@@ -138,13 +138,13 @@ def estimate_heart_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame,
|
|
|
138
138
|
The DataFrame containing the signal quality assessment predictions.
|
|
139
139
|
df_ppg_preprocessed : pd.DataFrame
|
|
140
140
|
The DataFrame containing the preprocessed PPG signal.
|
|
141
|
-
config :
|
|
142
|
-
The configuration for the
|
|
141
|
+
config : PulseRateConfig
|
|
142
|
+
The configuration for the pulse rate estimation.
|
|
143
143
|
|
|
144
144
|
Returns
|
|
145
145
|
-------
|
|
146
|
-
|
|
147
|
-
The DataFrame containing the
|
|
146
|
+
df_pr : pd.DataFrame
|
|
147
|
+
The DataFrame containing the pulse rate estimations.
|
|
148
148
|
"""
|
|
149
149
|
|
|
150
150
|
# Extract NumPy arrays for faster operations
|
|
@@ -156,13 +156,13 @@ def estimate_heart_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame,
|
|
|
156
156
|
|
|
157
157
|
# Assign window-level probabilities to individual samples
|
|
158
158
|
sqa_label = assign_sqa_label(ppg_post_prob, config, acc_label) # assigns a signal quality label to every individual data point
|
|
159
|
-
v_start_idx, v_end_idx =
|
|
159
|
+
v_start_idx, v_end_idx = extract_pr_segments(sqa_label, config.min_pr_samples) # extracts pulse rate segments based on the SQA label
|
|
160
160
|
|
|
161
|
-
|
|
162
|
-
|
|
161
|
+
v_pr_rel = np.array([])
|
|
162
|
+
t_pr_rel = np.array([])
|
|
163
163
|
|
|
164
|
-
edge_add = 2 * config.sampling_frequency # Add 2s on both sides of the segment for
|
|
165
|
-
step_size = config.
|
|
164
|
+
edge_add = 2 * config.sampling_frequency # Add 2s on both sides of the segment for PR estimation
|
|
165
|
+
step_size = config.pr_est_samples # Step size for PR estimation
|
|
166
166
|
|
|
167
167
|
# Estimate the maximum size for preallocation
|
|
168
168
|
valid_segments = (v_start_idx >= edge_add) & (v_end_idx <= len(ppg_preprocessed) - edge_add) # check if the segments are valid, e.g. not too close to the edges (2s)
|
|
@@ -171,55 +171,55 @@ def estimate_heart_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame,
|
|
|
171
171
|
max_size = np.sum((valid_end_idx - valid_start_idx) // step_size) # maximum size for preallocation
|
|
172
172
|
|
|
173
173
|
# Preallocate arrays
|
|
174
|
-
|
|
175
|
-
|
|
174
|
+
v_pr_rel = np.empty(max_size, dtype=float)
|
|
175
|
+
t_pr_rel = np.empty(max_size, dtype=float)
|
|
176
176
|
|
|
177
177
|
# Track current position
|
|
178
|
-
|
|
178
|
+
pr_pos = 0
|
|
179
179
|
|
|
180
180
|
for start_idx, end_idx in zip(valid_start_idx, valid_end_idx):
|
|
181
181
|
# Extract extended PPG segment
|
|
182
182
|
extended_ppg_segment = ppg_preprocessed[start_idx - edge_add : end_idx + edge_add, ppg_idx]
|
|
183
183
|
|
|
184
|
-
# Estimate
|
|
185
|
-
|
|
184
|
+
# Estimate pulse rate
|
|
185
|
+
pr_est = extract_pr_from_segment(
|
|
186
186
|
extended_ppg_segment,
|
|
187
187
|
config.tfd_length,
|
|
188
188
|
config.sampling_frequency,
|
|
189
189
|
config.kern_type,
|
|
190
190
|
config.kern_params,
|
|
191
191
|
)
|
|
192
|
-
|
|
193
|
-
end_idx_time =
|
|
192
|
+
n_pr = len(pr_est) # Number of pulse rate estimates
|
|
193
|
+
end_idx_time = n_pr * step_size + start_idx # Calculate end index for time, different from end_idx since it is always a multiple of step_size, while end_idx is not
|
|
194
194
|
|
|
195
|
-
# Extract relative time for
|
|
196
|
-
|
|
195
|
+
# Extract relative time for PR estimates
|
|
196
|
+
pr_time = ppg_preprocessed[start_idx : end_idx_time : step_size, time_idx]
|
|
197
197
|
|
|
198
198
|
# Insert into preallocated arrays
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
199
|
+
v_pr_rel[pr_pos:pr_pos + n_pr] = pr_est
|
|
200
|
+
t_pr_rel[pr_pos:pr_pos + n_pr] = pr_time
|
|
201
|
+
pr_pos += n_pr
|
|
202
202
|
|
|
203
|
-
|
|
203
|
+
df_pr = pd.DataFrame({"time": t_pr_rel, "pulse_rate": v_pr_rel})
|
|
204
204
|
|
|
205
|
-
return
|
|
205
|
+
return df_pr
|
|
206
206
|
|
|
207
207
|
|
|
208
|
-
def
|
|
208
|
+
def aggregate_pulse_rate(pr_values: np.ndarray, aggregates: List[str] = ['mode', '99p']) -> dict:
|
|
209
209
|
"""
|
|
210
|
-
Aggregate the
|
|
210
|
+
Aggregate the pulse rate estimates using the specified aggregation methods.
|
|
211
211
|
|
|
212
212
|
Parameters
|
|
213
213
|
----------
|
|
214
|
-
|
|
215
|
-
The array containing the
|
|
214
|
+
pr_values : np.ndarray
|
|
215
|
+
The array containing the pulse rate estimates
|
|
216
216
|
aggregates : List[str]
|
|
217
|
-
The list of aggregation methods to be used for the
|
|
217
|
+
The list of aggregation methods to be used for the pulse rate estimates. The default is ['mode', '99p'].
|
|
218
218
|
|
|
219
219
|
Returns
|
|
220
220
|
-------
|
|
221
221
|
aggregated_results : dict
|
|
222
|
-
The dictionary containing the aggregated results of the
|
|
222
|
+
The dictionary containing the aggregated results of the pulse rate estimates.
|
|
223
223
|
"""
|
|
224
224
|
# Initialize the dictionary for the aggregated results
|
|
225
225
|
aggregated_results = {}
|
|
@@ -227,19 +227,19 @@ def aggregate_heart_rate(hr_values: np.ndarray, aggregates: List[str] = ['mode',
|
|
|
227
227
|
# Initialize the dictionary for the aggregated results with the metadata
|
|
228
228
|
aggregated_results = {
|
|
229
229
|
'metadata': {
|
|
230
|
-
'
|
|
230
|
+
'nr_pr_est': len(pr_values)
|
|
231
231
|
},
|
|
232
|
-
'
|
|
232
|
+
'pr_aggregates': {}
|
|
233
233
|
}
|
|
234
234
|
for aggregate in aggregates:
|
|
235
|
-
aggregated_results['
|
|
235
|
+
aggregated_results['pr_aggregates'][f'{aggregate}_{DataColumns.PULSE_RATE}'] = aggregate_parameter(pr_values, aggregate)
|
|
236
236
|
|
|
237
237
|
return aggregated_results
|
|
238
238
|
|
|
239
239
|
|
|
240
240
|
def extract_temporal_domain_features(
|
|
241
241
|
ppg_windowed: np.ndarray,
|
|
242
|
-
config:
|
|
242
|
+
config: PulseRateConfig,
|
|
243
243
|
quality_stats: List[str] = ['mean', 'std']
|
|
244
244
|
) -> pd.DataFrame:
|
|
245
245
|
"""
|
|
@@ -250,7 +250,7 @@ def extract_temporal_domain_features(
|
|
|
250
250
|
ppg_windowed: np.ndarray
|
|
251
251
|
The dataframe containing the windowed accelerometer signal
|
|
252
252
|
|
|
253
|
-
config:
|
|
253
|
+
config: PulseRateConfig
|
|
254
254
|
The configuration object containing the parameters for the feature extraction
|
|
255
255
|
|
|
256
256
|
quality_stats: list, optional
|
|
@@ -273,7 +273,7 @@ def extract_temporal_domain_features(
|
|
|
273
273
|
|
|
274
274
|
def extract_spectral_domain_features(
|
|
275
275
|
ppg_windowed: np.ndarray,
|
|
276
|
-
config:
|
|
276
|
+
config: PulseRateConfig,
|
|
277
277
|
) -> pd.DataFrame:
|
|
278
278
|
"""
|
|
279
279
|
Calculate the spectral features (dominant frequency, relative power, and spectral entropy)
|
|
@@ -285,7 +285,7 @@ def extract_spectral_domain_features(
|
|
|
285
285
|
ppg_windowed: np.ndarray
|
|
286
286
|
The dataframe containing the windowed ppg signal
|
|
287
287
|
|
|
288
|
-
config:
|
|
288
|
+
config: PulseRateConfig
|
|
289
289
|
The configuration object containing the parameters for the feature extraction
|
|
290
290
|
|
|
291
291
|
Returns
|
|
@@ -371,7 +371,7 @@ def extract_acc_power_feature(
|
|
|
371
371
|
def extract_accelerometer_feature(
|
|
372
372
|
acc_windowed: np.ndarray,
|
|
373
373
|
ppg_windowed: np.ndarray,
|
|
374
|
-
config:
|
|
374
|
+
config: PulseRateConfig
|
|
375
375
|
) -> pd.DataFrame:
|
|
376
376
|
"""
|
|
377
377
|
Extract accelerometer features from the accelerometer signal in the PPG frequency range.
|
|
@@ -384,7 +384,7 @@ def extract_accelerometer_feature(
|
|
|
384
384
|
ppg_windowed: np.ndarray
|
|
385
385
|
The dataframe containing the corresponding windowed ppg signal
|
|
386
386
|
|
|
387
|
-
config:
|
|
387
|
+
config: PulseRateConfig
|
|
388
388
|
The configuration object containing the parameters for the feature extraction
|
|
389
389
|
|
|
390
390
|
Returns
|
|
@@ -2,12 +2,12 @@ import numpy as np
|
|
|
2
2
|
from scipy import signal
|
|
3
3
|
from typing import Tuple
|
|
4
4
|
|
|
5
|
-
from paradigma.config import
|
|
5
|
+
from paradigma.config import PulseRateConfig
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def assign_sqa_label(
|
|
9
9
|
ppg_prob: np.ndarray,
|
|
10
|
-
config:
|
|
10
|
+
config: PulseRateConfig,
|
|
11
11
|
acc_label=None
|
|
12
12
|
) -> np.ndarray:
|
|
13
13
|
"""
|
|
@@ -17,7 +17,7 @@ def assign_sqa_label(
|
|
|
17
17
|
----------
|
|
18
18
|
ppg_prob : np.ndarray
|
|
19
19
|
The probabilities for PPG.
|
|
20
|
-
config :
|
|
20
|
+
config : PulseRateConfig
|
|
21
21
|
The configuration parameters.
|
|
22
22
|
acc_label : np.ndarray, optional
|
|
23
23
|
The labels for the accelerometer.
|
|
@@ -61,23 +61,23 @@ def assign_sqa_label(
|
|
|
61
61
|
return sqa_label
|
|
62
62
|
|
|
63
63
|
|
|
64
|
-
def
|
|
64
|
+
def extract_pr_segments(sqa_label: np.ndarray, min_pr_samples: int) -> Tuple[np.ndarray, np.ndarray]:
|
|
65
65
|
"""
|
|
66
|
-
Extracts
|
|
66
|
+
Extracts pulse rate segments based on the SQA label.
|
|
67
67
|
|
|
68
68
|
Parameters
|
|
69
69
|
----------
|
|
70
70
|
sqa_label : np.ndarray
|
|
71
71
|
The signal quality assessment label.
|
|
72
|
-
|
|
73
|
-
The minimum number of samples required for a
|
|
72
|
+
min_pr_samples : int
|
|
73
|
+
The minimum number of samples required for a pulse rate segment.
|
|
74
74
|
|
|
75
75
|
Returns
|
|
76
76
|
-------
|
|
77
77
|
Tuple[v_start_idx_long, v_end_idx_long]
|
|
78
|
-
The start and end indices of the
|
|
78
|
+
The start and end indices of the pulse rate segments.
|
|
79
79
|
"""
|
|
80
|
-
# Find the start and end indices of the
|
|
80
|
+
# Find the start and end indices of the pulse rate segments
|
|
81
81
|
v_start_idx = np.where(np.diff(sqa_label.astype(int)) == 1)[0] + 1
|
|
82
82
|
v_end_idx = np.where(np.diff(sqa_label.astype(int)) == -1)[0] + 1
|
|
83
83
|
|
|
@@ -88,13 +88,13 @@ def extract_hr_segments(sqa_label: np.ndarray, min_hr_samples: int) -> Tuple[np.
|
|
|
88
88
|
v_end_idx = np.append(v_end_idx, len(sqa_label))
|
|
89
89
|
|
|
90
90
|
# Check if the segments are long enough
|
|
91
|
-
v_start_idx_long = v_start_idx[(v_end_idx - v_start_idx) >=
|
|
92
|
-
v_end_idx_long = v_end_idx[(v_end_idx - v_start_idx) >=
|
|
91
|
+
v_start_idx_long = v_start_idx[(v_end_idx - v_start_idx) >= min_pr_samples]
|
|
92
|
+
v_end_idx_long = v_end_idx[(v_end_idx - v_start_idx) >= min_pr_samples]
|
|
93
93
|
|
|
94
94
|
return v_start_idx_long, v_end_idx_long
|
|
95
95
|
|
|
96
96
|
|
|
97
|
-
def
|
|
97
|
+
def extract_pr_from_segment(
|
|
98
98
|
ppg: np.ndarray,
|
|
99
99
|
tfd_length: int,
|
|
100
100
|
fs: int,
|
|
@@ -102,7 +102,7 @@ def extract_hr_from_segment(
|
|
|
102
102
|
kern_params: dict
|
|
103
103
|
) -> np.ndarray:
|
|
104
104
|
"""
|
|
105
|
-
Extracts
|
|
105
|
+
Extracts pulse rate from the time-frequency distribution of the PPG signal.
|
|
106
106
|
|
|
107
107
|
Parameters
|
|
108
108
|
----------
|
|
@@ -121,7 +121,7 @@ def extract_hr_from_segment(
|
|
|
121
121
|
Returns
|
|
122
122
|
-------
|
|
123
123
|
np.ndarray
|
|
124
|
-
The estimated
|
|
124
|
+
The estimated pulse rate.
|
|
125
125
|
"""
|
|
126
126
|
|
|
127
127
|
# Constants to handle boundary effects
|
|
@@ -145,23 +145,23 @@ def extract_hr_from_segment(
|
|
|
145
145
|
end_idx = len(ppg)
|
|
146
146
|
ppg_segments.append(ppg[start_idx:end_idx])
|
|
147
147
|
|
|
148
|
-
|
|
148
|
+
pr_est_from_ppg = np.array([])
|
|
149
149
|
for segment in ppg_segments:
|
|
150
150
|
# Calculate the time-frequency distribution
|
|
151
|
-
|
|
152
|
-
|
|
151
|
+
pr_tfd = extract_pr_with_tfd(segment, fs, kern_type, kern_params)
|
|
152
|
+
pr_est_from_ppg = np.concatenate((pr_est_from_ppg, pr_tfd))
|
|
153
153
|
|
|
154
|
-
return
|
|
154
|
+
return pr_est_from_ppg
|
|
155
155
|
|
|
156
156
|
|
|
157
|
-
def
|
|
157
|
+
def extract_pr_with_tfd(
|
|
158
158
|
ppg: np.ndarray,
|
|
159
159
|
fs: int,
|
|
160
160
|
kern_type: str,
|
|
161
161
|
kern_params: dict
|
|
162
162
|
) -> np.ndarray:
|
|
163
163
|
"""
|
|
164
|
-
Estimate
|
|
164
|
+
Estimate pulse rate (PR) from a PPG segment using a TFD method with optional
|
|
165
165
|
moving average filtering.
|
|
166
166
|
|
|
167
167
|
Parameters
|
|
@@ -177,8 +177,8 @@ def extract_hr_with_tfd(
|
|
|
177
177
|
|
|
178
178
|
Returns
|
|
179
179
|
-------
|
|
180
|
-
|
|
181
|
-
Estimated
|
|
180
|
+
pr_smooth_tfd : np.ndarray
|
|
181
|
+
Estimated pr values (in beats per minute) for each 2-second segment of the PPG signal.
|
|
182
182
|
"""
|
|
183
183
|
# Generate the TFD matrix using the specified kernel
|
|
184
184
|
tfd_obj = TimeFreqDistr()
|
|
@@ -189,16 +189,16 @@ def extract_hr_with_tfd(
|
|
|
189
189
|
time_axis = np.arange(num_time_samples) / fs
|
|
190
190
|
freq_axis = np.linspace(0, 0.5, num_freq_bins) * fs
|
|
191
191
|
|
|
192
|
-
# Estimate
|
|
192
|
+
# Estimate pulse rate by identifying the max frequency in the TFD
|
|
193
193
|
max_freq_indices = np.argmax(tfd, axis=0)
|
|
194
194
|
|
|
195
|
-
|
|
195
|
+
pr_smooth_tfd = np.array([])
|
|
196
196
|
for i in range(2, int(len(ppg) / fs) - 4 + 1, 2): # Skip the first and last 2 seconds, add 1 to include the last segment
|
|
197
197
|
relevant_indices = (time_axis >= i) & (time_axis < i + 2)
|
|
198
198
|
avg_frequency = np.mean(freq_axis[max_freq_indices[relevant_indices]])
|
|
199
|
-
|
|
199
|
+
pr_smooth_tfd = np.concatenate((pr_smooth_tfd, [60 * avg_frequency])) # Convert frequency to BPM
|
|
200
200
|
|
|
201
|
-
return
|
|
201
|
+
return pr_smooth_tfd
|
|
202
202
|
|
|
203
203
|
|
|
204
204
|
class TimeFreqDistr:
|
|
@@ -143,7 +143,6 @@ def detect_tremor(df: pd.DataFrame, config: TremorConfig, full_path_to_classifie
|
|
|
143
143
|
|
|
144
144
|
return df
|
|
145
145
|
|
|
146
|
-
|
|
147
146
|
def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
|
|
148
147
|
"""
|
|
149
148
|
Quantifies the amount of tremor time and tremor power, aggregated over all windows in the input dataframe.
|
|
@@ -154,8 +153,8 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
|
|
|
154
153
|
Parameters
|
|
155
154
|
----------
|
|
156
155
|
df : pd.DataFrame
|
|
157
|
-
The input DataFrame containing
|
|
158
|
-
|
|
156
|
+
The input DataFrame containing the tremor predictions and computed tremor power.
|
|
157
|
+
The DataFrame must also contain a datatime column ('time_dt').
|
|
159
158
|
|
|
160
159
|
config : TremorConfig
|
|
161
160
|
Configuration object containing the percentile for aggregating tremor power.
|
|
@@ -163,8 +162,8 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
|
|
|
163
162
|
Returns
|
|
164
163
|
-------
|
|
165
164
|
dict
|
|
166
|
-
A dictionary with the aggregated tremor time and tremor power measures, as well as the
|
|
167
|
-
|
|
165
|
+
A dictionary with the aggregated tremor time and tremor power measures, as well as the number of valid days,
|
|
166
|
+
the total number of windows, and the number of windows at rest available in the input dataframe.
|
|
168
167
|
|
|
169
168
|
Notes
|
|
170
169
|
-----
|
|
@@ -173,7 +172,7 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
|
|
|
173
172
|
- The modal tremor power is computed based on gaussian kernel density estimation.
|
|
174
173
|
|
|
175
174
|
"""
|
|
176
|
-
|
|
175
|
+
nr_valid_days = df['time_dt'].dt.date.unique().size # number of valid days in the input dataframe
|
|
177
176
|
nr_windows_total = df.shape[0] # number of windows in the input dataframe
|
|
178
177
|
|
|
179
178
|
# remove windows with detected non-tremor arm movements to control for the amount of arm activities performed
|
|
@@ -216,6 +215,7 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
|
|
|
216
215
|
# store aggregates in json format
|
|
217
216
|
d_aggregates = {
|
|
218
217
|
'metadata': {
|
|
218
|
+
'nr_valid_days': nr_valid_days,
|
|
219
219
|
'nr_windows_total': nr_windows_total,
|
|
220
220
|
'nr_windows_rest': nr_windows_rest
|
|
221
221
|
},
|
|
@@ -250,6 +250,7 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
|
|
|
250
250
|
pd.DataFrame
|
|
251
251
|
The feature dataframe containing the extracted spectral features, including
|
|
252
252
|
MFCCs, the frequency of the peak, the tremor power and below tremor power for each window.
|
|
253
|
+
|
|
253
254
|
"""
|
|
254
255
|
|
|
255
256
|
# Initialize a dictionary to hold the results
|
|
@@ -17,7 +17,9 @@ def resample_data(
|
|
|
17
17
|
df: pd.DataFrame,
|
|
18
18
|
time_column : str,
|
|
19
19
|
values_column_names: List[str],
|
|
20
|
+
sampling_frequency: int,
|
|
20
21
|
resampling_frequency: int,
|
|
22
|
+
tolerance: float | None = None
|
|
21
23
|
) -> pd.DataFrame:
|
|
22
24
|
"""
|
|
23
25
|
Resamples sensor data to a specified frequency using cubic interpolation.
|
|
@@ -30,8 +32,14 @@ def resample_data(
|
|
|
30
32
|
The name of the column containing the time data.
|
|
31
33
|
values_column_names : List[str]
|
|
32
34
|
A list of column names that should be resampled.
|
|
35
|
+
sampling_frequency : int
|
|
36
|
+
The original sampling frequency of the data (in Hz).
|
|
33
37
|
resampling_frequency : int
|
|
34
38
|
The frequency to which the data should be resampled (in Hz).
|
|
39
|
+
tolerance : float, optional
|
|
40
|
+
The tolerance added to the expected difference when checking
|
|
41
|
+
for contiguous timestamps. If not provided, it defaults to
|
|
42
|
+
twice the expected interval.
|
|
35
43
|
|
|
36
44
|
Returns
|
|
37
45
|
-------
|
|
@@ -46,23 +54,35 @@ def resample_data(
|
|
|
46
54
|
|
|
47
55
|
Notes
|
|
48
56
|
-----
|
|
49
|
-
|
|
50
|
-
|
|
57
|
+
- Uses cubic interpolation for smooth resampling if there are enough points.
|
|
58
|
+
- If only two timestamps are available, it falls back to linear interpolation.
|
|
51
59
|
"""
|
|
60
|
+
# Set default tolerance if not provided to twice the expected interval
|
|
61
|
+
if tolerance is None:
|
|
62
|
+
tolerance = 2 * 1 / sampling_frequency
|
|
52
63
|
|
|
53
|
-
# Extract time and values
|
|
64
|
+
# Extract time and values
|
|
54
65
|
time_abs_array = np.array(df[time_column])
|
|
55
66
|
values_array = np.array(df[values_column_names])
|
|
56
67
|
|
|
57
68
|
# Ensure the time array is strictly increasing
|
|
58
69
|
if not np.all(np.diff(time_abs_array) > 0):
|
|
59
|
-
raise ValueError("
|
|
70
|
+
raise ValueError("Time array is not strictly increasing")
|
|
71
|
+
|
|
72
|
+
# Ensure the time array is contiguous
|
|
73
|
+
expected_interval = 1 / sampling_frequency
|
|
74
|
+
timestamp_diffs = np.diff(time_abs_array)
|
|
75
|
+
if np.any(np.abs(timestamp_diffs - expected_interval) > tolerance):
|
|
76
|
+
raise ValueError("Time array is not contiguous")
|
|
60
77
|
|
|
61
78
|
# Resample the time data using the specified frequency
|
|
62
79
|
t_resampled = np.arange(time_abs_array[0], time_abs_array[-1], 1 / resampling_frequency)
|
|
63
80
|
|
|
64
|
-
#
|
|
65
|
-
|
|
81
|
+
# Choose interpolation method
|
|
82
|
+
interpolation_kind = "cubic" if len(time_abs_array) > 3 else "linear"
|
|
83
|
+
interpolator = interp1d(time_abs_array, values_array, axis=0, kind=interpolation_kind, fill_value="extrapolate")
|
|
84
|
+
|
|
85
|
+
# Interpolate
|
|
66
86
|
resampled_values = interpolator(t_resampled)
|
|
67
87
|
|
|
68
88
|
# Create a DataFrame with the resampled data
|
|
@@ -186,7 +206,8 @@ def preprocess_imu_data(df: pd.DataFrame, config: IMUConfig, sensor: str, watch_
|
|
|
186
206
|
df = resample_data(
|
|
187
207
|
df=df,
|
|
188
208
|
time_column=DataColumns.TIME,
|
|
189
|
-
values_column_names
|
|
209
|
+
values_column_names=values_colnames,
|
|
210
|
+
sampling_frequency=config.sampling_frequency,
|
|
190
211
|
resampling_frequency=config.sampling_frequency
|
|
191
212
|
)
|
|
192
213
|
|
|
@@ -259,6 +280,7 @@ def preprocess_ppg_data(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config:
|
|
|
259
280
|
df=df_acc_overlapping,
|
|
260
281
|
time_column=DataColumns.TIME,
|
|
261
282
|
values_column_names = list(imu_config.d_channels_accelerometer.keys()),
|
|
283
|
+
sampling_frequency=imu_config.sampling_frequency,
|
|
262
284
|
resampling_frequency=imu_config.sampling_frequency
|
|
263
285
|
)
|
|
264
286
|
|
|
@@ -267,6 +289,7 @@ def preprocess_ppg_data(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config:
|
|
|
267
289
|
df=df_ppg_overlapping,
|
|
268
290
|
time_column=DataColumns.TIME,
|
|
269
291
|
values_column_names = list(ppg_config.d_channels_ppg.keys()),
|
|
292
|
+
sampling_frequency=ppg_config.sampling_frequency,
|
|
270
293
|
resampling_frequency=ppg_config.sampling_frequency
|
|
271
294
|
)
|
|
272
295
|
|
|
@@ -168,7 +168,7 @@ def create_segments(
|
|
|
168
168
|
gap_exceeds = time_diff > max_segment_gap_s
|
|
169
169
|
|
|
170
170
|
# Create the segment number based on the cumulative sum of the gap_exceeds mask
|
|
171
|
-
segments = gap_exceeds.cumsum()
|
|
171
|
+
segments = gap_exceeds.cumsum()
|
|
172
172
|
|
|
173
173
|
return segments
|
|
174
174
|
|
|
@@ -236,6 +236,9 @@ def discard_segments(
|
|
|
236
236
|
|
|
237
237
|
df = df[valid_segment_mask].copy()
|
|
238
238
|
|
|
239
|
+
if df.empty:
|
|
240
|
+
raise ValueError("All segments were removed.")
|
|
241
|
+
|
|
239
242
|
# Reset segment numbers in a single step
|
|
240
243
|
unique_segments = pd.factorize(df[segment_nr_colname])[0] + 1
|
|
241
244
|
df[segment_nr_colname] = unique_segments
|
|
@@ -7,16 +7,16 @@ import tsdf
|
|
|
7
7
|
from typing import List
|
|
8
8
|
|
|
9
9
|
from paradigma.classification import ClassifierPackage
|
|
10
|
-
from paradigma.config import IMUConfig, PPGConfig, GaitConfig, TremorConfig,
|
|
10
|
+
from paradigma.config import IMUConfig, PPGConfig, GaitConfig, TremorConfig, PulseRateConfig
|
|
11
11
|
from paradigma.constants import DataColumns, TimeUnit
|
|
12
12
|
from paradigma.pipelines.gait_pipeline import extract_gait_features, detect_gait, \
|
|
13
13
|
extract_arm_activity_features, filter_gait
|
|
14
14
|
from paradigma.pipelines.tremor_pipeline import extract_tremor_features, detect_tremor, \
|
|
15
15
|
aggregate_tremor
|
|
16
|
-
from paradigma.pipelines.
|
|
17
|
-
|
|
16
|
+
from paradigma.pipelines.pulse_rate_pipeline import extract_signal_quality_features, signal_quality_classification, \
|
|
17
|
+
aggregate_pulse_rate
|
|
18
18
|
from paradigma.preprocessing import preprocess_imu_data, preprocess_ppg_data
|
|
19
|
-
from paradigma.util import read_metadata, write_df_data, get_end_iso8601
|
|
19
|
+
from paradigma.util import read_metadata, write_df_data, get_end_iso8601, merge_predictions_with_timestamps
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def preprocess_imu_data_io(path_to_input: str | Path, path_to_output: str | Path,
|
|
@@ -208,13 +208,27 @@ def extract_arm_activity_features_io(
|
|
|
208
208
|
|
|
209
209
|
clf_package = ClassifierPackage.load(full_path_to_classifier_package)
|
|
210
210
|
|
|
211
|
+
gait_preprocessing_config = GaitConfig(step='gait')
|
|
212
|
+
|
|
213
|
+
df = merge_predictions_with_timestamps(
|
|
214
|
+
df_ts=df_ts,
|
|
215
|
+
df_predictions=df_pred_gait,
|
|
216
|
+
pred_proba_colname=DataColumns.PRED_GAIT_PROBA,
|
|
217
|
+
window_length_s=gait_preprocessing_config.window_length_s,
|
|
218
|
+
fs=gait_preprocessing_config.sampling_frequency
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
# Add a column for predicted gait based on a fitted threshold
|
|
222
|
+
df[DataColumns.PRED_GAIT] = (df[DataColumns.PRED_GAIT_PROBA] >= clf_package.threshold).astype(int)
|
|
223
|
+
|
|
224
|
+
# Filter the DataFrame to only include predicted gait (1)
|
|
225
|
+
df = df.loc[df[DataColumns.PRED_GAIT]==1].reset_index(drop=True)
|
|
226
|
+
|
|
211
227
|
# Extract arm activity features
|
|
212
228
|
config = GaitConfig(step='arm_activity')
|
|
213
229
|
df_features = extract_arm_activity_features(
|
|
230
|
+
df=df,
|
|
214
231
|
config=config,
|
|
215
|
-
df_timestamps=df_ts,
|
|
216
|
-
df_predictions=df_pred_gait,
|
|
217
|
-
threshold=clf_package.threshold
|
|
218
232
|
)
|
|
219
233
|
|
|
220
234
|
end_iso8601 = get_end_iso8601(metadata_ts_values.start_iso8601, df_features[DataColumns.TIME][-1:].values[0] + config.window_length_s)
|
|
@@ -339,7 +353,7 @@ def aggregate_tremor_io(path_to_feature_input: str | Path, path_to_prediction_in
|
|
|
339
353
|
json.dump(d_aggregates, json_file, indent=4)
|
|
340
354
|
|
|
341
355
|
|
|
342
|
-
def extract_signal_quality_features_io(input_path: str | Path, output_path: str | Path, ppg_config:
|
|
356
|
+
def extract_signal_quality_features_io(input_path: str | Path, output_path: str | Path, ppg_config: PulseRateConfig, acc_config: PulseRateConfig) -> pd.DataFrame:
|
|
343
357
|
"""
|
|
344
358
|
Extract signal quality features from the PPG signal and save them to a file.
|
|
345
359
|
|
|
@@ -349,9 +363,9 @@ def extract_signal_quality_features_io(input_path: str | Path, output_path: str
|
|
|
349
363
|
The path to the directory containing the preprocessed PPG and accelerometer data.
|
|
350
364
|
output_path : str | Path
|
|
351
365
|
The path to the directory where the extracted features will be saved.
|
|
352
|
-
ppg_config:
|
|
366
|
+
ppg_config: PulseRateConfig
|
|
353
367
|
The configuration for the signal quality feature extraction of the ppg signal.
|
|
354
|
-
acc_config:
|
|
368
|
+
acc_config: PulseRateConfig
|
|
355
369
|
The configuration for the signal quality feature extraction of the accelerometer signal.
|
|
356
370
|
|
|
357
371
|
Returns
|
|
@@ -376,7 +390,7 @@ def extract_signal_quality_features_io(input_path: str | Path, output_path: str
|
|
|
376
390
|
return df_windowed
|
|
377
391
|
|
|
378
392
|
|
|
379
|
-
def signal_quality_classification_io(input_path: str | Path, output_path: str | Path, path_to_classifier_input: str | Path, config:
|
|
393
|
+
def signal_quality_classification_io(input_path: str | Path, output_path: str | Path, path_to_classifier_input: str | Path, config: PulseRateConfig) -> None:
|
|
380
394
|
|
|
381
395
|
# Load the data
|
|
382
396
|
metadata_time, metadata_values = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
|
|
@@ -385,32 +399,32 @@ def signal_quality_classification_io(input_path: str | Path, output_path: str |
|
|
|
385
399
|
df_sqa = signal_quality_classification(df_windowed, config, path_to_classifier_input)
|
|
386
400
|
|
|
387
401
|
|
|
388
|
-
def
|
|
402
|
+
def aggregate_pulse_rate_io(
|
|
389
403
|
full_path_to_input: str | Path,
|
|
390
404
|
full_path_to_output: str | Path,
|
|
391
405
|
aggregates: List[str] = ['mode', '99p']
|
|
392
406
|
) -> None:
|
|
393
407
|
"""
|
|
394
|
-
Extract
|
|
408
|
+
Extract pulse rate from the PPG signal and save the aggregated pulse rate estimates to a file.
|
|
395
409
|
|
|
396
410
|
Parameters
|
|
397
411
|
----------
|
|
398
412
|
input_path : str | Path
|
|
399
|
-
The path to the directory containing the
|
|
413
|
+
The path to the directory containing the pulse rate estimates.
|
|
400
414
|
output_path : str | Path
|
|
401
|
-
The path to the directory where the aggregated
|
|
415
|
+
The path to the directory where the aggregated pulse rate estimates will be saved.
|
|
402
416
|
aggregates : List[str]
|
|
403
|
-
The list of aggregation methods to be used for the
|
|
417
|
+
The list of aggregation methods to be used for the pulse rate estimates. The default is ['mode', '99p'].
|
|
404
418
|
"""
|
|
405
419
|
|
|
406
|
-
# Load the
|
|
420
|
+
# Load the pulse rate estimates
|
|
407
421
|
with open(full_path_to_input, 'r') as f:
|
|
408
|
-
|
|
422
|
+
df_pr = json.load(f)
|
|
409
423
|
|
|
410
|
-
# Aggregate the
|
|
411
|
-
|
|
412
|
-
|
|
424
|
+
# Aggregate the pulse rate estimates
|
|
425
|
+
pr_values = df_pr['pulse_rate'].values
|
|
426
|
+
df_pr_aggregates = aggregate_pulse_rate(pr_values, aggregates)
|
|
413
427
|
|
|
414
|
-
# Save the aggregated
|
|
428
|
+
# Save the aggregated pulse rate estimates
|
|
415
429
|
with open(full_path_to_output, 'w') as json_file:
|
|
416
|
-
json.dump(
|
|
430
|
+
json.dump(df_pr_aggregates, json_file, indent=4)
|
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
import json
|
|
2
1
|
import os
|
|
3
2
|
import numpy as np
|
|
4
3
|
import pandas as pd
|
|
5
|
-
from
|
|
6
|
-
from datetime import timedelta
|
|
4
|
+
from datetime import datetime, timedelta
|
|
7
5
|
from dateutil import parser
|
|
8
6
|
from typing import List, Tuple
|
|
9
7
|
|
|
@@ -432,3 +430,61 @@ def merge_predictions_with_timestamps(
|
|
|
432
430
|
df_ts = df_ts.dropna(subset=[pred_proba_colname])
|
|
433
431
|
|
|
434
432
|
return df_ts
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: str) -> pd.DataFrame:
|
|
436
|
+
|
|
437
|
+
"""
|
|
438
|
+
Select hours of interest from the data to include in the aggregation step.
|
|
439
|
+
|
|
440
|
+
Parameters
|
|
441
|
+
----------
|
|
442
|
+
df : pd.DataFrame
|
|
443
|
+
Input data.
|
|
444
|
+
|
|
445
|
+
select_hours_start: str
|
|
446
|
+
The start time of the selected hours in "HH:MM" format.
|
|
447
|
+
|
|
448
|
+
select_hours_end: str
|
|
449
|
+
The end time of the selected hours in "HH:MM" format.
|
|
450
|
+
|
|
451
|
+
Returns
|
|
452
|
+
-------
|
|
453
|
+
pd.DataFrame
|
|
454
|
+
The selected data.
|
|
455
|
+
|
|
456
|
+
"""
|
|
457
|
+
|
|
458
|
+
select_hours_start = datetime.strptime(select_hours_start, '%H:%M').time() # convert to time object
|
|
459
|
+
select_hours_end = datetime.strptime(select_hours_end, '%H:%M').time()
|
|
460
|
+
df_subset = df[df['time_dt'].dt.time.between(select_hours_start, select_hours_end)] # select the hours of interest
|
|
461
|
+
|
|
462
|
+
return df_subset
|
|
463
|
+
|
|
464
|
+
def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
|
|
465
|
+
|
|
466
|
+
"""
|
|
467
|
+
Select days of interest from the data to include in the aggregation step.
|
|
468
|
+
|
|
469
|
+
Parameters
|
|
470
|
+
----------
|
|
471
|
+
df : pd.DataFrame
|
|
472
|
+
Input data with column 'time_dt' in which the date is stored.
|
|
473
|
+
|
|
474
|
+
min_hours_per_day: int
|
|
475
|
+
The minimum number of hours per day required for including the day in the aggregation step.
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
Returns
|
|
479
|
+
-------
|
|
480
|
+
pd.DataFrame
|
|
481
|
+
The selected data.
|
|
482
|
+
|
|
483
|
+
"""
|
|
484
|
+
|
|
485
|
+
min_s_per_day = min_hours_per_day * 3600
|
|
486
|
+
window_length_s = df['time_dt'].diff().dt.total_seconds()[1] # determine the length of the first window in seconds
|
|
487
|
+
min_windows_per_day = min_s_per_day / window_length_s
|
|
488
|
+
df_subset = df.groupby(df['time_dt'].dt.date).filter(lambda x: len(x) >= min_windows_per_day)
|
|
489
|
+
|
|
490
|
+
return df_subset
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|