paradigma 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/__init__.py +10 -1
- paradigma/classification.py +38 -21
- paradigma/config.py +187 -123
- paradigma/constants.py +48 -35
- paradigma/feature_extraction.py +345 -255
- paradigma/load.py +476 -0
- paradigma/orchestrator.py +670 -0
- paradigma/pipelines/gait_pipeline.py +685 -246
- paradigma/pipelines/pulse_rate_pipeline.py +456 -155
- paradigma/pipelines/pulse_rate_utils.py +289 -248
- paradigma/pipelines/tremor_pipeline.py +405 -132
- paradigma/prepare_data.py +409 -0
- paradigma/preprocessing.py +500 -163
- paradigma/segmenting.py +180 -140
- paradigma/testing.py +370 -178
- paradigma/util.py +190 -101
- paradigma-1.1.0.dist-info/METADATA +229 -0
- paradigma-1.1.0.dist-info/RECORD +26 -0
- {paradigma-1.0.3.dist-info → paradigma-1.1.0.dist-info}/WHEEL +1 -1
- paradigma-1.1.0.dist-info/entry_points.txt +4 -0
- {paradigma-1.0.3.dist-info → paradigma-1.1.0.dist-info/licenses}/LICENSE +0 -1
- paradigma-1.0.3.dist-info/METADATA +0 -138
- paradigma-1.0.3.dist-info/RECORD +0 -22
|
@@ -1,28 +1,47 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import logging
|
|
3
|
+
from importlib.resources import files
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
2
6
|
import numpy as np
|
|
3
|
-
import os
|
|
4
7
|
import pandas as pd
|
|
5
|
-
from pathlib import Path
|
|
6
8
|
from scipy.signal import welch
|
|
7
9
|
from scipy.signal.windows import hamming, hann
|
|
8
|
-
import tsdf
|
|
9
|
-
from typing import List
|
|
10
10
|
|
|
11
11
|
from paradigma.classification import ClassifierPackage
|
|
12
|
+
from paradigma.config import PPGConfig, PulseRateConfig
|
|
12
13
|
from paradigma.constants import DataColumns
|
|
13
|
-
from paradigma.
|
|
14
|
-
|
|
15
|
-
compute_dominant_frequency,
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
from paradigma.feature_extraction import (
|
|
15
|
+
compute_auto_correlation,
|
|
16
|
+
compute_dominant_frequency,
|
|
17
|
+
compute_relative_power,
|
|
18
|
+
compute_signal_to_noise_ratio,
|
|
19
|
+
compute_spectral_entropy,
|
|
20
|
+
compute_statistics,
|
|
21
|
+
)
|
|
22
|
+
from paradigma.pipelines.pulse_rate_utils import (
|
|
23
|
+
assign_sqa_label,
|
|
24
|
+
extract_pr_from_segment,
|
|
25
|
+
extract_pr_segments,
|
|
26
|
+
)
|
|
27
|
+
from paradigma.preprocessing import preprocess_ppg_data
|
|
28
|
+
from paradigma.segmenting import WindowedDataExtractor, tabulate_windows
|
|
18
29
|
from paradigma.util import aggregate_parameter
|
|
19
30
|
|
|
20
|
-
|
|
21
|
-
|
|
31
|
+
|
|
32
|
+
def extract_signal_quality_features(
|
|
33
|
+
df_ppg: pd.DataFrame,
|
|
34
|
+
ppg_config: PulseRateConfig,
|
|
35
|
+
df_acc: pd.DataFrame | None = None,
|
|
36
|
+
acc_config: PulseRateConfig | None = None,
|
|
37
|
+
) -> pd.DataFrame:
|
|
38
|
+
"""
|
|
22
39
|
Extract signal quality features from the PPG signal.
|
|
23
|
-
The features are extracted from the temporal and spectral domain of the
|
|
24
|
-
The temporal domain features include variance, mean, median,
|
|
25
|
-
|
|
40
|
+
The features are extracted from the temporal and spectral domain of the
|
|
41
|
+
PPG signal. The temporal domain features include variance, mean, median,
|
|
42
|
+
kurtosis, skewness, signal-to-noise ratio, and autocorrelation. The
|
|
43
|
+
spectral domain features include the dominant frequency, relative power,
|
|
44
|
+
spectral entropy.
|
|
26
45
|
|
|
27
46
|
Parameters
|
|
28
47
|
----------
|
|
@@ -31,104 +50,143 @@ def extract_signal_quality_features(df_ppg: pd.DataFrame, df_acc: pd.DataFrame,
|
|
|
31
50
|
df_acc : pd.DataFrame
|
|
32
51
|
The DataFrame containing the accelerometer signal.
|
|
33
52
|
ppg_config: PulseRateConfig
|
|
34
|
-
The configuration for the signal quality feature extraction of the PPG
|
|
53
|
+
The configuration for the signal quality feature extraction of the PPG
|
|
54
|
+
signal.
|
|
35
55
|
acc_config: PulseRateConfig
|
|
36
|
-
The configuration for the signal quality feature extraction of the
|
|
56
|
+
The configuration for the signal quality feature extraction of the
|
|
57
|
+
accelerometer signal.
|
|
37
58
|
|
|
38
59
|
Returns
|
|
39
60
|
-------
|
|
40
61
|
df_features : pd.DataFrame
|
|
41
62
|
The DataFrame containing the extracted signal quality features.
|
|
42
|
-
|
|
63
|
+
|
|
43
64
|
"""
|
|
44
65
|
# Group sequences of timestamps into windows
|
|
45
|
-
|
|
66
|
+
ppg_windowed_colnames = [ppg_config.time_colname, ppg_config.ppg_colname]
|
|
46
67
|
ppg_windowed = tabulate_windows(
|
|
47
|
-
df=df_ppg,
|
|
48
|
-
columns=
|
|
68
|
+
df=df_ppg,
|
|
69
|
+
columns=ppg_windowed_colnames,
|
|
49
70
|
window_length_s=ppg_config.window_length_s,
|
|
50
71
|
window_step_length_s=ppg_config.window_step_length_s,
|
|
51
|
-
fs=ppg_config.sampling_frequency
|
|
72
|
+
fs=ppg_config.sampling_frequency,
|
|
52
73
|
)
|
|
53
74
|
|
|
54
75
|
# Extract data from the windowed PPG signal
|
|
55
|
-
extractor = WindowedDataExtractor(
|
|
56
|
-
idx_time = extractor.get_index(
|
|
76
|
+
extractor = WindowedDataExtractor(ppg_windowed_colnames)
|
|
77
|
+
idx_time = extractor.get_index(ppg_config.time_colname)
|
|
57
78
|
idx_ppg = extractor.get_index(ppg_config.ppg_colname)
|
|
58
|
-
|
|
79
|
+
# Start time of the window is relative to the first datapoint in the PPG
|
|
80
|
+
# data
|
|
81
|
+
start_time_ppg = np.min(ppg_windowed[:, :, idx_time], axis=1)
|
|
59
82
|
ppg_values_windowed = ppg_windowed[:, :, idx_ppg]
|
|
60
83
|
|
|
61
|
-
|
|
62
|
-
acc_windowed = tabulate_windows(
|
|
63
|
-
df=df_acc,
|
|
64
|
-
columns=acc_windowed_cols,
|
|
65
|
-
window_length_s=acc_config.window_length_s,
|
|
66
|
-
window_step_length_s=acc_config.window_step_length_s,
|
|
67
|
-
fs=acc_config.sampling_frequency
|
|
68
|
-
)
|
|
84
|
+
df_features = pd.DataFrame(start_time_ppg, columns=[ppg_config.time_colname])
|
|
69
85
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
86
|
+
if df_acc is not None and acc_config is not None:
|
|
87
|
+
|
|
88
|
+
acc_windowed_colnames = [
|
|
89
|
+
acc_config.time_colname
|
|
90
|
+
] + acc_config.accelerometer_colnames
|
|
91
|
+
acc_windowed = tabulate_windows(
|
|
92
|
+
df=df_acc,
|
|
93
|
+
columns=acc_windowed_colnames,
|
|
94
|
+
window_length_s=acc_config.window_length_s,
|
|
95
|
+
window_step_length_s=acc_config.window_step_length_s,
|
|
96
|
+
fs=acc_config.sampling_frequency,
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
# Extract data from the windowed accelerometer signal
|
|
100
|
+
extractor = WindowedDataExtractor(acc_windowed_colnames)
|
|
101
|
+
idx_acc = extractor.get_slice(acc_config.accelerometer_colnames)
|
|
102
|
+
acc_values_windowed = acc_windowed[:, :, idx_acc]
|
|
103
|
+
|
|
104
|
+
# Compute periodicity feature of the accelerometer signal
|
|
105
|
+
df_accelerometer_feature = extract_accelerometer_feature(
|
|
106
|
+
acc_values_windowed, ppg_values_windowed, acc_config
|
|
107
|
+
)
|
|
108
|
+
# Combine the accelerometer feature with the previously computed features
|
|
109
|
+
df_features = pd.concat([df_features, df_accelerometer_feature], axis=1)
|
|
74
110
|
|
|
75
|
-
df_features = pd.DataFrame(start_time_ppg, columns=[DataColumns.TIME])
|
|
76
111
|
# Compute features of the temporal domain of the PPG signal
|
|
77
|
-
df_temporal_features = extract_temporal_domain_features(
|
|
78
|
-
|
|
112
|
+
df_temporal_features = extract_temporal_domain_features(
|
|
113
|
+
ppg_values_windowed,
|
|
114
|
+
ppg_config,
|
|
115
|
+
quality_stats=["var", "mean", "median", "kurtosis", "skewness"],
|
|
116
|
+
)
|
|
117
|
+
|
|
79
118
|
# Combine temporal features with the start time
|
|
80
119
|
df_features = pd.concat([df_features, df_temporal_features], axis=1)
|
|
81
120
|
|
|
82
121
|
# Compute features of the spectral domain of the PPG signal
|
|
83
|
-
df_spectral_features = extract_spectral_domain_features(
|
|
122
|
+
df_spectral_features = extract_spectral_domain_features(
|
|
123
|
+
ppg_values_windowed, ppg_config
|
|
124
|
+
)
|
|
84
125
|
|
|
85
126
|
# Combine the spectral features with the previously computed temporal features
|
|
86
127
|
df_features = pd.concat([df_features, df_spectral_features], axis=1)
|
|
87
|
-
|
|
88
|
-
# Compute periodicity feature of the accelerometer signal
|
|
89
|
-
df_accelerometer_feature = extract_accelerometer_feature(acc_values_windowed, ppg_values_windowed, acc_config)
|
|
90
|
-
|
|
91
|
-
# Combine the accelerometer feature with the previously computed features
|
|
92
|
-
df_features = pd.concat([df_features, df_accelerometer_feature], axis=1)
|
|
93
128
|
|
|
94
129
|
return df_features
|
|
95
130
|
|
|
96
131
|
|
|
97
|
-
def signal_quality_classification(
|
|
132
|
+
def signal_quality_classification(
|
|
133
|
+
df: pd.DataFrame, config: PulseRateConfig, clf_package: ClassifierPackage
|
|
134
|
+
) -> pd.DataFrame:
|
|
98
135
|
"""
|
|
99
|
-
Classify the signal quality of the PPG signal using a logistic regression
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
136
|
+
Classify the signal quality of the PPG signal using a logistic regression
|
|
137
|
+
classifier. A probability close to 1 indicates a high-quality signal,
|
|
138
|
+
while a probability close to 0 indicates a low-quality signal. The
|
|
139
|
+
classifier is trained on features extracted from the PPG signal. The
|
|
140
|
+
features are extracted using the extract_signal_quality_features
|
|
141
|
+
function. The accelerometer signal is used to determine the signal
|
|
142
|
+
quality based on the power ratio of the accelerometer signal and returns
|
|
143
|
+
a binary label based on a threshold. A value of 1 on the indicates
|
|
144
|
+
no/minor periodic motion influence of the accelerometer on the PPG
|
|
145
|
+
signal, 0 indicates major periodic motion influence.
|
|
103
146
|
|
|
104
147
|
Parameters
|
|
105
148
|
----------
|
|
106
149
|
df : pd.DataFrame
|
|
107
|
-
The DataFrame containing the PPG features and the accelerometer
|
|
150
|
+
The DataFrame containing the PPG features and the accelerometer
|
|
151
|
+
feature for signal quality classification.
|
|
108
152
|
config : PulseRateConfig
|
|
109
153
|
The configuration for the signal quality classification.
|
|
110
|
-
|
|
111
|
-
The
|
|
154
|
+
clf_package : ClassifierPackage
|
|
155
|
+
The classifier package containing the classifier and scaler.
|
|
112
156
|
|
|
113
157
|
Returns
|
|
114
158
|
-------
|
|
115
159
|
df_sqa pd.DataFrame
|
|
116
|
-
The DataFrame containing the PPG signal quality predictions (both
|
|
160
|
+
The DataFrame containing the PPG signal quality predictions (both
|
|
161
|
+
probabilities of the PPG signal quality classification and the
|
|
162
|
+
accelerometer label based on the threshold).
|
|
117
163
|
"""
|
|
118
|
-
|
|
164
|
+
# Set classifier
|
|
119
165
|
clf = clf_package.classifier # Load the logistic regression classifier
|
|
120
166
|
|
|
121
167
|
# Apply scaling to relevant columns
|
|
122
|
-
scaled_features = clf_package.transform_features(
|
|
168
|
+
scaled_features = clf_package.transform_features(
|
|
169
|
+
df.loc[:, clf.feature_names_in]
|
|
170
|
+
) # Apply scaling to the features
|
|
123
171
|
|
|
124
|
-
# Make predictions for PPG signal quality assessment, and assign the
|
|
172
|
+
# Make predictions for PPG signal quality assessment, and assign the
|
|
173
|
+
# probabilities to the DataFrame and drop the features
|
|
125
174
|
df[DataColumns.PRED_SQA_PROBA] = clf.predict_proba(scaled_features)[:, 0]
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
175
|
+
keep_cols = [config.time_colname, DataColumns.PRED_SQA_PROBA]
|
|
176
|
+
|
|
177
|
+
if DataColumns.ACC_POWER_RATIO in df.columns:
|
|
178
|
+
# Assign accelerometer label to the DataFrame based on the threshold
|
|
179
|
+
df[DataColumns.PRED_SQA_ACC_LABEL] = (
|
|
180
|
+
df[DataColumns.ACC_POWER_RATIO] < config.threshold_sqa_accelerometer
|
|
181
|
+
).astype(int)
|
|
182
|
+
keep_cols += [DataColumns.PRED_SQA_ACC_LABEL]
|
|
129
183
|
|
|
184
|
+
return df[keep_cols]
|
|
130
185
|
|
|
131
|
-
|
|
186
|
+
|
|
187
|
+
def estimate_pulse_rate(
|
|
188
|
+
df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame, config: PulseRateConfig
|
|
189
|
+
) -> pd.DataFrame:
|
|
132
190
|
"""
|
|
133
191
|
Estimate the pulse rate from the PPG signal using the time-frequency domain method.
|
|
134
192
|
|
|
@@ -149,37 +207,58 @@ def estimate_pulse_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame,
|
|
|
149
207
|
|
|
150
208
|
# Extract NumPy arrays for faster operations
|
|
151
209
|
ppg_post_prob = df_sqa[DataColumns.PRED_SQA_PROBA].to_numpy()
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
210
|
+
|
|
211
|
+
if DataColumns.PRED_SQA_ACC_LABEL in df_sqa.columns:
|
|
212
|
+
acc_label = df_sqa[DataColumns.PRED_SQA_ACC_LABEL].to_numpy()
|
|
213
|
+
else:
|
|
214
|
+
acc_label = None
|
|
215
|
+
|
|
216
|
+
ppg_preprocessed = df_ppg_preprocessed.values
|
|
217
|
+
time_idx = df_ppg_preprocessed.columns.get_loc(
|
|
218
|
+
config.time_colname
|
|
219
|
+
) # Get the index of the time column
|
|
220
|
+
ppg_idx = df_ppg_preprocessed.columns.get_loc(
|
|
221
|
+
config.ppg_colname
|
|
222
|
+
) # Get the index of the PPG column
|
|
223
|
+
|
|
157
224
|
# Assign window-level probabilities to individual samples
|
|
158
|
-
sqa_label = assign_sqa_label(
|
|
159
|
-
|
|
160
|
-
|
|
225
|
+
sqa_label = assign_sqa_label(
|
|
226
|
+
ppg_post_prob, config, acc_label
|
|
227
|
+
) # assigns a signal quality label to every individual data point
|
|
228
|
+
v_start_idx, v_end_idx = extract_pr_segments(
|
|
229
|
+
sqa_label, config.min_pr_samples
|
|
230
|
+
) # extracts pulse rate segments based on the SQA label
|
|
231
|
+
|
|
161
232
|
v_pr_rel = np.array([])
|
|
162
233
|
t_pr_rel = np.array([])
|
|
163
234
|
|
|
164
|
-
edge_add =
|
|
235
|
+
edge_add = (
|
|
236
|
+
2 * config.sampling_frequency
|
|
237
|
+
) # Add 2s on both sides of the segment for PR estimation
|
|
165
238
|
step_size = config.pr_est_samples # Step size for PR estimation
|
|
166
239
|
|
|
167
240
|
# Estimate the maximum size for preallocation
|
|
168
|
-
valid_segments = (v_start_idx >= edge_add) & (
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
241
|
+
valid_segments = (v_start_idx >= edge_add) & (
|
|
242
|
+
v_end_idx <= len(ppg_preprocessed) - edge_add
|
|
243
|
+
) # check if the segments are valid, e.g. not too close to the edges (2s)
|
|
244
|
+
valid_start_idx = v_start_idx[valid_segments] # get the valid start indices
|
|
245
|
+
valid_end_idx = v_end_idx[valid_segments] # get the valid end indices
|
|
246
|
+
max_size = np.sum(
|
|
247
|
+
(valid_end_idx - valid_start_idx) // step_size
|
|
248
|
+
) # maximum size for preallocation
|
|
249
|
+
|
|
173
250
|
# Preallocate arrays
|
|
174
|
-
v_pr_rel = np.empty(max_size, dtype=float)
|
|
175
|
-
t_pr_rel = np.empty(max_size, dtype=float)
|
|
251
|
+
v_pr_rel = np.empty(max_size, dtype=float)
|
|
252
|
+
t_pr_rel = np.empty(max_size, dtype=float)
|
|
176
253
|
|
|
177
254
|
# Track current position
|
|
178
255
|
pr_pos = 0
|
|
179
256
|
|
|
180
257
|
for start_idx, end_idx in zip(valid_start_idx, valid_end_idx):
|
|
181
258
|
# Extract extended PPG segment
|
|
182
|
-
extended_ppg_segment = ppg_preprocessed[
|
|
259
|
+
extended_ppg_segment = ppg_preprocessed[
|
|
260
|
+
start_idx - edge_add : end_idx + edge_add, ppg_idx
|
|
261
|
+
]
|
|
183
262
|
|
|
184
263
|
# Estimate pulse rate
|
|
185
264
|
pr_est = extract_pr_from_segment(
|
|
@@ -190,14 +269,16 @@ def estimate_pulse_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame,
|
|
|
190
269
|
config.kern_params,
|
|
191
270
|
)
|
|
192
271
|
n_pr = len(pr_est) # Number of pulse rate estimates
|
|
193
|
-
|
|
272
|
+
# Calculate end index for time, different from end_idx since it is
|
|
273
|
+
# always a multiple of step_size, while end_idx is not
|
|
274
|
+
end_idx_time = n_pr * step_size + start_idx
|
|
194
275
|
|
|
195
276
|
# Extract relative time for PR estimates
|
|
196
|
-
pr_time = ppg_preprocessed[start_idx
|
|
277
|
+
pr_time = ppg_preprocessed[start_idx:end_idx_time:step_size, time_idx]
|
|
197
278
|
|
|
198
279
|
# Insert into preallocated arrays
|
|
199
|
-
v_pr_rel[pr_pos:pr_pos + n_pr] = pr_est
|
|
200
|
-
t_pr_rel[pr_pos:pr_pos + n_pr] = pr_time
|
|
280
|
+
v_pr_rel[pr_pos : pr_pos + n_pr] = pr_est
|
|
281
|
+
t_pr_rel[pr_pos : pr_pos + n_pr] = pr_time
|
|
201
282
|
pr_pos += n_pr
|
|
202
283
|
|
|
203
284
|
df_pr = pd.DataFrame({"time": t_pr_rel, "pulse_rate": v_pr_rel})
|
|
@@ -205,7 +286,9 @@ def estimate_pulse_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame,
|
|
|
205
286
|
return df_pr
|
|
206
287
|
|
|
207
288
|
|
|
208
|
-
def aggregate_pulse_rate(
|
|
289
|
+
def aggregate_pulse_rate(
|
|
290
|
+
pr_values: np.ndarray, aggregates: list[str] = ["mode", "99p"]
|
|
291
|
+
) -> dict:
|
|
209
292
|
"""
|
|
210
293
|
Aggregate the pulse rate estimates using the specified aggregation methods.
|
|
211
294
|
|
|
@@ -214,7 +297,8 @@ def aggregate_pulse_rate(pr_values: np.ndarray, aggregates: List[str] = ['mode',
|
|
|
214
297
|
pr_values : np.ndarray
|
|
215
298
|
The array containing the pulse rate estimates
|
|
216
299
|
aggregates : List[str]
|
|
217
|
-
The list of aggregation methods to be used for the pulse rate
|
|
300
|
+
The list of aggregation methods to be used for the pulse rate
|
|
301
|
+
estimates. The default is ['mode', '99p'].
|
|
218
302
|
|
|
219
303
|
Returns
|
|
220
304
|
-------
|
|
@@ -226,24 +310,26 @@ def aggregate_pulse_rate(pr_values: np.ndarray, aggregates: List[str] = ['mode',
|
|
|
226
310
|
|
|
227
311
|
# Initialize the dictionary for the aggregated results with the metadata
|
|
228
312
|
aggregated_results = {
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
}
|
|
232
|
-
'pr_aggregates': {}
|
|
233
|
-
}
|
|
313
|
+
"metadata": {"nr_pr_est": len(pr_values)},
|
|
314
|
+
"pr_aggregates": {},
|
|
315
|
+
}
|
|
234
316
|
for aggregate in aggregates:
|
|
235
|
-
aggregated_results[
|
|
317
|
+
aggregated_results["pr_aggregates"][f"{aggregate}_{DataColumns.PULSE_RATE}"] = (
|
|
318
|
+
aggregate_parameter(pr_values, aggregate)
|
|
319
|
+
)
|
|
236
320
|
|
|
237
321
|
return aggregated_results
|
|
238
322
|
|
|
239
323
|
|
|
240
324
|
def extract_temporal_domain_features(
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
325
|
+
ppg_windowed: np.ndarray,
|
|
326
|
+
config: PulseRateConfig,
|
|
327
|
+
quality_stats: list[str] = ["mean", "std"],
|
|
328
|
+
) -> pd.DataFrame:
|
|
245
329
|
"""
|
|
246
|
-
Compute temporal domain features for the ppg signal. The features are
|
|
330
|
+
Compute temporal domain features for the ppg signal. The features are
|
|
331
|
+
added to the dataframe. Therefore the original dataframe is modified,
|
|
332
|
+
and the modified dataframe is returned.
|
|
247
333
|
|
|
248
334
|
Parameters
|
|
249
335
|
----------
|
|
@@ -254,31 +340,36 @@ def extract_temporal_domain_features(
|
|
|
254
340
|
The configuration object containing the parameters for the feature extraction
|
|
255
341
|
|
|
256
342
|
quality_stats: list, optional
|
|
257
|
-
The statistics to be computed for the gravity component of the
|
|
258
|
-
|
|
343
|
+
The statistics to be computed for the gravity component of the
|
|
344
|
+
accelerometer signal (default: ['mean', 'std'])
|
|
345
|
+
|
|
259
346
|
Returns
|
|
260
347
|
-------
|
|
261
348
|
pd.DataFrame
|
|
262
349
|
The dataframe with the added temporal domain features.
|
|
263
350
|
"""
|
|
264
|
-
|
|
351
|
+
|
|
265
352
|
feature_dict = {}
|
|
266
353
|
for stat in quality_stats:
|
|
267
354
|
feature_dict[stat] = compute_statistics(ppg_windowed, stat, abs_stats=True)
|
|
268
|
-
|
|
269
|
-
feature_dict[
|
|
270
|
-
feature_dict[
|
|
355
|
+
|
|
356
|
+
feature_dict["signal_to_noise"] = compute_signal_to_noise_ratio(ppg_windowed)
|
|
357
|
+
feature_dict["auto_corr"] = compute_auto_correlation(
|
|
358
|
+
ppg_windowed, config.sampling_frequency
|
|
359
|
+
)
|
|
271
360
|
return pd.DataFrame(feature_dict)
|
|
272
361
|
|
|
273
362
|
|
|
274
363
|
def extract_spectral_domain_features(
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
364
|
+
ppg_windowed: np.ndarray,
|
|
365
|
+
config: PulseRateConfig,
|
|
366
|
+
) -> pd.DataFrame:
|
|
278
367
|
"""
|
|
279
|
-
Calculate the spectral features (dominant frequency, relative power, and
|
|
280
|
-
for each segment of a PPG signal using a single
|
|
281
|
-
|
|
368
|
+
Calculate the spectral features (dominant frequency, relative power, and
|
|
369
|
+
spectral entropy) for each segment of a PPG signal using a single
|
|
370
|
+
Welch's method computation. The features are added to the dataframe.
|
|
371
|
+
Therefore the original dataframe is modified, and the modified dataframe
|
|
372
|
+
is returned.
|
|
282
373
|
|
|
283
374
|
Parameters
|
|
284
375
|
----------
|
|
@@ -295,7 +386,7 @@ def extract_spectral_domain_features(
|
|
|
295
386
|
"""
|
|
296
387
|
d_features = {}
|
|
297
388
|
|
|
298
|
-
window = hamming(config.window_length_welch, sym
|
|
389
|
+
window = hamming(config.window_length_welch, sym=True)
|
|
299
390
|
|
|
300
391
|
n_samples_window = ppg_windowed.shape[1]
|
|
301
392
|
|
|
@@ -306,23 +397,23 @@ def extract_spectral_domain_features(
|
|
|
306
397
|
noverlap=config.overlap_welch_window,
|
|
307
398
|
nfft=max(256, 2 ** int(np.log2(n_samples_window))),
|
|
308
399
|
detrend=False,
|
|
309
|
-
axis=1
|
|
400
|
+
axis=1,
|
|
310
401
|
)
|
|
311
402
|
|
|
312
403
|
# Calculate each feature using the computed PSD and frequency array
|
|
313
|
-
d_features[
|
|
314
|
-
d_features[
|
|
315
|
-
d_features[
|
|
404
|
+
d_features["f_dom"] = compute_dominant_frequency(freqs, psd)
|
|
405
|
+
d_features["rel_power"] = compute_relative_power(freqs, psd, config)
|
|
406
|
+
d_features["spectral_entropy"] = compute_spectral_entropy(psd, n_samples_window)
|
|
316
407
|
|
|
317
408
|
return pd.DataFrame(d_features)
|
|
318
409
|
|
|
319
410
|
|
|
320
411
|
def extract_acc_power_feature(
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
412
|
+
f1: np.ndarray,
|
|
413
|
+
psd_acc: np.ndarray,
|
|
414
|
+
f2: np.ndarray,
|
|
415
|
+
psd_ppg: np.ndarray,
|
|
416
|
+
) -> np.ndarray:
|
|
326
417
|
"""
|
|
327
418
|
Extract the accelerometer power feature in the PPG frequency range.
|
|
328
419
|
|
|
@@ -330,11 +421,11 @@ def extract_acc_power_feature(
|
|
|
330
421
|
----------
|
|
331
422
|
f1: np.ndarray
|
|
332
423
|
The frequency bins of the accelerometer signal.
|
|
333
|
-
|
|
424
|
+
psd_acc: np.ndarray
|
|
334
425
|
The power spectral density of the accelerometer signal.
|
|
335
426
|
f2: np.ndarray
|
|
336
427
|
The frequency bins of the PPG signal.
|
|
337
|
-
|
|
428
|
+
psd_ppg: np.ndarray
|
|
338
429
|
The power spectral density of the PPG signal.
|
|
339
430
|
|
|
340
431
|
Returns
|
|
@@ -342,48 +433,54 @@ def extract_acc_power_feature(
|
|
|
342
433
|
np.ndarray
|
|
343
434
|
The accelerometer power feature in the PPG frequency range
|
|
344
435
|
"""
|
|
345
|
-
|
|
436
|
+
|
|
346
437
|
# Find the index of the maximum PSD value in the PPG signal
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
438
|
+
max_ppg_psd_idx = np.argmax(psd_ppg, axis=1)
|
|
439
|
+
max_ppg_freq_psd = f2[max_ppg_psd_idx]
|
|
440
|
+
|
|
350
441
|
# Find the neighboring indices of the maximum PSD value in the PPG signal
|
|
351
|
-
df_idx = np.column_stack(
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
442
|
+
df_idx = np.column_stack(
|
|
443
|
+
(max_ppg_psd_idx - 1, max_ppg_psd_idx, max_ppg_psd_idx + 1)
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
# Find the index of the closest frequency in the accelerometer signal
|
|
447
|
+
# to the first harmonic of the PPG frequency
|
|
448
|
+
corr_acc_psd_fh_idx = np.argmin(np.abs(f1[:, None] - max_ppg_freq_psd * 2), axis=0)
|
|
449
|
+
fh_idx = np.column_stack(
|
|
450
|
+
(corr_acc_psd_fh_idx - 1, corr_acc_psd_fh_idx, corr_acc_psd_fh_idx + 1)
|
|
451
|
+
)
|
|
452
|
+
|
|
357
453
|
# Compute the power in the ranges corresponding to the PPG frequency
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
454
|
+
acc_power_ppg_range = np.trapezoid(
|
|
455
|
+
psd_acc[np.arange(psd_acc.shape[0])[:, None], df_idx], f1[df_idx], axis=1
|
|
456
|
+
) + np.trapezoid(
|
|
457
|
+
psd_acc[np.arange(psd_acc.shape[0])[:, None], fh_idx], f1[fh_idx], axis=1
|
|
361
458
|
)
|
|
362
459
|
|
|
363
460
|
# Compute the total power across the entire frequency range
|
|
364
|
-
acc_power_total = np.
|
|
365
|
-
|
|
461
|
+
acc_power_total = np.trapezoid(psd_acc, f1)
|
|
462
|
+
|
|
366
463
|
# Compute the power ratio of the accelerometer signal in the PPG frequency range
|
|
367
|
-
acc_power_ratio =
|
|
368
|
-
|
|
464
|
+
acc_power_ratio = acc_power_ppg_range / acc_power_total
|
|
465
|
+
|
|
369
466
|
return acc_power_ratio
|
|
370
467
|
|
|
468
|
+
|
|
371
469
|
def extract_accelerometer_feature(
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
config: PulseRateConfig
|
|
375
|
-
) -> pd.DataFrame:
|
|
470
|
+
acc_windowed: np.ndarray, ppg_windowed: np.ndarray, config: PulseRateConfig
|
|
471
|
+
) -> pd.DataFrame:
|
|
376
472
|
"""
|
|
377
|
-
Extract accelerometer features from the accelerometer signal in the PPG
|
|
378
|
-
|
|
473
|
+
Extract accelerometer features from the accelerometer signal in the PPG
|
|
474
|
+
frequency range.
|
|
475
|
+
|
|
379
476
|
Parameters
|
|
380
|
-
----------
|
|
477
|
+
----------
|
|
381
478
|
acc_windowed: np.ndarray
|
|
382
479
|
The dataframe containing the windowed accelerometer signal
|
|
383
480
|
|
|
384
481
|
ppg_windowed: np.ndarray
|
|
385
482
|
The dataframe containing the corresponding windowed ppg signal
|
|
386
|
-
|
|
483
|
+
|
|
387
484
|
config: PulseRateConfig
|
|
388
485
|
The configuration object containing the parameters for the feature extraction
|
|
389
486
|
|
|
@@ -392,21 +489,21 @@ def extract_accelerometer_feature(
|
|
|
392
489
|
pd.DataFrame
|
|
393
490
|
The dataframe with the relative power accelerometer feature.
|
|
394
491
|
"""
|
|
395
|
-
|
|
396
|
-
if config.sensor not in [
|
|
492
|
+
|
|
493
|
+
if config.sensor not in ["imu", "ppg"]:
|
|
397
494
|
raise ValueError("Sensor not recognized.")
|
|
398
|
-
|
|
495
|
+
|
|
399
496
|
d_freq = {}
|
|
400
497
|
d_psd = {}
|
|
401
|
-
for sensor in [
|
|
498
|
+
for sensor in ["imu", "ppg"]:
|
|
402
499
|
config.set_sensor(sensor)
|
|
403
500
|
|
|
404
|
-
if sensor ==
|
|
501
|
+
if sensor == "imu":
|
|
405
502
|
windows = acc_windowed
|
|
406
503
|
else:
|
|
407
504
|
windows = ppg_windowed
|
|
408
505
|
|
|
409
|
-
window_type = hann(config.window_length_welch, sym
|
|
506
|
+
window_type = hann(config.window_length_welch, sym=True)
|
|
410
507
|
d_freq[sensor], d_psd[sensor] = welch(
|
|
411
508
|
windows,
|
|
412
509
|
fs=config.sampling_frequency,
|
|
@@ -414,13 +511,217 @@ def extract_accelerometer_feature(
|
|
|
414
511
|
noverlap=config.overlap_welch_window,
|
|
415
512
|
nfft=config.nfft,
|
|
416
513
|
detrend=False,
|
|
417
|
-
axis=1
|
|
514
|
+
axis=1,
|
|
418
515
|
)
|
|
419
516
|
|
|
420
|
-
d_psd[
|
|
517
|
+
d_psd["imu"] = np.sum(d_psd["imu"], axis=2) # Sum the PSDs of the three axes
|
|
421
518
|
|
|
422
|
-
acc_power_ratio = extract_acc_power_feature(
|
|
519
|
+
acc_power_ratio = extract_acc_power_feature(
|
|
520
|
+
d_freq["imu"], d_psd["imu"], d_freq["ppg"], d_psd["ppg"]
|
|
521
|
+
)
|
|
423
522
|
|
|
424
|
-
return pd.DataFrame(acc_power_ratio, columns=[
|
|
523
|
+
return pd.DataFrame(acc_power_ratio, columns=["acc_power_ratio"])
|
|
425
524
|
|
|
426
525
|
|
|
526
|
+
def run_pulse_rate_pipeline(
    df_ppg_prepared: pd.DataFrame,
    output_dir: str | Path,
    store_intermediate: list[str] | None = None,
    pulse_rate_config: PulseRateConfig | None = None,
    ppg_config: PPGConfig | None = None,
    logging_level: int = logging.INFO,
    custom_logger: logging.Logger | None = None,
) -> pd.DataFrame:
    """
    High-level pulse rate analysis pipeline for a single segment.

    This function implements the complete pulse rate analysis workflow from the
    pulse rate tutorial:
    1. Preprocess PPG and accelerometer data (accelerometer is optional)
    2. Extract signal quality features
    3. Signal quality classification
    4. Pulse rate estimation
    5. Quantify pulse rate (select relevant columns)

    Parameters
    ----------
    df_ppg_prepared : pd.DataFrame
        Prepared sensor data with time and PPG column.
    output_dir : str or Path
        Output directory for intermediate results (required)
    store_intermediate : list of str, optional
        Which intermediate results to store ("preprocessing", "pulse_rate",
        "quantification"). Defaults to storing nothing.
    pulse_rate_config : PulseRateConfig, optional
        Pulse rate analysis configuration
    ppg_config : PPGConfig, optional
        PPG preprocessing configuration
    logging_level : int, default logging.INFO
        Logging level using standard logging constants
    custom_logger : logging.Logger, optional
        Custom logger instance

    Returns
    -------
    pd.DataFrame
        Quantified pulse rate data with columns:
        - time: timestamp
        - pulse_rate: pulse rate estimate
        - signal_quality: quality assessment (if available)
        Empty DataFrame when required columns are missing or any step fails.
    """
    # Setup logger: use the caller-supplied logger untouched, otherwise a
    # module logger at the requested level.
    active_logger = (
        custom_logger if custom_logger is not None else logging.getLogger(__name__)
    )
    if custom_logger is None:
        active_logger.setLevel(logging_level)

    # Avoid the mutable-default-argument pitfall: treat None as "store nothing".
    if store_intermediate is None:
        store_intermediate = []

    if pulse_rate_config is None:
        pulse_rate_config = PulseRateConfig()
    if ppg_config is None:
        ppg_config = PPGConfig()

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Validate input data columns (PPG is required, accelerometer is optional)
    required_columns = [DataColumns.TIME, DataColumns.PPG]
    missing_columns = [
        col for col in required_columns if col not in df_ppg_prepared.columns
    ]
    if missing_columns:
        active_logger.warning(
            "Missing required columns for pulse rate pipeline: %s", missing_columns
        )
        return pd.DataFrame()

    # Step 1: Preprocess PPG and accelerometer data (following tutorial)
    active_logger.info("Step 1: Preprocessing PPG and accelerometer data")
    try:
        # Separate PPG data (always available)
        ppg_cols = [DataColumns.TIME, DataColumns.PPG]
        df_ppg = df_ppg_prepared[ppg_cols].copy()

        # Preprocess the data
        df_ppg_proc, _ = preprocess_ppg_data(
            df_ppg=df_ppg,
            ppg_config=ppg_config,
            verbose=1 if logging_level <= logging.INFO else 0,
        )

        if "preprocessing" in store_intermediate:
            preprocessing_dir = output_dir / "preprocessing"
            preprocessing_dir.mkdir(exist_ok=True)
            df_ppg_proc.to_parquet(preprocessing_dir / "ppg_preprocessed.parquet")
            active_logger.info("Saved preprocessed data to %s", preprocessing_dir)

    except Exception as e:
        active_logger.error("Preprocessing failed: %s", e)
        return pd.DataFrame()

    # Step 2: Extract signal quality features
    active_logger.info("Step 2: Extracting signal quality features")
    try:
        df_features = extract_signal_quality_features(df_ppg_proc, pulse_rate_config)

        if "pulse_rate" in store_intermediate:
            pulse_rate_dir = output_dir / "pulse_rate"
            pulse_rate_dir.mkdir(exist_ok=True)
            df_features.to_parquet(pulse_rate_dir / "signal_quality_features.parquet")
            active_logger.info("Saved signal quality features to %s", pulse_rate_dir)

    except Exception as e:
        active_logger.error("Feature extraction failed: %s", e)
        return pd.DataFrame()

    # Step 3: Signal quality classification
    active_logger.info("Step 3: Signal quality classification")
    try:
        # Pre-trained classifier shipped as a package asset.
        classifier_path = files("paradigma.assets") / "ppg_quality_clf_package.pkl"
        classifier_package = ClassifierPackage.load(classifier_path)

        df_classified = signal_quality_classification(
            df_features, pulse_rate_config, classifier_package
        )

    except Exception as e:
        active_logger.error("Signal quality classification failed: %s", e)
        return pd.DataFrame()

    # Step 4: Pulse rate estimation
    active_logger.info("Step 4: Pulse rate estimation")
    try:
        df_pulse_rates = estimate_pulse_rate(
            df_sqa=df_classified,
            df_ppg_preprocessed=df_ppg_proc,
            config=pulse_rate_config,
        )

    except Exception as e:
        active_logger.error("Pulse rate estimation failed: %s", e)
        return pd.DataFrame()

    # Step 5: Quantify pulse rate (select relevant columns and apply quality filtering)
    active_logger.info("Step 5: Quantifying pulse rate")

    # Select quantification columns
    quantification_columns = []
    if DataColumns.TIME in df_pulse_rates.columns:
        quantification_columns.append(DataColumns.TIME)
    if DataColumns.PULSE_RATE in df_pulse_rates.columns:
        quantification_columns.append(DataColumns.PULSE_RATE)
    if "signal_quality" in df_pulse_rates.columns:
        quantification_columns.append("signal_quality")

    # Use available columns
    available_columns = [
        col for col in quantification_columns if col in df_pulse_rates.columns
    ]
    if not available_columns:
        active_logger.warning("No valid quantification columns found")
        return pd.DataFrame()

    df_quantification = df_pulse_rates[available_columns].copy()

    # Apply quality filtering if signal quality is available: estimates below
    # the configured quality threshold are masked out as NaN.
    if (
        "signal_quality" in df_quantification.columns
        and DataColumns.PULSE_RATE in df_quantification.columns
    ):
        quality_threshold = getattr(pulse_rate_config, "threshold_sqa", 0.5)
        low_quality_mask = df_quantification["signal_quality"] < quality_threshold
        df_quantification.loc[low_quality_mask, DataColumns.PULSE_RATE] = np.nan

    # Compute the surviving estimates once; reused for metadata and the
    # final summary log (previously computed twice).
    valid_pulse_rates = (
        df_quantification[DataColumns.PULSE_RATE].dropna()
        if DataColumns.PULSE_RATE in df_quantification.columns
        else pd.Series(dtype=float)
    )

    if "quantification" in store_intermediate:
        quantification_dir = output_dir / "quantification"
        quantification_dir.mkdir(exist_ok=True)
        df_quantification.to_parquet(
            quantification_dir / "pulse_rate_quantification.parquet"
        )

        # Save quantification metadata
        quantification_meta = {
            "total_windows": len(df_quantification),
            "valid_pulse_rate_estimates": len(valid_pulse_rates),
            "columns": list(df_quantification.columns),
        }
        with open(quantification_dir / "pulse_rate_quantification_meta.json", "w") as f:
            json.dump(quantification_meta, f, indent=2)

        active_logger.info(
            "Saved pulse rate quantification to %s", quantification_dir
        )

    active_logger.info(
        "Pulse rate analysis completed: %d valid pulse "
        "rate estimates from %d total windows",
        len(valid_pulse_rates),
        len(df_quantification),
    )

    return df_quantification