paradigma 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/__init__.py +10 -1
- paradigma/classification.py +14 -14
- paradigma/config.py +38 -29
- paradigma/constants.py +10 -2
- paradigma/feature_extraction.py +106 -75
- paradigma/load.py +476 -0
- paradigma/orchestrator.py +670 -0
- paradigma/pipelines/gait_pipeline.py +488 -97
- paradigma/pipelines/pulse_rate_pipeline.py +278 -46
- paradigma/pipelines/pulse_rate_utils.py +176 -137
- paradigma/pipelines/tremor_pipeline.py +292 -72
- paradigma/prepare_data.py +409 -0
- paradigma/preprocessing.py +345 -77
- paradigma/segmenting.py +57 -42
- paradigma/testing.py +14 -9
- paradigma/util.py +36 -22
- paradigma-1.1.0.dist-info/METADATA +229 -0
- paradigma-1.1.0.dist-info/RECORD +26 -0
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/WHEEL +1 -1
- paradigma-1.0.4.dist-info/METADATA +0 -140
- paradigma-1.0.4.dist-info/RECORD +0 -23
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/entry_points.txt +0 -0
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from importlib.resources import files
|
|
4
|
+
from pathlib import Path
|
|
2
5
|
|
|
3
6
|
import numpy as np
|
|
4
7
|
import pandas as pd
|
|
@@ -6,7 +9,7 @@ from scipy.signal import welch
|
|
|
6
9
|
from scipy.signal.windows import hamming, hann
|
|
7
10
|
|
|
8
11
|
from paradigma.classification import ClassifierPackage
|
|
9
|
-
from paradigma.config import PulseRateConfig
|
|
12
|
+
from paradigma.config import PPGConfig, PulseRateConfig
|
|
10
13
|
from paradigma.constants import DataColumns
|
|
11
14
|
from paradigma.feature_extraction import (
|
|
12
15
|
compute_auto_correlation,
|
|
@@ -21,6 +24,7 @@ from paradigma.pipelines.pulse_rate_utils import (
|
|
|
21
24
|
extract_pr_from_segment,
|
|
22
25
|
extract_pr_segments,
|
|
23
26
|
)
|
|
27
|
+
from paradigma.preprocessing import preprocess_ppg_data
|
|
24
28
|
from paradigma.segmenting import WindowedDataExtractor, tabulate_windows
|
|
25
29
|
from paradigma.util import aggregate_parameter
|
|
26
30
|
|
|
@@ -33,9 +37,11 @@ def extract_signal_quality_features(
|
|
|
33
37
|
) -> pd.DataFrame:
|
|
34
38
|
"""
|
|
35
39
|
Extract signal quality features from the PPG signal.
|
|
36
|
-
The features are extracted from the temporal and spectral domain of the
|
|
37
|
-
The temporal domain features include variance, mean, median,
|
|
38
|
-
|
|
40
|
+
The features are extracted from the temporal and spectral domain of the
|
|
41
|
+
PPG signal. The temporal domain features include variance, mean, median,
|
|
42
|
+
kurtosis, skewness, signal-to-noise ratio, and autocorrelation. The
|
|
43
|
+
spectral domain features include the dominant frequency, relative power,
|
|
44
|
+
spectral entropy.
|
|
39
45
|
|
|
40
46
|
Parameters
|
|
41
47
|
----------
|
|
@@ -44,9 +50,11 @@ def extract_signal_quality_features(
|
|
|
44
50
|
df_acc : pd.DataFrame
|
|
45
51
|
The DataFrame containing the accelerometer signal.
|
|
46
52
|
ppg_config: PulseRateConfig
|
|
47
|
-
The configuration for the signal quality feature extraction of the PPG
|
|
53
|
+
The configuration for the signal quality feature extraction of the PPG
|
|
54
|
+
signal.
|
|
48
55
|
acc_config: PulseRateConfig
|
|
49
|
-
The configuration for the signal quality feature extraction of the
|
|
56
|
+
The configuration for the signal quality feature extraction of the
|
|
57
|
+
accelerometer signal.
|
|
50
58
|
|
|
51
59
|
Returns
|
|
52
60
|
-------
|
|
@@ -68,9 +76,9 @@ def extract_signal_quality_features(
|
|
|
68
76
|
extractor = WindowedDataExtractor(ppg_windowed_colnames)
|
|
69
77
|
idx_time = extractor.get_index(ppg_config.time_colname)
|
|
70
78
|
idx_ppg = extractor.get_index(ppg_config.ppg_colname)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
79
|
+
# Start time of the window is relative to the first datapoint in the PPG
|
|
80
|
+
# data
|
|
81
|
+
start_time_ppg = np.min(ppg_windowed[:, :, idx_time], axis=1)
|
|
74
82
|
ppg_values_windowed = ppg_windowed[:, :, idx_ppg]
|
|
75
83
|
|
|
76
84
|
df_features = pd.DataFrame(start_time_ppg, columns=[ppg_config.time_colname])
|
|
@@ -125,15 +133,22 @@ def signal_quality_classification(
|
|
|
125
133
|
df: pd.DataFrame, config: PulseRateConfig, clf_package: ClassifierPackage
|
|
126
134
|
) -> pd.DataFrame:
|
|
127
135
|
"""
|
|
128
|
-
Classify the signal quality of the PPG signal using a logistic regression
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
136
|
+
Classify the signal quality of the PPG signal using a logistic regression
|
|
137
|
+
classifier. A probability close to 1 indicates a high-quality signal,
|
|
138
|
+
while a probability close to 0 indicates a low-quality signal. The
|
|
139
|
+
classifier is trained on features extracted from the PPG signal. The
|
|
140
|
+
features are extracted using the extract_signal_quality_features
|
|
141
|
+
function. The accelerometer signal is used to determine the signal
|
|
142
|
+
quality based on the power ratio of the accelerometer signal and returns
|
|
143
|
+
a binary label based on a threshold. A value of 1 indicates
|
|
144
|
+
no/minor periodic motion influence of the accelerometer on the PPG
|
|
145
|
+
signal, 0 indicates major periodic motion influence.
|
|
132
146
|
|
|
133
147
|
Parameters
|
|
134
148
|
----------
|
|
135
149
|
df : pd.DataFrame
|
|
136
|
-
The DataFrame containing the PPG features and the accelerometer
|
|
150
|
+
The DataFrame containing the PPG features and the accelerometer
|
|
151
|
+
feature for signal quality classification.
|
|
137
152
|
config : PulseRateConfig
|
|
138
153
|
The configuration for the signal quality classification.
|
|
139
154
|
clf_package : ClassifierPackage
|
|
@@ -142,7 +157,9 @@ def signal_quality_classification(
|
|
|
142
157
|
Returns
|
|
143
158
|
-------
|
|
144
159
|
df_sqa : pd.DataFrame
|
|
145
|
-
The DataFrame containing the PPG signal quality predictions (both
|
|
160
|
+
The DataFrame containing the PPG signal quality predictions (both
|
|
161
|
+
probabilities of the PPG signal quality classification and the
|
|
162
|
+
accelerometer label based on the threshold).
|
|
146
163
|
"""
|
|
147
164
|
# Set classifier
|
|
148
165
|
clf = clf_package.classifier # Load the logistic regression classifier
|
|
@@ -152,16 +169,16 @@ def signal_quality_classification(
|
|
|
152
169
|
df.loc[:, clf.feature_names_in]
|
|
153
170
|
) # Apply scaling to the features
|
|
154
171
|
|
|
155
|
-
# Make predictions for PPG signal quality assessment, and assign the
|
|
172
|
+
# Make predictions for PPG signal quality assessment, and assign the
|
|
173
|
+
# probabilities to the DataFrame and drop the features
|
|
156
174
|
df[DataColumns.PRED_SQA_PROBA] = clf.predict_proba(scaled_features)[:, 0]
|
|
157
175
|
keep_cols = [config.time_colname, DataColumns.PRED_SQA_PROBA]
|
|
158
176
|
|
|
159
177
|
if DataColumns.ACC_POWER_RATIO in df.columns:
|
|
178
|
+
# Assign accelerometer label to the DataFrame based on the threshold
|
|
160
179
|
df[DataColumns.PRED_SQA_ACC_LABEL] = (
|
|
161
180
|
df[DataColumns.ACC_POWER_RATIO] < config.threshold_sqa_accelerometer
|
|
162
|
-
).astype(
|
|
163
|
-
int
|
|
164
|
-
) # Assign accelerometer label to the DataFrame based on the threshold
|
|
181
|
+
).astype(int)
|
|
165
182
|
keep_cols += [DataColumns.PRED_SQA_ACC_LABEL]
|
|
166
183
|
|
|
167
184
|
return df[keep_cols]
|
|
@@ -252,9 +269,9 @@ def estimate_pulse_rate(
|
|
|
252
269
|
config.kern_params,
|
|
253
270
|
)
|
|
254
271
|
n_pr = len(pr_est) # Number of pulse rate estimates
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
272
|
+
# Calculate end index for time, different from end_idx since it is
|
|
273
|
+
# always a multiple of step_size, while end_idx is not
|
|
274
|
+
end_idx_time = n_pr * step_size + start_idx
|
|
258
275
|
|
|
259
276
|
# Extract relative time for PR estimates
|
|
260
277
|
pr_time = ppg_preprocessed[start_idx:end_idx_time:step_size, time_idx]
|
|
@@ -270,7 +287,7 @@ def estimate_pulse_rate(
|
|
|
270
287
|
|
|
271
288
|
|
|
272
289
|
def aggregate_pulse_rate(
|
|
273
|
-
pr_values: np.ndarray, aggregates:
|
|
290
|
+
pr_values: np.ndarray, aggregates: list[str] = ["mode", "99p"]
|
|
274
291
|
) -> dict:
|
|
275
292
|
"""
|
|
276
293
|
Aggregate the pulse rate estimates using the specified aggregation methods.
|
|
@@ -280,7 +297,8 @@ def aggregate_pulse_rate(
|
|
|
280
297
|
pr_values : np.ndarray
|
|
281
298
|
The array containing the pulse rate estimates
|
|
282
299
|
aggregates : List[str]
|
|
283
|
-
The list of aggregation methods to be used for the pulse rate
|
|
300
|
+
The list of aggregation methods to be used for the pulse rate
|
|
301
|
+
estimates. The default is ['mode', '99p'].
|
|
284
302
|
|
|
285
303
|
Returns
|
|
286
304
|
-------
|
|
@@ -306,10 +324,12 @@ def aggregate_pulse_rate(
|
|
|
306
324
|
def extract_temporal_domain_features(
|
|
307
325
|
ppg_windowed: np.ndarray,
|
|
308
326
|
config: PulseRateConfig,
|
|
309
|
-
quality_stats:
|
|
327
|
+
quality_stats: list[str] = ["mean", "std"],
|
|
310
328
|
) -> pd.DataFrame:
|
|
311
329
|
"""
|
|
312
|
-
Compute temporal domain features for the ppg signal. The features are
|
|
330
|
+
Compute temporal domain features for the ppg signal. The features are
|
|
331
|
+
added to the dataframe. Therefore the original dataframe is modified,
|
|
332
|
+
and the modified dataframe is returned.
|
|
313
333
|
|
|
314
334
|
Parameters
|
|
315
335
|
----------
|
|
@@ -320,7 +340,8 @@ def extract_temporal_domain_features(
|
|
|
320
340
|
The configuration object containing the parameters for the feature extraction
|
|
321
341
|
|
|
322
342
|
quality_stats: list, optional
|
|
323
|
-
The statistics to be computed for the gravity component of the
|
|
343
|
+
The statistics to be computed for the gravity component of the
|
|
344
|
+
accelerometer signal (default: ['mean', 'std'])
|
|
324
345
|
|
|
325
346
|
Returns
|
|
326
347
|
-------
|
|
@@ -344,9 +365,11 @@ def extract_spectral_domain_features(
|
|
|
344
365
|
config: PulseRateConfig,
|
|
345
366
|
) -> pd.DataFrame:
|
|
346
367
|
"""
|
|
347
|
-
Calculate the spectral features (dominant frequency, relative power, and
|
|
348
|
-
for each segment of a PPG signal using a single
|
|
349
|
-
|
|
368
|
+
Calculate the spectral features (dominant frequency, relative power, and
|
|
369
|
+
spectral entropy) for each segment of a PPG signal using a single
|
|
370
|
+
Welch's method computation. The features are added to the dataframe.
|
|
371
|
+
Therefore the original dataframe is modified, and the modified dataframe
|
|
372
|
+
is returned.
|
|
350
373
|
|
|
351
374
|
Parameters
|
|
352
375
|
----------
|
|
@@ -386,7 +409,10 @@ def extract_spectral_domain_features(
|
|
|
386
409
|
|
|
387
410
|
|
|
388
411
|
def extract_acc_power_feature(
    f1: np.ndarray,
    psd_acc: np.ndarray,
    f2: np.ndarray,
    psd_ppg: np.ndarray,
) -> np.ndarray:
    """
    Extract the accelerometer power feature in the PPG frequency range.

    For every window (row of the PSD arrays) the dominant PPG frequency is
    located. The accelerometer power in a three-bin band around that peak and
    in a three-bin band around its first harmonic is summed and divided by the
    total accelerometer power of the window.

    Parameters
    ----------
    f1: np.ndarray
        The frequency bins of the accelerometer signal.
    psd_acc: np.ndarray
        The power spectral density of the accelerometer signal, indexed as
        (window, frequency bin).
    f2: np.ndarray
        The frequency bins of the PPG signal.
    psd_ppg: np.ndarray
        The power spectral density of the PPG signal, indexed as
        (window, frequency bin).

    Returns
    -------
    np.ndarray
        The accelerometer power ratio, one value per window.
    """
    # np.trapezoid only exists from NumPy 2.0 onwards; older releases expose
    # the same routine as np.trapz.
    integrate = np.trapezoid if hasattr(np, "trapezoid") else np.trapz

    last_bin = f1.shape[0] - 1
    window_rows = np.arange(psd_acc.shape[0])[:, None]

    def _three_bin_band(center_idx: np.ndarray) -> np.ndarray:
        """Return [center - 1, center, center + 1], kept inside the f1 grid."""
        band = np.column_stack((center_idx - 1, center_idx, center_idx + 1))
        # Without clipping, a peak in bin 0 wraps to the last bin (index -1)
        # and a peak in the last bin indexes out of bounds. A clipped
        # (duplicated) bin has zero width and adds nothing to the integral.
        return np.clip(band, 0, last_bin)

    # Find the index of the maximum PSD value in the PPG signal
    max_ppg_psd_idx = np.argmax(psd_ppg, axis=1)
    max_ppg_freq_psd = f2[max_ppg_psd_idx]

    # Find the neighboring indices of the maximum PSD value in the PPG signal.
    # NOTE(review): these PPG bin indices are applied to the accelerometer
    # grid below, which presumes f1 and f2 share the same bins - confirm.
    df_idx = _three_bin_band(max_ppg_psd_idx)

    # Find the index of the closest frequency in the accelerometer signal
    # to the first harmonic of the PPG frequency
    corr_acc_psd_fh_idx = np.argmin(
        np.abs(f1[:, None] - max_ppg_freq_psd * 2), axis=0
    )
    fh_idx = _three_bin_band(corr_acc_psd_fh_idx)

    # Compute the power in the ranges corresponding to the PPG frequency
    acc_power_ppg_range = integrate(
        psd_acc[window_rows, df_idx], f1[df_idx], axis=1
    ) + integrate(psd_acc[window_rows, fh_idx], f1[fh_idx], axis=1)

    # Compute the total power across the entire frequency range
    acc_power_total = integrate(psd_acc, f1)

    # Compute the power ratio of the accelerometer signal in the PPG frequency range
    acc_power_ratio = acc_power_ppg_range / acc_power_total

    return acc_power_ratio
|
|
440
467
|
|
|
@@ -443,7 +470,8 @@ def extract_accelerometer_feature(
|
|
|
443
470
|
acc_windowed: np.ndarray, ppg_windowed: np.ndarray, config: PulseRateConfig
|
|
444
471
|
) -> pd.DataFrame:
|
|
445
472
|
"""
|
|
446
|
-
Extract accelerometer features from the accelerometer signal in the PPG
|
|
473
|
+
Extract accelerometer features from the accelerometer signal in the PPG
|
|
474
|
+
frequency range.
|
|
447
475
|
|
|
448
476
|
Parameters
|
|
449
477
|
----------
|
|
@@ -493,3 +521,207 @@ def extract_accelerometer_feature(
|
|
|
493
521
|
)
|
|
494
522
|
|
|
495
523
|
return pd.DataFrame(acc_power_ratio, columns=["acc_power_ratio"])
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
def run_pulse_rate_pipeline(
    df_ppg_prepared: pd.DataFrame,
    output_dir: str | Path,
    store_intermediate: list[str] | None = None,
    pulse_rate_config: PulseRateConfig | None = None,
    ppg_config: PPGConfig | None = None,
    logging_level: int = logging.INFO,
    custom_logger: logging.Logger | None = None,
) -> pd.DataFrame:
    """
    High-level pulse rate analysis pipeline for a single segment.

    The pipeline runs the following steps in order:
    1. Preprocess the PPG data
    2. Extract signal quality features
    3. Signal quality classification
    4. Pulse rate estimation
    5. Quantify pulse rate (select relevant columns)

    Any exception raised in steps 1-4 is logged (with traceback) and an empty
    DataFrame is returned instead of propagating the error.

    Parameters
    ----------
    df_ppg_prepared : pd.DataFrame
        Prepared sensor data with time and PPG column.
    output_dir : str or Path
        Output directory for intermediate results (required). It is created
        if it does not exist.
    store_intermediate : list of str, optional
        Which intermediate results to store. The values checked here are
        "preprocessing", "pulse_rate" and "quantification". ``None``
        (default) stores nothing.
    pulse_rate_config : PulseRateConfig, optional
        Pulse rate analysis configuration. Defaults to ``PulseRateConfig()``.
    ppg_config : PPGConfig, optional
        PPG preprocessing configuration. Defaults to ``PPGConfig()``.
    logging_level : int, default logging.INFO
        Logging level using standard logging constants. Only applied to the
        module logger, i.e. when no ``custom_logger`` is given.
    custom_logger : logging.Logger, optional
        Custom logger instance

    Returns
    -------
    pd.DataFrame
        Quantified pulse rate data holding whichever of the time, pulse rate
        and "signal_quality" columns are present in the estimates. An empty
        DataFrame is returned when required input columns are missing, when a
        step fails, or when none of those columns is available.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if store_intermediate is None:
        store_intermediate = []

    # Setup logger
    if custom_logger is not None:
        active_logger = custom_logger
    else:
        active_logger = logging.getLogger(__name__)
        active_logger.setLevel(logging_level)

    if pulse_rate_config is None:
        pulse_rate_config = PulseRateConfig()
    if ppg_config is None:
        ppg_config = PPGConfig()

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Validate input data columns (PPG is required, accelerometer is optional)
    required_columns = [DataColumns.TIME, DataColumns.PPG]
    missing_columns = [
        col for col in required_columns if col not in df_ppg_prepared.columns
    ]
    if missing_columns:
        active_logger.warning(
            f"Missing required columns for pulse rate pipeline: {missing_columns}"
        )
        return pd.DataFrame()

    # Step 1: Preprocess PPG and accelerometer data (following tutorial)
    active_logger.info("Step 1: Preprocessing PPG and accelerometer data")
    try:
        # Work on a copy so the caller's DataFrame is left untouched
        df_ppg = df_ppg_prepared[required_columns].copy()

        df_ppg_proc, _ = preprocess_ppg_data(
            df_ppg=df_ppg,
            ppg_config=ppg_config,
            verbose=1 if logging_level <= logging.INFO else 0,
        )

        if "preprocessing" in store_intermediate:
            preprocessing_dir = output_dir / "preprocessing"
            preprocessing_dir.mkdir(exist_ok=True)
            df_ppg_proc.to_parquet(preprocessing_dir / "ppg_preprocessed.parquet")
            active_logger.info(f"Saved preprocessed data to {preprocessing_dir}")

    except Exception as e:
        # Best-effort boundary: report the failure, keep the traceback
        active_logger.error(f"Preprocessing failed: {e}", exc_info=True)
        return pd.DataFrame()

    # Step 2: Extract signal quality features
    active_logger.info("Step 2: Extracting signal quality features")
    try:
        df_features = extract_signal_quality_features(df_ppg_proc, pulse_rate_config)

        if "pulse_rate" in store_intermediate:
            pulse_rate_dir = output_dir / "pulse_rate"
            pulse_rate_dir.mkdir(exist_ok=True)
            df_features.to_parquet(pulse_rate_dir / "signal_quality_features.parquet")
            active_logger.info(f"Saved signal quality features to {pulse_rate_dir}")

    except Exception as e:
        active_logger.error(f"Feature extraction failed: {e}", exc_info=True)
        return pd.DataFrame()

    # Step 3: Signal quality classification
    active_logger.info("Step 3: Signal quality classification")
    try:
        classifier_path = files("paradigma.assets") / "ppg_quality_clf_package.pkl"
        classifier_package = ClassifierPackage.load(classifier_path)

        df_classified = signal_quality_classification(
            df_features, pulse_rate_config, classifier_package
        )

    except Exception as e:
        active_logger.error(
            f"Signal quality classification failed: {e}", exc_info=True
        )
        return pd.DataFrame()

    # Step 4: Pulse rate estimation
    active_logger.info("Step 4: Pulse rate estimation")
    try:
        df_pulse_rates = estimate_pulse_rate(
            df_sqa=df_classified,
            df_ppg_preprocessed=df_ppg_proc,
            config=pulse_rate_config,
        )

    except Exception as e:
        active_logger.error(f"Pulse rate estimation failed: {e}", exc_info=True)
        return pd.DataFrame()

    # Step 5: Quantify pulse rate (select relevant columns and apply quality filtering)
    active_logger.info("Step 5: Quantifying pulse rate")

    # Keep only the quantification columns that the estimates actually contain
    candidate_columns = [DataColumns.TIME, DataColumns.PULSE_RATE, "signal_quality"]
    available_columns = [
        col for col in candidate_columns if col in df_pulse_rates.columns
    ]
    if not available_columns:
        active_logger.warning("No valid quantification columns found")
        return pd.DataFrame()

    df_quantification = df_pulse_rates[available_columns].copy()
    has_pulse_rate = DataColumns.PULSE_RATE in df_quantification.columns

    # Apply quality filtering if signal quality is available
    if has_pulse_rate and "signal_quality" in df_quantification.columns:
        quality_threshold = getattr(pulse_rate_config, "threshold_sqa", 0.5)
        low_quality_mask = df_quantification["signal_quality"] < quality_threshold
        df_quantification.loc[low_quality_mask, DataColumns.PULSE_RATE] = np.nan

    # Count once, after filtering; reused for the metadata file and the log
    pulse_rate_estimates = (
        len(df_quantification[DataColumns.PULSE_RATE].dropna())
        if has_pulse_rate
        else 0
    )

    if "quantification" in store_intermediate:
        quantification_dir = output_dir / "quantification"
        quantification_dir.mkdir(exist_ok=True)
        df_quantification.to_parquet(
            quantification_dir / "pulse_rate_quantification.parquet"
        )

        # Save quantification metadata
        quantification_meta = {
            "total_windows": len(df_quantification),
            "valid_pulse_rate_estimates": pulse_rate_estimates,
            "columns": list(df_quantification.columns),
        }
        meta_path = quantification_dir / "pulse_rate_quantification_meta.json"
        with open(meta_path, "w", encoding="utf-8") as f:
            json.dump(quantification_meta, f, indent=2)

        active_logger.info(f"Saved pulse rate quantification to {quantification_dir}")

    active_logger.info(
        f"Pulse rate analysis completed: {pulse_rate_estimates} valid pulse "
        f"rate estimates from {len(df_quantification)} total windows"
    )

    return df_quantification
|