paradigma 0.1.5-py3-none-any.whl → 0.3.0-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- paradigma/__init__.py +1 -3
- paradigma/constants.py +65 -0
- paradigma/feature_extraction.py +703 -0
- paradigma/gait_analysis.py +415 -0
- paradigma/gait_analysis_config.py +266 -0
- paradigma/heart_rate_analysis.py +127 -0
- paradigma/heart_rate_analysis_config.py +9 -0
- paradigma/heart_rate_util.py +173 -0
- paradigma/imu_preprocessing.py +232 -0
- paradigma/ppg/classifier/LR_PPG_quality.pkl +0 -0
- paradigma/ppg/classifier/LR_model.mat +0 -0
- paradigma/ppg/feat_extraction/acc_feature.m +20 -0
- paradigma/ppg/feat_extraction/peakdet.m +64 -0
- paradigma/ppg/feat_extraction/ppg_features.m +53 -0
- paradigma/ppg/glob_functions/extract_hr_segments.m +37 -0
- paradigma/ppg/glob_functions/extract_overlapping_segments.m +23 -0
- paradigma/ppg/glob_functions/jsonlab/AUTHORS.txt +41 -0
- paradigma/ppg/glob_functions/jsonlab/ChangeLog.txt +74 -0
- paradigma/ppg/glob_functions/jsonlab/LICENSE_BSD.txt +25 -0
- paradigma/ppg/glob_functions/jsonlab/LICENSE_GPLv3.txt +699 -0
- paradigma/ppg/glob_functions/jsonlab/README.txt +394 -0
- paradigma/ppg/glob_functions/jsonlab/examples/.svn/entries +368 -0
- paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_jsonlab_basic.m.svn-base +180 -0
- paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_ubjson_basic.m.svn-base +180 -0
- paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example1.json.svn-base +23 -0
- paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example2.json.svn-base +22 -0
- paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example3.json.svn-base +11 -0
- paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example4.json.svn-base +34 -0
- paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_basictest.matlab.svn-base +662 -0
- paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.m.svn-base +27 -0
- paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.matlab.svn-base +144 -0
- paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_speedtest.m.svn-base +21 -0
- paradigma/ppg/glob_functions/jsonlab/examples/demo_jsonlab_basic.m +180 -0
- paradigma/ppg/glob_functions/jsonlab/examples/demo_ubjson_basic.m +180 -0
- paradigma/ppg/glob_functions/jsonlab/examples/example1.json +23 -0
- paradigma/ppg/glob_functions/jsonlab/examples/example2.json +22 -0
- paradigma/ppg/glob_functions/jsonlab/examples/example3.json +11 -0
- paradigma/ppg/glob_functions/jsonlab/examples/example4.json +34 -0
- paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_basictest.matlab +662 -0
- paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.m +27 -0
- paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.matlab +144 -0
- paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_speedtest.m +21 -0
- paradigma/ppg/glob_functions/jsonlab/jsonopt.m +32 -0
- paradigma/ppg/glob_functions/jsonlab/loadjson.m +566 -0
- paradigma/ppg/glob_functions/jsonlab/loadubjson.m +528 -0
- paradigma/ppg/glob_functions/jsonlab/mergestruct.m +33 -0
- paradigma/ppg/glob_functions/jsonlab/savejson.m +475 -0
- paradigma/ppg/glob_functions/jsonlab/saveubjson.m +504 -0
- paradigma/ppg/glob_functions/jsonlab/varargin2struct.m +40 -0
- paradigma/ppg/glob_functions/sample_prob_final.m +49 -0
- paradigma/ppg/glob_functions/synchronization.m +76 -0
- paradigma/ppg/glob_functions/tsdf_scan_meta.m +22 -0
- paradigma/ppg/hr_functions/Long_TFD_JOT.m +37 -0
- paradigma/ppg/hr_functions/PPG_TFD_HR.m +59 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/.gitignore +4 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/CHANGELOG.md +23 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/LICENCE.md +27 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/README.md +251 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/README.pdf +0 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_kern.m +142 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_lag_kern.m +314 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_lag_kern.m +123 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/dec_tfd.m +154 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_di_gdtfd.m +194 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_li_gdtfd.m +200 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_nonsep_gdtfd.m +229 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_sep_gdtfd.m +241 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/di_gdtfd.m +157 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/li_gdtfd.m +190 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/nonsep_gdtfd.m +196 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/sep_gdtfd.m +199 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/full_tfd.m +144 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/load_curdir.m +13 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/pics/decimated_TFDs_examples.png +0 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/pics/full_TFDs_examples.png +0 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/utils/check_dec_params_seq.m +79 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispEE.m +9 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispVars.m +26 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/utils/disp_bytes.m +25 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_full.m +40 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_half.m +34 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/utils/gen_LFM.m +29 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_analytic_signal.m +76 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_window.m +176 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/utils/isreal_fn.m +11 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/utils/padWin.m +97 -0
- paradigma/ppg/hr_functions/TFD toolbox JOT/utils/vtfd.m +149 -0
- paradigma/ppg/preprocessing/preprocessing_imu.m +15 -0
- paradigma/ppg/preprocessing/preprocessing_ppg.m +13 -0
- paradigma/ppg_preprocessing.py +313 -0
- paradigma/preprocessing_config.py +69 -0
- paradigma/quantification.py +58 -0
- paradigma/tremor/TremorFeaturesAndClassification.m +345 -0
- paradigma/tremor/feat_extraction/DerivativesExtract.m +22 -0
- paradigma/tremor/feat_extraction/ExtractBandSignalsRMS.m +72 -0
- paradigma/tremor/feat_extraction/MFCCExtract.m +100 -0
- paradigma/tremor/feat_extraction/PSDBandPower.m +52 -0
- paradigma/tremor/feat_extraction/PSDEst.m +63 -0
- paradigma/tremor/feat_extraction/PSDExtrAxis.m +88 -0
- paradigma/tremor/feat_extraction/PSDExtrOpt.m +95 -0
- paradigma/tremor/preprocessing/InterpData.m +32 -0
- paradigma/tremor/weekly_aggregates/WeeklyAggregates.m +295 -0
- paradigma/util.py +50 -0
- paradigma/windowing.py +219 -0
- paradigma-0.3.0.dist-info/LICENSE +192 -0
- paradigma-0.3.0.dist-info/METADATA +79 -0
- paradigma-0.3.0.dist-info/RECORD +108 -0
- paradigma/dummy.py +0 -3
- paradigma-0.1.5.dist-info/LICENSE +0 -201
- paradigma-0.1.5.dist-info/METADATA +0 -18
- paradigma-0.1.5.dist-info/RECORD +0 -6
- {paradigma-0.1.5.dist-info → paradigma-0.3.0.dist-info}/WHEEL +0 -0
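For orientation: the manifest above shows 0.3.0 replacing the 0.1.5 placeholder (`dummy.py`) with per-modality pipeline modules plus bundled MATLAB sources. A minimal sketch of importing the new Python surface; the names are taken only from the file list and the hunks below, and whether they form the intended public API is an assumption:

```python
# Names come from the file list and diff hunks on this page;
# treating them as the public API is an assumption.
from paradigma.imu_preprocessing import preprocess_imu_data
from paradigma.heart_rate_analysis import extract_signal_quality_features
from paradigma.heart_rate_util import extract_ppg_features, calculate_power_ratio
```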
paradigma/heart_rate_analysis.py
@@ -0,0 +1,127 @@
+from typing import List
+import numpy as np
+from scipy.signal import welch
+from sklearn.preprocessing import StandardScaler
+from dateutil import parser
+
+import tsdf
+import tsdf.constants
+from paradigma.heart_rate_analysis_config import HeartRateFeatureExtractionConfig
+from paradigma.heart_rate_util import extract_ppg_features, calculate_power_ratio, read_PPG_quality_classifier
+from paradigma.util import read_metadata, write_data, get_end_iso8601
+from paradigma.constants import DataColumns, UNIX_TICKS_MS
+
+
+def extract_signal_quality_features(input_path: str, classifier_path: str, output_path: str, config: HeartRateFeatureExtractionConfig) -> None:
+    # load data
+    metadata_time_ppg, metadata_samples_ppg = read_metadata(input_path, "PPG_meta.json", "PPG_time.bin", "PPG_samples.bin")
+    df_ppg = tsdf.load_dataframe_from_binaries([metadata_time_ppg, metadata_samples_ppg], tsdf.constants.ConcatenationType.columns)
+    arr_ppg = df_ppg[DataColumns.PPG].to_numpy()
+    relative_time_ppg = df_ppg[DataColumns.TIME].to_numpy()
+
+    metadata_time_acc, metadata_samples_acc = read_metadata(input_path, "accelerometer_meta.json", "accelerometer_time.bin", "accelerometer_samples.bin")
+    df_acc = tsdf.load_dataframe_from_binaries([metadata_time_acc, metadata_samples_acc], tsdf.constants.ConcatenationType.columns)
+    arr_acc = df_acc[[DataColumns.ACCELEROMETER_X, DataColumns.ACCELEROMETER_Y, DataColumns.ACCELEROMETER_Z]].to_numpy()
+
+    sampling_frequency_ppg = config.sampling_frequency_ppg
+    sampling_frequency_imu = config.sampling_frequency_imu
+
+    # Parameters
+    epoch_length = 6  # in seconds
+    overlap = 5  # in seconds
+
+    # Number of samples in epoch
+    samples_per_epoch_ppg = int(epoch_length * sampling_frequency_ppg)
+    samples_per_epoch_acc = int(epoch_length * sampling_frequency_imu)
+
+    # Calculate number of samples to shift for each epoch
+    samples_shift_ppg = int((epoch_length - overlap) * sampling_frequency_ppg)
+    samples_shift_acc = int((epoch_length - overlap) * sampling_frequency_imu)
+
+    pwelchwin_acc = int(3 * sampling_frequency_imu)  # window length for pwelch
+    pwelchwin_ppg = int(3 * sampling_frequency_ppg)  # window length for pwelch
+    noverlap_acc = int(0.5 * pwelchwin_acc)  # overlap for pwelch
+    noverlap_ppg = int(0.5 * pwelchwin_ppg)  # overlap for pwelch
+
+    f_bin_res = 0.05  # the threshold is set based on this binning
+    nfft_ppg = np.arange(0, sampling_frequency_ppg / 2, f_bin_res)  # frequency bins for pwelch ppg
+    nfft_acc = np.arange(0, sampling_frequency_imu / 2, f_bin_res)  # frequency bins for pwelch imu
+
+    features_ppg_scaled = []
+    feature_acc = []
+    t_unix_feat_total = []
+    count = 0
+    acc_idx = 0
+
+    # Read the classifier (it contains mu and sigma)
+    clf = read_PPG_quality_classifier(classifier_path)
+    # not used here: lr_model = clf['model']
+    arr_mu = clf['mu'][:, 0]
+    arr_sigma = clf['sigma'][:, 0]
+
+    scaler = StandardScaler()
+    scaler.mean_ = arr_mu
+    scaler.scale_ = arr_sigma
+
+    ppg_start_time = parser.parse(metadata_time_ppg.start_iso8601)
+
+    # Loop over 6 s segments for both PPG and IMU and calculate features
+    for i in range(0, len(arr_ppg) - samples_per_epoch_ppg + 1, samples_shift_ppg):
+        if acc_idx + samples_per_epoch_acc > len(arr_acc):  # For the last epoch, check if the segment for IMU is too short (not 6 seconds)
+            break
+        else:
+            acc_segment = arr_acc[acc_idx:acc_idx + samples_per_epoch_acc, :]  # Extract the IMU window (6 seconds)
+
+            ppg_segment = arr_ppg[i:i + samples_per_epoch_ppg]  # Extract the PPG window (6 seconds)
+
+            count += 1
+
+            # Feature extraction + scaling
+            features = extract_ppg_features(ppg_segment, sampling_frequency_ppg)
+            features = features.reshape(1, -1)
+            features_ppg_scaled.append(scaler.transform(features)[0])
+
+            # Calculating PSD (power spectral density) of IMU and PPG
+            # hann(pwelchwin_acc)
+            # hann(pwelchwin_ppg)
+            print(pwelchwin_acc, noverlap_acc, len(nfft_acc))
+            f1, pxx1 = welch(acc_segment, sampling_frequency_imu, window='hann', nperseg=pwelchwin_acc, noverlap=None, nfft=len(nfft_acc))
+            PSD_imu = np.sum(pxx1, axis=1)  # sum over the three axes
+            f2, pxx2 = welch(ppg_segment, sampling_frequency_ppg, window='hann', nperseg=pwelchwin_ppg, noverlap=None, nfft=len(nfft_ppg))
+            PSD_ppg = np.sum(pxx2)  # this does nothing, equal to PSD_ppg = pxx2
+
+            # IMU feature extraction
+            print(f1.shape, f2.shape)
+            feature_acc.append(calculate_power_ratio(f1, PSD_imu, f2, PSD_ppg))  # Calculate the power ratio of the accelerometer signal in the PPG frequency range
+
+            # time channel
+            t_unix_feat_total.append((relative_time_ppg[i] + ppg_start_time) * UNIX_TICKS_MS)  # Save in absolute unix time ms
+            acc_idx += samples_shift_acc  # update IMU_idx
+
+    # Convert lists to numpy arrays
+    print(features_ppg_scaled[0:4])
+    features_ppg_scaled = np.array(features_ppg_scaled)
+    feature_acc = np.array(feature_acc)
+    t_unix_feat_total = np.array(t_unix_feat_total)
+
+    # Synchronization information
+    # TODO: store this, as this is needed for the HR pipeline
+    # v_sync_ppg_total = np.array([
+    #     ppg_indices[0],  # start index
+    #     ppg_indices[1],  # end index
+    #     segment_ppg[0],  # Segment index
+    #     count  # Number of epochs in the segment
+    # ])
+
+    metadata_features_ppg = metadata_samples_ppg.copy()
+    metadata_features_ppg.channels = ["variance", "mean", "median", "kurtosis", "skewness", "dominant_frequency", "relative_power", "spectral_entropy", "signal_noise_ratio", "second_highest_peak"]
+    metadata_features_acc = metadata_samples_acc.copy()
+    metadata_features_acc.channels = ["power_ratio"]
+    metadata_time_ppg = metadata_time_ppg.copy()
+    metadata_time_ppg.channels = ["time"]
+    metadata_time_acc = metadata_time_acc.copy()
+    metadata_time_acc.channels = ["time"]
+
+    write_data(metadata_time_ppg, metadata_features_ppg, output_path, 'features_ppg_meta.json', features_ppg_scaled)
+    write_data(metadata_time_acc, metadata_features_acc, output_path, 'feature_acc_meta.json', feature_acc)
+
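A hedged usage sketch for the new entry point above. The TSDF file names come from the function body and the classifier path from the file manifest; the directory paths and the config constructor are assumptions:

```python
from paradigma.heart_rate_analysis import extract_signal_quality_features
from paradigma.heart_rate_analysis_config import HeartRateFeatureExtractionConfig

# Hypothetical input directory holding the TSDF files the function reads:
# PPG_meta.json, PPG_time.bin, PPG_samples.bin, accelerometer_meta.json,
# accelerometer_time.bin, accelerometer_samples.bin.
extract_signal_quality_features(
    input_path="data/raw",        # hypothetical path
    classifier_path="paradigma/ppg/classifier/LR_PPG_quality.pkl",
    output_path="data/features",  # hypothetical path
    config=HeartRateFeatureExtractionConfig(),  # assumed default constructor
)
# Writes features_ppg_meta.json (10 scaled PPG quality features per 6 s epoch,
# 1 s hop) and feature_acc_meta.json (one accelerometer power ratio per epoch).
```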
paradigma/heart_rate_util.py
@@ -0,0 +1,173 @@
+from typing import List, Tuple, Union
+import pickle
+import pandas as pd
+import numpy as np
+from scipy.signal import welch, find_peaks
+from scipy.stats import kurtosis, skew
+
+
+def extract_ppg_features(arr_ppg: np.ndarray, sampling_frequency: int) -> np.ndarray:
+    # Number of features
+    feature_count = 10
+
+    # Initialize features array
+    features_ppg = np.zeros(feature_count)
+
+    # Time-domain features
+    absPPG = np.abs(arr_ppg)
+    features_ppg[0] = np.var(arr_ppg)    # Feature 1: variance
+    features_ppg[1] = np.mean(absPPG)    # Feature 2: mean
+    features_ppg[2] = np.median(absPPG)  # Feature 3: median
+    features_ppg[3] = kurtosis(arr_ppg)  # Feature 4: kurtosis
+    features_ppg[4] = skew(arr_ppg)      # Feature 5: skewness
+
+    window = 3 * sampling_frequency  # 90 samples for Welch's method => fr = 2/3 = 0.67 Hz --> not an issue with a clear distinct frequency
+    overlap = int(0.5 * window)      # 45 samples overlap for Welch's method
+
+    f, P = welch(arr_ppg, sampling_frequency, nperseg=window, noverlap=overlap)
+
+    # Find the dominant frequency
+    maxIndex = np.argmax(P)
+    features_ppg[5] = f[maxIndex]  # Feature 6: dominant frequency
+
+    # Find indices of f in the relevant physiological heart range 45-180 bpm (0.75 - 3 Hz)
+    ph_idx = np.where((f >= 0.75) & (f <= 3))[0]
+    maxIndex_ph = np.argmax(P[ph_idx])
+    dominantFrequency_ph = f[ph_idx[maxIndex_ph]]
+    f_dom_band = np.where((f >= dominantFrequency_ph - 0.2) & (f <= dominantFrequency_ph + 0.2))[0]
+    features_ppg[6] = np.trapz(P[f_dom_band]) / np.trapz(P)  # Feature 7: relative power
+
+    # Normalize the power spectrum
+    pxx_norm = P / np.sum(P)
+
+    # Compute spectral entropy
+    features_ppg[7] = -np.sum(pxx_norm * np.log2(pxx_norm)) / np.log2(len(arr_ppg))  # Feature 8: spectral entropy
+
+    # Signal to noise ratio
+    arr_signal = np.var(arr_ppg)
+    arr_noise = np.var(absPPG)
+    features_ppg[8] = arr_signal / arr_noise  # Feature 9: surrogate of signal to noise ratio
+
+    # Autocorrelation features
+    ppg_series = pd.Series(arr_ppg)
+    autocorrelations = [ppg_series.autocorr(lag=i) for i in range(sampling_frequency * 3)]
+
+    # Finding peaks in autocorrelation
+    peaks, _ = peakdet(np.array(autocorrelations), delta=0.01)
+    sorted_peaks = np.sort(peaks)
+    # TODO: double check if this is correct
+    print(sorted_peaks)
+
+    if len(sorted_peaks) > 1:
+        features_ppg[9] = sorted_peaks[1]  # Feature 10: the second highest peak
+    else:
+        features_ppg[9] = 0  # Set to 0 if there is no clear second peak
+
+    return features_ppg
+
+# Example usage:
+# PPG = np.random.randn(300)  # Example PPG signal, replace with actual data
+# fs = 50  # Example sampling frequency, replace with actual sampling rate
+# features = extract_ppg_features(PPG, fs)
+# print(features)
+
+
+def peakdet(v: np.ndarray, delta, x: Union[np.ndarray, None] = None) -> Tuple[List[Tuple[int, float]], List[Tuple[int, float]]]:
+    """
+    Detect peaks in a vector.
+
+    Args:
+        v (numpy array): Input vector.
+        delta (float): Minimum difference between a peak and its surrounding values.
+        x (numpy array, optional): Indices corresponding to the values in v. If not provided, indices are generated.
+
+    Returns:
+        maxtab (list of tuples): Local maxima as (index, value) pairs.
+        mintab (list of tuples): Local minima as (index, value) pairs.
+    """
+
+    if x is None:
+        x = np.arange(len(v))
+    else:
+        if len(v) != len(x):
+            raise ValueError("Input vectors v and x must have the same length")
+
+    # Detect maxima
+    max_indices, _ = find_peaks(v, height=delta)
+    maxtab = [(x[idx], v[idx]) for idx in max_indices]
+
+    # Detect minima by inverting the signal
+    min_indices, _ = find_peaks(-v, height=delta)
+    mintab = [(x[idx], v[idx]) for idx in min_indices]
+
+    return maxtab, mintab
+
+# Example usage:
+# v = [0, 1, 2, 1, 0, 1, 2, 3, 2, 1, 0]
+# delta = 1
+# maxtab, mintab = peakdet(v, delta)
+# print("Maxima:", maxtab)
+# print("Minima:", mintab)
+
+
+def calculate_power_ratio(f1: np.ndarray, PSD_acc: np.ndarray, f2: np.ndarray, PSD_ppg: np.ndarray) -> float:
+    """
+    Calculates the power ratio of the accelerometer signal in the PPG frequency range.
+
+    Args:
+        f1 (numpy.ndarray): Frequency bins for the accelerometer signal.
+        PSD_acc (numpy.ndarray): Power Spectral Density of the accelerometer signal.
+        f2 (numpy.ndarray): Frequency bins for the PPG signal.
+        PSD_ppg (numpy.ndarray): Power Spectral Density of the PPG signal.
+
+    Returns:
+        float: The power ratio of the accelerometer signal in the PPG frequency range.
+    """
+
+    # Find the index of the maximum PSD value in the PPG signal
+    max_PPG_psd_idx = np.argmax(PSD_ppg)
+    max_PPG_freq_psd = f2[max_PPG_psd_idx]
+
+    # Find the index of the closest frequency in the accelerometer signal to the dominant PPG frequency
+    corr_acc_psd_df_idx = np.argmin(np.abs(max_PPG_freq_psd - f1))
+
+    df_idx = np.arange(corr_acc_psd_df_idx - 1, corr_acc_psd_df_idx + 2)
+
+    # Find the index of the closest frequency in the accelerometer signal to the first harmonic of the PPG frequency
+    corr_acc_psd_fh_idx = np.argmin(np.abs(max_PPG_freq_psd * 2 - f1))
+    fh_idx = np.arange(corr_acc_psd_fh_idx - 1, corr_acc_psd_fh_idx + 2)
+
+    # Calculate the power ratio
+    acc_power_PPG_range = np.trapz(PSD_acc[df_idx], f1[df_idx]) + np.trapz(PSD_acc[fh_idx], f1[fh_idx])
+    acc_power_total = np.trapz(PSD_acc, f1)
+
+    acc_power_ratio = acc_power_PPG_range / acc_power_total
+
+    return acc_power_ratio
+
+# Example usage:
+# f1 = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
+# PSD_acc = np.array([1, 2, 3, 2, 1])
+# f2 = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
+# PSD_ppg = np.array([1, 3, 2, 1, 0.5])
+# result = calculate_power_ratio(f1, PSD_acc, f2, PSD_ppg)
+# print(result)
+
+def read_PPG_quality_classifier(classifier_path: str):
+    """
+    Read the PPG quality classifier from a file.
+
+    Parameters
+    ----------
+    classifier_path : str
+        The path to the classifier file.
+
+    Returns
+    -------
+    dict
+        The classifier dictionary.
+    """
+    with open(classifier_path, 'rb') as f:
+        clf = pickle.load(f)
+    return clf
+
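To make calculate_power_ratio concrete, a self-contained sketch on synthetic signals; the sampling rates (30 Hz PPG, 100 Hz IMU) are illustrative assumptions, not values asserted by the package:

```python
import numpy as np
from scipy.signal import welch
from paradigma.heart_rate_util import calculate_power_ratio

fs_ppg, fs_imu = 30, 100  # assumed sampling rates, for illustration only
t_ppg = np.arange(6 * fs_ppg) / fs_ppg  # one 6 s epoch
t_imu = np.arange(6 * fs_imu) / fs_imu

ppg = np.sin(2 * np.pi * 1.2 * t_ppg)         # ~72 bpm pulse wave
acc = 0.05 * np.sin(2 * np.pi * 1.2 * t_imu)  # arm movement at the same rate

f2, psd_ppg = welch(ppg, fs_ppg, nperseg=3 * fs_ppg)
f1, psd_acc = welch(acc, fs_imu, nperseg=3 * fs_imu)

# A ratio near 1 means the accelerometer power is concentrated around the
# PPG's dominant frequency and its first harmonic, i.e. the PPG peak is
# likely motion-driven rather than cardiac.
print(calculate_power_ratio(f1, psd_acc, f2, psd_ppg))
```

Note that the function integrates only a three-bin band around the dominant frequency and around its first harmonic, mirroring acc_feature.m further down.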
paradigma/imu_preprocessing.py
@@ -0,0 +1,232 @@
+from pathlib import Path
+from typing import List, Union
+import numpy as np
+import pandas as pd
+from scipy import signal
+from scipy.interpolate import CubicSpline
+
+import tsdf
+from paradigma.constants import DataColumns, TimeUnit
+from paradigma.util import write_data, read_metadata
+from paradigma.preprocessing_config import IMUPreprocessingConfig
+
+
+def preprocess_imu_data(input_path: Union[str, Path], output_path: Union[str, Path], config: IMUPreprocessingConfig) -> None:
+
+    # Load data
+    metadata_time, metadata_samples = read_metadata(str(input_path), str(config.meta_filename),
+                                                    str(config.time_filename), str(config.values_filename))
+    df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)
+
+    # Rename columns
+    df = df.rename(columns={f'rotation_{a}': f'gyroscope_{a}' for a in ['x', 'y', 'z']})
+    df = df.rename(columns={f'acceleration_{a}': f'accelerometer_{a}' for a in ['x', 'y', 'z']})
+
+    # Convert to relative seconds from delta milliseconds
+    df[config.time_colname] = transform_time_array(
+        time_array=df[config.time_colname],
+        scale_factor=1000,
+        input_unit_type=TimeUnit.DIFFERENCE_MS,
+        output_unit_type=TimeUnit.RELATIVE_MS)
+
+    df = resample_data(
+        df=df,
+        time_column=config.time_colname,
+        time_unit_type=TimeUnit.RELATIVE_MS,
+        unscaled_column_names=list(config.d_channels_imu.keys()),
+        scale_factors=metadata_samples.scale_factors,
+        resampling_frequency=config.sampling_frequency)
+
+    if config.side_watch == 'left':
+        df[DataColumns.ACCELEROMETER_X] *= -1
+
+    for col in config.d_channels_accelerometer.keys():
+
+        # Change to correct units [g]
+        if config.acceleration_units == 'm/s^2':
+            df[col] /= 9.81
+
+        for result, side_pass in zip(['filt', 'grav'], ['hp', 'lp']):
+            df[f'{result}_{col}'] = butterworth_filter(
+                single_sensor_col=np.array(df[col]),
+                order=config.filter_order,
+                cutoff_frequency=config.lower_cutoff_frequency,
+                passband=side_pass,
+                sampling_frequency=config.sampling_frequency,
+            )
+
+        df = df.drop(columns=[col])
+        df = df.rename(columns={f'filt_{col}': col})
+
+    # Store data
+    for sensor, units in zip(['accelerometer', 'gyroscope'], ['g', config.rotation_units]):
+        df_sensor = df[[config.time_colname] + [x for x in df.columns if sensor in x]]
+
+        metadata_samples.channels = [x for x in df.columns if sensor in x]
+        metadata_samples.units = list(np.repeat(units, len(metadata_samples.channels)))
+        metadata_samples.file_name = f'{sensor}_samples.bin'
+
+        metadata_time.file_name = f'{sensor}_time.bin'
+        metadata_time.units = ['time_relative_ms']
+
+        write_data(metadata_time, metadata_samples, output_path, f'{sensor}_meta.json', df_sensor)
+
+
+def transform_time_array(
+    time_array: pd.Series,
+    scale_factor: float,
+    input_unit_type: str,
+    output_unit_type: str,
+    start_time: float = 0.0,
+) -> np.ndarray:
+    """
+    Transforms the time array to relative time (when defined in delta time) and scales the values.
+
+    Parameters
+    ----------
+    time_array : pd.Series
+        The time array in milliseconds to transform.
+    scale_factor : float
+        The scale factor to apply to the time array.
+    input_unit_type : str
+        The time unit type of the input time array. Raw PPP data was in `TimeUnit.DIFFERENCE_MS`.
+    output_unit_type : str
+        The time unit type of the output time array. The processing is often done in `TimeUnit.RELATIVE_MS`.
+    start_time : float, optional
+        The start time of the time array in UNIX milliseconds (default is 0.0).
+
+    Returns
+    -------
+    time_array
+        The transformed time array in milliseconds, with the specified time unit type.
+    """
+    # Scale time array and transform to relative time (`TimeUnit.RELATIVE_MS`)
+    if input_unit_type == TimeUnit.DIFFERENCE_MS:
+        # Convert a series of differences into a cumulative sum to reconstruct the original time series.
+        time_array = np.cumsum(np.double(time_array)) / scale_factor
+    elif input_unit_type == TimeUnit.ABSOLUTE_MS:
+        # Set the start time if not provided.
+        if np.isclose(start_time, 0.0, rtol=1e-09, atol=1e-09):
+            start_time = time_array[0]
+        # Convert absolute time stamps into a time series relative to start_time.
+        time_array = (time_array - start_time) / scale_factor
+    elif input_unit_type == TimeUnit.RELATIVE_MS:
+        # Scale the relative time series as per the scale_factor.
+        time_array = time_array / scale_factor
+
+    # Transform the time array from `TimeUnit.RELATIVE_MS` to the specified time unit type
+    if output_unit_type == TimeUnit.ABSOLUTE_MS:
+        # Convert the time array to absolute time by adding the start time to each element.
+        time_array = time_array + start_time
+    elif output_unit_type == TimeUnit.DIFFERENCE_MS:
+        # Create a new array starting with 0, followed by the differences between consecutive elements.
+        time_array = np.diff(np.insert(time_array, 0, start_time))
+    elif output_unit_type == TimeUnit.RELATIVE_MS:
+        # The array is already in relative format, do nothing.
+        pass
+    return time_array
+
+
+def resample_data(
+    df: pd.DataFrame,
+    time_column: str,
+    time_unit_type: str,
+    unscaled_column_names: List[str],
+    resampling_frequency: int,
+    scale_factors: List[float] = [],
+    start_time: float = 0.0,
+) -> pd.DataFrame:
+    """
+    Resamples the IMU data to the resampling frequency. The data is scaled before resampling.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        The data to resample.
+    time_column : str
+        The name of the time column.
+    time_unit_type : str
+        The time unit type of the time array. The method currently works only for `TimeUnit.RELATIVE_MS`.
+    unscaled_column_names : List[str]
+        The names of the columns to resample.
+    resampling_frequency : int
+        The frequency to resample the data to.
+    scale_factors : list, optional
+        The scale factors to apply to the values before resampling (default is []).
+    start_time : float, optional
+        The start time of the time array, which is required if it is in absolute format (default is 0.0).
+
+    Returns
+    -------
+    pd.DataFrame
+        The resampled data.
+    """
+    # We need a start_time if the time is in absolute time format
+    if time_unit_type == TimeUnit.ABSOLUTE_MS and start_time == 0.0:
+        raise ValueError("start_time is required for absolute time format")
+
+    # Get time and values
+    time_abs_array = np.array(df[time_column])
+    values_unscaled = np.array(df[unscaled_column_names])
+
+    # Scale data
+    if len(scale_factors) != 0 and scale_factors is not None:
+        scaled_values = values_unscaled * scale_factors
+
+    # Resample
+    t_resampled = np.arange(start_time, time_abs_array[-1], 1 / resampling_frequency)
+
+    # Create dataframe
+    df = pd.DataFrame(t_resampled, columns=[time_column])
+
+    # Interpolate IMU - maybe a separate method?
+    for j, sensor_col in enumerate(unscaled_column_names):
+        if not np.all(np.diff(time_abs_array) > 0):
+            raise ValueError("time_abs_array is not strictly increasing")
+
+        cs = CubicSpline(time_abs_array, scaled_values.T[j])
+        # TODO: isn't sensor_col of type DataColumns?
+        df[sensor_col] = cs(df[time_column])
+
+    return df
+
+
+def butterworth_filter(
+    single_sensor_col: np.ndarray,
+    order: int,
+    cutoff_frequency: Union[float, List[float]],
+    passband: str,
+    sampling_frequency: int,
+):
+    """
+    Applies the Butterworth filter to a single sensor column.
+
+    Parameters
+    ----------
+    single_sensor_col: np.ndarray
+        A single column containing sensor data in float format
+    order: int
+        The exponential order of the filter
+    cutoff_frequency: float or List[float]
+        The frequency at which the gain drops to 1/sqrt(2) that of the passband. If passband is 'band', then cutoff_frequency should be a list of two floats.
+    passband: str
+        Type of passband: ['hp', 'lp' or 'band']
+    sampling_frequency: int
+        The sampling frequency of the sensor data
+
+    Returns
+    -------
+    sensor_column_filtered: np.ndarray
+        The original sensor column, filtered with a zero-phase Butterworth filter
+    """
+
+    sos = signal.butter(
+        N=order,
+        Wn=cutoff_frequency,
+        btype=passband,
+        analog=False,
+        fs=sampling_frequency,
+        output="sos",
+    )
+    return signal.sosfiltfilt(sos, single_sensor_col)
+
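A small sketch of the gravity/movement split performed in the accelerometer loop above: the same cutoff is applied as a high-pass ('filt_*', movement) and as a low-pass ('grav_*', gravity). The order and cutoff values here are illustrative assumptions; the package takes them from IMUPreprocessingConfig:

```python
import numpy as np
from paradigma.imu_preprocessing import butterworth_filter

fs = 100  # assumed IMU sampling frequency, for illustration
t = np.arange(10 * fs) / fs
acc_x = 1.0 + 0.2 * np.sin(2 * np.pi * 3 * t)  # gravity offset + 3 Hz movement

movement = butterworth_filter(acc_x, order=4, cutoff_frequency=0.2,
                              passband='hp', sampling_frequency=fs)
gravity = butterworth_filter(acc_x, order=4, cutoff_frequency=0.2,
                             passband='lp', sampling_frequency=fs)
print(round(np.mean(gravity), 2), round(np.mean(movement), 2))  # ~1.0, ~0.0
```

Because sosfiltfilt filters forward and backward, both outputs are zero-phase and stay sample-aligned with the input.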
paradigma/ppg/classifier/LR_PPG_quality.pkl: Binary file (no diff shown)

paradigma/ppg/classifier/LR_model.mat: Binary file (no diff shown)
paradigma/ppg/feat_extraction/acc_feature.m
@@ -0,0 +1,20 @@
+function acc_power_ratio = acc_feature(f1, PSD_acc, f2, PSD_ppg)
+% This function calculates the power ratio of the accelerometer signal in the PPG frequency range.
+% The power ratio is defined as the ratio of the power in the PPG frequency range to the total power.
+[~, max_PPG_psd_idx] = max(PSD_ppg);
+max_PPG_freq_psd = f2(max_PPG_psd_idx);
+
+%%---check dominant frequency (df) indices----%%
+[~, corr_acc_psd_df_idx] = min(abs(max_PPG_freq_psd-f1));
+
+df_idx = corr_acc_psd_df_idx-1:corr_acc_psd_df_idx+1;
+
+%%---check first harmonic (fh) frequency indices----%%
+[~, corr_acc_psd_fh_idx] = min(abs(max_PPG_freq_psd*2-f1));
+fh_idx = corr_acc_psd_fh_idx-1:corr_acc_psd_fh_idx+1;
+
+%%---calculate power ratio---%%
+acc_power_PPG_range = trapz(f1(df_idx), PSD_acc(df_idx)) + trapz(f1(fh_idx), PSD_acc(fh_idx));
+acc_power_total = trapz(f1, PSD_acc);
+
+acc_power_ratio = acc_power_PPG_range/acc_power_total;
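acc_feature.m is the MATLAB original of calculate_power_ratio above. One easy-to-miss detail when comparing the two: MATLAB's trapz(X, Y) takes the sample points first, while NumPy's np.trapz(y, x) takes the values first, so the Python port's call order is the correct translation. A tiny sketch with illustrative numbers confirming the correspondence:

```python
import numpy as np

f = np.array([0.0, 0.5, 1.0, 1.5, 2.0])    # frequency bins (illustrative)
psd = np.array([0.1, 0.4, 1.0, 0.4, 0.1])  # PSD values (illustrative)

# np.trapz(y, x) here corresponds to MATLAB's trapz(f, psd):
# the same trapezoidal integral of psd over f.
print(np.trapz(psd, f))  # 0.95
```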
paradigma/ppg/feat_extraction/peakdet.m
@@ -0,0 +1,64 @@
+function [maxtab, mintab]=peakdet(v, delta, x)
+%PEAKDET Detect peaks in a vector
+%        [MAXTAB, MINTAB] = PEAKDET(V, DELTA) finds the local
+%        maxima and minima ("peaks") in the vector V.
+%        MAXTAB and MINTAB consists of two columns. Column 1
+%        contains indices in V, and column 2 the found values.
+%
+%        With [MAXTAB, MINTAB] = PEAKDET(V, DELTA, X) the indices
+%        in MAXTAB and MINTAB are replaced with the corresponding
+%        X-values.
+%
+%        A point is considered a maximum peak if it has the maximal
+%        value, and was preceded (to the left) by a value lower by
+%        DELTA.
+
+% Eli Billauer, 3.4.05
+% This function is released to the public domain; Any use is allowed.
+
+maxtab = [];
+mintab = [];
+
+v = v(:); % Just in case this wasn't a proper vector
+
+if nargin < 3
+  x = (1:length(v))';
+else
+  x = x(:);
+  if length(v)~= length(x)
+    error('Input vectors v and x must have same length');
+  end
+end
+
+if (length(delta(:)))>1
+  error('Input argument DELTA must be a scalar');
+end
+
+if delta <= 0
+  error('Input argument DELTA must be positive');
+end
+
+mn = Inf; mx = -Inf;
+mnpos = NaN; mxpos = NaN;
+
+lookformax = 1;
+
+for i=1:length(v)
+  this = v(i);
+  if this > mx, mx = this; mxpos = x(i); end
+  if this < mn, mn = this; mnpos = x(i); end
+
+  if lookformax
+    if this < mx-delta
+      maxtab = [maxtab ; mxpos mx];
+      mn = this; mnpos = x(i);
+      lookformax = 0;
+    end
+  else
+    if this > mn+delta
+      mintab = [mintab ; mnpos mn];
+      mx = this; mxpos = x(i);
+      lookformax = 1;
+    end
+  end
+end
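The scipy-based port in heart_rate_util.py filters peaks by absolute height (find_peaks(v, height=delta)), whereas this MATLAB original uses a drop-of-delta hysteresis: a maximum is only emitted once the signal has fallen by at least delta from the running maximum. The two can therefore disagree on signals with peaks below delta or on drifting baselines. For comparison, a direct Python transcription of the hysteresis logic above (a sketch for illustration, not part of the package; indices are 0-based here, 1-based in MATLAB):

```python
import numpy as np

def peakdet_hysteresis(v, delta):
    """Transcription of Billauer's peakdet: a maximum is emitted only after
    the signal has dropped by at least `delta` from the running maximum."""
    maxtab, mintab = [], []
    mn, mx = np.inf, -np.inf
    mnpos = mxpos = None
    lookformax = True
    for i, this in enumerate(np.asarray(v, dtype=float)):
        if this > mx:
            mx, mxpos = this, i
        if this < mn:
            mn, mnpos = this, i
        if lookformax:
            if this < mx - delta:  # dropped by delta: the running max was a peak
                maxtab.append((mxpos, mx))
                mn, mnpos = this, i
                lookformax = False
        else:
            if this > mn + delta:  # rose by delta: the running min was a valley
                mintab.append((mnpos, mn))
                mx, mxpos = this, i
                lookformax = True
    return maxtab, mintab

# The commented example from the Python port, reproduced here:
v = [0, 1, 2, 1, 0, 1, 2, 3, 2, 1, 0]
print(peakdet_hysteresis(v, 1))  # maxima at indices 2 and 7, minimum at 4
```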
paradigma/ppg/feat_extraction/ppg_features.m
@@ -0,0 +1,53 @@
+function [FeaturesPPG] = ppg_features(PPG,fs)
+% Extract features from the PPG signal, per 6 s window (the PPG input is a 6 s window of the PPG signal)
+N_feat = 10;
+FeaturesPPG = zeros(1, N_feat);
+% Time-domain features
+absPPG = abs(PPG);
+FeaturesPPG(1) = var(PPG);       % Feature 1: variance
+FeaturesPPG(2) = mean(absPPG);   % Feature 2: mean
+FeaturesPPG(3) = median(absPPG); % Feature 3: median
+FeaturesPPG(4) = kurtosis(PPG);  % Feature 4: kurtosis
+FeaturesPPG(5) = skewness(PPG);  % Feature 5: skewness
+
+window = 3*fs;        % 90 samples for Welch's method => fr = 2/3 = 0.67 Hz --> not an issue with a clear distinct frequency
+overlap = 0.5*window; % 45 samples overlap for Welch's method
+
+[P, f] = pwelch(PPG, window, overlap, [], fs);
+
+% Find the dominant frequency
+[~, maxIndex] = max(P);
+FeaturesPPG(6) = f(maxIndex); % Feature 6: dominant frequency
+
+ph_idx = find(f >= 0.75 & f <= 3); % find indices of f in the relevant physiological heart range 45-180 bpm
+[~, maxIndex_ph] = max(P(ph_idx)); % Index of dominant frequency
+dominantFrequency_ph = f(ph_idx(maxIndex_ph)); % Extract dominant frequency
+f_dom_band = find(f >= dominantFrequency_ph - 0.2 & f <= dominantFrequency_ph + 0.2);
+FeaturesPPG(7) = trapz(P(f_dom_band))/trapz(P); % Feature 7: relative power
+
+
+% Normalize the power spectrum
+pxx_norm = P / sum(P);
+
+% Compute spectral entropy
+FeaturesPPG(8) = -sum(pxx_norm .* log2(pxx_norm))/log2(length(PPG)); % Feature 8: spectral entropy, divided by log2(N) rather than min-max normalized, because the values can come from windows of different lengths
+
+% Signal to noise ratio
+Signal = var(PPG);
+Noise = var(absPPG);
+FeaturesPPG(9) = Signal/Noise; % Feature 9: surrogate of signal to noise ratio
+
+%% Autocorrelation features
+
+[acf, ~] = autocorr(PPG, 'NumLags', fs*3); % Compute the autocorrelation of the PPG signal with a maximum lag of 3 seconds (3 times the sampling rate)
+[peakValues, ~] = peakdet(acf, 0.01);
+sortedValues = sort(peakValues(:,2), 'descend'); % sort the peaks found in the correlogram
+if length(sortedValues) > 1
+    FeaturesPPG(10) = sortedValues(2); % the highest peak after the lag-0 peak, capturing the periodicity of the signal
+else
+    FeaturesPPG(10) = 0; % Set at 0 if there is no clear second peak
+end
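The Feature 8 comment above motivates dividing the spectral entropy by log2(length(PPG)) so that values from windows of different lengths stay comparable. A small Python sketch of the same computation; the 30 Hz rate is an illustrative assumption, and the eps guard is added here to avoid log2(0) and is not in the original:

```python
import numpy as np
from scipy.signal import welch

def spectral_entropy(x, fs):
    # Shannon entropy of the normalized Welch spectrum, scaled by log2(N)
    # as in ppg_features.m / extract_ppg_features.
    _, p = welch(x, fs, nperseg=3 * fs)
    p = p + np.finfo(float).eps  # guard against log2(0); not in the original
    p_norm = p / np.sum(p)
    return -np.sum(p_norm * np.log2(p_norm)) / np.log2(len(x))

fs = 30  # assumed PPG sampling rate, for illustration
t = np.arange(6 * fs) / fs
print(spectral_entropy(np.sin(2 * np.pi * 1.2 * t), fs))  # low: single tone
print(spectral_entropy(np.random.randn(t.size), fs))      # higher: broadband
```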