paradigma 0.1.5__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. paradigma/__init__.py +1 -3
  2. paradigma/constants.py +65 -0
  3. paradigma/feature_extraction.py +703 -0
  4. paradigma/gait_analysis.py +415 -0
  5. paradigma/gait_analysis_config.py +266 -0
  6. paradigma/heart_rate_analysis.py +127 -0
  7. paradigma/heart_rate_analysis_config.py +9 -0
  8. paradigma/heart_rate_util.py +173 -0
  9. paradigma/imu_preprocessing.py +232 -0
  10. paradigma/ppg/classifier/LR_PPG_quality.pkl +0 -0
  11. paradigma/ppg/classifier/LR_model.mat +0 -0
  12. paradigma/ppg/feat_extraction/acc_feature.m +20 -0
  13. paradigma/ppg/feat_extraction/peakdet.m +64 -0
  14. paradigma/ppg/feat_extraction/ppg_features.m +53 -0
  15. paradigma/ppg/glob_functions/extract_hr_segments.m +37 -0
  16. paradigma/ppg/glob_functions/extract_overlapping_segments.m +23 -0
  17. paradigma/ppg/glob_functions/jsonlab/AUTHORS.txt +41 -0
  18. paradigma/ppg/glob_functions/jsonlab/ChangeLog.txt +74 -0
  19. paradigma/ppg/glob_functions/jsonlab/LICENSE_BSD.txt +25 -0
  20. paradigma/ppg/glob_functions/jsonlab/LICENSE_GPLv3.txt +699 -0
  21. paradigma/ppg/glob_functions/jsonlab/README.txt +394 -0
  22. paradigma/ppg/glob_functions/jsonlab/examples/.svn/entries +368 -0
  23. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_jsonlab_basic.m.svn-base +180 -0
  24. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_ubjson_basic.m.svn-base +180 -0
  25. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example1.json.svn-base +23 -0
  26. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example2.json.svn-base +22 -0
  27. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example3.json.svn-base +11 -0
  28. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example4.json.svn-base +34 -0
  29. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_basictest.matlab.svn-base +662 -0
  30. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.m.svn-base +27 -0
  31. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.matlab.svn-base +144 -0
  32. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_speedtest.m.svn-base +21 -0
  33. paradigma/ppg/glob_functions/jsonlab/examples/demo_jsonlab_basic.m +180 -0
  34. paradigma/ppg/glob_functions/jsonlab/examples/demo_ubjson_basic.m +180 -0
  35. paradigma/ppg/glob_functions/jsonlab/examples/example1.json +23 -0
  36. paradigma/ppg/glob_functions/jsonlab/examples/example2.json +22 -0
  37. paradigma/ppg/glob_functions/jsonlab/examples/example3.json +11 -0
  38. paradigma/ppg/glob_functions/jsonlab/examples/example4.json +34 -0
  39. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_basictest.matlab +662 -0
  40. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.m +27 -0
  41. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.matlab +144 -0
  42. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_speedtest.m +21 -0
  43. paradigma/ppg/glob_functions/jsonlab/jsonopt.m +32 -0
  44. paradigma/ppg/glob_functions/jsonlab/loadjson.m +566 -0
  45. paradigma/ppg/glob_functions/jsonlab/loadubjson.m +528 -0
  46. paradigma/ppg/glob_functions/jsonlab/mergestruct.m +33 -0
  47. paradigma/ppg/glob_functions/jsonlab/savejson.m +475 -0
  48. paradigma/ppg/glob_functions/jsonlab/saveubjson.m +504 -0
  49. paradigma/ppg/glob_functions/jsonlab/varargin2struct.m +40 -0
  50. paradigma/ppg/glob_functions/sample_prob_final.m +49 -0
  51. paradigma/ppg/glob_functions/synchronization.m +76 -0
  52. paradigma/ppg/glob_functions/tsdf_scan_meta.m +22 -0
  53. paradigma/ppg/hr_functions/Long_TFD_JOT.m +37 -0
  54. paradigma/ppg/hr_functions/PPG_TFD_HR.m +59 -0
  55. paradigma/ppg/hr_functions/TFD toolbox JOT/.gitignore +4 -0
  56. paradigma/ppg/hr_functions/TFD toolbox JOT/CHANGELOG.md +23 -0
  57. paradigma/ppg/hr_functions/TFD toolbox JOT/LICENCE.md +27 -0
  58. paradigma/ppg/hr_functions/TFD toolbox JOT/README.md +251 -0
  59. paradigma/ppg/hr_functions/TFD toolbox JOT/README.pdf +0 -0
  60. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_kern.m +142 -0
  61. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_lag_kern.m +314 -0
  62. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_lag_kern.m +123 -0
  63. paradigma/ppg/hr_functions/TFD toolbox JOT/dec_tfd.m +154 -0
  64. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_di_gdtfd.m +194 -0
  65. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_li_gdtfd.m +200 -0
  66. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_nonsep_gdtfd.m +229 -0
  67. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_sep_gdtfd.m +241 -0
  68. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/di_gdtfd.m +157 -0
  69. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/li_gdtfd.m +190 -0
  70. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/nonsep_gdtfd.m +196 -0
  71. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/sep_gdtfd.m +199 -0
  72. paradigma/ppg/hr_functions/TFD toolbox JOT/full_tfd.m +144 -0
  73. paradigma/ppg/hr_functions/TFD toolbox JOT/load_curdir.m +13 -0
  74. paradigma/ppg/hr_functions/TFD toolbox JOT/pics/decimated_TFDs_examples.png +0 -0
  75. paradigma/ppg/hr_functions/TFD toolbox JOT/pics/full_TFDs_examples.png +0 -0
  76. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/check_dec_params_seq.m +79 -0
  77. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispEE.m +9 -0
  78. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispVars.m +26 -0
  79. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/disp_bytes.m +25 -0
  80. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_full.m +40 -0
  81. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_half.m +34 -0
  82. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/gen_LFM.m +29 -0
  83. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_analytic_signal.m +76 -0
  84. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_window.m +176 -0
  85. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/isreal_fn.m +11 -0
  86. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/padWin.m +97 -0
  87. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/vtfd.m +149 -0
  88. paradigma/ppg/preprocessing/preprocessing_imu.m +15 -0
  89. paradigma/ppg/preprocessing/preprocessing_ppg.m +13 -0
  90. paradigma/ppg_preprocessing.py +313 -0
  91. paradigma/preprocessing_config.py +69 -0
  92. paradigma/quantification.py +58 -0
  93. paradigma/tremor/TremorFeaturesAndClassification.m +345 -0
  94. paradigma/tremor/feat_extraction/DerivativesExtract.m +22 -0
  95. paradigma/tremor/feat_extraction/ExtractBandSignalsRMS.m +72 -0
  96. paradigma/tremor/feat_extraction/MFCCExtract.m +100 -0
  97. paradigma/tremor/feat_extraction/PSDBandPower.m +52 -0
  98. paradigma/tremor/feat_extraction/PSDEst.m +63 -0
  99. paradigma/tremor/feat_extraction/PSDExtrAxis.m +88 -0
  100. paradigma/tremor/feat_extraction/PSDExtrOpt.m +95 -0
  101. paradigma/tremor/preprocessing/InterpData.m +32 -0
  102. paradigma/tremor/weekly_aggregates/WeeklyAggregates.m +295 -0
  103. paradigma/util.py +50 -0
  104. paradigma/windowing.py +219 -0
  105. paradigma-0.3.0.dist-info/LICENSE +192 -0
  106. paradigma-0.3.0.dist-info/METADATA +79 -0
  107. paradigma-0.3.0.dist-info/RECORD +108 -0
  108. paradigma/dummy.py +0 -3
  109. paradigma-0.1.5.dist-info/LICENSE +0 -201
  110. paradigma-0.1.5.dist-info/METADATA +0 -18
  111. paradigma-0.1.5.dist-info/RECORD +0 -6
  112. {paradigma-0.1.5.dist-info → paradigma-0.3.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,127 @@
1
+ from typing import List
2
+ import numpy as np
3
+ from scipy.signal import welch
4
+ from sklearn.preprocessing import StandardScaler
5
+ from dateutil import parser
6
+
7
+ import tsdf
8
+ import tsdf.constants
9
+ from paradigma.heart_rate_analysis_config import HeartRateFeatureExtractionConfig
10
+ from paradigma.heart_rate_util import extract_ppg_features, calculate_power_ratio, read_PPG_quality_classifier
11
+ from paradigma.util import read_metadata, write_data, get_end_iso8601
12
+ from paradigma.constants import DataColumns, UNIX_TICKS_MS
13
+
14
+
15
def extract_signal_quality_features(input_path: str, classifier_path: str, output_path: str, config: HeartRateFeatureExtractionConfig) -> None:
    """Extract PPG signal-quality features per 6-second epoch and write them to TSDF output.

    Loads synchronized PPG and accelerometer data from ``input_path``, slides a
    6 s window (5 s overlap) over both streams, computes the 10 PPG quality
    features (scaled with the classifier's stored mu/sigma) and one
    accelerometer power-ratio feature per epoch, and writes the feature arrays
    plus their metadata to ``output_path``.

    Parameters
    ----------
    input_path : str
        Directory containing the preprocessed PPG and accelerometer TSDF files.
    classifier_path : str
        Path to the pickled PPG quality classifier (provides 'mu' and 'sigma').
    output_path : str
        Directory where the feature TSDF files are written.
    config : HeartRateFeatureExtractionConfig
        Provides the PPG and IMU sampling frequencies.
    """
    # Load PPG data
    metadata_time_ppg, metadata_samples_ppg = read_metadata(input_path, "PPG_meta.json", "PPG_time.bin", "PPG_samples.bin")
    df_ppg = tsdf.load_dataframe_from_binaries([metadata_time_ppg, metadata_samples_ppg], tsdf.constants.ConcatenationType.columns)
    arr_ppg = df_ppg[DataColumns.PPG].to_numpy()
    relative_time_ppg = df_ppg[DataColumns.TIME].to_numpy()

    # Load accelerometer data
    metadata_time_acc, metadata_samples_acc = read_metadata(input_path, "accelerometer_meta.json", "accelerometer_time.bin", "accelerometer_samples.bin")
    df_acc = tsdf.load_dataframe_from_binaries([metadata_time_acc, metadata_samples_acc], tsdf.constants.ConcatenationType.columns)
    arr_acc = df_acc[[DataColumns.ACCELEROMETER_X, DataColumns.ACCELEROMETER_Y, DataColumns.ACCELEROMETER_Z]].to_numpy()

    sampling_frequency_ppg = config.sampling_frequency_ppg
    sampling_frequency_imu = config.sampling_frequency_imu

    # Windowing parameters
    epoch_length = 6  # in seconds
    overlap = 5  # in seconds

    # Number of samples in epoch
    samples_per_epoch_ppg = int(epoch_length * sampling_frequency_ppg)
    samples_per_epoch_acc = int(epoch_length * sampling_frequency_imu)

    # Number of samples to shift for each epoch
    samples_shift_ppg = int((epoch_length - overlap) * sampling_frequency_ppg)
    samples_shift_acc = int((epoch_length - overlap) * sampling_frequency_imu)

    pwelchwin_acc = int(3 * sampling_frequency_imu)  # window length for pwelch
    pwelchwin_ppg = int(3 * sampling_frequency_ppg)  # window length for pwelch
    noverlap_acc = int(0.5 * pwelchwin_acc)  # overlap for pwelch
    noverlap_ppg = int(0.5 * pwelchwin_ppg)  # overlap for pwelch

    f_bin_res = 0.05  # the threshold is set based on this binning
    nfft_ppg = np.arange(0, sampling_frequency_ppg / 2, f_bin_res)  # frequency bins for pwelch ppg
    nfft_acc = np.arange(0, sampling_frequency_imu / 2, f_bin_res)  # frequency bins for pwelch imu

    features_ppg_scaled = []
    feature_acc = []
    t_unix_feat_total = []
    count = 0
    acc_idx = 0

    # Read the classifier (it contains mu and sigma); clf['model'] is not used here
    clf = read_PPG_quality_classifier(classifier_path)
    arr_mu = clf['mu'][:, 0]
    arr_sigma = clf['sigma'][:, 0]

    # Pre-fitted scaler reproducing the training-time standardization
    scaler = StandardScaler()
    scaler.mean_ = arr_mu
    scaler.scale_ = arr_sigma

    ppg_start_time = parser.parse(metadata_time_ppg.start_iso8601)

    # Loop over 6s segments for both PPG and IMU and calculate features
    for i in range(0, len(arr_ppg) - samples_per_epoch_ppg + 1, samples_shift_ppg):
        if acc_idx + samples_per_epoch_acc > len(arr_acc):
            # Last epoch: the IMU segment is shorter than 6 seconds, stop
            break
        acc_segment = arr_acc[acc_idx:acc_idx + samples_per_epoch_acc, :]  # IMU window (6 seconds)
        ppg_segment = arr_ppg[i:i + samples_per_epoch_ppg]  # PPG window (6 seconds)

        count += 1

        # Feature extraction + scaling
        features = extract_ppg_features(ppg_segment, sampling_frequency_ppg)
        features = features.reshape(1, -1)
        features_ppg_scaled.append(scaler.transform(features)[0])

        # PSD (power spectral density) of IMU and PPG.
        # BUG FIX: the accelerometer window has shape (samples, 3); welch's
        # default axis=-1 would compute the PSD along the 3-sample axis, so
        # pass axis=0. Also pass the computed noverlap values instead of None.
        f1, pxx1 = welch(acc_segment, sampling_frequency_imu, window='hann', nperseg=pwelchwin_acc, noverlap=noverlap_acc, nfft=len(nfft_acc), axis=0)
        PSD_imu = np.sum(pxx1, axis=1)  # sum the PSD over the three axes
        f2, pxx2 = welch(ppg_segment, sampling_frequency_ppg, window='hann', nperseg=pwelchwin_ppg, noverlap=noverlap_ppg, nfft=len(nfft_ppg))
        # BUG FIX: np.sum(pxx2) collapsed the PSD to a scalar; keep the spectrum
        PSD_ppg = pxx2

        # Power ratio of the accelerometer signal in the PPG frequency range
        feature_acc.append(calculate_power_ratio(f1, PSD_imu, f2, PSD_ppg))

        # Time channel: absolute unix time in ms.
        # BUG FIX: the original added a float to a datetime (TypeError); use
        # the start time's unix timestamp in seconds.
        # NOTE(review): assumes relative_time_ppg is in seconds — TODO confirm.
        t_unix_feat_total.append((relative_time_ppg[i] + ppg_start_time.timestamp()) * UNIX_TICKS_MS)
        acc_idx += samples_shift_acc  # advance the IMU index

    # Convert lists to numpy arrays
    features_ppg_scaled = np.array(features_ppg_scaled)
    feature_acc = np.array(feature_acc)
    t_unix_feat_total = np.array(t_unix_feat_total)

    # Synchronization information
    # TODO: store this, as this is needed for the HR pipeline
    # v_sync_ppg_total = np.array([
    #     ppg_indices[0],  # start index
    #     ppg_indices[1],  # end index
    #     segment_ppg[0],  # Segment index
    #     count  # Number of epochs in the segment
    # ])

    # Build output metadata (one channel per feature)
    metadata_features_ppg = metadata_samples_ppg.copy()
    metadata_features_ppg.channels = ["variance", "mean", "median", "kurtosis", "skewness", "dominant_frequency", "relative_power", "spectral_entropy", "signal_noise_ratio", "second_highest_peak"]
    metadata_features_acc = metadata_samples_acc.copy()
    metadata_features_acc.channels = ["power_ratio"]
    metadata_time_ppg = metadata_time_ppg.copy()
    metadata_time_ppg.channels = ["time"]
    metadata_time_acc = metadata_time_acc.copy()
    metadata_time_acc.channels = ["time"]

    write_data(metadata_time_ppg, metadata_features_ppg, output_path, 'features_ppg_meta.json', features_ppg_scaled)
    write_data(metadata_time_acc, metadata_features_acc, output_path, 'feature_acc_meta.json', feature_acc)
127
+
@@ -0,0 +1,9 @@
1
+
2
+
3
+
4
class HeartRateFeatureExtractionConfig:
    """Configuration for PPG signal-quality feature extraction."""

    def __init__(self) -> None:
        # Sampling rates (Hz) of the wrist sensor streams.
        self.sampling_frequency_ppg = 30
        self.sampling_frequency_imu = 100
@@ -0,0 +1,173 @@
1
+ from typing import List, Tuple, Union
2
+ import pickle
3
+ import pandas as pd
4
+ import numpy as np
5
+ from scipy.signal import welch, find_peaks
6
+ from scipy.stats import kurtosis, skew
7
+
8
+
9
def extract_ppg_features(arr_ppg: np.ndarray, sampling_frequency: int) -> np.ndarray:
    """Extract 10 signal-quality features from one PPG window.

    Features: variance, mean(|x|), median(|x|), kurtosis, skewness, dominant
    frequency, relative power around the dominant physiological frequency,
    spectral entropy, a variance-based SNR surrogate, and the second-highest
    autocorrelation peak (periodicity).

    Parameters
    ----------
    arr_ppg : np.ndarray
        One window of PPG samples (1-D).
    sampling_frequency : int
        Sampling frequency of the PPG signal in Hz.

    Returns
    -------
    np.ndarray
        Array of 10 features in the order listed above.
    """
    # Number of features
    feature_count = 10

    # Initialize features array
    features_ppg = np.zeros(feature_count)

    # Time-domain features
    absPPG = np.abs(arr_ppg)
    features_ppg[0] = np.var(arr_ppg)  # Feature 1: variance
    features_ppg[1] = np.mean(absPPG)  # Feature 2: mean
    features_ppg[2] = np.median(absPPG)  # Feature 3: median
    features_ppg[3] = kurtosis(arr_ppg)  # Feature 4: kurtosis
    features_ppg[4] = skew(arr_ppg)  # Feature 5: skewness

    # Welch PSD: 3 s window (=> 0.67 Hz resolution), 50 % overlap
    window = 3 * sampling_frequency
    overlap = int(0.5 * window)

    f, P = welch(arr_ppg, sampling_frequency, nperseg=window, noverlap=overlap)

    # Find the dominant frequency
    maxIndex = np.argmax(P)
    features_ppg[5] = f[maxIndex]  # Feature 6: dominant frequency

    # Indices of f in relevant physiological heart range 45-180 bpm (0.75 - 3 Hz)
    ph_idx = np.where((f >= 0.75) & (f <= 3))[0]
    maxIndex_ph = np.argmax(P[ph_idx])
    dominantFrequency_ph = f[ph_idx[maxIndex_ph]]
    f_dom_band = np.where((f >= dominantFrequency_ph - 0.2) & (f <= dominantFrequency_ph + 0.2))[0]
    features_ppg[6] = np.trapz(P[f_dom_band]) / np.trapz(P)  # Feature 7: relative power

    # Normalize the power spectrum
    pxx_norm = P / np.sum(P)

    # Feature 8: spectral entropy, normalized by the window length
    # NOTE(review): zero PSD bins would produce NaN via log2(0) — same as the
    # MATLAB reference; confirm whether zero bins can occur in practice.
    features_ppg[7] = -np.sum(pxx_norm * np.log2(pxx_norm)) / np.log2(len(arr_ppg))

    # Feature 9: surrogate of signal to noise ratio
    arr_signal = np.var(arr_ppg)
    arr_noise = np.var(absPPG)
    features_ppg[8] = arr_signal / arr_noise

    # Autocorrelation up to a 3 s lag
    ppg_series = pd.Series(arr_ppg)
    autocorrelations = [ppg_series.autocorr(lag=i) for i in range(sampling_frequency * 3)]

    # Peaks in the correlogram; peakdet returns (maxima, minima) as
    # (index, value) pairs
    peaks, _ = peakdet(np.array(autocorrelations), delta=0.01)

    # BUG FIX: the original applied np.sort to the list of (index, value)
    # pairs, which sorts each pair internally and then indexed a whole row.
    # Mirror the MATLAB reference instead: sort the peak *values* descending
    # and take the second-highest as the periodicity measure.
    peak_values = np.sort(np.array([value for _, value in peaks]))[::-1]

    if len(peak_values) > 1:
        features_ppg[9] = peak_values[1]  # Feature 10: the second-highest peak
    else:
        features_ppg[9] = 0  # Set to 0 if there is no clear second peak

    return features_ppg
67
+
68
+ # Example usage:
69
+ # PPG = np.random.randn(300) # Example PPG signal, replace with actual data
70
+ # fs = 50 # Example sampling frequency, replace with actual sampling rate
71
+ # features_df = ppg_features(PPG, fs)
72
+ # print(features_df)
73
+
74
+
75
def peakdet(v: np.ndarray, delta, x: Union[np.ndarray, None] = None) -> Tuple[List[Tuple[int, float]], List[Tuple[int, float]]]:
    """
    Detect peaks in a vector.

    A point counts as a peak only if it stands out from its surroundings by at
    least ``delta`` (peak prominence), mirroring the classic MATLAB ``peakdet``
    routine this function replaces.

    Args:
        v (numpy array): Input vector.
        delta (float): Minimum prominence of a peak relative to its surrounding values.
        x (numpy array, optional): Indices corresponding to the values in v. If not provided, indices are generated.

    Returns:
        maxtab (list of tuples): Local maxima as (index, value) pairs.
        mintab (list of tuples): Local minima as (index, value) pairs.

    Raises:
        ValueError: If ``x`` is provided and its length differs from ``v``.
    """
    if x is None:
        x = np.arange(len(v))
    elif len(v) != len(x):
        raise ValueError("Input vectors v and x must have the same length")

    # BUG FIX: the original used find_peaks(height=delta), which filters on
    # absolute peak *height*. The documented contract ("minimum difference
    # between a peak and its surrounding values") and the MATLAB original
    # require a *prominence* criterion.
    max_indices, _ = find_peaks(v, prominence=delta)
    maxtab = [(x[idx], v[idx]) for idx in max_indices]

    # Detect minima by applying the same criterion to the inverted signal
    min_indices, _ = find_peaks(-v, prominence=delta)
    mintab = [(x[idx], v[idx]) for idx in min_indices]

    return maxtab, mintab
104
+
105
+ # Example usage:
106
+ # v = [0, 1, 2, 1, 0, 1, 2, 3, 2, 1, 0]
107
+ # delta = 1
108
+ # maxtab, mintab = peakdet(v, delta)
109
+ # print("Maxima:", maxtab)
110
+ # print("Minima:", mintab)
111
+
112
+
113
def calculate_power_ratio(f1: np.ndarray, PSD_acc: np.ndarray, f2: np.ndarray, PSD_ppg: np.ndarray) -> float:
    """
    Calculates the power ratio of the accelerometer signal in the PPG frequency range.

    The ratio relates the accelerometer power near the dominant PPG frequency
    (and its first harmonic) to the total accelerometer power.

    Args:
        f1 (numpy.ndarray): Frequency bins for the accelerometer signal.
        PSD_acc (numpy.ndarray): Power Spectral Density of the accelerometer signal.
        f2 (numpy.ndarray): Frequency bins for the PPG signal.
        PSD_ppg (numpy.ndarray): Power Spectral Density of the PPG signal.

    Returns:
        float: The power ratio of the accelerometer signal in the PPG frequency range.
    """
    # Dominant PPG frequency: location of the strongest PSD bin
    dominant_ppg_freq = f2[np.argmax(PSD_ppg)]

    # Integrate accelerometer power in a 3-bin band around the dominant
    # PPG frequency and around its first harmonic (2x).
    band_power = 0.0
    for harmonic in (1, 2):
        center_idx = np.argmin(np.abs(harmonic * dominant_ppg_freq - f1))
        band = np.arange(center_idx - 1, center_idx + 2)
        band_power += np.trapz(PSD_acc[band], f1[band])

    # Normalize by the total accelerometer power
    total_power = np.trapz(PSD_acc, f1)
    return band_power / total_power
147
+
148
+ # Example usage:
149
+ # f1 = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
150
+ # PSD_acc = np.array([1, 2, 3, 2, 1])
151
+ # f2 = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
152
+ # PSD_ppg = np.array([1, 3, 2, 1, 0.5])
153
+ # result = acc_feature(f1, PSD_acc, f2, PSD_ppg)
154
+ # print(result)
155
+
156
def read_PPG_quality_classifier(classifier_path: str):
    """
    Read the PPG quality classifier from a file.

    Parameters
    ----------
    classifier_path : str
        The path to the pickled classifier file.

    Returns
    -------
    dict
        The classifier dictionary.
    """
    # Context manager guarantees the file handle is closed after loading.
    with open(classifier_path, 'rb') as handle:
        return pickle.load(handle)
173
+
@@ -0,0 +1,232 @@
1
+ from pathlib import Path
2
+ from typing import List, Union
3
+ import numpy as np
4
+ import pandas as pd
5
+ from scipy import signal
6
+ from scipy.interpolate import CubicSpline
7
+
8
+ import tsdf
9
+ from paradigma.constants import DataColumns, TimeUnit
10
+ from paradigma.util import write_data, read_metadata
11
+ from paradigma.preprocessing_config import IMUPreprocessingConfig
12
+
13
+
14
def preprocess_imu_data(input_path: Union[str, Path], output_path: Union[str, Path], config: IMUPreprocessingConfig) -> None:
    """Preprocess raw IMU data and write per-sensor TSDF output.

    Steps: load the raw TSDF binaries, rename rotation/acceleration columns to
    gyroscope/accelerometer, convert delta-ms timestamps to relative time,
    resample to the configured frequency, normalize units and handedness,
    split each accelerometer channel into a filtered and a gravity component
    with a Butterworth filter, and write accelerometer and gyroscope streams
    to separate TSDF files in ``output_path``.

    Parameters
    ----------
    input_path : str or Path
        Directory containing the raw TSDF metadata and binaries.
    output_path : str or Path
        Directory where the preprocessed sensor files are written.
    config : IMUPreprocessingConfig
        Filenames, column names, sampling frequency, filter settings and units.
    """

    # Load data
    metadata_time, metadata_samples = read_metadata(str(input_path), str(config.meta_filename),
                                                    str(config.time_filename), str(config.values_filename))
    df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)

    # Rename columns: raw data uses rotation_*/acceleration_* names
    df = df.rename(columns={f'rotation_{a}': f'gyroscope_{a}' for a in ['x', 'y', 'z']})
    df = df.rename(columns={f'acceleration_{a}': f'accelerometer_{a}' for a in ['x', 'y', 'z']})

    # convert to relative seconds from delta milliseconds
    df[config.time_colname] = transform_time_array(
        time_array=df[config.time_colname],
        scale_factor=1000,
        input_unit_type = TimeUnit.DIFFERENCE_MS,
        output_unit_type = TimeUnit.RELATIVE_MS)

    # Resample all IMU channels to the configured frequency (scale factors
    # from the metadata are applied before interpolation)
    df = resample_data(
        df=df,
        time_column=config.time_colname,
        time_unit_type=TimeUnit.RELATIVE_MS,
        unscaled_column_names = list(config.d_channels_imu.keys()),
        scale_factors=metadata_samples.scale_factors,
        resampling_frequency=config.sampling_frequency)

    # Mirror the x-axis for left-worn watches so both sides share one frame
    if config.side_watch == 'left':
        df[DataColumns.ACCELEROMETER_X] *= -1

    for col in config.d_channels_accelerometer.keys():

        # change to correct units [g]
        if config.acceleration_units == 'm/s^2':
            df[col] /= 9.81

        # High-pass -> movement ('filt'), low-pass -> gravity ('grav'),
        # both at the same cutoff frequency
        for result, side_pass in zip(['filt', 'grav'], ['hp', 'lp']):
            df[f'{result}_{col}'] = butterworth_filter(
                single_sensor_col=np.array(df[col]),
                order=config.filter_order,
                cutoff_frequency=config.lower_cutoff_frequency,
                passband=side_pass,
                sampling_frequency=config.sampling_frequency,
            )

        # Replace the raw channel by its filtered (movement) component
        df = df.drop(columns=[col])
        df = df.rename(columns={f'filt_{col}': col})

    # Store data: one TSDF file pair per sensor
    for sensor, units in zip(['accelerometer', 'gyroscope'], ['g', config.rotation_units]):
        # Select the time column plus every column belonging to this sensor
        df_sensor = df[[config.time_colname] + [x for x in df.columns if sensor in x]]

        metadata_samples.channels = [x for x in df.columns if sensor in x]
        metadata_samples.units = list(np.repeat(units, len(metadata_samples.channels)))
        metadata_samples.file_name = f'{sensor}_samples.bin'

        metadata_time.file_name = f'{sensor}_time.bin'
        metadata_time.units = ['time_relative_ms']

        write_data(metadata_time, metadata_samples, output_path, f'{sensor}_meta.json', df_sensor)
74
+
75
def transform_time_array(
    time_array: pd.Series,
    scale_factor: float,
    input_unit_type: str,
    output_unit_type: str,
    start_time: float = 0.0,
) -> np.ndarray:
    """
    Convert a time array between unit types while scaling its values.

    The input is first normalized to relative time (scaled by
    ``scale_factor``), then converted to the requested output unit type.

    Parameters
    ----------
    time_array : pd.Series
        The time array in milliseconds to transform.
    scale_factor : float
        The scale factor to apply to the time array.
    input_unit_type : str
        The time unit type of the input time array. Raw PPP data was in `TimeUnit.DIFFERENCE_MS`.
    output_unit_type : str
        The time unit type of the output time array. The processing is often done in `TimeUnit.RELATIVE_MS`.
    start_time : float, optional
        The start time of the time array in UNIX milliseconds (default is 0.0)

    Returns
    -------
    time_array
        The transformed time array in milliseconds, with the specified time unit type.
    """
    # Step 1: bring the input to relative time, applying the scale factor.
    if input_unit_type == TimeUnit.DIFFERENCE_MS:
        # Deltas: cumulative sum reconstructs the original time series.
        relative = np.cumsum(np.double(time_array)) / scale_factor
    elif input_unit_type == TimeUnit.ABSOLUTE_MS:
        # Default the start time to the first timestamp when not supplied.
        if np.isclose(start_time, 0.0, rtol=1e-09, atol=1e-09):
            start_time = time_array[0]
        relative = (time_array - start_time) / scale_factor
    elif input_unit_type == TimeUnit.RELATIVE_MS:
        relative = time_array / scale_factor
    else:
        # Unknown input type: leave the values untouched.
        relative = time_array

    # Step 2: convert from relative time to the requested output type.
    if output_unit_type == TimeUnit.ABSOLUTE_MS:
        # Shift each element by the start time.
        return relative + start_time
    if output_unit_type == TimeUnit.DIFFERENCE_MS:
        # Differences between consecutive elements, anchored at start_time.
        return np.diff(np.insert(relative, 0, start_time))
    # TimeUnit.RELATIVE_MS (or anything else): already in the right form.
    return relative
128
+
129
+
130
def resample_data(
    df: pd.DataFrame,
    time_column : str,
    time_unit_type: str,
    unscaled_column_names: List[str],
    resampling_frequency: int,
    scale_factors: List[float] = [],
    start_time: float = 0.0,
) -> pd.DataFrame:
    """
    Resamples the IMU data to the resampling frequency. The data is scaled before resampling.

    Parameters
    ----------
    df : pd.DataFrame
        The data to resample.
    time_column : str
        The name of the time column.
    time_unit_type : str
        The time unit type of the time array. The method currently works only for `TimeUnit.RELATIVE_MS`.
    unscaled_column_names : List[str]
        The names of the columns to resample.
    resampling_frequency : int
        The frequency to resample the data to.
    scale_factors : list, optional
        The scale factors to apply to the values before resampling (default is []).
    start_time : float, optional
        The start time of the time array, which is required if it is in absolute format (default is 0.0).

    Returns
    -------
    pd.DataFrame
        The resampled data.

    Raises
    ------
    ValueError
        If the time is absolute without a start_time, or if the time array
        is not strictly increasing.
    """
    # We need a start_time if the time is in absolute time format
    if time_unit_type == TimeUnit.ABSOLUTE_MS and start_time == 0.0:
        raise ValueError("start_time is required for absolute time format")

    # get time and values
    time_abs_array = np.array(df[time_column])
    values_unscaled = np.array(df[unscaled_column_names])

    # scale data.
    # BUG FIX: the original checked `len(scale_factors) != 0 and scale_factors
    # is not None` (len() on None would raise before the None check) and left
    # `scaled_values` undefined when no scale factors were given, causing a
    # NameError below. Fall back to the unscaled values instead.
    if scale_factors is not None and len(scale_factors) != 0:
        scaled_values = values_unscaled * scale_factors
    else:
        scaled_values = values_unscaled

    # Cubic-spline interpolation requires a strictly increasing time base;
    # checked once here (hoisted out of the per-column loop — it is invariant).
    if not np.all(np.diff(time_abs_array) > 0):
        raise ValueError("time_abs_array is not strictly increasing")

    # resample: uniform grid from start_time to the last timestamp
    t_resampled = np.arange(start_time, time_abs_array[-1], 1 / resampling_frequency)

    # create dataframe with the new time base
    df = pd.DataFrame(t_resampled, columns=[time_column])

    # interpolate each sensor channel onto the new time base
    for j, sensor_col in enumerate(unscaled_column_names):
        cs = CubicSpline(time_abs_array, scaled_values.T[j])
        df[sensor_col] = cs(df[time_column])

    return df
192
+
193
+
194
def butterworth_filter(
    single_sensor_col: np.ndarray,
    order: int,
    cutoff_frequency: Union[float, List[float]],
    passband: str,
    sampling_frequency: int,
):
    """
    Applies a zero-phase Butterworth filter to a single sensor column.

    Parameters
    ----------
    single_sensor_col: np.ndarray
        A single column containing sensor data in float format
    order: int
        The exponential order of the filter
    cutoff_frequency: float or List[float]
        The frequency at which the gain drops to 1/sqrt(2) that of the passband. If passband is 'band', then cutoff_frequency should be a list of two floats.
    passband: str
        Type of passband: ['hp', 'lp' or 'band']
    sampling_frequency: int
        The sampling frequency of the sensor data

    Returns
    -------
    The filtered sensor column after forward-backward Butterworth filtering.
    """
    # Design the filter in second-order-section form for numerical stability.
    second_order_sections = signal.butter(
        N=order,
        Wn=cutoff_frequency,
        btype=passband,
        analog=False,
        fs=sampling_frequency,
        output="sos",
    )
    # Forward-backward filtering cancels the phase distortion.
    return signal.sosfiltfilt(second_order_sections, single_sensor_col)
232
+
Binary file
@@ -0,0 +1,20 @@
1
function acc_power_ratio = acc_feature(f1, PSD_acc, f2, PSD_ppg)
% This function calculates the power ratio of the accelerometer signal in the PPG frequency range.
% The power ratio is defined as the ratio of the power in the PPG frequency range to the total power.
%
% Inputs:
%   f1      - frequency bins of the accelerometer PSD
%   PSD_acc - power spectral density of the accelerometer signal
%   f2      - frequency bins of the PPG PSD
%   PSD_ppg - power spectral density of the PPG signal
% Output:
%   acc_power_ratio - fraction of total accelerometer power located around
%                     the dominant PPG frequency and its first harmonic

% Locate the dominant PPG frequency (strongest PSD bin)
[~, max_PPG_psd_idx] = max(PSD_ppg);
max_PPG_freq_psd = f2(max_PPG_psd_idx);

%%---check dominant frequency (df) indices----%%
% Closest accelerometer bin to the dominant PPG frequency, +/- 1 bin
[~, corr_acc_psd_df_idx] = min(abs(max_PPG_freq_psd-f1));

df_idx = corr_acc_psd_df_idx-1:corr_acc_psd_df_idx+1;

%%---check first harmonic (fh) frequency indices----%%
% Same 3-bin band around twice the dominant PPG frequency
[~, corr_acc_psd_fh_idx] = min(abs(max_PPG_freq_psd*2-f1));
fh_idx = corr_acc_psd_fh_idx-1:corr_acc_psd_fh_idx+1;

%%---calculate power ratio---%%
% NOTE(review): if the closest bin is the first one, df_idx/fh_idx start at
% index 0, which is invalid in MATLAB — confirm inputs avoid this edge case.
acc_power_PPG_range = trapz(f1(df_idx), PSD_acc(df_idx)) + trapz(f1(fh_idx), PSD_acc(fh_idx));
acc_power_total = trapz(f1, PSD_acc);

acc_power_ratio = acc_power_PPG_range/acc_power_total;
@@ -0,0 +1,64 @@
1
function [maxtab, mintab]=peakdet(v, delta, x)
%PEAKDET Detect peaks in a vector
%        [MAXTAB, MINTAB] = PEAKDET(V, DELTA) finds the local
%        maxima and minima ("peaks") in the vector V.
%        MAXTAB and MINTAB consists of two columns. Column 1
%        contains indices in V, and column 2 the found values.
%
%        With [MAXTAB, MINTAB] = PEAKDET(V, DELTA, X) the indices
%        in MAXTAB and MINTAB are replaced with the corresponding
%        X-values.
%
%        A point is considered a maximum peak if it has the maximal
%        value, and was preceded (to the left) by a value lower by
%        DELTA.

% Eli Billauer, 3.4.05
% This function is released to the public domain; Any use is allowed.

maxtab = [];
mintab = [];

v = v(:); % Just in case this wasn't a proper vector

% Generate indices when X is not supplied; otherwise validate its length
if nargin < 3
  x = (1:length(v))';
else
  x = x(:);
  if length(v)~= length(x)
    error('Input vectors v and x must have same length');
  end
end

% DELTA must be a positive scalar for the alternation logic below to work
if (length(delta(:)))>1
  error('Input argument DELTA must be a scalar');
end

if delta <= 0
  error('Input argument DELTA must be positive');
end

% Running extrema and their positions
mn = Inf; mx = -Inf;
mnpos = NaN; mxpos = NaN;

% Alternate between searching for a maximum and a minimum: a maximum is
% committed once the signal has dropped DELTA below it, and vice versa.
lookformax = 1;

for i=1:length(v)
  this = v(i);
  if this > mx, mx = this; mxpos = x(i); end
  if this < mn, mn = this; mnpos = x(i); end

  if lookformax
    if this < mx-delta
      % Signal dropped DELTA below the running maximum: record the peak
      maxtab = [maxtab ; mxpos mx];
      mn = this; mnpos = x(i);
      lookformax = 0;
    end
  else
    if this > mn+delta
      % Signal rose DELTA above the running minimum: record the valley
      mintab = [mintab ; mnpos mn];
      mx = this; mxpos = x(i);
      lookformax = 1;
    end
  end
end
@@ -0,0 +1,53 @@
1
function [FeaturesPPG] = ppg_features(PPG,fs)
% extract features from the PPG signal, per 6sec window (PPG input is a 6sec window of PPG signal)
%
% Inputs:
%   PPG - one 6-second window of PPG samples
%   fs  - sampling frequency in Hz
% Output:
%   FeaturesPPG - 1x10 vector: variance, mean(|x|), median(|x|), kurtosis,
%                 skewness, dominant frequency, relative power, spectral
%                 entropy, SNR surrogate, second-highest autocorrelation peak
N_feat = 10;
FeaturesPPG = zeros(1, N_feat);
% Time-domain features
absPPG = abs(PPG);
FeaturesPPG(1) = var(PPG);     % Feature 1: variance
FeaturesPPG(2) = mean(absPPG); % Feature 2: mean
FeaturesPPG(3) = median(absPPG); % Feature 3: median
FeaturesPPG(4) = kurtosis(PPG);  % Feature 4: kurtosis
FeaturesPPG(5) = skewness(PPG);  % Feature 5: skewness

window = 3*fs;   % 90 samples for Welch's method => fr = 2/3 = 0.67 Hz --> not an issue with a clear distinct frequency
overlap = 0.5*window; % 45 samples overlap for Welch's Method

[P, f] = pwelch(PPG, window, overlap, [], fs);

% Find the dominant frequency
[~, maxIndex] = max(P);
FeaturesPPG(6) = f(maxIndex); % Feature 6: dominant frequency

ph_idx = find(f >= 0.75 & f <= 3);  % find indices of f in relevant physiological heart range 45-180 bpm
[~, maxIndex_ph] = max(P(ph_idx));  % Index of dominant frequency
dominantFrequency_ph = f(ph_idx(maxIndex_ph)); % Extract dominant frequency
f_dom_band = find(f >= dominantFrequency_ph - 0.2 & f <= dominantFrequency_ph + 0.2); % 0.4 Hz band around the dominant frequency
FeaturesPPG(7) = trapz(P(f_dom_band))/trapz(P); % Feature 7 = relative power


% Normalize the power spectrum
pxx_norm = P / sum(P);

% Compute spectral entropy
% NOTE(review): zero PSD bins would make log2(pxx_norm) -Inf — confirm
% whether this can occur with real windows.
FeaturesPPG(8) = -sum(pxx_norm .* log2(pxx_norm))/log2(length(PPG)); % Feature 8 = spectral entropy --> normalize between 0 and 1! Or should we perform this operation at the min-max normalization! No because the values can come from different lengths!

% Signal to noise ratio
Signal = var(PPG);
Noise = var(absPPG);
FeaturesPPG(9) = Signal/Noise; % Feature 9 = surrogate of signal to noise ratio

%% Autocorrelation features

[acf, ~] = autocorr(PPG, 'NumLags', fs*3); % Compute the autocorrelation of the PPG signal with a maximum lag of 3 seconds (or 3 time the sampling rate)
[peakValues, ~] = peakdet(acf, 0.01); % Local maxima of the correlogram (index, value pairs)
sortedValues = sort(peakValues(:,2), 'descend'); % sort the peaks found in the corellogram
if length(sortedValues) > 1
    FeaturesPPG(10) = sortedValues(2); % determine the second peak as the highest peak after the peak at lag=0, the idea is to determine the periodicity of the signal
else
    FeaturesPPG(10) = 0; % Set at 0 if there is no clear second peak
end
50
+
51
+
52
+
53
+