paradigma-0.3.2-py3-none-any.whl → paradigma-0.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. paradigma/assets/gait_detection_clf_package.pkl +0 -0
  2. paradigma/assets/gait_filtering_clf_package.pkl +0 -0
  3. paradigma/assets/ppg_quality_clf_package.pkl +0 -0
  4. paradigma/assets/tremor_detection_clf_package.pkl +0 -0
  5. paradigma/classification.py +115 -0
  6. paradigma/config.py +314 -0
  7. paradigma/constants.py +48 -7
  8. paradigma/feature_extraction.py +811 -547
  9. paradigma/pipelines/__init__.py +0 -0
  10. paradigma/pipelines/gait_pipeline.py +727 -0
  11. paradigma/pipelines/heart_rate_pipeline.py +426 -0
  12. paradigma/pipelines/heart_rate_utils.py +780 -0
  13. paradigma/pipelines/tremor_pipeline.py +299 -0
  14. paradigma/preprocessing.py +363 -0
  15. paradigma/segmenting.py +396 -0
  16. paradigma/testing.py +416 -0
  17. paradigma/util.py +393 -16
  18. paradigma-0.4.1.dist-info/METADATA +138 -0
  19. paradigma-0.4.1.dist-info/RECORD +22 -0
  20. {paradigma-0.3.2.dist-info → paradigma-0.4.1.dist-info}/WHEEL +1 -1
  21. paradigma/gait_analysis.py +0 -415
  22. paradigma/gait_analysis_config.py +0 -266
  23. paradigma/heart_rate_analysis.py +0 -127
  24. paradigma/heart_rate_analysis_config.py +0 -9
  25. paradigma/heart_rate_util.py +0 -173
  26. paradigma/imu_preprocessing.py +0 -232
  27. paradigma/ppg/classifier/LR_PPG_quality.pkl +0 -0
  28. paradigma/ppg/classifier/LR_model.mat +0 -0
  29. paradigma/ppg/feat_extraction/acc_feature.m +0 -20
  30. paradigma/ppg/feat_extraction/peakdet.m +0 -64
  31. paradigma/ppg/feat_extraction/ppg_features.m +0 -53
  32. paradigma/ppg/glob_functions/extract_hr_segments.m +0 -37
  33. paradigma/ppg/glob_functions/extract_overlapping_segments.m +0 -23
  34. paradigma/ppg/glob_functions/jsonlab/AUTHORS.txt +0 -41
  35. paradigma/ppg/glob_functions/jsonlab/ChangeLog.txt +0 -74
  36. paradigma/ppg/glob_functions/jsonlab/LICENSE_BSD.txt +0 -25
  37. paradigma/ppg/glob_functions/jsonlab/LICENSE_GPLv3.txt +0 -699
  38. paradigma/ppg/glob_functions/jsonlab/README.txt +0 -394
  39. paradigma/ppg/glob_functions/jsonlab/examples/.svn/entries +0 -368
  40. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_jsonlab_basic.m.svn-base +0 -180
  41. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_ubjson_basic.m.svn-base +0 -180
  42. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example1.json.svn-base +0 -23
  43. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example2.json.svn-base +0 -22
  44. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example3.json.svn-base +0 -11
  45. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example4.json.svn-base +0 -34
  46. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_basictest.matlab.svn-base +0 -662
  47. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.m.svn-base +0 -27
  48. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.matlab.svn-base +0 -144
  49. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_speedtest.m.svn-base +0 -21
  50. paradigma/ppg/glob_functions/jsonlab/examples/demo_jsonlab_basic.m +0 -180
  51. paradigma/ppg/glob_functions/jsonlab/examples/demo_ubjson_basic.m +0 -180
  52. paradigma/ppg/glob_functions/jsonlab/examples/example1.json +0 -23
  53. paradigma/ppg/glob_functions/jsonlab/examples/example2.json +0 -22
  54. paradigma/ppg/glob_functions/jsonlab/examples/example3.json +0 -11
  55. paradigma/ppg/glob_functions/jsonlab/examples/example4.json +0 -34
  56. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_basictest.matlab +0 -662
  57. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.m +0 -27
  58. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.matlab +0 -144
  59. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_speedtest.m +0 -21
  60. paradigma/ppg/glob_functions/jsonlab/jsonopt.m +0 -32
  61. paradigma/ppg/glob_functions/jsonlab/loadjson.m +0 -566
  62. paradigma/ppg/glob_functions/jsonlab/loadubjson.m +0 -528
  63. paradigma/ppg/glob_functions/jsonlab/mergestruct.m +0 -33
  64. paradigma/ppg/glob_functions/jsonlab/savejson.m +0 -475
  65. paradigma/ppg/glob_functions/jsonlab/saveubjson.m +0 -504
  66. paradigma/ppg/glob_functions/jsonlab/varargin2struct.m +0 -40
  67. paradigma/ppg/glob_functions/sample_prob_final.m +0 -49
  68. paradigma/ppg/glob_functions/synchronization.m +0 -76
  69. paradigma/ppg/glob_functions/tsdf_scan_meta.m +0 -22
  70. paradigma/ppg/hr_functions/Long_TFD_JOT.m +0 -37
  71. paradigma/ppg/hr_functions/PPG_TFD_HR.m +0 -59
  72. paradigma/ppg/hr_functions/TFD toolbox JOT/.gitignore +0 -4
  73. paradigma/ppg/hr_functions/TFD toolbox JOT/CHANGELOG.md +0 -23
  74. paradigma/ppg/hr_functions/TFD toolbox JOT/LICENCE.md +0 -27
  75. paradigma/ppg/hr_functions/TFD toolbox JOT/README.md +0 -251
  76. paradigma/ppg/hr_functions/TFD toolbox JOT/README.pdf +0 -0
  77. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_kern.m +0 -142
  78. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_lag_kern.m +0 -314
  79. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_lag_kern.m +0 -123
  80. paradigma/ppg/hr_functions/TFD toolbox JOT/dec_tfd.m +0 -154
  81. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_di_gdtfd.m +0 -194
  82. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_li_gdtfd.m +0 -200
  83. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_nonsep_gdtfd.m +0 -229
  84. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_sep_gdtfd.m +0 -241
  85. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/di_gdtfd.m +0 -157
  86. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/li_gdtfd.m +0 -190
  87. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/nonsep_gdtfd.m +0 -196
  88. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/sep_gdtfd.m +0 -199
  89. paradigma/ppg/hr_functions/TFD toolbox JOT/full_tfd.m +0 -144
  90. paradigma/ppg/hr_functions/TFD toolbox JOT/load_curdir.m +0 -13
  91. paradigma/ppg/hr_functions/TFD toolbox JOT/pics/decimated_TFDs_examples.png +0 -0
  92. paradigma/ppg/hr_functions/TFD toolbox JOT/pics/full_TFDs_examples.png +0 -0
  93. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/check_dec_params_seq.m +0 -79
  94. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispEE.m +0 -9
  95. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispVars.m +0 -26
  96. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/disp_bytes.m +0 -25
  97. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_full.m +0 -40
  98. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_half.m +0 -34
  99. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/gen_LFM.m +0 -29
  100. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_analytic_signal.m +0 -76
  101. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_window.m +0 -176
  102. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/isreal_fn.m +0 -11
  103. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/padWin.m +0 -97
  104. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/vtfd.m +0 -149
  105. paradigma/ppg/preprocessing/preprocessing_imu.m +0 -15
  106. paradigma/ppg/preprocessing/preprocessing_ppg.m +0 -13
  107. paradigma/ppg_preprocessing.py +0 -313
  108. paradigma/preprocessing_config.py +0 -69
  109. paradigma/quantification.py +0 -58
  110. paradigma/tremor/TremorFeaturesAndClassification.m +0 -345
  111. paradigma/tremor/feat_extraction/DerivativesExtract.m +0 -22
  112. paradigma/tremor/feat_extraction/ExtractBandSignalsRMS.m +0 -72
  113. paradigma/tremor/feat_extraction/MFCCExtract.m +0 -100
  114. paradigma/tremor/feat_extraction/PSDBandPower.m +0 -52
  115. paradigma/tremor/feat_extraction/PSDEst.m +0 -63
  116. paradigma/tremor/feat_extraction/PSDExtrAxis.m +0 -88
  117. paradigma/tremor/feat_extraction/PSDExtrOpt.m +0 -95
  118. paradigma/tremor/preprocessing/InterpData.m +0 -32
  119. paradigma/tremor/weekly_aggregates/WeeklyAggregates.m +0 -295
  120. paradigma/windowing.py +0 -219
  121. paradigma-0.3.2.dist-info/METADATA +0 -79
  122. paradigma-0.3.2.dist-info/RECORD +0 -108
  123. {paradigma-0.3.2.dist-info → paradigma-0.4.1.dist-info}/LICENSE +0 -0
paradigma/feature_extraction.py
@@ -1,703 +1,967 @@
- from typing import List
  import numpy as np
  import pandas as pd
- from sklearn.decomposition import PCA
+ from typing import List, Tuple

- from scipy import signal, fft
  from scipy.integrate import cumulative_trapezoid
- from scipy.signal import find_peaks
+ from scipy.signal import find_peaks, windows
+ from scipy.stats import kurtosis, skew
+ from sklearn.decomposition import PCA

- from paradigma.constants import DataColumns
- from paradigma.gait_analysis_config import IMUConfig
+ from paradigma.config import HeartRateConfig


- def generate_statistics(
-     sensor_col: pd.Series,
-     statistic: str
- ) -> list:
-     """Generate statistics for a single sensor and axis. The function is used with the apply function in pandas.
-
+ def compute_statistics(data: np.ndarray, statistic: str, abs_stats: bool=False) -> np.ndarray:
+     """
+     Compute a specific statistical measure along the timestamps of a 2D or 3D array.
+
      Parameters
      ----------
-     sensor_col: pd.Series
-         The sensor column to be aggregated (e.g. x-axis of accelerometer)
-     statistic: str
-         The statistic to be computed [mean, std, max, min]
-
+     data : np.ndarray
+         A 2D or 3D NumPy array where statistics are computed.
+     statistic : str
+         The statistic to compute. Supported values are:
+         - 'mean': Mean.
+         - 'median': Median.
+         - 'var': Variance.
+         - 'std': Standard deviation.
+         - 'max': Maximum.
+         - 'min': Minimum.
+         - 'kurtosis': Kurtosis.
+         - 'skewness': Skewness.
+     abs_stats : bool, optional
+         Whether to compute the statistics on the absolute values of the data for
+         the mean and median (default: False).
+
      Returns
      -------
-     list
-         The aggregated statistics
+     np.ndarray
+         A 1D or 2D array containing the computed statistic for each row (2D)
+         or the entire array (1D).
+
+     Raises
+     ------
+     ValueError
+         If the specified `statistic` is not supported or if the input data has an invalid shape.
      """
+     if statistic not in ['mean', 'median', 'var', 'std', 'max', 'min', 'kurtosis', 'skewness']:
+         raise ValueError(f"Statistic '{statistic}' is not supported.")
+
+     if data.ndim > 3 or data.ndim < 2:
+         raise ValueError("Input data must be a 1D, 2D or 3D array.")
+
      if statistic == 'mean':
-         return [np.mean(x) for x in sensor_col]
+         if abs_stats:
+             return np.mean(np.abs(data), axis=1)
+         else:
+             return np.mean(data, axis=1)
+     elif statistic == 'median':
+         if abs_stats:
+             return np.median(np.abs(data), axis=1)
+         else:
+             return np.median(data, axis=1)
+     elif statistic == 'var':
+         return np.var(data, ddof=1, axis=1)
      elif statistic == 'std':
-         return [np.std(x) for x in sensor_col]
+         return np.std(data, axis=1)
      elif statistic == 'max':
-         return [np.max(x) for x in sensor_col]
+         return np.max(data, axis=1)
      elif statistic == 'min':
-         return [np.min(x) for x in sensor_col]
+         return np.min(data, axis=1)
+     elif statistic == 'kurtosis':
+         return kurtosis(data, fisher=False, axis=1)
+     elif statistic == 'skewness':
+         return skew(data, axis=1)
      else:
-         raise ValueError("Statistic not recognized.")
+         raise ValueError(f"Statistic '{statistic}' is not supported.")


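The statistics API is now array-based rather than pandas `apply`-based. For orientation, a minimal usage sketch against the new signature (the input array is synthetic; shapes follow the docstring):

    import numpy as np
    from paradigma.feature_extraction import compute_statistics

    # 10 windows of 200 samples for 3 accelerometer axes
    acc_windows = np.random.randn(10, 200, 3)
    means = compute_statistics(acc_windows, statistic='mean')      # shape (10, 3)
    kurt = compute_statistics(acc_windows, statistic='kurtosis')   # Pearson kurtosis per window and axis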
- def generate_std_norm(
-     df: pd.DataFrame,
-     cols: List[str],
- ) -> pd.Series:
-     """Generate the standard deviation of the norm of the accelerometer axes.
-
+ def compute_std_euclidean_norm(data: np.ndarray) -> np.ndarray:
+     """
+     Compute the standard deviation of the Euclidean norm for each window of sensor data.
+
+     The function calculates the Euclidean norm (L2 norm) across sensor axes for each
+     timestamp within a window, and then computes the standard deviation of these norms
+     for each window.
+
      Parameters
      ----------
-     df: pd.DataFrame
-         The dataframe containing the accelerometer axes
-     cols: List[str]
-         The names of the columns containing the accelerometer axes
-
+     data : np.ndarray
+         A 3D NumPy array of shape (n_windows, n_timestamps, n_axes), where:
+         - `n_windows` is the number of windows.
+         - `n_timestamps` is the number of time steps per window.
+         - `n_axes` is the number of sensor axes (e.g., 3 for x, y, z).
+
      Returns
      -------
-     pd.Series
-         The standard deviation of the norm of the accelerometer axes
+     np.ndarray
+         A 1D array of shape (n_windows,) containing the standard deviation of the
+         Euclidean norm for each window.
      """
-     return df.apply(
-         lambda x: np.std(np.sqrt(sum(
-             [np.array([y**2 for y in x[col]]) for col in cols]
-         ))), axis=1)
-
+     norms = np.linalg.norm(data, axis=2)  # Norm along the sensor axes (norm per timestamp, per window)
+     return np.std(norms, axis=1)  # Standard deviation per window
+
+
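A quick illustration of the new array contract (synthetic data, for orientation only):

    import numpy as np
    from paradigma.feature_extraction import compute_std_euclidean_norm

    acc_windows = np.random.randn(5, 100, 3)            # (n_windows, n_timestamps, n_axes)
    std_norm = compute_std_euclidean_norm(acc_windows)  # shape (5,): one value per window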
+ def compute_power_in_bandwidth(
+     freqs: np.ndarray,
+     psd: np.ndarray,
+     fmin: float,
+     fmax: float,
+     include_max: bool = True,
+     spectral_resolution: float = 1,
+     cumulative_sum_method: str = 'trapz'
+ ) -> np.ndarray:
+     """
+     Compute the logarithmic power within specified frequency bands for each sensor axis.
+
+     This function integrates the power spectral density (PSD) over user-defined frequency
+     bands and computes the logarithm of the resulting power for each axis of the sensor.

- def compute_fft(
-     values: list,
-     window_type: str = 'hann',
-     sampling_frequency: int = 100,
- ) -> tuple:
-     """Compute the Fast Fourier Transform (FFT) of a signal.
-
      Parameters
      ----------
-     values: list
-         The values of the signal (e.g., accelerometer data) of a single window.
-     window_type: str
-         The type of window to be used for the FFT (default: 'hann')
-     sampling_frequency: int
-         The sampling frequency of the signal (default: 100)
-
+     freqs : np.ndarray
+         A 1D array of shape (n_frequencies,) containing the frequencies corresponding
+         to the PSD values.
+     psd : np.ndarray
+         A 2D array of shape (n_windows, n_frequencies) or 3D array of shape (n_windows, n_frequencies, n_axes)
+         representing the power spectral density (PSD) of the sensor data.
+     fmin : float
+         The lower bound of the frequency band in Hz.
+     fmax : float
+         The upper bound of the frequency band in Hz.
+     include_max : bool, optional
+         Whether to include the maximum frequency in the search range (default: True).
+     spectral_resolution : float, optional
+         The spectral resolution of the PSD in Hz (default: 1).
+     cumulative_sum_method : str, optional
+         The method used to integrate the PSD over the frequency band. Supported values are:
+         - 'trapz': Trapezoidal rule.
+         - 'sum': Simple summation (default: 'trapz').
+
      Returns
      -------
-     tuple
-         The FFT values and the corresponding frequencies
+     np.ndarray
+         A 2D array of shape (n_windows, n_axes) containing the power within
+         the specified frequency band for each window and each sensor axis.
      """
-     w = signal.get_window(window_type, len(values), fftbins=False)
-     yf = 2*fft.fft(values*w)[:int(len(values)/2+1)]
-     xf = fft.fftfreq(len(values), 1/sampling_frequency)[:int(len(values)/2+1)]
+     # Create a mask for frequencies within the current band range (low, high)
+     if include_max:
+         band_mask = (freqs >= fmin) & (freqs <= fmax)
+     else:
+         band_mask = (freqs >= fmin) & (freqs < fmax)
+
+     # Integrate PSD over the selected frequency band using the band mask
+     if psd.ndim == 2:
+         masked_psd = psd[:, band_mask]
+     elif psd.ndim == 3:
+         masked_psd = psd[:, band_mask, :]
+
+     if cumulative_sum_method == 'trapz':
+         band_power = spectral_resolution * np.trapz(masked_psd, freqs[band_mask], axis=1)
+     elif cumulative_sum_method == 'sum':
+         band_power = spectral_resolution * np.sum(masked_psd, axis=1)
+     else:
+         raise ValueError("cumulative_sum_method must be 'trapz' or 'sum'.")

-     return yf, xf
-
+     return band_power
+
+
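A sketch of feeding the band-power helper from a Welch PSD; the Welch settings below are illustrative and not taken from the package's own configs:

    import numpy as np
    from scipy.signal import welch
    from paradigma.feature_extraction import compute_power_in_bandwidth

    fs = 100.0
    gyro_windows = np.random.randn(10, 400, 3)                    # 10 windows of 4 s at 100 Hz, 3 axes
    freqs, psd = welch(gyro_windows, fs=fs, nperseg=400, axis=1)  # psd: (10, 201, 3), 0.25 Hz bins
    band_power = compute_power_in_bandwidth(
        freqs, psd, fmin=3.0, fmax=7.0,
        spectral_resolution=freqs[1] - freqs[0],
    )                                                             # shape (10, 3)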
+ def compute_total_power(psd: np.ndarray) -> np.ndarray:
+     """
+     Compute the total power by summing the power spectral density (PSD) across frequency bins.

- def signal_to_ffts(
-     sensor_col: pd.Series,
-     window_type: str = 'hann',
-     sampling_frequency: int = 100,
- ) -> tuple:
-     """Compute the Fast Fourier Transform (FFT) of a signal per window (can probably be combined with compute_fft and simplified).
+     This function calculates the total power for each window and each sensor axis by
+     summing the PSD values across all frequency bins.

      Parameters
      ----------
-     sensor_col: pd.Series
-         The sensor column to be transformed (e.g. x-axis of accelerometer)
-     window_type: str
-         The type of window to be used for the FFT (default: 'hann')
-     sampling_frequency: int
-         The sampling frequency of the signal (default: 100)
-
+     psd : np.ndarray
+         A 3D array of shape (n_windows, n_frequencies, n_axes) representing the
+         power spectral density (PSD) of the sensor data.
+
      Returns
      -------
-     tuple
-         Lists of FFT values and corresponding frequencies which can be concatenated as column to the dataframe
+     np.ndarray
+         A 2D array of shape (n_windows, n_axes) containing the total power for each
+         window and each sensor axis.
      """
-     l_values_total = []
-     l_freqs_total = []
-     for row in sensor_col:
-         l_values, l_freqs = compute_fft(
-             values=row,
-             window_type=window_type,
-             sampling_frequency=sampling_frequency)
-         l_values_total.append(l_values)
-         l_freqs_total.append(l_freqs)
+     return np.sum(psd, axis=-1)  # Sum across frequency bins

-     return l_freqs_total, l_values_total
-

- def compute_power_in_bandwidth(
-     sensor_col: list,
-     fmin: float,
-     fmax: float,
-     sampling_frequency: int = 100,
-     window_type: str = 'hann',
- ) -> float:
-     """Note: sensor_col is a single cell (which corresponds to a single window) of sensor_col, as it is used with apply function.
-     Probably we want a smarter way of doing this.
-
-     Computes the power in a specific frequency band for a specified sensor and axis.
+ def extract_tremor_power(
+     freqs: np.ndarray,
+     total_psd: np.ndarray,
+     fmin: float = 3,
+     fmax: float = 7,
+     spectral_resolution: float = 0.25
+ ) -> np.ndarray:
+
+     """Computes the tremor power (1.25 Hz around the peak within the tremor frequency band)

      Parameters
      ----------
-     sensor_col: list
-         The sensor column to be transformed (e.g. x-axis of accelerometer). This corresponds to a single window, which is a single row of the dataframe,
-         and contains values of individual timestamps composing the window.
+     total_psd: np.ndarray
+         The power spectral density of the gyroscope signal summed over the three axes
+     freqs: np.ndarray
+         Frequency vector corresponding to the power spectral density
      fmin: float
-         The lower bound of the frequency band
+         The lower bound of the tremor frequency band in Hz (default: 3)
      fmax: float
-         The upper bound of the frequency band
-     sampling_frequency: int
-         The sampling frequency of the signal (default: 100)
-     window_type: str
-         The type of window to be used for the FFT (default: 'hann')
-
+         The upper bound of the tremor frequency band in Hz (default: 7)
+     spectral_resolution: float
+         The spectral resolution of the PSD in Hz (default: 0.25)
+
      Returns
      -------
-     float
-         The power in the specified frequency band
+     pd.Series
+         The tremor power across windows
      """
-     fxx, pxx = signal.periodogram(sensor_col, fs=sampling_frequency, window=window_type)
-     ind_min = np.argmax(fxx > fmin) - 1
-     ind_max = np.argmax(fxx > fmax) - 1
-     return np.log10(np.trapz(pxx[ind_min:ind_max], fxx[ind_min:ind_max]))
+
+     freq_idx = (freqs >= fmin) & (freqs <= fmax)
+     peak_idx = np.argmax(total_psd[:, freq_idx], axis=1) + np.min(np.where(freq_idx)[0])
+     left_idx = np.maximum((peak_idx - 0.5 / spectral_resolution).astype(int), 0)
+     right_idx = (peak_idx + 0.5 / spectral_resolution).astype(int)

+     row_indices = np.arange(total_psd.shape[1])
+     row_indices = np.tile(row_indices, (total_psd.shape[0], 1))
+     left_idx = left_idx[:, None]
+     right_idx = right_idx[:, None]

- def compute_perc_power(
-     sensor_col: list,
-     fmin_band: float,
-     fmax_band: float,
-     fmin_total: float = 0,
-     fmax_total: float = 100,
-     sampling_frequency: int = 100,
-     window_type: str = 'hann'
- ) -> float:
-     """Note: sensor_col is a single cell (which corresponds to a single window) of sensor_col, as it is used with apply function.
+     mask = (row_indices >= left_idx) & (row_indices <= right_idx)
+
+     tremor_power = spectral_resolution * np.sum(total_psd * mask, axis=1)
+
+     return tremor_power
+
+
237
+ freqs: np.ndarray,
238
+ psd: np.ndarray,
239
+ fmin: float | None = None,
240
+ fmax: float | None = None
241
+ ) -> np.ndarray:
242
+ """
243
+ Compute the dominant frequency within a specified frequency range for each window and sensor axis.
244
+
245
+ The dominant frequency is defined as the frequency corresponding to the maximum power in the
246
+ power spectral density (PSD) within the specified range.
177
247
 
178
- Computes the percentage of power in a specific frequency band for a specified sensor and axis.
179
-
180
248
  Parameters
181
249
  ----------
182
- sensor_col: list
183
- The sensor column to be transformed (e.g. x-axis of accelerometer). This corresponds to a single window, which is a single row of the dataframe
184
- fmin_band: float
185
- The lower bound of the frequency band
186
- fmax_band: float
187
- The upper bound of the frequency band
188
- fmin_total: float
189
- The lower bound of the frequency spectrum (default: 0)
190
- fmax_total: float
191
- The upper bound of the frequency spectrum (default: 100)
192
- sampling_frequency: int
193
- The sampling frequency of the signal (default: 100)
194
- window_type: str
195
- The type of window to be used for the FFT (default: 'hann')
196
-
250
+ freqs : np.ndarray
251
+ A 1D array of shape (n_frequencies,) containing the frequencies corresponding
252
+ to the PSD values.
253
+ psd : np.ndarray
254
+ A 2D array of shape (n_windows, n_frequencies) or a 3D array of shape
255
+ (n_windows, n_frequencies, n_axes) representing the power spectral density.
256
+ fmin : float
257
+ The lower bound of the frequency range (inclusive).
258
+ fmax : float
259
+ The upper bound of the frequency range (exclusive).
260
+
197
261
  Returns
198
262
  -------
199
- float
200
- The percentage of power in the specified frequency band
201
- """
202
- angle_power_band = compute_power_in_bandwidth(
203
- sensor_col=sensor_col,
204
- fmin=fmin_band,
205
- fmax=fmax_band,
206
- sampling_frequency=sampling_frequency,
207
- window_type=window_type
208
- )
209
-
210
- angle_power_total = compute_power_in_bandwidth(
211
- sensor_col=sensor_col,
212
- fmin=fmin_total,
213
- fmax=fmax_total,
214
- sampling_frequency=sampling_frequency,
215
- window_type=window_type
216
- )
263
+ np.ndarray
264
+ - If `psd` is 2D: A 1D array of shape (n_windows,) containing the dominant frequency
265
+ for each window.
266
+ - If `psd` is 3D: A 2D array of shape (n_windows, n_axes) containing the dominant
267
+ frequency for each window and each axis.
268
+
269
+ Raises
270
+ ------
271
+ ValueError
272
+ If `fmin` or `fmax` is outside the bounds of the `freqs` array.
273
+ If `psd` is not a 2D or 3D array.
274
+ """
275
+ # Set default values for fmin and fmax to the minimum and maximum frequencies if not provided
276
+ if fmin is None:
277
+ fmin = freqs[0]
278
+ if fmax is None:
279
+ fmax = freqs[-1]
280
+
281
+ # Validate the frequency range
282
+ if fmin < freqs[0] or fmax > freqs[-1]:
283
+ raise ValueError(f"fmin {fmin} or fmax {fmax} are out of bounds of the frequency array.")
284
+
285
+ # Find the indices corresponding to fmin and fmax
286
+ min_index = np.searchsorted(freqs, fmin)
287
+ max_index = np.searchsorted(freqs, fmax)
288
+
289
+ # Slice the PSD and frequency array to the desired range
290
+ psd_filtered = psd[:, min_index:max_index] if psd.ndim == 2 else psd[:, min_index:max_index, :]
291
+ freqs_filtered = freqs[min_index:max_index]
292
+
293
+ # Compute dominant frequency
294
+ if psd.ndim == 3:
295
+ # 3D: Compute for each axis
296
+ return np.array([
297
+ freqs_filtered[np.argmax(psd_filtered[:, :, i], axis=1)]
298
+ for i in range(psd.shape[-1])
299
+ ]).T
300
+ elif psd.ndim == 2:
301
+ # 2D: Compute for each window
302
+ return freqs_filtered[np.argmax(psd_filtered, axis=1)]
303
+ else:
304
+ raise ValueError("PSD array must be 2D or 3D.")
217
305
 
218
- return angle_power_band / angle_power_total
219
306
 
307
+ def extract_frequency_peak(
+     freqs: np.ndarray,
+     psd: np.ndarray,
+     fmin: float | None = None,
+     fmax: float | None = None,
+     include_max: bool = True
+ ) -> pd.Series:

- def get_dominant_frequency(
-     signal_ffts: list,
-     signal_freqs: list,
-     fmin: float,
-     fmax: float
- ) -> float:
-     """Note: signal_ffts and signal_freqs are single cells (which corresponds to a single window) of signal_ffts and signal_freqs, as it is used with apply function.
-
-     Computes the dominant frequency in a specific frequency band.
+     """Extract the frequency of the peak in the power spectral density within the specified frequency band.

      Parameters
      ----------
-     signal_ffts: list
-         The FFT values of the signal of a single window
-     signal_freqs: list
-         The corresponding frequencies of the FFT values
-     fmin: int
-         The lower bound of the frequency band
-     fmax: int
-         The upper bound of the frequency band
-
+     freqs: pd.Series
+         Frequency vector corresponding to the power spectral density
+     psd: pd.Series
+         The total power spectral density of the gyroscope signal
+     fmin: float
+         The lower bound of the frequency band in Hz (default: None). If not provided, the minimum frequency is used.
+     fmax: float
+         The upper bound of the frequency band in Hz (default: None). If not provided, the maximum frequency is used.
+     include_max: bool
+         Whether to include the maximum frequency in the search range (default: True)
+
      Returns
      -------
-     float
-         The dominant frequency in the specified frequency band
+     pd.Series
+         The frequency of the peak across windows
+     """
+     # Set fmin and fmax to maximum range if not provided
+     if fmin is None:
+         fmin = freqs[0]
+     if fmax is None:
+         fmax = freqs[-1]
+
+     # Find the indices corresponding to fmin and fmax
+     if include_max:
+         freq_idx = np.where((freqs>=fmin) & (freqs<=fmax))[0]
+     else:
+         freq_idx = np.where((freqs>=fmin) & (freqs<fmax))[0]
+
+     peak_idx = np.argmax(psd[:, freq_idx], axis=1)
+     frequency_peak = freqs[freq_idx][peak_idx]
+
+     return frequency_peak
+
+
+ def compute_relative_power(
+     freqs: np.ndarray,
+     psd: np.ndarray,
+     config: HeartRateConfig
+ ) -> list:
      """
-     valid_indices = np.where((signal_freqs>fmin) & (signal_freqs<fmax))
-     signal_freqs_adjusted = signal_freqs[valid_indices]
-     signal_ffts_adjusted = signal_ffts[valid_indices]
+     Calculate relative power within the dominant frequency band in the physiological range (0.75 - 3 Hz).


-     idx = np.argmax(np.abs(signal_ffts_adjusted))
-     return np.abs(signal_freqs_adjusted[idx])
-
+     Parameters
+     ----------
+     freqs: np.ndarray
+         The frequency bins of the power spectral density.
+     psd: np.ndarray
+         The power spectral density of the signal.
+     config: HeartRateConfig
+         The configuration object containing the parameters for the feature extraction. The following
+         attributes are used:
+         - freq_band_physio: tuple
+             The frequency band for physiological heart rate (default: (0.75, 3)).
+         - bandwidth: float
+             The bandwidth around the peak frequency to consider for relative power calculation (default: 0.5).

- def compute_power(
-     df: pd.DataFrame,
-     fft_cols: list
- ) -> pd.Series:
-     """Compute the power of the FFT values.
+     Returns
+     -------
+     list
+         The relative power within the dominant frequency band in the physiological range (0.75 - 3 Hz).

+     """
+     hr_range_mask = (freqs >= config.freq_band_physio[0]) & (freqs <= config.freq_band_physio[1])
+     hr_range_idx = np.where(hr_range_mask)[0]
+     peak_idx = np.argmax(psd[:, hr_range_idx], axis=1)
+     peak_freqs = freqs[hr_range_idx[peak_idx]]
+
+     dom_band_idx = [np.where((freqs >= peak_freq - config.bandwidth) & (freqs <= peak_freq + config.bandwidth))[0] for peak_freq in peak_freqs]
+     rel_power = [np.trapz(psd[j, idx], freqs[idx]) / np.trapz(psd[j, :], freqs) for j, idx in enumerate(dom_band_idx)]
+     return rel_power
+
+
392
+ psd: np.ndarray,
393
+ n_samples: int
394
+ ) -> np.ndarray:
395
+ """
396
+ Calculate the spectral entropy from the normalized power spectral density.
397
+
261
398
  Parameters
262
399
  ----------
263
- df: pd.DataFrame
264
- The dataframe containing the FFT values
265
- fft_cols: list
266
- The names of the columns containing the FFT values
267
-
400
+ psd: np.ndarray
401
+ The power spectral density of the signal.
402
+ n_samples: int
403
+ The number of samples in the window.
404
+
268
405
  Returns
269
406
  -------
270
- pd.Series
271
- The power of the FFT values
407
+ np.ndarray
408
+ The spectral entropy of the power spectral density.
272
409
  """
273
- for col in fft_cols:
274
- df['{}_power'.format(col)] = df[col].apply(lambda x: np.square(np.abs(x)))
275
-
276
- return df.apply(lambda x: sum([np.array([y for y in x[col+'_power']]) for col in fft_cols]), axis=1)
410
+ psd_norm = psd / np.sum(psd, axis=1, keepdims=True)
411
+ spectral_entropy = -np.sum(psd_norm * np.log2(psd_norm), axis=1) / np.log2(n_samples)
277
412
 
413
+ return spectral_entropy
414
+
415
+
416
+ def compute_mfccs(
417
+ total_power_array: np.ndarray,
418
+ config,
419
+ mel_scale: bool = True,
420
+ multiplication_factor: float = 1
421
+ ) -> np.ndarray:
422
+ """
423
+ Generate Mel Frequency Cepstral Coefficients (MFCCs) from the total power spectral density of the signal.
424
+
425
+ MFCCs are commonly used features in signal processing for tasks like audio and
426
+ vibration analysis. In this version, we adjusted the MFFCs to the human activity
427
+ range according to: https://www.sciencedirect.com/science/article/abs/pii/S016516841500331X#f0050.
428
+ This function calculates MFCCs by applying a filterbank
429
+ (in either the mel scale or linear scale) to the total power of the signal,
430
+ followed by a Discrete Cosine Transform (DCT) to obtain coefficients.
278
431
 
279
- def generate_cepstral_coefficients(
280
- total_power_col: pd.Series,
281
- window_length_s: int,
282
- sampling_frequency: int = 100,
283
- low_frequency: int = 0,
284
- high_frequency: int = 25,
285
- n_filters: int = 20,
286
- n_coefficients: int = 12,
287
- ) -> pd.DataFrame:
288
- """Generate cepstral coefficients from the total power of the signal.
289
-
290
432
  Parameters
291
433
  ----------
292
- total_power_col: pd.Series
293
- The total power of the signal, extracted using compute_power
294
- window_length_s: int
295
- The number of seconds a window constitutes
296
- sampling_frequency: int
297
- The sampling frequency of the data (default: 100)
298
- low_frequency: int
299
- The lower bound of the frequency band (default: 0)
300
- high_frequency: int
301
- The upper bound of the frequency band (default: 25)
302
- n_filters: int
303
- The number of DCT filters (default: 20)
304
- n_coefficients: int
305
- The number of coefficients to extract (default: 12)
306
-
434
+ total_power_array : np.ndarray
435
+ 2D array of shape (n_windows, n_frequencies) containing the total power
436
+ of the signal for each window.
437
+ config : object
438
+ Configuration object containing the following attributes:
439
+ - window_length_s : int
440
+ Duration of each analysis window in seconds.
441
+ - sampling_frequency : int
442
+ Sampling frequency of the data in Hz (default: 100).
443
+ - mfcc_low_frequency : float
444
+ Lower bound of the frequency band in Hz (default: 0).
445
+ - mfcc_high_frequency : float
446
+ Upper bound of the frequency band in Hz (default: 25).
447
+ - mfcc_n_dct_filters : int
448
+ Number of triangular filters in the filterbank (default: 20).
449
+ - mfcc_n_coefficients : int
450
+ Number of coefficients to extract (default: 12).
451
+ mel_scale : bool, optional
452
+ Whether to use the mel scale for the filterbank (default: True).
453
+ multiplication_factor : float, optional
454
+ Multiplication factor for the Mel scale conversion (default: 1). For tremor, the recommended
455
+ value is 1. For gait, this is 4.
456
+
307
457
  Returns
308
458
  -------
309
- pd.DataFrame
310
- A dataframe with a single column corresponding to a single cepstral coefficient
459
+ np.ndarray
460
+ 2D array of MFCCs with shape `(n_windows, n_coefficients)`, where each row
461
+ contains the MFCCs for a corresponding window.
462
+ ...
463
+
464
+ Notes
465
+ -----
466
+ - The function includes filterbank normalization to ensure proper scaling.
467
+ - DCT filters are constructed to minimize spectral leakage.
311
468
  """
312
- window_length = window_length_s * sampling_frequency
469
+ # Compute window length in samples
470
+ window_length = config.window_length_s * config.sampling_frequency
471
+
472
+ # Generate filter points
473
+ if mel_scale:
474
+ freqs = np.linspace(
475
+ melscale(config.mfcc_low_frequency, multiplication_factor),
476
+ melscale(config.mfcc_high_frequency, multiplication_factor),
477
+ num=config.mfcc_n_dct_filters + 2
478
+ )
479
+ freqs = inverse_melscale(freqs, multiplication_factor)
480
+ else:
481
+ freqs = np.linspace(
482
+ config.mfcc_low_frequency,
483
+ config.mfcc_high_frequency,
484
+ num=config.mfcc_n_dct_filters + 2
485
+ )
486
+
487
+ filter_points = np.floor(
488
+ window_length / config.sampling_frequency * freqs
489
+ ).astype(int) + 1
490
+
491
+ # Construct triangular filterbank
492
+ filters = np.zeros((len(filter_points) - 2, int(window_length / 2 + 1)))
493
+ for j in range(len(filter_points) - 2):
494
+ filters[j, filter_points[j] : filter_points[j + 2]] = windows.triang(
495
+ filter_points[j + 2] - filter_points[j]
496
+ )
497
+ # Normalize filter coefficients
498
+ filters[j, :] /= (
499
+ config.sampling_frequency/window_length * np.sum(filters[j,:])
500
+ )
501
+
502
+ # Apply filterbank to total power
503
+ power_filtered = np.dot(total_power_array, filters.T)
313
504
 
314
- # compute filter points
315
- freqs = np.linspace(low_frequency, high_frequency, num=n_filters+2)
316
- filter_points = np.floor((window_length + 1) / sampling_frequency * freqs).astype(int)
505
+ # Convert power to logarithmic scale
506
+ log_power_filtered = np.log10(power_filtered + 1e-10)
507
+
508
+ # Generate DCT filters
509
+ dct_filters = np.empty((config.mfcc_n_coefficients, config.mfcc_n_dct_filters))
510
+ dct_filters[0, :] = 1.0 / np.sqrt(config.mfcc_n_dct_filters)
511
+
512
+ samples = (
513
+ np.arange(1, 2 * config.mfcc_n_dct_filters, 2) * np.pi / (2.0 * config.mfcc_n_dct_filters)
514
+ )
515
+
516
+ for i in range(1, config.mfcc_n_coefficients):
517
+ dct_filters[i, :] = np.cos(i * samples) * np.sqrt(2.0 / config.mfcc_n_dct_filters)
518
+
519
+ # Compute MFCCs
520
+ mfccs = np.dot(log_power_filtered, dct_filters.T)
317
521
 
318
- # construct filterbank
319
- filters = np.zeros((len(filter_points)-2, int(window_length/2+1)))
320
- for j in range(len(filter_points)-2):
321
- filters[j, filter_points[j] : filter_points[j+1]] = np.linspace(0, 1, filter_points[j+1] - filter_points[j])
322
- filters[j, filter_points[j+1] : filter_points[j+2]] = np.linspace(1, 0, filter_points[j+2] - filter_points[j+1])
522
+ return mfccs
323
523
 
324
- # filter signal
325
- power_filtered = [np.dot(filters, x) for x in total_power_col]
326
- log_power_filtered = [10.0 * np.log10(x) for x in power_filtered]
327
524
 
328
- # generate cepstral coefficients
329
- dct_filters = np.empty((n_coefficients, n_filters))
330
- dct_filters[0, :] = 1.0 / np.sqrt(n_filters)
525
+ def melscale(x: np.ndarray, multiplication_factor: float = 1) -> np.ndarray:
526
+ """
527
+ Maps linear frequency values to the Mel scale.
331
528
 
332
- samples = np.arange(1, 2 * n_filters, 2) * np.pi / (2.0 * n_filters)
529
+ Parameters
530
+ ----------
531
+ x : np.ndarray
532
+ Linear frequency values to be converted to the Mel scale.
533
+ multiplication_factor : float, optional
534
+ Multiplication factor for the Mel scale conversion (default: 1). For tremor, the recommended
535
+ value is 1. For gait, this is 4.
333
536
 
334
- for i in range(1, n_coefficients):
335
- dct_filters[i, :] = np.cos(i * samples) * np.sqrt(2.0 / n_filters)
537
+ Returns
538
+ -------
539
+ np.ndarray
540
+ Frequency values mapped to the Mel scale.
541
+ """
542
+ return (64.875 / multiplication_factor) * np.log10(1 + x / (17.5 / multiplication_factor))
336
543
 
337
- cepstral_coefs = [np.dot(dct_filters, x) for x in log_power_filtered]
338
544
 
339
- return pd.DataFrame(np.vstack(cepstral_coefs), columns=['cc_{}'.format(j+1) for j in range(n_coefficients)])
545
+ def inverse_melscale(x: np.ndarray, multiplication_factor: float = 1) -> np.ndarray:
546
+ """
547
+ Maps values from the Mel scale back to linear frequencies.
548
+
549
+ This function performs the inverse transformation of the Mel scale,
550
+ converting perceptual frequency values to their corresponding linear frequency values.
551
+
552
+ Parameters
553
+ ----------
554
+ x : np.ndarray
555
+ Frequency values on the Mel scale to be converted back to linear frequencies.
556
+
557
+ Returns
558
+ -------
559
+ np.ndarray
560
+ Linear frequency values corresponding to the given Mel scale values.
561
+ """
562
+ return (17.5 / multiplication_factor) * (10 ** (x / (64.875 / multiplication_factor)) - 1)
340
563
 
341
564
 
342
565
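The two scale helpers are exact inverses of each other, which is easy to sanity-check (arbitrary frequency values):

    import numpy as np
    from paradigma.feature_extraction import melscale, inverse_melscale

    x = np.array([0.0, 5.0, 25.0])
    assert np.allclose(inverse_melscale(melscale(x)), x)
    assert np.allclose(inverse_melscale(melscale(x, 4), 4), x)   # gait variant, multiplication_factor=4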
  def pca_transform_gyroscope(
      df: pd.DataFrame,
      y_gyro_colname: str,
      z_gyro_colname: str,
-     pred_gait_colname: str,
- ) -> pd.Series:
-     """Apply principal component analysis (PCA) on the y-axis and z-axis of the raw gyroscope signal
-     to extract the velocity. PCA is applied to the predicted gait timestamps only to maximize the similarity
-     to the velocity in the arm swing direction.
+     pred_colname: str | None = None,
+ ) -> np.ndarray:
+     """
+     Perform principal component analysis (PCA) on gyroscope data to estimate velocity. If pred_colname is provided,
+     the PCA is fitted on the predicted gait data. Otherwise, the PCA is fitted on the entire dataset.

      Parameters
      ----------
-     df: pd.DataFrame
-         The dataframe containing the gyroscope data
-     y_gyro_colname: str
-         The column name of the y-axis of the gyroscope
-     z_gyro_colname: str
-         The column name of the z-axis of the gyroscope
-     pred_gait_colname: str
-         The column name of the predicted gait boolean
-
+     df : pd.DataFrame
+         The DataFrame containing the gyroscope data.
+     y_gyro_colname : str
+         The column name for the y-axis gyroscope data.
+     z_gyro_colname : str
+         The column name for the z-axis gyroscope data.
+     pred_colname : str, optional
+         The column name for the predicted gait (default: None).
+
      Returns
      -------
-     pd.Series
-         The first principal component corresponding to the angular velocity in the arm swing direction
+     np.ndarray
+         The estimated velocity based on the principal component of the gyroscope data.
      """
-     pca = PCA(n_components=2, svd_solver='auto', random_state=22)
-     pca.fit([(i,j) for i,j in zip(df.loc[df[pred_gait_colname]==1, y_gyro_colname], df.loc[df[pred_gait_colname]==1, z_gyro_colname])])
-     yz_gyros = pca.transform([(i,j) for i,j in zip(df[y_gyro_colname], df[z_gyro_colname])])
+     # Convert gyroscope columns to NumPy arrays
+     y_gyro_array = df[y_gyro_colname].to_numpy()
+     z_gyro_array = df[z_gyro_colname].to_numpy()
+
+     # Filter data based on predicted gait if pred_colname is provided
+     if pred_colname is not None:
+         pred_mask = df[pred_colname] == 1
+         y_gyro_fit_array = y_gyro_array[pred_mask]
+         z_gyro_fit_array = z_gyro_array[pred_mask]
+
+         # Fit PCA on predicted gait data
+         fit_data = np.column_stack((y_gyro_fit_array, z_gyro_fit_array))
+         full_data = np.column_stack((y_gyro_array, z_gyro_array))
+     else:
+         # Fit PCA on entire dataset
+         fit_data = np.column_stack((y_gyro_array, z_gyro_array))
+         full_data = fit_data

-     velocity = [x[0] for x in yz_gyros]
+     pca = PCA(n_components=2, svd_solver='auto', random_state=22)
+     pca.fit(fit_data)
+     velocity = pca.transform(full_data)[:, 0]  # First principal component

-     return pd.Series(velocity)
+     return np.asarray(velocity)


378
- velocity_col: pd.Series,
379
- time_col: pd.Series,
380
- ) -> pd.Series:
381
- """Apply cumulative trapezoidal integration to extract the angle from the velocity.
616
+ def compute_angle(time_array: np.ndarray, velocity_array: np.ndarray) -> np.ndarray:
617
+ """
618
+ Compute the angle from the angular velocity using cumulative trapezoidal integration.
382
619
 
383
620
  Parameters
384
621
  ----------
385
- velocity_col: pd.Series
386
- The angular velocity (gyroscope) column to be integrated
387
- time_col: pd.Series
388
- The time column corresponding to the angular velocity
622
+ time_array : np.ndarray
623
+ The time array corresponding to the angular velocity data.
624
+ velocity_array : np.ndarray
625
+ The angular velocity data to integrate.
389
626
 
390
627
  Returns
391
628
  -------
392
- pd.Series
393
- An estimation of the angle extracted from the angular velocity
629
+ np.ndarray
630
+ The estimated angle based on the cumulative trapezoidal integration of the angular velocity.
394
631
  """
395
- angle_col = cumulative_trapezoid(velocity_col, time_col, initial=0)
396
- return pd.Series([x*-1 if x<0 else x for x in angle_col])
632
+ # Perform integration and apply absolute value
633
+ angle_array = cumulative_trapezoid(
634
+ y=velocity_array,
635
+ x=time_array,
636
+ initial=0
637
+ )
638
+ return np.abs(angle_array)
397
639
 
398
640
 
399
- def remove_moving_average_angle(
400
- angle_col: pd.Series,
401
- sampling_frequency: int = 100,
402
- ) -> pd.Series:
403
- """Remove the moving average from the angle to account for potential drift in the signal.
404
-
641
+ def remove_moving_average_angle(angle_array: np.ndarray, fs: float) -> pd.Series:
642
+ """
643
+ Remove the moving average from the angle to correct for drift.
644
+
405
645
  Parameters
406
646
  ----------
407
- angle_col: pd.Series
408
- The angle column to be processed, obtained using compute_angle
409
- sampling_frequency: int
410
- The sampling frequency of the data (default: 100)
411
-
647
+ angle_array : np.ndarray
648
+ The angle array to remove the moving average from.
649
+ fs : float
650
+ The sampling frequency of the data.
651
+
412
652
  Returns
413
653
  -------
414
654
  pd.Series
415
- The estimated angle without potential drift
655
+ The angle array with the moving average removed.
416
656
  """
417
- angle_ma = angle_col.rolling(window=int(2*(sampling_frequency*0.5)+1), min_periods=1, center=True, closed='both').mean()
657
+ window_size = int(2 * (fs * 0.5) + 1)
658
+ angle_ma = np.array(pd.Series(angle_array).rolling(
659
+ window=window_size,
660
+ min_periods=1,
661
+ center=True,
662
+ closed='both'
663
+ ).mean())
418
664
 
419
- return pd.Series(angle_col - angle_ma)
665
+ return angle_array - angle_ma
420
666
 
421
667
 
422
668
  def extract_angle_extremes(
-     df: pd.DataFrame,
-     angle_colname: str,
-     dominant_frequency_colname: str,
-     sampling_frequency: int = 100,
- ) -> pd.Series:
-     """Extract the peaks of the angle (minima and maxima) from the smoothed angle signal that adhere to a set of specific requirements.
+     angle_array: np.ndarray,
+     sampling_frequency: float,
+     max_frequency_activity: float = 1.75,
+ ) -> tuple[List[int], List[int], List[int]]:
+     """
+     Extract extrema (minima and maxima) indices from the angle array.

      Parameters
      ----------
-     df: pd.DataFrame
-         The dataframe containing the angle signal
-     angle_colname: str
-         The name of the column containing the smoothed angle signal
-     dominant_frequency_colname: str
-         The name of the column containing the dominant frequency
-     sampling_frequency: int
-         The sampling frequency of the data (default: 100)
-
+     angle_array : np.ndarray
+         The angle array to extract extrema from.
+     sampling_frequency : float
+         The sampling frequency of the data.
+     max_frequency_activity : float, optional
+         The maximum frequency of human activity in Hz (default: 1.75).
+
      Returns
      -------
-     pd.Series
-         The extracted angle extremes (peaks)
-     """
-     # determine peaks
-     df['angle_maxima'] = df.apply(lambda x: find_peaks(x[angle_colname], distance=sampling_frequency * 0.6 / x[dominant_frequency_colname], prominence=2)[0], axis=1)
-     df['angle_minima'] = df.apply(lambda x: find_peaks([-x for x in x[angle_colname]], distance=sampling_frequency * 0.6 / x[dominant_frequency_colname], prominence=2)[0], axis=1)
-
-     df['angle_new_minima'] = df['angle_minima'].copy()
-     df['angle_new_maxima'] = df['angle_maxima'].copy()
-
-     for index, _ in df.iterrows():
-         i_pks = 0  # iterable to keep track of consecutive min-min and max-max versus min-max
-         n_min = df.loc[index, 'angle_new_minima'].size  # number of minima in window
-         n_max = df.loc[index, 'angle_new_maxima'].size  # number of maxima in window
-
-         if n_min > 0 and n_max > 0:
-             # if the first minimum occurs before the first maximum, start with the minimum
-             if df.loc[index, 'angle_new_maxima'][0] > df.loc[index, 'angle_new_minima'][0]:
-                 # only continue if there are enough minima and maxima to perform operations
-                 while i_pks < df.loc[index, 'angle_new_minima'].size - 1 and i_pks < df.loc[index, 'angle_new_maxima'].size:
-
-                     # if the next minimum comes before the next maximum, we have two minima in a row, and should keep the larger one
-                     if df.loc[index, 'angle_new_minima'][i_pks+1] < df.loc[index, 'angle_new_maxima'][i_pks]:
-                         # if the next minimum is smaller than the current minimum, keep the next minimum and discard the current minimum
-                         if df.loc[index, angle_colname][df.loc[index, 'angle_new_minima'][i_pks+1]] < df.loc[index, angle_colname][df.loc[index, 'angle_new_minima'][i_pks]]:
-                             df.at[index, 'angle_new_minima'] = np.delete(df.loc[index, 'angle_new_minima'], i_pks)
-                         # otherwise, keep the current minimum and discard the next minimum
-                         else:
-                             df.at[index, 'angle_new_minima'] = np.delete(df.loc[index, 'angle_new_minima'], i_pks+1)
-                         i_pks -= 1
-
-                     # if the current maximum comes before the current minimum, we have two maxima in a row, and should keep the larger one
-                     if i_pks >= 0 and df.loc[index, 'angle_new_minima'][i_pks] > df.loc[index, 'angle_new_maxima'][i_pks]:
-                         # if the current maximum is smaller than the previous maximum, keep the previous maximum and discard the current maximum
-                         if df.loc[index, angle_colname][df.loc[index, 'angle_new_maxima'][i_pks]] < df.loc[index, angle_colname][df.loc[index, 'angle_new_maxima'][i_pks-1]]:
-                             df.at[index, 'angle_new_maxima'] = np.delete(df.loc[index, 'angle_new_maxima'], i_pks)
-                         # otherwise, keep the current maximum and discard the previous maximum
-                         else:
-                             df.at[index, 'angle_new_maxima'] = np.delete(df.loc[index, 'angle_new_maxima'], i_pks-1)
-                         i_pks -= 1
-                     i_pks += 1
-
-             # or if the first maximum occurs before the first minimum, start with the maximum
-             elif df.loc[index, 'angle_new_maxima'][0] < df.loc[index, 'angle_new_minima'][0]:
-                 # only continue if there are enough minima and maxima to perform operations
-                 while i_pks < df.loc[index, 'angle_new_minima'].size and i_pks < df.loc[index, 'angle_new_maxima'].size-1:
-                     # if the next maximum comes before the current minimum, we have two maxima in a row, and should keep the larger one
-                     if df.loc[index, 'angle_new_minima'][i_pks] > df.loc[index, 'angle_new_maxima'][i_pks+1]:
-                         # if the next maximum is smaller than the current maximum, keep the next maximum and discard the current maximum
-                         if df.loc[index, angle_colname][df.loc[index, 'angle_new_maxima'][i_pks+1]] > df.loc[index, angle_colname][df.loc[index, 'angle_new_maxima'][i_pks]]:
-                             df.at[index, 'angle_new_maxima'] = np.delete(df.loc[index, 'angle_new_maxima'], i_pks)
-                         # otherwise, keep the current maximum and discard the next maximum
-                         else:
-                             df.at[index, 'angle_new_maxima'] = np.delete(df.loc[index, 'angle_new_maxima'], i_pks+1)
-                         i_pks -= 1
-
-                     # if the current minimum comes before the current maximum, we have two minima in a row, and should keep the larger one
-                     if i_pks > 0 and df.loc[index, 'angle_new_minima'][i_pks] < df.loc[index, 'angle_new_maxima'][i_pks]:
-                         # if the current minimum is smaller than the previous minimum, keep the previous minimum and discard the current minimum
-                         if df.loc[index, angle_colname][df.loc[index, 'angle_new_minima'][i_pks]] < df.loc[index, angle_colname][df.loc[index, 'angle_new_minima'][i_pks-1]]:
-                             df.at[index, 'angle_new_minima'] = np.delete(df.loc[index, 'angle_new_minima'], i_pks-1)
-                         # otherwise, keep the current minimum and discard the previous minimum
-                         else:
-                             df.at[index, 'angle_new_minima'] = np.delete(df.loc[index, 'angle_new_minima'], i_pks)
-                         i_pks -= 1
-                     i_pks += 1
-
-     # for some peculiar reason, if a single item remains in the row for angle_new_minima or
-     # angle_new_maxima, it could be either a scalar or a vector.
-     for col in ['angle_new_minima', 'angle_new_maxima']:
-         df.loc[df.apply(lambda x: type(x[col].tolist())==int, axis=1), col] = df.loc[df.apply(lambda x: type(x[col].tolist())==int, axis=1), col].apply(lambda x: [x])
-
-     df['angle_extrema_values'] = df.apply(lambda x: [x[angle_colname][i] for i in np.concatenate([x['angle_new_minima'], x['angle_new_maxima']])], axis=1)
-
-     return
-
-
- def extract_range_of_motion(
-     angle_extrema_values_col: pd.Series,
- ) -> pd.Series:
-     """Extract the range of motion from the angle extrema values.
+     tuple
+         A tuple containing the indices of the angle extrema, minima, and maxima.
+     """
+     distance = sampling_frequency / max_frequency_activity
+     prominence = 2
+
+     # Find minima and maxima indices for each window
+     minima_indices = find_peaks(
+         x=-angle_array,
+         distance=distance,
+         prominence=prominence
+     )[0]
+     maxima_indices = find_peaks(
+         x=angle_array,
+         distance=distance,
+         prominence=prominence
+     )[0]
+
+     minima_indices = np.array(minima_indices, dtype=object)
+     maxima_indices = np.array(maxima_indices, dtype=object)
+
+     i_pks = 0
+     if minima_indices.size > 0 and maxima_indices.size > 0:
+         if maxima_indices[0] > minima_indices[0]:
+             # Start with a minimum
+             while i_pks < minima_indices.size - 1 and i_pks < maxima_indices.size:
+                 if minima_indices[i_pks + 1] < maxima_indices[i_pks]:
+                     if angle_array[minima_indices[i_pks + 1]] < angle_array[minima_indices[i_pks]]:
+                         minima_indices = np.delete(minima_indices, i_pks)
+                     else:
+                         minima_indices = np.delete(minima_indices, i_pks + 1)
+                     i_pks -= 1
+
+                 if i_pks >= 0 and minima_indices[i_pks] > maxima_indices[i_pks]:
+                     if angle_array[maxima_indices[i_pks]] < angle_array[maxima_indices[i_pks - 1]]:
+                         maxima_indices = np.delete(maxima_indices, i_pks)
+                     else:
+                         maxima_indices = np.delete(maxima_indices, i_pks - 1)
+                     i_pks -= 1
+                 i_pks += 1
+
+         elif maxima_indices[0] < minima_indices[0]:
+             # Start with a maximum
+             while i_pks < maxima_indices.size - 1 and i_pks < minima_indices.size:
+                 if maxima_indices[i_pks + 1] < minima_indices[i_pks]:
+                     if angle_array[maxima_indices[i_pks + 1]] < angle_array[maxima_indices[i_pks]]:
+                         maxima_indices = np.delete(maxima_indices, i_pks + 1)
+                     else:
+                         maxima_indices = np.delete(maxima_indices, i_pks)
+                     i_pks -= 1
+
+                 if i_pks >= 0 and maxima_indices[i_pks] > minima_indices[i_pks]:
+                     if angle_array[minima_indices[i_pks]] < angle_array[minima_indices[i_pks - 1]]:
+                         minima_indices = np.delete(minima_indices, i_pks - 1)
+                     else:
+                         minima_indices = np.delete(minima_indices, i_pks)
+                     i_pks -= 1
+                 i_pks += 1
+
+     # Combine remaining extrema and compute range of motion
+     angle_extrema_indices = np.sort(np.concatenate([minima_indices, maxima_indices]))
+
+     return list(angle_extrema_indices), list(minima_indices), list(maxima_indices)
+
+
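Extrema extraction composes with the range-of-motion helper added just below (synthetic swing signal; amplitudes chosen so the prominence threshold of 2 is met):

    import numpy as np
    from paradigma.feature_extraction import extract_angle_extremes, compute_range_of_motion

    fs = 100.0
    t = np.arange(0, 10, 1 / fs)
    angle = 20 * np.sin(2 * np.pi * 1.0 * t)          # ±20 degree swing at 1 Hz
    extrema_idx, minima_idx, maxima_idx = extract_angle_extremes(angle, sampling_frequency=fs)
    rom = compute_range_of_motion(angle, extrema_idx)  # ≈40 degrees between consecutive extrema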
+ def compute_range_of_motion(angle_array: np.ndarray, extrema_indices: List[int]) -> np.ndarray:
753
+ """
754
+ Compute the range of motion of a time series based on the angle extrema.

  Parameters
  ----------
- angle_extrema_values_col: pd.Series
- The column containing the angle extrema values
+ angle_array : np.ndarray
+ The angle array to compute the range of motion from.
+ extrema_indices : List[int]
+ The indices of the angle extrema.

  Returns
  -------
- pd.Series
- The range of motion
- """
- angle_amplitudes = np.empty((len(angle_extrema_values_col), 0)).tolist()
-
- # for each window
- for i, extrema_values in enumerate(angle_extrema_values_col):
- l_amplitudes = []
- # for each extremum contained in the window
- for j, value in enumerate(extrema_values):
- # if the extremum is not the last one in the list of extrema
- if j < len(extrema_values)-1:
- # if the current extremum is a maximum and the next one is a minimum, or vice versa
- if (value > 0 and extrema_values[j+1] < 0) or (value < 0 and extrema_values[j+1] > 0):
- # compute the amplitude as the sum of the absolute values of the two extrema
- l_amplitudes.append(np.sum(np.abs(value) + np.abs(extrema_values[j+1])))
- # or if the extrema are both positive or both negative, and the current extremum is closer to 0
- elif np.abs(value) < np.abs(extrema_values[j+1]):
- # compute the amplitude as the difference between the two extrema
- l_amplitudes.append(np.subtract(np.abs(extrema_values[j+1]), np.abs(value)))
- # or if the extrema are both positive and negative, and the current extremum is further away from 0
- else:
- # compute the amplitude as the difference between the two extrema
- l_amplitudes.append(np.subtract(np.abs(value), np.abs(extrema_values[j+1])))
-
- angle_amplitudes[i].append([x for x in l_amplitudes])
-
- return [y for item in angle_amplitudes for y in item]
-
-
- def extract_peak_angular_velocity(
- df: pd.DataFrame,
- velocity_colname: str,
- angle_minima_colname: str,
- angle_maxima_colname: str,
- ) -> pd.DataFrame:
- """Extract the forward and backward peak angular velocity from the angular velocity.
+ np.ndarray
+ The range of motion of the time series.
+ """
+ # Ensure extrema_indices is a list of integers
+ if not isinstance(extrema_indices, list):
+ raise TypeError("extrema_indices must be a list of integers.")
+
+ # Check bounds
+ if np.any(np.array(extrema_indices) < 0) or np.any(np.array(extrema_indices) >= len(angle_array)):
+ raise ValueError("extrema_indices contains out-of-bounds indices.")
 
+ # Extract the angle values at the extrema (minima and maxima)
+ angle_extremas = angle_array[extrema_indices]
+
+ # Compute the range of motion as absolute differences between consecutive extrema using np.diff
+ range_of_motion = np.abs(np.diff(angle_extremas))
+
+ return range_of_motion
+
+
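Editor's note: with alternating extrema, compute_range_of_motion reduces to absolute first differences of the angle values at the extrema. A worked toy example (hypothetical numbers):

    import numpy as np

    # Hypothetical angle signal (degrees) with extrema at indices 1, 3 and 5.
    angle = np.array([0.0, -10.0, 0.0, 12.0, 0.0, -9.0, 0.0])
    extrema_indices = [1, 3, 5]

    rom = np.abs(np.diff(angle[extrema_indices]))
    print(rom)  # [22. 21.] -> |12 - (-10)| and |(-9) - 12|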
+ def compute_peak_angular_velocity(
+ velocity_array: np.ndarray,
+ angle_extrema_indices: List[int],
+ ) -> np.ndarray:
+ """
+ Compute the peak angular velocity of a time series based on the angle extrema.
+
  Parameters
  ----------
- df: pd.DataFrame
- The dataframe containing the angular velocity
- velocity_colname: str
- The column name of the angular velocity
- angle_minima_colname: str
- The column name of the column containing the angle minima
- angle_maxima_colname: str
- The column name of the column containing the angle maxima
-
+ velocity_array : np.ndarray
+ The angular velocity array to compute the peak angular velocity from.
+ angle_extrema_indices : List[int]
+ The indices of the angle extrema.
+
  Returns
  -------
- pd.DataFrame
- The dataframe with the forward and backward peak angular velocity
+ np.ndarray
+ The peak angular velocities of the time series.
  """
- df['forward_peak_ang_vel'] = np.empty((len(df), 0)).tolist()
- df['backward_peak_ang_vel'] = np.empty((len(df), 0)).tolist()
-
- # for each window
- for index, row in df.iterrows():
- # the peak angular velocity can only be computed if there is at least one minimum and one maximum in the window
- if len(row[angle_minima_colname]) > 0 and len(row[angle_maxima_colname]) > 0:
- # combine the minima and maxima
- l_extrema_indices = np.sort(np.concatenate((row[angle_minima_colname], row[angle_maxima_colname])))
- # for each peak
- for j, peak_index in enumerate(l_extrema_indices):
- # if the peak is a maximum and there is another peak after it
- if peak_index in row[angle_maxima_colname] and j < len(l_extrema_indices) - 1:
- # compute the forward peak angular velocity, defined by the maximum negative angular velocity between the two peaks
- df.loc[index, 'forward_peak_ang_vel'].append(np.abs(min(row[velocity_colname][l_extrema_indices[j]:l_extrema_indices[j+1]])))
- # if the peak is a minimum and there is another peak after it
- elif peak_index in row[angle_minima_colname] and j < len(l_extrema_indices) - 1:
- # compute the backward peak angular velocity, defined by the maximum positive angular velocity between the two peaks
- df.loc[index, 'backward_peak_ang_vel'].append(np.abs(max(row[velocity_colname][l_extrema_indices[j]:l_extrema_indices[j+1]])))
+ if np.any(np.array(angle_extrema_indices) < 0) or np.any(np.array(angle_extrema_indices) >= len(velocity_array)):
+ raise ValueError("angle_extrema_indices contains out-of-bounds indices.")
+
+ if len(angle_extrema_indices) < 2:
+ raise ValueError("angle_extrema_indices must contain at least two indices.")
 
- return
+ # Initialize a list to store the peak velocities
+ pav = []
+
+ # Compute peak angular velocities
+ for i in range(len(angle_extrema_indices) - 1):
+ # Get the current and next extremum index
+ current_peak_idx = angle_extrema_indices[i]
+ next_peak_idx = angle_extrema_indices[i + 1]
+ segment = velocity_array[current_peak_idx:next_peak_idx]
+
+ pav.append(np.max(np.abs(segment)))

+ return np.array(pav)
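Editor's note: for each pair of consecutive extrema, compute_peak_angular_velocity keeps the largest absolute velocity observed in between. A minimal sketch on assumed toy data:

    import numpy as np

    # Hypothetical angular velocity trace (deg/s) with extrema at 0, 4 and 8.
    velocity = np.array([0.0, 30.0, 55.0, 20.0, 0.0, -25.0, -60.0, -10.0, 0.0])
    extrema_indices = [0, 4, 8]

    pav = [np.max(np.abs(velocity[i:j]))
           for i, j in zip(extrema_indices[:-1], extrema_indices[1:])]
    print(pav)  # [55.0, 60.0]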

- def extract_temporal_domain_features(config: IMUConfig, df_windowed:pd.DataFrame, l_gravity_stats=['mean', 'std']) -> pd.DataFrame:
+
+ def compute_forward_backward_peak_angular_velocity(
+ velocity_array: np.ndarray,
+ angle_extrema_indices: List[int],
+ minima_indices: List[int],
+ maxima_indices: List[int],
+ ) -> Tuple[np.ndarray, np.ndarray]:
  """
- Compute temporal domain features for the accelerometer signal. The features are added to the dataframe. Therefore the original dataframe is modified, and the modified dataframe is returned.
+ Compute the forward and backward peak angular velocities of a time series based on the angle extrema.
 
  Parameters
  ----------
-
- config: GaitFeatureExtractionConfig
- The configuration object containing the parameters for the feature extraction
+ velocity_array : np.ndarray
+ The angular velocity array to compute the peak angular velocity from.
+ angle_extrema_indices : List[int]
+ The indices of the angle extrema.
+ minima_indices : List[int]
+ The indices of the minima.
+ maxima_indices : List[int]
+ The indices of the maxima.
 
- df_windowed: pd.DataFrame
- The dataframe containing the windowed accelerometer signal
+ Returns
+ -------
+ Tuple[np.ndarray, np.ndarray]
+ A tuple containing the forward (segments starting at minima) and backward (segments starting at maxima) peak angular velocities.
+ """
+ if np.any(np.array(angle_extrema_indices) < 0) or np.any(np.array(angle_extrema_indices) >= len(velocity_array)):
+ raise ValueError("angle_extrema_indices contains out-of-bounds indices.")
+
+ if len(angle_extrema_indices) < 2:
+ raise ValueError("angle_extrema_indices must contain at least two indices.")
+
+ if len(minima_indices) == 0:
+ raise ValueError("No minima indices found.")
+
+ if len(maxima_indices) == 0:
+ raise ValueError("No maxima indices found.")
+
+ # Initialize lists to store the peak velocities
+ forward_pav = []
+ backward_pav = []
+
+ # Compute peak angular velocities
+ for i in range(len(angle_extrema_indices) - 1):
+ # Get the current and next extremum index
+ current_peak_idx = angle_extrema_indices[i]
+ next_peak_idx = angle_extrema_indices[i + 1]
+ segment = velocity_array[current_peak_idx:next_peak_idx]
 
- l_gravity_stats: list, optional
- The statistics to be computed for the gravity component of the accelerometer signal (default: ['mean', 'std'])
+ # Check if the current peak is a minimum or maximum and calculate peak velocity accordingly
+ if current_peak_idx in minima_indices:
+ forward_pav.append(np.max(np.abs(segment)))
+ elif current_peak_idx in maxima_indices:
+ backward_pav.append(np.max(np.abs(segment)))
+
+ # Convert lists to numpy arrays
+ forward_pav = np.array(forward_pav)
+ backward_pav = np.array(backward_pav)
+
+ return forward_pav, backward_pav
+
+
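Editor's note: the forward/backward variant differs only in bookkeeping: a segment starting at a minimum contributes a forward peak, a segment starting at a maximum a backward peak. A sketch under the same toy data as above (illustrative only):

    import numpy as np

    velocity = np.array([0.0, 30.0, 55.0, 20.0, 0.0, -25.0, -60.0, -10.0, 0.0])
    minima_indices, maxima_indices = [0, 8], [4]
    extrema_indices = sorted(minima_indices + maxima_indices)

    forward_pav, backward_pav = [], []
    for i, j in zip(extrema_indices[:-1], extrema_indices[1:]):
        peak = np.max(np.abs(velocity[i:j]))
        (forward_pav if i in minima_indices else backward_pav).append(peak)

    print(forward_pav, backward_pav)  # [55.0] [60.0]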
+ def compute_signal_to_noise_ratio(
+ ppg_windowed: np.ndarray
+ ) -> np.ndarray:
+ """
+ Compute the signal to noise ratio of the PPG signal.
 
+ Parameters
+ ----------
+ ppg_windowed: np.ndarray
+ The windowed PPG signal.
+
  Returns
  -------
- pd.DataFrame
- The dataframe with the added temporal domain features.
- """
-
- # compute the mean and standard deviation of the gravity component of the acceleration signal for each axis
- for col in config.l_gravity_cols:
- for stat in l_gravity_stats:
- df_windowed[f'{col}_{stat}'] = generate_statistics(
- sensor_col=df_windowed[col],
- statistic=stat
- )
-
- # compute the standard deviation of the Euclidean norm of the three axes
- df_windowed['std_norm_acc'] = generate_std_norm(
- df=df_windowed,
- cols=config.l_accelerometer_cols
- )
+ np.ndarray
+ The signal to noise ratio of the PPG signal.
+ """
 
- return df_windowed
-
-
- def extract_spectral_domain_features(config, df_windowed, sensor, l_sensor_colnames):
-
- for col in l_sensor_colnames:
-
- # transform the temporal signal to the spectral domain using the fast fourier transform
- df_windowed[f'{col}_freqs'], df_windowed[f'{col}_fft'] = signal_to_ffts(
- sensor_col=df_windowed[col],
- window_type=config.window_type,
- sampling_frequency=config.sampling_frequency
- )
-
- # compute the power in distinct frequency bandwidths
- for bandwidth, frequencies in config.d_frequency_bandwidths.items():
- df_windowed[col+'_'+bandwidth] = df_windowed.apply(lambda x: compute_power_in_bandwidth(
- sensor_col=x[col],
- fmin=frequencies[0],
- fmax=frequencies[1],
- sampling_frequency=config.sampling_frequency,
- window_type=config.window_type,
- ), axis=1
- )
-
- # compute the dominant frequency, i.e., the frequency with the highest power
- df_windowed[col+'_dominant_frequency'] = df_windowed.apply(lambda x: get_dominant_frequency(
- signal_ffts=x[col+'_fft'],
- signal_freqs=x[col+'_freqs'],
- fmin=config.spectrum_low_frequency,
- fmax=config.spectrum_high_frequency
- ), axis=1
- )
+ arr_signal = np.var(ppg_windowed, axis=1)
+ arr_noise = np.var(np.abs(ppg_windowed), axis=1)
+ signal_to_noise_ratio = arr_signal / arr_noise
+
+ return signal_to_noise_ratio
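Editor's note: this "signal to noise ratio" is a per-window variance ratio: taking the absolute value shrinks the variance of a clean zero-mean oscillation far more than that of irregular noise, so pulsatile windows score higher. A minimal sketch with synthetic windows (the sampling rate and signals are assumptions):

    import numpy as np

    rng = np.random.default_rng(0)
    t = np.arange(0, 6, 1 / 30)                  # 6 s at an assumed 30 Hz
    clean = np.sin(2 * np.pi * 1.2 * t)          # pulse-like window (~72 bpm)
    noisy = rng.standard_normal(t.size)          # noise-dominated window
    windows = np.vstack([clean, noisy])

    snr = np.var(windows, axis=1) / np.var(np.abs(windows), axis=1)
    print(snr)  # the clean window yields a clearly larger ratio (~5.3 vs ~2.8)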
 
- # compute the power summed over the individual axes to obtain the total power per frequency bandwidth
- for bandwidth in config.d_frequency_bandwidths.keys():
- df_windowed['total_'+bandwidth] = df_windowed.apply(lambda x: sum(x[y+'_'+bandwidth] for y in l_sensor_colnames), axis=1)
-
- # compute the power summed over the individual frequency bandwidths to obtain the total power
- df_windowed['total_power'] = compute_power(
- df=df_windowed,
- fft_cols=[f'{col}_fft' for col in l_sensor_colnames])
-
- # compute the cepstral coefficients of the total power signal
- cc_cols = generate_cepstral_coefficients(
- total_power_col=df_windowed['total_power'],
- window_length_s=config.window_length_s,
- sampling_frequency=config.sampling_frequency,
- low_frequency=config.spectrum_low_frequency,
- high_frequency=config.spectrum_high_frequency,
- n_filters=config.n_dct_filters_cc,
- n_coefficients=config.n_coefficients_cc
- )
+ def compute_auto_correlation(
+ ppg_windowed: np.ndarray,
+ fs: int
+ ) -> np.ndarray:
+ """
+ Compute the biased autocorrelation of the PPG signal. The autocorrelation is computed up to 3 seconds. The highest peak value is selected as the autocorrelation value. If no peaks are found, the value is set to 0.
+ The biased autocorrelation is computed using the biased_autocorrelation function. It differs from the unbiased autocorrelation in that the normalization factor is the length of the original signal, and boundary effects are considered. This results in a smoother autocorrelation function.
+
+ Parameters
+ ----------
+ ppg_windowed: np.ndarray
+ The windowed PPG signal.
+ fs: int
+ The sampling frequency of the PPG signal.
+
+ Returns
+ -------
+ np.ndarray
+ The autocorrelation of the PPG signal.
+ """

- df_windowed = pd.concat([df_windowed, cc_cols], axis=1)
+ auto_correlations = biased_autocorrelation(ppg_windowed, fs*3) # compute the biased autocorrelation of the PPG signal up to 3 seconds
+ peaks = [find_peaks(x, height=0.01)[0] for x in auto_correlations] # find the peaks of the autocorrelation
+ sorted_peak_values = [np.sort(auto_correlations[i, indices])[::-1] for i, indices in enumerate(peaks)] # sort the peak values in descending order
+ auto_correlations = [x[0] if len(x) > 0 else 0 for x in sorted_peak_values] # get the highest peak value if there are any peaks, otherwise set to 0

- df_windowed = df_windowed.rename(columns={f'cc_{cc_nr}': f'cc_{cc_nr}_{sensor}' for cc_nr in range(1,config.n_coefficients_cc+1)}).rename(columns={'window_start': 'time'})
+ return np.asarray(auto_correlations)
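Editor's note: per window, the feature is the height of the tallest autocorrelation peak within the 3-second lag range, so a strongly periodic pulse scores close to its first-lag peak while an aperiodic window scores much lower. A usage sketch (toy windows; relies on biased_autocorrelation as defined just below and scipy's find_peaks):

    import numpy as np
    from scipy.signal import find_peaks

    fs = 30                                      # assumed sampling frequency (Hz)
    t = np.arange(0, 6, 1 / fs)
    ppg_windowed = np.vstack([
        np.sin(2 * np.pi * 1.2 * t),             # periodic, pulse-like window
        np.random.default_rng(1).standard_normal(t.size),  # aperiodic window
    ])

    acf = biased_autocorrelation(ppg_windowed, fs * 3)
    features = []
    for row in acf:
        peaks, _ = find_peaks(row, height=0.01)
        features.append(row[peaks].max() if peaks.size else 0)
    print(features)  # the periodic window scores far higher than the noise window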
+
+ def biased_autocorrelation(
+ ppg_windowed: np.ndarray,
+ max_lag: int
+ ) -> np.ndarray:
+ """
+ Compute the biased autocorrelation of a signal (similar to MATLAB's autocorr function), where the normalization factor
+ is the length of the original signal, and boundary effects are considered.
+
+ Parameters
+ ----------
+ ppg_windowed: np.ndarray
+ The windowed PPG signal.
+ max_lag: int
+ The maximum lag for the autocorrelation.

- return df_windowed
+ Returns
+ -------
+ np.ndarray
+ The biased autocorrelation of the PPG signal.
+
+ """
+ zero_mean_ppg = ppg_windowed - np.mean(ppg_windowed, axis=1, keepdims=True) # Remove the mean of the signal to make it zero-mean
+ N = zero_mean_ppg.shape[1]
+ autocorr_values = np.zeros((zero_mean_ppg.shape[0], max_lag + 1))
+
+ for lag in range(max_lag + 1):
+ # Compute autocorrelation for current lag
+ overlapping_points = zero_mean_ppg[:, :N-lag] * zero_mean_ppg[:, lag:]
+ autocorr_values[:, lag] = np.sum(overlapping_points, axis=1) / N # Divide by N (biased normalization)
+
+ return autocorr_values/autocorr_values[:, 0, np.newaxis] # Normalize the autocorrelation values
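Editor's note: "biased" means every lag is divided by the full window length N rather than by N - lag, tapering the estimate toward zero at long lags (the smoothing the docstring refers to). The output can be sanity-checked against numpy's correlation (a sketch; the white-noise input is illustrative):

    import numpy as np

    x = np.random.default_rng(2).standard_normal((1, 256))  # one toy window
    acf = biased_autocorrelation(x, max_lag=10)

    # Reference: the same estimate via np.correlate on the zero-mean row
    # (the 1/N factor cancels once both are normalized by the lag-0 value).
    x0 = x[0] - x[0].mean()
    ref = np.correlate(x0, x0, mode="full")[x0.size - 1:]
    ref = ref / ref[0]
    np.testing.assert_allclose(acf[0], ref[:11])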