paradigma 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry, and is provided for informational purposes only. The changes in this release are almost entirely a code-formatting pass (double-quoted strings, wrapped long lines, trailing commas, import reordering, and trailing-whitespace removal, consistent with an autoformatter such as Black); no functional changes are visible in the hunks below.
@@ -1,7 +1,7 @@
- import numpy as np
- import pandas as pd
  from typing import List, Tuple

+ import numpy as np
+ import pandas as pd
  from scipy.integrate import cumulative_trapezoid
  from scipy.signal import find_peaks, windows
  from scipy.stats import kurtosis, skew
@@ -10,7 +10,9 @@ from sklearn.decomposition import PCA
  from paradigma.config import PulseRateConfig


- def compute_statistics(data: np.ndarray, statistic: str, abs_stats: bool=False) -> np.ndarray:
+ def compute_statistics(
+     data: np.ndarray, statistic: str, abs_stats: bool = False
+ ) -> np.ndarray:
      """
      Compute a specific statistical measure along the timestamps of a 2D or 3D array.

@@ -29,7 +31,7 @@ def compute_statistics(data: np.ndarray, statistic: str, abs_stats: bool=False)
          - 'kurtosis': Kurtosis.
          - 'skewness': Skewness.
      abs_stats : bool, optional
-         Whether to compute the statistics on the absolute values of the data for 
+         Whether to compute the statistics on the absolute values of the data for
          the mean and median (default: False).

      Returns
@@ -43,33 +45,42 @@ def compute_statistics(data: np.ndarray, statistic: str, abs_stats: bool=False)
      ValueError
          If the specified `statistic` is not supported or if the input data has an invalid shape.
      """
-     if statistic not in ['mean', 'median', 'var', 'std', 'max', 'min', 'kurtosis', 'skewness']:
+     if statistic not in [
+         "mean",
+         "median",
+         "var",
+         "std",
+         "max",
+         "min",
+         "kurtosis",
+         "skewness",
+     ]:
          raise ValueError(f"Statistic '{statistic}' is not supported.")
-
+
      if data.ndim > 3 or data.ndim < 2:
          raise ValueError("Input data must be a 1D, 2D or 3D array.")

-     if statistic == 'mean':
+     if statistic == "mean":
          if abs_stats:
              return np.mean(np.abs(data), axis=1)
          else:
              return np.mean(data, axis=1)
-     elif statistic == 'median':
+     elif statistic == "median":
          if abs_stats:
              return np.median(np.abs(data), axis=1)
          else:
              return np.median(data, axis=1)
-     elif statistic == 'var':
+     elif statistic == "var":
          return np.var(data, ddof=1, axis=1)
-     elif statistic == 'std':
+     elif statistic == "std":
          return np.std(data, axis=1)
-     elif statistic == 'max':
+     elif statistic == "max":
          return np.max(data, axis=1)
-     elif statistic == 'min':
+     elif statistic == "min":
          return np.min(data, axis=1)
-     elif statistic == 'kurtosis':
+     elif statistic == "kurtosis":
          return kurtosis(data, fisher=False, axis=1)
-     elif statistic == 'skewness':
+     elif statistic == "skewness":
          return skew(data, axis=1)
      else:
          raise ValueError(f"Statistic '{statistic}' is not supported.")
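Since nothing functional changed in this hunk, the behaviour of `compute_statistics` is the same in both versions. A minimal usage sketch (shapes and values are made up; it assumes the function is imported from this module):

```python
import numpy as np

# Hypothetical windowed IMU data: 4 windows x 100 samples x 3 axes
rng = np.random.default_rng(0)
windowed = rng.normal(size=(4, 100, 3))

means = compute_statistics(windowed, statistic="mean")                # shape (4, 3)
abs_medians = compute_statistics(windowed, "median", abs_stats=True)  # median of |x|
kurt = compute_statistics(windowed, statistic="kurtosis")             # Pearson definition (fisher=False)
```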
@@ -79,8 +90,8 @@ def compute_std_euclidean_norm(data: np.ndarray) -> np.ndarray:
      """
      Compute the standard deviation of the Euclidean norm for each window of sensor data.

-     The function calculates the Euclidean norm (L2 norm) across sensor axes for each 
-     timestamp within a window, and then computes the standard deviation of these norms 
+     The function calculates the Euclidean norm (L2 norm) across sensor axes for each
+     timestamp within a window, and then computes the standard deviation of these norms
      for each window.

      Parameters
@@ -94,32 +105,34 @@ def compute_std_euclidean_norm(data: np.ndarray) -> np.ndarray:
      Returns
      -------
      np.ndarray
-         A 1D array of shape (n_windows,) containing the standard deviation of the 
+         A 1D array of shape (n_windows,) containing the standard deviation of the
          Euclidean norm for each window.
      """
-     norms = np.linalg.norm(data, axis=2)  # Norm along the sensor axes (norm per timestamp, per window)
+     norms = np.linalg.norm(
+         data, axis=2
+     )  # Norm along the sensor axes (norm per timestamp, per window)
      return np.std(norms, axis=1)  # Standard deviation per window


  def compute_power_in_bandwidth(
-     freqs: np.ndarray, 
-     psd: np.ndarray, 
-     fmin: float, 
-     fmax: float, 
-     include_max: bool = True, 
-     spectral_resolution: float = 1, 
-     cumulative_sum_method: str = 'trapz'
-     ) -> np.ndarray:
+     freqs: np.ndarray,
+     psd: np.ndarray,
+     fmin: float,
+     fmax: float,
+     include_max: bool = True,
+     spectral_resolution: float = 1,
+     cumulative_sum_method: str = "trapz",
+ ) -> np.ndarray:
      """
      Compute the logarithmic power within specified frequency bands for each sensor axis.

-     This function integrates the power spectral density (PSD) over user-defined frequency 
+     This function integrates the power spectral density (PSD) over user-defined frequency
      bands and computes the logarithm of the resulting power for each axis of the sensor.

      Parameters
      ----------
      freqs : np.ndarray
-         A 1D array of shape (n_frequencies,) containing the frequencies corresponding 
+         A 1D array of shape (n_frequencies,) containing the frequencies corresponding
          to the PSD values.
      psd : np.ndarray
          A 2D array of shape (n_windows, n_frequencies) or 3D array of shape (n_windows, n_frequencies, n_axes)
@@ -133,7 +146,7 @@ def compute_power_in_bandwidth(
      spectral_resolution : float, optional
          The spectral resolution of the PSD in Hz (default: 1).
      cumulative_sum_method : str, optional
-         The method used to integrate the PSD over the frequency band. Supported values are: 
+         The method used to integrate the PSD over the frequency band. Supported values are:
          - 'trapz': Trapezoidal rule.
          - 'sum': Simple summation (default: 'trapz').

@@ -148,16 +161,18 @@ def compute_power_in_bandwidth(
          band_mask = (freqs >= fmin) & (freqs <= fmax)
      else:
          band_mask = (freqs >= fmin) & (freqs < fmax)
-
+
      # Integrate PSD over the selected frequency band using the band mask
      if psd.ndim == 2:
          masked_psd = psd[:, band_mask]
      elif psd.ndim == 3:
          masked_psd = psd[:, band_mask, :]

-     if cumulative_sum_method == 'trapz':
-         band_power = spectral_resolution * np.trapz(masked_psd, freqs[band_mask], axis=1)
-     elif cumulative_sum_method == 'sum':
+     if cumulative_sum_method == "trapz":
+         band_power = spectral_resolution * np.trapz(
+             masked_psd, freqs[band_mask], axis=1
+         )
+     elif cumulative_sum_method == "sum":
          band_power = spectral_resolution * np.sum(masked_psd, axis=1)
      else:
          raise ValueError("cumulative_sum_method must be 'trapz' or 'sum'.")
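For reference, the integration that `compute_power_in_bandwidth` performs can be reproduced with plain NumPy/SciPy. A sketch under assumed inputs (the Welch parameters and the 3-7 Hz band are illustrative; the hunk ends before the return, so the final `log10` step follows the docstring rather than visible code, and the function additionally scales by `spectral_resolution`):

```python
import numpy as np
from scipy.signal import welch

fs = 100  # assumed sampling frequency
rng = np.random.default_rng(1)
data = rng.normal(size=(8, 4 * fs))  # 8 windows of 4 s each

freqs, psd = welch(data, fs=fs, nperseg=2 * fs, axis=1)

band_mask = (freqs >= 3) & (freqs <= 7)  # include_max=True behaviour
band_power = np.trapz(psd[:, band_mask], freqs[band_mask], axis=1)
log_band_power = np.log10(band_power + 1e-10)  # small offset to avoid log(0), an assumption
```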
@@ -169,34 +184,33 @@ def compute_total_power(psd: np.ndarray) -> np.ndarray:
      """
      Compute the total power by summing the power spectral density (PSD) across frequency bins.

-     This function calculates the total power for each window and each sensor axis by 
+     This function calculates the total power for each window and each sensor axis by
      summing the PSD values across all frequency bins.

      Parameters
      ----------
      psd : np.ndarray
-         A 3D array of shape (n_windows, n_frequencies, n_axes) representing the 
+         A 3D array of shape (n_windows, n_frequencies, n_axes) representing the
          power spectral density (PSD) of the sensor data.

      Returns
      -------
      np.ndarray
-         A 2D array of shape (n_windows, n_axes) containing the total power for each 
+         A 2D array of shape (n_windows, n_axes) containing the total power for each
          window and each sensor axis.
      """
      return np.sum(psd, axis=-1)  # Sum across frequency bins


  def extract_tremor_power(
-     freqs: np.ndarray, 
+     freqs: np.ndarray,
      total_psd: np.ndarray,
      fmin: float = 3,
      fmax: float = 7,
-     spectral_resolution: float = 0.25
-     ) -> np.ndarray:
-
+     spectral_resolution: float = 0.25,
+ ) -> np.ndarray:
      """Computes the tremor power (1.25 Hz around the peak within the tremor frequency band)
-
+
      Parameters
      ----------
      total_psd: np.ndarray
@@ -209,16 +223,16 @@ def extract_tremor_power(
          The upper bound of the tremor frequency band in Hz (default: 7)
      spectral_resolution: float
          The spectral resolution of the PSD in Hz (default: 0.25)
-
+
      Returns
      -------
      pd.Series
          The tremor power across windows
      """
-
+
      freq_idx = (freqs >= fmin) & (freqs <= fmax)
      peak_idx = np.argmax(total_psd[:, freq_idx], axis=1) + np.min(np.where(freq_idx)[0])
-     left_idx = np.maximum((peak_idx - 0.5 / spectral_resolution).astype(int), 0) 
+     left_idx = np.maximum((peak_idx - 0.5 / spectral_resolution).astype(int), 0)
      right_idx = (peak_idx + 0.5 / spectral_resolution).astype(int)

      row_indices = np.arange(total_psd.shape[1])
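The hunk above only reflows `extract_tremor_power`: it locates the PSD peak inside the 3-7 Hz tremor band and takes the power in a ±0.5 Hz window around that peak. A sketch with made-up numbers (the summation step after `right_idx` is not visible in this diff, so the scaled sum below is an assumption based on the docstring):

```python
import numpy as np

spectral_resolution = 0.25
freqs = np.arange(0, 12.5, spectral_resolution)
rng = np.random.default_rng(2)
total_psd = rng.random((5, freqs.size))  # 5 hypothetical windows

band = (freqs >= 3) & (freqs <= 7)
peak_idx = np.argmax(total_psd[:, band], axis=1) + np.min(np.where(band)[0])
left = np.maximum((peak_idx - 0.5 / spectral_resolution).astype(int), 0)
right = (peak_idx + 0.5 / spectral_resolution).astype(int)

# Power in the peak-centred band, per window (assumed integration rule)
tremor_power = np.array(
    [total_psd[i, l : r + 1].sum() * spectral_resolution for i, (l, r) in enumerate(zip(left, right))]
)
```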
@@ -234,24 +248,24 @@ def extract_tremor_power(


  def compute_dominant_frequency(
-     freqs: np.ndarray, 
-     psd: np.ndarray, 
-     fmin: float | None = None, 
-     fmax: float | None = None
-     ) -> np.ndarray:
+     freqs: np.ndarray,
+     psd: np.ndarray,
+     fmin: float | None = None,
+     fmax: float | None = None,
+ ) -> np.ndarray:
      """
      Compute the dominant frequency within a specified frequency range for each window and sensor axis.

-     The dominant frequency is defined as the frequency corresponding to the maximum power in the 
+     The dominant frequency is defined as the frequency corresponding to the maximum power in the
      power spectral density (PSD) within the specified range.

      Parameters
      ----------
      freqs : np.ndarray
-         A 1D array of shape (n_frequencies,) containing the frequencies corresponding 
+         A 1D array of shape (n_frequencies,) containing the frequencies corresponding
          to the PSD values.
      psd : np.ndarray
-         A 2D array of shape (n_windows, n_frequencies) or a 3D array of shape 
+         A 2D array of shape (n_windows, n_frequencies) or a 3D array of shape
          (n_windows, n_frequencies, n_axes) representing the power spectral density.
      fmin : float
          The lower bound of the frequency range (inclusive).
@@ -261,9 +275,9 @@ def compute_dominant_frequency(
      Returns
      -------
      np.ndarray
-         - If `psd` is 2D: A 1D array of shape (n_windows,) containing the dominant frequency 
+         - If `psd` is 2D: A 1D array of shape (n_windows,) containing the dominant frequency
            for each window.
-         - If `psd` is 3D: A 2D array of shape (n_windows, n_axes) containing the dominant 
+         - If `psd` is 3D: A 2D array of shape (n_windows, n_axes) containing the dominant
            frequency for each window and each axis.

      Raises
@@ -280,40 +294,45 @@ def compute_dominant_frequency(

      # Validate the frequency range
      if fmin < freqs[0] or fmax > freqs[-1]:
-         raise ValueError(f"fmin {fmin} or fmax {fmax} are out of bounds of the frequency array.")
-
+         raise ValueError(
+             f"fmin {fmin} or fmax {fmax} are out of bounds of the frequency array."
+         )
+
      # Find the indices corresponding to fmin and fmax
      min_index = np.searchsorted(freqs, fmin)
      max_index = np.searchsorted(freqs, fmax)

      # Slice the PSD and frequency array to the desired range
-     psd_filtered = psd[:, min_index:max_index] if psd.ndim == 2 else psd[:, min_index:max_index, :]
+     psd_filtered = (
+         psd[:, min_index:max_index] if psd.ndim == 2 else psd[:, min_index:max_index, :]
+     )
      freqs_filtered = freqs[min_index:max_index]

      # Compute dominant frequency
      if psd.ndim == 3:
          # 3D: Compute for each axis
-         return np.array([
-             freqs_filtered[np.argmax(psd_filtered[:, :, i], axis=1)]
-             for i in range(psd.shape[-1])
-         ]).T
+         return np.array(
+             [
+                 freqs_filtered[np.argmax(psd_filtered[:, :, i], axis=1)]
+                 for i in range(psd.shape[-1])
+             ]
+         ).T
      elif psd.ndim == 2:
          # 2D: Compute for each window
          return freqs_filtered[np.argmax(psd_filtered, axis=1)]
      else:
          raise ValueError("PSD array must be 2D or 3D.")
-
+

  def extract_frequency_peak(
      freqs: np.ndarray,
      psd: np.ndarray,
      fmin: float | None = None,
      fmax: float | None = None,
-     include_max: bool = True
-     ) -> pd.Series:
-
+     include_max: bool = True,
+ ) -> pd.Series:
      """Extract the frequency of the peak in the power spectral density within the specified frequency band.
-
+
      Parameters
      ----------
      freqs: pd.Series
@@ -326,12 +345,12 @@ def extract_frequency_peak(
          The upper bound of the frequency band in Hz (default: None). If not provided, the maximum frequency is used.
      include_max: bool
          Whether to include the maximum frequency in the search range (default: True)
-
+
      Returns
      -------
      pd.Series
          The frequency of the peak across windows
-     """ 
+     """
      # Set fmin and fmax to maximum range if not provided
      if fmin is None:
          fmin = freqs[0]
@@ -340,9 +359,9 @@ def extract_frequency_peak(

      # Find the indices corresponding to fmin and fmax
      if include_max:
-         freq_idx = np.where((freqs>=fmin) & (freqs<=fmax))[0]
+         freq_idx = np.where((freqs >= fmin) & (freqs <= fmax))[0]
      else:
-         freq_idx = np.where((freqs>=fmin) & (freqs<fmax))[0]
+         freq_idx = np.where((freqs >= fmin) & (freqs < fmax))[0]

      peak_idx = np.argmax(psd[:, freq_idx], axis=1)
      frequency_peak = freqs[freq_idx][peak_idx]
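`compute_dominant_frequency` and `extract_frequency_peak` differ mainly in how they bound the search range (`searchsorted` slicing vs. boolean masks with `include_max`). A toy check of the dominant-frequency path, with a synthetic PSD whose peak is known (assumes the function is in scope):

```python
import numpy as np

freqs = np.linspace(0, 5, 101)  # 0.05 Hz grid
# 3 windows, each with a sharp spectral peak at 1.0 Hz
psd = np.tile(np.exp(-((freqs - 1.0) ** 2) / 0.01), (3, 1))

dominant = compute_dominant_frequency(freqs, psd, fmin=0.5, fmax=3.0)
print(dominant)  # -> array([1., 1., 1.]) up to grid resolution
```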
@@ -351,10 +370,8 @@ def extract_frequency_peak(


  def compute_relative_power(
-     freqs: np.ndarray, 
-     psd: np.ndarray, 
-     config: PulseRateConfig
-     ) -> list:
+     freqs: np.ndarray, psd: np.ndarray, config: PulseRateConfig
+ ) -> list:
      """
      Calculate relative power within the dominant frequency band in the physiological range (0.75 - 3 Hz).

@@ -375,30 +392,38 @@ def compute_relative_power(
      Returns
      -------
      list
-         The relative power within the dominant frequency band in the physiological range (0.75 - 3 Hz). 
-     
+         The relative power within the dominant frequency band in the physiological range (0.75 - 3 Hz).
+
      """
-     hr_range_mask = (freqs >= config.freq_band_physio[0]) & (freqs <= config.freq_band_physio[1])
+     hr_range_mask = (freqs >= config.freq_band_physio[0]) & (
+         freqs <= config.freq_band_physio[1]
+     )
      hr_range_idx = np.where(hr_range_mask)[0]
      peak_idx = np.argmax(psd[:, hr_range_idx], axis=1)
      peak_freqs = freqs[hr_range_idx[peak_idx]]

-     dom_band_idx = [np.where((freqs >= peak_freq - config.bandwidth) & (freqs <= peak_freq + config.bandwidth))[0] for peak_freq in peak_freqs]
-     rel_power = [np.trapz(psd[j, idx], freqs[idx]) / np.trapz(psd[j, :], freqs) for j, idx in enumerate(dom_band_idx)]
+     dom_band_idx = [
+         np.where(
+             (freqs >= peak_freq - config.bandwidth)
+             & (freqs <= peak_freq + config.bandwidth)
+         )[0]
+         for peak_freq in peak_freqs
+     ]
+     rel_power = [
+         np.trapz(psd[j, idx], freqs[idx]) / np.trapz(psd[j, :], freqs)
+         for j, idx in enumerate(dom_band_idx)
+     ]
      return rel_power


- def compute_spectral_entropy(
-     psd: np.ndarray, 
-     n_samples: int
-     ) -> np.ndarray:
+ def compute_spectral_entropy(psd: np.ndarray, n_samples: int) -> np.ndarray:
      """
      Calculate the spectral entropy from the normalized power spectral density.

      Parameters
      ----------
      psd: np.ndarray
-         The power spectral density of the signal. 
+         The power spectral density of the signal.
      n_samples: int
          The number of samples in the window.

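The `compute_relative_power` logic in the hunk above depends on `PulseRateConfig` only through two attributes, `freq_band_physio` and `bandwidth`. A self-contained sketch that mimics the same computation with literals standing in for the config (the specific values are assumptions, not the package's defaults):

```python
import numpy as np

freqs = np.linspace(0, 5, 251)
psd = (np.exp(-((freqs - 1.2) ** 2) / 0.02) + 0.01)[np.newaxis, :]  # peak near 72 bpm

freq_band_physio, bandwidth = (0.75, 3.0), 0.2  # assumed config values
idx = np.where((freqs >= freq_band_physio[0]) & (freqs <= freq_band_physio[1]))[0]
peak_freq = freqs[idx[np.argmax(psd[0, idx])]]

# Power in the peak-centred band relative to total power
band = np.where(np.abs(freqs - peak_freq) <= bandwidth)[0]
rel_power = np.trapz(psd[0, band], freqs[band]) / np.trapz(psd[0], freqs)
```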
@@ -408,33 +433,35 @@ def compute_spectral_entropy(
          The spectral entropy of the power spectral density.
      """
      psd_norm = psd / np.sum(psd, axis=1, keepdims=True)
-     spectral_entropy = -np.sum(psd_norm * np.log2(psd_norm), axis=1) / np.log2(n_samples)
-
+     spectral_entropy = -np.sum(psd_norm * np.log2(psd_norm), axis=1) / np.log2(
+         n_samples
+     )
+
      return spectral_entropy


  def compute_mfccs(
-     total_power_array: np.ndarray,
-     config,
-     total_power_type: str = 'psd',
-     mel_scale: bool = True,
-     multiplication_factor: float = 1,
-     rounding_method: str = 'floor'
-     ) -> np.ndarray:
+     total_power_array: np.ndarray,
+     config,
+     total_power_type: str = "psd",
+     mel_scale: bool = True,
+     multiplication_factor: float = 1,
+     rounding_method: str = "floor",
+ ) -> np.ndarray:
      """
      Generate Mel Frequency Cepstral Coefficients (MFCCs) from the total power spectral density or spectrogram of the signal.

-     MFCCs are commonly used features in signal processing for tasks like audio and 
+     MFCCs are commonly used features in signal processing for tasks like audio and
      vibration analysis. In this version, we adjusted the MFFCs to the human activity
      range according to: https://www.sciencedirect.com/science/article/abs/pii/S016516841500331X#f0050.
-     This function calculates MFCCs by applying a filterbank 
-     (in either the mel scale or linear scale) to the total power of the signal, 
+     This function calculates MFCCs by applying a filterbank
+     (in either the mel scale or linear scale) to the total power of the signal,
      followed by a Discrete Cosine Transform (DCT) to obtain coefficients.

      Parameters
      ----------
      total_power_array : np.ndarray
-         2D array of shape (n_windows, n_frequencies) containing the total power 
+         2D array of shape (n_windows, n_frequencies) containing the total power
          of the signal for each window.
          OR
          3D array of shape (n_windows, n_frequencies, n_segments) containing the total spectrogram
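The same hunk reflows `compute_spectral_entropy`, which is the Shannon entropy of the bin-normalized PSD scaled by log2 of the window length. A minimal numerical sketch (shapes are hypothetical):

```python
import numpy as np

rng = np.random.default_rng(3)
psd = rng.random((2, 128))  # 2 windows x 128 frequency bins
n_samples = 256             # samples per window

psd_norm = psd / np.sum(psd, axis=1, keepdims=True)
entropy = -np.sum(psd_norm * np.log2(psd_norm), axis=1) / np.log2(n_samples)
# Flatter (noise-like) spectra give larger values; a single dominant bin gives values near 0.
```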
@@ -475,61 +502,66 @@ def compute_mfccs(
      - The function includes filterbank normalization to ensure proper scaling.
      - DCT filters are constructed to minimize spectral leakage.
      """
-
+
      # Check if total_power_type is either 'psd' or 'spectrogram'
-     if total_power_type not in ['psd', 'spectrogram']:
-         raise ValueError("total_power_type should be set to either 'psd' or 'spectrogram'")
+     if total_power_type not in ["psd", "spectrogram"]:
+         raise ValueError(
+             "total_power_type should be set to either 'psd' or 'spectrogram'"
+         )

      # Compute window length in samples
      window_length = config.window_length_s * config.sampling_frequency
-
+
      # Determine the length of subwindows used in the spectrogram computation
-     if total_power_type == 'spectrogram':
+     if total_power_type == "spectrogram":
          nr_subwindows = total_power_array.shape[2]
-         window_length = int(window_length/(nr_subwindows - (nr_subwindows - 1) * config.overlap_fraction))
+         window_length = int(
+             window_length
+             / (nr_subwindows - (nr_subwindows - 1) * config.overlap_fraction)
+         )

      # Generate filter points
      if mel_scale:
          freqs = np.linspace(
-             melscale(config.mfcc_low_frequency, multiplication_factor), 
-             melscale(config.mfcc_high_frequency, multiplication_factor), 
-             num=config.mfcc_n_dct_filters + 2
+             melscale(config.mfcc_low_frequency, multiplication_factor),
+             melscale(config.mfcc_high_frequency, multiplication_factor),
+             num=config.mfcc_n_dct_filters + 2,
          )
          freqs = inverse_melscale(freqs, multiplication_factor)
      else:
          freqs = np.linspace(
-             config.mfcc_low_frequency, 
-             config.mfcc_high_frequency, 
-             num=config.mfcc_n_dct_filters + 2
+             config.mfcc_low_frequency,
+             config.mfcc_high_frequency,
+             num=config.mfcc_n_dct_filters + 2,
          )
-
-     if rounding_method == 'round':
-         filter_points = np.round(
-             window_length / config.sampling_frequency * freqs
-         ).astype(int) + 1

-     elif rounding_method == 'floor':
-         filter_points = np.floor(
-             window_length / config.sampling_frequency * freqs
-         ).astype(int) + 1
+     if rounding_method == "round":
+         filter_points = (
+             np.round(window_length / config.sampling_frequency * freqs).astype(int) + 1
+         )
+
+     elif rounding_method == "floor":
+         filter_points = (
+             np.floor(window_length / config.sampling_frequency * freqs).astype(int) + 1
+         )

      # Construct triangular filterbank
      filters = np.zeros((len(filter_points) - 2, int(window_length / 2 + 1)))
      for j in range(len(filter_points) - 2):
          filters[j, filter_points[j] : filter_points[j + 2]] = windows.triang(
              filter_points[j + 2] - filter_points[j]
-         ) 
+         )
          # Normalize filter coefficients
          filters[j, :] /= (
-             config.sampling_frequency/window_length * np.sum(filters[j,:])
-         ) 
+             config.sampling_frequency / window_length * np.sum(filters[j, :])
+         )

      # Apply filterbank to total power
-     if total_power_type == 'spectrogram':
-         power_filtered = np.tensordot(total_power_array, filters.T, axes=(1,0))
-     elif total_power_type == 'psd':
+     if total_power_type == "spectrogram":
+         power_filtered = np.tensordot(total_power_array, filters.T, axes=(1, 0))
+     elif total_power_type == "psd":
          power_filtered = np.dot(total_power_array, filters.T)
-
+
      # Convert power to logarithmic scale
      log_power_filtered = np.log10(power_filtered + 1e-10)

@@ -538,16 +570,20 @@ def compute_mfccs(
      dct_filters[0, :] = 1.0 / np.sqrt(config.mfcc_n_dct_filters)

      samples = (
-         np.arange(1, 2 * config.mfcc_n_dct_filters, 2) * np.pi / (2.0 * config.mfcc_n_dct_filters)
+         np.arange(1, 2 * config.mfcc_n_dct_filters, 2)
+         * np.pi
+         / (2.0 * config.mfcc_n_dct_filters)
      )

      for i in range(1, config.mfcc_n_coefficients):
-         dct_filters[i, :] = np.cos(i * samples) * np.sqrt(2.0 / config.mfcc_n_dct_filters)
+         dct_filters[i, :] = np.cos(i * samples) * np.sqrt(
+             2.0 / config.mfcc_n_dct_filters
+         )

      # Compute MFCCs
-     mfccs = np.dot(log_power_filtered, dct_filters.T) 
+     mfccs = np.dot(log_power_filtered, dct_filters.T)

-     if total_power_type == 'spectrogram':
+     if total_power_type == "spectrogram":
          mfccs = np.mean(mfccs, axis=1)

      return mfccs
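The MFCC pipeline above reduces to three steps: a triangular filterbank over the power spectrum, log compression, and an orthonormal DCT-II. A compact sketch with made-up sizes (equally spaced filter points stand in for the mel-spaced points the function derives from its config, and the filter normalization is omitted):

```python
import numpy as np
from scipy.signal import windows

n_filters, n_bins = 12, 129  # e.g. a 256-sample window -> 129 rfft bins
filter_points = np.linspace(1, n_bins - 1, n_filters + 2).astype(int)

# Triangular filterbank, as in the loop above
filters = np.zeros((n_filters, n_bins))
for j in range(n_filters):
    filters[j, filter_points[j] : filter_points[j + 2]] = windows.triang(
        filter_points[j + 2] - filter_points[j]
    )

rng = np.random.default_rng(4)
log_power = np.log10(rng.random((5, n_bins)) @ filters.T + 1e-10)

# Orthonormal DCT-II basis, mirroring the dct_filters construction
n_coeff = 6
samples = np.arange(1, 2 * n_filters, 2) * np.pi / (2.0 * n_filters)
dct = np.vstack(
    [np.full(n_filters, 1.0 / np.sqrt(n_filters))]
    + [np.cos(i * samples) * np.sqrt(2.0 / n_filters) for i in range(1, n_coeff)]
)
mfccs = log_power @ dct.T  # shape (5, 6)
```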
@@ -570,7 +606,9 @@ def melscale(x: np.ndarray, multiplication_factor: float = 1) -> np.ndarray:
      np.ndarray
          Frequency values mapped to the Mel scale.
      """
-     return (64.875 / multiplication_factor) * np.log10(1 + x / (17.5 / multiplication_factor))
+     return (64.875 / multiplication_factor) * np.log10(
+         1 + x / (17.5 / multiplication_factor)
+     )


  def inverse_melscale(x: np.ndarray, multiplication_factor: float = 1) -> np.ndarray:
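`melscale` and `inverse_melscale` (next hunk) form an exact algebraic pair, so a round-trip check is a cheap sanity test. A self-contained sketch restating both formulas as they appear in the diff:

```python
import numpy as np

def melscale(x, m=1):
    return (64.875 / m) * np.log10(1 + x / (17.5 / m))

def inverse_melscale(x, m=1):
    return (17.5 / m) * (10 ** (x / (64.875 / m)) - 1)

f = np.array([0.5, 1.0, 3.0, 7.0])  # Hz, within the human-activity range
assert np.allclose(inverse_melscale(melscale(f)), f)
```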
@@ -590,17 +628,19 @@ def inverse_melscale(x: np.ndarray, multiplication_factor: float = 1) -> np.ndar
      np.ndarray
          Linear frequency values corresponding to the given Mel scale values.
      """
-     return (17.5 / multiplication_factor) * (10 ** (x / (64.875 / multiplication_factor)) - 1)
+     return (17.5 / multiplication_factor) * (
+         10 ** (x / (64.875 / multiplication_factor)) - 1
+     )


  def pca_transform_gyroscope(
-     df: pd.DataFrame, 
-     y_gyro_colname: str, 
-     z_gyro_colname: str, 
+     df: pd.DataFrame,
+     y_gyro_colname: str,
+     z_gyro_colname: str,
  ) -> np.ndarray:
      """
      Perform principal component analysis (PCA) on gyroscope data to estimate velocity.
-
+
      Parameters
      ----------
      df : pd.DataFrame
@@ -609,7 +649,7 @@ def pca_transform_gyroscope(
          The column name for the y-axis gyroscope data.
      z_gyro_colname : str
          The column name for the z-axis gyroscope data.
-
+
      Returns
      -------
      np.ndarray
@@ -623,7 +663,7 @@ def pca_transform_gyroscope(
      fit_data = np.column_stack((y_gyro_array, z_gyro_array))
      full_data = fit_data

-     pca = PCA(n_components=2, svd_solver='auto', random_state=22)
+     pca = PCA(n_components=2, svd_solver="auto", random_state=22)
      pca.fit(fit_data)
      velocity = pca.transform(full_data)[:, 0]  # First principal component

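`pca_transform_gyroscope` fits a two-component PCA on the y/z gyroscope columns and keeps only the first principal component as the velocity estimate. A sketch with hypothetical column names and synthetic, correlated data (assumes the function is imported from this module):

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(5)
t = np.linspace(0, 10, 1000)
df = pd.DataFrame(
    {
        "gyroscope_y": np.sin(2 * np.pi * t) + 0.1 * rng.normal(size=t.size),
        "gyroscope_z": 0.5 * np.sin(2 * np.pi * t) + 0.1 * rng.normal(size=t.size),
    }
)

velocity = pca_transform_gyroscope(df, "gyroscope_y", "gyroscope_z")  # shape (1000,)
```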
@@ -633,25 +673,21 @@ def pca_transform_gyroscope(
  def compute_angle(time_array: np.ndarray, velocity_array: np.ndarray) -> np.ndarray:
      """
      Compute the angle from the angular velocity using cumulative trapezoidal integration.
-
+
      Parameters
      ----------
      time_array : np.ndarray
          The time array corresponding to the angular velocity data.
      velocity_array : np.ndarray
          The angular velocity data to integrate.
-
+
      Returns
      -------
      np.ndarray
          The estimated angle based on the cumulative trapezoidal integration of the angular velocity.
      """
      # Perform integration and apply absolute value
-     angle_array = cumulative_trapezoid(
-         y=velocity_array,
-         x=time_array,
-         initial=0
-     )
+     angle_array = cumulative_trapezoid(y=velocity_array, x=time_array, initial=0)
      return np.abs(angle_array)


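`compute_angle` is a direct cumulative trapezoid plus absolute value, so integrating a cosine-shaped angular velocity should recover |sin|. A quick check:

```python
import numpy as np
from scipy.integrate import cumulative_trapezoid

fs = 100
t = np.arange(0, 2, 1 / fs)
omega = 2 * np.pi * np.cos(2 * np.pi * t)  # angular velocity in rad/s

angle = np.abs(cumulative_trapezoid(y=omega, x=t, initial=0))
# angle closely tracks |sin(2*pi*t)|, as compute_angle would return
```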
@@ -665,31 +701,30 @@ def remove_moving_average_angle(angle_array: np.ndarray, fs: float) -> pd.Series
          The angle array to remove the moving average from.
      fs : float
          The sampling frequency of the data.
-
+
      Returns
      -------
      pd.Series
          The angle array with the moving average removed.
      """
      window_size = int(2 * (fs * 0.5) + 1)
-     angle_ma = np.array(pd.Series(angle_array).rolling(
-         window=window_size,
-         min_periods=1,
-         center=True,
-         closed='both'
-     ).mean())
-
+     angle_ma = np.array(
+         pd.Series(angle_array)
+         .rolling(window=window_size, min_periods=1, center=True, closed="both")
+         .mean()
+     )
+
      return angle_array - angle_ma


  def extract_angle_extremes(
-     angle_array: np.ndarray, 
-     sampling_frequency: float, 
-     max_frequency_activity: float = 1.75,
-     ) -> tuple[List[int], List[int], List[int]]:
+     angle_array: np.ndarray,
+     sampling_frequency: float,
+     max_frequency_activity: float = 1.75,
+ ) -> tuple[List[int], List[int], List[int]]:
      """
      Extract extrema (minima and maxima) indices from the angle array.
-
+
      Parameters
      ----------
      angle_array : np.ndarray
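`remove_moving_average_angle` in the hunk above subtracts a centered rolling mean of width `2*(fs*0.5)+1` samples (roughly one second, always odd). A standalone sketch of the same pandas construction with made-up data:

```python
import numpy as np
import pandas as pd

fs = 100.0
angle = np.sin(np.linspace(0, 4 * np.pi, 400)) + 0.5  # constant offset to remove

window_size = int(2 * (fs * 0.5) + 1)
angle_ma = (
    pd.Series(angle)
    .rolling(window=window_size, min_periods=1, center=True, closed="both")
    .mean()
    .to_numpy()
)
detrended = angle - angle_ma
```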
@@ -698,25 +733,21 @@ def extract_angle_extremes(
          The sampling frequency of the data.
      max_frequency_activity : float, optional
          The maximum frequency of human activity in Hz (default: 1.75).
-
+
      Returns
      -------
      tuple
          A tuple containing the indices of the angle extrema, minima, and maxima.
      """
      distance = sampling_frequency / max_frequency_activity
-     prominence = 2 
+     prominence = 2

      # Find minima and maxima indices for each window
      minima_indices = find_peaks(
-         x=-angle_array,
-         distance=distance,
-         prominence=prominence
+         x=-angle_array, distance=distance, prominence=prominence
      )[0]
      maxima_indices = find_peaks(
-         x=angle_array,
-         distance=distance,
-         prominence=prominence
+         x=angle_array, distance=distance, prominence=prominence
      )[0]

      minima_indices = np.array(minima_indices, dtype=object)
@@ -728,14 +759,20 @@ def extract_angle_extremes(
          # Start with a minimum
          while i_pks < minima_indices.size - 1 and i_pks < maxima_indices.size:
              if minima_indices[i_pks + 1] < maxima_indices[i_pks]:
-                 if angle_array[minima_indices[i_pks + 1]] < angle_array[minima_indices[i_pks]]:
+                 if (
+                     angle_array[minima_indices[i_pks + 1]]
+                     < angle_array[minima_indices[i_pks]]
+                 ):
                      minima_indices = np.delete(minima_indices, i_pks)
                  else:
                      minima_indices = np.delete(minima_indices, i_pks + 1)
                  i_pks -= 1

              if i_pks >= 0 and minima_indices[i_pks] > maxima_indices[i_pks]:
-                 if angle_array[maxima_indices[i_pks]] < angle_array[maxima_indices[i_pks - 1]]:
+                 if (
+                     angle_array[maxima_indices[i_pks]]
+                     < angle_array[maxima_indices[i_pks - 1]]
+                 ):
                      maxima_indices = np.delete(maxima_indices, i_pks)
                  else:
                      maxima_indices = np.delete(maxima_indices, i_pks - 1)
@@ -746,14 +783,20 @@ def extract_angle_extremes(
          # Start with a maximum
          while i_pks < maxima_indices.size - 1 and i_pks < minima_indices.size:
              if maxima_indices[i_pks + 1] < minima_indices[i_pks]:
-                 if angle_array[maxima_indices[i_pks + 1]] < angle_array[maxima_indices[i_pks]]:
+                 if (
+                     angle_array[maxima_indices[i_pks + 1]]
+                     < angle_array[maxima_indices[i_pks]]
+                 ):
                      maxima_indices = np.delete(maxima_indices, i_pks + 1)
                  else:
                      maxima_indices = np.delete(maxima_indices, i_pks)
                  i_pks -= 1

              if i_pks >= 0 and maxima_indices[i_pks] > minima_indices[i_pks]:
-                 if angle_array[minima_indices[i_pks]] < angle_array[minima_indices[i_pks - 1]]:
+                 if (
+                     angle_array[minima_indices[i_pks]]
+                     < angle_array[minima_indices[i_pks - 1]]
+                 ):
                      minima_indices = np.delete(minima_indices, i_pks - 1)
                  else:
                      minima_indices = np.delete(minima_indices, i_pks)
@@ -766,17 +809,19 @@ def extract_angle_extremes(
      return list(angle_extrema_indices), list(minima_indices), list(maxima_indices)


- def compute_range_of_motion(angle_array: np.ndarray, extrema_indices: List[int]) -> np.ndarray:
+ def compute_range_of_motion(
+     angle_array: np.ndarray, extrema_indices: List[int]
+ ) -> np.ndarray:
      """
      Compute the range of motion of a time series based on the angle extrema.
-
+
      Parameters
      ----------
      angle_array : np.ndarray
          The angle array to compute the range of motion from.
      extrema_indices : List[int]
          The indices of the angle extrema.
-
+
      Returns
      -------
      np.ndarray
@@ -787,9 +832,11 @@ def compute_range_of_motion(angle_array: np.ndarray, extrema_indices: List[int])
          raise TypeError("extrema_indices must be a list of integers.")

      # Check bounds
-     if np.any(np.array(extrema_indices) < 0) or np.any(np.array(extrema_indices) >= len(angle_array)):
+     if np.any(np.array(extrema_indices) < 0) or np.any(
+         np.array(extrema_indices) >= len(angle_array)
+     ):
          raise ValueError("extrema_indices contains out-of-bounds indices.")
-
+
      # Extract angle amplitudes (minima and maxima values)
      angle_extremas = angle_array[extrema_indices]

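Together, `extract_angle_extremes` and `compute_range_of_motion` implement peak picking (with a distance derived from `max_frequency_activity` and a prominence of 2) followed by amplitude differences over the alternating extrema. A sketch of the peak picking plus a plausible range-of-motion step (the subtraction itself is not visible in the diff, so the `np.diff` below is an assumption consistent with the docstring):

```python
import numpy as np
from scipy.signal import find_peaks

fs = 100
t = np.arange(0, 5, 1 / fs)
angle = 20 * np.sin(2 * np.pi * t)  # synthetic arm-swing angle, degrees

distance = fs / 1.75  # max_frequency_activity = 1.75 Hz
minima = find_peaks(-angle, distance=distance, prominence=2)[0]
maxima = find_peaks(angle, distance=distance, prominence=2)[0]
extrema = np.sort(np.concatenate([minima, maxima]))

rom = np.abs(np.diff(angle[extrema]))  # ~40 degrees peak-to-trough here
```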
@@ -812,19 +859,21 @@ def compute_peak_angular_velocity(
          The angular velocity array to compute the peak angular velocity from.
      angle_extrema_indices : List[int]
          The indices of the angle extrema.
-
+
      Returns
      -------
      np.ndarray
          The peak angular velocities of the time series.
      """
-     if np.any(np.array(angle_extrema_indices) < 0) or np.any(np.array(angle_extrema_indices) >= len(velocity_array)):
+     if np.any(np.array(angle_extrema_indices) < 0) or np.any(
+         np.array(angle_extrema_indices) >= len(velocity_array)
+     ):
          raise ValueError("angle_extrema_indices contains out-of-bounds indices.")
-
+
      if len(angle_extrema_indices) < 2:
          raise ValueError("angle_extrema_indices must contain at least two indices.")
-
-     # Initialize a list to store the peak velocities 
+
+     # Initialize a list to store the peak velocities
      pav = []

      # Compute peak angular velocities
@@ -858,21 +907,23 @@ def compute_forward_backward_peak_angular_velocity(
          The indices of the minima.
      maxima_indices : List[int]
          The indices of the maxima.
-
+
      Returns
      -------
      Tuple[np.ndarray, np.ndarray]
          A tuple containing the forward and backward peak angular velocities for minima and maxima.
      """
-     if np.any(np.array(angle_extrema_indices) < 0) or np.any(np.array(angle_extrema_indices) >= len(velocity_array)):
+     if np.any(np.array(angle_extrema_indices) < 0) or np.any(
+         np.array(angle_extrema_indices) >= len(velocity_array)
+     ):
          raise ValueError("angle_extrema_indices contains out-of-bounds indices.")
-
+
      if len(angle_extrema_indices) < 2:
          raise ValueError("angle_extrema_indices must contain at least two indices.")
-
+
      if len(minima_indices) == 0:
          raise ValueError("No minima indices found.")
-
+
      if len(maxima_indices) == 0:
          raise ValueError("No maxima indices found.")

@@ -900,12 +951,10 @@ def compute_forward_backward_peak_angular_velocity(
      return forward_pav, backward_pav


- def compute_signal_to_noise_ratio(
-     ppg_windowed: np.ndarray
-     ) -> np.ndarray:
+ def compute_signal_to_noise_ratio(ppg_windowed: np.ndarray) -> np.ndarray:
      """
      Compute the signal to noise ratio of the PPG signal.
-
+
      Parameters
      ----------
      ppg_windowed: np.ndarray
@@ -916,21 +965,19 @@ def compute_signal_to_noise_ratio(
      np.ndarray
          The signal to noise ratio of the PPG signal.
      """
-
+
      arr_signal = np.var(ppg_windowed, axis=1)
      arr_noise = np.var(np.abs(ppg_windowed), axis=1)
      signal_to_noise_ratio = arr_signal / arr_noise
-
+
      return signal_to_noise_ratio

- def compute_auto_correlation(
-     ppg_windowed: np.ndarray,
-     fs: int
-     ) -> np.ndarray:
+
+ def compute_auto_correlation(ppg_windowed: np.ndarray, fs: int) -> np.ndarray:
      """
      Compute the biased autocorrelation of the PPG signal. The autocorrelation is computed up to 3 seconds. The highest peak value is selected as the autocorrelation value. If no peaks are found, the value is set to 0.
      The biased autocorrelation is computed using the biased_autocorrelation function. It differs from the unbiased autocorrelation in that the normalization factor is the length of the original signal, and boundary effects are considered. This results in a smoother autocorrelation function.
-
+
      Parameters
      ----------
      ppg_windowed: np.ndarray
@@ -944,21 +991,27 @@ def compute_auto_correlation(
          The autocorrelation of the PPG signal.
      """

-     auto_correlations = biased_autocorrelation(ppg_windowed, fs*3) # compute the biased autocorrelation of the PPG signal up to 3 seconds
-     peaks = [find_peaks(x, height=0.01)[0] for x in auto_correlations] # find the peaks of the autocorrelation
-     sorted_peak_values = [np.sort(auto_correlations[i, indices])[::-1] for i, indices in enumerate(peaks)] # sort the peak values in descending order
-     auto_correlations = [x[0] if len(x) > 0 else 0 for x in sorted_peak_values] # get the highest peak value if there are any peaks, otherwise set to 0
+     auto_correlations = biased_autocorrelation(
+         ppg_windowed, fs * 3
+     )  # compute the biased autocorrelation of the PPG signal up to 3 seconds
+     peaks = [
+         find_peaks(x, height=0.01)[0] for x in auto_correlations
+     ]  # find the peaks of the autocorrelation
+     sorted_peak_values = [
+         np.sort(auto_correlations[i, indices])[::-1] for i, indices in enumerate(peaks)
+     ]  # sort the peak values in descending order
+     auto_correlations = [
+         x[0] if len(x) > 0 else 0 for x in sorted_peak_values
+     ]  # get the highest peak value if there are any peaks, otherwise set to 0

      return np.asarray(auto_correlations)

- def biased_autocorrelation(
-     ppg_windowed: np.ndarray,
-     max_lag: int
-     ) -> np.ndarray:
+
+ def biased_autocorrelation(ppg_windowed: np.ndarray, max_lag: int) -> np.ndarray:
      """
-     Compute the biased autocorrelation of a signal (similar to matlabs autocorr function), where the normalization factor 
+     Compute the biased autocorrelation of a signal (similar to matlabs autocorr function), where the normalization factor
      is the length of the original signal, and boundary effects are considered.
-
+
      Parameters
      ----------
      ppg_windowed: np.ndarray
@@ -972,13 +1025,19 @@ def biased_autocorrelation(
          The biased autocorrelation of the PPG signal.

      """
-     zero_mean_ppg = ppg_windowed - np.mean(ppg_windowed, axis=1, keepdims=True) # Remove the mean of the signal to make it zero-mean
+     zero_mean_ppg = ppg_windowed - np.mean(
+         ppg_windowed, axis=1, keepdims=True
+     )  # Remove the mean of the signal to make it zero-mean
      N = zero_mean_ppg.shape[1]
      autocorr_values = np.zeros((zero_mean_ppg.shape[0], max_lag + 1))
-
+
      for lag in range(max_lag + 1):
          # Compute autocorrelation for current lag
-         overlapping_points = zero_mean_ppg[:, :N-lag] * zero_mean_ppg[:, lag:]
-         autocorr_values[:, lag] = np.sum(overlapping_points, axis=1) / N # Divide by N (biased normalization)
-
-     return autocorr_values/autocorr_values[:, 0, np.newaxis] # Normalize the autocorrelation values
+         overlapping_points = zero_mean_ppg[:, : N - lag] * zero_mean_ppg[:, lag:]
+         autocorr_values[:, lag] = (
+             np.sum(overlapping_points, axis=1) / N
+         )  # Divide by N (biased normalization)
+
+     return (
+         autocorr_values / autocorr_values[:, 0, np.newaxis]
+     )  # Normalize the autocorrelation values
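Finally, `compute_auto_correlation` keeps the highest autocorrelation peak within 3 s of lag; for a periodic PPG-like signal that peak sits near the pulse period. A sketch assuming `biased_autocorrelation` is in scope:

```python
import numpy as np
from scipy.signal import find_peaks

fs = 30
t = np.arange(0, 10, 1 / fs)
ppg_windowed = np.vstack(
    [np.sin(2 * np.pi * 1.2 * t), np.sin(2 * np.pi * 1.0 * t)]  # 72 and 60 bpm
)

acf = biased_autocorrelation(ppg_windowed, max_lag=fs * 3)
peaks = [find_peaks(row, height=0.01)[0] for row in acf]
best = [row_acf[idx].max() if idx.size else 0 for row_acf, idx in zip(acf, peaks)]
# The strongest peak appears near lag = fs / f0, i.e. the pulse period in samples
```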