paradigma-1.0.3-py3-none-any.whl → paradigma-1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,28 +1,47 @@
1
1
  import json
2
+ import logging
3
+ from importlib.resources import files
4
+ from pathlib import Path
5
+
2
6
  import numpy as np
3
- import os
4
7
  import pandas as pd
5
- from pathlib import Path
6
8
  from scipy.signal import welch
7
9
  from scipy.signal.windows import hamming, hann
8
- import tsdf
9
- from typing import List
10
10
 
11
11
  from paradigma.classification import ClassifierPackage
12
+ from paradigma.config import PPGConfig, PulseRateConfig
12
13
  from paradigma.constants import DataColumns
13
- from paradigma.config import PulseRateConfig
14
- from paradigma.feature_extraction import compute_statistics, compute_signal_to_noise_ratio, compute_auto_correlation, \
15
- compute_dominant_frequency, compute_relative_power, compute_spectral_entropy
16
- from paradigma.pipelines.pulse_rate_utils import assign_sqa_label, extract_pr_segments, extract_pr_from_segment
17
- from paradigma.segmenting import tabulate_windows, WindowedDataExtractor
14
+ from paradigma.feature_extraction import (
15
+ compute_auto_correlation,
16
+ compute_dominant_frequency,
17
+ compute_relative_power,
18
+ compute_signal_to_noise_ratio,
19
+ compute_spectral_entropy,
20
+ compute_statistics,
21
+ )
22
+ from paradigma.pipelines.pulse_rate_utils import (
23
+ assign_sqa_label,
24
+ extract_pr_from_segment,
25
+ extract_pr_segments,
26
+ )
27
+ from paradigma.preprocessing import preprocess_ppg_data
28
+ from paradigma.segmenting import WindowedDataExtractor, tabulate_windows
18
29
  from paradigma.util import aggregate_parameter
19
30
 
20
- def extract_signal_quality_features(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config: PulseRateConfig, acc_config: PulseRateConfig) -> pd.DataFrame:
21
- """
31
+
32
+ def extract_signal_quality_features(
33
+ df_ppg: pd.DataFrame,
34
+ ppg_config: PulseRateConfig,
35
+ df_acc: pd.DataFrame | None = None,
36
+ acc_config: PulseRateConfig | None = None,
37
+ ) -> pd.DataFrame:
38
+ """
22
39
  Extract signal quality features from the PPG signal.
23
- The features are extracted from the temporal and spectral domain of the PPG signal.
24
- The temporal domain features include variance, mean, median, kurtosis, skewness, signal-to-noise ratio, and autocorrelation.
25
- The spectral domain features include the dominant frequency, relative power, spectral entropy.
40
+ The features are extracted from the temporal and spectral domains of the
41
+ PPG signal. The temporal domain features include variance, mean, median,
42
+ kurtosis, skewness, signal-to-noise ratio, and autocorrelation. The
43
+ spectral domain features include the dominant frequency, relative power,
44
+ and spectral entropy.
26
45
 
27
46
  Parameters
28
47
  ----------
@@ -31,104 +50,143 @@ def extract_signal_quality_features(df_ppg: pd.DataFrame, df_acc: pd.DataFrame,
31
50
  df_acc : pd.DataFrame, optional
32
51
  The DataFrame containing the accelerometer signal.
33
52
  ppg_config: PulseRateConfig
34
- The configuration for the signal quality feature extraction of the PPG signal.
53
+ The configuration for the signal quality feature extraction of the PPG
54
+ signal.
35
55
  acc_config: PulseRateConfig, optional
36
- The configuration for the signal quality feature extraction of the accelerometer signal.
56
+ The configuration for the signal quality feature extraction of the
57
+ accelerometer signal.
37
58
 
38
59
  Returns
39
60
  -------
40
61
  df_features : pd.DataFrame
41
62
  The DataFrame containing the extracted signal quality features.
42
-
63
+
43
64
  """
44
65
  # Group sequences of timestamps into windows
45
- ppg_windowed_cols = [DataColumns.TIME, ppg_config.ppg_colname]
66
+ ppg_windowed_colnames = [ppg_config.time_colname, ppg_config.ppg_colname]
46
67
  ppg_windowed = tabulate_windows(
47
- df=df_ppg,
48
- columns=ppg_windowed_cols,
68
+ df=df_ppg,
69
+ columns=ppg_windowed_colnames,
49
70
  window_length_s=ppg_config.window_length_s,
50
71
  window_step_length_s=ppg_config.window_step_length_s,
51
- fs=ppg_config.sampling_frequency
72
+ fs=ppg_config.sampling_frequency,
52
73
  )
53
74
 
54
75
  # Extract data from the windowed PPG signal
55
- extractor = WindowedDataExtractor(ppg_windowed_cols)
56
- idx_time = extractor.get_index(DataColumns.TIME)
76
+ extractor = WindowedDataExtractor(ppg_windowed_colnames)
77
+ idx_time = extractor.get_index(ppg_config.time_colname)
57
78
  idx_ppg = extractor.get_index(ppg_config.ppg_colname)
58
- start_time_ppg = np.min(ppg_windowed[:, :, idx_time], axis=1) # Start time of the window is relative to the first datapoint in the PPG data
79
+ # Start time of the window is relative to the first datapoint in the PPG
80
+ # data
81
+ start_time_ppg = np.min(ppg_windowed[:, :, idx_time], axis=1)
59
82
  ppg_values_windowed = ppg_windowed[:, :, idx_ppg]
60
83
 
61
- acc_windowed_cols = [DataColumns.TIME] + acc_config.accelerometer_cols
62
- acc_windowed = tabulate_windows(
63
- df=df_acc,
64
- columns=acc_windowed_cols,
65
- window_length_s=acc_config.window_length_s,
66
- window_step_length_s=acc_config.window_step_length_s,
67
- fs=acc_config.sampling_frequency
68
- )
84
+ df_features = pd.DataFrame(start_time_ppg, columns=[ppg_config.time_colname])
69
85
 
70
- # Extract data from the windowed accelerometer signal
71
- extractor = WindowedDataExtractor(acc_windowed_cols)
72
- idx_acc = extractor.get_slice(acc_config.accelerometer_cols)
73
- acc_values_windowed = acc_windowed[:, :, idx_acc]
86
+ if df_acc is not None and acc_config is not None:
87
+
88
+ acc_windowed_colnames = [
89
+ acc_config.time_colname
90
+ ] + acc_config.accelerometer_colnames
91
+ acc_windowed = tabulate_windows(
92
+ df=df_acc,
93
+ columns=acc_windowed_colnames,
94
+ window_length_s=acc_config.window_length_s,
95
+ window_step_length_s=acc_config.window_step_length_s,
96
+ fs=acc_config.sampling_frequency,
97
+ )
98
+
99
+ # Extract data from the windowed accelerometer signal
100
+ extractor = WindowedDataExtractor(acc_windowed_colnames)
101
+ idx_acc = extractor.get_slice(acc_config.accelerometer_colnames)
102
+ acc_values_windowed = acc_windowed[:, :, idx_acc]
103
+
104
+ # Compute periodicity feature of the accelerometer signal
105
+ df_accelerometer_feature = extract_accelerometer_feature(
106
+ acc_values_windowed, ppg_values_windowed, acc_config
107
+ )
108
+ # Combine the accelerometer feature with the previously computed features
109
+ df_features = pd.concat([df_features, df_accelerometer_feature], axis=1)
74
110
 
75
- df_features = pd.DataFrame(start_time_ppg, columns=[DataColumns.TIME])
76
111
  # Compute features of the temporal domain of the PPG signal
77
- df_temporal_features = extract_temporal_domain_features(ppg_values_windowed, ppg_config, quality_stats=['var', 'mean', 'median', 'kurtosis', 'skewness'])
78
-
112
+ df_temporal_features = extract_temporal_domain_features(
113
+ ppg_values_windowed,
114
+ ppg_config,
115
+ quality_stats=["var", "mean", "median", "kurtosis", "skewness"],
116
+ )
117
+
79
118
  # Combine temporal features with the start time
80
119
  df_features = pd.concat([df_features, df_temporal_features], axis=1)
81
120
 
82
121
  # Compute features of the spectral domain of the PPG signal
83
- df_spectral_features = extract_spectral_domain_features(ppg_values_windowed, ppg_config)
122
+ df_spectral_features = extract_spectral_domain_features(
123
+ ppg_values_windowed, ppg_config
124
+ )
84
125
 
85
126
  # Combine the spectral features with the previously computed temporal features
86
127
  df_features = pd.concat([df_features, df_spectral_features], axis=1)
87
-
88
- # Compute periodicity feature of the accelerometer signal
89
- df_accelerometer_feature = extract_accelerometer_feature(acc_values_windowed, ppg_values_windowed, acc_config)
90
-
91
- # Combine the accelerometer feature with the previously computed features
92
- df_features = pd.concat([df_features, df_accelerometer_feature], axis=1)
93
128
 
94
129
  return df_features
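
The example below is a minimal, hedged sketch of calling the new optional-accelerometer signature on synthetic data; the sampling rate and sine signal are made up, and it assumes the names imported at the top of this module (DataColumns, PPGConfig, PulseRateConfig, preprocess_ppg_data) are in scope, mirroring how run_pulse_rate_pipeline further down drives this function.

    import numpy as np
    import pandas as pd

    fs = 100.0                                         # arbitrary sampling rate for this sketch
    t = np.arange(int(fs * 60)) / fs                   # one minute of samples
    df_ppg = pd.DataFrame({
        DataColumns.TIME: t,
        DataColumns.PPG: np.sin(2 * np.pi * 1.2 * t),  # 1.2 Hz sine, i.e. ~72 bpm
    })

    ppg_config = PPGConfig()
    pr_config = PulseRateConfig()                      # used as the PPG config, as in run_pulse_rate_pipeline below
    df_ppg_proc, _ = preprocess_ppg_data(df_ppg=df_ppg, ppg_config=ppg_config, verbose=0)

    # PPG only: the accelerometer periodicity feature is skipped.
    df_features = extract_signal_quality_features(df_ppg_proc, pr_config)
    # With accelerometer data, also pass df_acc= and an IMU-oriented acc_config=.
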
95
130
 
96
131
 
97
- def signal_quality_classification(df: pd.DataFrame, config: PulseRateConfig, full_path_to_classifier_package: str | Path) -> pd.DataFrame:
132
+ def signal_quality_classification(
133
+ df: pd.DataFrame, config: PulseRateConfig, clf_package: ClassifierPackage
134
+ ) -> pd.DataFrame:
98
135
  """
99
- Classify the signal quality of the PPG signal using a logistic regression classifier. A probability close to 1 indicates a high-quality signal, while a probability close to 0 indicates a low-quality signal.
100
- The classifier is trained on features extracted from the PPG signal. The features are extracted using the extract_signal_quality_features function.
101
- The accelerometer signal is used to determine the signal quality based on the power ratio of the accelerometer signal and returns a binary label based on a threshold.
102
- A value of 1 on the indicates no/minor periodic motion influence of the accelerometer on the PPG signal, 0 indicates major periodic motion influence.
136
+ Classify the signal quality of the PPG signal using a logistic regression
137
+ classifier. A probability close to 1 indicates a high-quality signal,
138
+ while a probability close to 0 indicates a low-quality signal. The
139
+ classifier is trained on features extracted from the PPG signal. The
140
+ features are extracted using the extract_signal_quality_features
141
+ function. The accelerometer signal is used to determine the signal
142
+ quality based on the power ratio of the accelerometer signal and returns
143
+ a binary label based on a threshold. A value of 1 on the indicates
144
+ no/minor periodic motion influence of the accelerometer on the PPG
145
+ signal, 0 indicates major periodic motion influence.
103
146
 
104
147
  Parameters
105
148
  ----------
106
149
  df : pd.DataFrame
107
- The DataFrame containing the PPG features and the accelerometer feature for signal quality classification.
150
+ The DataFrame containing the PPG features and the accelerometer
151
+ feature for signal quality classification.
108
152
  config : PulseRateConfig
109
153
  The configuration for the signal quality classification.
110
- full_path_to_classifier_package : str | Path
111
- The path to the directory containing the classifier.
154
+ clf_package : ClassifierPackage
155
+ The classifier package containing the classifier and scaler.
112
156
 
113
157
  Returns
114
158
  -------
115
159
  df_sqa : pd.DataFrame
116
- The DataFrame containing the PPG signal quality predictions (both probabilities of the PPG signal quality classification and the accelerometer label based on the threshold).
160
+ The DataFrame containing the PPG signal quality predictions (both
161
+ probabilities of the PPG signal quality classification and the
162
+ accelerometer label based on the threshold).
117
163
  """
118
- clf_package = ClassifierPackage.load(full_path_to_classifier_package) # Load the classifier package
164
+ # Set classifier
119
165
  clf = clf_package.classifier # Load the logistic regression classifier
120
166
 
121
167
  # Apply scaling to relevant columns
122
- scaled_features = clf_package.transform_features(df.loc[:, clf.feature_names_in]) # Apply scaling to the features
168
+ scaled_features = clf_package.transform_features(
169
+ df.loc[:, clf.feature_names_in]
170
+ ) # Apply scaling to the features
123
171
 
124
- # Make predictions for PPG signal quality assessment, and assign the probabilities to the DataFrame and drop the features
172
+ # Make predictions for PPG signal quality assessment, and assign the
173
+ # probabilities to the DataFrame and drop the features
125
174
  df[DataColumns.PRED_SQA_PROBA] = clf.predict_proba(scaled_features)[:, 0]
126
- df[DataColumns.PRED_SQA_ACC_LABEL] = (df[DataColumns.ACC_POWER_RATIO] < config.threshold_sqa_accelerometer).astype(int) # Assign accelerometer label to the DataFrame based on the threshold
127
-
128
- return df[[DataColumns.TIME, DataColumns.PRED_SQA_PROBA, DataColumns.PRED_SQA_ACC_LABEL]] # Return only the relevant columns, namely the predicted probabilities for the PPG signal quality and the accelerometer label
175
+ keep_cols = [config.time_colname, DataColumns.PRED_SQA_PROBA]
176
+
177
+ if DataColumns.ACC_POWER_RATIO in df.columns:
178
+ # Assign accelerometer label to the DataFrame based on the threshold
179
+ df[DataColumns.PRED_SQA_ACC_LABEL] = (
180
+ df[DataColumns.ACC_POWER_RATIO] < config.threshold_sqa_accelerometer
181
+ ).astype(int)
182
+ keep_cols += [DataColumns.PRED_SQA_ACC_LABEL]
129
183
 
184
+ return df[keep_cols]
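
Continuing the sketch above, a classifier package can be loaded from the asset path that run_pulse_rate_pipeline uses further down; this is a hedged usage sketch, not the only way to obtain a ClassifierPackage.

    from importlib.resources import files

    clf_package = ClassifierPackage.load(
        files("paradigma.assets") / "ppg_quality_clf_package.pkl"
    )
    df_sqa = signal_quality_classification(df_features, pr_config, clf_package)
    # df_sqa keeps the time column and DataColumns.PRED_SQA_PROBA; the thresholded
    # accelerometer label is only added when DataColumns.ACC_POWER_RATIO is present.
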
130
185
 
131
- def estimate_pulse_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame, config: PulseRateConfig) -> pd.DataFrame:
186
+
187
+ def estimate_pulse_rate(
188
+ df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame, config: PulseRateConfig
189
+ ) -> pd.DataFrame:
132
190
  """
133
191
  Estimate the pulse rate from the PPG signal using the time-frequency domain method.
134
192
 
@@ -149,37 +207,58 @@ def estimate_pulse_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame,
149
207
 
150
208
  # Extract NumPy arrays for faster operations
151
209
  ppg_post_prob = df_sqa[DataColumns.PRED_SQA_PROBA].to_numpy()
152
- acc_label = df_sqa.loc[:, DataColumns.PRED_SQA_ACC_LABEL].to_numpy() # Adjust later in data columns to get the correct label, should be first intergrated in feature extraction and classification
153
- ppg_preprocessed = df_ppg_preprocessed.values
154
- time_idx = df_ppg_preprocessed.columns.get_loc(DataColumns.TIME) # Get the index of the time column
155
- ppg_idx = df_ppg_preprocessed.columns.get_loc(DataColumns.PPG) # Get the index of the PPG column
156
-
210
+
211
+ if DataColumns.PRED_SQA_ACC_LABEL in df_sqa.columns:
212
+ acc_label = df_sqa[DataColumns.PRED_SQA_ACC_LABEL].to_numpy()
213
+ else:
214
+ acc_label = None
215
+
216
+ ppg_preprocessed = df_ppg_preprocessed.values
217
+ time_idx = df_ppg_preprocessed.columns.get_loc(
218
+ config.time_colname
219
+ ) # Get the index of the time column
220
+ ppg_idx = df_ppg_preprocessed.columns.get_loc(
221
+ config.ppg_colname
222
+ ) # Get the index of the PPG column
223
+
157
224
  # Assign window-level probabilities to individual samples
158
- sqa_label = assign_sqa_label(ppg_post_prob, config, acc_label) # assigns a signal quality label to every individual data point
159
- v_start_idx, v_end_idx = extract_pr_segments(sqa_label, config.min_pr_samples) # extracts pulse rate segments based on the SQA label
160
-
225
+ sqa_label = assign_sqa_label(
226
+ ppg_post_prob, config, acc_label
227
+ ) # assigns a signal quality label to every individual data point
228
+ v_start_idx, v_end_idx = extract_pr_segments(
229
+ sqa_label, config.min_pr_samples
230
+ ) # extracts pulse rate segments based on the SQA label
231
+
161
232
  v_pr_rel = np.array([])
162
233
  t_pr_rel = np.array([])
163
234
 
164
- edge_add = 2 * config.sampling_frequency # Add 2s on both sides of the segment for PR estimation
235
+ edge_add = (
236
+ 2 * config.sampling_frequency
237
+ ) # Add 2s on both sides of the segment for PR estimation
165
238
  step_size = config.pr_est_samples # Step size for PR estimation
166
239
 
167
240
  # Estimate the maximum size for preallocation
168
- valid_segments = (v_start_idx >= edge_add) & (v_end_idx <= len(ppg_preprocessed) - edge_add) # check if the segments are valid, e.g. not too close to the edges (2s)
169
- valid_start_idx = v_start_idx[valid_segments] # get the valid start indices
170
- valid_end_idx = v_end_idx[valid_segments] # get the valid end indices
171
- max_size = np.sum((valid_end_idx - valid_start_idx) // step_size) # maximum size for preallocation
172
-
241
+ valid_segments = (v_start_idx >= edge_add) & (
242
+ v_end_idx <= len(ppg_preprocessed) - edge_add
243
+ ) # check if the segments are valid, e.g. not too close to the edges (2s)
244
+ valid_start_idx = v_start_idx[valid_segments] # get the valid start indices
245
+ valid_end_idx = v_end_idx[valid_segments] # get the valid end indices
246
+ max_size = np.sum(
247
+ (valid_end_idx - valid_start_idx) // step_size
248
+ ) # maximum size for preallocation
249
+
173
250
  # Preallocate arrays
174
- v_pr_rel = np.empty(max_size, dtype=float)
175
- t_pr_rel = np.empty(max_size, dtype=float)
251
+ v_pr_rel = np.empty(max_size, dtype=float)
252
+ t_pr_rel = np.empty(max_size, dtype=float)
176
253
 
177
254
  # Track current position
178
255
  pr_pos = 0
179
256
 
180
257
  for start_idx, end_idx in zip(valid_start_idx, valid_end_idx):
181
258
  # Extract extended PPG segment
182
- extended_ppg_segment = ppg_preprocessed[start_idx - edge_add : end_idx + edge_add, ppg_idx]
259
+ extended_ppg_segment = ppg_preprocessed[
260
+ start_idx - edge_add : end_idx + edge_add, ppg_idx
261
+ ]
183
262
 
184
263
  # Estimate pulse rate
185
264
  pr_est = extract_pr_from_segment(
@@ -190,14 +269,16 @@ def estimate_pulse_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame,
190
269
  config.kern_params,
191
270
  )
192
271
  n_pr = len(pr_est) # Number of pulse rate estimates
193
- end_idx_time = n_pr * step_size + start_idx # Calculate end index for time, different from end_idx since it is always a multiple of step_size, while end_idx is not
272
+ # Calculate end index for time, different from end_idx since it is
273
+ # always a multiple of step_size, while end_idx is not
274
+ end_idx_time = n_pr * step_size + start_idx
194
275
 
195
276
  # Extract relative time for PR estimates
196
- pr_time = ppg_preprocessed[start_idx : end_idx_time : step_size, time_idx]
277
+ pr_time = ppg_preprocessed[start_idx:end_idx_time:step_size, time_idx]
197
278
 
198
279
  # Insert into preallocated arrays
199
- v_pr_rel[pr_pos:pr_pos + n_pr] = pr_est
200
- t_pr_rel[pr_pos:pr_pos + n_pr] = pr_time
280
+ v_pr_rel[pr_pos : pr_pos + n_pr] = pr_est
281
+ t_pr_rel[pr_pos : pr_pos + n_pr] = pr_time
201
282
  pr_pos += n_pr
202
283
 
203
284
  df_pr = pd.DataFrame({"time": t_pr_rel, "pulse_rate": v_pr_rel})
@@ -205,7 +286,9 @@ def estimate_pulse_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame,
205
286
  return df_pr
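
A short continuation of the sketches above, chaining the preprocessed PPG data and the quality predictions into pulse rate estimates; df_ppg_proc and df_sqa are assumed to come from the earlier examples.

    df_pr = estimate_pulse_rate(
        df_sqa=df_sqa,
        df_ppg_preprocessed=df_ppg_proc,
        config=pr_config,
    )
    # df_pr has two columns, "time" and "pulse_rate", covering only the
    # segments that passed the signal quality assessment.
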
206
287
 
207
288
 
208
- def aggregate_pulse_rate(pr_values: np.ndarray, aggregates: List[str] = ['mode', '99p']) -> dict:
289
+ def aggregate_pulse_rate(
290
+ pr_values: np.ndarray, aggregates: list[str] = ["mode", "99p"]
291
+ ) -> dict:
209
292
  """
210
293
  Aggregate the pulse rate estimates using the specified aggregation methods.
211
294
 
@@ -214,7 +297,8 @@ def aggregate_pulse_rate(pr_values: np.ndarray, aggregates: List[str] = ['mode',
214
297
  pr_values : np.ndarray
215
298
  The array containing the pulse rate estimates
216
299
  aggregates : List[str]
217
- The list of aggregation methods to be used for the pulse rate estimates. The default is ['mode', '99p'].
300
+ The list of aggregation methods to be used for the pulse rate
301
+ estimates. The default is ['mode', '99p'].
218
302
 
219
303
  Returns
220
304
  -------
@@ -226,24 +310,26 @@ def aggregate_pulse_rate(pr_values: np.ndarray, aggregates: List[str] = ['mode',
226
310
 
227
311
  # Initialize the dictionary for the aggregated results with the metadata
228
312
  aggregated_results = {
229
- 'metadata': {
230
- 'nr_pr_est': len(pr_values)
231
- },
232
- 'pr_aggregates': {}
233
- }
313
+ "metadata": {"nr_pr_est": len(pr_values)},
314
+ "pr_aggregates": {},
315
+ }
234
316
  for aggregate in aggregates:
235
- aggregated_results['pr_aggregates'][f'{aggregate}_{DataColumns.PULSE_RATE}'] = aggregate_parameter(pr_values, aggregate)
317
+ aggregated_results["pr_aggregates"][f"{aggregate}_{DataColumns.PULSE_RATE}"] = (
318
+ aggregate_parameter(pr_values, aggregate)
319
+ )
236
320
 
237
321
  return aggregated_results
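
Continuing the same sketch, the aggregation output is a nested dict; the keys under "pr_aggregates" are the aggregate name joined to DataColumns.PULSE_RATE.

    summary = aggregate_pulse_rate(df_pr["pulse_rate"].to_numpy(), aggregates=["mode", "99p"])
    # summary["metadata"]["nr_pr_est"]  -> number of pulse rate estimates
    # summary["pr_aggregates"]          -> {"mode_<pulse_rate>": ..., "99p_<pulse_rate>": ...}
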
238
322
 
239
323
 
240
324
  def extract_temporal_domain_features(
241
- ppg_windowed: np.ndarray,
242
- config: PulseRateConfig,
243
- quality_stats: List[str] = ['mean', 'std']
244
- ) -> pd.DataFrame:
325
+ ppg_windowed: np.ndarray,
326
+ config: PulseRateConfig,
327
+ quality_stats: list[str] = ["mean", "std"],
328
+ ) -> pd.DataFrame:
245
329
  """
246
- Compute temporal domain features for the ppg signal. The features are added to the dataframe. Therefore the original dataframe is modified, and the modified dataframe is returned.
330
+ Compute temporal domain features for the windowed PPG signal. The
331
+ features are collected into a new DataFrame, which is returned; the
332
+ windowed input array is not modified.
247
333
 
248
334
  Parameters
249
335
  ----------
@@ -254,31 +340,36 @@ def extract_temporal_domain_features(
254
340
  The configuration object containing the parameters for the feature extraction
255
341
 
256
342
  quality_stats: list, optional
257
- The statistics to be computed for the gravity component of the accelerometer signal (default: ['mean', 'std'])
258
-
343
+ The statistics to be computed for the windowed PPG signal
344
+ (default: ['mean', 'std'])
345
+
259
346
  Returns
260
347
  -------
261
348
  pd.DataFrame
262
349
  The dataframe with the added temporal domain features.
263
350
  """
264
-
351
+
265
352
  feature_dict = {}
266
353
  for stat in quality_stats:
267
354
  feature_dict[stat] = compute_statistics(ppg_windowed, stat, abs_stats=True)
268
-
269
- feature_dict['signal_to_noise'] = compute_signal_to_noise_ratio(ppg_windowed)
270
- feature_dict['auto_corr'] = compute_auto_correlation(ppg_windowed, config.sampling_frequency)
355
+
356
+ feature_dict["signal_to_noise"] = compute_signal_to_noise_ratio(ppg_windowed)
357
+ feature_dict["auto_corr"] = compute_auto_correlation(
358
+ ppg_windowed, config.sampling_frequency
359
+ )
271
360
  return pd.DataFrame(feature_dict)
272
361
 
273
362
 
274
363
  def extract_spectral_domain_features(
275
- ppg_windowed: np.ndarray,
276
- config: PulseRateConfig,
277
- ) -> pd.DataFrame:
364
+ ppg_windowed: np.ndarray,
365
+ config: PulseRateConfig,
366
+ ) -> pd.DataFrame:
278
367
  """
279
- Calculate the spectral features (dominant frequency, relative power, and spectral entropy)
280
- for each segment of a PPG signal using a single Welch's method computation. The features are added to the dataframe.
281
- Therefore the original dataframe is modified, and the modified dataframe is returned.
368
+ Calculate the spectral features (dominant frequency, relative power, and
369
+ spectral entropy) for each window of a PPG signal using a single
370
+ Welch's method computation. The features are collected into a new
371
+ DataFrame, which is returned; the windowed input array is not
372
+ modified.
282
373
 
283
374
  Parameters
284
375
  ----------
@@ -295,7 +386,7 @@ def extract_spectral_domain_features(
295
386
  """
296
387
  d_features = {}
297
388
 
298
- window = hamming(config.window_length_welch, sym = True)
389
+ window = hamming(config.window_length_welch, sym=True)
299
390
 
300
391
  n_samples_window = ppg_windowed.shape[1]
301
392
 
@@ -306,23 +397,23 @@ def extract_spectral_domain_features(
306
397
  noverlap=config.overlap_welch_window,
307
398
  nfft=max(256, 2 ** int(np.log2(n_samples_window))),
308
399
  detrend=False,
309
- axis=1
400
+ axis=1,
310
401
  )
311
402
 
312
403
  # Calculate each feature using the computed PSD and frequency array
313
- d_features['f_dom'] = compute_dominant_frequency(freqs, psd)
314
- d_features['rel_power'] = compute_relative_power(freqs, psd, config)
315
- d_features['spectral_entropy'] = compute_spectral_entropy(psd, n_samples_window)
404
+ d_features["f_dom"] = compute_dominant_frequency(freqs, psd)
405
+ d_features["rel_power"] = compute_relative_power(freqs, psd, config)
406
+ d_features["spectral_entropy"] = compute_spectral_entropy(psd, n_samples_window)
316
407
 
317
408
  return pd.DataFrame(d_features)
318
409
 
319
410
 
320
411
  def extract_acc_power_feature(
321
- f1: np.ndarray,
322
- PSD_acc: np.ndarray,
323
- f2: np.ndarray,
324
- PSD_ppg: np.ndarray
325
- ) -> np.ndarray:
412
+ f1: np.ndarray,
413
+ psd_acc: np.ndarray,
414
+ f2: np.ndarray,
415
+ psd_ppg: np.ndarray,
416
+ ) -> np.ndarray:
326
417
  """
327
418
  Extract the accelerometer power feature in the PPG frequency range.
328
419
 
@@ -330,11 +421,11 @@ def extract_acc_power_feature(
330
421
  ----------
331
422
  f1: np.ndarray
332
423
  The frequency bins of the accelerometer signal.
333
- PSD_acc: np.ndarray
424
+ psd_acc: np.ndarray
334
425
  The power spectral density of the accelerometer signal.
335
426
  f2: np.ndarray
336
427
  The frequency bins of the PPG signal.
337
- PSD_ppg: np.ndarray
428
+ psd_ppg: np.ndarray
338
429
  The power spectral density of the PPG signal.
339
430
 
340
431
  Returns
@@ -342,48 +433,54 @@ def extract_acc_power_feature(
342
433
  np.ndarray
343
434
  The accelerometer power feature in the PPG frequency range
344
435
  """
345
-
436
+
346
437
  # Find the index of the maximum PSD value in the PPG signal
347
- max_PPG_psd_idx = np.argmax(PSD_ppg, axis=1)
348
- max_PPG_freq_psd = f2[max_PPG_psd_idx]
349
-
438
+ max_ppg_psd_idx = np.argmax(psd_ppg, axis=1)
439
+ max_ppg_freq_psd = f2[max_ppg_psd_idx]
440
+
350
441
  # Find the neighboring indices of the maximum PSD value in the PPG signal
351
- df_idx = np.column_stack((max_PPG_psd_idx - 1, max_PPG_psd_idx, max_PPG_psd_idx + 1))
352
-
353
- # Find the index of the closest frequency in the accelerometer signal to the first harmonic of the PPG frequency
354
- corr_acc_psd_fh_idx = np.argmin(np.abs(f1[:, None] - max_PPG_freq_psd*2), axis=0)
355
- fh_idx = np.column_stack((corr_acc_psd_fh_idx - 1, corr_acc_psd_fh_idx, corr_acc_psd_fh_idx + 1))
356
-
442
+ df_idx = np.column_stack(
443
+ (max_ppg_psd_idx - 1, max_ppg_psd_idx, max_ppg_psd_idx + 1)
444
+ )
445
+
446
+ # Find the index of the closest frequency in the accelerometer signal
447
+ # to the first harmonic of the PPG frequency
448
+ corr_acc_psd_fh_idx = np.argmin(np.abs(f1[:, None] - max_ppg_freq_psd * 2), axis=0)
449
+ fh_idx = np.column_stack(
450
+ (corr_acc_psd_fh_idx - 1, corr_acc_psd_fh_idx, corr_acc_psd_fh_idx + 1)
451
+ )
452
+
357
453
  # Compute the power in the ranges corresponding to the PPG frequency
358
- acc_power_PPG_range = (
359
- np.trapz(PSD_acc[np.arange(PSD_acc.shape[0])[:, None], df_idx], f1[df_idx], axis=1) +
360
- np.trapz(PSD_acc[np.arange(PSD_acc.shape[0])[:, None], fh_idx], f1[fh_idx], axis=1)
454
+ acc_power_ppg_range = np.trapezoid(
455
+ psd_acc[np.arange(psd_acc.shape[0])[:, None], df_idx], f1[df_idx], axis=1
456
+ ) + np.trapezoid(
457
+ psd_acc[np.arange(psd_acc.shape[0])[:, None], fh_idx], f1[fh_idx], axis=1
361
458
  )
362
459
 
363
460
  # Compute the total power across the entire frequency range
364
- acc_power_total = np.trapz(PSD_acc, f1)
365
-
461
+ acc_power_total = np.trapezoid(psd_acc, f1)
462
+
366
463
  # Compute the power ratio of the accelerometer signal in the PPG frequency range
367
- acc_power_ratio = acc_power_PPG_range / acc_power_total
368
-
464
+ acc_power_ratio = acc_power_ppg_range / acc_power_total
465
+
369
466
  return acc_power_ratio
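
The power-ratio idea can be illustrated on synthetic, single-window PSDs; everything below is made up (the real function operates on 2-D arrays of windowed PSDs, and np.trapezoid requires NumPy 2).

    import numpy as np

    f_acc = np.linspace(0.0, 10.0, 201)                  # accelerometer frequency bins
    psd_acc = np.exp(-((f_acc - 1.2) ** 2) / 0.02)       # motion peak near 1.2 Hz
    f_ppg = np.linspace(0.0, 10.0, 201)
    psd_ppg = np.exp(-((f_ppg - 1.2) ** 2) / 0.02)       # PPG dominant frequency at 1.2 Hz

    i = int(np.argmax(psd_ppg))                          # dominant PPG bin
    i_h = int(np.argmin(np.abs(f_acc - 2 * f_ppg[i])))   # accelerometer bin closest to the first harmonic
    band = np.arange(i - 1, i + 2)                       # three bins around the dominant frequency
    band_h = np.arange(i_h - 1, i_h + 2)                 # three bins around the first harmonic

    power_in_ppg_range = np.trapezoid(psd_acc[band], f_acc[band]) + np.trapezoid(
        psd_acc[band_h], f_acc[band_h]
    )
    acc_power_ratio = power_in_ppg_range / np.trapezoid(psd_acc, f_acc)
    # The ratio grows when accelerometer power concentrates at the PPG frequency
    # (or its first harmonic), flagging periodic motion that can corrupt the PPG.
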
370
467
 
468
+
371
469
  def extract_accelerometer_feature(
372
- acc_windowed: np.ndarray,
373
- ppg_windowed: np.ndarray,
374
- config: PulseRateConfig
375
- ) -> pd.DataFrame:
470
+ acc_windowed: np.ndarray, ppg_windowed: np.ndarray, config: PulseRateConfig
471
+ ) -> pd.DataFrame:
376
472
  """
377
- Extract accelerometer features from the accelerometer signal in the PPG frequency range.
378
-
473
+ Extract accelerometer features from the accelerometer signal in the PPG
474
+ frequency range.
475
+
379
476
  Parameters
380
- ----------
477
+ ----------
381
478
  acc_windowed: np.ndarray
382
479
  The array containing the windowed accelerometer signal
383
480
 
384
481
  ppg_windowed: np.ndarray
385
482
  The array containing the corresponding windowed PPG signal
386
-
483
+
387
484
  config: PulseRateConfig
388
485
  The configuration object containing the parameters for the feature extraction
389
486
 
@@ -392,21 +489,21 @@ def extract_accelerometer_feature(
392
489
  pd.DataFrame
393
490
  The dataframe with the relative power accelerometer feature.
394
491
  """
395
-
396
- if config.sensor not in ['imu', 'ppg']:
492
+
493
+ if config.sensor not in ["imu", "ppg"]:
397
494
  raise ValueError("Sensor not recognized.")
398
-
495
+
399
496
  d_freq = {}
400
497
  d_psd = {}
401
- for sensor in ['imu', 'ppg']:
498
+ for sensor in ["imu", "ppg"]:
402
499
  config.set_sensor(sensor)
403
500
 
404
- if sensor == 'imu':
501
+ if sensor == "imu":
405
502
  windows = acc_windowed
406
503
  else:
407
504
  windows = ppg_windowed
408
505
 
409
- window_type = hann(config.window_length_welch, sym = True)
506
+ window_type = hann(config.window_length_welch, sym=True)
410
507
  d_freq[sensor], d_psd[sensor] = welch(
411
508
  windows,
412
509
  fs=config.sampling_frequency,
@@ -414,13 +511,217 @@ def extract_accelerometer_feature(
414
511
  noverlap=config.overlap_welch_window,
415
512
  nfft=config.nfft,
416
513
  detrend=False,
417
- axis=1
514
+ axis=1,
418
515
  )
419
516
 
420
- d_psd['imu'] = np.sum(d_psd['imu'], axis=2) # Sum the PSDs of the three axes
517
+ d_psd["imu"] = np.sum(d_psd["imu"], axis=2) # Sum the PSDs of the three axes
421
518
 
422
- acc_power_ratio = extract_acc_power_feature(d_freq['imu'], d_psd['imu'], d_freq['ppg'], d_psd['ppg'])
519
+ acc_power_ratio = extract_acc_power_feature(
520
+ d_freq["imu"], d_psd["imu"], d_freq["ppg"], d_psd["ppg"]
521
+ )
423
522
 
424
- return pd.DataFrame(acc_power_ratio, columns=['acc_power_ratio'])
523
+ return pd.DataFrame(acc_power_ratio, columns=["acc_power_ratio"])
425
524
 
426
525
 
526
+ def run_pulse_rate_pipeline(
527
+ df_ppg_prepared: pd.DataFrame,
528
+ output_dir: str | Path,
529
+ store_intermediate: list[str] = [],
530
+ pulse_rate_config: PulseRateConfig | None = None,
531
+ ppg_config: PPGConfig | None = None,
532
+ logging_level: int = logging.INFO,
533
+ custom_logger: logging.Logger | None = None,
534
+ ) -> pd.DataFrame:
535
+ """
536
+ High-level pulse rate analysis pipeline for a single segment.
537
+
538
+ This function implements the complete pulse rate analysis workflow from the
539
+ pulse rate tutorial:
540
+ 1. Preprocess PPG and accelerometer data (accelerometer is optional)
541
+ 2. Extract signal quality features
542
+ 3. Signal quality classification
543
+ 4. Pulse rate estimation
544
+ 5. Quantify pulse rate (select relevant columns)
545
+
546
+ Parameters
547
+ ----------
548
+ df_ppg_prepared : pd.DataFrame
549
+ Prepared sensor data with time and PPG column.
550
+ output_dir : str or Path
551
+ Output directory for intermediate results (required)
552
+ store_intermediate : list of str, default []
553
+ Which intermediate results to store.
554
+ pulse_rate_config : PulseRateConfig, optional
555
+ Pulse rate analysis configuration
556
+ ppg_config : PPGConfig, optional
557
+ PPG preprocessing configuration
558
+ logging_level : int, default logging.INFO
559
+ Logging level using standard logging constants
560
+ custom_logger : logging.Logger, optional
561
+ Custom logger instance
562
+
563
+ Returns
564
+ -------
565
+ pd.DataFrame
566
+ Quantified pulse rate data with columns:
567
+ - time: timestamp
568
+ - pulse_rate: pulse rate estimate
569
+ - signal_quality: quality assessment (if available)
570
+ """
571
+ # Setup logger
572
+ active_logger = (
573
+ custom_logger if custom_logger is not None else logging.getLogger(__name__)
574
+ )
575
+ if custom_logger is None:
576
+ active_logger.setLevel(logging_level)
577
+
578
+ if pulse_rate_config is None:
579
+ pulse_rate_config = PulseRateConfig()
580
+ if ppg_config is None:
581
+ ppg_config = PPGConfig()
582
+
583
+ output_dir = Path(output_dir)
584
+ output_dir.mkdir(parents=True, exist_ok=True)
585
+
586
+ # Validate input data columns (PPG is required, accelerometer is optional)
587
+ required_columns = [DataColumns.TIME, DataColumns.PPG]
588
+ missing_columns = [
589
+ col for col in required_columns if col not in df_ppg_prepared.columns
590
+ ]
591
+ if missing_columns:
592
+ active_logger.warning(
593
+ f"Missing required columns for pulse rate pipeline: {missing_columns}"
594
+ )
595
+ return pd.DataFrame()
596
+
597
+ # Step 1: Preprocess PPG and accelerometer data (following tutorial)
598
+ active_logger.info("Step 1: Preprocessing PPG and accelerometer data")
599
+ try:
600
+ # Separate PPG data (always available)
601
+ ppg_cols = [DataColumns.TIME, DataColumns.PPG]
602
+ df_ppg = df_ppg_prepared[ppg_cols].copy()
603
+
604
+ # Preprocess the data
605
+ df_ppg_proc, _ = preprocess_ppg_data(
606
+ df_ppg=df_ppg,
607
+ ppg_config=ppg_config,
608
+ verbose=1 if logging_level <= logging.INFO else 0,
609
+ )
610
+
611
+ if "preprocessing" in store_intermediate:
612
+ preprocessing_dir = output_dir / "preprocessing"
613
+ preprocessing_dir.mkdir(exist_ok=True)
614
+ df_ppg_proc.to_parquet(preprocessing_dir / "ppg_preprocessed.parquet")
615
+ active_logger.info(f"Saved preprocessed data to {preprocessing_dir}")
616
+
617
+ except Exception as e:
618
+ active_logger.error(f"Preprocessing failed: {e}")
619
+ return pd.DataFrame()
620
+
621
+ # Step 2: Extract signal quality features
622
+ active_logger.info("Step 2: Extracting signal quality features")
623
+ try:
624
+ df_features = extract_signal_quality_features(df_ppg_proc, pulse_rate_config)
625
+
626
+ if "pulse_rate" in store_intermediate:
627
+ pulse_rate_dir = output_dir / "pulse_rate"
628
+ pulse_rate_dir.mkdir(exist_ok=True)
629
+ df_features.to_parquet(pulse_rate_dir / "signal_quality_features.parquet")
630
+ active_logger.info(f"Saved signal quality features to {pulse_rate_dir}")
631
+
632
+ except Exception as e:
633
+ active_logger.error(f"Feature extraction failed: {e}")
634
+ return pd.DataFrame()
635
+
636
+ # Step 3: Signal quality classification
637
+ active_logger.info("Step 3: Signal quality classification")
638
+ try:
639
+ classifier_path = files("paradigma.assets") / "ppg_quality_clf_package.pkl"
640
+ classifier_package = ClassifierPackage.load(classifier_path)
641
+
642
+ df_classified = signal_quality_classification(
643
+ df_features, pulse_rate_config, classifier_package
644
+ )
645
+
646
+ except Exception as e:
647
+ active_logger.error(f"Signal quality classification failed: {e}")
648
+ return pd.DataFrame()
649
+
650
+ # Step 4: Pulse rate estimation
651
+ active_logger.info("Step 4: Pulse rate estimation")
652
+ try:
653
+ df_pulse_rates = estimate_pulse_rate(
654
+ df_sqa=df_classified,
655
+ df_ppg_preprocessed=df_ppg_proc,
656
+ config=pulse_rate_config,
657
+ )
658
+
659
+ except Exception as e:
660
+ active_logger.error(f"Pulse rate estimation failed: {e}")
661
+ return pd.DataFrame()
662
+
663
+ # Step 5: Quantify pulse rate (select relevant columns and apply quality filtering)
664
+ active_logger.info("Step 5: Quantifying pulse rate")
665
+
666
+ # Select quantification columns
667
+ quantification_columns = []
668
+ if DataColumns.TIME in df_pulse_rates.columns:
669
+ quantification_columns.append(DataColumns.TIME)
670
+ if DataColumns.PULSE_RATE in df_pulse_rates.columns:
671
+ quantification_columns.append(DataColumns.PULSE_RATE)
672
+ if "signal_quality" in df_pulse_rates.columns:
673
+ quantification_columns.append("signal_quality")
674
+
675
+ # Use available columns
676
+ available_columns = [
677
+ col for col in quantification_columns if col in df_pulse_rates.columns
678
+ ]
679
+ if not available_columns:
680
+ active_logger.warning("No valid quantification columns found")
681
+ return pd.DataFrame()
682
+
683
+ df_quantification = df_pulse_rates[available_columns].copy()
684
+
685
+ # Apply quality filtering if signal quality is available
686
+ if (
687
+ "signal_quality" in df_quantification.columns
688
+ and DataColumns.PULSE_RATE in df_quantification.columns
689
+ ):
690
+ quality_threshold = getattr(pulse_rate_config, "threshold_sqa", 0.5)
691
+ low_quality_mask = df_quantification["signal_quality"] < quality_threshold
692
+ df_quantification.loc[low_quality_mask, DataColumns.PULSE_RATE] = np.nan
693
+
694
+ if "quantification" in store_intermediate:
695
+ quantification_dir = output_dir / "quantification"
696
+ quantification_dir.mkdir(exist_ok=True)
697
+ df_quantification.to_parquet(
698
+ quantification_dir / "pulse_rate_quantification.parquet"
699
+ )
700
+
701
+ # Save quantification metadata
702
+ valid_pulse_rates = (
703
+ df_quantification[DataColumns.PULSE_RATE].dropna()
704
+ if DataColumns.PULSE_RATE in df_quantification.columns
705
+ else pd.Series(dtype=float)
706
+ )
707
+ quantification_meta = {
708
+ "total_windows": len(df_quantification),
709
+ "valid_pulse_rate_estimates": len(valid_pulse_rates),
710
+ "columns": list(df_quantification.columns),
711
+ }
712
+ with open(quantification_dir / "pulse_rate_quantification_meta.json", "w") as f:
713
+ json.dump(quantification_meta, f, indent=2)
714
+
715
+ active_logger.info(f"Saved pulse rate quantification to {quantification_dir}")
716
+
717
+ pulse_rate_estimates = (
718
+ len(df_quantification[DataColumns.PULSE_RATE].dropna())
719
+ if DataColumns.PULSE_RATE in df_quantification.columns
720
+ else 0
721
+ )
722
+ active_logger.info(
723
+ f"Pulse rate analysis completed: {pulse_rate_estimates} valid pulse "
724
+ f"rate estimates from {len(df_quantification)} total windows"
725
+ )
726
+
727
+ return df_quantification
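
A minimal end-to-end sketch of the new high-level entry point; the parquet file name is hypothetical, and the store_intermediate values are the ones the function recognizes above.

    import logging
    import pandas as pd

    df_ppg_prepared = pd.read_parquet("ppg_prepared.parquet")  # hypothetical file with time + PPG columns
    df_quant = run_pulse_rate_pipeline(
        df_ppg_prepared=df_ppg_prepared,
        output_dir="./pulse_rate_output",
        store_intermediate=["preprocessing", "pulse_rate", "quantification"],
        logging_level=logging.INFO,
    )
    # df_quant contains the time and pulse_rate columns produced by estimate_pulse_rate,
    # with low-quality estimates set to NaN when a signal_quality column is present.
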