paradigma 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,25 +1,22 @@
1
- import json
1
+ from datetime import datetime
2
+ from typing import List, Tuple, Union
3
+
2
4
  import numpy as np
3
5
  import pandas as pd
4
- import tsdf
5
- from pathlib import Path
6
6
  from scipy import signal
7
7
  from scipy.interpolate import interp1d
8
- from typing import List, Tuple, Union
9
- from datetime import datetime
10
8
 
11
- from paradigma.constants import TimeUnit, DataColumns
12
- from paradigma.config import PPGConfig, IMUConfig
13
- from paradigma.util import write_df_data, read_metadata, invert_watch_side
9
+ from paradigma.config import IMUConfig, PPGConfig
10
+ from paradigma.util import invert_watch_side
14
11
 
15
12
 
16
13
  def resample_data(
17
14
  df: pd.DataFrame,
18
- time_column : str,
15
+ time_column: str,
19
16
  values_column_names: List[str],
20
17
  sampling_frequency: int,
21
18
  resampling_frequency: int,
22
- tolerance: float | None = None
19
+ tolerance: float | None = None,
23
20
  ) -> pd.DataFrame:
24
21
  """
25
22
  Resamples sensor data to a specified frequency using cubic interpolation.
@@ -37,9 +34,8 @@ def resample_data(
37
34
  resampling_frequency : int
38
35
  The frequency to which the data should be resampled (in Hz).
39
36
  tolerance : float, optional
40
- The tolerance added to the expected difference when checking
41
- for contiguous timestamps. If not provided, it defaults to
42
- twice the expected interval.
37
+ The tolerance added to the expected difference when checking
38
+ for contiguous timestamps. If not provided, it defaults to the tolerance specified in IMUConfig.
43
39
 
44
40
  Returns
45
41
  -------
@@ -57,9 +53,10 @@ def resample_data(
57
53
  - Uses cubic interpolation for smooth resampling if there are enough points.
58
54
  - If only two timestamps are available, it falls back to linear interpolation.
59
55
  """
60
- # Set default tolerance if not provided to twice the expected interval
56
+
57
+ # Set default tolerance if not provided to tolerance specified in IMUConfig
61
58
  if tolerance is None:
62
- tolerance = 2 * 1 / sampling_frequency
59
+ tolerance = IMUConfig().tolerance
63
60
 
64
61
  # Extract time and values
65
62
  time_abs_array = np.array(df[time_column])
@@ -68,7 +65,7 @@ def resample_data(
68
65
  # Ensure the time array is strictly increasing
69
66
  if not np.all(np.diff(time_abs_array) > 0):
70
67
  raise ValueError("Time array is not strictly increasing")
71
-
68
+
72
69
  # Ensure the time array is contiguous
73
70
  expected_interval = 1 / sampling_frequency
74
71
  timestamp_diffs = np.diff(time_abs_array)
@@ -76,12 +73,20 @@ def resample_data(
76
73
  raise ValueError("Time array is not contiguous")
77
74
 
78
75
  # Resample the time data using the specified frequency
79
- t_resampled = np.arange(time_abs_array[0], time_abs_array[-1], 1 / resampling_frequency)
80
-
76
+ t_resampled = np.arange(
77
+ time_abs_array[0], time_abs_array[-1], 1 / resampling_frequency
78
+ )
79
+
81
80
  # Choose interpolation method
82
81
  interpolation_kind = "cubic" if len(time_abs_array) > 3 else "linear"
83
- interpolator = interp1d(time_abs_array, values_array, axis=0, kind=interpolation_kind, fill_value="extrapolate")
84
-
82
+ interpolator = interp1d(
83
+ time_abs_array,
84
+ values_array,
85
+ axis=0,
86
+ kind=interpolation_kind,
87
+ fill_value="extrapolate",
88
+ )
89
+
85
90
  # Interpolate
86
91
  resampled_values = interpolator(t_resampled)
87
92
 
@@ -103,20 +108,20 @@ def butterworth_filter(
103
108
  """
104
109
  Applies a Butterworth filter to 1D or 2D sensor data.
105
110
 
106
- This function applies a low-pass, high-pass, or band-pass Butterworth filter to the
107
- input data. The filter is designed using the specified order, cutoff frequency,
111
+ This function applies a low-pass, high-pass, or band-pass Butterworth filter to the
112
+ input data. The filter is designed using the specified order, cutoff frequency,
108
113
  and passband type. The function can handle both 1D and 2D data arrays.
109
114
 
110
115
  Parameters
111
116
  ----------
112
117
  data : np.ndarray
113
- The sensor data to be filtered. Can be 1D (e.g., a single signal) or 2D
118
+ The sensor data to be filtered. Can be 1D (e.g., a single signal) or 2D
114
119
  (e.g., multi-axis sensor data).
115
120
  order : int
116
121
  The order of the Butterworth filter. Higher values result in a steeper roll-off.
117
122
  cutoff_frequency : float or List[float]
118
- The cutoff frequency (or frequencies) for the filter. For a low-pass or high-pass filter,
119
- this is a single float. For a band-pass filter, this should be a list of two floats,
123
+ The cutoff frequency (or frequencies) for the filter. For a low-pass or high-pass filter,
124
+ this is a single float. For a band-pass filter, this should be a list of two floats,
120
125
  specifying the lower and upper cutoff frequencies.
121
126
  passband : str
122
127
  The type of passband to apply. Options are:
@@ -159,7 +164,10 @@ def butterworth_filter(
159
164
  else:
160
165
  raise ValueError("Data must be either 1D or 2D.")
161
166
 
162
- def preprocess_imu_data(df: pd.DataFrame, config: IMUConfig, sensor: str, watch_side: str) -> pd.DataFrame:
167
+
168
+ def preprocess_imu_data(
169
+ df: pd.DataFrame, config: IMUConfig, sensor: str, watch_side: str
170
+ ) -> pd.DataFrame:
163
171
  """
164
172
  Preprocesses IMU data by resampling and applying filters.
165
173
 
@@ -186,69 +194,88 @@ def preprocess_imu_data(df: pd.DataFrame, config: IMUConfig, sensor: str, watch_
186
194
  The preprocessed accelerometer and/or gyroscope data with the following transformations:
187
195
  - Resampled data at the specified frequency.
188
196
  - Filtered accelerometer data with high-pass and low-pass filtering applied.
189
-
197
+
190
198
  Notes
191
199
  -----
192
200
  - The function applies Butterworth filters to accelerometer data, both high-pass and low-pass.
193
201
  """
194
202
 
195
203
  # Extract sensor column
196
- if sensor == 'accelerometer':
197
- values_colnames = config.accelerometer_cols
198
- elif sensor == 'gyroscope':
199
- values_colnames = config.gyroscope_cols
200
- elif sensor == 'both':
201
- values_colnames = config.accelerometer_cols + config.gyroscope_cols
204
+ if sensor == "accelerometer":
205
+ values_colnames = config.accelerometer_colnames
206
+ elif sensor == "gyroscope":
207
+ values_colnames = config.gyroscope_colnames
208
+ elif sensor == "both":
209
+ values_colnames = config.accelerometer_colnames + config.gyroscope_colnames
202
210
  else:
203
- raise('Sensor should be either accelerometer, gyroscope, or both')
204
-
211
+ raise ("Sensor should be either accelerometer, gyroscope, or both")
212
+
205
213
  # Resample the data to the specified frequency
206
214
  df = resample_data(
207
215
  df=df,
208
- time_column=DataColumns.TIME,
216
+ time_column=config.time_colname,
209
217
  values_column_names=values_colnames,
210
218
  sampling_frequency=config.sampling_frequency,
211
- resampling_frequency=config.sampling_frequency
219
+ resampling_frequency=config.resampling_frequency,
220
+ tolerance=config.tolerance,
212
221
  )
213
222
 
214
223
  # Invert the IMU data if the watch was worn on the right wrist
215
224
  df = invert_watch_side(df, watch_side, sensor)
216
-
217
- if sensor in ['accelerometer', 'both']:
218
-
225
+
226
+ if sensor in ["accelerometer", "both"]:
227
+
219
228
  # Extract accelerometer data for filtering
220
- accel_data = df[config.accelerometer_cols].values
229
+ accel_data = df[config.accelerometer_colnames].values
221
230
 
222
231
  # Define filter configurations for high-pass and low-pass
223
232
  filter_renaming_configs = {
224
- "hp": {"result_columns": config.accelerometer_cols, "replace_original": True},
225
- "lp": {"result_columns": [f'{col}_grav' for col in config.accelerometer_cols], "replace_original": False},
233
+ "hp": {
234
+ "result_columns": config.accelerometer_colnames,
235
+ "replace_original": True,
236
+ },
237
+ "lp": {
238
+ "result_columns": [
239
+ f"{col}_grav" for col in config.accelerometer_colnames
240
+ ],
241
+ "replace_original": False,
242
+ },
226
243
  }
227
244
 
228
245
  # Apply filters in a loop
229
246
  for passband, filter_config in filter_renaming_configs.items():
230
247
  filtered_data = butterworth_filter(
231
- data=accel_data,
232
- order=config.filter_order,
233
- cutoff_frequency=config.lower_cutoff_frequency,
234
- passband=passband,
235
- sampling_frequency=config.sampling_frequency,
248
+ data=accel_data,
249
+ order=config.filter_order,
250
+ cutoff_frequency=config.lower_cutoff_frequency,
251
+ passband=passband,
252
+ sampling_frequency=config.sampling_frequency,
236
253
  )
237
254
 
238
255
  # Replace or add new columns based on configuration
239
256
  df[filter_config["result_columns"]] = filtered_data
240
257
 
241
- values_colnames += config.gravity_cols
258
+ values_colnames += config.gravity_colnames
242
259
 
243
- df = df[[DataColumns.TIME, *values_colnames]]
260
+ df = df[[config.time_colname, *values_colnames]]
244
261
 
245
262
  return df
246
263
 
247
264
 
248
- def preprocess_ppg_data(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config: PPGConfig,
249
- imu_config: IMUConfig, start_time_ppg: str, start_time_imu: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
265
+ def preprocess_ppg_data(
266
+ df_ppg: pd.DataFrame,
267
+ ppg_config: PPGConfig,
268
+ start_time_ppg: str | None = None,
269
+ df_acc: pd.DataFrame | None = None,
270
+ imu_config: IMUConfig | None = None,
271
+ start_time_imu: str | None = None,
272
+ ) -> Tuple[pd.DataFrame, pd.DataFrame | None]:
250
273
  """
251
- Preprocess PPG and IMU (accelerometer only) data by resampling, filtering, and aligning the data segments.
274
+ This function preprocesses PPG and accelerometer data by resampling, filtering and aligning the data segments of both sensors (if applicable).
275
+ Aligning is done using the extract_overlapping_segments function which is based on the provided start times of the PPG and IMU data and returns
276
+ only the data points where both signals overlap in time. The remaining data points are discarded.
277
+ After alignment, the function resamples the data to the specified frequency and applies Butterworth filters to both PPG and accelerometer data (if applicable).
278
+ The output is two DataFrames: one for the preprocessed PPG data and another for the preprocessed accelerometer data (if provided, otherwise return is None).
252
279
 
253
280
  Parameters
254
281
  ----------
@@ -267,79 +294,117 @@ def preprocess_ppg_data(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config:
267
294
 
268
295
  Returns
269
296
  -------
270
- Tuple[pd.DataFrame, pd.DataFrame]
271
- Preprocessed PPG and IMU data as DataFrames.
272
-
297
+ Tuple[pd.DataFrame, pd.DataFrame | None]
298
+ A tuple containing two DataFrames:
299
+ - Preprocessed PPG data with the following transformations:
300
+ - Resampled data at the specified frequency.
301
+ - Filtered PPG data with bandpass filtering applied.
302
+ - Preprocessed accelerometer data (if provided, otherwise return is None) with the following transformations:
303
+ - Resampled data at the specified frequency.
304
+ - Filtered accelerometer data with high-pass filtering applied.
305
+
306
+ Notes
307
+ -----
308
+ - If accelerometer data or IMU configuration is not provided, the function only preprocesses PPG data.
309
+ - The function applies a band-pass Butterworth filter to the PPG data and a high-pass Butterworth filter to the accelerometer data (if applicable).
310
+
273
311
  """
312
+ if df_acc is not None and imu_config is not None:
313
+ # Extract overlapping segments
314
+ df_ppg_overlapping, df_acc_overlapping = extract_overlapping_segments(
315
+ df_ppg=df_ppg,
316
+ df_acc=df_acc,
317
+ time_colname_ppg=ppg_config.time_colname,
318
+ time_colname_imu=imu_config.time_colname,
319
+ start_time_ppg=start_time_ppg,
320
+ start_time_acc=start_time_imu,
321
+ )
274
322
 
275
- # Extract overlapping segments
276
- df_ppg_overlapping, df_acc_overlapping = extract_overlapping_segments(df_ppg, df_acc, start_time_ppg, start_time_imu)
277
-
278
- # Resample accelerometer data
279
- df_acc_proc = resample_data(
280
- df=df_acc_overlapping,
281
- time_column=DataColumns.TIME,
282
- values_column_names = list(imu_config.d_channels_accelerometer.keys()),
283
- sampling_frequency=imu_config.sampling_frequency,
284
- resampling_frequency=imu_config.sampling_frequency
285
- )
323
+ # Resample accelerometer data
324
+ df_acc_proc = resample_data(
325
+ df=df_acc_overlapping,
326
+ time_column=imu_config.time_colname,
327
+ values_column_names=list(imu_config.d_channels_accelerometer.keys()),
328
+ sampling_frequency=imu_config.sampling_frequency,
329
+ resampling_frequency=imu_config.resampling_frequency,
330
+ tolerance=imu_config.tolerance,
331
+ )
332
+
333
+ # Extract accelerometer data for filtering
334
+ accel_data = df_acc_proc[imu_config.accelerometer_colnames].values
335
+
336
+ # Define filter configuration for high-pass
337
+ filter_renaming_configs = {
338
+ "hp": {
339
+ "result_columns": imu_config.accelerometer_colnames,
340
+ "replace_original": True,
341
+ }
342
+ }
343
+
344
+ # Apply filters in a loop
345
+ for passband, filter_config in filter_renaming_configs.items():
346
+ filtered_data = butterworth_filter(
347
+ data=accel_data,
348
+ order=imu_config.filter_order,
349
+ cutoff_frequency=imu_config.lower_cutoff_frequency,
350
+ passband=passband,
351
+ sampling_frequency=imu_config.sampling_frequency,
352
+ )
353
+
354
+ # Replace or add new columns based on configuration
355
+ df_acc_proc[filter_config["result_columns"]] = filtered_data
356
+
357
+ else:
358
+ df_ppg_overlapping = df_ppg
286
359
 
287
360
  # Resample PPG data
288
361
  df_ppg_proc = resample_data(
289
362
  df=df_ppg_overlapping,
290
- time_column=DataColumns.TIME,
291
- values_column_names = list(ppg_config.d_channels_ppg.keys()),
363
+ time_column=ppg_config.time_colname,
364
+ values_column_names=list(ppg_config.d_channels_ppg.keys()),
292
365
  sampling_frequency=ppg_config.sampling_frequency,
293
- resampling_frequency=ppg_config.sampling_frequency
366
+ resampling_frequency=ppg_config.resampling_frequency,
367
+ tolerance=ppg_config.tolerance,
294
368
  )
295
369
 
296
-
297
- # Extract accelerometer data for filtering
298
- accel_data = df_acc_proc[imu_config.accelerometer_cols].values
299
-
300
- # Define filter configurations for high-pass and low-pass
301
- filter_renaming_configs = {
302
- "hp": {"result_columns": imu_config.accelerometer_cols, "replace_original": True}}
303
-
304
- # Apply filters in a loop
305
- for passband, filter_config in filter_renaming_configs.items():
306
- filtered_data = butterworth_filter(
307
- data=accel_data,
308
- order=imu_config.filter_order,
309
- cutoff_frequency=imu_config.lower_cutoff_frequency,
310
- passband=passband,
311
- sampling_frequency=imu_config.sampling_frequency,
312
- )
313
-
314
- # Replace or add new columns based on configuration
315
- df_acc_proc[filter_config["result_columns"]] = filtered_data
316
-
317
370
  # Extract PPG data for filtering
318
371
  ppg_data = df_ppg_proc[ppg_config.ppg_colname].values
319
372
 
320
373
  # Define filter configuration for band-pass
321
374
  filter_renaming_configs = {
322
- "bandpass": {"result_columns": ppg_config.ppg_colname, "replace_original": True}}
375
+ "bandpass": {"result_columns": ppg_config.ppg_colname, "replace_original": True}
376
+ }
323
377
 
324
378
  # Apply filters in a loop
325
379
  for passband, filter_config in filter_renaming_configs.items():
326
380
  filtered_data = butterworth_filter(
327
- data=ppg_data,
328
- order=ppg_config.filter_order,
329
- cutoff_frequency=[ppg_config.lower_cutoff_frequency, ppg_config.upper_cutoff_frequency],
330
- passband=passband,
331
- sampling_frequency=ppg_config.sampling_frequency,
381
+ data=ppg_data,
382
+ order=ppg_config.filter_order,
383
+ cutoff_frequency=[
384
+ ppg_config.lower_cutoff_frequency,
385
+ ppg_config.upper_cutoff_frequency,
386
+ ],
387
+ passband=passband,
388
+ sampling_frequency=ppg_config.sampling_frequency,
332
389
  )
333
390
 
334
391
  # Replace or add new columns based on configuration
335
392
  df_ppg_proc[filter_config["result_columns"]] = filtered_data
336
-
337
- return df_ppg_proc, df_acc_proc
338
-
339
393
 
394
+ if df_acc is not None and imu_config is not None:
395
+ return df_ppg_proc, df_acc_proc
396
+ else:
397
+ return df_ppg_proc, None
340
398
 
341
399
 
342
- def extract_overlapping_segments(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, start_time_ppg: str, start_time_acc: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
400
+ def extract_overlapping_segments(
401
+ df_ppg: pd.DataFrame,
402
+ df_acc: pd.DataFrame,
403
+ time_colname_ppg: str,
404
+ time_colname_imu: str,
405
+ start_time_ppg: str,
406
+ start_time_acc: str,
407
+ ) -> Tuple[pd.DataFrame, pd.DataFrame]:
343
408
  """
344
409
  Extract DataFrames with overlapping data segments between accelerometer (from the IMU) and PPG datasets based on their timestamps.
345
410
 
@@ -349,6 +414,10 @@ def extract_overlapping_segments(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, sta
349
414
  DataFrame containing PPG data.
350
415
  df_acc : pd.DataFrame
351
416
  DataFrame containing accelerometer data from the IMU.
417
+ time_colname_ppg : str
418
+ The name of the column containing the time data in the PPG dataframe.
419
+ time_colname_imu : str
420
+ The name of the column containing the time data in the IMU dataframe.
352
421
  start_time_ppg : str
353
422
  iso8601 formatted start time of the PPG data.
354
423
  start_time_acc : str
@@ -366,21 +435,21 @@ def extract_overlapping_segments(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, sta
366
435
  start_acc_ppg = int(datetime_acc_start.timestamp())
367
436
 
368
437
  # Calculate the time in Unix timestamps for each dataset because the timestamps are relative to the start time
369
- ppg_time = df_ppg[DataColumns.TIME] + start_unix_ppg
370
- acc_time = df_acc[DataColumns.TIME] + start_acc_ppg
438
+ ppg_time = df_ppg[time_colname_ppg] + start_unix_ppg
439
+ acc_time = df_acc[time_colname_imu] + start_acc_ppg
371
440
 
372
441
  # Determine the overlapping time interval
373
442
  start_time = max(ppg_time.iloc[0], acc_time.iloc[0])
374
443
  end_time = min(ppg_time.iloc[-1], acc_time.iloc[-1])
375
444
 
376
445
  # Extract indices for overlapping segments
377
- ppg_start_index = np.searchsorted(ppg_time, start_time, 'left')
378
- ppg_end_index = np.searchsorted(ppg_time, end_time, 'right') - 1
379
- acc_start_index = np.searchsorted(acc_time, start_time, 'left')
380
- acc_end_index = np.searchsorted(acc_time, end_time, 'right') - 1
446
+ ppg_start_index = np.searchsorted(ppg_time, start_time, "left")
447
+ ppg_end_index = np.searchsorted(ppg_time, end_time, "right") - 1
448
+ acc_start_index = np.searchsorted(acc_time, start_time, "left")
449
+ acc_end_index = np.searchsorted(acc_time, end_time, "right") - 1
381
450
 
382
451
  # Extract overlapping segments from DataFrames
383
- df_ppg_overlapping = df_ppg.iloc[ppg_start_index:ppg_end_index + 1]
384
- df_acc_overlapping = df_acc.iloc[acc_start_index:acc_end_index + 1]
452
+ df_ppg_overlapping = df_ppg.iloc[ppg_start_index : ppg_end_index + 1]
453
+ df_acc_overlapping = df_acc.iloc[acc_start_index : acc_end_index + 1]
385
454
 
386
- return df_ppg_overlapping, df_acc_overlapping
455
+ return df_ppg_overlapping, df_acc_overlapping