paradigma 0.3.2__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. paradigma/assets/gait_detection_clf_package.pkl +0 -0
  2. paradigma/assets/gait_filtering_clf_package.pkl +0 -0
  3. paradigma/assets/ppg_quality_clf_package.pkl +0 -0
  4. paradigma/assets/tremor_detection_clf_package.pkl +0 -0
  5. paradigma/classification.py +115 -0
  6. paradigma/config.py +314 -0
  7. paradigma/constants.py +48 -7
  8. paradigma/feature_extraction.py +811 -547
  9. paradigma/pipelines/__init__.py +0 -0
  10. paradigma/pipelines/gait_pipeline.py +727 -0
  11. paradigma/pipelines/heart_rate_pipeline.py +426 -0
  12. paradigma/pipelines/heart_rate_utils.py +780 -0
  13. paradigma/pipelines/tremor_pipeline.py +299 -0
  14. paradigma/preprocessing.py +363 -0
  15. paradigma/segmenting.py +396 -0
  16. paradigma/testing.py +416 -0
  17. paradigma/util.py +393 -16
  18. paradigma-0.4.1.dist-info/METADATA +138 -0
  19. paradigma-0.4.1.dist-info/RECORD +22 -0
  20. {paradigma-0.3.2.dist-info → paradigma-0.4.1.dist-info}/WHEEL +1 -1
  21. paradigma/gait_analysis.py +0 -415
  22. paradigma/gait_analysis_config.py +0 -266
  23. paradigma/heart_rate_analysis.py +0 -127
  24. paradigma/heart_rate_analysis_config.py +0 -9
  25. paradigma/heart_rate_util.py +0 -173
  26. paradigma/imu_preprocessing.py +0 -232
  27. paradigma/ppg/classifier/LR_PPG_quality.pkl +0 -0
  28. paradigma/ppg/classifier/LR_model.mat +0 -0
  29. paradigma/ppg/feat_extraction/acc_feature.m +0 -20
  30. paradigma/ppg/feat_extraction/peakdet.m +0 -64
  31. paradigma/ppg/feat_extraction/ppg_features.m +0 -53
  32. paradigma/ppg/glob_functions/extract_hr_segments.m +0 -37
  33. paradigma/ppg/glob_functions/extract_overlapping_segments.m +0 -23
  34. paradigma/ppg/glob_functions/jsonlab/AUTHORS.txt +0 -41
  35. paradigma/ppg/glob_functions/jsonlab/ChangeLog.txt +0 -74
  36. paradigma/ppg/glob_functions/jsonlab/LICENSE_BSD.txt +0 -25
  37. paradigma/ppg/glob_functions/jsonlab/LICENSE_GPLv3.txt +0 -699
  38. paradigma/ppg/glob_functions/jsonlab/README.txt +0 -394
  39. paradigma/ppg/glob_functions/jsonlab/examples/.svn/entries +0 -368
  40. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_jsonlab_basic.m.svn-base +0 -180
  41. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_ubjson_basic.m.svn-base +0 -180
  42. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example1.json.svn-base +0 -23
  43. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example2.json.svn-base +0 -22
  44. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example3.json.svn-base +0 -11
  45. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example4.json.svn-base +0 -34
  46. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_basictest.matlab.svn-base +0 -662
  47. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.m.svn-base +0 -27
  48. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.matlab.svn-base +0 -144
  49. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_speedtest.m.svn-base +0 -21
  50. paradigma/ppg/glob_functions/jsonlab/examples/demo_jsonlab_basic.m +0 -180
  51. paradigma/ppg/glob_functions/jsonlab/examples/demo_ubjson_basic.m +0 -180
  52. paradigma/ppg/glob_functions/jsonlab/examples/example1.json +0 -23
  53. paradigma/ppg/glob_functions/jsonlab/examples/example2.json +0 -22
  54. paradigma/ppg/glob_functions/jsonlab/examples/example3.json +0 -11
  55. paradigma/ppg/glob_functions/jsonlab/examples/example4.json +0 -34
  56. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_basictest.matlab +0 -662
  57. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.m +0 -27
  58. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.matlab +0 -144
  59. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_speedtest.m +0 -21
  60. paradigma/ppg/glob_functions/jsonlab/jsonopt.m +0 -32
  61. paradigma/ppg/glob_functions/jsonlab/loadjson.m +0 -566
  62. paradigma/ppg/glob_functions/jsonlab/loadubjson.m +0 -528
  63. paradigma/ppg/glob_functions/jsonlab/mergestruct.m +0 -33
  64. paradigma/ppg/glob_functions/jsonlab/savejson.m +0 -475
  65. paradigma/ppg/glob_functions/jsonlab/saveubjson.m +0 -504
  66. paradigma/ppg/glob_functions/jsonlab/varargin2struct.m +0 -40
  67. paradigma/ppg/glob_functions/sample_prob_final.m +0 -49
  68. paradigma/ppg/glob_functions/synchronization.m +0 -76
  69. paradigma/ppg/glob_functions/tsdf_scan_meta.m +0 -22
  70. paradigma/ppg/hr_functions/Long_TFD_JOT.m +0 -37
  71. paradigma/ppg/hr_functions/PPG_TFD_HR.m +0 -59
  72. paradigma/ppg/hr_functions/TFD toolbox JOT/.gitignore +0 -4
  73. paradigma/ppg/hr_functions/TFD toolbox JOT/CHANGELOG.md +0 -23
  74. paradigma/ppg/hr_functions/TFD toolbox JOT/LICENCE.md +0 -27
  75. paradigma/ppg/hr_functions/TFD toolbox JOT/README.md +0 -251
  76. paradigma/ppg/hr_functions/TFD toolbox JOT/README.pdf +0 -0
  77. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_kern.m +0 -142
  78. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_lag_kern.m +0 -314
  79. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_lag_kern.m +0 -123
  80. paradigma/ppg/hr_functions/TFD toolbox JOT/dec_tfd.m +0 -154
  81. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_di_gdtfd.m +0 -194
  82. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_li_gdtfd.m +0 -200
  83. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_nonsep_gdtfd.m +0 -229
  84. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_sep_gdtfd.m +0 -241
  85. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/di_gdtfd.m +0 -157
  86. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/li_gdtfd.m +0 -190
  87. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/nonsep_gdtfd.m +0 -196
  88. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/sep_gdtfd.m +0 -199
  89. paradigma/ppg/hr_functions/TFD toolbox JOT/full_tfd.m +0 -144
  90. paradigma/ppg/hr_functions/TFD toolbox JOT/load_curdir.m +0 -13
  91. paradigma/ppg/hr_functions/TFD toolbox JOT/pics/decimated_TFDs_examples.png +0 -0
  92. paradigma/ppg/hr_functions/TFD toolbox JOT/pics/full_TFDs_examples.png +0 -0
  93. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/check_dec_params_seq.m +0 -79
  94. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispEE.m +0 -9
  95. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispVars.m +0 -26
  96. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/disp_bytes.m +0 -25
  97. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_full.m +0 -40
  98. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_half.m +0 -34
  99. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/gen_LFM.m +0 -29
  100. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_analytic_signal.m +0 -76
  101. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_window.m +0 -176
  102. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/isreal_fn.m +0 -11
  103. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/padWin.m +0 -97
  104. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/vtfd.m +0 -149
  105. paradigma/ppg/preprocessing/preprocessing_imu.m +0 -15
  106. paradigma/ppg/preprocessing/preprocessing_ppg.m +0 -13
  107. paradigma/ppg_preprocessing.py +0 -313
  108. paradigma/preprocessing_config.py +0 -69
  109. paradigma/quantification.py +0 -58
  110. paradigma/tremor/TremorFeaturesAndClassification.m +0 -345
  111. paradigma/tremor/feat_extraction/DerivativesExtract.m +0 -22
  112. paradigma/tremor/feat_extraction/ExtractBandSignalsRMS.m +0 -72
  113. paradigma/tremor/feat_extraction/MFCCExtract.m +0 -100
  114. paradigma/tremor/feat_extraction/PSDBandPower.m +0 -52
  115. paradigma/tremor/feat_extraction/PSDEst.m +0 -63
  116. paradigma/tremor/feat_extraction/PSDExtrAxis.m +0 -88
  117. paradigma/tremor/feat_extraction/PSDExtrOpt.m +0 -95
  118. paradigma/tremor/preprocessing/InterpData.m +0 -32
  119. paradigma/tremor/weekly_aggregates/WeeklyAggregates.m +0 -295
  120. paradigma/windowing.py +0 -219
  121. paradigma-0.3.2.dist-info/METADATA +0 -79
  122. paradigma-0.3.2.dist-info/RECORD +0 -108
  123. {paradigma-0.3.2.dist-info → paradigma-0.4.1.dist-info}/LICENSE +0 -0
@@ -0,0 +1,299 @@
1
+ import tsdf
2
+ import json
3
+ import pandas as pd
4
+ import numpy as np
5
+ from pathlib import Path
6
+ from scipy import signal
7
+ from scipy.stats import gaussian_kde
8
+
9
+ from paradigma.classification import ClassifierPackage
10
+ from paradigma.constants import DataColumns
11
+ from paradigma.config import TremorConfig
12
+ from paradigma.feature_extraction import compute_mfccs, compute_power_in_bandwidth, compute_total_power, extract_frequency_peak, \
13
+ extract_tremor_power
14
+ from paradigma.segmenting import tabulate_windows, WindowedDataExtractor
15
+ from paradigma.util import get_end_iso8601, write_df_data, read_metadata, aggregate_parameter
16
+
17
+
18
def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFrame:
    """
    Group sequences of timestamps into windows and extract tremor features
    from the windowed gyroscope data.

    Parameters
    ----------
    df : pd.DataFrame
        Input sensor data containing a time column and the gyroscope columns
        named in `config`.
    config : TremorConfig
        Configuration with column names and windowing / feature-extraction
        settings (window length, step length, sampling frequency).

    Returns
    -------
    pd.DataFrame
        One row per window: the window start time plus the extracted
        spectral tremor features.

    Raises
    ------
    ValueError
        If the input DataFrame lacks the columns required by `config` or a
        feature-extraction step fails.
    """
    # Window the time column together with the gyroscope channels
    columns_to_window = [DataColumns.TIME] + config.gyroscope_cols
    windows = tabulate_windows(
        df,
        columns_to_window,
        config.window_length_s,
        config.window_step_length_s,
        config.sampling_frequency,
    )

    extractor = WindowedDataExtractor(columns_to_window)
    time_idx = extractor.get_index(DataColumns.TIME)
    gyro_slice = extractor.get_slice(config.gyroscope_cols)

    # Each window is stamped with its earliest timestamp
    window_start_times = np.min(windows[:, :, time_idx], axis=1)
    gyro_windows = windows[:, :, gyro_slice]

    # Spectral-domain tremor features, one row per window
    spectral_features = extract_spectral_domain_features(gyro_windows, config)

    # Join the start times with the spectral features column-wise
    return pd.concat(
        [pd.DataFrame({DataColumns.TIME: window_start_times}), spectral_features],
        axis=1,
    )
71
+
72
+
73
def detect_tremor(df: pd.DataFrame, config: TremorConfig, full_path_to_classifier_package: str | Path) -> pd.DataFrame:
    """
    Detect tremor per window with a pre-trained classifier plus rest-tremor checks.

    Steps: load the classifier package, scale the features the scaler was
    fitted on, predict tremor probability, threshold it, and then apply two
    extra checks (peak frequency inside the rest-tremor band, and power below
    the tremor band under the movement threshold).

    Parameters
    ----------
    df : pd.DataFrame
        Extracted tremor features; must contain the classifier's feature
        columns plus `freq_peak` and `below_tremor_power`.
    config : TremorConfig
        Holds the rest-tremor frequency range (`fmin_rest_tremor`,
        `fmax_rest_tremor`) and `movement_threshold`.
    full_path_to_classifier_package : str | Path
        Path to the classifier package file (classifier, scaler, threshold).

    Returns
    -------
    pd.DataFrame
        `df` with four added columns: `PRED_TREMOR_PROBA`,
        `PRED_TREMOR_LOGREG`, `PRED_TREMOR_CHECKED` and `PRED_ARM_AT_REST`.

    Raises
    ------
    FileNotFoundError
        If the classifier package file does not exist.
    ValueError
        If the DataFrame lacks the features the classifier expects.
    """
    clf_package = ClassifierPackage.load(full_path_to_classifier_package)
    clf = clf_package.classifier

    scaling_features = clf_package.scaler.feature_names_in_
    prediction_features = clf.feature_names_in_

    # Scale only the columns the scaler was fitted on, then slot the scaled
    # values back into a copy of the full prediction feature matrix.
    X = df.loc[:, prediction_features].copy()
    X.loc[:, scaling_features] = clf_package.transform_features(df.loc[:, scaling_features])

    # Probability of tremor, then a hard call at the packaged threshold
    df[DataColumns.PRED_TREMOR_PROBA] = clf_package.predict_proba(X)
    df[DataColumns.PRED_TREMOR_LOGREG] = (
        df[DataColumns.PRED_TREMOR_PROBA] >= clf_package.threshold
    ).astype(int)

    # Rest-tremor checks: spectral peak within the rest-tremor band (3-7 Hz)
    # and arm at rest / stable posture (low power below the tremor band).
    peak_in_band = (df['freq_peak'] >= config.fmin_rest_tremor) & (df['freq_peak'] <= config.fmax_rest_tremor)
    df[DataColumns.PRED_ARM_AT_REST] = (df['below_tremor_power'] <= config.movement_threshold).astype(int)
    df[DataColumns.PRED_TREMOR_CHECKED] = (
        (df[DataColumns.PRED_TREMOR_LOGREG] == 1)
        & peak_in_band
        & (df[DataColumns.PRED_ARM_AT_REST] == 1)
    ).astype(int)

    return df
147
+
148
+
149
+ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
150
+ """
151
+ Quantifies the amount of tremor time and tremor power, aggregated over all windows in the input dataframe.
152
+ Tremor time is calculated as the number of the detected tremor windows, as percentage of the number of windows
153
+ without significant non-tremor movement (at rest). For tremor power the following aggregates are derived:
154
+ the median, mode and percentile of tremor power specified in the configuration object.
155
+
156
+ Parameters
157
+ ----------
158
+ df : pd.DataFrame
159
+ The input DataFrame containing extracted tremor features. The DataFrame must include
160
+ the necessary columns as specified in the classifier's feature names.
161
+
162
+ config : TremorConfig
163
+ Configuration object containing the percentile for aggregating tremor power.
164
+
165
+ Returns
166
+ -------
167
+ dict
168
+ A dictionary with the aggregated tremor time and tremor power measures, as well as the total number of windows
169
+ available in the input dataframe, and the number of windows at rest.
170
+
171
+ Notes
172
+ -----
173
+ - Tremor power is converted to log scale, after adding a constant of 1, so that zero tremor power
174
+ corresponds to a value of 0 in log scale.
175
+ - The modal tremor power is computed based on gaussian kernel density estimation.
176
+
177
+ """
178
+
179
+ nr_windows_total = df.shape[0] # number of windows in the input dataframe
180
+
181
+ # remove windows with detected non-tremor arm movements to control for the amount of arm activities performed
182
+ df_filtered = df.loc[df.pred_arm_at_rest == 1]
183
+ nr_windows_rest = df_filtered.shape[0] # number of windows without non-tremor arm movement
184
+
185
+ # calculate tremor time
186
+ perc_windows_tremor= np.sum(df_filtered['pred_tremor_checked']) / nr_windows_rest * 100 # as percentage of total measured time without non-tremor arm movement
187
+
188
+ # calculate aggregated tremor power measures
189
+ tremor_power = df_filtered.loc[df_filtered['pred_tremor_checked'] == 1, 'tremor_power']
190
+ tremor_power = np.log10(tremor_power+1) # convert to log scale
191
+ aggregated_tremor_power = {}
192
+
193
+ for aggregate in config.aggregates_tremor_power:
194
+ aggregate_name = f"{aggregate}_tremor_power"
195
+ if aggregate == 'mode':
196
+ # calculate modal tremor power
197
+ bin_edges = np.linspace(0, 6, 301)
198
+ kde = gaussian_kde(tremor_power)
199
+ kde_values = kde(bin_edges)
200
+ max_index = np.argmax(kde_values)
201
+ aggregated_tremor_power['modal_tremor_power'] = bin_edges[max_index]
202
+ else: # calculate te other aggregates (e.g. median and 90th percentile) of tremor power
203
+ aggregated_tremor_power[aggregate_name] = aggregate_parameter(tremor_power, aggregate)
204
+
205
+ # store aggregates in json format
206
+ d_aggregates = {
207
+ 'metadata': {
208
+ 'nr_windows_total': nr_windows_total,
209
+ 'nr_windows_rest': nr_windows_rest
210
+ },
211
+ 'aggregated_tremor_measures': {
212
+ 'perc_windows_tremor': perc_windows_tremor,
213
+ 'median_tremor_power': aggregated_tremor_power['median_tremor_power'],
214
+ 'modal_tremor_power': aggregated_tremor_power['modal_tremor_power'],
215
+ '90p_tremor_power': aggregated_tremor_power['90p_tremor_power']
216
+ }
217
+ }
218
+
219
+ return d_aggregates
220
+
221
+
222
def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
    """
    Compute spectral domain features from windowed gyroscope data.

    Computes Mel-frequency cepstral coefficients (MFCCs), the frequency of
    the peak, the tremor power and the below-tremor power from the total
    power spectral density of the windowed gyroscope data.

    Parameters
    ----------
    data : numpy.ndarray
        Windowed gyroscope data; indexed as (window, sample, axis) — Welch
        is applied along axis 1 and the PSD is summed over the sensor axes.
    config : object
        Configuration with sampling frequency, Welch segment settings and
        MFCC parameters.

    Returns
    -------
    pd.DataFrame
        One row per window with the MFCC columns (`mfcc_1`..`mfcc_n`),
        `freq_peak`, `below_tremor_power` and `tremor_power`.

    Notes
    -----
    The MFCC attributes (`mfcc_low_frequency`, ...) are written onto
    `config` in place because `compute_mfccs` reads them from there.
    """

    # Initialize a dictionary to hold the results
    feature_dict = {}

    # Initialize parameters
    sampling_frequency = config.sampling_frequency
    segment_length_s = config.segment_length_s
    overlap_fraction = config.overlap_fraction
    spectral_resolution = config.spectral_resolution
    window_type = 'hann'

    # Welch parameters. Cast to int: the products/quotients of config values
    # may be floats, but scipy's get_window and welch require integer sample
    # counts (rounding is a no-op when the values are already integral).
    segment_length_n = int(round(sampling_frequency * segment_length_s))
    overlap_n = int(round(segment_length_n * overlap_fraction))
    window = signal.get_window(window_type, segment_length_n, fftbins=False)
    nfft = int(round(sampling_frequency / spectral_resolution))

    # Compute the power spectral density per window
    freqs, psd = signal.welch(
        x=data,
        fs=sampling_frequency,
        window=window,
        nperseg=segment_length_n,
        noverlap=overlap_n,
        nfft=nfft,
        detrend=False,
        scaling='density',
        axis=1
    )

    # Compute total power in the PSD (over the three axes)
    total_psd = compute_total_power(psd)

    # compute_mfccs reads these attributes from the config object
    config.mfcc_low_frequency = config.fmin_mfcc
    config.mfcc_high_frequency = config.fmax_mfcc
    config.mfcc_n_dct_filters = config.n_dct_filters_mfcc
    config.mfcc_n_coefficients = config.n_coefficients_mfcc

    mfccs = compute_mfccs(
        total_power_array=total_psd,
        config=config,
        multiplication_factor=1
    )

    # Combine the MFCCs into the features DataFrame
    mfcc_colnames = [f'mfcc_{x}' for x in range(1, config.mfcc_n_coefficients + 1)]
    for i, colname in enumerate(mfcc_colnames):
        feature_dict[colname] = mfccs[:, i]

    # Compute the frequency of the peak, non-tremor power and tremor power
    feature_dict['freq_peak'] = extract_frequency_peak(freqs, total_psd, config.fmin_peak_search, config.fmax_peak_search)
    feature_dict['below_tremor_power'] = compute_power_in_bandwidth(freqs, total_psd, config.fmin_below_rest_tremor, config.fmax_below_rest_tremor,
                                                                    include_max=False, spectral_resolution=config.spectral_resolution,
                                                                    cumulative_sum_method='sum')
    feature_dict['tremor_power'] = extract_tremor_power(freqs, total_psd, config.fmin_rest_tremor, config.fmax_rest_tremor)

    return pd.DataFrame(feature_dict)
@@ -0,0 +1,363 @@
1
+ import json
2
+ import numpy as np
3
+ import pandas as pd
4
+ import tsdf
5
+ from pathlib import Path
6
+ from scipy import signal
7
+ from scipy.interpolate import interp1d
8
+ from typing import List, Tuple, Union
9
+ from datetime import datetime
10
+
11
+ from paradigma.constants import TimeUnit, DataColumns
12
+ from paradigma.config import PPGConfig, IMUConfig
13
+ from paradigma.util import write_df_data, read_metadata, invert_watch_side
14
+
15
+
16
def resample_data(
    df: pd.DataFrame,
    time_column : str,
    values_column_names: List[str],
    resampling_frequency: int,
) -> pd.DataFrame:
    """
    Resample sensor data to a fixed frequency using cubic interpolation.

    Parameters
    ----------
    df : pd.DataFrame
        Input sensor data.
    time_column : str
        Name of the time column.
    values_column_names : List[str]
        Names of the columns to resample.
    resampling_frequency : int
        Target sampling frequency in Hz.

    Returns
    -------
    pd.DataFrame
        The resampled data, time column first, at the new frequency.

    Raises
    ------
    ValueError
        If the time values are not strictly increasing (required by the
        cubic interpolator).
    """
    timestamps = np.array(df[time_column])
    samples = np.array(df[values_column_names])

    # Cubic interpolation requires a strictly increasing time base
    if not np.all(np.diff(timestamps) > 0):
        raise ValueError("time_abs_array is not strictly increasing")

    # New uniform time grid spanning the original recording
    new_times = np.arange(timestamps[0], timestamps[-1], 1 / resampling_frequency)

    # Cubic interpolation of every value column onto the new grid
    cubic_interpolator = interp1d(timestamps, samples, axis=0, kind="cubic")
    df_out = pd.DataFrame(cubic_interpolator(new_times), columns=values_column_names)
    df_out[time_column] = new_times

    # Time column first, then the resampled value columns
    return df_out[[time_column] + values_column_names]
74
+
75
+
76
def butterworth_filter(
    data: np.ndarray,
    order: int,
    cutoff_frequency: Union[float, List[float]],
    passband: str,
    sampling_frequency: int,
):
    """
    Apply a zero-phase Butterworth filter to 1D or 2D sensor data.

    The filter is designed as second-order sections and applied forward and
    backward (`sosfiltfilt`), so the output has no phase shift.

    Parameters
    ----------
    data : np.ndarray
        Signal to filter: 1D (single channel) or 2D (channels in columns).
    order : int
        Filter order; higher gives a steeper roll-off.
    cutoff_frequency : float or List[float]
        Cutoff in Hz; a single value for 'hp'/'lp', a [low, high] pair for
        a band-pass filter.
    passband : str
        'hp' (high-pass), 'lp' (low-pass) or 'band' (band-pass).
    sampling_frequency : int
        Sampling frequency of `data` in Hz.

    Returns
    -------
    np.ndarray
        Filtered data with the same shape as the input.

    Raises
    ------
    ValueError
        If `data` is not 1D or 2D, or the passband is invalid.
    """
    # Second-order-sections design for numerical stability
    sos = signal.butter(
        N=order,
        Wn=cutoff_frequency,
        btype=passband,
        analog=False,
        fs=sampling_frequency,
        output="sos",
    )

    # Zero-phase filtering; 2D data is filtered column-wise (axis 0)
    if data.ndim == 1:
        return signal.sosfiltfilt(sos, data)
    if data.ndim == 2:
        return signal.sosfiltfilt(sos, data, axis=0)
    raise ValueError("Data must be either 1D or 2D.")
141
+
142
+ def preprocess_imu_data(df: pd.DataFrame, config: IMUConfig, sensor: str, watch_side: str) -> pd.DataFrame:
143
+ """
144
+ Preprocesses IMU data by resampling and applying filters.
145
+
146
+ Parameters
147
+ ----------
148
+ df : pd.DataFrame
149
+ The DataFrame containing raw accelerometer and/or gyroscope data.
150
+ config : IMUConfig
151
+ Configuration object containing various settings, such as time column name, accelerometer and/or gyroscope columns,
152
+ filter settings, and sampling frequency.
153
+ sensor: str
154
+ Name of the sensor data to be preprocessed. Must be one of:
155
+ - "accelerometer": Preprocess accelerometer data only.
156
+ - "gyroscope": Preprocess gyroscope data only.
157
+ - "both": Preprocess both accelerometer and gyroscope data.
158
+ watch_side: str
159
+ The side of the watch where the data was collected. Must be one of:
160
+ - "left": Data was collected from the left wrist.
161
+ - "right": Data was collected from the right wrist.
162
+
163
+ Returns
164
+ -------
165
+ pd.DataFrame
166
+ The preprocessed accelerometer and or gyroscope data with the following transformations:
167
+ - Resampled data at the specified frequency.
168
+ - Filtered accelerometer data with high-pass and low-pass filtering applied.
169
+
170
+ Notes
171
+ -----
172
+ - The function applies Butterworth filters to accelerometer data, both high-pass and low-pass.
173
+ """
174
+
175
+ # Extract sensor column
176
+ if sensor == 'accelerometer':
177
+ values_colnames = config.accelerometer_cols
178
+ elif sensor == 'gyroscope':
179
+ values_colnames = config.gyroscope_cols
180
+ elif sensor == 'both':
181
+ values_colnames = config.accelerometer_cols + config.gyroscope_cols
182
+ else:
183
+ raise('Sensor should be either accelerometer, gyroscope, or both')
184
+
185
+ # Resample the data to the specified frequency
186
+ df = resample_data(
187
+ df=df,
188
+ time_column=DataColumns.TIME,
189
+ values_column_names = values_colnames,
190
+ resampling_frequency=config.sampling_frequency
191
+ )
192
+
193
+ # Invert the IMU data if the watch was worn on the right wrist
194
+ df = invert_watch_side(df, watch_side)
195
+
196
+ if sensor in ['accelerometer', 'both']:
197
+
198
+ # Extract accelerometer data for filtering
199
+ accel_data = df[config.accelerometer_cols].values
200
+
201
+ # Define filter configurations for high-pass and low-pass
202
+ filter_renaming_configs = {
203
+ "hp": {"result_columns": config.accelerometer_cols, "replace_original": True},
204
+ "lp": {"result_columns": [f'{col}_grav' for col in config.accelerometer_cols], "replace_original": False},
205
+ }
206
+
207
+ # Apply filters in a loop
208
+ for passband, filter_config in filter_renaming_configs.items():
209
+ filtered_data = butterworth_filter(
210
+ data=accel_data,
211
+ order=config.filter_order,
212
+ cutoff_frequency=config.lower_cutoff_frequency,
213
+ passband=passband,
214
+ sampling_frequency=config.sampling_frequency,
215
+ )
216
+
217
+ # Replace or add new columns based on configuration
218
+ df[filter_config["result_columns"]] = filtered_data
219
+
220
+ values_colnames += config.gravity_cols
221
+
222
+ df = df[[DataColumns.TIME, *values_colnames]]
223
+
224
+ return df
225
+
226
+
227
def preprocess_ppg_data(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, ppg_config: PPGConfig,
                        imu_config: IMUConfig, start_time_ppg: str, start_time_imu: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Preprocess PPG and accelerometer data: align the overlapping segments,
    resample both streams, and filter them.

    Parameters
    ----------
    df_ppg : pd.DataFrame
        Raw PPG data.
    df_acc : pd.DataFrame
        Raw accelerometer data from the IMU.
    ppg_config : PPGConfig
        PPG preprocessing configuration.
    imu_config : IMUConfig
        IMU preprocessing configuration.
    start_time_ppg : str
        ISO8601 start time of the PPG recording.
    start_time_imu : str
        ISO8601 start time of the IMU recording.

    Returns
    -------
    Tuple[pd.DataFrame, pd.DataFrame]
        The preprocessed PPG and accelerometer DataFrames, in that order.
    """

    # Keep only the time span covered by both recordings
    df_ppg_overlapping, df_acc_overlapping = extract_overlapping_segments(df_ppg, df_acc, start_time_ppg, start_time_imu)

    # Resample both streams onto their configured uniform grids
    df_acc_proc = resample_data(
        df=df_acc_overlapping,
        time_column=DataColumns.TIME,
        values_column_names=list(imu_config.d_channels_accelerometer.keys()),
        resampling_frequency=imu_config.sampling_frequency
    )
    df_ppg_proc = resample_data(
        df=df_ppg_overlapping,
        time_column=DataColumns.TIME,
        values_column_names=list(ppg_config.d_channels_ppg.keys()),
        resampling_frequency=ppg_config.sampling_frequency
    )

    # High-pass the accelerometer channels in place
    df_acc_proc[imu_config.accelerometer_cols] = butterworth_filter(
        data=df_acc_proc[imu_config.accelerometer_cols].values,
        order=imu_config.filter_order,
        cutoff_frequency=imu_config.lower_cutoff_frequency,
        passband="hp",
        sampling_frequency=imu_config.sampling_frequency,
    )

    # Band-pass the PPG channel in place
    df_ppg_proc[ppg_config.ppg_colname] = butterworth_filter(
        data=df_ppg_proc[ppg_config.ppg_colname].values,
        order=ppg_config.filter_order,
        cutoff_frequency=[ppg_config.lower_cutoff_frequency, ppg_config.upper_cutoff_frequency],
        passband="bandpass",
        sampling_frequency=ppg_config.sampling_frequency,
    )

    return df_ppg_proc, df_acc_proc
315
+
316
+
317
+
318
+
319
def extract_overlapping_segments(df_ppg: pd.DataFrame, df_acc: pd.DataFrame, start_time_ppg: str, start_time_acc: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Extract DataFrames with overlapping data segments between accelerometer (from the IMU) and PPG datasets based on their timestamps.

    Parameters
    ----------
    df_ppg : pd.DataFrame
        DataFrame containing PPG data.
    df_acc : pd.DataFrame
        DataFrame containing accelerometer data from the IMU.
    start_time_ppg : str
        ISO 8601 formatted start time of the PPG data.
    start_time_acc : str
        ISO 8601 formatted start time of the accelerometer data.

    Returns
    -------
    Tuple[pd.DataFrame, pd.DataFrame]
        DataFrames containing the overlapping segments (time and values) of PPG and accelerometer data.
    """
    # Anchor each stream's relative timestamps to absolute (Unix) time;
    # the "Z" suffix is rewritten so fromisoformat accepts UTC strings.
    unix_start_ppg = int(datetime.fromisoformat(start_time_ppg.replace("Z", "+00:00")).timestamp())
    unix_start_acc = int(datetime.fromisoformat(start_time_acc.replace("Z", "+00:00")).timestamp())

    abs_time_ppg = df_ppg[DataColumns.TIME] + unix_start_ppg
    abs_time_acc = df_acc[DataColumns.TIME] + unix_start_acc

    # The overlap runs from the later of the two starts to the earlier of
    # the two ends.
    overlap_start = max(abs_time_ppg.iloc[0], abs_time_acc.iloc[0])
    overlap_end = min(abs_time_ppg.iloc[-1], abs_time_acc.iloc[-1])

    # Locate the inclusive index bounds of the overlap in each stream.
    first_ppg = np.searchsorted(abs_time_ppg, overlap_start, 'left')
    last_ppg = np.searchsorted(abs_time_ppg, overlap_end, 'right') - 1
    first_acc = np.searchsorted(abs_time_acc, overlap_start, 'left')
    last_acc = np.searchsorted(abs_time_acc, overlap_end, 'right') - 1

    df_ppg_overlapping = df_ppg.iloc[first_ppg:last_ppg + 1]
    df_acc_overlapping = df_acc.iloc[first_acc:last_acc + 1]

    return df_ppg_overlapping, df_acc_overlapping