paradigma 0.1.5__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. paradigma/__init__.py +1 -3
  2. paradigma/constants.py +65 -0
  3. paradigma/feature_extraction.py +703 -0
  4. paradigma/gait_analysis.py +415 -0
  5. paradigma/gait_analysis_config.py +266 -0
  6. paradigma/heart_rate_analysis.py +127 -0
  7. paradigma/heart_rate_analysis_config.py +9 -0
  8. paradigma/heart_rate_util.py +173 -0
  9. paradigma/imu_preprocessing.py +232 -0
  10. paradigma/ppg/classifier/LR_PPG_quality.pkl +0 -0
  11. paradigma/ppg/classifier/LR_model.mat +0 -0
  12. paradigma/ppg/feat_extraction/acc_feature.m +20 -0
  13. paradigma/ppg/feat_extraction/peakdet.m +64 -0
  14. paradigma/ppg/feat_extraction/ppg_features.m +53 -0
  15. paradigma/ppg/glob_functions/extract_hr_segments.m +37 -0
  16. paradigma/ppg/glob_functions/extract_overlapping_segments.m +23 -0
  17. paradigma/ppg/glob_functions/jsonlab/AUTHORS.txt +41 -0
  18. paradigma/ppg/glob_functions/jsonlab/ChangeLog.txt +74 -0
  19. paradigma/ppg/glob_functions/jsonlab/LICENSE_BSD.txt +25 -0
  20. paradigma/ppg/glob_functions/jsonlab/LICENSE_GPLv3.txt +699 -0
  21. paradigma/ppg/glob_functions/jsonlab/README.txt +394 -0
  22. paradigma/ppg/glob_functions/jsonlab/examples/.svn/entries +368 -0
  23. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_jsonlab_basic.m.svn-base +180 -0
  24. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_ubjson_basic.m.svn-base +180 -0
  25. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example1.json.svn-base +23 -0
  26. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example2.json.svn-base +22 -0
  27. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example3.json.svn-base +11 -0
  28. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example4.json.svn-base +34 -0
  29. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_basictest.matlab.svn-base +662 -0
  30. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.m.svn-base +27 -0
  31. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.matlab.svn-base +144 -0
  32. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_speedtest.m.svn-base +21 -0
  33. paradigma/ppg/glob_functions/jsonlab/examples/demo_jsonlab_basic.m +180 -0
  34. paradigma/ppg/glob_functions/jsonlab/examples/demo_ubjson_basic.m +180 -0
  35. paradigma/ppg/glob_functions/jsonlab/examples/example1.json +23 -0
  36. paradigma/ppg/glob_functions/jsonlab/examples/example2.json +22 -0
  37. paradigma/ppg/glob_functions/jsonlab/examples/example3.json +11 -0
  38. paradigma/ppg/glob_functions/jsonlab/examples/example4.json +34 -0
  39. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_basictest.matlab +662 -0
  40. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.m +27 -0
  41. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.matlab +144 -0
  42. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_speedtest.m +21 -0
  43. paradigma/ppg/glob_functions/jsonlab/jsonopt.m +32 -0
  44. paradigma/ppg/glob_functions/jsonlab/loadjson.m +566 -0
  45. paradigma/ppg/glob_functions/jsonlab/loadubjson.m +528 -0
  46. paradigma/ppg/glob_functions/jsonlab/mergestruct.m +33 -0
  47. paradigma/ppg/glob_functions/jsonlab/savejson.m +475 -0
  48. paradigma/ppg/glob_functions/jsonlab/saveubjson.m +504 -0
  49. paradigma/ppg/glob_functions/jsonlab/varargin2struct.m +40 -0
  50. paradigma/ppg/glob_functions/sample_prob_final.m +49 -0
  51. paradigma/ppg/glob_functions/synchronization.m +76 -0
  52. paradigma/ppg/glob_functions/tsdf_scan_meta.m +22 -0
  53. paradigma/ppg/hr_functions/Long_TFD_JOT.m +37 -0
  54. paradigma/ppg/hr_functions/PPG_TFD_HR.m +59 -0
  55. paradigma/ppg/hr_functions/TFD toolbox JOT/.gitignore +4 -0
  56. paradigma/ppg/hr_functions/TFD toolbox JOT/CHANGELOG.md +23 -0
  57. paradigma/ppg/hr_functions/TFD toolbox JOT/LICENCE.md +27 -0
  58. paradigma/ppg/hr_functions/TFD toolbox JOT/README.md +251 -0
  59. paradigma/ppg/hr_functions/TFD toolbox JOT/README.pdf +0 -0
  60. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_kern.m +142 -0
  61. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_lag_kern.m +314 -0
  62. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_lag_kern.m +123 -0
  63. paradigma/ppg/hr_functions/TFD toolbox JOT/dec_tfd.m +154 -0
  64. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_di_gdtfd.m +194 -0
  65. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_li_gdtfd.m +200 -0
  66. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_nonsep_gdtfd.m +229 -0
  67. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_sep_gdtfd.m +241 -0
  68. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/di_gdtfd.m +157 -0
  69. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/li_gdtfd.m +190 -0
  70. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/nonsep_gdtfd.m +196 -0
  71. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/sep_gdtfd.m +199 -0
  72. paradigma/ppg/hr_functions/TFD toolbox JOT/full_tfd.m +144 -0
  73. paradigma/ppg/hr_functions/TFD toolbox JOT/load_curdir.m +13 -0
  74. paradigma/ppg/hr_functions/TFD toolbox JOT/pics/decimated_TFDs_examples.png +0 -0
  75. paradigma/ppg/hr_functions/TFD toolbox JOT/pics/full_TFDs_examples.png +0 -0
  76. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/check_dec_params_seq.m +79 -0
  77. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispEE.m +9 -0
  78. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispVars.m +26 -0
  79. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/disp_bytes.m +25 -0
  80. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_full.m +40 -0
  81. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_half.m +34 -0
  82. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/gen_LFM.m +29 -0
  83. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_analytic_signal.m +76 -0
  84. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_window.m +176 -0
  85. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/isreal_fn.m +11 -0
  86. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/padWin.m +97 -0
  87. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/vtfd.m +149 -0
  88. paradigma/ppg/preprocessing/preprocessing_imu.m +15 -0
  89. paradigma/ppg/preprocessing/preprocessing_ppg.m +13 -0
  90. paradigma/ppg_preprocessing.py +313 -0
  91. paradigma/preprocessing_config.py +69 -0
  92. paradigma/quantification.py +58 -0
  93. paradigma/tremor/TremorFeaturesAndClassification.m +345 -0
  94. paradigma/tremor/feat_extraction/DerivativesExtract.m +22 -0
  95. paradigma/tremor/feat_extraction/ExtractBandSignalsRMS.m +72 -0
  96. paradigma/tremor/feat_extraction/MFCCExtract.m +100 -0
  97. paradigma/tremor/feat_extraction/PSDBandPower.m +52 -0
  98. paradigma/tremor/feat_extraction/PSDEst.m +63 -0
  99. paradigma/tremor/feat_extraction/PSDExtrAxis.m +88 -0
  100. paradigma/tremor/feat_extraction/PSDExtrOpt.m +95 -0
  101. paradigma/tremor/preprocessing/InterpData.m +32 -0
  102. paradigma/tremor/weekly_aggregates/WeeklyAggregates.m +295 -0
  103. paradigma/util.py +50 -0
  104. paradigma/windowing.py +219 -0
  105. paradigma-0.3.0.dist-info/LICENSE +192 -0
  106. paradigma-0.3.0.dist-info/METADATA +79 -0
  107. paradigma-0.3.0.dist-info/RECORD +108 -0
  108. paradigma/dummy.py +0 -3
  109. paradigma-0.1.5.dist-info/LICENSE +0 -201
  110. paradigma-0.1.5.dist-info/METADATA +0 -18
  111. paradigma-0.1.5.dist-info/RECORD +0 -6
  112. {paradigma-0.1.5.dist-info → paradigma-0.3.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,703 @@
1
+ from typing import List
2
+ import numpy as np
3
+ import pandas as pd
4
+ from sklearn.decomposition import PCA
5
+
6
+ from scipy import signal, fft
7
+ from scipy.integrate import cumulative_trapezoid
8
+ from scipy.signal import find_peaks
9
+
10
+ from paradigma.constants import DataColumns
11
+ from paradigma.gait_analysis_config import IMUConfig
12
+
13
+
14
def generate_statistics(
    sensor_col: pd.Series,
    statistic: str
) -> list:
    """Aggregate every windowed cell of a sensor column with one statistic.

    Intended for use with pandas' ``apply``: each cell of ``sensor_col``
    holds the raw samples of a single window.

    Parameters
    ----------
    sensor_col: pd.Series
        The sensor column to be aggregated (e.g. x-axis of accelerometer)
    statistic: str
        The statistic to be computed [mean, std, max, min]

    Returns
    -------
    list
        The aggregated statistic, one value per window

    Raises
    ------
    ValueError
        If ``statistic`` is not one of the supported options.
    """
    # dispatch table instead of an if/elif chain
    aggregators = {
        'mean': np.mean,
        'std': np.std,
        'max': np.max,
        'min': np.min,
    }
    if statistic not in aggregators:
        raise ValueError("Statistic not recognized.")
    aggregate = aggregators[statistic]
    return [aggregate(window) for window in sensor_col]
42
+
43
+
44
def generate_std_norm(
    df: pd.DataFrame,
    cols: List[str],
) -> pd.Series:
    """Compute, per window, the standard deviation of the Euclidean norm.

    Each cell of the given columns contains the samples of one window; the
    norm is taken sample-wise across the listed axes before the std.

    Parameters
    ----------
    df: pd.DataFrame
        The dataframe containing the accelerometer axes
    cols: List[str]
        The names of the columns containing the accelerometer axes

    Returns
    -------
    pd.Series
        The standard deviation of the norm of the accelerometer axes
    """
    def _std_of_norm(row) -> float:
        # sum of squared samples across axes, then sample-wise sqrt
        squared_sum = sum(np.asarray([v ** 2 for v in row[c]]) for c in cols)
        return np.std(np.sqrt(squared_sum))

    return df.apply(_std_of_norm, axis=1)
66
+
67
+
68
def compute_fft(
    values: list,
    window_type: str = 'hann',
    sampling_frequency: int = 100,
) -> tuple:
    """Compute the windowed FFT of a single window of samples.

    Parameters
    ----------
    values: list
        The values of the signal (e.g., accelerometer data) of a single window.
    window_type: str
        The type of window to be used for the FFT (default: 'hann')
    sampling_frequency: int
        The sampling frequency of the signal (default: 100)

    Returns
    -------
    tuple
        The FFT values and the corresponding frequencies
    """
    n_samples = len(values)
    # keep only the first half of the (symmetric) spectrum
    n_bins = int(n_samples / 2 + 1)

    # symmetric (non-periodic) taper, matching the original fftbins=False
    taper = signal.get_window(window_type, n_samples, fftbins=False)
    spectrum = 2 * fft.fft(values * taper)[:n_bins]
    freqs = fft.fftfreq(n_samples, 1 / sampling_frequency)[:n_bins]

    return spectrum, freqs
94
+
95
+
96
def signal_to_ffts(
    sensor_col: pd.Series,
    window_type: str = 'hann',
    sampling_frequency: int = 100,
) -> tuple:
    """Compute the FFT for every window of a sensor column.

    Thin per-row wrapper around ``compute_fft`` (could likely be merged with
    it and simplified).

    Parameters
    ----------
    sensor_col: pd.Series
        The sensor column to be transformed (e.g. x-axis of accelerometer)
    window_type: str
        The type of window to be used for the FFT (default: 'hann')
    sampling_frequency: int
        The sampling frequency of the signal (default: 100)

    Returns
    -------
    tuple
        Lists of frequencies and FFT values, one entry per window, ready to
        be attached as dataframe columns
    """
    # one (fft_values, freqs) pair per window
    pairs = [
        compute_fft(
            values=window_samples,
            window_type=window_type,
            sampling_frequency=sampling_frequency,
        )
        for window_samples in sensor_col
    ]
    values_per_window = [p[0] for p in pairs]
    freqs_per_window = [p[1] for p in pairs]

    return freqs_per_window, values_per_window
128
+
129
+
130
def compute_power_in_bandwidth(
    sensor_col: list,
    fmin: float,
    fmax: float,
    sampling_frequency: int = 100,
    window_type: str = 'hann',
) -> float:
    """Compute the log10 power in a frequency band for one window of a sensor axis.

    Note: ``sensor_col`` is a single cell (one window) of a sensor column, as
    this function is used with pandas' ``apply``.

    Parameters
    ----------
    sensor_col: list
        The samples of a single window (one row of the windowed dataframe)
    fmin: float
        The lower bound of the frequency band
    fmax: float
        The upper bound of the frequency band
    sampling_frequency: int
        The sampling frequency of the signal (default: 100)
    window_type: str
        The type of window to be used for the periodogram (default: 'hann')

    Returns
    -------
    float
        The log10 of the power in the specified frequency band
    """
    fxx, pxx = signal.periodogram(sensor_col, fs=sampling_frequency, window=window_type)

    # argmax returns the first bin strictly above the bound; stepping back one
    # gives the last bin at or below it
    ind_min = np.argmax(fxx > fmin) - 1
    ind_max = np.argmax(fxx > fmax) - 1

    # np.trapz was removed in NumPy 2.0 (renamed to np.trapezoid); pick
    # whichever is available so the code runs on both major versions.
    trapezoid = getattr(np, 'trapezoid', getattr(np, 'trapz', None))
    return np.log10(trapezoid(pxx[ind_min:ind_max], fxx[ind_min:ind_max]))
165
+
166
+
167
def compute_perc_power(
    sensor_col: list,
    fmin_band: float,
    fmax_band: float,
    fmin_total: float = 0,
    fmax_total: float = 100,
    sampling_frequency: int = 100,
    window_type: str = 'hann'
) -> float:
    """Compute the fraction of spectral power inside a frequency band.

    Note: ``sensor_col`` is a single cell (one window) of a sensor column, as
    this function is used with pandas' ``apply``.

    Parameters
    ----------
    sensor_col: list
        The samples of a single window (one row of the windowed dataframe)
    fmin_band: float
        The lower bound of the frequency band
    fmax_band: float
        The upper bound of the frequency band
    fmin_total: float
        The lower bound of the full spectrum (default: 0)
    fmax_total: float
        The upper bound of the full spectrum (default: 100)
    sampling_frequency: int
        The sampling frequency of the signal (default: 100)
    window_type: str
        The type of window to be used for the periodogram (default: 'hann')

    Returns
    -------
    float
        Ratio of band (log) power over total (log) power
    """
    shared_kwargs = dict(
        sensor_col=sensor_col,
        sampling_frequency=sampling_frequency,
        window_type=window_type,
    )

    power_in_band = compute_power_in_bandwidth(
        fmin=fmin_band, fmax=fmax_band, **shared_kwargs
    )
    power_overall = compute_power_in_bandwidth(
        fmin=fmin_total, fmax=fmax_total, **shared_kwargs
    )

    return power_in_band / power_overall
219
+
220
+
221
def get_dominant_frequency(
    signal_ffts: list,
    signal_freqs: list,
    fmin: float,
    fmax: float
) -> float:
    """Find the frequency with the strongest FFT magnitude in a band.

    Note: ``signal_ffts`` and ``signal_freqs`` are single cells (one window)
    of their respective columns, as this function is used with pandas'
    ``apply``.

    Parameters
    ----------
    signal_ffts: list
        The FFT values of the signal of a single window
    signal_freqs: list
        The corresponding frequencies of the FFT values
    fmin: float
        The lower bound of the frequency band (exclusive)
    fmax: float
        The upper bound of the frequency band (exclusive)

    Returns
    -------
    float
        The dominant frequency in the specified frequency band
    """
    # restrict both arrays to the open interval (fmin, fmax)
    in_band = (signal_freqs > fmin) & (signal_freqs < fmax)
    freqs_in_band = signal_freqs[in_band]
    ffts_in_band = signal_ffts[in_band]

    peak_position = np.argmax(np.abs(ffts_in_band))
    return np.abs(freqs_in_band[peak_position])
253
+
254
+
255
def compute_power(
    df: pd.DataFrame,
    fft_cols: list
) -> pd.Series:
    """Compute the total spectral power across several FFT columns.

    Side effect: a ``<col>_power`` column (squared magnitude of the FFT) is
    added to ``df`` for every column in ``fft_cols``.

    Parameters
    ----------
    df: pd.DataFrame
        The dataframe containing the FFT values
    fft_cols: list
        The names of the columns containing the FFT values

    Returns
    -------
    pd.Series
        Per window, the element-wise sum of the power spectra of all columns
    """
    for fft_col in fft_cols:
        power_col = '{}_power'.format(fft_col)
        df[power_col] = df[fft_col].apply(lambda spectrum: np.square(np.abs(spectrum)))

    def _total_power(row):
        # element-wise sum of the per-axis power spectra
        return sum(np.array(list(row[c + '_power'])) for c in fft_cols)

    return df.apply(_total_power, axis=1)
277
+
278
+
279
def generate_cepstral_coefficients(
    total_power_col: pd.Series,
    window_length_s: int,
    sampling_frequency: int = 100,
    low_frequency: int = 0,
    high_frequency: int = 25,
    n_filters: int = 20,
    n_coefficients: int = 12,
) -> pd.DataFrame:
    """Generate cepstral coefficients from the total power of the signal.

    Each cell of ``total_power_col`` is one window's power spectrum
    (as produced by ``compute_power``): it is passed through a triangular
    filterbank, log-scaled, and projected onto a DCT basis.

    Parameters
    ----------
    total_power_col: pd.Series
        The total power of the signal, extracted using compute_power
    window_length_s: int
        The number of seconds a window constitutes
    sampling_frequency: int
        The sampling frequency of the data (default: 100)
    low_frequency: int
        The lower bound of the frequency band (default: 0)
    high_frequency: int
        The upper bound of the frequency band (default: 25)
    n_filters: int
        The number of DCT filters (default: 20)
    n_coefficients: int
        The number of coefficients to extract (default: 12)

    Returns
    -------
    pd.DataFrame
        One column per cepstral coefficient ('cc_1' .. 'cc_<n_coefficients>')
    """
    n_samples_window = window_length_s * sampling_frequency

    # frequency-bin positions of the triangular filter edges
    edge_freqs = np.linspace(low_frequency, high_frequency, num=n_filters + 2)
    edge_bins = np.floor((n_samples_window + 1) / sampling_frequency * edge_freqs).astype(int)

    # triangular filterbank: each row ramps 0->1 up to its centre bin, then 1->0
    filterbank = np.zeros((len(edge_bins) - 2, int(n_samples_window / 2 + 1)))
    for row_idx in range(len(edge_bins) - 2):
        left, centre, right = edge_bins[row_idx], edge_bins[row_idx + 1], edge_bins[row_idx + 2]
        filterbank[row_idx, left:centre] = np.linspace(0, 1, centre - left)
        filterbank[row_idx, centre:right] = np.linspace(1, 0, right - centre)

    # filter each window's power spectrum and move to the log domain (dB)
    log_power_filtered = [10.0 * np.log10(np.dot(filterbank, window_power)) for window_power in total_power_col]

    # DCT basis used to decorrelate the log filterbank energies
    dct_basis = np.empty((n_coefficients, n_filters))
    dct_basis[0, :] = 1.0 / np.sqrt(n_filters)
    sample_points = np.arange(1, 2 * n_filters, 2) * np.pi / (2.0 * n_filters)
    for coef_idx in range(1, n_coefficients):
        dct_basis[coef_idx, :] = np.cos(coef_idx * sample_points) * np.sqrt(2.0 / n_filters)

    cepstral_coefs = [np.dot(dct_basis, x) for x in log_power_filtered]

    return pd.DataFrame(np.vstack(cepstral_coefs), columns=['cc_{}'.format(j + 1) for j in range(n_coefficients)])
340
+
341
+
342
def pca_transform_gyroscope(
    df: pd.DataFrame,
    y_gyro_colname: str,
    z_gyro_colname: str,
    pred_gait_colname: str,
) -> pd.Series:
    """Project the gyroscope y/z axes onto their first principal component.

    PCA is fitted on the samples predicted as gait only, so the first
    component approximates the angular velocity in the arm swing direction;
    the full signal is then projected onto that basis.

    Parameters
    ----------
    df: pd.DataFrame
        The dataframe containing the gyroscope data
    y_gyro_colname: str
        The column name of the y-axis of the gyroscope
    z_gyro_colname: str
        The column name of the z-axis of the gyroscope
    pred_gait_colname: str
        The column name of the predicted gait boolean

    Returns
    -------
    pd.Series
        The first principal component, i.e. the angular velocity in the arm
        swing direction
    """
    is_gait = df[pred_gait_colname] == 1
    gait_samples = list(zip(df.loc[is_gait, y_gyro_colname], df.loc[is_gait, z_gyro_colname]))
    all_samples = list(zip(df[y_gyro_colname], df[z_gyro_colname]))

    # fixed random_state keeps the decomposition reproducible
    pca = PCA(n_components=2, svd_solver='auto', random_state=22)
    pca.fit(gait_samples)
    projected = pca.transform(all_samples)

    # keep only the first principal component
    return pd.Series(row[0] for row in projected)
375
+
376
+
377
def compute_angle(
    velocity_col: pd.Series,
    time_col: pd.Series,
) -> pd.Series:
    """Estimate the (unsigned) angle by integrating the angular velocity.

    Cumulative trapezoidal integration is applied, after which the magnitude
    is taken since the sign of the angle is not of interest here.

    Parameters
    ----------
    velocity_col: pd.Series
        The angular velocity (gyroscope) column to be integrated
    time_col: pd.Series
        The time column corresponding to the angular velocity

    Returns
    -------
    pd.Series
        An estimation of the angle extracted from the angular velocity
    """
    integrated = cumulative_trapezoid(velocity_col, time_col, initial=0)
    # flip negative values: equivalent to x * -1 if x < 0 else x, element-wise
    return pd.Series(np.abs(integrated))
397
+
398
+
399
def remove_moving_average_angle(
    angle_col: pd.Series,
    sampling_frequency: int = 100,
) -> pd.Series:
    """Subtract a centred moving average from the angle to remove drift.

    Parameters
    ----------
    angle_col: pd.Series
        The angle column to be processed, obtained using compute_angle
    sampling_frequency: int
        The sampling frequency of the data (default: 100)

    Returns
    -------
    pd.Series
        The estimated angle without potential drift
    """
    # centred window of one second plus one sample: 2 * (fs / 2) + 1
    window_size = int(2 * (sampling_frequency * 0.5) + 1)
    drift = angle_col.rolling(
        window=window_size,
        min_periods=1,
        center=True,
        closed='both',
    ).mean()

    return pd.Series(angle_col - drift)
420
+
421
+
422
def extract_angle_extremes(
    df: pd.DataFrame,
    angle_colname: str,
    dominant_frequency_colname: str,
    sampling_frequency: int = 100,
) -> None:
    """Extract the peaks of the angle (minima and maxima) from the smoothed angle signal that adhere to a set of specific requirements.

    The dataframe is modified in place: the columns 'angle_maxima',
    'angle_minima', 'angle_new_minima', 'angle_new_maxima' and
    'angle_extrema_values' are added. After detection with
    `scipy.signal.find_peaks`, runs of consecutive minima (or maxima) without
    an opposite extremum in between are pruned so that minima and maxima
    alternate within each window.

    Parameters
    ----------
    df: pd.DataFrame
        The dataframe containing the angle signal
    angle_colname: str
        The name of the column containing the smoothed angle signal
    dominant_frequency_colname: str
        The name of the column containing the dominant frequency
    sampling_frequency: int
        The sampling frequency of the data (default: 100)

    Returns
    -------
    None
        The input dataframe is modified in place.
    """
    # determine peaks; the minimum distance between peaks scales inversely
    # with the window's dominant frequency, requiring a prominence of 2
    df['angle_maxima'] = df.apply(lambda x: find_peaks(x[angle_colname], distance=sampling_frequency * 0.6 / x[dominant_frequency_colname], prominence=2)[0], axis=1)
    df['angle_minima'] = df.apply(lambda x: find_peaks([-x for x in x[angle_colname]], distance=sampling_frequency * 0.6 / x[dominant_frequency_colname], prominence=2)[0], axis=1)

    df['angle_new_minima'] = df['angle_minima'].copy()
    df['angle_new_maxima'] = df['angle_maxima'].copy()

    for index, _ in df.iterrows():
        i_pks = 0  # iterable to keep track of consecutive min-min and max-max versus min-max
        n_min = df.loc[index, 'angle_new_minima'].size  # number of minima in window
        n_max = df.loc[index, 'angle_new_maxima'].size  # number of maxima in window

        if n_min > 0 and n_max > 0:
            # if the first minimum occurs before the first maximum, start with the minimum
            if df.loc[index, 'angle_new_maxima'][0] > df.loc[index, 'angle_new_minima'][0]:
                # only continue if there are enough minima and maxima to perform operations
                while i_pks < df.loc[index, 'angle_new_minima'].size - 1 and i_pks < df.loc[index, 'angle_new_maxima'].size:

                    # if the next minimum comes before the next maximum, we have two minima in a row, and should keep the larger one
                    if df.loc[index, 'angle_new_minima'][i_pks+1] < df.loc[index, 'angle_new_maxima'][i_pks]:
                        # if the next minimum is smaller than the current minimum, keep the next minimum and discard the current minimum
                        if df.loc[index, angle_colname][df.loc[index, 'angle_new_minima'][i_pks+1]] < df.loc[index, angle_colname][df.loc[index, 'angle_new_minima'][i_pks]]:
                            df.at[index, 'angle_new_minima'] = np.delete(df.loc[index, 'angle_new_minima'], i_pks)
                        # otherwise, keep the current minimum and discard the next minimum
                        else:
                            df.at[index, 'angle_new_minima'] = np.delete(df.loc[index, 'angle_new_minima'], i_pks+1)
                        # NOTE(review): the decrement cancels the increment at the end of
                        # the loop body, so the same position is re-examined after a delete
                        i_pks -= 1

                    # if the current maximum comes before the current minimum, we have two maxima in a row, and should keep the larger one
                    if i_pks >= 0 and df.loc[index, 'angle_new_minima'][i_pks] > df.loc[index, 'angle_new_maxima'][i_pks]:
                        # if the current maximum is smaller than the previous maximum, keep the previous maximum and discard the current maximum
                        if df.loc[index, angle_colname][df.loc[index, 'angle_new_maxima'][i_pks]] < df.loc[index, angle_colname][df.loc[index, 'angle_new_maxima'][i_pks-1]]:
                            df.at[index, 'angle_new_maxima'] = np.delete(df.loc[index, 'angle_new_maxima'], i_pks)
                        # otherwise, keep the current maximum and discard the previous maximum
                        else:
                            df.at[index, 'angle_new_maxima'] = np.delete(df.loc[index, 'angle_new_maxima'], i_pks-1)
                        i_pks -= 1
                    i_pks += 1

            # or if the first maximum occurs before the first minimum, start with the maximum
            elif df.loc[index, 'angle_new_maxima'][0] < df.loc[index, 'angle_new_minima'][0]:
                # only continue if there are enough minima and maxima to perform operations
                while i_pks < df.loc[index, 'angle_new_minima'].size and i_pks < df.loc[index, 'angle_new_maxima'].size-1:
                    # if the next maximum comes before the current minimum, we have two maxima in a row, and should keep the larger one
                    if df.loc[index, 'angle_new_minima'][i_pks] > df.loc[index, 'angle_new_maxima'][i_pks+1]:
                        # if the next maximum is smaller than the current maximum, keep the next maximum and discard the current maximum
                        if df.loc[index, angle_colname][df.loc[index, 'angle_new_maxima'][i_pks+1]] > df.loc[index, angle_colname][df.loc[index, 'angle_new_maxima'][i_pks]]:
                            df.at[index, 'angle_new_maxima'] = np.delete(df.loc[index, 'angle_new_maxima'], i_pks)
                        # otherwise, keep the current maximum and discard the next maximum
                        else:
                            df.at[index, 'angle_new_maxima'] = np.delete(df.loc[index, 'angle_new_maxima'], i_pks+1)
                        i_pks -= 1

                    # if the current minimum comes before the current maximum, we have two minima in a row, and should keep the larger one
                    if i_pks > 0 and df.loc[index, 'angle_new_minima'][i_pks] < df.loc[index, 'angle_new_maxima'][i_pks]:
                        # if the current minimum is smaller than the previous minimum, keep the previous minimum and discard the current minimum
                        if df.loc[index, angle_colname][df.loc[index, 'angle_new_minima'][i_pks]] < df.loc[index, angle_colname][df.loc[index, 'angle_new_minima'][i_pks-1]]:
                            df.at[index, 'angle_new_minima'] = np.delete(df.loc[index, 'angle_new_minima'], i_pks-1)
                        # otherwise, keep the current minimum and discard the previous minimum
                        else:
                            df.at[index, 'angle_new_minima'] = np.delete(df.loc[index, 'angle_new_minima'], i_pks)
                        i_pks -= 1
                    i_pks += 1

    # for some peculiar reason, if a single item remains in the row for angle_new_minima or
    # angle_new_maxima, it could be either a scalar or a vector; normalize scalars to lists
    for col in ['angle_new_minima', 'angle_new_maxima']:
        df.loc[df.apply(lambda x: type(x[col].tolist())==int, axis=1), col] = df.loc[df.apply(lambda x: type(x[col].tolist())==int, axis=1), col].apply(lambda x: [x])

    # gather the angle values at the retained extremum positions per window
    df['angle_extrema_values'] = df.apply(lambda x: [x[angle_colname][i] for i in np.concatenate([x['angle_new_minima'], x['angle_new_maxima']])], axis=1)

    return
518
+
519
+
520
def extract_range_of_motion(
    angle_extrema_values_col: pd.Series,
) -> pd.Series:
    """Extract the range of motion from consecutive angle extrema.

    For each window, amplitudes are computed between each pair of consecutive
    extrema: the sum of magnitudes when the extrema straddle zero, otherwise
    the difference of their magnitudes.

    Parameters
    ----------
    angle_extrema_values_col: pd.Series
        The column containing the angle extrema values per window

    Returns
    -------
    pd.Series
        Per window, the list of amplitudes (range of motion)
    """
    amplitudes_per_window = []

    # for each window
    for extrema_values in angle_extrema_values_col:
        window_amplitudes = []
        # each consecutive pair of extrema within the window
        for current, following in zip(extrema_values, extrema_values[1:]):
            if (current > 0 and following < 0) or (current < 0 and following > 0):
                # opposite signs: amplitude spans both sides of zero
                window_amplitudes.append(np.sum(np.abs(current) + np.abs(following)))
            elif np.abs(current) < np.abs(following):
                # same sign, moving away from zero: difference of magnitudes
                window_amplitudes.append(np.subtract(np.abs(following), np.abs(current)))
            else:
                # same sign, moving towards zero: difference of magnitudes
                window_amplitudes.append(np.subtract(np.abs(current), np.abs(following)))

        amplitudes_per_window.append([window_amplitudes])

    return [window for group in amplitudes_per_window for window in group]
560
+
561
+
562
def extract_peak_angular_velocity(
    df: pd.DataFrame,
    velocity_colname: str,
    angle_minima_colname: str,
    angle_maxima_colname: str,
) -> None:
    """Extract the forward and backward peak angular velocity from the angular velocity.

    The dataframe is modified in place: the list-valued columns
    'forward_peak_ang_vel' and 'backward_peak_ang_vel' are added, holding one
    peak velocity per pair of consecutive extrema in each window.

    Parameters
    ----------
    df: pd.DataFrame
        The dataframe containing the angular velocity
    velocity_colname: str
        The column name of the angular velocity
    angle_minima_colname: str
        The column name of the column containing the angle minima
    angle_maxima_colname: str
        The column name of the column containing the angle maxima

    Returns
    -------
    None
        The input dataframe is modified in place.
    """
    # one (initially empty) list of peak velocities per window
    df['forward_peak_ang_vel'] = np.empty((len(df), 0)).tolist()
    df['backward_peak_ang_vel'] = np.empty((len(df), 0)).tolist()

    # for each window
    for index, row in df.iterrows():
        # the peak angular velocity can only be computed if there is at least one minimum and one maximum in the window
        if len(row[angle_minima_colname]) > 0 and len(row[angle_maxima_colname]) > 0:
            # combine the minima and maxima into one chronologically sorted index array
            l_extrema_indices = np.sort(np.concatenate((row[angle_minima_colname], row[angle_maxima_colname])))
            # for each peak
            for j, peak_index in enumerate(l_extrema_indices):
                # if the peak is a maximum and there is another peak after it
                if peak_index in row[angle_maxima_colname] and j < len(l_extrema_indices) - 1:
                    # compute the forward peak angular velocity, defined by the maximum negative angular velocity between the two peaks
                    df.loc[index, 'forward_peak_ang_vel'].append(np.abs(min(row[velocity_colname][l_extrema_indices[j]:l_extrema_indices[j+1]])))
                # if the peak is a minimum and there is another peak after it
                elif peak_index in row[angle_minima_colname] and j < len(l_extrema_indices) - 1:
                    # compute the backward peak angular velocity, defined by the maximum positive angular velocity between the two peaks
                    df.loc[index, 'backward_peak_ang_vel'].append(np.abs(max(row[velocity_colname][l_extrema_indices[j]:l_extrema_indices[j+1]])))

    return
607
+
608
+
609
def extract_temporal_domain_features(config: IMUConfig, df_windowed: pd.DataFrame, l_gravity_stats=('mean', 'std')) -> pd.DataFrame:
    """
    Compute temporal domain features for the accelerometer signal. The features are added to the dataframe. Therefore the original dataframe is modified, and the modified dataframe is returned.

    Parameters
    ----------

    config: IMUConfig
        The configuration object containing the parameters for the feature extraction

    df_windowed: pd.DataFrame
        The dataframe containing the windowed accelerometer signal

    l_gravity_stats: sequence of str, optional
        The statistics to be computed for the gravity component of the accelerometer signal (default: ('mean', 'std'))

    Returns
    -------
    pd.DataFrame
        The dataframe with the added temporal domain features.
    """
    # compute the requested statistics of the gravity component of the
    # acceleration signal, one feature column per (axis, statistic) pair
    for col in config.l_gravity_cols:
        for stat in l_gravity_stats:
            df_windowed[f'{col}_{stat}'] = generate_statistics(
                sensor_col=df_windowed[col],
                statistic=stat
            )

    # compute the standard deviation of the Euclidean norm of the three axes
    df_windowed['std_norm_acc'] = generate_std_norm(
        df=df_windowed,
        cols=config.l_accelerometer_cols
    )

    return df_windowed
646
+
647
+
648
def extract_spectral_domain_features(config, df_windowed: pd.DataFrame, sensor: str, l_sensor_colnames: list) -> pd.DataFrame:
    """
    Compute spectral domain features for the windowed sensor signal. Feature columns are added to the dataframe, which is modified in place and returned.

    Parameters
    ----------
    config
        The configuration object containing the parameters for the feature extraction
        (window type, sampling frequency, frequency bandwidths, spectrum bounds, and
        cepstral-coefficient settings)

    df_windowed: pd.DataFrame
        The dataframe containing the windowed sensor signal

    sensor: str
        The sensor name used to suffix the cepstral coefficient columns

    l_sensor_colnames: list
        The column names of the individual sensor axes

    Returns
    -------
    pd.DataFrame
        The dataframe with the added spectral domain features, with 'window_start'
        renamed to 'time'.
    """
    for col in l_sensor_colnames:

        # transform the temporal signal to the spectral domain using the fast fourier transform
        df_windowed[f'{col}_freqs'], df_windowed[f'{col}_fft'] = signal_to_ffts(
            sensor_col=df_windowed[col],
            window_type=config.window_type,
            sampling_frequency=config.sampling_frequency
        )

        # compute the power in distinct frequency bandwidths
        for bandwidth, frequencies in config.d_frequency_bandwidths.items():
            df_windowed[col+'_'+bandwidth] = df_windowed.apply(lambda x: compute_power_in_bandwidth(
                sensor_col=x[col],
                fmin=frequencies[0],
                fmax=frequencies[1],
                sampling_frequency=config.sampling_frequency,
                window_type=config.window_type,
                ), axis=1
            )

        # compute the dominant frequency, i.e., the frequency with the highest power
        df_windowed[col+'_dominant_frequency'] = df_windowed.apply(lambda x: get_dominant_frequency(
            signal_ffts=x[col+'_fft'],
            signal_freqs=x[col+'_freqs'],
            fmin=config.spectrum_low_frequency,
            fmax=config.spectrum_high_frequency
            ), axis=1
        )

    # compute the power summed over the individual axes to obtain the total power per frequency bandwidth
    for bandwidth in config.d_frequency_bandwidths.keys():
        df_windowed['total_'+bandwidth] = df_windowed.apply(lambda x: sum(x[y+'_'+bandwidth] for y in l_sensor_colnames), axis=1)

    # compute the power summed over the individual frequency bandwidths to obtain the total power
    df_windowed['total_power'] = compute_power(
        df=df_windowed,
        fft_cols=[f'{col}_fft' for col in l_sensor_colnames])

    # compute the cepstral coefficients of the total power signal
    cc_cols = generate_cepstral_coefficients(
        total_power_col=df_windowed['total_power'],
        window_length_s=config.window_length_s,
        sampling_frequency=config.sampling_frequency,
        low_frequency=config.spectrum_low_frequency,
        high_frequency=config.spectrum_high_frequency,
        n_filters=config.n_dct_filters_cc,
        n_coefficients=config.n_coefficients_cc
    )

    df_windowed = pd.concat([df_windowed, cc_cols], axis=1)

    # suffix the cepstral coefficient columns with the sensor name, and rename the
    # window start column to 'time' for downstream processing
    df_windowed = df_windowed.rename(columns={f'cc_{cc_nr}': f'cc_{cc_nr}_{sensor}' for cc_nr in range(1,config.n_coefficients_cc+1)}).rename(columns={'window_start': 'time'})

    return df_windowed