paradigma-0.1.5-py3-none-any.whl → paradigma-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. paradigma/__init__.py +1 -3
  2. paradigma/constants.py +35 -0
  3. paradigma/feature_extraction.py +678 -0
  4. paradigma/gait_analysis.py +413 -0
  5. paradigma/gait_analysis_config.py +244 -0
  6. paradigma/heart_rate_analysis.py +127 -0
  7. paradigma/heart_rate_analysis_config.py +9 -0
  8. paradigma/heart_rate_util.py +173 -0
  9. paradigma/imu_preprocessing.py +229 -0
  10. paradigma/ppg/classifier/LR_PPG_quality.pkl +0 -0
  11. paradigma/ppg/classifier/LR_model.mat +0 -0
  12. paradigma/ppg/feat_extraction/acc_feature.m +20 -0
  13. paradigma/ppg/feat_extraction/peakdet.m +64 -0
  14. paradigma/ppg/feat_extraction/ppg_features.m +53 -0
  15. paradigma/ppg/glob_functions/extract_hr_segments.m +37 -0
  16. paradigma/ppg/glob_functions/extract_overlapping_segments.m +23 -0
  17. paradigma/ppg/glob_functions/jsonlab/AUTHORS.txt +41 -0
  18. paradigma/ppg/glob_functions/jsonlab/ChangeLog.txt +74 -0
  19. paradigma/ppg/glob_functions/jsonlab/LICENSE_BSD.txt +25 -0
  20. paradigma/ppg/glob_functions/jsonlab/LICENSE_GPLv3.txt +699 -0
  21. paradigma/ppg/glob_functions/jsonlab/README.txt +394 -0
  22. paradigma/ppg/glob_functions/jsonlab/examples/.svn/entries +368 -0
  23. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_jsonlab_basic.m.svn-base +180 -0
  24. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/demo_ubjson_basic.m.svn-base +180 -0
  25. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example1.json.svn-base +23 -0
  26. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example2.json.svn-base +22 -0
  27. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example3.json.svn-base +11 -0
  28. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/example4.json.svn-base +34 -0
  29. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_basictest.matlab.svn-base +662 -0
  30. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.m.svn-base +27 -0
  31. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_selftest.matlab.svn-base +144 -0
  32. paradigma/ppg/glob_functions/jsonlab/examples/.svn/text-base/jsonlab_speedtest.m.svn-base +21 -0
  33. paradigma/ppg/glob_functions/jsonlab/examples/demo_jsonlab_basic.m +180 -0
  34. paradigma/ppg/glob_functions/jsonlab/examples/demo_ubjson_basic.m +180 -0
  35. paradigma/ppg/glob_functions/jsonlab/examples/example1.json +23 -0
  36. paradigma/ppg/glob_functions/jsonlab/examples/example2.json +22 -0
  37. paradigma/ppg/glob_functions/jsonlab/examples/example3.json +11 -0
  38. paradigma/ppg/glob_functions/jsonlab/examples/example4.json +34 -0
  39. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_basictest.matlab +662 -0
  40. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.m +27 -0
  41. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_selftest.matlab +144 -0
  42. paradigma/ppg/glob_functions/jsonlab/examples/jsonlab_speedtest.m +21 -0
  43. paradigma/ppg/glob_functions/jsonlab/jsonopt.m +32 -0
  44. paradigma/ppg/glob_functions/jsonlab/loadjson.m +566 -0
  45. paradigma/ppg/glob_functions/jsonlab/loadubjson.m +528 -0
  46. paradigma/ppg/glob_functions/jsonlab/mergestruct.m +33 -0
  47. paradigma/ppg/glob_functions/jsonlab/savejson.m +475 -0
  48. paradigma/ppg/glob_functions/jsonlab/saveubjson.m +504 -0
  49. paradigma/ppg/glob_functions/jsonlab/varargin2struct.m +40 -0
  50. paradigma/ppg/glob_functions/sample_prob_final.m +49 -0
  51. paradigma/ppg/glob_functions/synchronization.m +76 -0
  52. paradigma/ppg/glob_functions/tsdf_scan_meta.m +22 -0
  53. paradigma/ppg/hr_functions/Long_TFD_JOT.m +37 -0
  54. paradigma/ppg/hr_functions/PPG_TFD_HR.m +59 -0
  55. paradigma/ppg/hr_functions/TFD toolbox JOT/.gitignore +4 -0
  56. paradigma/ppg/hr_functions/TFD toolbox JOT/CHANGELOG.md +23 -0
  57. paradigma/ppg/hr_functions/TFD toolbox JOT/LICENCE.md +27 -0
  58. paradigma/ppg/hr_functions/TFD toolbox JOT/README.md +251 -0
  59. paradigma/ppg/hr_functions/TFD toolbox JOT/README.pdf +0 -0
  60. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_kern.m +142 -0
  61. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_Doppler_lag_kern.m +314 -0
  62. paradigma/ppg/hr_functions/TFD toolbox JOT/common/gen_lag_kern.m +123 -0
  63. paradigma/ppg/hr_functions/TFD toolbox JOT/dec_tfd.m +154 -0
  64. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_di_gdtfd.m +194 -0
  65. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_li_gdtfd.m +200 -0
  66. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_nonsep_gdtfd.m +229 -0
  67. paradigma/ppg/hr_functions/TFD toolbox JOT/decimated_TFDs/dec_sep_gdtfd.m +241 -0
  68. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/di_gdtfd.m +157 -0
  69. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/li_gdtfd.m +190 -0
  70. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/nonsep_gdtfd.m +196 -0
  71. paradigma/ppg/hr_functions/TFD toolbox JOT/full_TFDs/sep_gdtfd.m +199 -0
  72. paradigma/ppg/hr_functions/TFD toolbox JOT/full_tfd.m +144 -0
  73. paradigma/ppg/hr_functions/TFD toolbox JOT/load_curdir.m +13 -0
  74. paradigma/ppg/hr_functions/TFD toolbox JOT/pics/decimated_TFDs_examples.png +0 -0
  75. paradigma/ppg/hr_functions/TFD toolbox JOT/pics/full_TFDs_examples.png +0 -0
  76. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/check_dec_params_seq.m +79 -0
  77. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispEE.m +9 -0
  78. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/dispVars.m +26 -0
  79. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/disp_bytes.m +25 -0
  80. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_full.m +40 -0
  81. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/fold_vector_half.m +34 -0
  82. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/gen_LFM.m +29 -0
  83. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_analytic_signal.m +76 -0
  84. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/get_window.m +176 -0
  85. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/isreal_fn.m +11 -0
  86. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/padWin.m +97 -0
  87. paradigma/ppg/hr_functions/TFD toolbox JOT/utils/vtfd.m +149 -0
  88. paradigma/ppg/preprocessing/preprocessing_imu.m +15 -0
  89. paradigma/ppg/preprocessing/preprocessing_ppg.m +13 -0
  90. paradigma/ppg_preprocessing.py +313 -0
  91. paradigma/preprocessing_config.py +64 -0
  92. paradigma/quantification.py +58 -0
  93. paradigma/tremor/TremorFeaturesAndClassification.m +345 -0
  94. paradigma/tremor/feat_extraction/DerivativesExtract.m +22 -0
  95. paradigma/tremor/feat_extraction/ExtractBandSignalsRMS.m +72 -0
  96. paradigma/tremor/feat_extraction/MFCCExtract.m +100 -0
  97. paradigma/tremor/feat_extraction/PSDBandPower.m +52 -0
  98. paradigma/tremor/feat_extraction/PSDEst.m +63 -0
  99. paradigma/tremor/feat_extraction/PSDExtrAxis.m +88 -0
  100. paradigma/tremor/feat_extraction/PSDExtrOpt.m +95 -0
  101. paradigma/tremor/preprocessing/InterpData.m +32 -0
  102. paradigma/tremor/weekly_aggregates/WeeklyAggregates.m +295 -0
  103. paradigma/util.py +50 -0
  104. paradigma/windowing.py +217 -0
  105. paradigma-0.2.0.dist-info/LICENSE +192 -0
  106. paradigma-0.2.0.dist-info/METADATA +58 -0
  107. paradigma-0.2.0.dist-info/RECORD +108 -0
  108. paradigma/dummy.py +0 -3
  109. paradigma-0.1.5.dist-info/LICENSE +0 -201
  110. paradigma-0.1.5.dist-info/METADATA +0 -18
  111. paradigma-0.1.5.dist-info/RECORD +0 -6
  112. {paradigma-0.1.5.dist-info → paradigma-0.2.0.dist-info}/WHEEL +0 -0
paradigma/feature_extraction.py
@@ -0,0 +1,678 @@
+ import numpy as np
+ import pandas as pd
+ from sklearn.decomposition import PCA
+
+ from scipy import signal, fft
+ from scipy.integrate import cumulative_trapezoid
+ from scipy.signal import find_peaks
+
+
+ def generate_statistics(
+     sensor_col: pd.Series,
+     statistic: str
+ ) -> list:
+     """Generate statistics for a single sensor and axis. The function is used with the apply function in pandas.
+
+     Parameters
+     ----------
+     sensor_col: pd.Series
+         The sensor column to be aggregated (e.g. x-axis of accelerometer)
+     statistic: str
+         The statistic to be computed [mean, std, max, min]
+
+     Returns
+     -------
+     list
+         The aggregated statistics
+     """
+     if statistic == 'mean':
+         return [np.mean(x) for x in sensor_col]
+     elif statistic == 'std':
+         return [np.std(x) for x in sensor_col]
+     elif statistic == 'max':
+         return [np.max(x) for x in sensor_col]
+     elif statistic == 'min':
+         return [np.min(x) for x in sensor_col]
+     else:
+         raise ValueError("Statistic not recognized.")
+
+
+ def generate_std_norm(
+     df: pd.DataFrame,
+     cols: list,
+ ) -> pd.Series:
+     """Generate the standard deviation of the norm of the accelerometer axes.
+
+     Parameters
+     ----------
+     df: pd.DataFrame
+         The dataframe containing the accelerometer axes
+     cols: list
+         The names of the columns containing the accelerometer axes
+
+     Returns
+     -------
+     pd.Series
+         The standard deviation of the norm of the accelerometer axes
+     """
+     return df.apply(
+         lambda x: np.std(np.sqrt(sum(
+             [np.array([y**2 for y in x[col]]) for col in cols]
+         ))), axis=1)
+
+
+ def compute_fft(
+     values: list,
+     window_type: str = 'hann',
+     sampling_frequency: int = 100,
+ ) -> tuple:
+     """Compute the Fast Fourier Transform (FFT) of a signal.
+
+     Parameters
+     ----------
+     values: list
+         The values of the signal (e.g., accelerometer data) of a single window.
+     window_type: str
+         The type of window to be used for the FFT (default: 'hann')
+     sampling_frequency: int
+         The sampling frequency of the signal (default: 100)
+
+     Returns
+     -------
+     tuple
+         The FFT values and the corresponding frequencies
+     """
+     w = signal.get_window(window_type, len(values), fftbins=False)
+     yf = 2*fft.fft(values*w)[:int(len(values)/2+1)]
+     xf = fft.fftfreq(len(values), 1/sampling_frequency)[:int(len(values)/2+1)]
+
+     return yf, xf
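Editor's note: a minimal sketch of how compute_fft might be exercised; the 5 Hz test signal below is invented for illustration and is not part of the package.

    import numpy as np

    fs = 100
    t = np.arange(0, 4, 1 / fs)            # 4 s of data at 100 Hz
    values = np.sin(2 * np.pi * 5 * t)     # hypothetical 5 Hz sine window

    yf, xf = compute_fft(values, window_type='hann', sampling_frequency=fs)
    print(xf[np.argmax(np.abs(yf))])       # prints 5.0, the input frequency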
+
+
+ def signal_to_ffts(
+     sensor_col: pd.Series,
+     window_type: str = 'hann',
+     sampling_frequency: int = 100,
+ ) -> tuple:
+     """Compute the Fast Fourier Transform (FFT) of a signal per window (can probably be combined with compute_fft and simplified).
+
+     Parameters
+     ----------
+     sensor_col: pd.Series
+         The sensor column to be transformed (e.g. x-axis of accelerometer)
+     window_type: str
+         The type of window to be used for the FFT (default: 'hann')
+     sampling_frequency: int
+         The sampling frequency of the signal (default: 100)
+
+     Returns
+     -------
+     tuple
+         Lists of frequencies and corresponding FFT values, which can be concatenated as columns to the dataframe
+     """
+     l_values_total = []
+     l_freqs_total = []
+     for row in sensor_col:
+         l_values, l_freqs = compute_fft(
+             values=row,
+             window_type=window_type,
+             sampling_frequency=sampling_frequency)
+         l_values_total.append(l_values)
+         l_freqs_total.append(l_freqs)
+
+     return l_freqs_total, l_values_total
+
+
+ def compute_power_in_bandwidth(
+     sensor_col: list,
+     fmin: int,
+     fmax: int,
+     sampling_frequency: int = 100,
+     window_type: str = 'hann',
+ ) -> float:
+     """Computes the power in a specific frequency band for a specified sensor and axis.
+
+     Note: sensor_col here is a single cell (corresponding to a single window) of the sensor column, as the function is used with the pandas apply function.
+     Probably we want a smarter way of doing this.
+
+     Parameters
+     ----------
+     sensor_col: list
+         The sensor column to be transformed (e.g. x-axis of accelerometer). This corresponds to a single window, i.e., a single row of the dataframe,
+         and contains the values of the individual timestamps composing the window.
+     fmin: int
+         The lower bound of the frequency band
+     fmax: int
+         The upper bound of the frequency band
+     sampling_frequency: int
+         The sampling frequency of the signal (default: 100)
+     window_type: str
+         The type of window to be used for the FFT (default: 'hann')
+
+     Returns
+     -------
+     float
+         The power in the specified frequency band
+     """
+     fxx, pxx = signal.periodogram(sensor_col, fs=sampling_frequency, window=window_type)
+     ind_min = np.argmax(fxx > fmin) - 1
+     ind_max = np.argmax(fxx > fmax) - 1
+     return np.log10(np.trapz(pxx[ind_min:ind_max], fxx[ind_min:ind_max]))
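Editor's note: a hedged sanity check of the band-power helper; the 3 Hz test window is fabricated for illustration. The log10 power should be markedly higher in the band containing the oscillation.

    import numpy as np

    fs = 100
    t = np.arange(0, 6, 1 / fs)
    window = np.sin(2 * np.pi * 3 * t)      # hypothetical 3 Hz oscillation

    in_band = compute_power_in_bandwidth(window, fmin=2, fmax=4, sampling_frequency=fs)
    out_band = compute_power_in_bandwidth(window, fmin=10, fmax=20, sampling_frequency=fs)
    print(in_band > out_band)               # True: power concentrates in the 2-4 Hz band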
+
+
+ def compute_perc_power(
+     sensor_col: list,
+     fmin_band: int,
+     fmax_band: int,
+     fmin_total: int = 0,
+     fmax_total: int = 100,
+     sampling_frequency: int = 100,
+     window_type: str = 'hann'
+ ) -> float:
+     """Computes the percentage of power in a specific frequency band for a specified sensor and axis.
+
+     Note: sensor_col here is a single cell (corresponding to a single window) of the sensor column, as the function is used with the pandas apply function.
+
+     Parameters
+     ----------
+     sensor_col: list
+         The sensor column to be transformed (e.g. x-axis of accelerometer). This corresponds to a single window, i.e., a single row of the dataframe
+     fmin_band: int
+         The lower bound of the frequency band
+     fmax_band: int
+         The upper bound of the frequency band
+     fmin_total: int
+         The lower bound of the frequency spectrum (default: 0)
+     fmax_total: int
+         The upper bound of the frequency spectrum (default: 100)
+     sampling_frequency: int
+         The sampling frequency of the signal (default: 100)
+     window_type: str
+         The type of window to be used for the FFT (default: 'hann')
+
+     Returns
+     -------
+     float
+         The percentage of power in the specified frequency band
+     """
+     angle_power_band = compute_power_in_bandwidth(
+         sensor_col=sensor_col,
+         fmin=fmin_band,
+         fmax=fmax_band,
+         sampling_frequency=sampling_frequency,
+         window_type=window_type
+     )
+
+     angle_power_total = compute_power_in_bandwidth(
+         sensor_col=sensor_col,
+         fmin=fmin_total,
+         fmax=fmax_total,
+         sampling_frequency=sampling_frequency,
+         window_type=window_type
+     )
+
+     return angle_power_band / angle_power_total
+
+
+ def get_dominant_frequency(
+     signal_ffts: list,
+     signal_freqs: list,
+     fmin: int,
+     fmax: int
+ ) -> float:
+     """Computes the dominant frequency in a specific frequency band.
+
+     Note: signal_ffts and signal_freqs are single cells (corresponding to a single window) of the FFT columns, as the function is used with the pandas apply function.
+
+     Parameters
+     ----------
+     signal_ffts: list
+         The FFT values of the signal of a single window
+     signal_freqs: list
+         The corresponding frequencies of the FFT values
+     fmin: int
+         The lower bound of the frequency band
+     fmax: int
+         The upper bound of the frequency band
+
+     Returns
+     -------
+     float
+         The dominant frequency in the specified frequency band
+     """
+     valid_indices = np.where((signal_freqs > fmin) & (signal_freqs < fmax))
+     signal_freqs_adjusted = signal_freqs[valid_indices]
+     signal_ffts_adjusted = signal_ffts[valid_indices]
+
+     idx = np.argmax(np.abs(signal_ffts_adjusted))
+     return np.abs(signal_freqs_adjusted[idx])
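Editor's note: this composes directly with compute_fft; a minimal sketch, reusing the invented 5 Hz signal from above.

    import numpy as np

    fs = 100
    t = np.arange(0, 4, 1 / fs)
    values = np.sin(2 * np.pi * 5 * t)     # hypothetical 5 Hz window

    yf, xf = compute_fft(values, sampling_frequency=fs)
    print(get_dominant_frequency(signal_ffts=yf, signal_freqs=xf, fmin=1, fmax=10))  # 5.0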
+
+
+ def compute_power(
+     df: pd.DataFrame,
+     fft_cols: list
+ ) -> pd.Series:
+     """Compute the power of the FFT values.
+
+     Parameters
+     ----------
+     df: pd.DataFrame
+         The dataframe containing the FFT values
+     fft_cols: list
+         The names of the columns containing the FFT values
+
+     Returns
+     -------
+     pd.Series
+         The power of the FFT values
+     """
+     for col in fft_cols:
+         df['{}_power'.format(col)] = df[col].apply(lambda x: np.square(np.abs(x)))
+
+     return df.apply(lambda x: sum([np.array([y for y in x[col+'_power']]) for col in fft_cols]), axis=1)
+
+
+ def generate_cepstral_coefficients(
+     total_power_col: pd.Series,
+     window_length_s: int,
+     sampling_frequency: int = 100,
+     low_frequency: int = 0,
+     high_frequency: int = 25,
+     n_filters: int = 20,
+     n_coefficients: int = 12,
+ ) -> pd.DataFrame:
+     """Generate cepstral coefficients from the total power of the signal.
+
+     Parameters
+     ----------
+     total_power_col: pd.Series
+         The total power of the signal, extracted using compute_power
+     window_length_s: int
+         The number of seconds a window constitutes
+     sampling_frequency: int
+         The sampling frequency of the data (default: 100)
+     low_frequency: int
+         The lower bound of the frequency band (default: 0)
+     high_frequency: int
+         The upper bound of the frequency band (default: 25)
+     n_filters: int
+         The number of DCT filters (default: 20)
+     n_coefficients: int
+         The number of coefficients to extract (default: 12)
+
+     Returns
+     -------
+     pd.DataFrame
+         A dataframe with one column per cepstral coefficient
+     """
+     window_length = window_length_s * sampling_frequency
+
+     # compute filter points
+     freqs = np.linspace(low_frequency, high_frequency, num=n_filters+2)
+     filter_points = np.floor((window_length + 1) / sampling_frequency * freqs).astype(int)
+
+     # construct filterbank
+     filters = np.zeros((len(filter_points)-2, int(window_length/2+1)))
+     for j in range(len(filter_points)-2):
+         filters[j, filter_points[j] : filter_points[j+1]] = np.linspace(0, 1, filter_points[j+1] - filter_points[j])
+         filters[j, filter_points[j+1] : filter_points[j+2]] = np.linspace(1, 0, filter_points[j+2] - filter_points[j+1])
+
+     # filter signal
+     power_filtered = [np.dot(filters, x) for x in total_power_col]
+     log_power_filtered = [10.0 * np.log10(x) for x in power_filtered]
+
+     # generate cepstral coefficients
+     dct_filters = np.empty((n_coefficients, n_filters))
+     dct_filters[0, :] = 1.0 / np.sqrt(n_filters)
+
+     samples = np.arange(1, 2 * n_filters, 2) * np.pi / (2.0 * n_filters)
+
+     for i in range(1, n_coefficients):
+         dct_filters[i, :] = np.cos(i * samples) * np.sqrt(2.0 / n_filters)
+
+     cepstral_coefs = [np.dot(dct_filters, x) for x in log_power_filtered]
+
+     return pd.DataFrame(np.vstack(cepstral_coefs), columns=['cc_{}'.format(j+1) for j in range(n_coefficients)])
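Editor's note: a shape-level illustration with dummy inputs (the random spectra below are not package data). Each element of total_power_col must be a one-sided power spectrum of length window_length_s * sampling_frequency / 2 + 1.

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    # three dummy spectra for 4 s windows at 100 Hz: 4*100/2 + 1 = 201 bins each
    total_power = pd.Series([rng.random(201) + 1e-6 for _ in range(3)])

    df_cc = generate_cepstral_coefficients(total_power, window_length_s=4)
    print(df_cc.shape)   # (3, 12): one row per window, columns cc_1 ... cc_12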
+
+
+ def pca_transform_gyroscope(
+     df: pd.DataFrame,
+     y_gyro_colname: str,
+     z_gyro_colname: str,
+     pred_gait_colname: str,
+ ) -> pd.Series:
+     """Apply principal component analysis (PCA) on the y-axis and z-axis of the raw gyroscope signal
+     to extract the velocity. PCA is applied to the predicted gait timestamps only to maximize the similarity
+     to the velocity in the arm swing direction.
+
+     Parameters
+     ----------
+     df: pd.DataFrame
+         The dataframe containing the gyroscope data
+     y_gyro_colname: str
+         The column name of the y-axis of the gyroscope
+     z_gyro_colname: str
+         The column name of the z-axis of the gyroscope
+     pred_gait_colname: str
+         The column name of the predicted gait boolean
+
+     Returns
+     -------
+     pd.Series
+         The first principal component corresponding to the angular velocity in the arm swing direction
+     """
+     pca = PCA(n_components=2, svd_solver='auto', random_state=22)
+     pca.fit([(i, j) for i, j in zip(df.loc[df[pred_gait_colname]==1, y_gyro_colname], df.loc[df[pred_gait_colname]==1, z_gyro_colname])])
+     yz_gyros = pca.transform([(i, j) for i, j in zip(df[y_gyro_colname], df[z_gyro_colname])])
+
+     velocity = [x[0] for x in yz_gyros]
+
+     return pd.Series(velocity)
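Editor's note: a minimal sketch of the call; the frame and column names below are hypothetical (in the package they come from the gait analysis config). PCA is fitted on the gait-labelled samples only, then applied to the whole recording.

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(42)
    df_gyro = pd.DataFrame({
        'gyroscope_y': rng.normal(size=200),    # hypothetical column names
        'gyroscope_z': rng.normal(size=200),
        'gait_boolean': np.ones(200, dtype=int),
    })

    velocity = pca_transform_gyroscope(df_gyro, 'gyroscope_y', 'gyroscope_z', 'gait_boolean')
    print(velocity.shape)   # (200,): one velocity value per sample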
+
+
+ def compute_angle(
+     velocity_col: pd.Series,
+     time_col: pd.Series,
+ ) -> pd.Series:
+     """Apply cumulative trapezoidal integration to extract the angle from the velocity.
+
+     Parameters
+     ----------
+     velocity_col: pd.Series
+         The angular velocity (gyroscope) column to be integrated
+     time_col: pd.Series
+         The time column corresponding to the angular velocity
+
+     Returns
+     -------
+     pd.Series
+         An estimation of the angle extracted from the angular velocity
+     """
+     angle_col = cumulative_trapezoid(velocity_col, time_col, initial=0)
+     return pd.Series([x*-1 if x<0 else x for x in angle_col])
+
+
+ def remove_moving_average_angle(
+     angle_col: pd.Series,
+     sampling_frequency: int = 100,
+ ) -> pd.Series:
+     """Remove the moving average from the angle to account for potential drift in the signal.
+
+     Parameters
+     ----------
+     angle_col: pd.Series
+         The angle column to be processed, obtained using compute_angle
+     sampling_frequency: int
+         The sampling frequency of the data (default: 100)
+
+     Returns
+     -------
+     pd.Series
+         The estimated angle without potential drift
+     """
+     angle_ma = angle_col.rolling(window=int(2*(sampling_frequency*0.5)+1), min_periods=1, center=True, closed='both').mean()
+
+     return pd.Series(angle_col - angle_ma)
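Editor's note: the two helpers above are meant to chain: integrate velocity into an angle, then subtract a centered, roughly one-second moving average to suppress drift. A hedged toy run (the drifting 1 Hz signal is made up):

    import numpy as np
    import pandas as pd

    fs = 100
    time = pd.Series(np.arange(0, 10, 1 / fs))
    velocity = pd.Series(np.sin(2 * np.pi * time) + 0.5)   # 1 Hz swing plus constant drift

    angle = compute_angle(velocity_col=velocity, time_col=time)
    angle_detrended = remove_moving_average_angle(angle, sampling_frequency=fs)
    print(float(angle.iloc[-1]), float(angle_detrended.iloc[-1]))
    # the accumulated drift (~5) is largely removed in the detrended series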
+
+
+ def extract_angle_extremes(
+     df: pd.DataFrame,
+     angle_colname: str,
+     dominant_frequency_colname: str,
+     sampling_frequency: int = 100,
+ ) -> None:
+     """Extract the peaks of the angle (minima and maxima) from the smoothed angle signal that adhere to a set of specific requirements.
+
+     Parameters
+     ----------
+     df: pd.DataFrame
+         The dataframe containing the angle signal
+     angle_colname: str
+         The name of the column containing the smoothed angle signal
+     dominant_frequency_colname: str
+         The name of the column containing the dominant frequency
+     sampling_frequency: int
+         The sampling frequency of the data (default: 100)
+
+     Returns
+     -------
+     None
+         The dataframe is modified in place: the cleaned extrema indices are stored in
+         'angle_new_minima' and 'angle_new_maxima', and their values in 'angle_extrema_values'
+     """
+     # determine peaks
+     df['angle_maxima'] = df.apply(lambda x: find_peaks(x[angle_colname], distance=sampling_frequency * 0.6 / x[dominant_frequency_colname], prominence=2)[0], axis=1)
+     df['angle_minima'] = df.apply(lambda x: find_peaks([-x for x in x[angle_colname]], distance=sampling_frequency * 0.6 / x[dominant_frequency_colname], prominence=2)[0], axis=1)
+
+     df['angle_new_minima'] = df['angle_minima'].copy()
+     df['angle_new_maxima'] = df['angle_maxima'].copy()
+
+     for index, _ in df.iterrows():
+         i_pks = 0  # iterable to keep track of consecutive min-min and max-max versus min-max
+         n_min = df.loc[index, 'angle_new_minima'].size  # number of minima in window
+         n_max = df.loc[index, 'angle_new_maxima'].size  # number of maxima in window
+
+         if n_min > 0 and n_max > 0:
+             # if the first minimum occurs before the first maximum, start with the minimum
+             if df.loc[index, 'angle_new_maxima'][0] > df.loc[index, 'angle_new_minima'][0]:
+                 # only continue if there are enough minima and maxima to perform operations
+                 while i_pks < df.loc[index, 'angle_new_minima'].size - 1 and i_pks < df.loc[index, 'angle_new_maxima'].size:
+
+                     # if the next minimum comes before the next maximum, we have two minima in a row, and should keep the deeper one
+                     if df.loc[index, 'angle_new_minima'][i_pks+1] < df.loc[index, 'angle_new_maxima'][i_pks]:
+                         # if the next minimum is smaller than the current minimum, keep the next minimum and discard the current minimum
+                         if df.loc[index, angle_colname][df.loc[index, 'angle_new_minima'][i_pks+1]] < df.loc[index, angle_colname][df.loc[index, 'angle_new_minima'][i_pks]]:
+                             df.at[index, 'angle_new_minima'] = np.delete(df.loc[index, 'angle_new_minima'], i_pks)
+                         # otherwise, keep the current minimum and discard the next minimum
+                         else:
+                             df.at[index, 'angle_new_minima'] = np.delete(df.loc[index, 'angle_new_minima'], i_pks+1)
+                             i_pks -= 1
+
+                     # if the current maximum comes before the current minimum, we have two maxima in a row, and should keep the higher one
+                     if i_pks >= 0 and df.loc[index, 'angle_new_minima'][i_pks] > df.loc[index, 'angle_new_maxima'][i_pks]:
+                         # if the current maximum is smaller than the previous maximum, keep the previous maximum and discard the current maximum
+                         if df.loc[index, angle_colname][df.loc[index, 'angle_new_maxima'][i_pks]] < df.loc[index, angle_colname][df.loc[index, 'angle_new_maxima'][i_pks-1]]:
+                             df.at[index, 'angle_new_maxima'] = np.delete(df.loc[index, 'angle_new_maxima'], i_pks)
+                         # otherwise, keep the current maximum and discard the previous maximum
+                         else:
+                             df.at[index, 'angle_new_maxima'] = np.delete(df.loc[index, 'angle_new_maxima'], i_pks-1)
+                             i_pks -= 1
+                     i_pks += 1
+
+             # or if the first maximum occurs before the first minimum, start with the maximum
+             elif df.loc[index, 'angle_new_maxima'][0] < df.loc[index, 'angle_new_minima'][0]:
+                 # only continue if there are enough minima and maxima to perform operations
+                 while i_pks < df.loc[index, 'angle_new_minima'].size and i_pks < df.loc[index, 'angle_new_maxima'].size-1:
+                     # if the next maximum comes before the current minimum, we have two maxima in a row, and should keep the higher one
+                     if df.loc[index, 'angle_new_minima'][i_pks] > df.loc[index, 'angle_new_maxima'][i_pks+1]:
+                         # if the next maximum is larger than the current maximum, keep the next maximum and discard the current maximum
+                         if df.loc[index, angle_colname][df.loc[index, 'angle_new_maxima'][i_pks+1]] > df.loc[index, angle_colname][df.loc[index, 'angle_new_maxima'][i_pks]]:
+                             df.at[index, 'angle_new_maxima'] = np.delete(df.loc[index, 'angle_new_maxima'], i_pks)
+                         # otherwise, keep the current maximum and discard the next maximum
+                         else:
+                             df.at[index, 'angle_new_maxima'] = np.delete(df.loc[index, 'angle_new_maxima'], i_pks+1)
+                             i_pks -= 1
+
+                     # if the current minimum comes before the current maximum, we have two minima in a row, and should keep the deeper one
+                     if i_pks > 0 and df.loc[index, 'angle_new_minima'][i_pks] < df.loc[index, 'angle_new_maxima'][i_pks]:
+                         # if the current minimum is smaller than the previous minimum, keep the current minimum and discard the previous minimum
+                         if df.loc[index, angle_colname][df.loc[index, 'angle_new_minima'][i_pks]] < df.loc[index, angle_colname][df.loc[index, 'angle_new_minima'][i_pks-1]]:
+                             df.at[index, 'angle_new_minima'] = np.delete(df.loc[index, 'angle_new_minima'], i_pks-1)
+                         # otherwise, keep the previous minimum and discard the current minimum
+                         else:
+                             df.at[index, 'angle_new_minima'] = np.delete(df.loc[index, 'angle_new_minima'], i_pks)
+                             i_pks -= 1
+                     i_pks += 1
+
+     # for some peculiar reason, if a single item remains in the row for angle_new_minima or
+     # angle_new_maxima, it could be either a scalar or a vector
+     for col in ['angle_new_minima', 'angle_new_maxima']:
+         df.loc[df.apply(lambda x: type(x[col].tolist())==int, axis=1), col] = df.loc[df.apply(lambda x: type(x[col].tolist())==int, axis=1), col].apply(lambda x: [x])
+
+     df['angle_extrema_values'] = df.apply(lambda x: [x[angle_colname][i] for i in np.concatenate([x['angle_new_minima'], x['angle_new_maxima']])], axis=1)
+
+     return
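Editor's note: because this function mutates the dataframe rather than returning, a hedged sketch may help; the single-window frame, column names, and 1 Hz angle signal below are all invented.

    import numpy as np
    import pandas as pd

    fs = 100
    t = np.arange(0, 3, 1 / fs)
    df_win = pd.DataFrame({
        'angle_smooth': [10 * np.sin(2 * np.pi * t)],   # hypothetical smoothed angle, one window
        'dominant_frequency': [1.0],
    })

    extract_angle_extremes(df_win, 'angle_smooth', 'dominant_frequency', sampling_frequency=fs)
    print(df_win.loc[0, 'angle_new_maxima'], df_win.loc[0, 'angle_new_minima'])
    # maxima near samples [25, 125, 225], minima near [75, 175, 275]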
+
+
+ def extract_range_of_motion(
+     angle_extrema_values_col: pd.Series,
+ ) -> list:
+     """Extract the range of motion from the angle extrema values.
+
+     Parameters
+     ----------
+     angle_extrema_values_col: pd.Series
+         The column containing the angle extrema values
+
+     Returns
+     -------
+     list
+         The range of motion (a list of amplitudes) per window
+     """
+     angle_amplitudes = np.empty((len(angle_extrema_values_col), 0)).tolist()
+
+     # for each window
+     for i, extrema_values in enumerate(angle_extrema_values_col):
+         l_amplitudes = []
+         # for each extremum contained in the window
+         for j, value in enumerate(extrema_values):
+             # if the extremum is not the last one in the list of extrema
+             if j < len(extrema_values)-1:
+                 # if the current extremum is a maximum and the next one is a minimum, or vice versa
+                 if (value > 0 and extrema_values[j+1] < 0) or (value < 0 and extrema_values[j+1] > 0):
+                     # compute the amplitude as the sum of the absolute values of the two extrema
+                     l_amplitudes.append(np.sum(np.abs(value) + np.abs(extrema_values[j+1])))
+                 # or if the extrema are both positive or both negative, and the current extremum is closer to 0
+                 elif np.abs(value) < np.abs(extrema_values[j+1]):
+                     # compute the amplitude as the difference between the two extrema
+                     l_amplitudes.append(np.subtract(np.abs(extrema_values[j+1]), np.abs(value)))
+                 # or if the extrema are both positive or both negative, and the current extremum is further away from 0
+                 else:
+                     # compute the amplitude as the difference between the two extrema
+                     l_amplitudes.append(np.subtract(np.abs(value), np.abs(extrema_values[j+1])))
+
+         angle_amplitudes[i].append([x for x in l_amplitudes])
+
+     return [y for item in angle_amplitudes for y in item]
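Editor's note: a small worked example with invented extrema values. Alternating signs sum the absolute values; equal signs take the difference.

    import pandas as pd

    # hypothetical extrema per window, in degrees
    extrema_col = pd.Series([[20.0, -15.0, 18.0], [10.0, -5.0]])

    rom = extract_range_of_motion(extrema_col)
    print([[float(a) for a in w] for w in rom])   # [[35.0, 33.0], [15.0]]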
+
+
+ def extract_peak_angular_velocity(
+     df: pd.DataFrame,
+     velocity_colname: str,
+     angle_minima_colname: str,
+     angle_maxima_colname: str,
+ ) -> None:
+     """Extract the forward and backward peak angular velocity from the angular velocity.
+
+     Parameters
+     ----------
+     df: pd.DataFrame
+         The dataframe containing the angular velocity
+     velocity_colname: str
+         The column name of the angular velocity
+     angle_minima_colname: str
+         The column name of the column containing the angle minima
+     angle_maxima_colname: str
+         The column name of the column containing the angle maxima
+
+     Returns
+     -------
+     None
+         The dataframe is modified in place: the peak angular velocities are stored in the
+         'forward_peak_ang_vel' and 'backward_peak_ang_vel' columns
+     """
+     df['forward_peak_ang_vel'] = np.empty((len(df), 0)).tolist()
+     df['backward_peak_ang_vel'] = np.empty((len(df), 0)).tolist()
+
+     # for each window
+     for index, row in df.iterrows():
+         # the peak angular velocity can only be computed if there is at least one minimum and one maximum in the window
+         if len(row[angle_minima_colname]) > 0 and len(row[angle_maxima_colname]) > 0:
+             # combine the minima and maxima
+             l_extrema_indices = np.sort(np.concatenate((row[angle_minima_colname], row[angle_maxima_colname])))
+             # for each peak
+             for j, peak_index in enumerate(l_extrema_indices):
+                 # if the peak is a maximum and there is another peak after it
+                 if peak_index in row[angle_maxima_colname] and j < len(l_extrema_indices) - 1:
+                     # compute the forward peak angular velocity, defined by the maximum negative angular velocity between the two peaks
+                     df.loc[index, 'forward_peak_ang_vel'].append(np.abs(min(row[velocity_colname][l_extrema_indices[j]:l_extrema_indices[j+1]])))
+                 # if the peak is a minimum and there is another peak after it
+                 elif peak_index in row[angle_minima_colname] and j < len(l_extrema_indices) - 1:
+                     # compute the backward peak angular velocity, defined by the maximum positive angular velocity between the two peaks
+                     df.loc[index, 'backward_peak_ang_vel'].append(np.abs(max(row[velocity_colname][l_extrema_indices[j]:l_extrema_indices[j+1]])))
+
+     return
+
+
+ def extract_temporal_domain_features(config, df_windowed, l_gravity_stats=['mean', 'std']):
+     # compute the mean and standard deviation of the gravity component of the acceleration signal for each axis
+     for col in config.l_gravity_cols:
+         for stat in l_gravity_stats:
+             df_windowed[f'{col}_{stat}'] = generate_statistics(
+                 sensor_col=df_windowed[col],
+                 statistic=stat
+             )
+
+     # compute the standard deviation of the Euclidean norm of the three axes
+     df_windowed['std_norm_acc'] = generate_std_norm(
+         df=df_windowed,
+         cols=config.l_accelerometer_cols
+     )
+
+     return df_windowed
621
+
622
+
623
+ def extract_spectral_domain_features(config, df_windowed, sensor, l_sensor_colnames):
624
+
625
+ for col in l_sensor_colnames:
626
+
627
+ # transform the temporal signal to the spectral domain using the fast fourier transform
628
+ df_windowed[f'{col}_freqs'], df_windowed[f'{col}_fft'] = signal_to_ffts(
629
+ sensor_col=df_windowed[col],
630
+ window_type=config.window_type,
631
+ sampling_frequency=config.sampling_frequency
632
+ )
633
+
634
+ # compute the power in distinct frequency bandwidths
635
+ for bandwidth, frequencies in config.d_frequency_bandwidths.items():
636
+ df_windowed[col+'_'+bandwidth] = df_windowed.apply(lambda x: compute_power_in_bandwidth(
637
+ sensor_col=x[col],
638
+ fmin=frequencies[0],
639
+ fmax=frequencies[1],
640
+ sampling_frequency=config.sampling_frequency,
641
+ window_type=config.window_type,
642
+ ), axis=1
643
+ )
644
+
645
+ # compute the dominant frequency, i.e., the frequency with the highest power
646
+ df_windowed[col+'_dominant_frequency'] = df_windowed.apply(lambda x: get_dominant_frequency(
647
+ signal_ffts=x[col+'_fft'],
648
+ signal_freqs=x[col+'_freqs'],
649
+ fmin=config.spectrum_low_frequency,
650
+ fmax=config.spectrum_high_frequency
651
+ ), axis=1
652
+ )
653
+
654
+ # compute the power summed over the individual axes to obtain the total power per frequency bandwidth
655
+ for bandwidth in config.d_frequency_bandwidths.keys():
656
+ df_windowed['total_'+bandwidth] = df_windowed.apply(lambda x: sum(x[y+'_'+bandwidth] for y in l_sensor_colnames), axis=1)
657
+
658
+ # compute the power summed over the individual frequency bandwidths to obtain the total power
659
+ df_windowed['total_power'] = compute_power(
660
+ df=df_windowed,
661
+ fft_cols=[f'{col}_fft' for col in l_sensor_colnames])
662
+
663
+ # compute the cepstral coefficients of the total power signal
664
+ cc_cols = generate_cepstral_coefficients(
665
+ total_power_col=df_windowed['total_power'],
666
+ window_length_s=config.window_length_s,
667
+ sampling_frequency=config.sampling_frequency,
668
+ low_frequency=config.spectrum_low_frequency,
669
+ high_frequency=config.spectrum_high_frequency,
670
+ n_filters=config.n_dct_filters_cc,
671
+ n_coefficients=config.n_coefficients_cc
672
+ )
673
+
674
+ df_windowed = pd.concat([df_windowed, cc_cols], axis=1)
675
+
676
+ df_windowed = df_windowed.rename(columns={f'cc_{cc_nr}': f'cc_{cc_nr}_{sensor}' for cc_nr in range(1,config.n_coefficients_cc+1)}).rename(columns={'window_start': 'time'})
677
+
678
+ return df_windowed
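Editor's note: to show how these pieces compose, here is a hedged end-to-end sketch. The Config class only mirrors the attribute names these functions read (the real values live in paradigma/gait_analysis_config.py); the bandwidth names, column names, and random data are all invented.

    import numpy as np
    import pandas as pd

    class Config:
        sampling_frequency = 100
        window_length_s = 4
        window_type = 'hann'
        l_accelerometer_cols = ['acc_x', 'acc_y', 'acc_z']       # hypothetical names
        l_gravity_cols = ['grav_acc_x', 'grav_acc_y', 'grav_acc_z']
        d_frequency_bandwidths = {'gait': [0.5, 3], 'tremor': [3, 7]}
        spectrum_low_frequency = 0
        spectrum_high_frequency = 25
        n_dct_filters_cc = 20
        n_coefficients_cc = 12

    # one 4 s window of dummy data per column
    n = Config.sampling_frequency * Config.window_length_s
    rng = np.random.default_rng(1)
    df_windowed = pd.DataFrame({
        'window_start': [0.0],
        **{col: [rng.normal(size=n)] for col in Config.l_accelerometer_cols},
        **{col: [rng.normal(size=n)] for col in Config.l_gravity_cols},
    })

    df_windowed = extract_temporal_domain_features(Config, df_windowed)
    df_windowed = extract_spectral_domain_features(
        Config, df_windowed, 'accelerometer', Config.l_accelerometer_cols)
    print([c for c in df_windowed.columns if c.startswith('cc_')])
    # ['cc_1_accelerometer', ..., 'cc_12_accelerometer']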