paradigma 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/classification.py +28 -11
- paradigma/config.py +157 -102
- paradigma/constants.py +39 -34
- paradigma/feature_extraction.py +270 -211
- paradigma/pipelines/gait_pipeline.py +232 -184
- paradigma/pipelines/pulse_rate_pipeline.py +202 -133
- paradigma/pipelines/pulse_rate_utils.py +144 -142
- paradigma/pipelines/tremor_pipeline.py +138 -85
- paradigma/preprocessing.py +179 -110
- paradigma/segmenting.py +138 -113
- paradigma/testing.py +359 -172
- paradigma/util.py +158 -83
- {paradigma-1.0.3.dist-info → paradigma-1.0.4.dist-info}/METADATA +31 -29
- paradigma-1.0.4.dist-info/RECORD +23 -0
- {paradigma-1.0.3.dist-info → paradigma-1.0.4.dist-info}/WHEEL +1 -1
- paradigma-1.0.4.dist-info/entry_points.txt +4 -0
- {paradigma-1.0.3.dist-info → paradigma-1.0.4.dist-info/licenses}/LICENSE +0 -1
- paradigma-1.0.3.dist-info/RECORD +0 -22
|
@@ -1,20 +1,26 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
import numpy as np
|
|
3
1
|
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
4
5
|
from scipy import signal
|
|
5
6
|
|
|
6
7
|
from paradigma.classification import ClassifierPackage
|
|
7
|
-
from paradigma.constants import DataColumns
|
|
8
8
|
from paradigma.config import TremorConfig
|
|
9
|
-
from paradigma.
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
from paradigma.constants import DataColumns
|
|
10
|
+
from paradigma.feature_extraction import (
|
|
11
|
+
compute_mfccs,
|
|
12
|
+
compute_power_in_bandwidth,
|
|
13
|
+
compute_total_power,
|
|
14
|
+
extract_frequency_peak,
|
|
15
|
+
extract_tremor_power,
|
|
16
|
+
)
|
|
17
|
+
from paradigma.segmenting import WindowedDataExtractor, tabulate_windows
|
|
12
18
|
from paradigma.util import aggregate_parameter
|
|
13
19
|
|
|
14
20
|
|
|
15
21
|
def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFrame:
|
|
16
22
|
"""
|
|
17
|
-
This function groups sequences of timestamps into windows and subsequently extracts
|
|
23
|
+
This function groups sequences of timestamps into windows and subsequently extracts
|
|
18
24
|
tremor features from windowed gyroscope data.
|
|
19
25
|
|
|
20
26
|
Parameters
|
|
@@ -31,7 +37,7 @@ def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFr
|
|
|
31
37
|
-------
|
|
32
38
|
pd.DataFrame
|
|
33
39
|
A DataFrame containing extracted tremor features and a column corresponding to time.
|
|
34
|
-
|
|
40
|
+
|
|
35
41
|
Notes
|
|
36
42
|
-----
|
|
37
43
|
- This function groups the data into windows based on timestamps.
|
|
@@ -43,21 +49,27 @@ def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFr
|
|
|
43
49
|
If the input DataFrame does not contain the required columns as specified in the configuration or if any step in the feature extraction fails.
|
|
44
50
|
"""
|
|
45
51
|
# group sequences of timestamps into windows
|
|
46
|
-
|
|
47
|
-
windowed_data = tabulate_windows(
|
|
52
|
+
windowed_colnames = [config.time_colname] + config.gyroscope_colnames
|
|
53
|
+
windowed_data = tabulate_windows(
|
|
54
|
+
df,
|
|
55
|
+
windowed_colnames,
|
|
56
|
+
config.window_length_s,
|
|
57
|
+
config.window_step_length_s,
|
|
58
|
+
config.sampling_frequency,
|
|
59
|
+
)
|
|
48
60
|
|
|
49
|
-
extractor = WindowedDataExtractor(
|
|
61
|
+
extractor = WindowedDataExtractor(windowed_colnames)
|
|
50
62
|
|
|
51
63
|
# Extract the start time and gyroscope data from the windowed data
|
|
52
|
-
idx_time = extractor.get_index(
|
|
53
|
-
idx_gyro = extractor.get_slice(config.
|
|
64
|
+
idx_time = extractor.get_index(config.time_colname)
|
|
65
|
+
idx_gyro = extractor.get_slice(config.gyroscope_colnames)
|
|
54
66
|
|
|
55
67
|
# Extract data
|
|
56
68
|
start_time = np.min(windowed_data[:, :, idx_time], axis=1)
|
|
57
69
|
windowed_gyro = windowed_data[:, :, idx_gyro]
|
|
58
70
|
|
|
59
|
-
df_features = pd.DataFrame(start_time, columns=[
|
|
60
|
-
|
|
71
|
+
df_features = pd.DataFrame(start_time, columns=[config.time_colname])
|
|
72
|
+
|
|
61
73
|
# transform the signals from the temporal domain to the spectral domain and extract tremor features
|
|
62
74
|
df_spectral_features = extract_spectral_domain_features(windowed_gyro, config)
|
|
63
75
|
|
|
@@ -67,7 +79,9 @@ def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFr
|
|
|
67
79
|
return df_features
|
|
68
80
|
|
|
69
81
|
|
|
70
|
-
def detect_tremor(
|
|
82
|
+
def detect_tremor(
|
|
83
|
+
df: pd.DataFrame, config: TremorConfig, full_path_to_classifier_package: str | Path
|
|
84
|
+
) -> pd.DataFrame:
|
|
71
85
|
"""
|
|
72
86
|
Detects tremor in the input DataFrame using a pre-trained classifier and applies a threshold to the predicted probabilities.
|
|
73
87
|
|
|
@@ -129,26 +143,39 @@ def detect_tremor(df: pd.DataFrame, config: TremorConfig, full_path_to_classifie
|
|
|
129
143
|
X = df.loc[:, feature_names_predictions].copy()
|
|
130
144
|
X.loc[:, feature_names_scaling] = scaled_features
|
|
131
145
|
|
|
132
|
-
# Get the tremor probability
|
|
146
|
+
# Get the tremor probability
|
|
133
147
|
df[DataColumns.PRED_TREMOR_PROBA] = clf_package.predict_proba(X)
|
|
134
148
|
|
|
135
149
|
# Make prediction based on pre-defined threshold
|
|
136
|
-
df[DataColumns.PRED_TREMOR_LOGREG] = (
|
|
150
|
+
df[DataColumns.PRED_TREMOR_LOGREG] = (
|
|
151
|
+
df[DataColumns.PRED_TREMOR_PROBA] >= clf_package.threshold
|
|
152
|
+
).astype(int)
|
|
153
|
+
|
|
154
|
+
# Perform extra checks for rest tremor
|
|
155
|
+
peak_check = (df["freq_peak"] >= config.fmin_rest_tremor) & (
|
|
156
|
+
df["freq_peak"] <= config.fmax_rest_tremor
|
|
157
|
+
) # peak within 3-7 Hz
|
|
158
|
+
df[DataColumns.PRED_ARM_AT_REST] = (
|
|
159
|
+
df["below_tremor_power"] <= config.movement_threshold
|
|
160
|
+
).astype(
|
|
161
|
+
int
|
|
162
|
+
) # arm at rest or in stable posture
|
|
163
|
+
df[DataColumns.PRED_TREMOR_CHECKED] = (
|
|
164
|
+
(df[DataColumns.PRED_TREMOR_LOGREG] == 1)
|
|
165
|
+
& peak_check
|
|
166
|
+
& df[DataColumns.PRED_ARM_AT_REST]
|
|
167
|
+
).astype(int)
|
|
137
168
|
|
|
138
|
-
# Perform extra checks for rest tremor
|
|
139
|
-
peak_check = (df['freq_peak'] >= config.fmin_rest_tremor) & (df['freq_peak']<=config.fmax_rest_tremor) # peak within 3-7 Hz
|
|
140
|
-
df[DataColumns.PRED_ARM_AT_REST] = (df['below_tremor_power'] <= config.movement_threshold).astype(int) # arm at rest or in stable posture
|
|
141
|
-
df[DataColumns.PRED_TREMOR_CHECKED] = ((df[DataColumns.PRED_TREMOR_LOGREG]==1) & (peak_check==True) & (df[DataColumns.PRED_ARM_AT_REST] == True)).astype(int)
|
|
142
|
-
|
|
143
169
|
return df
|
|
144
170
|
|
|
171
|
+
|
|
145
172
|
def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
|
|
146
173
|
"""
|
|
147
174
|
Quantifies the amount of tremor time and tremor power, aggregated over all windows in the input dataframe.
|
|
148
|
-
Tremor time is calculated as the number of the detected tremor windows, as percentage of the number of windows
|
|
175
|
+
Tremor time is calculated as the number of the detected tremor windows, as percentage of the number of windows
|
|
149
176
|
without significant non-tremor movement (at rest). For tremor power the following aggregates are derived:
|
|
150
|
-
the median, mode and percentile of tremor power specified in the configuration object.
|
|
151
|
-
|
|
177
|
+
the median, mode and percentile of tremor power specified in the configuration object.
|
|
178
|
+
|
|
152
179
|
Parameters
|
|
153
180
|
----------
|
|
154
181
|
df : pd.DataFrame
|
|
@@ -169,53 +196,69 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
|
|
|
169
196
|
- Tremor power is converted to log scale, after adding a constant of 1, so that zero tremor power
|
|
170
197
|
corresponds to a value of 0 in log scale.
|
|
171
198
|
- The modal tremor power is computed based on gaussian kernel density estimation.
|
|
172
|
-
|
|
199
|
+
|
|
173
200
|
"""
|
|
174
|
-
nr_valid_days =
|
|
175
|
-
|
|
201
|
+
nr_valid_days = (
|
|
202
|
+
df["time_dt"].dt.date.unique().size
|
|
203
|
+
) # number of valid days in the input dataframe
|
|
204
|
+
nr_windows_total = df.shape[0] # number of windows in the input dataframe
|
|
176
205
|
|
|
177
206
|
# remove windows with detected non-tremor arm movements to control for the amount of arm activities performed
|
|
178
207
|
df_filtered = df.loc[df.pred_arm_at_rest == 1]
|
|
179
|
-
nr_windows_rest = df_filtered.shape[
|
|
208
|
+
nr_windows_rest = df_filtered.shape[
|
|
209
|
+
0
|
|
210
|
+
] # number of windows without non-tremor arm movement
|
|
180
211
|
|
|
181
|
-
if
|
|
182
|
-
|
|
212
|
+
if (
|
|
213
|
+
nr_windows_rest == 0
|
|
214
|
+
): # if no windows without non-tremor arm movement are detected
|
|
215
|
+
raise Warning("No windows without non-tremor arm movement are detected.")
|
|
183
216
|
|
|
184
217
|
# calculate tremor time
|
|
185
|
-
n_windows_tremor = np.sum(df_filtered[
|
|
186
|
-
perc_windows_tremor =
|
|
218
|
+
n_windows_tremor = np.sum(df_filtered["pred_tremor_checked"])
|
|
219
|
+
perc_windows_tremor = (
|
|
220
|
+
n_windows_tremor / nr_windows_rest * 100
|
|
221
|
+
) # as percentage of total measured time without non-tremor arm movement
|
|
222
|
+
|
|
223
|
+
aggregated_tremor_power = (
|
|
224
|
+
{}
|
|
225
|
+
) # initialize dictionary to store aggregated tremor power measures
|
|
187
226
|
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
227
|
+
if (
|
|
228
|
+
n_windows_tremor == 0
|
|
229
|
+
): # if no tremor is detected, the tremor power measures are set to NaN
|
|
191
230
|
|
|
192
|
-
aggregated_tremor_power[
|
|
193
|
-
aggregated_tremor_power[
|
|
194
|
-
aggregated_tremor_power[
|
|
231
|
+
aggregated_tremor_power["median_tremor_power"] = np.nan
|
|
232
|
+
aggregated_tremor_power["mode_binned_tremor_power"] = np.nan
|
|
233
|
+
aggregated_tremor_power["90p_tremor_power"] = np.nan
|
|
195
234
|
|
|
196
235
|
else:
|
|
197
|
-
|
|
236
|
+
|
|
198
237
|
# calculate aggregated tremor power measures
|
|
199
|
-
tremor_power = df_filtered.loc[
|
|
200
|
-
|
|
201
|
-
|
|
238
|
+
tremor_power = df_filtered.loc[
|
|
239
|
+
df_filtered["pred_tremor_checked"] == 1, "tremor_power"
|
|
240
|
+
]
|
|
241
|
+
tremor_power = np.log10(tremor_power + 1) # convert to log scale
|
|
242
|
+
|
|
202
243
|
for aggregate in config.aggregates_tremor_power:
|
|
203
244
|
aggregate_name = f"{aggregate}_tremor_power"
|
|
204
|
-
aggregated_tremor_power[aggregate_name] = aggregate_parameter(
|
|
245
|
+
aggregated_tremor_power[aggregate_name] = aggregate_parameter(
|
|
246
|
+
tremor_power, aggregate, config.evaluation_points_tremor_power
|
|
247
|
+
)
|
|
205
248
|
|
|
206
249
|
# store aggregates in json format
|
|
207
250
|
d_aggregates = {
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
251
|
+
"metadata": {
|
|
252
|
+
"nr_valid_days": nr_valid_days,
|
|
253
|
+
"nr_windows_total": nr_windows_total,
|
|
254
|
+
"nr_windows_rest": nr_windows_rest,
|
|
255
|
+
},
|
|
256
|
+
"aggregated_tremor_measures": {
|
|
257
|
+
"perc_windows_tremor": perc_windows_tremor,
|
|
258
|
+
"median_tremor_power": aggregated_tremor_power["median_tremor_power"],
|
|
259
|
+
"modal_tremor_power": aggregated_tremor_power["mode_binned_tremor_power"],
|
|
260
|
+
"90p_tremor_power": aggregated_tremor_power["90p_tremor_power"],
|
|
212
261
|
},
|
|
213
|
-
'aggregated_tremor_measures': {
|
|
214
|
-
'perc_windows_tremor': perc_windows_tremor,
|
|
215
|
-
'median_tremor_power': aggregated_tremor_power['median_tremor_power'],
|
|
216
|
-
'modal_tremor_power': aggregated_tremor_power['mode_binned_tremor_power'],
|
|
217
|
-
'90p_tremor_power': aggregated_tremor_power['90p_tremor_power']
|
|
218
|
-
}
|
|
219
262
|
}
|
|
220
263
|
|
|
221
264
|
return d_aggregates
|
|
@@ -225,7 +268,7 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
|
|
|
225
268
|
"""
|
|
226
269
|
Compute spectral domain features from the gyroscope data.
|
|
227
270
|
|
|
228
|
-
This function computes Mel-frequency cepstral coefficients (MFCCs), the frequency of the peak,
|
|
271
|
+
This function computes Mel-frequency cepstral coefficients (MFCCs), the frequency of the peak,
|
|
229
272
|
the tremor power, and the below tremor power based on the total power spectral density of the windowed gyroscope data.
|
|
230
273
|
|
|
231
274
|
Parameters
|
|
@@ -233,15 +276,15 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
|
|
|
233
276
|
data : numpy.ndarray
|
|
234
277
|
A 2D numpy array where each row corresponds to a window of gyroscope data.
|
|
235
278
|
config : object
|
|
236
|
-
Configuration object containing settings such as sampling frequency, window type,
|
|
279
|
+
Configuration object containing settings such as sampling frequency, window type,
|
|
237
280
|
and MFCC parameters.
|
|
238
|
-
|
|
281
|
+
|
|
239
282
|
Returns
|
|
240
283
|
-------
|
|
241
284
|
pd.DataFrame
|
|
242
|
-
The feature dataframe containing the extracted spectral features, including
|
|
285
|
+
The feature dataframe containing the extracted spectral features, including
|
|
243
286
|
MFCCs, the frequency of the peak, the tremor power and below tremor power for each window.
|
|
244
|
-
|
|
287
|
+
|
|
245
288
|
"""
|
|
246
289
|
|
|
247
290
|
# Initialize a dictionary to hold the results
|
|
@@ -253,7 +296,7 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
|
|
|
253
296
|
segment_length_spectrogram_s = config.segment_length_spectrogram_s
|
|
254
297
|
overlap_fraction = config.overlap_fraction
|
|
255
298
|
spectral_resolution = config.spectral_resolution
|
|
256
|
-
window_type =
|
|
299
|
+
window_type = "hann"
|
|
257
300
|
|
|
258
301
|
# Compute the power spectral density
|
|
259
302
|
segment_length_n = sampling_frequency * segment_length_psd_s
|
|
@@ -262,15 +305,15 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
|
|
|
262
305
|
nfft = sampling_frequency / spectral_resolution
|
|
263
306
|
|
|
264
307
|
freqs, psd = signal.welch(
|
|
265
|
-
x=data,
|
|
266
|
-
fs=sampling_frequency,
|
|
267
|
-
window=window,
|
|
308
|
+
x=data,
|
|
309
|
+
fs=sampling_frequency,
|
|
310
|
+
window=window,
|
|
268
311
|
nperseg=segment_length_n,
|
|
269
|
-
noverlap=overlap_n,
|
|
270
|
-
nfft=nfft,
|
|
271
|
-
detrend=False,
|
|
272
|
-
scaling=
|
|
273
|
-
axis=1
|
|
312
|
+
noverlap=overlap_n,
|
|
313
|
+
nfft=nfft,
|
|
314
|
+
detrend=False,
|
|
315
|
+
scaling="density",
|
|
316
|
+
axis=1,
|
|
274
317
|
)
|
|
275
318
|
|
|
276
319
|
# Compute the spectrogram
|
|
@@ -279,18 +322,18 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
|
|
|
279
322
|
window = signal.get_window(window_type, segment_length_n)
|
|
280
323
|
|
|
281
324
|
f, t, S1 = signal.stft(
|
|
282
|
-
x=data,
|
|
283
|
-
fs=sampling_frequency,
|
|
284
|
-
window=window,
|
|
285
|
-
nperseg=segment_length_n,
|
|
325
|
+
x=data,
|
|
326
|
+
fs=sampling_frequency,
|
|
327
|
+
window=window,
|
|
328
|
+
nperseg=segment_length_n,
|
|
286
329
|
noverlap=overlap_n,
|
|
287
330
|
boundary=None,
|
|
288
|
-
axis=1
|
|
331
|
+
axis=1,
|
|
289
332
|
)
|
|
290
333
|
|
|
291
334
|
# Compute total power in the PSD and the total spectrogram (summed over the three axes)
|
|
292
335
|
total_psd = compute_total_power(psd)
|
|
293
|
-
total_spectrogram = np.sum(np.abs(S1)*sampling_frequency, axis=2)
|
|
336
|
+
total_spectrogram = np.sum(np.abs(S1) * sampling_frequency, axis=2)
|
|
294
337
|
|
|
295
338
|
# Compute the MFCC's
|
|
296
339
|
config.mfcc_low_frequency = config.fmin_mfcc
|
|
@@ -301,21 +344,31 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
|
|
|
301
344
|
mfccs = compute_mfccs(
|
|
302
345
|
total_power_array=total_spectrogram,
|
|
303
346
|
config=config,
|
|
304
|
-
total_power_type=
|
|
305
|
-
rounding_method=
|
|
306
|
-
multiplication_factor=1
|
|
347
|
+
total_power_type="spectrogram",
|
|
348
|
+
rounding_method="round",
|
|
349
|
+
multiplication_factor=1,
|
|
307
350
|
)
|
|
308
351
|
|
|
309
352
|
# Combine the MFCCs into the features DataFrame
|
|
310
|
-
mfcc_colnames = [f
|
|
353
|
+
mfcc_colnames = [f"mfcc_{x}" for x in range(1, config.mfcc_n_coefficients + 1)]
|
|
311
354
|
for i, colname in enumerate(mfcc_colnames):
|
|
312
355
|
feature_dict[colname] = mfccs[:, i]
|
|
313
356
|
|
|
314
357
|
# Compute the frequency of the peak, non-tremor power and tremor power
|
|
315
|
-
feature_dict[
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
358
|
+
feature_dict["freq_peak"] = extract_frequency_peak(
|
|
359
|
+
freqs, total_psd, config.fmin_peak_search, config.fmax_peak_search
|
|
360
|
+
)
|
|
361
|
+
feature_dict["below_tremor_power"] = compute_power_in_bandwidth(
|
|
362
|
+
freqs,
|
|
363
|
+
total_psd,
|
|
364
|
+
config.fmin_below_rest_tremor,
|
|
365
|
+
config.fmax_below_rest_tremor,
|
|
366
|
+
include_max=False,
|
|
367
|
+
spectral_resolution=config.spectral_resolution,
|
|
368
|
+
cumulative_sum_method="sum",
|
|
369
|
+
)
|
|
370
|
+
feature_dict["tremor_power"] = extract_tremor_power(
|
|
371
|
+
freqs, total_psd, config.fmin_rest_tremor, config.fmax_rest_tremor
|
|
372
|
+
)
|
|
320
373
|
|
|
321
|
-
return pd.DataFrame(feature_dict)
|
|
374
|
+
return pd.DataFrame(feature_dict)
|