paradigma 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/classification.py +28 -11
- paradigma/config.py +158 -101
- paradigma/constants.py +39 -34
- paradigma/feature_extraction.py +270 -211
- paradigma/pipelines/gait_pipeline.py +286 -190
- paradigma/pipelines/pulse_rate_pipeline.py +202 -133
- paradigma/pipelines/pulse_rate_utils.py +144 -142
- paradigma/pipelines/tremor_pipeline.py +139 -95
- paradigma/preprocessing.py +179 -110
- paradigma/segmenting.py +138 -113
- paradigma/testing.py +359 -172
- paradigma/util.py +171 -80
- {paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info}/METADATA +39 -36
- paradigma-1.0.4.dist-info/RECORD +23 -0
- {paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info}/WHEEL +1 -1
- paradigma-1.0.4.dist-info/entry_points.txt +4 -0
- {paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info/licenses}/LICENSE +0 -1
- paradigma-1.0.2.dist-info/RECORD +0 -22
|
@@ -1,21 +1,26 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
import numpy as np
|
|
3
1
|
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
4
5
|
from scipy import signal
|
|
5
|
-
from scipy.stats import gaussian_kde
|
|
6
6
|
|
|
7
7
|
from paradigma.classification import ClassifierPackage
|
|
8
|
-
from paradigma.constants import DataColumns
|
|
9
8
|
from paradigma.config import TremorConfig
|
|
10
|
-
from paradigma.
|
|
11
|
-
|
|
12
|
-
|
|
9
|
+
from paradigma.constants import DataColumns
|
|
10
|
+
from paradigma.feature_extraction import (
|
|
11
|
+
compute_mfccs,
|
|
12
|
+
compute_power_in_bandwidth,
|
|
13
|
+
compute_total_power,
|
|
14
|
+
extract_frequency_peak,
|
|
15
|
+
extract_tremor_power,
|
|
16
|
+
)
|
|
17
|
+
from paradigma.segmenting import WindowedDataExtractor, tabulate_windows
|
|
13
18
|
from paradigma.util import aggregate_parameter
|
|
14
19
|
|
|
15
20
|
|
|
16
21
|
def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFrame:
|
|
17
22
|
"""
|
|
18
|
-
This function groups sequences of timestamps into windows and subsequently extracts
|
|
23
|
+
This function groups sequences of timestamps into windows and subsequently extracts
|
|
19
24
|
tremor features from windowed gyroscope data.
|
|
20
25
|
|
|
21
26
|
Parameters
|
|
@@ -32,7 +37,7 @@ def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFr
|
|
|
32
37
|
-------
|
|
33
38
|
pd.DataFrame
|
|
34
39
|
A DataFrame containing extracted tremor features and a column corresponding to time.
|
|
35
|
-
|
|
40
|
+
|
|
36
41
|
Notes
|
|
37
42
|
-----
|
|
38
43
|
- This function groups the data into windows based on timestamps.
|
|
@@ -44,21 +49,27 @@ def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFr
|
|
|
44
49
|
If the input DataFrame does not contain the required columns as specified in the configuration or if any step in the feature extraction fails.
|
|
45
50
|
"""
|
|
46
51
|
# group sequences of timestamps into windows
|
|
47
|
-
|
|
48
|
-
windowed_data = tabulate_windows(
|
|
52
|
+
windowed_colnames = [config.time_colname] + config.gyroscope_colnames
|
|
53
|
+
windowed_data = tabulate_windows(
|
|
54
|
+
df,
|
|
55
|
+
windowed_colnames,
|
|
56
|
+
config.window_length_s,
|
|
57
|
+
config.window_step_length_s,
|
|
58
|
+
config.sampling_frequency,
|
|
59
|
+
)
|
|
49
60
|
|
|
50
|
-
extractor = WindowedDataExtractor(
|
|
61
|
+
extractor = WindowedDataExtractor(windowed_colnames)
|
|
51
62
|
|
|
52
63
|
# Extract the start time and gyroscope data from the windowed data
|
|
53
|
-
idx_time = extractor.get_index(
|
|
54
|
-
idx_gyro = extractor.get_slice(config.
|
|
64
|
+
idx_time = extractor.get_index(config.time_colname)
|
|
65
|
+
idx_gyro = extractor.get_slice(config.gyroscope_colnames)
|
|
55
66
|
|
|
56
67
|
# Extract data
|
|
57
68
|
start_time = np.min(windowed_data[:, :, idx_time], axis=1)
|
|
58
69
|
windowed_gyro = windowed_data[:, :, idx_gyro]
|
|
59
70
|
|
|
60
|
-
df_features = pd.DataFrame(start_time, columns=[
|
|
61
|
-
|
|
71
|
+
df_features = pd.DataFrame(start_time, columns=[config.time_colname])
|
|
72
|
+
|
|
62
73
|
# transform the signals from the temporal domain to the spectral domain and extract tremor features
|
|
63
74
|
df_spectral_features = extract_spectral_domain_features(windowed_gyro, config)
|
|
64
75
|
|
|
@@ -68,7 +79,9 @@ def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFr
|
|
|
68
79
|
return df_features
|
|
69
80
|
|
|
70
81
|
|
|
71
|
-
def detect_tremor(
|
|
82
|
+
def detect_tremor(
|
|
83
|
+
df: pd.DataFrame, config: TremorConfig, full_path_to_classifier_package: str | Path
|
|
84
|
+
) -> pd.DataFrame:
|
|
72
85
|
"""
|
|
73
86
|
Detects tremor in the input DataFrame using a pre-trained classifier and applies a threshold to the predicted probabilities.
|
|
74
87
|
|
|
@@ -130,26 +143,39 @@ def detect_tremor(df: pd.DataFrame, config: TremorConfig, full_path_to_classifie
|
|
|
130
143
|
X = df.loc[:, feature_names_predictions].copy()
|
|
131
144
|
X.loc[:, feature_names_scaling] = scaled_features
|
|
132
145
|
|
|
133
|
-
# Get the tremor probability
|
|
146
|
+
# Get the tremor probability
|
|
134
147
|
df[DataColumns.PRED_TREMOR_PROBA] = clf_package.predict_proba(X)
|
|
135
148
|
|
|
136
149
|
# Make prediction based on pre-defined threshold
|
|
137
|
-
df[DataColumns.PRED_TREMOR_LOGREG] = (
|
|
150
|
+
df[DataColumns.PRED_TREMOR_LOGREG] = (
|
|
151
|
+
df[DataColumns.PRED_TREMOR_PROBA] >= clf_package.threshold
|
|
152
|
+
).astype(int)
|
|
153
|
+
|
|
154
|
+
# Perform extra checks for rest tremor
|
|
155
|
+
peak_check = (df["freq_peak"] >= config.fmin_rest_tremor) & (
|
|
156
|
+
df["freq_peak"] <= config.fmax_rest_tremor
|
|
157
|
+
) # peak within 3-7 Hz
|
|
158
|
+
df[DataColumns.PRED_ARM_AT_REST] = (
|
|
159
|
+
df["below_tremor_power"] <= config.movement_threshold
|
|
160
|
+
).astype(
|
|
161
|
+
int
|
|
162
|
+
) # arm at rest or in stable posture
|
|
163
|
+
df[DataColumns.PRED_TREMOR_CHECKED] = (
|
|
164
|
+
(df[DataColumns.PRED_TREMOR_LOGREG] == 1)
|
|
165
|
+
& peak_check
|
|
166
|
+
& df[DataColumns.PRED_ARM_AT_REST]
|
|
167
|
+
).astype(int)
|
|
138
168
|
|
|
139
|
-
# Perform extra checks for rest tremor
|
|
140
|
-
peak_check = (df['freq_peak'] >= config.fmin_rest_tremor) & (df['freq_peak']<=config.fmax_rest_tremor) # peak within 3-7 Hz
|
|
141
|
-
df[DataColumns.PRED_ARM_AT_REST] = (df['below_tremor_power'] <= config.movement_threshold).astype(int) # arm at rest or in stable posture
|
|
142
|
-
df[DataColumns.PRED_TREMOR_CHECKED] = ((df[DataColumns.PRED_TREMOR_LOGREG]==1) & (peak_check==True) & (df[DataColumns.PRED_ARM_AT_REST] == True)).astype(int)
|
|
143
|
-
|
|
144
169
|
return df
|
|
145
170
|
|
|
171
|
+
|
|
146
172
|
def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
|
|
147
173
|
"""
|
|
148
174
|
Quantifies the amount of tremor time and tremor power, aggregated over all windows in the input dataframe.
|
|
149
|
-
Tremor time is calculated as the number of the detected tremor windows, as percentage of the number of windows
|
|
175
|
+
Tremor time is calculated as the number of the detected tremor windows, as percentage of the number of windows
|
|
150
176
|
without significant non-tremor movement (at rest). For tremor power the following aggregates are derived:
|
|
151
|
-
the median, mode and percentile of tremor power specified in the configuration object.
|
|
152
|
-
|
|
177
|
+
the median, mode and percentile of tremor power specified in the configuration object.
|
|
178
|
+
|
|
153
179
|
Parameters
|
|
154
180
|
----------
|
|
155
181
|
df : pd.DataFrame
|
|
@@ -170,61 +196,69 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
|
|
|
170
196
|
- Tremor power is converted to log scale, after adding a constant of 1, so that zero tremor power
|
|
171
197
|
corresponds to a value of 0 in log scale.
|
|
172
198
|
- The modal tremor power is computed based on gaussian kernel density estimation.
|
|
173
|
-
|
|
199
|
+
|
|
174
200
|
"""
|
|
175
|
-
nr_valid_days =
|
|
176
|
-
|
|
201
|
+
nr_valid_days = (
|
|
202
|
+
df["time_dt"].dt.date.unique().size
|
|
203
|
+
) # number of valid days in the input dataframe
|
|
204
|
+
nr_windows_total = df.shape[0] # number of windows in the input dataframe
|
|
177
205
|
|
|
178
206
|
# remove windows with detected non-tremor arm movements to control for the amount of arm activities performed
|
|
179
207
|
df_filtered = df.loc[df.pred_arm_at_rest == 1]
|
|
180
|
-
nr_windows_rest = df_filtered.shape[
|
|
208
|
+
nr_windows_rest = df_filtered.shape[
|
|
209
|
+
0
|
|
210
|
+
] # number of windows without non-tremor arm movement
|
|
181
211
|
|
|
182
|
-
if
|
|
183
|
-
|
|
212
|
+
if (
|
|
213
|
+
nr_windows_rest == 0
|
|
214
|
+
): # if no windows without non-tremor arm movement are detected
|
|
215
|
+
raise Warning("No windows without non-tremor arm movement are detected.")
|
|
184
216
|
|
|
185
217
|
# calculate tremor time
|
|
186
|
-
n_windows_tremor = np.sum(df_filtered[
|
|
187
|
-
perc_windows_tremor =
|
|
218
|
+
n_windows_tremor = np.sum(df_filtered["pred_tremor_checked"])
|
|
219
|
+
perc_windows_tremor = (
|
|
220
|
+
n_windows_tremor / nr_windows_rest * 100
|
|
221
|
+
) # as percentage of total measured time without non-tremor arm movement
|
|
188
222
|
|
|
189
|
-
aggregated_tremor_power =
|
|
190
|
-
|
|
191
|
-
|
|
223
|
+
aggregated_tremor_power = (
|
|
224
|
+
{}
|
|
225
|
+
) # initialize dictionary to store aggregated tremor power measures
|
|
192
226
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
227
|
+
if (
|
|
228
|
+
n_windows_tremor == 0
|
|
229
|
+
): # if no tremor is detected, the tremor power measures are set to NaN
|
|
230
|
+
|
|
231
|
+
aggregated_tremor_power["median_tremor_power"] = np.nan
|
|
232
|
+
aggregated_tremor_power["mode_binned_tremor_power"] = np.nan
|
|
233
|
+
aggregated_tremor_power["90p_tremor_power"] = np.nan
|
|
196
234
|
|
|
197
235
|
else:
|
|
198
|
-
|
|
236
|
+
|
|
199
237
|
# calculate aggregated tremor power measures
|
|
200
|
-
tremor_power = df_filtered.loc[
|
|
201
|
-
|
|
202
|
-
|
|
238
|
+
tremor_power = df_filtered.loc[
|
|
239
|
+
df_filtered["pred_tremor_checked"] == 1, "tremor_power"
|
|
240
|
+
]
|
|
241
|
+
tremor_power = np.log10(tremor_power + 1) # convert to log scale
|
|
242
|
+
|
|
203
243
|
for aggregate in config.aggregates_tremor_power:
|
|
204
244
|
aggregate_name = f"{aggregate}_tremor_power"
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
kde_values = kde(bin_edges)
|
|
210
|
-
max_index = np.argmax(kde_values)
|
|
211
|
-
aggregated_tremor_power['modal_tremor_power'] = bin_edges[max_index]
|
|
212
|
-
else: # calculate te other aggregates (e.g. median and 90th percentile) of tremor power
|
|
213
|
-
aggregated_tremor_power[aggregate_name] = aggregate_parameter(tremor_power, aggregate)
|
|
214
|
-
|
|
245
|
+
aggregated_tremor_power[aggregate_name] = aggregate_parameter(
|
|
246
|
+
tremor_power, aggregate, config.evaluation_points_tremor_power
|
|
247
|
+
)
|
|
248
|
+
|
|
215
249
|
# store aggregates in json format
|
|
216
250
|
d_aggregates = {
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
251
|
+
"metadata": {
|
|
252
|
+
"nr_valid_days": nr_valid_days,
|
|
253
|
+
"nr_windows_total": nr_windows_total,
|
|
254
|
+
"nr_windows_rest": nr_windows_rest,
|
|
255
|
+
},
|
|
256
|
+
"aggregated_tremor_measures": {
|
|
257
|
+
"perc_windows_tremor": perc_windows_tremor,
|
|
258
|
+
"median_tremor_power": aggregated_tremor_power["median_tremor_power"],
|
|
259
|
+
"modal_tremor_power": aggregated_tremor_power["mode_binned_tremor_power"],
|
|
260
|
+
"90p_tremor_power": aggregated_tremor_power["90p_tremor_power"],
|
|
221
261
|
},
|
|
222
|
-
'aggregated_tremor_measures': {
|
|
223
|
-
'perc_windows_tremor': perc_windows_tremor,
|
|
224
|
-
'median_tremor_power': aggregated_tremor_power['median_tremor_power'],
|
|
225
|
-
'modal_tremor_power': aggregated_tremor_power['modal_tremor_power'],
|
|
226
|
-
'90p_tremor_power': aggregated_tremor_power['90p_tremor_power']
|
|
227
|
-
}
|
|
228
262
|
}
|
|
229
263
|
|
|
230
264
|
return d_aggregates
|
|
@@ -234,7 +268,7 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
|
|
|
234
268
|
"""
|
|
235
269
|
Compute spectral domain features from the gyroscope data.
|
|
236
270
|
|
|
237
|
-
This function computes Mel-frequency cepstral coefficients (MFCCs), the frequency of the peak,
|
|
271
|
+
This function computes Mel-frequency cepstral coefficients (MFCCs), the frequency of the peak,
|
|
238
272
|
the tremor power, and the below tremor power based on the total power spectral density of the windowed gyroscope data.
|
|
239
273
|
|
|
240
274
|
Parameters
|
|
@@ -242,15 +276,15 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
|
|
|
242
276
|
data : numpy.ndarray
|
|
243
277
|
A 2D numpy array where each row corresponds to a window of gyroscope data.
|
|
244
278
|
config : object
|
|
245
|
-
Configuration object containing settings such as sampling frequency, window type,
|
|
279
|
+
Configuration object containing settings such as sampling frequency, window type,
|
|
246
280
|
and MFCC parameters.
|
|
247
|
-
|
|
281
|
+
|
|
248
282
|
Returns
|
|
249
283
|
-------
|
|
250
284
|
pd.DataFrame
|
|
251
|
-
The feature dataframe containing the extracted spectral features, including
|
|
285
|
+
The feature dataframe containing the extracted spectral features, including
|
|
252
286
|
MFCCs, the frequency of the peak, the tremor power and below tremor power for each window.
|
|
253
|
-
|
|
287
|
+
|
|
254
288
|
"""
|
|
255
289
|
|
|
256
290
|
# Initialize a dictionary to hold the results
|
|
@@ -262,7 +296,7 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
|
|
|
262
296
|
segment_length_spectrogram_s = config.segment_length_spectrogram_s
|
|
263
297
|
overlap_fraction = config.overlap_fraction
|
|
264
298
|
spectral_resolution = config.spectral_resolution
|
|
265
|
-
window_type =
|
|
299
|
+
window_type = "hann"
|
|
266
300
|
|
|
267
301
|
# Compute the power spectral density
|
|
268
302
|
segment_length_n = sampling_frequency * segment_length_psd_s
|
|
@@ -271,15 +305,15 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
|
|
|
271
305
|
nfft = sampling_frequency / spectral_resolution
|
|
272
306
|
|
|
273
307
|
freqs, psd = signal.welch(
|
|
274
|
-
x=data,
|
|
275
|
-
fs=sampling_frequency,
|
|
276
|
-
window=window,
|
|
308
|
+
x=data,
|
|
309
|
+
fs=sampling_frequency,
|
|
310
|
+
window=window,
|
|
277
311
|
nperseg=segment_length_n,
|
|
278
|
-
noverlap=overlap_n,
|
|
279
|
-
nfft=nfft,
|
|
280
|
-
detrend=False,
|
|
281
|
-
scaling=
|
|
282
|
-
axis=1
|
|
312
|
+
noverlap=overlap_n,
|
|
313
|
+
nfft=nfft,
|
|
314
|
+
detrend=False,
|
|
315
|
+
scaling="density",
|
|
316
|
+
axis=1,
|
|
283
317
|
)
|
|
284
318
|
|
|
285
319
|
# Compute the spectrogram
|
|
@@ -288,18 +322,18 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
|
|
|
288
322
|
window = signal.get_window(window_type, segment_length_n)
|
|
289
323
|
|
|
290
324
|
f, t, S1 = signal.stft(
|
|
291
|
-
x=data,
|
|
292
|
-
fs=sampling_frequency,
|
|
293
|
-
window=window,
|
|
294
|
-
nperseg=segment_length_n,
|
|
325
|
+
x=data,
|
|
326
|
+
fs=sampling_frequency,
|
|
327
|
+
window=window,
|
|
328
|
+
nperseg=segment_length_n,
|
|
295
329
|
noverlap=overlap_n,
|
|
296
330
|
boundary=None,
|
|
297
|
-
axis=1
|
|
331
|
+
axis=1,
|
|
298
332
|
)
|
|
299
333
|
|
|
300
334
|
# Compute total power in the PSD and the total spectrogram (summed over the three axes)
|
|
301
335
|
total_psd = compute_total_power(psd)
|
|
302
|
-
total_spectrogram = np.sum(np.abs(S1)*sampling_frequency, axis=2)
|
|
336
|
+
total_spectrogram = np.sum(np.abs(S1) * sampling_frequency, axis=2)
|
|
303
337
|
|
|
304
338
|
# Compute the MFCC's
|
|
305
339
|
config.mfcc_low_frequency = config.fmin_mfcc
|
|
@@ -310,21 +344,31 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
|
|
|
310
344
|
mfccs = compute_mfccs(
|
|
311
345
|
total_power_array=total_spectrogram,
|
|
312
346
|
config=config,
|
|
313
|
-
total_power_type=
|
|
314
|
-
rounding_method=
|
|
315
|
-
multiplication_factor=1
|
|
347
|
+
total_power_type="spectrogram",
|
|
348
|
+
rounding_method="round",
|
|
349
|
+
multiplication_factor=1,
|
|
316
350
|
)
|
|
317
351
|
|
|
318
352
|
# Combine the MFCCs into the features DataFrame
|
|
319
|
-
mfcc_colnames = [f
|
|
353
|
+
mfcc_colnames = [f"mfcc_{x}" for x in range(1, config.mfcc_n_coefficients + 1)]
|
|
320
354
|
for i, colname in enumerate(mfcc_colnames):
|
|
321
355
|
feature_dict[colname] = mfccs[:, i]
|
|
322
356
|
|
|
323
357
|
# Compute the frequency of the peak, non-tremor power and tremor power
|
|
324
|
-
feature_dict[
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
358
|
+
feature_dict["freq_peak"] = extract_frequency_peak(
|
|
359
|
+
freqs, total_psd, config.fmin_peak_search, config.fmax_peak_search
|
|
360
|
+
)
|
|
361
|
+
feature_dict["below_tremor_power"] = compute_power_in_bandwidth(
|
|
362
|
+
freqs,
|
|
363
|
+
total_psd,
|
|
364
|
+
config.fmin_below_rest_tremor,
|
|
365
|
+
config.fmax_below_rest_tremor,
|
|
366
|
+
include_max=False,
|
|
367
|
+
spectral_resolution=config.spectral_resolution,
|
|
368
|
+
cumulative_sum_method="sum",
|
|
369
|
+
)
|
|
370
|
+
feature_dict["tremor_power"] = extract_tremor_power(
|
|
371
|
+
freqs, total_psd, config.fmin_rest_tremor, config.fmax_rest_tremor
|
|
372
|
+
)
|
|
329
373
|
|
|
330
|
-
return pd.DataFrame(feature_dict)
|
|
374
|
+
return pd.DataFrame(feature_dict)
|