paradigma 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,26 @@
1
- import pandas as pd
2
- import numpy as np
3
1
  from pathlib import Path
2
+
3
+ import numpy as np
4
+ import pandas as pd
4
5
  from scipy import signal
5
- from scipy.stats import gaussian_kde
6
6
 
7
7
  from paradigma.classification import ClassifierPackage
8
- from paradigma.constants import DataColumns
9
8
  from paradigma.config import TremorConfig
10
- from paradigma.feature_extraction import compute_mfccs, compute_power_in_bandwidth, compute_total_power, extract_frequency_peak, \
11
- extract_tremor_power
12
- from paradigma.segmenting import tabulate_windows, WindowedDataExtractor
9
+ from paradigma.constants import DataColumns
10
+ from paradigma.feature_extraction import (
11
+ compute_mfccs,
12
+ compute_power_in_bandwidth,
13
+ compute_total_power,
14
+ extract_frequency_peak,
15
+ extract_tremor_power,
16
+ )
17
+ from paradigma.segmenting import WindowedDataExtractor, tabulate_windows
13
18
  from paradigma.util import aggregate_parameter
14
19
 
15
20
 
16
21
  def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFrame:
17
22
  """
18
- This function groups sequences of timestamps into windows and subsequently extracts
23
+ This function groups sequences of timestamps into windows and subsequently extracts
19
24
  tremor features from windowed gyroscope data.
20
25
 
21
26
  Parameters
@@ -32,7 +37,7 @@ def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFr
32
37
  -------
33
38
  pd.DataFrame
34
39
  A DataFrame containing extracted tremor features and a column corresponding to time.
35
-
40
+
36
41
  Notes
37
42
  -----
38
43
  - This function groups the data into windows based on timestamps.
@@ -44,21 +49,27 @@ def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFr
44
49
  If the input DataFrame does not contain the required columns as specified in the configuration or if any step in the feature extraction fails.
45
50
  """
46
51
  # group sequences of timestamps into windows
47
- windowed_cols = [DataColumns.TIME] + config.gyroscope_cols
48
- windowed_data = tabulate_windows(df, windowed_cols, config.window_length_s, config.window_step_length_s, config.sampling_frequency)
52
+ windowed_colnames = [config.time_colname] + config.gyroscope_colnames
53
+ windowed_data = tabulate_windows(
54
+ df,
55
+ windowed_colnames,
56
+ config.window_length_s,
57
+ config.window_step_length_s,
58
+ config.sampling_frequency,
59
+ )
49
60
 
50
- extractor = WindowedDataExtractor(windowed_cols)
61
+ extractor = WindowedDataExtractor(windowed_colnames)
51
62
 
52
63
  # Extract the start time and gyroscope data from the windowed data
53
- idx_time = extractor.get_index(DataColumns.TIME)
54
- idx_gyro = extractor.get_slice(config.gyroscope_cols)
64
+ idx_time = extractor.get_index(config.time_colname)
65
+ idx_gyro = extractor.get_slice(config.gyroscope_colnames)
55
66
 
56
67
  # Extract data
57
68
  start_time = np.min(windowed_data[:, :, idx_time], axis=1)
58
69
  windowed_gyro = windowed_data[:, :, idx_gyro]
59
70
 
60
- df_features = pd.DataFrame(start_time, columns=[DataColumns.TIME])
61
-
71
+ df_features = pd.DataFrame(start_time, columns=[config.time_colname])
72
+
62
73
  # transform the signals from the temporal domain to the spectral domain and extract tremor features
63
74
  df_spectral_features = extract_spectral_domain_features(windowed_gyro, config)
64
75
 
@@ -68,7 +79,9 @@ def extract_tremor_features(df: pd.DataFrame, config: TremorConfig) -> pd.DataFr
68
79
  return df_features
69
80
 
70
81
 
71
- def detect_tremor(df: pd.DataFrame, config: TremorConfig, full_path_to_classifier_package: str | Path) -> pd.DataFrame:
82
+ def detect_tremor(
83
+ df: pd.DataFrame, config: TremorConfig, full_path_to_classifier_package: str | Path
84
+ ) -> pd.DataFrame:
72
85
  """
73
86
  Detects tremor in the input DataFrame using a pre-trained classifier and applies a threshold to the predicted probabilities.
74
87
 
@@ -130,26 +143,39 @@ def detect_tremor(df: pd.DataFrame, config: TremorConfig, full_path_to_classifie
130
143
  X = df.loc[:, feature_names_predictions].copy()
131
144
  X.loc[:, feature_names_scaling] = scaled_features
132
145
 
133
- # Get the tremor probability
146
+ # Get the tremor probability
134
147
  df[DataColumns.PRED_TREMOR_PROBA] = clf_package.predict_proba(X)
135
148
 
136
149
  # Make prediction based on pre-defined threshold
137
- df[DataColumns.PRED_TREMOR_LOGREG] = (df[DataColumns.PRED_TREMOR_PROBA] >= clf_package.threshold).astype(int)
150
+ df[DataColumns.PRED_TREMOR_LOGREG] = (
151
+ df[DataColumns.PRED_TREMOR_PROBA] >= clf_package.threshold
152
+ ).astype(int)
153
+
154
+ # Perform extra checks for rest tremor
155
+ peak_check = (df["freq_peak"] >= config.fmin_rest_tremor) & (
156
+ df["freq_peak"] <= config.fmax_rest_tremor
157
+ ) # peak within 3-7 Hz
158
+ df[DataColumns.PRED_ARM_AT_REST] = (
159
+ df["below_tremor_power"] <= config.movement_threshold
160
+ ).astype(
161
+ int
162
+ ) # arm at rest or in stable posture
163
+ df[DataColumns.PRED_TREMOR_CHECKED] = (
164
+ (df[DataColumns.PRED_TREMOR_LOGREG] == 1)
165
+ & peak_check
166
+ & df[DataColumns.PRED_ARM_AT_REST]
167
+ ).astype(int)
138
168
 
139
- # Perform extra checks for rest tremor
140
- peak_check = (df['freq_peak'] >= config.fmin_rest_tremor) & (df['freq_peak']<=config.fmax_rest_tremor) # peak within 3-7 Hz
141
- df[DataColumns.PRED_ARM_AT_REST] = (df['below_tremor_power'] <= config.movement_threshold).astype(int) # arm at rest or in stable posture
142
- df[DataColumns.PRED_TREMOR_CHECKED] = ((df[DataColumns.PRED_TREMOR_LOGREG]==1) & (peak_check==True) & (df[DataColumns.PRED_ARM_AT_REST] == True)).astype(int)
143
-
144
169
  return df
145
170
 
171
+
146
172
  def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
147
173
  """
148
174
  Quantifies the amount of tremor time and tremor power, aggregated over all windows in the input dataframe.
149
- Tremor time is calculated as the number of the detected tremor windows, as percentage of the number of windows
175
+ Tremor time is calculated as the number of the detected tremor windows, as percentage of the number of windows
150
176
  without significant non-tremor movement (at rest). For tremor power the following aggregates are derived:
151
- the median, mode and percentile of tremor power specified in the configuration object.
152
-
177
+ the median, mode and percentile of tremor power specified in the configuration object.
178
+
153
179
  Parameters
154
180
  ----------
155
181
  df : pd.DataFrame
@@ -170,61 +196,69 @@ def aggregate_tremor(df: pd.DataFrame, config: TremorConfig):
170
196
  - Tremor power is converted to log scale, after adding a constant of 1, so that zero tremor power
171
197
  corresponds to a value of 0 in log scale.
172
198
  - The modal tremor power is computed based on gaussian kernel density estimation.
173
-
199
+
174
200
  """
175
- nr_valid_days = df['time_dt'].dt.date.unique().size # number of valid days in the input dataframe
176
- nr_windows_total = df.shape[0] # number of windows in the input dataframe
201
+ nr_valid_days = (
202
+ df["time_dt"].dt.date.unique().size
203
+ ) # number of valid days in the input dataframe
204
+ nr_windows_total = df.shape[0] # number of windows in the input dataframe
177
205
 
178
206
  # remove windows with detected non-tremor arm movements to control for the amount of arm activities performed
179
207
  df_filtered = df.loc[df.pred_arm_at_rest == 1]
180
- nr_windows_rest = df_filtered.shape[0] # number of windows without non-tremor arm movement
208
+ nr_windows_rest = df_filtered.shape[
209
+ 0
210
+ ] # number of windows without non-tremor arm movement
181
211
 
182
- if nr_windows_rest == 0: # if no windows without non-tremor arm movement are detected
183
- raise Warning('No windows without non-tremor arm movement are detected.')
212
+ if (
213
+ nr_windows_rest == 0
214
+ ): # if no windows without non-tremor arm movement are detected
215
+ raise Warning("No windows without non-tremor arm movement are detected.")
184
216
 
185
217
  # calculate tremor time
186
- n_windows_tremor = np.sum(df_filtered['pred_tremor_checked'])
187
- perc_windows_tremor = n_windows_tremor / nr_windows_rest * 100 # as percentage of total measured time without non-tremor arm movement
218
+ n_windows_tremor = np.sum(df_filtered["pred_tremor_checked"])
219
+ perc_windows_tremor = (
220
+ n_windows_tremor / nr_windows_rest * 100
221
+ ) # as percentage of total measured time without non-tremor arm movement
188
222
 
189
- aggregated_tremor_power = {} # initialize dictionary to store aggregated tremor power measures
190
-
191
- if n_windows_tremor == 0: # if no tremor is detected, the tremor power measures are set to NaN
223
+ aggregated_tremor_power = (
224
+ {}
225
+ ) # initialize dictionary to store aggregated tremor power measures
192
226
 
193
- aggregated_tremor_power['median_tremor_power'] = np.nan
194
- aggregated_tremor_power['modal_tremor_power'] = np.nan
195
- aggregated_tremor_power['90p_tremor_power'] = np.nan
227
+ if (
228
+ n_windows_tremor == 0
229
+ ): # if no tremor is detected, the tremor power measures are set to NaN
230
+
231
+ aggregated_tremor_power["median_tremor_power"] = np.nan
232
+ aggregated_tremor_power["mode_binned_tremor_power"] = np.nan
233
+ aggregated_tremor_power["90p_tremor_power"] = np.nan
196
234
 
197
235
  else:
198
-
236
+
199
237
  # calculate aggregated tremor power measures
200
- tremor_power = df_filtered.loc[df_filtered['pred_tremor_checked'] == 1, 'tremor_power']
201
- tremor_power = np.log10(tremor_power+1) # convert to log scale
202
-
238
+ tremor_power = df_filtered.loc[
239
+ df_filtered["pred_tremor_checked"] == 1, "tremor_power"
240
+ ]
241
+ tremor_power = np.log10(tremor_power + 1) # convert to log scale
242
+
203
243
  for aggregate in config.aggregates_tremor_power:
204
244
  aggregate_name = f"{aggregate}_tremor_power"
205
- if aggregate == 'mode':
206
- # calculate modal tremor power
207
- bin_edges = np.linspace(0, 6, 301)
208
- kde = gaussian_kde(tremor_power)
209
- kde_values = kde(bin_edges)
210
- max_index = np.argmax(kde_values)
211
- aggregated_tremor_power['modal_tremor_power'] = bin_edges[max_index]
212
- else: # calculate te other aggregates (e.g. median and 90th percentile) of tremor power
213
- aggregated_tremor_power[aggregate_name] = aggregate_parameter(tremor_power, aggregate)
214
-
245
+ aggregated_tremor_power[aggregate_name] = aggregate_parameter(
246
+ tremor_power, aggregate, config.evaluation_points_tremor_power
247
+ )
248
+
215
249
  # store aggregates in json format
216
250
  d_aggregates = {
217
- 'metadata': {
218
- 'nr_valid_days': nr_valid_days,
219
- 'nr_windows_total': nr_windows_total,
220
- 'nr_windows_rest': nr_windows_rest
251
+ "metadata": {
252
+ "nr_valid_days": nr_valid_days,
253
+ "nr_windows_total": nr_windows_total,
254
+ "nr_windows_rest": nr_windows_rest,
255
+ },
256
+ "aggregated_tremor_measures": {
257
+ "perc_windows_tremor": perc_windows_tremor,
258
+ "median_tremor_power": aggregated_tremor_power["median_tremor_power"],
259
+ "modal_tremor_power": aggregated_tremor_power["mode_binned_tremor_power"],
260
+ "90p_tremor_power": aggregated_tremor_power["90p_tremor_power"],
221
261
  },
222
- 'aggregated_tremor_measures': {
223
- 'perc_windows_tremor': perc_windows_tremor,
224
- 'median_tremor_power': aggregated_tremor_power['median_tremor_power'],
225
- 'modal_tremor_power': aggregated_tremor_power['modal_tremor_power'],
226
- '90p_tremor_power': aggregated_tremor_power['90p_tremor_power']
227
- }
228
262
  }
229
263
 
230
264
  return d_aggregates
@@ -234,7 +268,7 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
234
268
  """
235
269
  Compute spectral domain features from the gyroscope data.
236
270
 
237
- This function computes Mel-frequency cepstral coefficients (MFCCs), the frequency of the peak,
271
+ This function computes Mel-frequency cepstral coefficients (MFCCs), the frequency of the peak,
238
272
  the tremor power, and the below tremor power based on the total power spectral density of the windowed gyroscope data.
239
273
 
240
274
  Parameters
@@ -242,15 +276,15 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
242
276
  data : numpy.ndarray
243
277
  A 2D numpy array where each row corresponds to a window of gyroscope data.
244
278
  config : object
245
- Configuration object containing settings such as sampling frequency, window type,
279
+ Configuration object containing settings such as sampling frequency, window type,
246
280
  and MFCC parameters.
247
-
281
+
248
282
  Returns
249
283
  -------
250
284
  pd.DataFrame
251
- The feature dataframe containing the extracted spectral features, including
285
+ The feature dataframe containing the extracted spectral features, including
252
286
  MFCCs, the frequency of the peak, the tremor power and below tremor power for each window.
253
-
287
+
254
288
  """
255
289
 
256
290
  # Initialize a dictionary to hold the results
@@ -262,7 +296,7 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
262
296
  segment_length_spectrogram_s = config.segment_length_spectrogram_s
263
297
  overlap_fraction = config.overlap_fraction
264
298
  spectral_resolution = config.spectral_resolution
265
- window_type = 'hann'
299
+ window_type = "hann"
266
300
 
267
301
  # Compute the power spectral density
268
302
  segment_length_n = sampling_frequency * segment_length_psd_s
@@ -271,15 +305,15 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
271
305
  nfft = sampling_frequency / spectral_resolution
272
306
 
273
307
  freqs, psd = signal.welch(
274
- x=data,
275
- fs=sampling_frequency,
276
- window=window,
308
+ x=data,
309
+ fs=sampling_frequency,
310
+ window=window,
277
311
  nperseg=segment_length_n,
278
- noverlap=overlap_n,
279
- nfft=nfft,
280
- detrend=False,
281
- scaling='density',
282
- axis=1
312
+ noverlap=overlap_n,
313
+ nfft=nfft,
314
+ detrend=False,
315
+ scaling="density",
316
+ axis=1,
283
317
  )
284
318
 
285
319
  # Compute the spectrogram
@@ -288,18 +322,18 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
288
322
  window = signal.get_window(window_type, segment_length_n)
289
323
 
290
324
  f, t, S1 = signal.stft(
291
- x=data,
292
- fs=sampling_frequency,
293
- window=window,
294
- nperseg=segment_length_n,
325
+ x=data,
326
+ fs=sampling_frequency,
327
+ window=window,
328
+ nperseg=segment_length_n,
295
329
  noverlap=overlap_n,
296
330
  boundary=None,
297
- axis=1
331
+ axis=1,
298
332
  )
299
333
 
300
334
  # Compute total power in the PSD and the total spectrogram (summed over the three axes)
301
335
  total_psd = compute_total_power(psd)
302
- total_spectrogram = np.sum(np.abs(S1)*sampling_frequency, axis=2)
336
+ total_spectrogram = np.sum(np.abs(S1) * sampling_frequency, axis=2)
303
337
 
304
338
  # Compute the MFCC's
305
339
  config.mfcc_low_frequency = config.fmin_mfcc
@@ -310,21 +344,31 @@ def extract_spectral_domain_features(data: np.ndarray, config) -> pd.DataFrame:
310
344
  mfccs = compute_mfccs(
311
345
  total_power_array=total_spectrogram,
312
346
  config=config,
313
- total_power_type='spectrogram',
314
- rounding_method='round',
315
- multiplication_factor=1
347
+ total_power_type="spectrogram",
348
+ rounding_method="round",
349
+ multiplication_factor=1,
316
350
  )
317
351
 
318
352
  # Combine the MFCCs into the features DataFrame
319
- mfcc_colnames = [f'mfcc_{x}' for x in range(1, config.mfcc_n_coefficients + 1)]
353
+ mfcc_colnames = [f"mfcc_{x}" for x in range(1, config.mfcc_n_coefficients + 1)]
320
354
  for i, colname in enumerate(mfcc_colnames):
321
355
  feature_dict[colname] = mfccs[:, i]
322
356
 
323
357
  # Compute the frequency of the peak, non-tremor power and tremor power
324
- feature_dict['freq_peak'] = extract_frequency_peak(freqs, total_psd, config.fmin_peak_search, config.fmax_peak_search)
325
- feature_dict['below_tremor_power'] = compute_power_in_bandwidth(freqs, total_psd, config.fmin_below_rest_tremor, config.fmax_below_rest_tremor,
326
- include_max=False, spectral_resolution=config.spectral_resolution,
327
- cumulative_sum_method='sum')
328
- feature_dict['tremor_power'] = extract_tremor_power(freqs, total_psd, config.fmin_rest_tremor, config.fmax_rest_tremor)
358
+ feature_dict["freq_peak"] = extract_frequency_peak(
359
+ freqs, total_psd, config.fmin_peak_search, config.fmax_peak_search
360
+ )
361
+ feature_dict["below_tremor_power"] = compute_power_in_bandwidth(
362
+ freqs,
363
+ total_psd,
364
+ config.fmin_below_rest_tremor,
365
+ config.fmax_below_rest_tremor,
366
+ include_max=False,
367
+ spectral_resolution=config.spectral_resolution,
368
+ cumulative_sum_method="sum",
369
+ )
370
+ feature_dict["tremor_power"] = extract_tremor_power(
371
+ freqs, total_psd, config.fmin_rest_tremor, config.fmax_rest_tremor
372
+ )
329
373
 
330
- return pd.DataFrame(feature_dict)
374
+ return pd.DataFrame(feature_dict)