paradigma 1.0.3-py3-none-any.whl → 1.0.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/classification.py +28 -11
- paradigma/config.py +157 -102
- paradigma/constants.py +39 -34
- paradigma/feature_extraction.py +270 -211
- paradigma/pipelines/gait_pipeline.py +232 -184
- paradigma/pipelines/pulse_rate_pipeline.py +202 -133
- paradigma/pipelines/pulse_rate_utils.py +144 -142
- paradigma/pipelines/tremor_pipeline.py +138 -85
- paradigma/preprocessing.py +179 -110
- paradigma/segmenting.py +138 -113
- paradigma/testing.py +359 -172
- paradigma/util.py +158 -83
- {paradigma-1.0.3.dist-info → paradigma-1.0.4.dist-info}/METADATA +31 -29
- paradigma-1.0.4.dist-info/RECORD +23 -0
- {paradigma-1.0.3.dist-info → paradigma-1.0.4.dist-info}/WHEEL +1 -1
- paradigma-1.0.4.dist-info/entry_points.txt +4 -0
- {paradigma-1.0.3.dist-info → paradigma-1.0.4.dist-info/licenses}/LICENSE +0 -1
- paradigma-1.0.3.dist-info/RECORD +0 -22
paradigma/util.py
CHANGED
@@ -1,17 +1,44 @@
+import functools
 import os
+import warnings
+from datetime import datetime, timedelta
+from typing import List, Optional, Tuple
+
 import numpy as np
 import pandas as pd
-
+import tsdf
 from dateutil import parser
-from typing import List, Tuple, Optional
 from scipy.stats import gaussian_kde
-
-import tsdf
 from tsdf import TSDFMetadata
 
 from paradigma.constants import DataColumns, TimeUnit
 
 
+def deprecated(reason: str = ""):
+    """
+    Decorator to mark functions as deprecated. It will show a warning when the function is used.
+
+    Parameters
+    ----------
+    reason : str, optional
+        Additional message to explain why it is deprecated and what to use instead.
+    """
+
+    def decorator(func):
+        message = f"Function {func.__name__} is deprecated."
+        if reason:
+            message += f" {reason}"
+
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            warnings.warn(message, category=DeprecationWarning, stacklevel=2)
+            return func(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
+
+
 def parse_iso8601_to_datetime(date_str):
     return parser.parse(date_str)
 
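The new `deprecated` decorator wraps a function with `functools.wraps` and emits a `DeprecationWarning` at call time; `stacklevel=2` points the warning at the caller rather than at the wrapper. A minimal sketch of the behavior, using a hypothetical `old_rms` function (not part of the package):

    import warnings

    from paradigma.util import deprecated


    @deprecated("Use new_rms instead.")
    def old_rms(x):
        # Hypothetical legacy helper, only here to demonstrate the decorator.
        return (sum(v * v for v in x) / len(x)) ** 0.5


    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        old_rms([3, 4])

    print(caught[0].category.__name__, caught[0].message)
    # DeprecationWarning Function old_rms is deprecated. Use new_rms instead.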
@@ -28,7 +55,7 @@ def get_end_iso8601(start_iso8601, window_length_seconds):
 
 def write_np_data(
     metadata_time: TSDFMetadata,
-    np_array_time: np.ndarray,
+    np_array_time: np.ndarray,
     metadata_values: TSDFMetadata,
     np_array_values: np.ndarray,
     output_path: str,
@@ -53,7 +80,7 @@ def write_np_data(
         The filename for the metadata.
 
     """
-
+
     if not os.path.exists(output_path):
         os.makedirs(output_path)
 
@@ -62,9 +89,19 @@ def write_np_data(
     metadata_values.file_dir_path = output_path
 
     # store binaries and metadata
-    time_tsdf = tsdf.write_binary_file(
+    time_tsdf = tsdf.write_binary_file(
+        file_dir=output_path,
+        file_name=metadata_time.file_name,
+        data=np_array_time,
+        metadata=metadata_time.get_plain_tsdf_dict_copy(),
+    )
 
-    samples_tsdf = tsdf.write_binary_file(
+    samples_tsdf = tsdf.write_binary_file(
+        file_dir=output_path,
+        file_name=metadata_values.file_name,
+        data=np_array_values,
+        metadata=metadata_values.get_plain_tsdf_dict_copy(),
+    )
 
     tsdf.write_metadata([time_tsdf, samples_tsdf], output_filename)
 
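The reformatting spells out the `tsdf.write_binary_file` keyword arguments (`file_dir`, `file_name`, `data`, `metadata`) instead of the old one-line calls. A usage sketch follows; the paths and filenames are placeholders, the `read_metadata` argument order is taken from the call shown later in `load_tsdf_dataframe`, and the `output_filename` keyword is an assumption inferred from the `tsdf.write_metadata(..., output_filename)` call in the body (that parameter sits between the shown hunks):

    import numpy as np

    from paradigma.util import read_metadata, write_np_data

    metadata_time, metadata_values = read_metadata(
        "data/raw", "imu_meta.json", "imu_time.bin", "imu_values.bin"
    )

    time_array = np.arange(0, 10, 0.01)                 # 100 Hz time axis
    values_array = np.random.randn(time_array.size, 3)  # e.g. 3-axis accelerometer

    write_np_data(
        metadata_time,
        time_array,
        metadata_values,
        values_array,
        output_path="data/processed",
        output_filename="imu_meta.json",  # assumed name of the hidden parameter
    )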
@@ -127,13 +164,23 @@ def read_metadata(
     return metadata_time, metadata_values
 
 
-def load_tsdf_dataframe(
+def load_tsdf_dataframe(
+    path_to_data,
+    prefix,
+    meta_suffix="meta.json",
+    time_suffix="time.bin",
+    values_suffix="values.bin",
+):
     meta_filename = f"{prefix}_{meta_suffix}"
     time_filename = f"{prefix}_{time_suffix}"
     values_filename = f"{prefix}_{values_suffix}"
 
-    metadata_time, metadata_values = read_metadata(
-
+    metadata_time, metadata_values = read_metadata(
+        path_to_data, meta_filename, time_filename, values_filename
+    )
+    df = tsdf.load_dataframe_from_binaries(
+        [metadata_time, metadata_values], tsdf.constants.ConcatenationType.columns
+    )
 
     return df, metadata_time, metadata_values
 
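With the suffix defaults now spelled out in the signature, loading a prepared TSDF dataset is one call. A minimal usage sketch; `path/to/data` and the `gait` prefix are placeholders, and the call expects `gait_meta.json`, `gait_time.bin`, and `gait_values.bin` to exist in that directory:

    from paradigma.util import load_tsdf_dataframe

    # Loads <prefix>_meta.json plus the two binaries and concatenates the
    # time and values columns into a single DataFrame.
    df, metadata_time, metadata_values = load_tsdf_dataframe(
        "path/to/data", prefix="gait"
    )
    print(df.head())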
@@ -152,11 +199,9 @@ def load_metadata_list(
         The filename of the metadata file.
     filenames : List[str]
         The list of binary files of which the metadata files need to be loaded
-
-    """
-    metadata_dict = tsdf.load_metadata_from_path(
-        os.path.join(dir_path, meta_filename)
-    )
+
+    """
+    metadata_dict = tsdf.load_metadata_from_path(os.path.join(dir_path, meta_filename))
     metadata_list = []
     for filename in filenames:
         metadata_list.append(metadata_dict[filename])
@@ -195,37 +240,54 @@ def transform_time_array(
     - The transformation allows for scaling of the time array, converting between time unit types (e.g., relative, absolute, or difference).
     - When converting to `TimeUnit.RELATIVE_MS`, the function calculates the relative time starting from the provided or default start time.
     """
-    input_units = input_unit_type.split(
-    output_units = output_unit_type.split(
+    input_units = input_unit_type.split("_")[-1].lower()
+    output_units = output_unit_type.split("_")[-1].lower()
 
     if input_units == output_units:
         scale_factor = 1
-    elif input_units ==
+    elif input_units == "s" and output_units == "ms":
         scale_factor = 1e3
-    elif input_units ==
+    elif input_units == "ms" and output_units == "s":
         scale_factor = 1 / 1e3
     else:
-        raise ValueError(
-
-
-
-    #
+        raise ValueError(
+            f"Unsupported time units conversion: {input_units} to {output_units}"
+        )
+
+    # Transform to relative time (`TimeUnit.RELATIVE_MS`)
+    if (
+        input_unit_type == TimeUnit.DIFFERENCE_MS
+        or input_unit_type == TimeUnit.DIFFERENCE_S
+    ):
+        # Convert a series of differences into cumulative sum to reconstruct original time series.
         time_array = np.cumsum(np.double(time_array))
-    elif
+    elif (
+        input_unit_type == TimeUnit.ABSOLUTE_MS
+        or input_unit_type == TimeUnit.ABSOLUTE_S
+    ):
         # Set the start time if not provided.
         if np.isclose(start_time, 0.0, rtol=1e-09, atol=1e-09):
             start_time = time_array[0]
         # Convert absolute time stamps into a time series relative to start_time.
-        time_array =
+        time_array = time_array - start_time
 
     # Transform the time array from `TimeUnit.RELATIVE_MS` to the specified time unit type
-    if
+    if (
+        output_unit_type == TimeUnit.ABSOLUTE_MS
+        or output_unit_type == TimeUnit.ABSOLUTE_S
+    ):
         # Converts time array to absolute time by adding the start time to each element.
         time_array = time_array + start_time
-    elif
+    elif (
+        output_unit_type == TimeUnit.DIFFERENCE_MS
+        or output_unit_type == TimeUnit.DIFFERENCE_S
+    ):
         # Creates a new array starting with 0, followed by the differences between consecutive elements.
         time_array = np.diff(np.insert(time_array, 0, start_time))
-    elif
+    elif (
+        output_unit_type == TimeUnit.RELATIVE_MS
+        or output_unit_type == TimeUnit.RELATIVE_S
+    ):
         # The array is already in relative format, do nothing.
         pass
 
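The reconstructed branches make the conversion a two-stage pipeline: first normalize the input to relative time (cumulative sum for difference arrays, subtract `start_time` for absolute arrays), then convert back out to the requested unit type. The arithmetic can be sanity-checked standalone with NumPy; this sketch mirrors the round trip for a relative array with `start_time = 0`, without calling the function itself:

    import numpy as np

    # A relative time axis in seconds at 100 Hz.
    relative_s = np.arange(0.0, 1.0, 0.01)

    # relative -> difference: prepend the start time, then take consecutive deltas ...
    difference_s = np.diff(np.insert(relative_s, 0, 0.0))

    # ... and difference -> relative: the cumulative sum restores the original.
    restored = np.cumsum(np.double(difference_s))
    assert np.allclose(restored, relative_s)

    # Relative seconds -> relative milliseconds is a pure scale factor (1e3).
    relative_ms = relative_s * 1e3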
@@ -256,25 +318,25 @@ def convert_units_accelerometer(data: np.ndarray, units: str) -> np.ndarray:
         return data
     else:
         raise ValueError(f"Unsupported unit: {units}")
-
+
 
 def convert_units_gyroscope(data: np.ndarray, units: str) -> np.ndarray:
     """
     Convert gyroscope data to deg/s.
-
+
     Parameters
     ----------
     data : np.ndarray
         The gyroscope data.
-
+
     units : str
         The unit of the data (currently supports deg/s and rad/s).
-
+
     Returns
     -------
     np.ndarray
         The gyroscope data in deg/s.
-
+
     """
     if units == "deg/s":
         return data
@@ -282,9 +344,9 @@ def convert_units_gyroscope(data: np.ndarray, units: str) -> np.ndarray:
         return np.degrees(data)
     else:
         raise ValueError(f"Unsupported unit: {units}")
-
 
-def invert_watch_side(df: pd.DataFrame, side: str, sensor='both') -> np.ndarray:
+
+def invert_watch_side(df: pd.DataFrame, side: str, sensor="both") -> np.ndarray:
     """
     Invert the data based on the watch side.
 
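`convert_units_gyroscope` normalizes everything to deg/s, with `np.degrees` handling the rad/s case. A quick standalone check of that conversion:

    import numpy as np

    rad_per_s = np.array([0.0, np.pi / 2, np.pi])  # gyroscope samples in rad/s
    deg_per_s = np.degrees(rad_per_s)              # -> [0., 90., 180.]
    assert np.allclose(deg_per_s, [0.0, 90.0, 180.0])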
@@ -305,32 +367,37 @@ def invert_watch_side(df: pd.DataFrame, side: str, sensor='both') -> np.ndarray:
     """
     if side not in ["left", "right"]:
         raise ValueError(f"Unsupported side: {side}")
-    if sensor not in [
+    if sensor not in ["accelerometer", "gyroscope", "both"]:
         raise ValueError(f"Unsupported sensor: {sensor}")
 
     elif side == "right":
-        if sensor in [
+        if sensor in ["gyroscope", "both"]:
            df[DataColumns.GYROSCOPE_Y] *= -1
            df[DataColumns.GYROSCOPE_Z] *= -1
-        if sensor in [
+        if sensor in ["accelerometer", "both"]:
            df[DataColumns.ACCELEROMETER_X] *= -1
 
     return df
 
-def aggregate_parameter(parameter: np.ndarray, aggregate: str, evaluation_points
+
+def aggregate_parameter(
+    parameter: np.ndarray,
+    aggregate: str,
+    evaluation_points: Optional[np.ndarray] = None,
+) -> np.ndarray | int:
     """
     Aggregate a parameter based on the specified method.
-
+
     Parameters
     ----------
     parameter : np.ndarray
         The parameter to aggregate.
-
+
     aggregate : str
         The aggregation method to apply.
 
     evaluation_points : np.ndarray, optional
-        Should be specified if the mode is derived for a continuous parameter.
+        Should be specified if the mode is derived for a continuous parameter.
         Defines the evaluation points for the kernel density estimation function, from which the maximum is derived as the mode.
 
     Returns
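For a right-worn watch, `invert_watch_side` mirrors the axes so both wearing sides share one coordinate convention: gyroscope y and z and accelerometer x are negated. A sketch of the effect, using plain column names in place of the `DataColumns` constants (whose actual string values are not shown in this diff):

    import pandas as pd

    df = pd.DataFrame(
        {
            "accelerometer_x": [0.5],
            "gyroscope_y": [10.0],
            "gyroscope_z": [-3.0],
        }
    )

    # Mirrors what invert_watch_side does for side="right", sensor="both".
    df["gyroscope_y"] *= -1
    df["gyroscope_z"] *= -1
    df["accelerometer_x"] *= -1
    print(df)  # accelerometer_x=-0.5, gyroscope_y=-10.0, gyroscope_z=3.0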
@@ -338,42 +405,45 @@ def aggregate_parameter(parameter: np.ndarray, aggregate: str, evaluation_points
     np.ndarray
         The aggregated parameter.
     """
-    if aggregate ==
+    if aggregate == "mean":
         return np.mean(parameter)
-    elif aggregate ==
+    elif aggregate == "median":
         return np.median(parameter)
-    elif aggregate ==
+    elif aggregate == "mode_binned":
         if evaluation_points is None:
-            raise ValueError(
+            raise ValueError(
+                "evaluation_points must be provided for 'mode_binned' aggregation."
+            )
         else:
             kde = gaussian_kde(parameter)
             kde_values = kde(evaluation_points)
             max_index = np.argmax(kde_values)
             return evaluation_points[max_index]
-    elif aggregate ==
+    elif aggregate == "mode":
         unique_values, counts = np.unique(parameter, return_counts=True)
         return unique_values[np.argmax(counts)]
-    elif aggregate ==
+    elif aggregate == "90p":
         return np.percentile(parameter, 90)
-    elif aggregate ==
+    elif aggregate == "95p":
         return np.percentile(parameter, 95)
-    elif aggregate ==
+    elif aggregate == "99p":
         return np.percentile(parameter, 99)
-    elif aggregate ==
+    elif aggregate == "std":
         return np.std(parameter)
-    elif aggregate ==
+    elif aggregate == "cov":
         mean_value = np.mean(parameter)
         return np.std(parameter) / mean_value if mean_value != 0 else 0
     else:
         raise ValueError(f"Invalid aggregation method: {aggregate}")
 
+
 def merge_predictions_with_timestamps(
-
-
-
-
-
-
+    df_ts: pd.DataFrame,
+    df_predictions: pd.DataFrame,
+    pred_proba_colname: str,
+    window_length_s: float,
+    fs: int,
+) -> pd.DataFrame:
     """
     Merges prediction probabilities with timestamps by expanding overlapping windows
     into individual timestamps and averaging probabilities per unique timestamp.
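The `mode_binned` branch estimates the mode of a continuous parameter by evaluating a Gaussian KDE on a user-supplied grid and taking the argmax. The same idea in isolation, with synthetic data standing in for a real parameter:

    import numpy as np
    from scipy.stats import gaussian_kde

    rng = np.random.default_rng(0)
    parameter = rng.normal(loc=25.0, scale=5.0, size=1_000)  # e.g. arm swing RoM
    evaluation_points = np.linspace(0, 50, 501)

    # Fit the KDE, evaluate it on the grid, and take the grid point of maximum density.
    kde = gaussian_kde(parameter)
    kde_values = kde(evaluation_points)
    mode_binned = evaluation_points[np.argmax(kde_values)]
    print(round(mode_binned, 1))  # close to 25.0 for this sample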
@@ -398,7 +468,7 @@ def merge_predictions_with_timestamps(
 
     fs : int
         The sampling frequency of the data.
-
+
     Returns:
     -------
     pd.DataFrame
@@ -419,22 +489,18 @@ def merge_predictions_with_timestamps(
     # Step 1: Generate all timestamps for prediction windows using NumPy broadcasting
     window_length = int(window_length_s * fs)
     timestamps = (
-        df_predictions[DataColumns.TIME].values[:, None]
-        np.arange(0, window_length) / fs
+        df_predictions[DataColumns.TIME].values[:, None]
+        + np.arange(0, window_length) / fs
     )
-
+
     # Flatten timestamps and probabilities into a single array for efficient processing
     flat_timestamps = timestamps.ravel()
-    flat_proba = np.repeat(
-        df_predictions[pred_proba_colname].values,
-        window_length
-    )
+    flat_proba = np.repeat(df_predictions[pred_proba_colname].values, window_length)
 
     # Step 2: Create a DataFrame for expanded data
-    expanded_df = pd.DataFrame(
-        DataColumns.TIME: flat_timestamps,
-
-    })
+    expanded_df = pd.DataFrame(
+        {DataColumns.TIME: flat_timestamps, pred_proba_colname: flat_proba}
+    )
 
     # Step 3: Round timestamps and aggregate probabilities
     expanded_df[DataColumns.TIME] = expanded_df[DataColumns.TIME].round(2)
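Steps 1-3 expand each window-level prediction into per-sample timestamps via broadcasting, a `(n_windows, 1) + (window_length,)` sum, and then average the probabilities of overlapping windows per rounded timestamp. The same idea in isolation; plain column names stand in for `DataColumns.TIME` and the probability column, and the groupby-mean for `mean_proba` falls between the shown hunks, so it is inferred here from the variable name and the docstring:

    import numpy as np
    import pandas as pd

    fs = 100                # Hz
    window_length_s = 0.04  # toy value: 4 samples per window
    window_length = int(window_length_s * fs)

    start_times = np.array([0.00, 0.02])  # two overlapping windows
    proba = np.array([0.2, 0.8])

    timestamps = start_times[:, None] + np.arange(0, window_length) / fs
    flat_timestamps = timestamps.ravel()
    flat_proba = np.repeat(proba, window_length)

    expanded_df = pd.DataFrame({"time": flat_timestamps, "proba": flat_proba})
    expanded_df["time"] = expanded_df["time"].round(2)
    mean_proba = expanded_df.groupby("time", as_index=False)["proba"].mean()
    print(mean_proba)  # samples at 0.02 and 0.03 overlap and average to 0.5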
@@ -442,14 +508,15 @@ def merge_predictions_with_timestamps(
 
     # Step 4: Round timestamps in `df_ts` and merge
     df_ts[DataColumns.TIME] = df_ts[DataColumns.TIME].round(2)
-    df_ts = pd.merge(df_ts, mean_proba, how=
+    df_ts = pd.merge(df_ts, mean_proba, how="left", on=DataColumns.TIME)
     df_ts = df_ts.dropna(subset=[pred_proba_colname])
 
     return df_ts
 
 
-def select_hours(
-
+def select_hours(
+    df: pd.DataFrame, select_hours_start: str, select_hours_end: str
+) -> pd.DataFrame:
     """
     Select hours of interest from the data to include in the aggregation step.
 
@@ -460,7 +527,7 @@ def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: st
 
     select_hours_start: str
        The start time of the selected hours in "HH:MM" format.
-
+
     select_hours_end: str
        The end time of the selected hours in "HH:MM" format.
 
@@ -471,14 +538,18 @@ def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: st
 
     """
 
-    select_hours_start = datetime.strptime(
-
-
+    select_hours_start = datetime.strptime(
+        select_hours_start, "%H:%M"
+    ).time()  # convert to time object
+    select_hours_end = datetime.strptime(select_hours_end, "%H:%M").time()
+    df_subset = df[
+        df["time_dt"].dt.time.between(select_hours_start, select_hours_end)
+    ]  # select the hours of interest
 
     return df_subset
 
-def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
 
+def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
     """
     Select days of interest from the data to include in the aggregation step.
 
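`select_hours` now parses the "HH:MM" bounds with `datetime.strptime(...).time()` and filters on `df["time_dt"].dt.time.between(...)`. The same filter in isolation, on a toy frame with the `time_dt` column the function expects:

    from datetime import datetime

    import pandas as pd

    df = pd.DataFrame(
        {"time_dt": pd.date_range("2024-01-01 06:00", periods=4, freq="4h")}
    )

    start = datetime.strptime("08:00", "%H:%M").time()
    end = datetime.strptime("22:00", "%H:%M").time()
    df_subset = df[df["time_dt"].dt.time.between(start, end)]
    print(df_subset["time_dt"].dt.time.tolist())  # 10:00, 14:00, 18:00 remain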
@@ -499,8 +570,12 @@ def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
     """
 
     min_s_per_day = min_hours_per_day * 3600
-    window_length_s =
+    window_length_s = (
+        df["time_dt"].diff().dt.total_seconds().iloc[1]
+    )  # determine the length of the first window in seconds
     min_windows_per_day = min_s_per_day / window_length_s
-    df_subset = df.groupby(df[
+    df_subset = df.groupby(df["time_dt"].dt.date).filter(
+        lambda x: len(x) >= min_windows_per_day
+    )
 
-    return df_subset
+    return df_subset
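`select_days` infers the window length from the first timestamp delta and keeps only calendar days with at least `min_hours_per_day` worth of windows, via `groupby(...).filter(...)`. A standalone sketch of that logic on synthetic 6-hour windows:

    import pandas as pd

    # One row per 6-hour window: day 1 has four windows (24 h), day 2 only one (6 h).
    times = pd.to_datetime(
        ["2024-01-01 00:00", "2024-01-01 06:00", "2024-01-01 12:00",
         "2024-01-01 18:00", "2024-01-02 00:00"]
    )
    df = pd.DataFrame({"time_dt": times})

    min_hours_per_day = 12
    min_s_per_day = min_hours_per_day * 3600
    window_length_s = df["time_dt"].diff().dt.total_seconds().iloc[1]  # 21600.0
    min_windows_per_day = min_s_per_day / window_length_s              # 2.0

    df_subset = df.groupby(df["time_dt"].dt.date).filter(
        lambda x: len(x) >= min_windows_per_day
    )
    print(df_subset["time_dt"].dt.date.unique())  # only 2024-01-01 survives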
{paradigma-1.0.3.dist-info → paradigma-1.0.4.dist-info}/METADATA
CHANGED
@@ -1,8 +1,9 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: paradigma
-Version: 1.0.3
+Version: 1.0.4
 Summary: ParaDigMa - A toolbox for deriving Parkinson's disease Digital Markers from real-life wrist sensor data
 License: Apache-2.0
+License-File: LICENSE
 Author: Erik Post
 Author-email: erik.post@radboudumc.nl
 Requires-Python: >=3.11,<4.0
@@ -11,6 +12,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Requires-Dist: nbconvert (>=7.16.6,<8.0.0)
 Requires-Dist: pandas (>=2.1.4,<3.0.0)
 Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
@@ -29,25 +31,25 @@ Description-Content-Type: text/markdown
 | **DOI** | [](https://doi.org/10.5281/zenodo.13838392) |
 | **Build Status** | [](https://www.python.org/downloads/) [](https://github.com/biomarkersParkinson/paradigma/actions/workflows/build-and-test.yml) [](https://github.com/biomarkersParkinson/paradigma/actions/workflows/pages/pages-build-deployment) |
 | **License** | [](https://github.com/biomarkersparkinson/paradigma/blob/main/LICENSE) |
-<!-- | **Fairness** | [](https://fair-software.eu) [](https://www.bestpractices.dev/projects/8083) | -->
+<!-- | **Fairness** | [](https://fair-software.eu) [](https://www.bestpractices.dev/projects/8083) | -->
 
 ## Overview
 The Parkinson's disease Digital Markers (ParaDigMa) toolbox is a Python
 software package designed for processing real-life wrist sensor data
-to extract digital measures of motor and non-motor signs of Parkinson's disease (PD).
-
-Specifically, the toolbox is designed to process accelerometer, gyroscope and
-photoplethysmography (PPG) signals, collected during passive monitoring in daily life.
-It contains three data processing pipelines: (1) arm swing during gait, (2) tremor,
-and (3) pulse rate. These pipelines are scientifically validated for their
-use in persons with PD. Furthermore, the toolbox contains general functionalities for
-signal processing and feature extraction, such as filtering, peak detection, and
+to extract digital measures of motor and non-motor signs of Parkinson's disease (PD).
+
+Specifically, the toolbox is designed to process accelerometer, gyroscope and
+photoplethysmography (PPG) signals, collected during passive monitoring in daily life.
+It contains three data processing pipelines: (1) arm swing during gait, (2) tremor,
+and (3) pulse rate. These pipelines are scientifically validated for their
+use in persons with PD. Furthermore, the toolbox contains general functionalities for
+signal processing and feature extraction, such as filtering, peak detection, and
 spectral analysis.
 
-The toolbox is accompanied by a set of example scripts and notebooks for
-each processing pipeline that demonstrate how to use the toolbox for extracting
+The toolbox is accompanied by a set of example scripts and notebooks for
+each processing pipeline that demonstrate how to use the toolbox for extracting
 digital measures. In addition, the toolbox is designed to be modular, enabling
-researchers to easily extend the toolbox with new algorithms and functionalities.
+researchers to easily extend the toolbox with new algorithms and functionalities.
 
 ## Features
 The components of ParaDigMa are shown in the diagram below.
@@ -60,20 +62,20 @@ ParaDigMa can best be understood by categorizing the sequential processes:
 
 | Process | Description |
 | ---- | ---- |
-| Preprocessing | Preparing raw sensor signals for further processing |
+| Preprocessing | Preparing raw sensor signals for further processing |
 | Feature extraction | Extracting features based on windowed sensor signals |
-| Classification | Detecting segments of interest using validated classifiers (e.g., gait segments) |
+| Classification | Detecting segments of interest using validated classifiers (e.g., gait segments) |
 | Quantification | Extracting specific measures from the detected segments (e.g., arm swing measures) |
 | Aggregation | Aggregating the measures over a specific time period (e.g., week-level aggregates) |
 
 <br/>
-ParaDigMa contains the following validated processing pipelines (each using the processes described above):
+ParaDigMa contains the following validated processing pipelines (each using the processes described above):
 
-| Pipeline | Input | Output classification | Output quantification | Output week-level aggregation |
+| Pipeline | Input | Output classification | Output quantification | Output week-level aggregation |
 | ---- | ---- | ---- | ---- | ---- |
-| **Arm swing during gait** | Wrist accelerometer and gyroscope data | Gait probability, gait without other arm activities probability | Arm swing range of motion (RoM) | Typical & maximum arm swing RoM |
-| **Tremor** | Wrist gyroscope data | Tremor probability | Tremor power | % tremor time, typical & maximum tremor power |
-| **Pulse rate** | Wrist PPG and accelerometer data | PPG signal quality | Pulse rate | Resting & maximum pulse rate |
+| **Arm swing during gait** | Wrist accelerometer and gyroscope data | Gait probability, gait without other arm activities probability | Arm swing range of motion (RoM) | Typical & maximum arm swing RoM |
+| **Tremor** | Wrist gyroscope data | Tremor probability | Tremor power | % tremor time, typical & maximum tremor power |
+| **Pulse rate** | Wrist PPG and accelerometer data | PPG signal quality | Pulse rate | Resting & maximum pulse rate |
 
 ## Installation
 
@@ -91,9 +93,9 @@ The API reference contains detailed documentation of all toolbox modules and fun
 The user guides provide additional information about specific topics (e.g. the required orientation of the wrist sensor).
 
 ### Sensor data requirements
-The ParaDigMa toolbox is designed for the analysis of passive monitoring data collected using a wrist sensor in persons with PD.
+The ParaDigMa toolbox is designed for the analysis of passive monitoring data collected using a wrist sensor in persons with PD.
 
-Specific requirements include:
+Specific requirements include:
 | Pipeline | Sensor Configuration | Context of Use |
 |------------------------|--------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------|
 | **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but different sensitivity for measuring disease progression for tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). <br> - Timeframe: contiguous, strictly increasing timestamps. | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
@@ -101,11 +103,11 @@ Specific requirements include:
 | **Tremor** | - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm). |
 | **Pulse rate** | - PPG*: minimum sampling rate of 30 Hz, green LED. <br> - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. | - Population: no rhythm disorders (e.g. atrial fibrillation, atrial flutter). <br> - Compliance: for weekly measures: minimum average of 12 hours of data per day. |
 
-\* The processing of PPG signals is currently based on the blood volume pulse (arbitrary units) obtained from the Verily Study Watch
+\* The processing of PPG signals is currently based on the blood volume pulse (arbitrary units) obtained from the Verily Study Watch. [This](https://biomarkersparkinson.github.io/paradigma/tutorials/_static/pulse_rate_analysis.html#step-3-signal-quality-classification) part of the PPG tutorial provides code and documentation on how to use the pipeline with other PPG devices.
 
 > [!WARNING]
-> While the toolbox is designed to work on any wrist sensor device which fulfills the requirements,
-we have currently verified its performance on data from the Gait-up Physilog 4 (arm swing during gait & tremor) and the Verily Study Watch (all pipelines). Furthermore, the specifications above are the minimally validated requirements. For example, while ParaDigMa works with accelerometer and gyroscope data sampled at 50 Hz, its effect on subsequent processes has not been empirically validated.
+> While the toolbox is designed to work on any wrist sensor device which fulfills the requirements,
+we have currently verified its performance on data from the Gait-up Physilog 4 (arm swing during gait & tremor) and the Verily Study Watch (all pipelines). Furthermore, the specifications above are the minimally validated requirements. For example, while ParaDigMa works with accelerometer and gyroscope data sampled at 50 Hz, its effect on subsequent processes has not been empirically validated.
 <br/>
 
 We have included support for [TSDF](https://biomarkersparkinson.github.io/tsdf/) as format for loading and storing sensor data. TSDF enables efficient data storage with added metadata. However, ParaDigMa does not require a particular method of data storage and retrieval. Please see our tutorial [Data preparation](https://biomarkersparkinson.github.io/paradigma/tutorials/data_preparation.html) for examples of loading TSDF and other data formats into memory, and for preparing raw sensor data as input for the processing pipelines.
@@ -119,7 +121,7 @@ The pipelines were developed and validated using data from the Parkinson@Home Va
 
 ## Contributing
 
-We welcome contributions! Please check out our [contributing guidelines](https://biomarkersparkinson.github.io/paradigma/contributing.html).
+We welcome contributions! Please check out our [contributing guidelines](https://biomarkersparkinson.github.io/paradigma/contributing.html).
 Please note that this project is released with a [Code of Conduct](https://biomarkersparkinson.github.io/paradigma/conduct.html). By contributing to this project, you agree to abide by its terms.
 
 ## License
@@ -128,8 +130,8 @@ It is licensed under the terms of the Apache License 2.0 license. See [License](
 
 ## Acknowledgements
 
-The core team of ParaDigMa consists of Erik Post, Kars Veldkamp, Nienke Timmermans, Diogo Coutinho Soriano, Peter Kok, Vedran Kasalica and Luc Evers.
-Advisors to the project are Max Little, Jordan Raykov, Twan van Laarhoven, Hayriye Cagnan, and Bas Bloem.
+The core team of ParaDigMa consists of Erik Post, Kars Veldkamp, Nienke Timmermans, Diogo Coutinho Soriano, Peter Kok, Vedran Kasalica and Luc Evers.
+Advisors to the project are Max Little, Jordan Raykov, Twan van Laarhoven, Hayriye Cagnan, and Bas Bloem.
 The initial release of ParaDigMa was funded by the Michael J Fox Foundation (grant #020425) and the Dutch Research Council (grant #ASDI.2020.060 & grant #2023.010).
 ParaDigMa was created with [`cookiecutter`](https://cookiecutter.readthedocs.io/en/latest/) and the `py-pkgs-cookiecutter` [template](https://github.com/py-pkgs/py-pkgs-cookiecutter).
 
paradigma-1.0.4.dist-info/RECORD
ADDED
@@ -0,0 +1,23 @@
+paradigma/__init__.py,sha256=vCLqo7vOEgcnYs10gUVYvEFfi8y-jBi7w1YKRoqn95k,127
+paradigma/assets/gait_detection_clf_package.pkl,sha256=8jCbuM_4dkilSjOEk9ss7bJbSppgzXe72y0X4BCnzCU,11497247
+paradigma/assets/gait_filtering_clf_package.pkl,sha256=lAaLyhmXdV4X_drmYt0EM6wGwSo80yhpxtncWGq4RfQ,3915
+paradigma/assets/ppg_quality_clf_package.pkl,sha256=vUcM4v8gZwWAmDVK7E4UcHhVnhlEg27RSB71oPGloSc,1292
+paradigma/assets/tremor_detection_clf_package.pkl,sha256=S-KsK1EcUBJX6oGGBo8GqU0AhNZThA6Qe-cs0QPcWw4,1475
+paradigma/classification.py,sha256=yDTetqTZT0c7G0QtNX_i7SNjevEJeaqr334HyZUE6zw,3302
+paradigma/config.py,sha256=B2a3oCusaxH2vAjNV4ae7IWstVwhMpD5H1uN_7Oz4U4,13924
+paradigma/constants.py,sha256=fbJuZW5VB_hhz9NQYwjOxINOJPAcCgRPYJNL6tMqpTA,3493
+paradigma/feature_extraction.py,sha256=meL8fKmaJd1t7apEzFjt757w7M9EF4lX9w-xK2oRgT8,35686
+paradigma/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+paradigma/pipelines/gait_pipeline.py,sha256=WkAL6iazYQlze21RLMsEPP5q0XAYlOkkDK74c-A8HnE,29190
+paradigma/pipelines/pulse_rate_pipeline.py,sha256=vktK9C1K-0-MifwJ3t_-gnEmEBFExovGVmgXDjBs2j4,17990
+paradigma/pipelines/pulse_rate_utils.py,sha256=-ixh9aTz_bwqCxpBPJW_L37I5yYuI9u5fJ8TBtDoL8Q,26480
+paradigma/pipelines/tremor_pipeline.py,sha256=Le5DUBMhg7DQaRmS49h3SavoUwKn63bJvlmj2-se7sw,14537
+paradigma/preprocessing.py,sha256=324xRLe_fCwbduSwieFNJOn33AStWpC1eMO1QW6etJQ,17119
+paradigma/segmenting.py,sha256=ccc6gwfXouDi6WGhzuaITKn1FevtNwalmLUqBPxDf8g,14647
+paradigma/testing.py,sha256=DWoq6dUzyg4wnmpv8tyV_2-bN16D5krReeZvurRv5gU,19481
+paradigma/util.py,sha256=L2_fJcGQBpZAqD9ay-sxJBe9ypq0FYsUNIp1-U_x2Jw,18221
+paradigma-1.0.4.dist-info/METADATA,sha256=su0TkJ23wB-qJiqrkn8k-9dLBcoqQ2jPS6oscC3jUzI,12176
+paradigma-1.0.4.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+paradigma-1.0.4.dist-info/entry_points.txt,sha256=Jiuvl3rCJFdHOVxhC4Uum_jc3qXIj_h5mTo-rALN94E,89
+paradigma-1.0.4.dist-info/licenses/LICENSE,sha256=FErdVJ9zP4I24ElO6xFU5_e8KckvFkpcZdm69ZkaUWI,9806
+paradigma-1.0.4.dist-info/RECORD,,
paradigma-1.0.3.dist-info/RECORD
DELETED
@@ -1,22 +0,0 @@
-paradigma/__init__.py,sha256=vCLqo7vOEgcnYs10gUVYvEFfi8y-jBi7w1YKRoqn95k,127
-paradigma/assets/gait_detection_clf_package.pkl,sha256=8jCbuM_4dkilSjOEk9ss7bJbSppgzXe72y0X4BCnzCU,11497247
-paradigma/assets/gait_filtering_clf_package.pkl,sha256=lAaLyhmXdV4X_drmYt0EM6wGwSo80yhpxtncWGq4RfQ,3915
-paradigma/assets/ppg_quality_clf_package.pkl,sha256=vUcM4v8gZwWAmDVK7E4UcHhVnhlEg27RSB71oPGloSc,1292
-paradigma/assets/tremor_detection_clf_package.pkl,sha256=S-KsK1EcUBJX6oGGBo8GqU0AhNZThA6Qe-cs0QPcWw4,1475
-paradigma/classification.py,sha256=sBJSePvwHZNPUQuLdx-pncfnDzMq-1naomsCxSJneWY,2921
-paradigma/config.py,sha256=rrGKabsd1ffLYH9NKdvZmGJmsv-V8rpyqSaC5jkXfo0,11335
-paradigma/constants.py,sha256=gR--OzxaZqS5nJnYlWLqnJ9xN05_GMNtd6ec3upsfms,3543
-paradigma/feature_extraction.py,sha256=zgu_fW1zpPvHxpgsPVpJILUiyWH44b9n1bGG7lV2HwE,35323
-paradigma/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-paradigma/pipelines/gait_pipeline.py,sha256=u4Jocmt94V08Yw4ZxsNPeh-E-SN6a1gMN-OHiTqkb50,28452
-paradigma/pipelines/pulse_rate_pipeline.py,sha256=aBDopwWvfabLCQM6De9PHNKKzL03xD_29jWcsElnjCw,17711
-paradigma/pipelines/pulse_rate_utils.py,sha256=rlXze04meLFlyPaxMBYhvz3_vu3SM77RF-7mLPegTm0,26772
-paradigma/pipelines/tremor_pipeline.py,sha256=IgDAnYbOLYaadBeS-ylT_O24qNuXbWVhopuU5rIrCNQ,14302
-paradigma/preprocessing.py,sha256=U9ZGnmUg7M77i00YJ9_gV51p3giYqhWKDAeXZk-veQo,14570
-paradigma/segmenting.py,sha256=hgT4dtg23eyvjUraEXCzX8u0kSRx4vArjQgF10r61P8,13909
-paradigma/testing.py,sha256=zWPBj7Q1Td6rgeMGoAWi6rIVLB8M6_FNUxlZSbpWqEM,18547
-paradigma/util.py,sha256=EvZvwRxdQXXAx_U5QsMVKkjTFp__M_pNF61bhefGza4,17289
-paradigma-1.0.3.dist-info/LICENSE,sha256=Lda8kIVC2kbmlSeYaUWwUwV75Q-q31idYvo18HUTfiw,9807
-paradigma-1.0.3.dist-info/METADATA,sha256=XM1odiNJLhbvpz6rIjQYrM0ET2fGPhI6RpOId6G21CI,11972
-paradigma-1.0.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-paradigma-1.0.3.dist-info/RECORD,,