paradigma-1.0.2-py3-none-any.whl → paradigma-1.0.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/classification.py +28 -11
- paradigma/config.py +158 -101
- paradigma/constants.py +39 -34
- paradigma/feature_extraction.py +270 -211
- paradigma/pipelines/gait_pipeline.py +286 -190
- paradigma/pipelines/pulse_rate_pipeline.py +202 -133
- paradigma/pipelines/pulse_rate_utils.py +144 -142
- paradigma/pipelines/tremor_pipeline.py +139 -95
- paradigma/preprocessing.py +179 -110
- paradigma/segmenting.py +138 -113
- paradigma/testing.py +359 -172
- paradigma/util.py +171 -80
- {paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info}/METADATA +39 -36
- paradigma-1.0.4.dist-info/RECORD +23 -0
- {paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info}/WHEEL +1 -1
- paradigma-1.0.4.dist-info/entry_points.txt +4 -0
- {paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info/licenses}/LICENSE +0 -1
- paradigma-1.0.2.dist-info/RECORD +0 -22
paradigma/util.py
CHANGED
@@ -1,16 +1,44 @@
+import functools
 import os
-import numpy as np
-import pandas as pd
+import warnings
 from datetime import datetime, timedelta
-from dateutil import parser
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 
+import numpy as np
+import pandas as pd
 import tsdf
+from dateutil import parser
+from scipy.stats import gaussian_kde
 from tsdf import TSDFMetadata
 
 from paradigma.constants import DataColumns, TimeUnit
 
 
+def deprecated(reason: str = ""):
+    """
+    Decorator to mark functions as deprecated. It will show a warning when the function is used.
+
+    Parameters
+    ----------
+    reason : str, optional
+        Additional message to explain why it is deprecated and what to use instead.
+    """
+
+    def decorator(func):
+        message = f"Function {func.__name__} is deprecated."
+        if reason:
+            message += f" {reason}"
+
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            warnings.warn(message, category=DeprecationWarning, stacklevel=2)
+            return func(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
+
+
 def parse_iso8601_to_datetime(date_str):
     return parser.parse(date_str)
 
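The new `deprecated` decorator wraps a function so that calling it emits a `DeprecationWarning`, while `functools.wraps` preserves the wrapped function's name and docstring. A minimal usage sketch (the function and replacement names below are hypothetical, not part of the package):

```python
from paradigma.util import deprecated

@deprecated("Use compute_features_v2 instead.")  # hypothetical replacement name
def compute_features_v1(x):
    return x * 2

compute_features_v1(3)
# DeprecationWarning: Function compute_features_v1 is deprecated. Use compute_features_v2 instead.
```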
@@ -27,7 +55,7 @@ def get_end_iso8601(start_iso8601, window_length_seconds):
 
 def write_np_data(
     metadata_time: TSDFMetadata,
-    np_array_time: np.ndarray,
+    np_array_time: np.ndarray,
     metadata_values: TSDFMetadata,
     np_array_values: np.ndarray,
     output_path: str,
@@ -52,7 +80,7 @@ def write_np_data(
         The filename for the metadata.
 
     """
-
+
     if not os.path.exists(output_path):
         os.makedirs(output_path)
 
@@ -61,9 +89,19 @@ def write_np_data(
     metadata_values.file_dir_path = output_path
 
     # store binaries and metadata
-    time_tsdf = tsdf.write_binary_file(file_dir=output_path, file_name=metadata_time.file_name, data=np_array_time, metadata=metadata_time.get_plain_tsdf_dict_copy())
+    time_tsdf = tsdf.write_binary_file(
+        file_dir=output_path,
+        file_name=metadata_time.file_name,
+        data=np_array_time,
+        metadata=metadata_time.get_plain_tsdf_dict_copy(),
+    )
 
-    samples_tsdf = tsdf.write_binary_file(file_dir=output_path, file_name=metadata_values.file_name, data=np_array_values, metadata=metadata_values.get_plain_tsdf_dict_copy())
+    samples_tsdf = tsdf.write_binary_file(
+        file_dir=output_path,
+        file_name=metadata_values.file_name,
+        data=np_array_values,
+        metadata=metadata_values.get_plain_tsdf_dict_copy(),
+    )
 
     tsdf.write_metadata([time_tsdf, samples_tsdf], output_filename)
 
@@ -126,13 +164,23 @@ def read_metadata(
     return metadata_time, metadata_values
 
 
-def load_tsdf_dataframe(path_to_data, prefix, meta_suffix='meta.json', time_suffix='time.bin', values_suffix='values.bin'):
+def load_tsdf_dataframe(
+    path_to_data,
+    prefix,
+    meta_suffix="meta.json",
+    time_suffix="time.bin",
+    values_suffix="values.bin",
+):
     meta_filename = f"{prefix}_{meta_suffix}"
     time_filename = f"{prefix}_{time_suffix}"
     values_filename = f"{prefix}_{values_suffix}"
 
-    metadata_time, metadata_values = read_metadata(path_to_data, meta_filename, time_filename, values_filename)
-    df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_values], tsdf.constants.ConcatenationType.columns)
+    metadata_time, metadata_values = read_metadata(
+        path_to_data, meta_filename, time_filename, values_filename
+    )
+    df = tsdf.load_dataframe_from_binaries(
+        [metadata_time, metadata_values], tsdf.constants.ConcatenationType.columns
+    )
 
     return df, metadata_time, metadata_values
 
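`load_tsdf_dataframe` resolves the three TSDF files from a prefix plus the suffix defaults shown above. A usage sketch, assuming a hypothetical directory `data/` containing `IMU_meta.json`, `IMU_time.bin` and `IMU_values.bin`:

```python
from paradigma.util import load_tsdf_dataframe

# Loads IMU_meta.json, IMU_time.bin and IMU_values.bin from data/ and
# concatenates the time and values binaries column-wise into one dataframe.
df, metadata_time, metadata_values = load_tsdf_dataframe(
    path_to_data="data", prefix="IMU"
)
```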
@@ -151,11 +199,9 @@ def load_metadata_list(
         The filename of the metadata file.
     filenames : List[str]
         The list of binary files of which the metadata files need to be loaded
-
-    """
-    metadata_dict = tsdf.load_metadata_from_path(
-        os.path.join(dir_path, meta_filename)
-    )
+
+    """
+    metadata_dict = tsdf.load_metadata_from_path(os.path.join(dir_path, meta_filename))
     metadata_list = []
     for filename in filenames:
         metadata_list.append(metadata_dict[filename])
@@ -194,37 +240,54 @@ def transform_time_array(
     - The transformation allows for scaling of the time array, converting between time unit types (e.g., relative, absolute, or difference).
     - When converting to `TimeUnit.RELATIVE_MS`, the function calculates the relative time starting from the provided or default start time.
     """
-    input_units = input_unit_type.split('_')[-1].lower()
-    output_units = output_unit_type.split('_')[-1].lower()
+    input_units = input_unit_type.split("_")[-1].lower()
+    output_units = output_unit_type.split("_")[-1].lower()
 
     if input_units == output_units:
         scale_factor = 1
-    elif input_units == 's' and output_units == 'ms':
+    elif input_units == "s" and output_units == "ms":
         scale_factor = 1e3
-    elif input_units == 'ms' and output_units == 's':
+    elif input_units == "ms" and output_units == "s":
         scale_factor = 1 / 1e3
     else:
-        raise ValueError(f'Unsupported time units conversion: {input_units} to {output_units}')
-
-    # Transform to relative time (`TimeUnit.RELATIVE_MS`)
-    if input_unit_type == TimeUnit.DIFFERENCE_MS or input_unit_type == TimeUnit.DIFFERENCE_S:
-        # Convert a series of differences into cumulative sum to reconstruct original time series.
+        raise ValueError(
+            f"Unsupported time units conversion: {input_units} to {output_units}"
+        )
+
+    # Transform to relative time (`TimeUnit.RELATIVE_MS`)
+    if (
+        input_unit_type == TimeUnit.DIFFERENCE_MS
+        or input_unit_type == TimeUnit.DIFFERENCE_S
+    ):
+        # Convert a series of differences into cumulative sum to reconstruct original time series.
         time_array = np.cumsum(np.double(time_array))
-    elif input_unit_type == TimeUnit.ABSOLUTE_MS or input_unit_type == TimeUnit.ABSOLUTE_S:
+    elif (
+        input_unit_type == TimeUnit.ABSOLUTE_MS
+        or input_unit_type == TimeUnit.ABSOLUTE_S
+    ):
         # Set the start time if not provided.
         if np.isclose(start_time, 0.0, rtol=1e-09, atol=1e-09):
             start_time = time_array[0]
         # Convert absolute time stamps into a time series relative to start_time.
-        time_array = time_array - start_time
+        time_array = time_array - start_time
 
     # Transform the time array from `TimeUnit.RELATIVE_MS` to the specified time unit type
-    if output_unit_type == TimeUnit.ABSOLUTE_MS or output_unit_type == TimeUnit.ABSOLUTE_S:
+    if (
+        output_unit_type == TimeUnit.ABSOLUTE_MS
+        or output_unit_type == TimeUnit.ABSOLUTE_S
+    ):
         # Converts time array to absolute time by adding the start time to each element.
         time_array = time_array + start_time
-    elif output_unit_type == TimeUnit.DIFFERENCE_MS or output_unit_type == TimeUnit.DIFFERENCE_S:
+    elif (
+        output_unit_type == TimeUnit.DIFFERENCE_MS
+        or output_unit_type == TimeUnit.DIFFERENCE_S
+    ):
         # Creates a new array starting with 0, followed by the differences between consecutive elements.
         time_array = np.diff(np.insert(time_array, 0, start_time))
-    elif output_unit_type == TimeUnit.RELATIVE_MS or output_unit_type == TimeUnit.RELATIVE_S:
+    elif (
+        output_unit_type == TimeUnit.RELATIVE_MS
+        or output_unit_type == TimeUnit.RELATIVE_S
+    ):
         # The array is already in relative format, do nothing.
         pass
 
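The cumsum/diff logic above is easiest to see on a toy array; a standalone NumPy sketch of the round trip, independent of the `TimeUnit` constants:

```python
import numpy as np

diffs_ms = np.array([0.0, 10.0, 10.0, 10.0])       # difference-format input
relative_ms = np.cumsum(diffs_ms)                  # [ 0., 10., 20., 30.]

start_time = 1_000.0                               # e.g. first absolute timestamp
absolute_ms = relative_ms + start_time             # [1000., 1010., 1020., 1030.]
roundtrip = np.diff(np.insert(relative_ms, 0, 0.0))  # back to [0., 10., 10., 10.]
```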
@@ -255,25 +318,25 @@ def convert_units_accelerometer(data: np.ndarray, units: str) -> np.ndarray:
         return data
     else:
         raise ValueError(f"Unsupported unit: {units}")
-
+
 
 def convert_units_gyroscope(data: np.ndarray, units: str) -> np.ndarray:
     """
     Convert gyroscope data to deg/s.
-
+
     Parameters
     ----------
     data : np.ndarray
         The gyroscope data.
-
+
     units : str
         The unit of the data (currently supports deg/s and rad/s).
-
+
     Returns
     -------
     np.ndarray
         The gyroscope data in deg/s.
-
+
     """
     if units == "deg/s":
         return data
@@ -281,9 +344,9 @@ def convert_units_gyroscope(data: np.ndarray, units: str) -> np.ndarray:
         return np.degrees(data)
     else:
         raise ValueError(f"Unsupported unit: {units}")
-
 
-def invert_watch_side(df: pd.DataFrame, side: str, sensor='both') -> np.ndarray:
+
+def invert_watch_side(df: pd.DataFrame, side: str, sensor="both") -> np.ndarray:
     """
     Invert the data based on the watch side.
 
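`invert_watch_side` flips axis signs so that right-wrist recordings match the left-wrist coordinate convention (its body follows in the next hunk). A usage sketch with made-up sensor values:

```python
import pandas as pd
from paradigma.constants import DataColumns
from paradigma.util import invert_watch_side

# Hypothetical two-sample recording from a right-worn watch.
df = pd.DataFrame(
    {
        DataColumns.ACCELEROMETER_X: [0.1, -0.2],
        DataColumns.GYROSCOPE_Y: [5.0, -3.0],
        DataColumns.GYROSCOPE_Z: [1.0, 2.0],
    }
)
df = invert_watch_side(df, side="right", sensor="both")  # negates acc x, gyro y/z
```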
@@ -304,60 +367,83 @@ def invert_watch_side(df: pd.DataFrame, side: str, sensor='both') -> np.ndarray:
     """
     if side not in ["left", "right"]:
         raise ValueError(f"Unsupported side: {side}")
-    if sensor not in ['accelerometer', 'gyroscope', 'both']:
+    if sensor not in ["accelerometer", "gyroscope", "both"]:
         raise ValueError(f"Unsupported sensor: {sensor}")
 
     elif side == "right":
-        if sensor in ['gyroscope', 'both']:
+        if sensor in ["gyroscope", "both"]:
             df[DataColumns.GYROSCOPE_Y] *= -1
             df[DataColumns.GYROSCOPE_Z] *= -1
-        if sensor in ['accelerometer', 'both']:
+        if sensor in ["accelerometer", "both"]:
             df[DataColumns.ACCELEROMETER_X] *= -1
 
     return df
 
-def aggregate_parameter(parameter: np.ndarray, aggregate: str) -> np.ndarray | int:
+
+def aggregate_parameter(
+    parameter: np.ndarray,
+    aggregate: str,
+    evaluation_points: Optional[np.ndarray] = None,
+) -> np.ndarray | int:
     """
     Aggregate a parameter based on the specified method.
-
+
     Parameters
     ----------
     parameter : np.ndarray
         The parameter to aggregate.
-
+
     aggregate : str
         The aggregation method to apply.
-
+
+    evaluation_points : np.ndarray, optional
+        Should be specified if the mode is derived for a continuous parameter.
+        Defines the evaluation points for the kernel density estimation function, from which the maximum is derived as the mode.
+
     Returns
     -------
     np.ndarray
         The aggregated parameter.
     """
-    if aggregate == 'mean':
+    if aggregate == "mean":
         return np.mean(parameter)
-    elif aggregate == 'median':
+    elif aggregate == "median":
         return np.median(parameter)
-    elif aggregate == 'mode':
+    elif aggregate == "mode_binned":
+        if evaluation_points is None:
+            raise ValueError(
+                "evaluation_points must be provided for 'mode_binned' aggregation."
+            )
+        else:
+            kde = gaussian_kde(parameter)
+            kde_values = kde(evaluation_points)
+            max_index = np.argmax(kde_values)
+            return evaluation_points[max_index]
+    elif aggregate == "mode":
         unique_values, counts = np.unique(parameter, return_counts=True)
         return unique_values[np.argmax(counts)]
-    elif aggregate == '90p':
+    elif aggregate == "90p":
         return np.percentile(parameter, 90)
-    elif aggregate == '95p':
+    elif aggregate == "95p":
         return np.percentile(parameter, 95)
-    elif aggregate == '99p':
+    elif aggregate == "99p":
         return np.percentile(parameter, 99)
-    elif aggregate == 'std':
+    elif aggregate == "std":
         return np.std(parameter)
+    elif aggregate == "cov":
+        mean_value = np.mean(parameter)
+        return np.std(parameter) / mean_value if mean_value != 0 else 0
     else:
         raise ValueError(f"Invalid aggregation method: {aggregate}")
 
+
 def merge_predictions_with_timestamps(
-        df_ts: pd.DataFrame,
-        df_predictions: pd.DataFrame,
-        pred_proba_colname: str,
-        window_length_s: float,
-        fs: int,
-    ) -> pd.DataFrame:
+    df_ts: pd.DataFrame,
+    df_predictions: pd.DataFrame,
+    pred_proba_colname: str,
+    window_length_s: float,
+    fs: int,
+) -> pd.DataFrame:
     """
     Merges prediction probabilities with timestamps by expanding overlapping windows
     into individual timestamps and averaging probabilities per unique timestamp.
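`aggregate_parameter` gains two aggregates in this release: `"mode_binned"`, which estimates the mode of a continuous parameter as the argmax of a Gaussian KDE evaluated on a user-supplied grid, and `"cov"`, the coefficient of variation (std divided by mean, with a zero-mean guard). A usage sketch with synthetic data (the values and grid below are illustrative):

```python
import numpy as np
from paradigma.util import aggregate_parameter

# Synthetic continuous parameter, e.g. per-window arm swing range of motion.
values = np.random.default_rng(0).normal(loc=25.0, scale=5.0, size=500)

typical = aggregate_parameter(
    values, aggregate="mode_binned", evaluation_points=np.linspace(0, 50, 501)
)
spread = aggregate_parameter(values, aggregate="cov")  # std / mean
```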
@@ -382,7 +468,7 @@ def merge_predictions_with_timestamps(
 
     fs : int
         The sampling frequency of the data.
-
+
     Returns:
     -------
     pd.DataFrame
@@ -403,22 +489,18 @@ def merge_predictions_with_timestamps(
     # Step 1: Generate all timestamps for prediction windows using NumPy broadcasting
     window_length = int(window_length_s * fs)
     timestamps = (
-        df_predictions[DataColumns.TIME].values[:, None] +
-        np.arange(0, window_length) / fs
+        df_predictions[DataColumns.TIME].values[:, None]
+        + np.arange(0, window_length) / fs
     )
-
+
     # Flatten timestamps and probabilities into a single array for efficient processing
     flat_timestamps = timestamps.ravel()
-    flat_proba = np.repeat(
-        df_predictions[pred_proba_colname].values,
-        window_length
-    )
+    flat_proba = np.repeat(df_predictions[pred_proba_colname].values, window_length)
 
     # Step 2: Create a DataFrame for expanded data
-    expanded_df = pd.DataFrame({
-        DataColumns.TIME: flat_timestamps,
-        pred_proba_colname: flat_proba
-    })
+    expanded_df = pd.DataFrame(
+        {DataColumns.TIME: flat_timestamps, pred_proba_colname: flat_proba}
+    )
 
     # Step 3: Round timestamps and aggregate probabilities
     expanded_df[DataColumns.TIME] = expanded_df[DataColumns.TIME].round(2)
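The broadcasting trick in Step 1 turns a column vector of window start times into a full matrix of per-sample timestamps; a toy NumPy sketch with made-up numbers:

```python
import numpy as np

window_starts = np.array([0.0, 1.0])  # start time of each prediction window (s)
fs, window_length = 2, 4              # toy sampling rate and samples per window

timestamps = window_starts[:, None] + np.arange(0, window_length) / fs
# array([[0. , 0.5, 1. , 1.5],
#        [1. , 1.5, 2. , 2.5]])  -> one row of timestamps per window
```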
@@ -426,14 +508,15 @@ def merge_predictions_with_timestamps(
 
     # Step 4: Round timestamps in `df_ts` and merge
     df_ts[DataColumns.TIME] = df_ts[DataColumns.TIME].round(2)
-    df_ts = pd.merge(df_ts, mean_proba, how='left', on=DataColumns.TIME)
+    df_ts = pd.merge(df_ts, mean_proba, how="left", on=DataColumns.TIME)
     df_ts = df_ts.dropna(subset=[pred_proba_colname])
 
     return df_ts
 
 
-def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: str) -> pd.DataFrame:
-
+def select_hours(
+    df: pd.DataFrame, select_hours_start: str, select_hours_end: str
+) -> pd.DataFrame:
     """
     Select hours of interest from the data to include in the aggregation step.
 
@@ -444,7 +527,7 @@ def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: str) -> pd.DataFrame:
 
     select_hours_start: str
         The start time of the selected hours in "HH:MM" format.
-
+
     select_hours_end: str
         The end time of the selected hours in "HH:MM" format.
 
@@ -455,14 +538,18 @@ def select_hours(df: pd.DataFrame, select_hours_start: str, select_hours_end: str) -> pd.DataFrame:
 
     """
 
-    select_hours_start = datetime.strptime(select_hours_start, '%H:%M').time() # convert to time object
-    select_hours_end = datetime.strptime(select_hours_end, '%H:%M').time()
-    df_subset = df[df['time_dt'].dt.time.between(select_hours_start, select_hours_end)] # select the hours of interest
+    select_hours_start = datetime.strptime(
+        select_hours_start, "%H:%M"
+    ).time()  # convert to time object
+    select_hours_end = datetime.strptime(select_hours_end, "%H:%M").time()
+    df_subset = df[
+        df["time_dt"].dt.time.between(select_hours_start, select_hours_end)
+    ]  # select the hours of interest
 
     return df_subset
 
-def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
 
+def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
     """
     Select days of interest from the data to include in the aggregation step.
 
@@ -483,8 +570,12 @@ def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
     """
 
     min_s_per_day = min_hours_per_day * 3600
-    window_length_s = df['time_dt'].diff().dt.total_seconds().iloc[1] # determine the length of the first window in seconds
+    window_length_s = (
+        df["time_dt"].diff().dt.total_seconds().iloc[1]
+    )  # determine the length of the first window in seconds
     min_windows_per_day = min_s_per_day / window_length_s
-    df_subset = df.groupby(df['time_dt'].dt.date).filter(lambda x: len(x) >= min_windows_per_day)
+    df_subset = df.groupby(df["time_dt"].dt.date).filter(
+        lambda x: len(x) >= min_windows_per_day
+    )
 
-    return df_subset
+    return df_subset
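Taken together, `select_hours` and `select_days` filter a window-level dataframe down to compliant periods before aggregation; both expect a `time_dt` datetime column. A sketch on hypothetical 30-second windows:

```python
import pandas as pd
from paradigma.util import select_days, select_hours

# Hypothetical dataframe with one row per 30 s window.
df = pd.DataFrame(
    {"time_dt": pd.date_range("2025-01-01 07:00", periods=2000, freq="30s")}
)

df = select_hours(df, select_hours_start="08:00", select_hours_end="22:00")
df = select_days(df, min_hours_per_day=10)  # keep days with >= 10 h of windows
```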
{paradigma-1.0.2.dist-info → paradigma-1.0.4.dist-info}/METADATA
CHANGED
@@ -1,8 +1,9 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: paradigma
-Version: 1.0.2
+Version: 1.0.4
 Summary: ParaDigMa - A toolbox for deriving Parkinson's disease Digital Markers from real-life wrist sensor data
 License: Apache-2.0
+License-File: LICENSE
 Author: Erik Post
 Author-email: erik.post@radboudumc.nl
 Requires-Python: >=3.11,<4.0
@@ -11,6 +12,8 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Requires-Dist: nbconvert (>=7.16.6,<8.0.0)
 Requires-Dist: pandas (>=2.1.4,<3.0.0)
 Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
 Requires-Dist: pytype (>=2024.4.11,<2025.0.0)
@@ -28,25 +31,25 @@ Description-Content-Type: text/markdown
 | **DOI** | [](https://doi.org/10.5281/zenodo.13838392) |
 | **Build Status** | [](https://www.python.org/downloads/) [](https://github.com/biomarkersParkinson/paradigma/actions/workflows/build-and-test.yml) [](https://github.com/biomarkersParkinson/paradigma/actions/workflows/pages/pages-build-deployment) |
 | **License** | [](https://github.com/biomarkersparkinson/paradigma/blob/main/LICENSE) |
-<!-- | **Fairness** | [](https://fair-software.eu) [](https://www.bestpractices.dev/projects/8083) | -->
+<!-- | **Fairness** | [](https://fair-software.eu) [](https://www.bestpractices.dev/projects/8083) | -->
 
 ## Overview
 The Parkinson's disease Digital Markers (ParaDigMa) toolbox is a Python
 software package designed for processing real-life wrist sensor data
-to extract digital measures of motor and non-motor signs of Parkinson's disease (PD).
-
-Specifically, the toolbox is designed to process accelerometer, gyroscope and
-photoplethysmography (PPG) signals, collected during passive monitoring in daily life.
-It contains three data processing pipelines: (1) arm swing during gait, (2) tremor,
-and (3) pulse rate. These pipelines are scientifically validated for their
-use in persons with PD. Furthermore, the toolbox contains general functionalities for
-signal processing and feature extraction, such as filtering, peak detection, and
-spectral analysis.
-
-The toolbox is accompanied by a set of example scripts and notebooks for
-each processing pipeline that demonstrate how to use the toolbox for extracting
+to extract digital measures of motor and non-motor signs of Parkinson's disease (PD).
+
+Specifically, the toolbox is designed to process accelerometer, gyroscope and
+photoplethysmography (PPG) signals, collected during passive monitoring in daily life.
+It contains three data processing pipelines: (1) arm swing during gait, (2) tremor,
+and (3) pulse rate. These pipelines are scientifically validated for their
+use in persons with PD. Furthermore, the toolbox contains general functionalities for
+signal processing and feature extraction, such as filtering, peak detection, and
+spectral analysis.
+
+The toolbox is accompanied by a set of example scripts and notebooks for
+each processing pipeline that demonstrate how to use the toolbox for extracting
 digital measures. In addition, the toolbox is designed to be modular, enabling
-researchers to easily extend the toolbox with new algorithms and functionalities.
+researchers to easily extend the toolbox with new algorithms and functionalities.
 
 ## Features
 The components of ParaDigMa are shown in the diagram below.
@@ -59,20 +62,20 @@ ParaDigMa can best be understood by categorizing the sequential processes:
 
 | Process | Description |
 | ---- | ---- |
-| Preprocessing | Preparing raw sensor signals for further processing |
+| Preprocessing | Preparing raw sensor signals for further processing |
 | Feature extraction | Extracting features based on windowed sensor signals |
-| Classification | Detecting segments of interest using validated classifiers (e.g., gait segments) |
+| Classification | Detecting segments of interest using validated classifiers (e.g., gait segments) |
 | Quantification | Extracting specific measures from the detected segments (e.g., arm swing measures) |
 | Aggregation | Aggregating the measures over a specific time period (e.g., week-level aggregates) |
 
 <br/>
-ParaDigMa contains the following validated processing pipelines (each using the processes described above):
+ParaDigMa contains the following validated processing pipelines (each using the processes described above):
 
-| Pipeline | Input | Output classification | Output quantification | Output week-level aggregation |
+| Pipeline | Input | Output classification | Output quantification | Output week-level aggregation |
 | ---- | ---- | ---- | ---- | ---- |
-| **Arm swing during gait** | Wrist accelerometer and gyroscope data | Gait probability, gait without other arm activities probability | Arm swing range of motion (RoM) | Typical & maximum arm swing RoM |
-| **Tremor** | Wrist gyroscope data | Tremor probability | Tremor power | % tremor time, typical & maximum tremor power |
-| **Pulse rate** | Wrist PPG and accelerometer data | PPG signal quality | Pulse rate | Resting & maximum pulse rate |
+| **Arm swing during gait** | Wrist accelerometer and gyroscope data | Gait probability, gait without other arm activities probability | Arm swing range of motion (RoM) | Typical & maximum arm swing RoM |
+| **Tremor** | Wrist gyroscope data | Tremor probability | Tremor power | % tremor time, typical & maximum tremor power |
+| **Pulse rate** | Wrist PPG and accelerometer data | PPG signal quality | Pulse rate | Resting & maximum pulse rate |
 
 ## Installation
 
@@ -90,9 +93,9 @@ The API reference contains detailed documentation of all toolbox modules and functions.
 The user guides provide additional information about specific topics (e.g. the required orientation of the wrist sensor).
 
 ### Sensor data requirements
-The ParaDigMa toolbox is designed for the analysis of passive monitoring data collected using a wrist sensor in persons with PD.
+The ParaDigMa toolbox is designed for the analysis of passive monitoring data collected using a wrist sensor in persons with PD.
 
-Specific requirements include:
+Specific requirements include:
 | Pipeline | Sensor Configuration | Context of Use |
 |------------------------|--------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------|
 | **All** | - Sensor position: wrist-band on most or least affected side (validated for both, but different sensitivity for measuring disease progression for tremor and arm swing during gait). <br> - Sensor orientation: orientation as described in [Coordinate System](https://biomarkersparkinson.github.io/paradigma/guides/coordinate_system.html). <br> - Timeframe: contiguous, strictly increasing timestamps. | - Population: persons with PD. <br> - Data collection protocol: passive monitoring in daily life. |
@@ -100,25 +103,25 @@ Specific requirements include:
 | **Tremor** | - Gyroscope: minimum sampling rate of 100 Hz, minimum range of ± 1000 degrees/sec. | - Compliance: for weekly measures: at least three compliant days (with ≥10 hours of data between 8 am and 10 pm). |
 | **Pulse rate** | - PPG*: minimum sampling rate of 30 Hz, green LED. <br> - Accelerometer: minimum sampling rate of 100 Hz, minimum range of ± 4 g. | - Population: no rhythm disorders (e.g. atrial fibrillation, atrial flutter). <br> - Compliance: for weekly measures: minimum average of 12 hours of data per day. |
 
-\* The processing of PPG signals is currently based on the blood volume pulse (arbitrary units) obtained from the Verily Study Watch
+\* The processing of PPG signals is currently based on the blood volume pulse (arbitrary units) obtained from the Verily Study Watch. [This](https://biomarkersparkinson.github.io/paradigma/tutorials/_static/pulse_rate_analysis.html#step-3-signal-quality-classification) part of the PPG tutorial provides code and documentation on how to use the pipeline with other PPG devices.
 
 > [!WARNING]
-> While the toolbox is designed to work on any wrist sensor device which fulfills the requirements,
-we have currently verified its performance on data from the Gait-up Physilog 4 (arm swing during gait & tremor) and the Verily Study Watch (all pipelines). Furthermore, the specifications above are the minimally validated requirements. For example, while ParaDigMa works with accelerometer and gyroscope data sampled at 50 Hz, its effect on subsequent processes has not been empirically validated.
+> While the toolbox is designed to work on any wrist sensor device which fulfills the requirements,
+we have currently verified its performance on data from the Gait-up Physilog 4 (arm swing during gait & tremor) and the Verily Study Watch (all pipelines). Furthermore, the specifications above are the minimally validated requirements. For example, while ParaDigMa works with accelerometer and gyroscope data sampled at 50 Hz, its effect on subsequent processes has not been empirically validated.
 <br/>
 
 We have included support for [TSDF](https://biomarkersparkinson.github.io/tsdf/) as format for loading and storing sensor data. TSDF enables efficient data storage with added metadata. However, ParaDigMa does not require a particular method of data storage and retrieval. Please see our tutorial [Data preparation](https://biomarkersparkinson.github.io/paradigma/tutorials/data_preparation.html) for examples of loading TSDF and other data formats into memory, and for preparing raw sensor data as input for the processing pipelines.
 
 ## Scientific validation
 
-The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. 2020]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/) and the Personalized Parkinson Project [[Bloem et al. 2019]](https://pubmed.ncbi.nlm.nih.gov/31315608/). The following
-* [Post, E. et al. - Quantifying arm swing in Parkinson's disease: a method account for arm activities during free-living gait](https://doi.org/10.1186/s12984-025-01578-z)
-
-
+The pipelines were developed and validated using data from the Parkinson@Home Validation study [[Evers et al. (2020)]](https://pmc.ncbi.nlm.nih.gov/articles/PMC7584982/) and the Personalized Parkinson Project [[Bloem et al. (2019)]](https://pubmed.ncbi.nlm.nih.gov/31315608/). The following publications contain details and validation of the pipelines:
+* [Post, E. et al. (2025) - Quantifying arm swing in Parkinson's disease: a method account for arm activities during free-living gait](https://doi.org/10.1186/s12984-025-01578-z)
+* [Timmermans, N.A. et al. (2025) - A generalizable and open-source algorithm for real-life monitoring of tremor in Parkinson's disease](https://doi.org/10.1038/s41531-025-01056-2)
+* [Veldkamp, K.I. et al. (2025) - Heart rate monitoring using wrist photoplethysmography in Parkinson disease: feasibility and relation with autonomic dysfunction](https://doi.org/10.1101/2025.08.15.25333751)
 
 ## Contributing
 
-We welcome contributions! Please check out our [contributing guidelines](https://biomarkersparkinson.github.io/paradigma/contributing.html).
+We welcome contributions! Please check out our [contributing guidelines](https://biomarkersparkinson.github.io/paradigma/contributing.html).
 Please note that this project is released with a [Code of Conduct](https://biomarkersparkinson.github.io/paradigma/conduct.html). By contributing to this project, you agree to abide by its terms.
 
 ## License
@@ -127,11 +130,11 @@ It is licensed under the terms of the Apache License 2.0 license. See [License](
 
 ## Acknowledgements
 
-The core team of ParaDigMa consists of Erik Post, Kars Veldkamp, Nienke Timmermans, Diogo Coutinho Soriano, Peter Kok, Vedran Kasalica and Luc Evers.
-Advisors to the project are Max Little, Jordan Raykov, Twan van Laarhoven, Hayriye Cagnan, and Bas Bloem.
+The core team of ParaDigMa consists of Erik Post, Kars Veldkamp, Nienke Timmermans, Diogo Coutinho Soriano, Peter Kok, Vedran Kasalica and Luc Evers.
+Advisors to the project are Max Little, Jordan Raykov, Twan van Laarhoven, Hayriye Cagnan, and Bas Bloem.
 The initial release of ParaDigMa was funded by the Michael J Fox Foundation (grant #020425) and the Dutch Research Council (grant #ASDI.2020.060 & grant #2023.010).
 ParaDigMa was created with [`cookiecutter`](https://cookiecutter.readthedocs.io/en/latest/) and the `py-pkgs-cookiecutter` [template](https://github.com/py-pkgs/py-pkgs-cookiecutter).
 
 ## Contact
-Questions, issues or suggestions about ParaDigMa? Please reach out to
+Questions, issues or suggestions about ParaDigMa? Please reach out to paradigma@radboudumc.nl, or open an issue in the GitHub repository.
 
paradigma-1.0.4.dist-info/RECORD
ADDED
@@ -0,0 +1,23 @@
+paradigma/__init__.py,sha256=vCLqo7vOEgcnYs10gUVYvEFfi8y-jBi7w1YKRoqn95k,127
+paradigma/assets/gait_detection_clf_package.pkl,sha256=8jCbuM_4dkilSjOEk9ss7bJbSppgzXe72y0X4BCnzCU,11497247
+paradigma/assets/gait_filtering_clf_package.pkl,sha256=lAaLyhmXdV4X_drmYt0EM6wGwSo80yhpxtncWGq4RfQ,3915
+paradigma/assets/ppg_quality_clf_package.pkl,sha256=vUcM4v8gZwWAmDVK7E4UcHhVnhlEg27RSB71oPGloSc,1292
+paradigma/assets/tremor_detection_clf_package.pkl,sha256=S-KsK1EcUBJX6oGGBo8GqU0AhNZThA6Qe-cs0QPcWw4,1475
+paradigma/classification.py,sha256=yDTetqTZT0c7G0QtNX_i7SNjevEJeaqr334HyZUE6zw,3302
+paradigma/config.py,sha256=B2a3oCusaxH2vAjNV4ae7IWstVwhMpD5H1uN_7Oz4U4,13924
+paradigma/constants.py,sha256=fbJuZW5VB_hhz9NQYwjOxINOJPAcCgRPYJNL6tMqpTA,3493
+paradigma/feature_extraction.py,sha256=meL8fKmaJd1t7apEzFjt757w7M9EF4lX9w-xK2oRgT8,35686
+paradigma/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+paradigma/pipelines/gait_pipeline.py,sha256=WkAL6iazYQlze21RLMsEPP5q0XAYlOkkDK74c-A8HnE,29190
+paradigma/pipelines/pulse_rate_pipeline.py,sha256=vktK9C1K-0-MifwJ3t_-gnEmEBFExovGVmgXDjBs2j4,17990
+paradigma/pipelines/pulse_rate_utils.py,sha256=-ixh9aTz_bwqCxpBPJW_L37I5yYuI9u5fJ8TBtDoL8Q,26480
+paradigma/pipelines/tremor_pipeline.py,sha256=Le5DUBMhg7DQaRmS49h3SavoUwKn63bJvlmj2-se7sw,14537
+paradigma/preprocessing.py,sha256=324xRLe_fCwbduSwieFNJOn33AStWpC1eMO1QW6etJQ,17119
+paradigma/segmenting.py,sha256=ccc6gwfXouDi6WGhzuaITKn1FevtNwalmLUqBPxDf8g,14647
+paradigma/testing.py,sha256=DWoq6dUzyg4wnmpv8tyV_2-bN16D5krReeZvurRv5gU,19481
+paradigma/util.py,sha256=L2_fJcGQBpZAqD9ay-sxJBe9ypq0FYsUNIp1-U_x2Jw,18221
+paradigma-1.0.4.dist-info/METADATA,sha256=su0TkJ23wB-qJiqrkn8k-9dLBcoqQ2jPS6oscC3jUzI,12176
+paradigma-1.0.4.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+paradigma-1.0.4.dist-info/entry_points.txt,sha256=Jiuvl3rCJFdHOVxhC4Uum_jc3qXIj_h5mTo-rALN94E,89
+paradigma-1.0.4.dist-info/licenses/LICENSE,sha256=FErdVJ9zP4I24ElO6xFU5_e8KckvFkpcZdm69ZkaUWI,9806
+paradigma-1.0.4.dist-info/RECORD,,