paradigma-1.0.4-py3-none-any.whl → paradigma-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/__init__.py +10 -1
- paradigma/classification.py +14 -14
- paradigma/config.py +38 -29
- paradigma/constants.py +10 -2
- paradigma/feature_extraction.py +106 -75
- paradigma/load.py +476 -0
- paradigma/orchestrator.py +670 -0
- paradigma/pipelines/gait_pipeline.py +488 -97
- paradigma/pipelines/pulse_rate_pipeline.py +278 -46
- paradigma/pipelines/pulse_rate_utils.py +176 -137
- paradigma/pipelines/tremor_pipeline.py +292 -72
- paradigma/prepare_data.py +409 -0
- paradigma/preprocessing.py +345 -77
- paradigma/segmenting.py +57 -42
- paradigma/testing.py +14 -9
- paradigma/util.py +36 -22
- paradigma-1.1.0.dist-info/METADATA +229 -0
- paradigma-1.1.0.dist-info/RECORD +26 -0
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/WHEEL +1 -1
- paradigma-1.0.4.dist-info/METADATA +0 -140
- paradigma-1.0.4.dist-info/RECORD +0 -23
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/entry_points.txt +0 -0
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/licenses/LICENSE +0 -0
paradigma/segmenting.py
CHANGED
@@ -1,5 +1,3 @@
-from typing import List
-
 import numpy as np
 import pandas as pd

@@ -9,19 +7,21 @@ from paradigma.util import deprecated

 def tabulate_windows(
     df: pd.DataFrame,
-    columns: List[str],
+    columns: list[str],
     window_length_s: float,
     window_step_length_s: float,
     fs: int,
 ) -> np.ndarray:
     """
-    Split the given DataFrame into overlapping windows of specified length and step size.
+    Split the given DataFrame into overlapping windows of specified length
+    and step size.

-    This function extracts windows of data from the specified columns of the DataFrame, based on
-    the window length and step size provided in the configuration. The windows are returned in
-    a 3D NumPy array, where the first dimension represents the window index, the second dimension
-    represents the time steps within the window, and the third dimension represents the columns
-    of the data.
+    This function extracts windows of data from the specified columns of the
+    DataFrame, based on the window length and step size provided in the
+    configuration. The windows are returned in a 3D NumPy array, where the
+    first dimension represents the window index, the second dimension
+    represents the time steps within the window, and the third dimension
+    represents the columns of the data.

     Parameters
     ----------
@@ -40,17 +40,22 @@ def tabulate_windows(
     -------
     np.ndarray
         A 3D NumPy array of shape (n_windows, window_size, n_columns), where:
-        - `n_windows` is the number of windows that can be formed from the data.
-        - `window_size` is the length of each window in terms of the number of time steps.
-        - `n_columns` is the number of columns in the input DataFrame specified by `columns`.
+        - `n_windows` is the number of windows that can be formed from the
+          data.
+        - `window_size` is the length of each window in terms of the number
+          of time steps.
+        - `n_columns` is the number of columns in the input DataFrame
+          specified by `columns`.

-        If the length of the data is shorter than the specified window size, an empty array is returned.
+        If the length of the data is shorter than the specified window size,
+        an empty array is returned.

     Notes
     -----
-    This function uses `np.lib.stride_tricks.sliding_window_view` to generate sliding windows of data.
-    The step size is applied to extract windows at intervals. If the data is insufficient for at least one
-    window, an empty array will be returned.
+    This function uses `np.lib.stride_tricks.sliding_window_view` to
+    generate sliding windows of data. The step size is applied to extract
+    windows at intervals. If the data is insufficient for at least one
+    window, an empty array will be returned.

     Example
     -------
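The rewrapped docstring above describes the windowing contract. As a minimal sketch of that contract (not the package's actual implementation), the same output shape can be produced with `np.lib.stride_tricks.sliding_window_view`; the column names and toy parameters here are hypothetical:

```python
import numpy as np
import pandas as pd

# Toy signal: 10 samples of two hypothetical columns at fs = 2 Hz.
df = pd.DataFrame({"acc_x": np.arange(10.0), "acc_y": np.arange(10.0) * 2})

fs = 2                      # sampling frequency (Hz)
window_length_s = 2.0       # -> window_size of 4 samples
window_step_length_s = 1.0  # -> step of 2 samples

window_size = int(window_length_s * fs)
step = int(window_step_length_s * fs)

values = df[["acc_x", "acc_y"]].to_numpy()
if len(values) < window_size:
    # Data shorter than one window: return an empty array, as documented.
    windows = np.empty((0, window_size, values.shape[1]))
else:
    # Every window of `window_size` samples along axis 0, then keep one per step.
    windows = np.lib.stride_tricks.sliding_window_view(
        values, window_shape=window_size, axis=0
    )[::step]
    # Reorder axes to (n_windows, window_size, n_columns).
    windows = windows.transpose(0, 2, 1)

print(windows.shape)  # (4, 4, 2)
```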
@@ -84,7 +89,8 @@ def tabulate_windows

 def tabulate_windows_legacy(config, df, agg_func="first"):
     """
-    Efficiently creates a windowed dataframe from the input dataframe using vectorized operations.
+    Efficiently creates a windowed dataframe from the input dataframe using
+    vectorized operations.

     Parameters
     ----------
@@ -93,11 +99,13 @@ def tabulate_windows_legacy(config, df, agg_func="first"):
     - `window_length_s`: The number of seconds per window.
     - `window_step_length_s`: The number of seconds to shift between windows.
     - `sampling_frequency`: The sampling frequency in Hz.
-    - `single_value_colnames`: List of column names where a single value (e.g., mean) is needed.
-    - `list_value_colnames`: List of column names where all 600 values should be stored in a list.
+    - `single_value_colnames`: List of column names where a single value
+      (e.g., mean) is needed.
+    - `list_value_colnames`: List of column names where all 600 values
+      should be stored in a list.
     agg_func : str or callable, optional
-        Aggregation function for single-value columns. Can be 'mean', 'first', or a custom callable.
-        Default is 'first'.
+        Aggregation function for single-value columns. Can be 'mean',
+        'first', or a custom callable. Default is 'first'.

     Returns
     -------
@@ -122,7 +130,8 @@ def tabulate_windows_legacy(config, df, agg_func="first"):
     n_rows = len(df)
     if window_length > n_rows:
         raise ValueError(
-            f"Window size ({window_length}) cannot be greater than the number of rows ({n_rows}) in the dataframe."
+            f"Window size ({window_length}) cannot be greater than the "
+            f"number of rows ({n_rows}) in the dataframe."
         )

     # Create indices for window start positions
@@ -170,7 +179,8 @@ def tabulate_windows_legacy(config, df, agg_func="first"):
     # Convert result list into a DataFrame
     windowed_df = pd.DataFrame(result)

-    # Ensure the column order is as desired: window_nr, window_start, window_end, pre_or_post, and then the rest
+    # Ensure the column order is as desired: window_nr, window_start,
+    # window_end, pre_or_post, and then the rest
     desired_order = (
         ["window_nr", "window_start", "window_end"]
         + config.single_value_colnames
@@ -191,7 +201,7 @@ def create_segments(
     gap_exceeds = time_diff > max_segment_gap_s

     # Create the segment number based on the cumulative sum of the gap_exceeds mask
-    segments = gap_exceeds.cumsum()
+    segments = gap_exceeds.cumsum() + 1

     return segments

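The `+ 1` change above makes segment numbering 1-based. A small self-contained illustration of the gap-based numbering (toy timestamps; the real function's argument handling may differ):

```python
import pandas as pd

# Toy timestamps with one gap larger than max_segment_gap_s, between 2 s and 10 s.
time = pd.Series([0.0, 1.0, 2.0, 10.0, 11.0])
max_segment_gap_s = 1.5

gap_exceeds = time.diff() > max_segment_gap_s  # True where a new segment starts
segments = gap_exceeds.cumsum() + 1            # 1-based segment numbers

print(segments.tolist())  # [1, 1, 1, 2, 2]
```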
@@ -229,7 +239,8 @@ def discard_segments(

     Example
     -------
-    config = Config(min_segment_length_s=2, sampling_frequency=100, segment_nr_colname='segment')
+    config = Config(min_segment_length_s=2, sampling_frequency=100,
+                    segment_nr_colname='segment')
     df = pd.DataFrame({
         'segment': [1, 1, 2, 2, 2],
         'time': [0, 1, 2, 3, 4]
@@ -245,26 +256,26 @@ def discard_segments(
     """
     # Minimum segment size in number of samples
     if format == "timestamps":
-        min_samples = min_segment_length_s * fs
+        min_samples = int(min_segment_length_s * fs)
     elif format == "windows":
-        min_samples = min_segment_length_s
+        min_samples = int(min_segment_length_s)
     else:
         raise ValueError("Invalid format. Must be 'timestamps' or 'windows'.")

-    # …
-    …
-        df.groupby(segment_nr_colname)[segment_nr_colname].transform("size")
-        >= min_samples
-    )
+    # Count samples per segment
+    segment_counts = df.groupby(segment_nr_colname).size()

-    …
+    # Filter rows for valid segments (>= min samples)
+    counts_map = segment_counts.to_dict()
+    df = df[df[segment_nr_colname].map(counts_map) >= min_samples].copy()

     if df.empty:
-        raise ValueError(…
+        raise ValueError(
+            f"All segments were removed: no segment ≥ {min_samples} samples."
+        )

-    # Reset segment numbers
-    …
-    df[segment_nr_colname] = unique_segments
+    # Reset segment numbers
+    df[segment_nr_colname] = pd.factorize(df[segment_nr_colname])[0] + 1

     return df

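The rewritten body above counts samples per segment, drops short segments, and renumbers the survivors with `pd.factorize`. A toy run of that logic (the column name `segment` is illustrative):

```python
import pandas as pd

df = pd.DataFrame({"segment": [1, 1, 2, 3, 3, 3]})
min_samples = 2

# Count samples per segment and keep only sufficiently long segments.
counts_map = df.groupby("segment").size().to_dict()
df = df[df["segment"].map(counts_map) >= min_samples].copy()

# Renumber surviving segments consecutively, starting at 1.
df["segment"] = pd.factorize(df["segment"])[0] + 1

print(df["segment"].tolist())  # [1, 1, 2, 2, 2]
```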
@@ -313,7 +324,7 @@ def categorize_segments(df, fs, format="timestamps", window_step_length_s=None):
     d_max_duration = {k: v * fs for k, v in d_max_duration.items()}

     # Count rows per segment
-    segment_sizes = df[DataColumns.…
+    segment_sizes = df[DataColumns.GAIT_SEGMENT_NR].value_counts()

     # Convert segment sizes to duration in seconds
     if format == "windows":
@@ -332,7 +343,10 @@ def categorize_segments(df, fs, format="timestamps", window_step_length_s=None):

     # Apply categorization to the DataFrame
     return (
-        df[DataColumns.…
+        df[DataColumns.GAIT_SEGMENT_NR]
+        .map(segment_sizes)
+        .map(categorize)
+        .astype("category")
     )

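The new return chain maps each row's segment number to its segment size, then to a duration category. A schematic with a stand-in `categorize` function and a hypothetical threshold (the real categories and durations live in the pipeline config):

```python
import pandas as pd

df = pd.DataFrame({"segment_nr": [1, 1, 1, 2, 2, 3]})

def categorize(n_rows: int) -> str:
    # Hypothetical threshold for illustration only.
    return "long" if n_rows >= 3 else "short"

# Rows per segment, mapped back onto each row, then labelled and typed.
segment_sizes = df["segment_nr"].value_counts()
categories = (
    df["segment_nr"]
    .map(segment_sizes)
    .map(categorize)
    .astype("category")
)
print(categories.tolist())  # ['long', 'long', 'long', 'short', 'short', 'short']
```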
@@ -354,7 +368,7 @@ class WindowedDataExtractor:
     Returns a slice object for a range of consecutive column names.
     """

-    def __init__(self, windowed_colnames: List[str]):
+    def __init__(self, windowed_colnames: list[str]):
         """
         Initialize the WindowedDataExtractor.

@@ -395,7 +409,7 @@ class WindowedDataExtractor:
             raise ValueError(f"Column name '{colname}' not found in windowed_colnames.")
         return self.column_indices[colname]

-    def get_slice(self, colnames: List[str]) -> slice:
+    def get_slice(self, colnames: list[str]) -> slice:
         """
         Get a slice object for a range of consecutive columns.

@@ -412,7 +426,8 @@ class WindowedDataExtractor:
         Raises
         ------
         ValueError
-            If one or more columns in `colnames` are not found in the `windowed_colnames` list.
+            If one or more columns in `colnames` are not found in the
+            `windowed_colnames` list.
         """
         if not all(col in self.column_indices for col in colnames):
             missing = [col for col in colnames if col not in self.column_indices]
paradigma/testing.py
CHANGED
@@ -1,7 +1,6 @@
 import json
 import os
 from pathlib import Path
-from typing import List

 import numpy as np
 import pandas as pd
@@ -95,7 +94,8 @@ def preprocess_ppg_data_io(
     imu_config: IMUConfig,
 ) -> None:
     """
-    Preprocess PPG and IMU data by resampling, filtering, and aligning the data segments.
+    Preprocess PPG and IMU data by resampling, filtering, and aligning the
+    data segments.

     Parameters
     ----------
@@ -520,13 +520,15 @@ def extract_signal_quality_features_io(
     Parameters
     ----------
     input_path : str | Path
-        The path to the directory containing the preprocessed PPG and accelerometer data.
+        The path to the directory containing the preprocessed PPG and
+        accelerometer data.
     output_path : str | Path
         The path to the directory where the extracted features will be saved.
     ppg_config: PulseRateConfig
         The configuration for the signal quality feature extraction of the ppg signal.
     acc_config: PulseRateConfig
-        The configuration for the signal quality feature extraction of the accelerometer signal.
+        The configuration for the signal quality feature extraction of the
+        accelerometer signal.

     Returns
     -------
@@ -589,23 +591,26 @@ def extract_signal_quality_features_io(
 def aggregate_pulse_rate_io(
     full_path_to_input: str | Path,
     full_path_to_output: str | Path,
-    aggregates: List[str] = ["mode", "99p"],
+    aggregates: list[str] = ["mode", "99p"],
 ) -> None:
     """
-    Extract pulse rate from the PPG signal and save the aggregated pulse rate estimates to a file.
+    Extract pulse rate from the PPG signal and save the aggregated pulse rate
+    estimates to a file.

     Parameters
     ----------
     input_path : str | Path
         The path to the directory containing the pulse rate estimates.
     output_path : str | Path
-        The path to the directory where the aggregated pulse rate estimates will be saved.
+        The path to the directory where the aggregated pulse rate estimates
+        will be saved.
     aggregates : List[str]
-        The list of aggregation methods to be used for the pulse rate estimates. The default is ['mode', '99p'].
+        The list of aggregation methods to be used for the pulse rate
+        estimates. The default is ['mode', '99p'].
     """

     # Load the pulse rate estimates
-    with open(full_path_to_input…
+    with open(full_path_to_input) as f:
         df_pr = json.load(f)

     # Aggregate the pulse rate estimates
paradigma/util.py
CHANGED
|
@@ -2,7 +2,6 @@ import functools
|
|
|
2
2
|
import os
|
|
3
3
|
import warnings
|
|
4
4
|
from datetime import datetime, timedelta
|
|
5
|
-
from typing import List, Optional, Tuple
|
|
6
5
|
|
|
7
6
|
import numpy as np
|
|
8
7
|
import pandas as pd
|
|
@@ -16,12 +15,14 @@ from paradigma.constants import DataColumns, TimeUnit
|
|
|
16
15
|
|
|
17
16
|
def deprecated(reason: str = ""):
|
|
18
17
|
"""
|
|
19
|
-
Decorator to mark functions as deprecated. It will show a warning when the
|
|
18
|
+
Decorator to mark functions as deprecated. It will show a warning when the
|
|
19
|
+
function is used.
|
|
20
20
|
|
|
21
21
|
Parameters
|
|
22
22
|
----------
|
|
23
23
|
reason : str, optional
|
|
24
|
-
Additional message to explain why it is deprecated and what to use
|
|
24
|
+
Additional message to explain why it is deprecated and what to use
|
|
25
|
+
instead.
|
|
25
26
|
"""
|
|
26
27
|
|
|
27
28
|
def decorator(func):
|
|
@@ -155,7 +156,7 @@ def write_df_data(
|
|
|
155
156
|
|
|
156
157
|
def read_metadata(
|
|
157
158
|
input_path: str, meta_filename: str, time_filename: str, values_filename: str
|
|
158
|
-
) ->
|
|
159
|
+
) -> tuple[TSDFMetadata, TSDFMetadata]:
|
|
159
160
|
metadata_dict = tsdf.load_metadata_from_path(
|
|
160
161
|
os.path.join(input_path, meta_filename)
|
|
161
162
|
)
|
|
@@ -186,8 +187,8 @@ def load_tsdf_dataframe(
|
|
|
186
187
|
|
|
187
188
|
|
|
188
189
|
def load_metadata_list(
|
|
189
|
-
dir_path: str, meta_filename: str, filenames:
|
|
190
|
-
) ->
|
|
190
|
+
dir_path: str, meta_filename: str, filenames: list[str]
|
|
191
|
+
) -> list[TSDFMetadata]:
|
|
191
192
|
"""
|
|
192
193
|
Load the metadata objects from a metadata file according to the specified binaries.
|
|
193
194
|
|
|
@@ -216,7 +217,8 @@ def transform_time_array(
|
|
|
216
217
|
start_time: float = 0.0,
|
|
217
218
|
) -> np.ndarray:
|
|
218
219
|
"""
|
|
219
|
-
Transforms the time array to relative time (when defined in delta time)
|
|
220
|
+
Transforms the time array to relative time (when defined in delta time)
|
|
221
|
+
and scales the values.
|
|
220
222
|
|
|
221
223
|
Parameters
|
|
222
224
|
----------
|
|
@@ -225,7 +227,8 @@ def transform_time_array(
|
|
|
225
227
|
input_unit_type : str
|
|
226
228
|
The time unit type of the input time array.
|
|
227
229
|
output_unit_type : str
|
|
228
|
-
The time unit type of the output time array. ParaDigMa expects
|
|
230
|
+
The time unit type of the output time array. ParaDigMa expects
|
|
231
|
+
`TimeUnit.RELATIVE_S`.
|
|
229
232
|
start_time : float, optional
|
|
230
233
|
The start time of the time array in UNIX seconds (default is 0.0)
|
|
231
234
|
|
|
@@ -236,9 +239,13 @@ def transform_time_array(
|
|
|
236
239
|
|
|
237
240
|
Notes
|
|
238
241
|
-----
|
|
239
|
-
- The function handles different time units (`TimeUnit.RELATIVE_MS`,
|
|
240
|
-
|
|
241
|
-
|
|
242
|
+
- The function handles different time units (`TimeUnit.RELATIVE_MS`,
|
|
243
|
+
`TimeUnit.RELATIVE_S`, `TimeUnit.ABSOLUTE_MS`, `TimeUnit.ABSOLUTE_S`,
|
|
244
|
+
`TimeUnit.DIFFERENCE_MS`, `TimeUnit.DIFFERENCE_S`).
|
|
245
|
+
- The transformation allows for scaling of the time array, converting
|
|
246
|
+
between time unit types (e.g., relative, absolute, or difference).
|
|
247
|
+
- When converting to `TimeUnit.RELATIVE_MS`, the function calculates the
|
|
248
|
+
relative time starting from the provided or default start time.
|
|
242
249
|
"""
|
|
243
250
|
input_units = input_unit_type.split("_")[-1].lower()
|
|
244
251
|
output_units = output_unit_type.split("_")[-1].lower()
|
|
@@ -259,7 +266,8 @@ def transform_time_array(
|
|
|
259
266
|
input_unit_type == TimeUnit.DIFFERENCE_MS
|
|
260
267
|
or input_unit_type == TimeUnit.DIFFERENCE_S
|
|
261
268
|
):
|
|
262
|
-
# Convert a series of differences into cumulative sum to
|
|
269
|
+
# Convert a series of differences into cumulative sum to
|
|
270
|
+
# reconstruct original time series.
|
|
263
271
|
time_array = np.cumsum(np.double(time_array))
|
|
264
272
|
elif (
|
|
265
273
|
input_unit_type == TimeUnit.ABSOLUTE_MS
|
|
@@ -271,7 +279,8 @@ def transform_time_array(
|
|
|
271
279
|
# Convert absolute time stamps into a time series relative to start_time.
|
|
272
280
|
time_array = time_array - start_time
|
|
273
281
|
|
|
274
|
-
# Transform the time array from `TimeUnit.RELATIVE_MS` to the
|
|
282
|
+
# Transform the time array from `TimeUnit.RELATIVE_MS` to the
|
|
283
|
+
# specified time unit type
|
|
275
284
|
if (
|
|
276
285
|
output_unit_type == TimeUnit.ABSOLUTE_MS
|
|
277
286
|
or output_unit_type == TimeUnit.ABSOLUTE_S
|
|
@@ -282,7 +291,8 @@ def transform_time_array(
|
|
|
282
291
|
output_unit_type == TimeUnit.DIFFERENCE_MS
|
|
283
292
|
or output_unit_type == TimeUnit.DIFFERENCE_S
|
|
284
293
|
):
|
|
285
|
-
# Creates a new array starting with 0, followed by the
|
|
294
|
+
# Creates a new array starting with 0, followed by the
|
|
295
|
+
# differences between consecutive elements.
|
|
286
296
|
time_array = np.diff(np.insert(time_array, 0, start_time))
|
|
287
297
|
elif (
|
|
288
298
|
output_unit_type == TimeUnit.RELATIVE_MS
|
|
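The comments above describe the difference ↔ relative conversions. A small numeric check of that round trip (the values are arbitrary):

```python
import numpy as np

start_time = 100.0                                    # arbitrary UNIX seconds
absolute = np.array([100.0, 100.02, 100.04, 100.06])  # absolute timestamps

# Absolute -> relative: subtract the start time.
relative = absolute - start_time                      # [0.0, 0.02, 0.04, 0.06]

# Relative -> difference: prepend the start value, then take consecutive diffs.
difference = np.diff(np.insert(relative, 0, 0.0))     # [0.0, 0.02, 0.02, 0.02]

# Difference -> relative: the cumulative sum reconstructs the series.
assert np.allclose(np.cumsum(difference), relative)
```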
@@ -383,7 +393,7 @@ def invert_watch_side(df: pd.DataFrame, side: str, sensor="both") -> np.ndarray:
 def aggregate_parameter(
     parameter: np.ndarray,
     aggregate: str,
-    evaluation_points: Optional[np.ndarray] = None,
+    evaluation_points: np.ndarray | None = None,
 ) -> np.ndarray | int:
     """
     Aggregate a parameter based on the specified method.
@@ -398,7 +408,8 @@ def aggregate_parameter(

     evaluation_points : np.ndarray, optional
         Should be specified if the mode is derived for a continuous parameter.
-        Defines the evaluation points for the kernel density estimation function, from which the maximum is derived as the mode.
+        Defines the evaluation points for the kernel density estimation
+        function, from which the maximum is derived as the mode.

     Returns
     -------
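The docstring above explains that the mode of a continuous parameter is taken as the argmax of a kernel density estimate over `evaluation_points`. A sketch of that idea using `scipy.stats.gaussian_kde` (SciPy is assumed available; paradigma's own implementation may differ):

```python
import numpy as np
from scipy.stats import gaussian_kde

rng = np.random.default_rng(seed=1)
parameter = rng.normal(loc=72.0, scale=5.0, size=500)  # e.g., pulse rate values

# Evaluate the density on a fixed grid; the grid point with the highest
# density is taken as the mode of the continuous parameter.
evaluation_points = np.linspace(40.0, 120.0, 801)
density = gaussian_kde(parameter)(evaluation_points)
mode = evaluation_points[np.argmax(density)]
print(mode)  # close to 72.0
```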
@@ -445,8 +456,9 @@ def merge_predictions_with_timestamps(
     fs: int,
 ) -> pd.DataFrame:
     """
-    Merges prediction probabilities with timestamps by expanding overlapping windows
-    into individual timestamps and averaging probabilities per unique timestamp.
+    Merges prediction probabilities with timestamps by expanding overlapping
+    windows into individual timestamps and averaging probabilities per unique
+    timestamp.

     Parameters:
     ----------
@@ -455,10 +467,11 @@ def merge_predictions_with_timestamps(
         Must include the timestamp column specified in `DataColumns.TIME`.

     df_predictions : pd.DataFrame
-        DataFrame containing prediction windows with start times and probabilities.
-        Must include:
+        DataFrame containing prediction windows with start times and
+        probabilities. Must include:
         - A column for window start times (defined by `DataColumns.TIME`).
-        - A column for prediction probabilities (defined by `DataColumns.PRED_GAIT_PROBA`).
+        - A column for prediction probabilities (defined by
+          `DataColumns.PRED_GAIT_PROBA`).

     pred_proba_colname : str
         The column name for the prediction probabilities in `df_predictions`.
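As context for the reworded docstring: expanding overlapping windows into per-sample timestamps and averaging can be sketched as follows (the column names `time` and `pred_proba` are placeholders for the `DataColumns` constants):

```python
import numpy as np
import pandas as pd

fs = 2                   # Hz
samples_per_window = 4   # e.g., 2 s windows at 2 Hz

# Two overlapping windows starting at t = 0.0 s and t = 1.0 s.
df_predictions = pd.DataFrame({"time": [0.0, 1.0], "pred_proba": [0.2, 0.8]})

# Expand each window into the timestamps of its individual samples.
expanded = pd.DataFrame({
    "time": np.concatenate([
        start + np.arange(samples_per_window) / fs
        for start in df_predictions["time"]
    ]),
    "pred_proba": np.repeat(
        df_predictions["pred_proba"].to_numpy(), samples_per_window
    ),
})

# Average probabilities per unique timestamp where windows overlap.
merged = expanded.groupby("time", as_index=False)["pred_proba"].mean()
print(merged)  # timestamps 1.0 s and 1.5 s get the average 0.5
```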
@@ -559,7 +572,8 @@ def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
         Input data with column 'time_dt' in which the date is stored.

     min_hours_per_day: int
-        The minimum number of hours per day required for including the day in the aggregation step.
+        The minimum number of hours per day required for including the day
+        in the aggregation step.


     Returns
paradigma-1.1.0.dist-info/METADATA
ADDED
@@ -0,0 +1,229 @@
+Metadata-Version: 2.4
+Name: paradigma
+Version: 1.1.0
+Summary: ParaDigMa - A toolbox for deriving Parkinson's disease Digital Markers from real-life wrist sensor data
+License: Apache-2.0
+License-File: LICENSE
+Author: Erik Post
+Author-email: erik.post@radboudumc.nl
+Requires-Python: >=3.11,<4.0
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Requires-Dist: avro (>=1.12.1,<2.0.0)
+Requires-Dist: nbconvert (>=7.16.6,<8.0.0)
+Requires-Dist: pandas (>=2.1.4,<3.0.0)
+Requires-Dist: pyarrow (>=22.0.0,<23.0.0)
+Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
+Requires-Dist: scikit-learn (>=1.3.2,<1.6.1)
+Requires-Dist: tsdf (>=0.6.1,<0.7.0)
+Description-Content-Type: text/markdown
+
+<p align="center">
+  <img src="https://raw.githubusercontent.com/biomarkersParkinson/paradigma/main/docs/source/_static/img/paradigma-logo-banner.png" alt="ParaDigMa logo"/>
+</p>
+
+| Badges | |
+|:----:|----|
+| **Packages and Releases** | [](https://github.com/biomarkersparkinson/paradigma/releases/latest) [](https://pypi.python.org/pypi/paradigma/) [](https://research-software-directory.org/software/paradigma) |
+| **DOI** | [](https://doi.org/10.5281/zenodo.13838392) |
+| **Build Status** | [](https://www.python.org/downloads/) [](https://github.com/biomarkersParkinson/paradigma/actions/workflows/build-and-test.yml) [](https://github.com/biomarkersParkinson/paradigma/actions/workflows/pages/pages-build-deployment) |
+| **License** | [](https://github.com/biomarkersparkinson/paradigma/blob/main/LICENSE) |
+
+## Overview
+
+ParaDigMa (Parkinson's disease Digital Markers) is a Python toolbox for extracting validated digital biomarkers from wrist sensor data in Parkinson's disease. It processes accelerometer, gyroscope, and PPG signals collected during passive monitoring in daily life.
+
+**Key Features:**
+- Arm swing during gait analysis
+- Tremor analysis
+- Pulse rate analysis
+- Scientifically validated in peer-reviewed publications
+- Modular, extensible architecture for custom analyses
+
+## Quick Start
+
+### Installation
+
+**For regular use:**
+
+```bash
+pip install paradigma
+```
+
+Requires Python 3.11+.
+
+**For development or running tutorials:**
+
+Example data requires git-lfs. See the [installation guide](https://biomarkersparkinson.github.io/paradigma/guides/installation.html) for setup instructions.
+
+### Basic Usage
+
+```python
+from paradigma.orchestrator import run_paradigma
+
+# Example 1: Single DataFrame with default output directory
+results = run_paradigma(
+    dfs=df,
+    pipelines=['gait', 'tremor'],
+    watch_side='left',  # Required for gait pipeline
+    save_intermediate=['quantification', 'aggregation']  # Saves to ./output by default
+)
+
+# Example 2: Multiple DataFrames as list (assigned to 'df_1', 'df_2', etc.)
+results = run_paradigma(
+    dfs=[df1, df2, df3],
+    pipelines=['gait', 'tremor'],
+    output_dir="./results",  # Custom output directory
+    watch_side='left',
+    save_intermediate=['quantification', 'aggregation']
+)
+
+# Example 3: Dictionary of DataFrames (custom segment/file names)
+results = run_paradigma(
+    dfs={'morning_session': df1, 'evening_session': df2},
+    pipelines=['gait', 'tremor'],
+    watch_side='right',
+    save_intermediate=[]  # No files saved - results only in memory
+)
+
+# Example 4: Load from data directory
+results = run_paradigma(
+    data_path='./my_data',
+    pipelines=['gait', 'tremor'],
+    watch_side='left',
+    file_pattern='*.parquet',
+    save_intermediate=['quantification', 'aggregation']
+)
+
+# Access results (nested by pipeline)
+gait_measures = results['quantifications']['gait']
+tremor_measures = results['quantifications']['tremor']
+gait_aggregates = results['aggregations']['gait']
+tremor_aggregates = results['aggregations']['tremor']

+# Check for errors
+if results['errors']:
+    print(f"Warning: {len(results['errors'])} error(s) occurred")
+```
+
+**See our [tutorials](https://biomarkersparkinson.github.io/paradigma/tutorials/index.html) for complete examples.**
+
+## Pipelines
+
+<p align="center">
+  <img src="https://raw.githubusercontent.com/biomarkersParkinson/paradigma/main/docs/source/_static/img/pipeline-architecture.png" alt="Pipeline architecture"/>
+</p>
+
+### Validated Processing Pipelines
+
+| Pipeline | Input sensors | Output week-level aggregation | Publications | Tutorial |
+| ---- | ---- | ------- | ---- | ---- |
+| **Arm swing during gait** | Accelerometer + Gyroscope | Typical, maximum & variability of arm swing range of motion | [Post 2025](https://doi.org/10.1186/s12984-025-01578-z), [Post 2026*](https://doi.org/10.64898/2026.01.06.26343500) | [Guide](https://biomarkersparkinson.github.io/paradigma/tutorials/gait_analysis) |
+| **Tremor** | Gyroscope | % tremor time, typical & maximum tremor power | [Timmermans 2025a](https://doi.org/10.1038/s41531-025-01056-2), [Timmermans 2025b*](https://www.medrxiv.org/content/10.64898/2025.12.23.25342892v1) | [Guide](https://biomarkersparkinson.github.io/paradigma/tutorials/tremor_analysis) |
+| **Pulse rate** | PPG (+ Accelerometer) | Resting & maximum pulse rate | [Veldkamp 2025*](https://doi.org/10.1101/2025.08.15.25333751) | [Guide](https://biomarkersparkinson.github.io/paradigma/tutorials/pulse_rate_analysis) |
+
+*\* Indicates pre-print*
+
+### Pipeline Architecture
+
+ParaDigMa can best be understood by categorizing the sequential processes:
+| Process | Description |
+| ---- | ---- |
+| **Preprocessing** | Preparing raw sensor signals for further processing |
+| **Feature extraction** | Extracting features based on windowed sensor signals |
+| **Classification** | Detecting segments of interest using validated classifiers (e.g., gait segments) |
+| **Quantification** | Extracting specific measures from the detected segments (e.g., arm swing measures) |
+| **Aggregation** | Aggregating the measures over a specific time period (e.g., week-level aggregates) |
+
+## Usage
+### Documentation
+
+- **[Tutorials](https://biomarkersparkinson.github.io/paradigma/tutorials/index.html)** - Step-by-step usage examples
+- **[Installation Guide](https://biomarkersparkinson.github.io/paradigma/guides/installation.html)** - Setup and troubleshooting
+- **[Sensor Requirements](https://biomarkersparkinson.github.io/paradigma/guides/sensor_requirements.html)** - Data specifications and compliance
+- **[Supported Devices](https://biomarkersparkinson.github.io/paradigma/guides/supported_devices.html)** - Validated hardware
+- **[Input Formats Guide](https://biomarkersparkinson.github.io/paradigma/guides/input_formats.html)** - Input format options and data loading
+- **[Configuration Guide](https://biomarkersparkinson.github.io/paradigma/guides/config.html)** - Pipeline configuration
+- **[Scientific Validation](https://biomarkersparkinson.github.io/paradigma/guides/validation.html)** - Validation studies and publications
+- **[API Reference](https://biomarkersparkinson.github.io/paradigma/autoapi/paradigma/index.html)** - Complete API documentation
+
+### Sensor Requirements & Supported Devices
+
+ParaDigMa is designed for wrist sensor data collected during passive monitoring in persons with Parkinson's disease. While designed to work with any compliant device, it has been empirically validated on:
+
+- **Verily Study Watch** (gait, tremor, pulse rate)
+- **Axivity AX6** (gait, tremor)
+- **Gait-up Physilog 4** (gait, tremor)
+- **Empatica EmbracePlus** (data loading)
+
+Please check before running the pipelines whether your sensor data complies with the requirements for the sensor configuration and context of use. See the [sensor requirements guide](https://biomarkersparkinson.github.io/paradigma/guides/sensor_requirements.html) for data specifications and the [supported devices guide](https://biomarkersparkinson.github.io/paradigma/guides/supported_devices.html) for device-specific setup instructions.
+
+### Data Formats
+
+ParaDigMa supports the following data formats:
+
+- In-memory (recommended): **Pandas DataFrames** (see examples above)
+- Data loading file extensions: **TSDF, Parquet, CSV, Pickle** and **several device-specific formats** (AVRO (Empatica), CWA (Axivity))
+
+### Troubleshooting
+
+For installation issues, see the [installation guide troubleshooting section](https://biomarkersparkinson.github.io/paradigma/guides/installation.html#troubleshooting).
+
+For other issues, check our [issue tracker](https://github.com/biomarkersParkinson/paradigma/issues) or contact paradigma@radboudumc.nl.
+
+## Scientific Validation
+
+ParaDigMa pipelines are validated in peer-reviewed publications:
+
+| Pipeline | Publication |
+|----------|-------------|
+| **Arm swing during gait** | Post et al. (2025, 2026) |
+| **Tremor** | Timmermans et al. (2025a, 2025b) |
+| **Pulse rate** | Veldkamp et al. (2025) |
+
+See the [validation guide](https://biomarkersparkinson.github.io/paradigma/guides/validation.html) for full publication details.
+
+## Contributing
+
+We welcome contributions! Please see:
+
+- [Contributing Guidelines](https://biomarkersparkinson.github.io/paradigma/contributing.html)
+- [Code of Conduct](https://biomarkersparkinson.github.io/paradigma/conduct.html)
+
+## Citation
+
+If you use ParaDigMa in your research, please cite:
+
+```bibtex
+@software{paradigma2024,
+  author = {Post, Erik and Veldkamp, Kars and Timmermans, Nienke and
+            Soriano, Diogo Coutinho and Kasalica, Vedran and
+            Kok, Peter and Evers, Luc},
+  title = {ParaDigMa: Parkinson's disease Digital Markers},
+  year = {2024},
+  doi = {10.5281/zenodo.13838392},
+  url = {https://github.com/biomarkersParkinson/paradigma}
+}
+```
+
+## License
+
+Licensed under the Apache License 2.0. See [LICENSE](LICENSE) for details.
+
+## Acknowledgements
+
+**Core Team**: Erik Post, Kars Veldkamp, Nienke Timmermans, Diogo Coutinho Soriano, Vedran Kasalica, Peter Kok, Twan van Laarhoven, Luc Evers
+
+**Advisors**: Max Little, Jordan Raykov, Hayriye Cagnan, Bas Bloem
+
+**Funding**: the initial release was funded by the Michael J. Fox Foundation (grant #020425) and the Dutch Research Council (grants #ASDI.2020.060, #2023.010)
+
+## Contact
+
+- Email: paradigma@radboudumc.nl
+- [Issue Tracker](https://github.com/biomarkersParkinson/paradigma/issues)
+