paradigma 1.0.4-py3-none-any.whl → 1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
paradigma/segmenting.py CHANGED
@@ -1,5 +1,3 @@
- from typing import List
-
  import numpy as np
  import pandas as pd

@@ -9,19 +7,21 @@ from paradigma.util import deprecated

  def tabulate_windows(
  df: pd.DataFrame,
- columns: List[str],
+ columns: list[str],
  window_length_s: float,
  window_step_length_s: float,
  fs: int,
  ) -> np.ndarray:
  """
- Split the given DataFrame into overlapping windows of specified length and step size.
+ Split the given DataFrame into overlapping windows of specified length
+ and step size.

- This function extracts windows of data from the specified columns of the DataFrame, based on
- the window length and step size provided in the configuration. The windows are returned in
- a 3D NumPy array, where the first dimension represents the window index, the second dimension
- represents the time steps within the window, and the third dimension represents the columns
- of the data.
+ This function extracts windows of data from the specified columns of the
+ DataFrame, based on the window length and step size provided in the
+ configuration. The windows are returned in a 3D NumPy array, where the
+ first dimension represents the window index, the second dimension
+ represents the time steps within the window, and the third dimension
+ represents the columns of the data.

  Parameters
  ----------
@@ -40,17 +40,22 @@ def tabulate_windows(
  -------
  np.ndarray
  A 3D NumPy array of shape (n_windows, window_size, n_columns), where:
- - `n_windows` is the number of windows that can be formed from the data.
- - `window_size` is the length of each window in terms of the number of time steps.
- - `n_columns` is the number of columns in the input DataFrame specified by `columns`.
+ - `n_windows` is the number of windows that can be formed from the
+ data.
+ - `window_size` is the length of each window in terms of the number
+ of time steps.
+ - `n_columns` is the number of columns in the input DataFrame
+ specified by `columns`.

- If the length of the data is shorter than the specified window size, an empty array is returned.
+ If the length of the data is shorter than the specified window size,
+ an empty array is returned.

  Notes
  -----
- This function uses `np.lib.stride_tricks.sliding_window_view` to generate sliding windows of data.
- The step size is applied to extract windows at intervals.
- If the data is insufficient for at least one window, an empty array will be returned.
+ This function uses `np.lib.stride_tricks.sliding_window_view` to
+ generate sliding windows of data. The step size is applied to extract
+ windows at intervals. If the data is insufficient for at least one
+ window, an empty array will be returned.

  Example
  -------
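
The notes in this hunk say the windowing is built on `np.lib.stride_tricks.sliding_window_view` with a step applied between window starts. A minimal standalone sketch of that pattern; the data, column names, and rates below are illustrative, not ParaDigMa's API:

```python
import numpy as np
import pandas as pd

# Illustrative input: 10 samples of two channels at 2 Hz (made-up data).
df = pd.DataFrame({"acc_x": np.arange(10.0), "acc_y": np.arange(10.0) * 2})
fs = 2                      # sampling frequency in Hz
window_length_s = 2.0       # seconds per window
window_step_length_s = 1.0  # seconds between window starts

window_size = int(window_length_s * fs)  # 4 samples per window
step = int(window_step_length_s * fs)    # 2 samples between window starts

values = df[["acc_x", "acc_y"]].to_numpy()
if len(values) < window_size:
    # Too little data for a single window: return an empty array.
    windows = np.empty((0, window_size, values.shape[1]))
else:
    # sliding_window_view yields every window of length `window_size` along
    # axis 0; taking every `step`-th window applies the step length.
    windows = np.lib.stride_tricks.sliding_window_view(
        values, window_shape=window_size, axis=0
    ).transpose(0, 2, 1)[::step]

print(windows.shape)  # (4, 4, 2): (n_windows, window_size, n_columns)
```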
@@ -84,7 +89,8 @@ def tabulate_windows(

  def tabulate_windows_legacy(config, df, agg_func="first"):
  """
- Efficiently creates a windowed dataframe from the input dataframe using vectorized operations.
+ Efficiently creates a windowed dataframe from the input dataframe using
+ vectorized operations.

  Parameters
  ----------
@@ -93,11 +99,13 @@ def tabulate_windows_legacy(config, df, agg_func="first"):
  - `window_length_s`: The number of seconds per window.
  - `window_step_length_s`: The number of seconds to shift between windows.
  - `sampling_frequency`: The sampling frequency in Hz.
- - `single_value_colnames`: List of column names where a single value (e.g., mean) is needed.
- - `list_value_colnames`: List of column names where all 600 values should be stored in a list.
+ - `single_value_colnames`: List of column names where a single value
+ (e.g., mean) is needed.
+ - `list_value_colnames`: List of column names where all 600 values
+ should be stored in a list.
  agg_func : str or callable, optional
- Aggregation function for single-value columns. Can be 'mean', 'first', or a custom callable.
- Default is 'first'.
+ Aggregation function for single-value columns. Can be 'mean',
+ 'first', or a custom callable. Default is 'first'.

  Returns
  -------
@@ -122,7 +130,8 @@ def tabulate_windows_legacy(config, df, agg_func="first"):
  n_rows = len(df)
  if window_length > n_rows:
  raise ValueError(
- f"Window size ({window_length}) cannot be greater than the number of rows ({n_rows}) in the dataframe."
+ f"Window size ({window_length}) cannot be greater than the "
+ f"number of rows ({n_rows}) in the dataframe."
  )

  # Create indices for window start positions
@@ -170,7 +179,8 @@ def tabulate_windows_legacy(config, df, agg_func="first"):
  # Convert result list into a DataFrame
  windowed_df = pd.DataFrame(result)

- # Ensure the column order is as desired: window_nr, window_start, window_end, pre_or_post, and then the rest
+ # Ensure the column order is as desired: window_nr, window_start,
+ # window_end, pre_or_post, and then the rest
  desired_order = (
  ["window_nr", "window_start", "window_end"]
  + config.single_value_colnames
@@ -191,7 +201,7 @@ def create_segments(
  gap_exceeds = time_diff > max_segment_gap_s

  # Create the segment number based on the cumulative sum of the gap_exceeds mask
- segments = gap_exceeds.cumsum()
+ segments = gap_exceeds.cumsum() + 1

  return segments
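
The change above makes segment numbers start at 1 instead of 0. A small self-contained illustration of the cumsum-based segmentation described in the comment (the timestamps and gap threshold are made up):

```python
import pandas as pd

# A new segment starts wherever the gap to the previous sample exceeds the
# maximum allowed gap; cumulative summing the boolean mask numbers the segments.
time_s = pd.Series([0.00, 0.01, 0.02, 5.00, 5.01, 12.00])
max_segment_gap_s = 1.5

gap_exceeds = time_s.diff() > max_segment_gap_s
segments = gap_exceeds.cumsum() + 1  # numbering now starts at 1

print(segments.tolist())  # [1, 1, 1, 2, 2, 3]
```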

@@ -229,7 +239,8 @@ def discard_segments(

  Example
  -------
- config = Config(min_segment_length_s=2, sampling_frequency=100, segment_nr_colname='segment')
+ config = Config(min_segment_length_s=2, sampling_frequency=100,
+ segment_nr_colname='segment')
  df = pd.DataFrame({
  'segment': [1, 1, 2, 2, 2],
  'time': [0, 1, 2, 3, 4]
@@ -245,26 +256,26 @@ def discard_segments(
  """
  # Minimum segment size in number of samples
  if format == "timestamps":
- min_samples = min_segment_length_s * fs
+ min_samples = int(min_segment_length_s * fs)
  elif format == "windows":
- min_samples = min_segment_length_s
+ min_samples = int(min_segment_length_s)
  else:
  raise ValueError("Invalid format. Must be 'timestamps' or 'windows'.")

- # Group by segment and filter out small segments in one step
- valid_segment_mask = (
- df.groupby(segment_nr_colname)[segment_nr_colname].transform("size")
- >= min_samples
- )
+ # Count samples per segment
+ segment_counts = df.groupby(segment_nr_colname).size()

- df = df[valid_segment_mask].copy()
+ # Filter rows for valid segments (>= min samples)
+ counts_map = segment_counts.to_dict()
+ df = df[df[segment_nr_colname].map(counts_map) >= min_samples].copy()

  if df.empty:
- raise ValueError("All segments were removed.")
+ raise ValueError(
+ f"All segments were removed: no segment ≥ {min_samples} samples."
+ )

- # Reset segment numbers in a single step
- unique_segments = pd.factorize(df[segment_nr_colname])[0] + 1
- df[segment_nr_colname] = unique_segments
+ # Reset segment numbers
+ df[segment_nr_colname] = pd.factorize(df[segment_nr_colname])[0] + 1

  return df
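
A standalone sketch of the new filtering logic in this hunk: count samples per segment, keep only segments with at least `min_samples` rows, and renumber the survivors from 1. The example data is made up:

```python
import pandas as pd

# Three segments; segment 2 has only one sample and should be discarded.
df = pd.DataFrame({"segment": [1, 1, 1, 2, 3, 3, 3, 3], "value": range(8)})
min_samples = 3

# Count samples per segment and keep rows whose segment is long enough.
segment_counts = df.groupby("segment").size()
counts_map = segment_counts.to_dict()
df = df[df["segment"].map(counts_map) >= min_samples].copy()

if df.empty:
    raise ValueError(f"All segments were removed: no segment >= {min_samples} samples.")

# Renumber the remaining segments consecutively, starting at 1.
df["segment"] = pd.factorize(df["segment"])[0] + 1
print(df["segment"].tolist())  # [1, 1, 1, 2, 2, 2, 2]
```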

@@ -313,7 +324,7 @@ def categorize_segments(df, fs, format="timestamps", window_step_length_s=None):
  d_max_duration = {k: v * fs for k, v in d_max_duration.items()}

  # Count rows per segment
- segment_sizes = df[DataColumns.SEGMENT_NR].value_counts()
+ segment_sizes = df[DataColumns.GAIT_SEGMENT_NR].value_counts()

  # Convert segment sizes to duration in seconds
  if format == "windows":
@@ -332,7 +343,10 @@ def categorize_segments(df, fs, format="timestamps", window_step_length_s=None):

  # Apply categorization to the DataFrame
  return (
- df[DataColumns.SEGMENT_NR].map(segment_sizes).map(categorize).astype("category")
+ df[DataColumns.GAIT_SEGMENT_NR]
+ .map(segment_sizes)
+ .map(categorize)
+ .astype("category")
  )
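
The pattern in this hunk maps each row's segment number to the segment's size and then to a duration category. A standalone sketch of that chain; the thresholds, labels, and column name below are illustrative assumptions, not ParaDigMa's actual category boundaries:

```python
import pandas as pd

fs = 100  # Hz
df = pd.DataFrame({"gait_segment_nr": [1] * 500 + [2] * 2500})  # made-up segments

# Illustrative duration thresholds (seconds), converted to samples as in the hunk.
d_max_duration = {"short": 10, "moderate": 20}
d_max_duration = {k: v * fs for k, v in d_max_duration.items()}

# Count rows per segment.
segment_sizes = df["gait_segment_nr"].value_counts()

def categorize(size: int) -> str:
    for label, max_size in d_max_duration.items():
        if size <= max_size:
            return label
    return "long"

# Map each row to its segment's size, then to a category.
categories = (
    df["gait_segment_nr"].map(segment_sizes).map(categorize).astype("category")
)
print(categories.value_counts().to_dict())  # {'long': 2500, 'short': 500}
```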

@@ -354,7 +368,7 @@ class WindowedDataExtractor:
  Returns a slice object for a range of consecutive column names.
  """

- def __init__(self, windowed_colnames: List[str]):
+ def __init__(self, windowed_colnames: list[str]):
  """
  Initialize the WindowedDataExtractor.

@@ -395,7 +409,7 @@ class WindowedDataExtractor:
  raise ValueError(f"Column name '{colname}' not found in windowed_colnames.")
  return self.column_indices[colname]

- def get_slice(self, colnames: List[str]) -> slice:
+ def get_slice(self, colnames: list[str]) -> slice:
  """
  Get a slice object for a range of consecutive columns.

@@ -412,7 +426,8 @@ class WindowedDataExtractor:
  Raises
  ------
  ValueError
- If one or more columns in `colnames` are not found in the `windowed_colnames` list.
+ If one or more columns in `colnames` are not found in the
+ `windowed_colnames` list.
  """
  if not all(col in self.column_indices for col in colnames):
  missing = [col for col in colnames if col not in self.column_indices]
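
From the docstrings shown here, the extractor maps windowed column names to array indices and consecutive names to a slice into the last axis of the windowed array. A hedged, standalone re-implementation of that idea for illustration only; the real class and its attribute names may differ:

```python
# Hypothetical column layout of a windowed array of shape (n_windows, window_size, 4).
windowed_colnames = ["time", "acc_x", "acc_y", "acc_z"]
column_indices = {col: i for i, col in enumerate(windowed_colnames)}

def get_index(colname: str) -> int:
    # Index of a single column, mirroring the ValueError shown in the diff.
    if colname not in column_indices:
        raise ValueError(f"Column name '{colname}' not found in windowed_colnames.")
    return column_indices[colname]

def get_slice(colnames: list[str]) -> slice:
    # Slice spanning a range of consecutive columns.
    if not all(col in column_indices for col in colnames):
        missing = [col for col in colnames if col not in column_indices]
        raise ValueError(f"Columns not found in windowed_colnames: {missing}")
    indices = [column_indices[col] for col in colnames]
    return slice(min(indices), max(indices) + 1)

print(get_index("time"))                       # 0
print(get_slice(["acc_x", "acc_y", "acc_z"]))  # slice(1, 4)
```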
paradigma/testing.py CHANGED
@@ -1,7 +1,6 @@
  import json
  import os
  from pathlib import Path
- from typing import List

  import numpy as np
  import pandas as pd
@@ -95,7 +94,8 @@ def preprocess_ppg_data_io(
  imu_config: IMUConfig,
  ) -> None:
  """
- Preprocess PPG and IMU data by resampling, filtering, and aligning the data segments.
+ Preprocess PPG and IMU data by resampling, filtering, and aligning the
+ data segments.

  Parameters
  ----------
@@ -520,13 +520,15 @@ def extract_signal_quality_features_io(
  Parameters
  ----------
  input_path : str | Path
- The path to the directory containing the preprocessed PPG and accelerometer data.
+ The path to the directory containing the preprocessed PPG and
+ accelerometer data.
  output_path : str | Path
  The path to the directory where the extracted features will be saved.
  ppg_config: PulseRateConfig
  The configuration for the signal quality feature extraction of the ppg signal.
  acc_config: PulseRateConfig
- The configuration for the signal quality feature extraction of the accelerometer signal.
+ The configuration for the signal quality feature extraction of the
+ accelerometer signal.

  Returns
  -------
@@ -589,23 +591,26 @@ def extract_signal_quality_features_io(
  def aggregate_pulse_rate_io(
  full_path_to_input: str | Path,
  full_path_to_output: str | Path,
- aggregates: List[str] = ["mode", "99p"],
+ aggregates: list[str] = ["mode", "99p"],
  ) -> None:
  """
- Extract pulse rate from the PPG signal and save the aggregated pulse rate estimates to a file.
+ Extract pulse rate from the PPG signal and save the aggregated pulse rate
+ estimates to a file.

  Parameters
  ----------
  input_path : str | Path
  The path to the directory containing the pulse rate estimates.
  output_path : str | Path
- The path to the directory where the aggregated pulse rate estimates will be saved.
+ The path to the directory where the aggregated pulse rate estimates
+ will be saved.
  aggregates : List[str]
- The list of aggregation methods to be used for the pulse rate estimates. The default is ['mode', '99p'].
+ The list of aggregation methods to be used for the pulse rate
+ estimates. The default is ['mode', '99p'].
  """

  # Load the pulse rate estimates
- with open(full_path_to_input, "r") as f:
+ with open(full_path_to_input) as f:
  df_pr = json.load(f)

  # Aggregate the pulse rate estimates
paradigma/util.py CHANGED
@@ -2,7 +2,6 @@ import functools
  import os
  import warnings
  from datetime import datetime, timedelta
- from typing import List, Optional, Tuple

  import numpy as np
  import pandas as pd
@@ -16,12 +15,14 @@ from paradigma.constants import DataColumns, TimeUnit

  def deprecated(reason: str = ""):
  """
- Decorator to mark functions as deprecated. It will show a warning when the function is used.
+ Decorator to mark functions as deprecated. It will show a warning when the
+ function is used.

  Parameters
  ----------
  reason : str, optional
- Additional message to explain why it is deprecated and what to use instead.
+ Additional message to explain why it is deprecated and what to use
+ instead.
  """

  def decorator(func):
@@ -155,7 +156,7 @@ def write_df_data(

  def read_metadata(
  input_path: str, meta_filename: str, time_filename: str, values_filename: str
- ) -> Tuple[TSDFMetadata, TSDFMetadata]:
+ ) -> tuple[TSDFMetadata, TSDFMetadata]:
  metadata_dict = tsdf.load_metadata_from_path(
  os.path.join(input_path, meta_filename)
  )
@@ -186,8 +187,8 @@ def load_tsdf_dataframe(


  def load_metadata_list(
- dir_path: str, meta_filename: str, filenames: List[str]
- ) -> List[TSDFMetadata]:
+ dir_path: str, meta_filename: str, filenames: list[str]
+ ) -> list[TSDFMetadata]:
  """
  Load the metadata objects from a metadata file according to the specified binaries.

@@ -216,7 +217,8 @@ def transform_time_array(
  start_time: float = 0.0,
  ) -> np.ndarray:
  """
- Transforms the time array to relative time (when defined in delta time) and scales the values.
+ Transforms the time array to relative time (when defined in delta time)
+ and scales the values.

  Parameters
  ----------
@@ -225,7 +227,8 @@ def transform_time_array(
  input_unit_type : str
  The time unit type of the input time array.
  output_unit_type : str
- The time unit type of the output time array. ParaDigMa expects `TimeUnit.RELATIVE_S`.
+ The time unit type of the output time array. ParaDigMa expects
+ `TimeUnit.RELATIVE_S`.
  start_time : float, optional
  The start time of the time array in UNIX seconds (default is 0.0)

@@ -236,9 +239,13 @@ def transform_time_array(

  Notes
  -----
- - The function handles different time units (`TimeUnit.RELATIVE_MS`, `TimeUnit.RELATIVE_S`, `TimeUnit.ABSOLUTE_MS`, `TimeUnit.ABSOLUTE_S`, `TimeUnit.DIFFERENCE_MS`, `TimeUnit.DIFFERENCE_S`).
- - The transformation allows for scaling of the time array, converting between time unit types (e.g., relative, absolute, or difference).
- - When converting to `TimeUnit.RELATIVE_MS`, the function calculates the relative time starting from the provided or default start time.
+ - The function handles different time units (`TimeUnit.RELATIVE_MS`,
+ `TimeUnit.RELATIVE_S`, `TimeUnit.ABSOLUTE_MS`, `TimeUnit.ABSOLUTE_S`,
+ `TimeUnit.DIFFERENCE_MS`, `TimeUnit.DIFFERENCE_S`).
+ - The transformation allows for scaling of the time array, converting
+ between time unit types (e.g., relative, absolute, or difference).
+ - When converting to `TimeUnit.RELATIVE_MS`, the function calculates the
+ relative time starting from the provided or default start time.
  """
  input_units = input_unit_type.split("_")[-1].lower()
  output_units = output_unit_type.split("_")[-1].lower()
@@ -259,7 +266,8 @@ def transform_time_array(
  input_unit_type == TimeUnit.DIFFERENCE_MS
  or input_unit_type == TimeUnit.DIFFERENCE_S
  ):
- # Convert a series of differences into cumulative sum to reconstruct original time series.
+ # Convert a series of differences into cumulative sum to
+ # reconstruct original time series.
  time_array = np.cumsum(np.double(time_array))
  elif (
  input_unit_type == TimeUnit.ABSOLUTE_MS
@@ -271,7 +279,8 @@ def transform_time_array(
  # Convert absolute time stamps into a time series relative to start_time.
  time_array = time_array - start_time

- # Transform the time array from `TimeUnit.RELATIVE_MS` to the specified time unit type
+ # Transform the time array from `TimeUnit.RELATIVE_MS` to the
+ # specified time unit type
  if (
  output_unit_type == TimeUnit.ABSOLUTE_MS
  or output_unit_type == TimeUnit.ABSOLUTE_S
@@ -282,7 +291,8 @@ def transform_time_array(
  output_unit_type == TimeUnit.DIFFERENCE_MS
  or output_unit_type == TimeUnit.DIFFERENCE_S
  ):
- # Creates a new array starting with 0, followed by the differences between consecutive elements.
+ # Creates a new array starting with 0, followed by the
+ # differences between consecutive elements.
  time_array = np.diff(np.insert(time_array, 0, start_time))
  elif (
  output_unit_type == TimeUnit.RELATIVE_MS
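
The comments rewrapped in these hunks describe two inverse conversions: differences are accumulated back into a time series with `np.cumsum`, and a relative series is turned into differences by prepending the start time and diffing. A tiny runnable illustration using the same NumPy calls as the diffed lines (the values are made up):

```python
import numpy as np

start_time = 0.0
diffs_ms = np.array([10.0, 10.0, 10.0, 20.0])

# DIFFERENCE_* -> relative: cumulative sum reconstructs the original series.
relative_ms = np.cumsum(np.double(diffs_ms))
print(relative_ms)  # [10. 20. 30. 50.]

# Relative -> DIFFERENCE_*: prepend the start time, then take consecutive differences.
back_to_diffs = np.diff(np.insert(relative_ms, 0, start_time))
print(back_to_diffs)  # [10. 10. 10. 20.]
```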
@@ -383,7 +393,7 @@ def invert_watch_side(df: pd.DataFrame, side: str, sensor="both") -> np.ndarray:
  def aggregate_parameter(
  parameter: np.ndarray,
  aggregate: str,
- evaluation_points: Optional[np.ndarray] = None,
+ evaluation_points: np.ndarray | None = None,
  ) -> np.ndarray | int:
  """
  Aggregate a parameter based on the specified method.
@@ -398,7 +408,8 @@ def aggregate_parameter(

  evaluation_points : np.ndarray, optional
  Should be specified if the mode is derived for a continuous parameter.
- Defines the evaluation points for the kernel density estimation function, from which the maximum is derived as the mode.
+ Defines the evaluation points for the kernel density estimation
+ function, from which the maximum is derived as the mode.

  Returns
  -------
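
The docstring describes deriving the mode of a continuous parameter from a kernel density estimate evaluated at `evaluation_points`. A hedged sketch of that idea using SciPy's `gaussian_kde`; this is an assumption for illustration, not necessarily the estimator the package uses:

```python
import numpy as np
from scipy.stats import gaussian_kde

rng = np.random.default_rng(0)
# Made-up pulse-rate-like values with a dominant peak near 70.
parameter = np.concatenate([rng.normal(70, 2, 500), rng.normal(90, 5, 50)])
evaluation_points = np.arange(40.0, 120.0, 0.1)

# Fit a KDE and take the evaluation point with the highest density as the mode.
kde = gaussian_kde(parameter)
mode = evaluation_points[np.argmax(kde(evaluation_points))]
print(round(mode, 1))  # close to 70
```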
@@ -445,8 +456,9 @@ def merge_predictions_with_timestamps(
  fs: int,
  ) -> pd.DataFrame:
  """
- Merges prediction probabilities with timestamps by expanding overlapping windows
- into individual timestamps and averaging probabilities per unique timestamp.
+ Merges prediction probabilities with timestamps by expanding overlapping
+ windows into individual timestamps and averaging probabilities per unique
+ timestamp.

  Parameters:
  ----------
@@ -455,10 +467,11 @@ def merge_predictions_with_timestamps(
  Must include the timestamp column specified in `DataColumns.TIME`.

  df_predictions : pd.DataFrame
- DataFrame containing prediction windows with start times and probabilities.
- Must include:
+ DataFrame containing prediction windows with start times and
+ probabilities. Must include:
  - A column for window start times (defined by `DataColumns.TIME`).
- - A column for prediction probabilities (defined by `DataColumns.PRED_GAIT_PROBA`).
+ - A column for prediction probabilities (defined by
+ `DataColumns.PRED_GAIT_PROBA`).

  pred_proba_colname : str
  The column name for the prediction probabilities in `df_predictions`.
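
A standalone sketch of the merge the docstring describes: each prediction window is expanded into the timestamps it covers, and probabilities are averaged per unique timestamp. The window length, sampling rate, and column names below are illustrative, not the function's actual signature:

```python
import numpy as np
import pandas as pd

fs = 1               # Hz
window_length_s = 4  # each window covers 4 samples
df_predictions = pd.DataFrame({"time": [0, 2], "pred_gait_proba": [0.2, 0.8]})

# Expand every window into the timestamps it covers.
n_samples = window_length_s * fs
expanded = df_predictions.loc[df_predictions.index.repeat(n_samples)].copy()
expanded["time"] = expanded["time"].to_numpy() + np.tile(
    np.arange(n_samples) / fs, len(df_predictions)
)

# Average the probabilities of overlapping windows per unique timestamp.
proba_per_timestamp = expanded.groupby("time")["pred_gait_proba"].mean()
print(proba_per_timestamp.to_dict())
# {0.0: 0.2, 1.0: 0.2, 2.0: 0.5, 3.0: 0.5, 4.0: 0.8, 5.0: 0.8}
```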
@@ -559,7 +572,8 @@ def select_days(df: pd.DataFrame, min_hours_per_day: int) -> pd.DataFrame:
  Input data with column 'time_dt' in which the date is stored.

  min_hours_per_day: int
- The minimum number of hours per day required for including the day in the aggregation step.
+ The minimum number of hours per day required for including the day
+ in the aggregation step.


  Returns
@@ -0,0 +1,229 @@
+ Metadata-Version: 2.4
+ Name: paradigma
+ Version: 1.1.0
+ Summary: ParaDigMa - A toolbox for deriving Parkinson's disease Digital Markers from real-life wrist sensor data
+ License: Apache-2.0
+ License-File: LICENSE
+ Author: Erik Post
+ Author-email: erik.post@radboudumc.nl
+ Requires-Python: >=3.11,<4.0
+ Classifier: License :: OSI Approved :: Apache Software License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
+ Requires-Dist: avro (>=1.12.1,<2.0.0)
+ Requires-Dist: nbconvert (>=7.16.6,<8.0.0)
+ Requires-Dist: pandas (>=2.1.4,<3.0.0)
+ Requires-Dist: pyarrow (>=22.0.0,<23.0.0)
+ Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
+ Requires-Dist: scikit-learn (>=1.3.2,<1.6.1)
+ Requires-Dist: tsdf (>=0.6.1,<0.7.0)
+ Description-Content-Type: text/markdown
+
+ <p align="center">
+ <img src="https://raw.githubusercontent.com/biomarkersParkinson/paradigma/main/docs/source/_static/img/paradigma-logo-banner.png" alt="ParaDigMa logo"/>
+ </p>
+
+ | Badges | |
+ |:----:|----|
+ | **Packages and Releases** | [![Latest release](https://img.shields.io/github/release/biomarkersparkinson/paradigma.svg)](https://github.com/biomarkersparkinson/paradigma/releases/latest) [![PyPI](https://img.shields.io/pypi/v/paradigma.svg)](https://pypi.python.org/pypi/paradigma/) [![Static Badge](https://img.shields.io/badge/RSD-paradigma-lib)](https://research-software-directory.org/software/paradigma) |
+ | **DOI** | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.13838392.svg)](https://doi.org/10.5281/zenodo.13838392) |
+ | **Build Status** | [![](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/) [![Build and test](https://github.com/biomarkersParkinson/paradigma/actions/workflows/build-and-test.yml/badge.svg)](https://github.com/biomarkersParkinson/paradigma/actions/workflows/build-and-test.yml) [![pages-build-deployment](https://github.com/biomarkersParkinson/paradigma/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/biomarkersParkinson/paradigma/actions/workflows/pages/pages-build-deployment) |
+ | **License** | [![GitHub license](https://img.shields.io/github/license/biomarkersParkinson/paradigma)](https://github.com/biomarkersparkinson/paradigma/blob/main/LICENSE) |
+
+ ## Overview
+
+ ParaDigMa (Parkinson's disease Digital Markers) is a Python toolbox for extracting validated digital biomarkers from wrist sensor data in Parkinson's disease. It processes accelerometer, gyroscope, and PPG signals collected during passive monitoring in daily life.
+
+ **Key Features:**
+ - Arm swing during gait analysis
+ - Tremor analysis
+ - Pulse rate analysis
+ - Scientifically validated in peer-reviewed publications
+ - Modular, extensible architecture for custom analyses
+
+ ## Quick Start
+
+ ### Installation
+
+ **For regular use:**
+
+ ```bash
+ pip install paradigma
+ ```
+
+ Requires Python 3.11+.
+
+ **For development or running tutorials:**
+
+ Example data requires git-lfs. See the [installation guide](https://biomarkersparkinson.github.io/paradigma/guides/installation.html) for setup instructions.
+
+ ### Basic Usage
+
+ ```python
+ from paradigma.orchestrator import run_paradigma
+
+ # Example 1: Single DataFrame with default output directory
+ results = run_paradigma(
+ dfs=df,
+ pipelines=['gait', 'tremor'],
+ watch_side='left', # Required for gait pipeline
+ save_intermediate=['quantification', 'aggregation'] # Saves to ./output by default
+ )
+
+ # Example 2: Multiple DataFrames as list (assigned to 'df_1', 'df_2', etc.)
+ results = run_paradigma(
+ dfs=[df1, df2, df3],
+ pipelines=['gait', 'tremor'],
+ output_dir="./results", # Custom output directory
+ watch_side='left',
+ save_intermediate=['quantification', 'aggregation']
+ )
+
+ # Example 3: Dictionary of DataFrames (custom segment/file names)
+ results = run_paradigma(
+ dfs={'morning_session': df1, 'evening_session': df2},
+ pipelines=['gait', 'tremor'],
+ watch_side='right',
+ save_intermediate=[] # No files saved - results only in memory
+ )
+
+ # Example 4: Load from data directory
+ results = run_paradigma(
+ data_path='./my_data',
+ pipelines=['gait', 'tremor'],
+ watch_side='left',
+ file_pattern='*.parquet',
+ save_intermediate=['quantification', 'aggregation']
+ )
+
+ # Access results (nested by pipeline)
+ gait_measures = results['quantifications']['gait']
+ tremor_measures = results['quantifications']['tremor']
+ gait_aggregates = results['aggregations']['gait']
+ tremor_aggregates = results['aggregations']['tremor']
+
+ # Check for errors
+ if results['errors']:
+ print(f"Warning: {len(results['errors'])} error(s) occurred")
+ ```
+
+ **See our [tutorials](https://biomarkersparkinson.github.io/paradigma/tutorials/index.html) for complete examples.**
+
+ ## Pipelines
+
+ <p align="center">
+ <img src="https://raw.githubusercontent.com/biomarkersParkinson/paradigma/main/docs/source/_static/img/pipeline-architecture.png" alt="Pipeline architeecture"/>
119
+ </p>
+
+ ### Validated Processing Pipelines
+
+ | Pipeline | Input sensors | Output week-level aggregation | Publications | Tutorial |
+ | ---- | ---- | ------- | ---- | ---- |
+ | **Arm swing during gait** | Accelerometer + Gyroscope | Typical, maximum & variability of arm swing range of motion | [Post 2025](https://doi.org/10.1186/s12984-025-01578-z), [Post 2026*](https://doi.org/10.64898/2026.01.06.26343500) | [Guide](https://biomarkersparkinson.github.io/paradigma/tutorials/gait_analysis) |
+ | **Tremor** | Gyroscope | % tremor time, typical & maximum tremor power | [Timmermans 2025a](https://doi.org/10.1038/s41531-025-01056-2), [Timmermans 2025b*](https://www.medrxiv.org/content/10.64898/2025.12.23.25342892v1) | [Guide](https://biomarkersparkinson.github.io/paradigma/tutorials/tremor_analysis) |
+ | **Pulse rate** | PPG (+ Accelerometer) | Resting & maximum pulse rate | [Veldkamp 2025*](https://doi.org/10.1101/2025.08.15.25333751) | [Guide](https://biomarkersparkinson.github.io/paradigma/tutorials/pulse_rate_analysis) |
+
+ *\* Indicates pre-print*
+
+ ### Pipeline Architecture
+
+ ParaDigMa can best be understood by categorizing the sequential processes:
+ | Process | Description |
+ | ---- | ---- |
+ | **Preprocessing** | Preparing raw sensor signals for further processing |
+ | **Feature extraction** | Extracting features based on windowed sensor signals |
+ | **Classification** | Detecting segments of interest using validated classifiers (e.g., gait segments) |
+ | **Quantification** | Extracting specific measures from the detected segments (e.g., arm swing measures) |
+ | **Aggregation** | Aggregating the measures over a specific time period (e.g., week-level aggregates)
+
+ ## Usage
+ ### Documentation
+
+ - **[Tutorials](https://biomarkersparkinson.github.io/paradigma/tutorials/index.html)** - Step-by-step usage examples
+ - **[Installation Guide](https://biomarkersparkinson.github.io/paradigma/guides/installation.html)** - Setup and troubleshooting
+ - **[Sensor Requirements](https://biomarkersparkinson.github.io/paradigma/guides/sensor_requirements.html)** - Data specifications and compliance
+ - **[Supported Devices](https://biomarkersparkinson.github.io/paradigma/guides/supported_devices.html)** - Validated hardware
+ - **[Input Formats Guide](https://biomarkersparkinson.github.io/paradigma/guides/input_formats.html)** - Input format options and data loading
+ - **[Configuration Guide](https://biomarkersparkinson.github.io/paradigma/guides/config.html)** - Pipeline configuration
+ - **[Scientific Validation](https://biomarkersparkinson.github.io/paradigma/guides/validation.html)** - Validation studies and publications
+ - **[API Reference](https://biomarkersparkinson.github.io/paradigma/autoapi/paradigma/index.html)** - Complete API documentation
+
+ ### Sensor Requirements & Supported Devices
+
+ ParaDigMa is designed for wrist sensor data collected during passive monitoring in persons with Parkinson's disease. While designed to work with any compliant device, it has been empirically validated on:
+
+ - **Verily Study Watch** (gait, tremor, pulse rate)
+ - **Axivity AX6** (gait, tremor)
+ - **Gait-up Physilog 4** (gait, tremor)
+ - **Empatica EmbracePlus** (data loading)
+
+ Please check before running the pipelines whether your sensor data complies with the requirements for the sensor configuration and context of use. See the [sensor requirements guide](https://biomarkersparkinson.github.io/paradigma/guides/sensor_requirements.html) for data specifications and the [supported devices guide](https://biomarkersparkinson.github.io/paradigma/guides/supported_devices.html) for device-specific setup instructions.
+
+ ### Data Formats
+
+ ParaDigMa supports the following data formats:
+
+ - In-memory (recommended): **Pandas DataFrames** (see examples above)
+ - Data loading file extensions: **TSDF, Parquet, CSV, Pickle** and **several device-specific formats** (AVRO (Empatica), CWA (Axivity))
+
+ ### Troubleshooting
+
+ For installation issues, see the [installation guide troubleshooting section](https://biomarkersparkinson.github.io/paradigma/guides/installation.html#troubleshooting).
+
+ For other issues, check our [issue tracker](https://github.com/biomarkersParkinson/paradigma/issues) or contact paradigma@radboudumc.nl.
+
+ ## Scientific Validation
+
+ ParaDigMa pipelines are validated in peer-reviewed publications:
+
+ | Pipeline | Publication |
+ |----------|-------------|
+ | **Arm swing during gait** | Post et al. (2025, 2026) |
+ | **Tremor** | Timmermans et al. (2025a, 2025b) |
+ | **Pulse rate** | Veldkamp et al. (2025) |
+
+ See the [validation guide](https://biomarkersparkinson.github.io/paradigma/guides/validation.html) for full publication details.
+
+ ## Contributing
+
+ We welcome contributions! Please see:
+
+ - [Contributing Guidelines](https://biomarkersparkinson.github.io/paradigma/contributing.html)
+ - [Code of Conduct](https://biomarkersparkinson.github.io/paradigma/conduct.html)
+
+ ## Citation
+
+ If you use ParaDigMa in your research, please cite:
+
+ ```bibtex
+ @software{paradigma2024,
+ author = {Post, Erik and Veldkamp, Kars and Timmermans, Nienke and
+ Soriano, Diogo Coutinho and Kasalica, Vedran and
+ Kok, Peter and Evers, Luc},
+ title = {ParaDigMa: Parkinson's disease Digital Markers},
+ year = {2024},
+ doi = {10.5281/zenodo.13838392},
+ url = {https://github.com/biomarkersParkinson/paradigma}
+ }
+ ```
+
+ ## License
+
+ Licensed under the Apache License 2.0. See [LICENSE](LICENSE) for details.
+
+ ## Acknowledgements
+
+ **Core Team**: Erik Post, Kars Veldkamp, Nienke Timmermans, Diogo Coutinho Soriano, Vedran Kasalica, Peter Kok, Twan van Laarhoven, Luc Evers
+
+ **Advisors**: Max Little, Jordan Raykov, Hayriye Cagnan, Bas Bloem
+
+ **Funding**: the initial release was funded by the Michael J Fox Foundation (grant #020425) and the Dutch Research Council (grants #ASDI.2020.060, #2023.010)
+
+ ## Contact
+
+ - Email: paradigma@radboudumc.nl
+ - [Issue Tracker](https://github.com/biomarkersParkinson/paradigma/issues)
+