paradigma-1.0.3-py3-none-any.whl → paradigma-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/__init__.py +10 -1
- paradigma/classification.py +38 -21
- paradigma/config.py +187 -123
- paradigma/constants.py +48 -35
- paradigma/feature_extraction.py +345 -255
- paradigma/load.py +476 -0
- paradigma/orchestrator.py +670 -0
- paradigma/pipelines/gait_pipeline.py +685 -246
- paradigma/pipelines/pulse_rate_pipeline.py +456 -155
- paradigma/pipelines/pulse_rate_utils.py +289 -248
- paradigma/pipelines/tremor_pipeline.py +405 -132
- paradigma/prepare_data.py +409 -0
- paradigma/preprocessing.py +500 -163
- paradigma/segmenting.py +180 -140
- paradigma/testing.py +370 -178
- paradigma/util.py +190 -101
- paradigma-1.1.0.dist-info/METADATA +229 -0
- paradigma-1.1.0.dist-info/RECORD +26 -0
- {paradigma-1.0.3.dist-info → paradigma-1.1.0.dist-info}/WHEEL +1 -1
- paradigma-1.1.0.dist-info/entry_points.txt +4 -0
- {paradigma-1.0.3.dist-info → paradigma-1.1.0.dist-info/licenses}/LICENSE +0 -1
- paradigma-1.0.3.dist-info/METADATA +0 -138
- paradigma-1.0.3.dist-info/RECORD +0 -22
paradigma/prepare_data.py

@@ -0,0 +1,409 @@
+"""
+Data preparation module for ParaDigMa toolbox.
+
+This module provides functions to prepare raw sensor data for analysis:
+- Unit conversion (m/s² to g, rad/s to deg/s)
+- Time column formatting
+- Column name standardization
+- Watch side orientation correction
+- Resampling to 100 Hz
+
+Based on data_preparation tutorial.
+"""
+
+import logging
+
+import pandas as pd
+
+from paradigma.constants import DataColumns, TimeUnit
+from paradigma.preprocessing import resample_data
+from paradigma.util import (
+    convert_units_accelerometer,
+    convert_units_gyroscope,
+    transform_time_array,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def standardize_column_names(
+    df: pd.DataFrame,
+    column_mapping: dict[str, str] | None = None,
+) -> pd.DataFrame:
+    """
+    Standardize column names to ParaDigMa conventions.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Input DataFrame.
+    column_mapping : dict, optional
+        Custom column mapping.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with standardized column names.
+    """
+    df = df.copy()
+
+    # Apply mapping for existing columns only
+    if column_mapping is None:
+        return df
+
+    existing_mapping = {k: v for k, v in column_mapping.items() if k in df.columns}
+    df = df.rename(columns=existing_mapping)
+
+    if existing_mapping:
+        logger.debug(f"Standardized columns: {existing_mapping}")
+    return df
+
+
+def convert_sensor_units(
+    df: pd.DataFrame,
+    accelerometer_units: str = "m/s^2",
+    gyroscope_units: str = "deg/s",
+) -> pd.DataFrame:
+    """
+    Convert sensor units to ParaDigMa expected format (g for acceleration,
+    deg/s for gyroscope).
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Input DataFrame with sensor data.
+    accelerometer_units : str, default 'm/s^2'
+        Current units of accelerometer data.
+    gyroscope_units : str, default 'deg/s'
+        Current units of gyroscope data.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with converted units.
+    """
+    df = df.copy()
+
+    # Convert accelerometer units
+    accelerometer_columns = [
+        col
+        for col in [
+            DataColumns.ACCELEROMETER_X,
+            DataColumns.ACCELEROMETER_Y,
+            DataColumns.ACCELEROMETER_Z,
+        ]
+        if col in df.columns
+    ]
+
+    if accelerometer_columns and accelerometer_units != "g":
+        logger.debug(f"Converting accelerometer units from {accelerometer_units} to g")
+        accelerometer_data = df[accelerometer_columns].values
+        df[accelerometer_columns] = convert_units_accelerometer(
+            data=accelerometer_data, units=accelerometer_units
+        )
+
+    # Convert gyroscope units
+    gyroscope_columns = [
+        col
+        for col in [
+            DataColumns.GYROSCOPE_X,
+            DataColumns.GYROSCOPE_Y,
+            DataColumns.GYROSCOPE_Z,
+        ]
+        if col in df.columns
+    ]
+
+    if gyroscope_columns and gyroscope_units != "deg/s":
+        logger.debug(f"Converting gyroscope units from {gyroscope_units} to deg/s")
+        gyroscope_data = df[gyroscope_columns].values
+        df[gyroscope_columns] = convert_units_gyroscope(
+            data=gyroscope_data, units=gyroscope_units
+        )
+
+    return df
+
+
+def prepare_time_column(
+    df: pd.DataFrame,
+    time_column: str = DataColumns.TIME,
+    input_unit_type: TimeUnit = TimeUnit.RELATIVE_S,
+    output_unit_type: TimeUnit = TimeUnit.RELATIVE_S,
+) -> pd.DataFrame:
+    """
+    Prepare time column to start from 0 seconds.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Input DataFrame.
+    time_column : str, default DataColumns.TIME
+        Name of time column.
+    input_unit_type : TimeUnit, default TimeUnit.RELATIVE_S
+        Input time unit type.
+    output_unit_type : TimeUnit, default TimeUnit.RELATIVE_S
+        Output time unit type.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with prepared time column.
+    """
+    df = df.copy()
+
+    if time_column not in df.columns:
+        raise ValueError(f"Time column '{time_column}' not found in DataFrame")
+
+    logger.debug(f"Preparing time column: {input_unit_type} -> {output_unit_type}")
+
+    df[time_column] = transform_time_array(
+        time_array=df[time_column],
+        input_unit_type=input_unit_type,
+        output_unit_type=output_unit_type,
+    )
+
+    return df
+
+
+def correct_watch_orientation(
+    df: pd.DataFrame,
+    device_orientation: list[str] | None = None,
+    sensor: str = "both",
+) -> pd.DataFrame:
+    """
+    Apply custom device orientation mapping if provided.
+
+    Note: Watch-side inversion is handled separately during preprocessing
+    in the pipeline functions (preprocess_imu_data), not during data preparation.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Input DataFrame with sensor data
+    device_orientation : list of str, optional
+        Custom orientation correction multipliers for each axis.
+        Maps device axes to standard [x, y, z] orientation.
+    sensor : str, optional
+        Sensor to correct ('accelerometer', 'gyroscope', or 'both').
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with corrected device orientation (if custom mapping provided)
+    """
+    out = df.copy()
+
+    target_orientation = ["x", "y", "z"]
+    valid_axes = ["x", "-x", "y", "-y", "z", "-z"]
+
+    if sensor == "both":
+        sensors_to_correct = ["accelerometer", "gyroscope"]
+    elif sensor in ["accelerometer", "gyroscope"]:
+        sensors_to_correct = [sensor]
+    else:
+        raise ValueError("Sensor must be 'accelerometer', 'gyroscope', or 'both'")
+
+    if device_orientation is not None:
+        if any([axis not in valid_axes for axis in device_orientation]):
+            raise ValueError(
+                f"Invalid device_orientation values. Must be one of {valid_axes}"
+            )
+        if len(device_orientation) != 3:
+            raise ValueError("device_orientation must have exactly 3 elements")
+
+        if all([device_orientation[x] == target_orientation[x] for x in range(3)]):
+            logger.debug(
+                "Device orientation matches target orientation, "
+                "no correction applied"
+            )
+        else:
+            for sensor_type in sensors_to_correct:
+                for target_axis, mapping in zip(["x", "y", "z"], device_orientation):
+                    sign = -1 if mapping.startswith("-") else 1
+                    source_axis = mapping[-1]
+
+                    out[f"{sensor_type}_{target_axis}"] = (
+                        sign * df[f"{sensor_type}_{source_axis}"]
+                    )
+
+                    logger.debug(
+                        f"Applied custom orientation: {sensor_type} "
+                        f"{target_axis} mapped from {mapping}"
+                    )
+
+    return out
+
+
+def validate_prepared_data(df: pd.DataFrame) -> dict[str, bool | str]:
+    """
+    Validate that data is properly prepared for ParaDigMa analysis.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Prepared DataFrame
+
+    Returns
+    -------
+    dict
+        Validation results with checks and error messages
+    """
+    validation = {"valid": True, "errors": [], "warnings": []}
+
+    # Check required columns
+    if DataColumns.TIME not in df.columns:
+        validation["errors"].append(f"Missing required time column: {DataColumns.TIME}")
+
+    # Check for at least accelerometer or gyroscope data
+    accel_cols = [
+        DataColumns.ACCELEROMETER_X,
+        DataColumns.ACCELEROMETER_Y,
+        DataColumns.ACCELEROMETER_Z,
+    ]
+    gyro_cols = [
+        DataColumns.GYROSCOPE_X,
+        DataColumns.GYROSCOPE_Y,
+        DataColumns.GYROSCOPE_Z,
+    ]
+
+    has_accel = all(col in df.columns for col in accel_cols)
+    has_gyro = all(col in df.columns for col in gyro_cols)
+
+    if not has_accel and not has_gyro:
+        validation["errors"].append("Missing accelerometer and gyroscope data")
+    elif not has_accel:
+        validation["warnings"].append("Missing accelerometer data")
+    elif not has_gyro:
+        validation["warnings"].append("Missing gyroscope data")
+
+    # Check time column format
+    if DataColumns.TIME in df.columns:
+        if df[DataColumns.TIME].iloc[0] != 0:
+            validation["warnings"].append("Time column does not start at 0")
+
+        time_diff = df[DataColumns.TIME].diff().dropna()
+        if time_diff.std() / time_diff.mean() > 0.1:
+            validation["warnings"].append("Time column has irregular sampling")
+
+    # Check for NaN values
+    nan_columns = df.columns[df.isnull().any()].tolist()
+    if nan_columns:
+        validation["warnings"].append(f"Columns with NaN values: {nan_columns}")
+
+    # Check sampling frequency
+    if DataColumns.TIME in df.columns and len(df) > 1:
+        time_diff = df[DataColumns.TIME].diff().dropna()
+        current_dt = time_diff.median()
+        current_frequency = 1.0 / current_dt
+
+        if abs(current_frequency - 100.0) > 5.0:
+            validation["warnings"].append(
+                f"Sampling frequency {current_frequency:.2f} Hz differs from "
+                f"expected 100 Hz"
+            )
+
+    # Set overall validity
+    validation["valid"] = len(validation["errors"]) == 0
+
+    return validation
+
+
+def prepare_raw_data(
+    df: pd.DataFrame,
+    accelerometer_units: str = "m/s^2",
+    gyroscope_units: str = "deg/s",
+    time_input_unit: TimeUnit = TimeUnit.RELATIVE_S,
+    resampling_frequency: float = 100.0,
+    column_mapping: dict[str, str] | None = None,
+    device_orientation: dict[str, int] | None = None,
+    validate: bool = True,
+    auto_segment: bool = False,
+    max_segment_gap_s: float | None = None,
+    min_segment_length_s: float | None = None,
+) -> pd.DataFrame:
+    """
+    Complete data preparation pipeline for raw sensor data.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Raw sensor data
+    accelerometer_units : str, default 'm/s^2'
+        Current units of accelerometer data
+    gyroscope_units : str, default 'deg/s'
+        Current units of gyroscope data
+    time_input_unit : TimeUnit, default TimeUnit.RELATIVE_S
+        Input time unit type
+    resampling_frequency : float, default 100.0
+        Target sampling frequency in Hz
+    column_mapping : Dict[str, str], optional
+        Custom column name mapping
+    device_orientation : Dict[str, int], optional
+        Custom orientation correction
+    validate : bool, default True
+        Whether to validate the prepared data
+    auto_segment : bool, default False
+        If True, automatically split non-contiguous data into segments.
+        Adds 'data_segment_nr' column to output.
+    max_segment_gap_s : float, optional
+        Maximum gap (seconds) before starting new segment. Used when auto_segment=True.
+        Defaults to 1.5s.
+    min_segment_length_s : float, optional
+        Minimum segment length (seconds) to keep. Used when auto_segment=True.
+        Defaults to 1.5s.
+
+    Returns
+    -------
+    pd.DataFrame
+        Prepared data ready for ParaDigMa analysis. If auto_segment=True and multiple
+        segments found, includes 'data_segment_nr' column.
+    """
+    logger.info("Starting data preparation pipeline")
+
+    # Step 1: Standardize column names
+    logger.info("Step 1: Standardizing column names")
+    if column_mapping is None:
+        logger.debug("No column mapping provided, using default mapping")
+    else:
+        df = standardize_column_names(df, column_mapping)
+
+    # Step 2: Convert units
+    logger.info("Step 2: Converting sensor units")
+    df = convert_sensor_units(df, accelerometer_units, gyroscope_units)
+
+    # Step 3: Prepare time column
+    logger.info("Step 3: Preparing time column")
+    df = prepare_time_column(df, input_unit_type=time_input_unit)
+
+    # Step 4: Correct device orientation
+    logger.info("Step 4: Correcting device orientation")
+    df = correct_watch_orientation(df, device_orientation=device_orientation)
+
+    # Step 5: Resample to target frequency
+    logger.info(f"Step 5: Resampling to {resampling_frequency} Hz")
+    df = resample_data(
+        df,
+        resampling_frequency=resampling_frequency,
+        auto_segment=auto_segment,
+        max_segment_gap_s=max_segment_gap_s,
+        min_segment_length_s=min_segment_length_s,
+        verbose=1 if logger.level <= logging.INFO else 0,
+    )
+
+    # Step 6: Validate prepared data
+    if validate:
+        logger.info("Step 6: Validating prepared data")
+        validation = validate_prepared_data(df)
+
+        if validation["warnings"]:
+            for warning in validation["warnings"]:
+                logger.warning(warning)
+
+        if not validation["valid"]:
+            for error in validation["errors"]:
+                logger.error(error)
+            raise ValueError("Data preparation validation failed")

+    logger.info(
+        f"Data preparation completed: {df.shape[0]} rows, {df.shape[1]} columns"
+    )
+    return df