paradigma-1.0.4-py3-none-any.whl → paradigma-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/__init__.py +10 -1
- paradigma/classification.py +14 -14
- paradigma/config.py +38 -29
- paradigma/constants.py +10 -2
- paradigma/feature_extraction.py +106 -75
- paradigma/load.py +476 -0
- paradigma/orchestrator.py +670 -0
- paradigma/pipelines/gait_pipeline.py +488 -97
- paradigma/pipelines/pulse_rate_pipeline.py +278 -46
- paradigma/pipelines/pulse_rate_utils.py +176 -137
- paradigma/pipelines/tremor_pipeline.py +292 -72
- paradigma/prepare_data.py +409 -0
- paradigma/preprocessing.py +345 -77
- paradigma/segmenting.py +57 -42
- paradigma/testing.py +14 -9
- paradigma/util.py +36 -22
- paradigma-1.1.0.dist-info/METADATA +229 -0
- paradigma-1.1.0.dist-info/RECORD +26 -0
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/WHEEL +1 -1
- paradigma-1.0.4.dist-info/METADATA +0 -140
- paradigma-1.0.4.dist-info/RECORD +0 -23
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/entry_points.txt +0 -0
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/licenses/LICENSE +0 -0
paradigma/orchestrator.py (new file)
@@ -0,0 +1,670 @@
+"""
+High-level pipeline orchestrator for ParaDigMa toolbox.
+
+This module provides the main entry point for running analysis pipelines:
+
+Main Function
+-------------
+- run_paradigma(): Complete pipeline from data loading/preparation
+  to aggregated results. Main entry point for end-to-end analysis
+  supporting multiple pipelines (gait, tremor, pulse_rate).
+  Can process raw data from disk or already-prepared DataFrames.
+
+The orchestrator coordinates:
+1. Data loading and preparation (unit conversion, resampling, orientation correction)
+2. Pipeline execution on single or multiple files (imports from pipeline modules)
+3. Result aggregation across files and segments
+4. Optional intermediate result storage
+
+Supports multi-file processing with automatic segment numbering and metadata tracking.
+"""
+
+import json
+import logging
+from datetime import datetime
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+from paradigma.config import (
+    GaitConfig,
+    IMUConfig,
+    PPGConfig,
+    PulseRateConfig,
+    TremorConfig,
+)
+from paradigma.constants import DataColumns, TimeUnit
+from paradigma.load import (
+    get_data_file_paths,
+    load_single_data_file,
+    save_prepared_data,
+)
+from paradigma.pipelines.gait_pipeline import (
+    aggregate_arm_swing_params,
+    run_gait_pipeline,
+)
+from paradigma.pipelines.pulse_rate_pipeline import (
+    aggregate_pulse_rate,
+    run_pulse_rate_pipeline,
+)
+from paradigma.pipelines.tremor_pipeline import aggregate_tremor, run_tremor_pipeline
+from paradigma.prepare_data import prepare_raw_data
+
+logger = logging.getLogger(__name__)
+
+# Custom logging level for detailed info (between INFO=20 and DEBUG=10)
+DETAILED_INFO = 15
+logging.addLevelName(DETAILED_INFO, "DETAILED")
+
+
+def run_paradigma(
+    *,
+    data_path: str | Path | None = None,
+    dfs: pd.DataFrame | list[pd.DataFrame] | dict[str, pd.DataFrame] | None = None,
+    save_intermediate: list[str] = [],
+    output_dir: str | Path = "./output",
+    skip_preparation: bool = False,
+    pipelines: list[str] | str | None = None,
+    watch_side: str | None = None,
+    accelerometer_units: str = "g",
+    gyroscope_units: str = "deg/s",
+    time_input_unit: TimeUnit = TimeUnit.RELATIVE_S,
+    target_frequency: float = 100.0,
+    column_mapping: dict[str, str] | None = None,
+    device_orientation: list[str] | None = ["x", "y", "z"],
+    file_pattern: str | list[str] | None = None,
+    aggregates: list[str] | None = None,
+    segment_length_bins: list[str] | None = None,
+    split_by_gaps: bool = False,
+    max_gap_seconds: float | None = None,
+    min_segment_seconds: float | None = None,
+    imu_config: IMUConfig | None = None,
+    ppg_config: PPGConfig | None = None,
+    gait_config: GaitConfig | None = None,
+    arm_activity_config: GaitConfig | None = None,
+    tremor_config: TremorConfig | None = None,
+    pulse_rate_config: PulseRateConfig | None = None,
+    logging_level: int = logging.INFO,
+    custom_logger: logging.Logger | None = None,
+) -> dict[str, pd.DataFrame | dict]:
+    """
+    Complete ParaDigMa analysis pipeline from data loading to aggregated results.
+
+    This is the main entry point for ParaDigMa analysis. It supports
+    multiple pipeline types:
+    - gait: Arm swing during gait analysis
+    - tremor: Tremor detection and quantification
+    - pulse_rate: Pulse rate estimation from PPG signals
+
+    The function:
+    1. Loads data files from the specified directory or uses provided DataFrame
+    2. Prepares raw data if needed (unit conversion, resampling, etc.)
+    3. Runs the specified pipeline on each data file
+    4. Aggregates results across all data files
+
+    Parameters
+    ----------
+    data_path : str or Path, optional
+        Path to directory containing data files.
+    dfs : DataFrame, list of DataFrames, or dict of DataFrames, optional
+        Dataframes used as input (bypasses data loading). Can be:
+        - Single DataFrame: Will be processed as one file with key 'df_1'.
+        - List[DataFrame]: Multiple dataframes assigned IDs as 'df_1', 'df_2', etc.
+        - Dict[str, DataFrame]: Keys are file names, values are dataframes.
+        Note: The 'file_key' column is only added to quantification results when
+        len(dfs) > 1, allowing cleaner output for single-file processing.
+        See input_formats guide for details.
+    save_intermediate : list of str, default []
+        Which intermediate results to store. Valid values:
+        - 'preparation': Save prepared data
+        - 'preprocessing': Save preprocessed signals
+        - 'classification': Save classification results
+        - 'quantification': Save quantified measures
+        - 'aggregation': Save aggregated results
+        If empty, no files are saved (results are only returned).
+    output_dir : str or Path, default './output'
+        Output directory for all results. Files are only saved if
+        save_intermediate is not empty.
+    skip_preparation : bool, default False
+        Whether data is already prepared. If False, data will be
+        prepared (unit conversion, resampling, etc.). If True,
+        assumes data is already in the required format.
+    pipelines : list of str or str, optional
+        Pipelines to run: 'gait', 'tremor', and/or 'pulse_rate'.
+        If providing a list, currently only tremor and gait pipelines
+        can be run together.
+    watch_side : str, optional
+        Watch side: 'left' or 'right' (required for gait pipeline).
+    accelerometer_units : str, default 'm/s^2'
+        Units for accelerometer data.
+    gyroscope_units : str, default 'deg/s'
+        Units for gyroscope data.
+    time_input_unit : TimeUnit, default TimeUnit.RELATIVE_S
+        Input time unit type.
+    target_frequency : float, default 100.0
+        Target sampling frequency for resampling.
+    column_mapping : dict, optional
+        Custom column name mapping.
+    device_orientation : list of str, optional
+        Custom device orientation corrections.
+    file_pattern : str or list of str, optional
+        File pattern(s) to match when loading data (e.g., 'parquet', '*.csv').
+    aggregates : list of str, optional
+        Aggregation methods for quantification.
+    segment_length_bins : list of str, optional
+        Duration bins for gait segment aggregation (gait pipeline only).
+        Example: ['(0, 10)', '(10, 20)'] for segments 0-10s and 10-20s.
+    split_by_gaps : bool, default False
+        If True, automatically split non-contiguous data into segments
+        during preparation.
+        Adds 'data_segment_nr' column to prepared data which is preserved
+        through pipeline.
+        Useful for handling data with gaps/interruptions.
+    max_gap_seconds : float, optional
+        Maximum gap (seconds) before starting new segment. Used when split_by_gaps=True.
+        Defaults to 1.5s.
+    min_segment_seconds : float, optional
+        Minimum segment length (seconds) to keep. Used when split_by_gaps=True.
+        Defaults to 1.5s.
+    imu_config : IMUConfig, optional
+        IMU preprocessing configuration.
+    ppg_config : PPGConfig, optional
+        PPG preprocessing configuration.
+    gait_config : GaitConfig, optional
+        Gait analysis configuration.
+    arm_activity_config : GaitConfig, optional
+        Arm activity analysis configuration.
+    tremor_config : TremorConfig, optional
+        Tremor analysis configuration.
+    pulse_rate_config : PulseRateConfig, optional
+        Pulse rate analysis configuration.
+    logging_level : int, default logging.INFO
+        Logging level using standard logging constants:
+        - logging.ERROR: Only errors
+        - logging.WARNING: Warnings and errors
+        - logging.INFO: Basic progress information (default)
+        - logging.DEBUG: Detailed debug information
+        Can also use DETAILED_INFO (15) for intermediate detail level.
+    custom_logger : logging.Logger, optional
+        Custom logger instance. If provided, logging_level is ignored.
+        Allows full control over logging configuration.
+
+    Returns
+    -------
+    dict
+        Complete analysis results with nested structure for multiple pipelines:
+        - 'quantifications': dict with pipeline names as keys and DataFrames as values
+        - 'aggregations': dict with pipeline names as keys and result dicts as values
+        - 'metadata': dict with pipeline names as keys and metadata dicts as values
+        - 'errors': list of dicts tracking any errors that occurred during processing.
+          Each error dict contains 'stage', 'error', and optionally 'file' and
+          'pipeline'.
+          Empty list indicates successful processing of all files.
+    """
+    if (data_path is None) == (dfs is None):
+        raise ValueError("Exactly one of data_path or dfs must be provided")
+
+    if isinstance(pipelines, str):
+        pipelines = [pipelines]
+
+    if len(pipelines) > 1 and "pulse_rate" in pipelines:
+        raise ValueError(
+            "Pulse rate pipeline cannot be run together with other pipelines"
+        )
+
+    if any(p not in ["gait", "tremor", "pulse_rate"] for p in pipelines):
+        raise ValueError(
+            f"At least one unknown pipeline provided: {pipelines}. "
+            f"Supported pipelines: 'gait', 'tremor', 'pulse_rate'"
+        )
+
+    # Use custom logger if provided, otherwise use module logger
+    active_logger = custom_logger if custom_logger is not None else logger
+
+    # Get package logger for configuration (affects all paradigma.* modules)
+    package_logger = logging.getLogger("paradigma")
+
+    # Configure package-wide logging level for all paradigma modules
+    if custom_logger is None:
+        package_logger.setLevel(logging_level)
+
+    if data_path is not None:
+        data_path = Path(data_path)
+        active_logger.info(f"Applying ParaDigMa pipelines to {data_path}")
+    else:
+        active_logger.info("Applying ParaDigMa pipelines to provided DataFrame")
+
+    # Convert and create output directory
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Setup logging to file - add handler to package logger so ALL paradigma modules
+    # log to file
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
+    log_file = output_dir / f"paradigma_run_{timestamp}.log"
+    file_handler = logging.FileHandler(log_file)
+    file_handler.setLevel(logging.INFO)
+    file_handler.setFormatter(
+        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+    )
+    package_logger.addHandler(file_handler)
+    active_logger.info(f"Logging to {log_file}")
+
+    # Step 1: Get file paths or convert provided DataFrames
+    file_paths = None  # Will hold list of file paths if loading from directory
+    dfs_dict = None  # Will hold dict of DataFrames if provided directly
+
+    if data_path is not None:
+        active_logger.info("Step 1: Finding data files")
+        try:
+            file_paths = get_data_file_paths(
+                data_path=data_path, file_patterns=file_pattern
+            )
+        except Exception as e:
+            active_logger.error(f"Failed to find data files: {e}")
+            raise
+
+        if not file_paths:
+            raise ValueError(f"No data files found in {data_path}")
+    else:
+        active_logger.info("Step 1: Using provided DataFrame(s) as input")
+
+        # Convert provided dfs to dict format
+        if isinstance(dfs, list):
+            dfs_dict = {f"df_{i}": df for i, df in enumerate(dfs, start=1)}
+        elif isinstance(dfs, pd.DataFrame):
+            dfs_dict = {"df_1": dfs}
+        else:
+            dfs_dict = dfs
+
+    # Determine number of files to process
+    num_files = len(file_paths) if file_paths else len(dfs_dict)
+
+    # Initialize results storage for each pipeline
+    all_results = {
+        "quantifications": {p: [] for p in pipelines},
+        "aggregations": {p: {} for p in pipelines},
+        "metadata": {p: {} for p in pipelines},
+        "errors": [],
+    }
+
+    # Steps 2-3: Process each file individually
+    active_logger.info(f"Steps 2-3: Processing {num_files} files individually")
+
+    # Track maximum gait segment number across files for proper offset
+    max_gait_segment_nr = 0
+
+    for i in range(num_files):
+        # Load one file at a time
+        if file_paths:
+            file_path = file_paths[i]
+            active_logger.info(f"Processing file {i+1}/{num_files}: {file_path.name}")
+            try:
+                file_name, df_raw = load_single_data_file(file_path)
+            except Exception as e:
+                error_msg = f"Failed to load file {file_path.name}: {e}"
+                active_logger.error(error_msg)
+                all_results["errors"].append(
+                    {"file": file_path.name, "stage": "loading", "error": str(e)}
+                )
+                continue
+        else:
+            # Using in-memory data
+            file_name = list(dfs_dict.keys())[i]
+            df_raw = dfs_dict[file_name]
+            active_logger.info(f"Processing DataFrame {i+1}/{num_files}: {file_name}")
+
+        try:
+            # Step 2: Prepare data (if needed)
+            if not skip_preparation:
+                active_logger.log(DETAILED_INFO, f"Preparing data for {file_name}")
+
+                prepare_params = {
+                    "time_input_unit": time_input_unit,
+                    "resampling_frequency": target_frequency,
+                    "column_mapping": column_mapping,
+                    "auto_segment": split_by_gaps,
+                    "max_segment_gap_s": max_gap_seconds,
+                    "min_segment_length_s": min_segment_seconds,
+                }
+
+                # Add pipeline-specific preparation parameters
+                if "gait" in pipelines or "tremor" in pipelines:
+                    prepare_params["gyroscope_units"] = gyroscope_units
+
+                if "gait" in pipelines:
+                    prepare_params.update(
+                        {
+                            "accelerometer_units": accelerometer_units,
+                            "device_orientation": device_orientation,
+                        }
+                    )
+
+                df_prepared = prepare_raw_data(df=df_raw, **prepare_params)
+
+                # Save prepared data if requested
+                if "preparation" in save_intermediate:
+                    prepared_dir = output_dir / "prepared_data"
+                    prepared_dir.mkdir(exist_ok=True)
+                    save_prepared_data(
+                        df_prepared,
+                        prepared_dir / f"{file_name}.parquet",
+                    )
+            else:
+                df_prepared = df_raw
+
+            # Release raw data from memory
+            del df_raw
+
+            # Step 3: Run each pipeline on this single file
+            store_intermediate_per_file = [
+                x
+                for x in save_intermediate
+                if x not in ["aggregation", "quantification"]
+            ]
+
+            # Create file-specific output directory
+            file_output_dir = output_dir / "individual_files" / file_name
+
+            for pipeline_name in pipelines:
+                active_logger.log(
+                    DETAILED_INFO, f"Running {pipeline_name} pipeline on {file_name}"
+                )
+
+                try:
+                    if pipeline_name == "gait":
+                        quantification_data, quantification_metadata = (
+                            run_gait_pipeline(
+                                df_prepared=df_prepared,
+                                watch_side=watch_side,
+                                imu_config=imu_config,
+                                gait_config=gait_config,
+                                arm_activity_config=arm_activity_config,
+                                store_intermediate=store_intermediate_per_file,
+                                output_dir=file_output_dir,
+                                segment_number_offset=max_gait_segment_nr,
+                                logging_level=logging_level,
+                                custom_logger=active_logger,
+                            )
+                        )
+
+                        if len(quantification_data) > 0:
+                            # Add file identifier if processing multiple files
+                            quantification_data = quantification_data.copy()
+                            if num_files > 1:
+                                quantification_data["file_key"] = file_name
+                            all_results["quantifications"][pipeline_name].append(
+                                quantification_data
+                            )
+
+                            # Update max segment number for next file
+                            max_gait_segment_nr = int(
+                                quantification_data["gait_segment_nr"].max()
+                            )
+
+                        # Store metadata and update offset even if no quantifications
+                        if (
+                            quantification_metadata
+                            and "per_segment" in quantification_metadata
+                        ):
+                            all_results["metadata"][pipeline_name].update(
+                                quantification_metadata["per_segment"]
+                            )
+
+                            # Update max segment number based on metadata to prevent
+                            # overwrites
+                            if quantification_metadata["per_segment"]:
+                                max_segment_in_metadata = max(
+                                    quantification_metadata["per_segment"].keys()
+                                )
+                                max_gait_segment_nr = max(
+                                    max_gait_segment_nr, max_segment_in_metadata
+                                )
+
+                    elif pipeline_name == "tremor":
+                        quantification_data = run_tremor_pipeline(
+                            df_prepared=df_prepared,
+                            store_intermediate=store_intermediate_per_file,
+                            output_dir=file_output_dir,
+                            tremor_config=tremor_config,
+                            imu_config=imu_config,
+                            logging_level=logging_level,
+                            custom_logger=active_logger,
+                        )
+
+                        if len(quantification_data) > 0:
+                            quantification_data = quantification_data.copy()
+                            if num_files > 1:
+                                quantification_data["file_key"] = file_name
+                            all_results["quantifications"][pipeline_name].append(
+                                quantification_data
+                            )
+
+                    elif pipeline_name == "pulse_rate":
+                        quantification_data = run_pulse_rate_pipeline(
+                            df_ppg_prepared=df_prepared,
+                            store_intermediate=store_intermediate_per_file,
+                            output_dir=file_output_dir,
+                            pulse_rate_config=pulse_rate_config,
+                            ppg_config=ppg_config,
+                            logging_level=logging_level,
+                            custom_logger=active_logger,
+                        )
+
+                        if len(quantification_data) > 0:
+                            quantification_data = quantification_data.copy()
+                            if num_files > 1:
+                                quantification_data["file_key"] = file_name
+                            all_results["quantifications"][pipeline_name].append(
+                                quantification_data
+                            )
+
+                except Exception as e:
+                    error_msg = (
+                        f"Failed to run {pipeline_name} pipeline on {file_name}: {e}"
+                    )
+                    active_logger.error(error_msg)
+                    all_results["errors"].append(
+                        {
+                            "file": file_name,
+                            "pipeline": pipeline_name,
+                            "stage": "pipeline_execution",
+                            "error": str(e),
+                        }
+                    )
+                    continue
+
+            # Release prepared data from memory
+            del df_prepared
+
+        except Exception as e:
+            error_msg = f"Failed to process file {file_name}: {e}"
+            active_logger.error(error_msg)
+            all_results["errors"].append(
+                {"file": file_name, "stage": "preparation", "error": str(e)}
+            )
+            continue
+
+    # Step 4: Combine quantifications from all files
+    active_logger.info("Step 4: Combining quantifications from all files")
+
+    for pipeline_name in pipelines:
+        # Concatenate all quantifications for this pipeline
+        if all_results["quantifications"][pipeline_name]:
+            combined_quantified = pd.concat(
+                all_results["quantifications"][pipeline_name], ignore_index=True
+            )
+
+            num_files_processed = len(all_results["quantifications"][pipeline_name])
+            all_results["quantifications"][pipeline_name] = combined_quantified
+
+            active_logger.info(
+                f"{pipeline_name.capitalize()}: Combined "
+                f"{len(combined_quantified)} windows from "
+                f"{num_files_processed} files"
+            )
+
+            # Step 5: Perform aggregation on combined results FROM ALL FILES
+            try:
+                if pipeline_name == "gait" and all_results["metadata"][pipeline_name]:
+                    active_logger.info(
+                        "Step 5: Aggregating gait results across ALL files"
+                    )
+
+                    if segment_length_bins is None:
+                        gait_segment_categories = [
+                            (0, 10),
+                            (10, 20),
+                            (20, np.inf),
+                            (0, np.inf),
+                        ]
+                    else:
+                        gait_segment_categories = segment_length_bins
+
+                    if aggregates is None:
+                        agg_methods = ["median", "95p", "cov"]
+                    else:
+                        agg_methods = aggregates
+
+                    aggregations = aggregate_arm_swing_params(
+                        df_arm_swing_params=combined_quantified,
+                        segment_meta=all_results["metadata"][pipeline_name],
+                        segment_cats=gait_segment_categories,
+                        aggregates=agg_methods,
+                    )
+                    all_results["aggregations"][pipeline_name] = aggregations
+                    active_logger.info(
+                        f"Aggregation completed across "
+                        f"{len(gait_segment_categories)} gait segment categories"
+                    )
+
+                elif pipeline_name == "tremor":
+                    active_logger.info(
+                        "Step 5: Aggregating tremor results across ALL files"
+                    )
+
+                    # Work on a copy for tremor aggregation
+                    tremor_data_for_aggregation = combined_quantified.copy()
+
+                    # Need to add datetime column for aggregate_tremor
+                    if (
+                        "time_dt" not in tremor_data_for_aggregation.columns
+                        and "time" in tremor_data_for_aggregation.columns
+                    ):
+                        tremor_data_for_aggregation["time_dt"] = pd.to_datetime(
+                            tremor_data_for_aggregation["time"], unit="s"
+                        )
+
+                    if tremor_config is None:
+                        tremor_config = TremorConfig()
+
+                    aggregation_output = aggregate_tremor(
+                        tremor_data_for_aggregation, tremor_config
+                    )
+                    all_results["aggregations"][pipeline_name] = aggregation_output[
+                        "aggregated_tremor_measures"
+                    ]
+                    all_results["metadata"][pipeline_name] = aggregation_output[
+                        "metadata"
+                    ]
+                    active_logger.info("Tremor aggregation completed")
+
+                elif pipeline_name == "pulse_rate":
+                    active_logger.info(
+                        "Step 5: Aggregating pulse rate results across ALL files"
+                    )
+
+                    pulse_rate_values = (
+                        combined_quantified[DataColumns.PULSE_RATE].dropna().values
+                    )
+
+                    if len(pulse_rate_values) > 0:
+                        aggregation_output = aggregate_pulse_rate(
+                            pr_values=pulse_rate_values,
+                            aggregates=aggregates if aggregates else ["mode", "99p"],
+                        )
+                        all_results["aggregations"][pipeline_name] = aggregation_output[
+                            "pr_aggregates"
+                        ]
+                        all_results["metadata"][pipeline_name] = aggregation_output[
+                            "metadata"
+                        ]
+                        active_logger.info(
+                            f"Pulse rate aggregation completed with "
+                            f"{len(pulse_rate_values)} valid estimates"
+                        )
+                    else:
+                        active_logger.warning(
+                            "No valid pulse rate estimates found for aggregation"
+                        )
+
+            except Exception as e:
+                error_msg = f"Failed to aggregate {pipeline_name} results: {e}"
+                active_logger.error(error_msg)
+                all_results["errors"].append(
+                    {"pipeline": pipeline_name, "stage": "aggregation", "error": str(e)}
+                )
+                all_results["aggregations"][pipeline_name] = {}
+
+        else:
+            # No quantifications found for this pipeline
+            all_results["quantifications"][pipeline_name] = pd.DataFrame()
+            active_logger.warning(f"No quantified {pipeline_name} results found")
+
+    # Save combined quantifications if requested
+    if "quantification" in save_intermediate:
+        for pipeline_name in pipelines:
+            if not all_results["quantifications"][pipeline_name].empty:
+                quant_file = output_dir / f"quantifications_{pipeline_name}.parquet"
+                save_prepared_data(
+                    all_results["quantifications"][pipeline_name],
+                    quant_file,
+                )
+
+    # Save aggregations if requested
+    if "aggregation" in save_intermediate:
+        for pipeline_name in pipelines:
+            if all_results["aggregations"][pipeline_name]:
+                agg_file = output_dir / f"aggregations_{pipeline_name}.json"
+                with open(agg_file, "w") as f:
+                    json.dump(all_results["aggregations"][pipeline_name], f, indent=2)
+                active_logger.info(f"Saved aggregations to {agg_file}")
+
+    if all_results["errors"]:
+        active_logger.warning(
+            f"ParaDigMa analysis completed with {len(all_results['errors'])} error(s)"
+        )
+    else:
+        active_logger.info(
+            "ParaDigMa analysis completed successfully for all pipelines"
+        )
+
+    # Log final summary for all pipelines
+    for pipeline_name in pipelines:
+        quant_df = all_results["quantifications"][pipeline_name]
+        if not quant_df.empty and "file_key" in quant_df.columns:
+            successful_files = np.unique(quant_df["file_key"].values)
+            active_logger.log(
+                DETAILED_INFO,
+                f"{pipeline_name.capitalize()}: Files successfully "
+                f"processed: {successful_files}",
+            )
+        elif not quant_df.empty:
+            active_logger.log(
+                DETAILED_INFO,
+                f"{pipeline_name.capitalize()}: Single file processed " f"successfully",
+            )
+        else:
+            active_logger.log(
+                DETAILED_INFO, f"{pipeline_name.capitalize()}: No successful results"
+            )
+
+    # Close file handler to release log file - remove from package logger
+    package_logger = logging.getLogger("paradigma")
+    for handler in package_logger.handlers[:]:
+        if isinstance(handler, logging.FileHandler):
+            handler.close()
+            package_logger.removeHandler(handler)
+
+    return all_results
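
For orientation, below is a minimal, hypothetical usage sketch of the new run_paradigma() entry point added in 1.1.0. It is based only on the signature and docstring in the hunk above; the input file name, DataFrame contents, and chosen options are illustrative assumptions, not part of the package.

    # Hypothetical example (not from the package): run gait + tremor on one DataFrame.
    import logging

    import pandas as pd

    from paradigma.orchestrator import run_paradigma

    df = pd.read_parquet("sensor_recording.parquet")  # assumed raw IMU recording

    results = run_paradigma(
        dfs=df,                            # or data_path="path/to/data_dir"
        pipelines=["gait", "tremor"],      # pulse_rate must be run on its own
        watch_side="left",                 # required for the gait pipeline
        save_intermediate=["quantification", "aggregation"],
        output_dir="./output",
        logging_level=logging.INFO,
    )

    gait_df = results["quantifications"]["gait"]     # per-window DataFrame
    tremor_agg = results["aggregations"]["tremor"]   # dict of aggregated measures
    if results["errors"]:
        print(results["errors"])                     # [{'stage': ..., 'error': ...}, ...]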