paradigma 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,670 @@
1
+ """
2
+ High-level pipeline orchestrator for ParaDigMa toolbox.
3
+
4
+ This module provides the main entry point for running analysis pipelines:
5
+
6
+ Main Function
7
+ -------------
8
+ - run_paradigma(): Complete pipeline from data loading/preparation
9
+ to aggregated results. Main entry point for end-to-end analysis
10
+ supporting multiple pipelines (gait, tremor, pulse_rate).
11
+ Can process raw data from disk or already-prepared DataFrames.
12
+
13
+ The orchestrator coordinates:
14
+ 1. Data loading and preparation (unit conversion, resampling, orientation correction)
15
+ 2. Pipeline execution on single or multiple files (imports from pipeline modules)
16
+ 3. Result aggregation across files and segments
17
+ 4. Optional intermediate result storage
18
+
19
+ Supports multi-file processing with automatic segment numbering and metadata tracking.
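+
+ Example
+ -------
+ A minimal sketch of a call with an in-memory DataFrame; here `df` is a
+ placeholder for a raw sensor recording in the expected column layout:
+
+ >>> results = run_paradigma(dfs=df, pipelines="tremor")
+ >>> df_tremor = results["quantifications"]["tremor"]  # quantified measures per window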
20
+ """
21
+
22
+ import json
23
+ import logging
24
+ from datetime import datetime
25
+ from pathlib import Path
26
+
27
+ import numpy as np
28
+ import pandas as pd
29
+
30
+ from paradigma.config import (
31
+ GaitConfig,
32
+ IMUConfig,
33
+ PPGConfig,
34
+ PulseRateConfig,
35
+ TremorConfig,
36
+ )
37
+ from paradigma.constants import DataColumns, TimeUnit
38
+ from paradigma.load import (
39
+ get_data_file_paths,
40
+ load_single_data_file,
41
+ save_prepared_data,
42
+ )
43
+ from paradigma.pipelines.gait_pipeline import (
44
+ aggregate_arm_swing_params,
45
+ run_gait_pipeline,
46
+ )
47
+ from paradigma.pipelines.pulse_rate_pipeline import (
48
+ aggregate_pulse_rate,
49
+ run_pulse_rate_pipeline,
50
+ )
51
+ from paradigma.pipelines.tremor_pipeline import aggregate_tremor, run_tremor_pipeline
52
+ from paradigma.prepare_data import prepare_raw_data
53
+
54
+ logger = logging.getLogger(__name__)
55
+
56
+ # Custom logging level for detailed info (between INFO=20 and DEBUG=10)
57
+ DETAILED_INFO = 15
58
+ logging.addLevelName(DETAILED_INFO, "DETAILED")
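+ # Callers can pass logging_level=DETAILED_INFO to run_paradigma to see the
+ # per-file progress messages emitted at this level without full DEBUG output.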
59
+
60
+
61
+ def run_paradigma(
62
+ *,
63
+ data_path: str | Path | None = None,
64
+ dfs: pd.DataFrame | list[pd.DataFrame] | dict[str, pd.DataFrame] | None = None,
65
+ save_intermediate: list[str] = [],
66
+ output_dir: str | Path = "./output",
67
+ skip_preparation: bool = False,
68
+ pipelines: list[str] | str | None = None,
69
+ watch_side: str | None = None,
70
+ accelerometer_units: str = "g",
71
+ gyroscope_units: str = "deg/s",
72
+ time_input_unit: TimeUnit = TimeUnit.RELATIVE_S,
73
+ target_frequency: float = 100.0,
74
+ column_mapping: dict[str, str] | None = None,
75
+ device_orientation: list[str] | None = ["x", "y", "z"],
76
+ file_pattern: str | list[str] | None = None,
77
+ aggregates: list[str] | None = None,
78
+ segment_length_bins: list[tuple] | None = None,
79
+ split_by_gaps: bool = False,
80
+ max_gap_seconds: float | None = None,
81
+ min_segment_seconds: float | None = None,
82
+ imu_config: IMUConfig | None = None,
83
+ ppg_config: PPGConfig | None = None,
84
+ gait_config: GaitConfig | None = None,
85
+ arm_activity_config: GaitConfig | None = None,
86
+ tremor_config: TremorConfig | None = None,
87
+ pulse_rate_config: PulseRateConfig | None = None,
88
+ logging_level: int = logging.INFO,
89
+ custom_logger: logging.Logger | None = None,
90
+ ) -> dict[str, pd.DataFrame | dict]:
91
+ """
92
+ Complete ParaDigMa analysis pipeline from data loading to aggregated results.
93
+
94
+ This is the main entry point for ParaDigMa analysis. It supports
95
+ multiple pipeline types:
96
+ - gait: Analysis of arm swing during gait
97
+ - tremor: Tremor detection and quantification
98
+ - pulse_rate: Pulse rate estimation from PPG signals
99
+
100
+ The function:
101
+ 1. Loads data files from the specified directory or uses provided DataFrame
102
+ 2. Prepares raw data if needed (unit conversion, resampling, etc.)
103
+ 3. Runs the specified pipeline on each data file
104
+ 4. Aggregates results across all data files
105
+
106
+ Parameters
107
+ ----------
108
+ data_path : str or Path, optional
109
+ Path to directory containing data files.
110
+ dfs : DataFrame, list of DataFrames, or dict of DataFrames, optional
111
+ Dataframes used as input (bypasses data loading). Can be:
112
+ - Single DataFrame: Will be processed as one file with key 'df_1'.
113
+ - List[DataFrame]: Multiple dataframes assigned IDs as 'df_1', 'df_2', etc.
114
+ - Dict[str, DataFrame]: Keys are file names, values are dataframes.
115
+ Note: The 'file_key' column is only added to quantification results when
116
+ len(dfs) > 1, allowing cleaner output for single-file processing.
117
+ See input_formats guide for details.
118
+ save_intermediate : list of str, default []
119
+ Which intermediate results to store. Valid values:
120
+ - 'preparation': Save prepared data
121
+ - 'preprocessing': Save preprocessed signals
122
+ - 'classification': Save classification results
123
+ - 'quantification': Save quantified measures
124
+ - 'aggregation': Save aggregated results
125
+ If empty, no files are saved (results are only returned).
126
+ output_dir : str or Path, default './output'
127
+ Output directory for all results. Files are only saved if
128
+ save_intermediate is not empty.
129
+ skip_preparation : bool, default False
130
+ Whether data is already prepared. If False, data will be
131
+ prepared (unit conversion, resampling, etc.). If True,
132
+ assumes data is already in the required format.
133
+ pipelines : list of str or str
134
+ Pipelines to run: 'gait', 'tremor', and/or 'pulse_rate'. At least one
135
+ pipeline must be specified. If providing a list, currently only the
136
+ tremor and gait pipelines can be run together.
137
+ watch_side : str, optional
138
+ Watch side: 'left' or 'right' (required for gait pipeline).
139
+ accelerometer_units : str, default 'g'
140
+ Units for accelerometer data.
141
+ gyroscope_units : str, default 'deg/s'
142
+ Units for gyroscope data.
143
+ time_input_unit : TimeUnit, default TimeUnit.RELATIVE_S
144
+ Input time unit type.
145
+ target_frequency : float, default 100.0
146
+ Target sampling frequency for resampling.
147
+ column_mapping : dict, optional
148
+ Custom column name mapping.
149
+ device_orientation : list of str, optional
150
+ Custom device orientation corrections.
151
+ file_pattern : str or list of str, optional
152
+ File pattern(s) to match when loading data (e.g., 'parquet', '*.csv').
153
+ aggregates : list of str, optional
154
+ Aggregation methods for quantification.
155
+ segment_length_bins : list of tuple, optional
156
+ Duration bins for gait segment aggregation (gait pipeline only).
157
+ Example: [(0, 10), (10, 20)] for segments of 0-10 s and 10-20 s.
158
+ split_by_gaps : bool, default False
159
+ If True, automatically split non-contiguous data into segments
160
+ during preparation.
161
+ Adds 'data_segment_nr' column to prepared data which is preserved
162
+ through the pipeline.
163
+ Useful for handling data with gaps/interruptions.
164
+ max_gap_seconds : float, optional
165
+ Maximum gap (seconds) before starting a new segment. Used when split_by_gaps=True.
166
+ Defaults to 1.5s.
167
+ min_segment_seconds : float, optional
168
+ Minimum segment length (seconds) to keep. Used when split_by_gaps=True.
169
+ Defaults to 1.5s.
170
+ imu_config : IMUConfig, optional
171
+ IMU preprocessing configuration.
172
+ ppg_config : PPGConfig, optional
173
+ PPG preprocessing configuration.
174
+ gait_config : GaitConfig, optional
175
+ Gait analysis configuration.
176
+ arm_activity_config : GaitConfig, optional
177
+ Arm activity analysis configuration.
178
+ tremor_config : TremorConfig, optional
179
+ Tremor analysis configuration.
180
+ pulse_rate_config : PulseRateConfig, optional
181
+ Pulse rate analysis configuration.
182
+ logging_level : int, default logging.INFO
183
+ Logging level using standard logging constants:
184
+ - logging.ERROR: Only errors
185
+ - logging.WARNING: Warnings and errors
186
+ - logging.INFO: Basic progress information (default)
187
+ - logging.DEBUG: Detailed debug information
188
+ Can also use DETAILED_INFO (15) for intermediate detail level.
189
+ custom_logger : logging.Logger, optional
190
+ Custom logger instance. If provided, logging_level is ignored.
191
+ Allows full control over logging configuration.
192
+
193
+ Returns
194
+ -------
195
+ dict
196
+ Complete analysis results with nested structure for multiple pipelines:
197
+ - 'quantifications': dict with pipeline names as keys and DataFrames as values
198
+ - 'aggregations': dict with pipeline names as keys and result dicts as values
199
+ - 'metadata': dict with pipeline names as keys and metadata dicts as values
200
+ - 'errors': list of dicts tracking any errors that occurred during processing.
201
+ Each error dict contains 'stage', 'error', and optionally 'file' and
202
+ 'pipeline'.
203
+ Empty list indicates successful processing of all files.
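+
+ Examples
+ --------
+ A minimal sketch of an end-to-end run from a directory of raw files; the
+ data path, watch side, and pipeline selection below are placeholders:
+
+ >>> results = run_paradigma(
+ ...     data_path="./raw_data",
+ ...     pipelines=["gait", "tremor"],
+ ...     watch_side="left",
+ ...     save_intermediate=["quantification", "aggregation"],
+ ... )
+ >>> df_gait = results["quantifications"]["gait"]
+ >>> gait_aggregates = results["aggregations"]["gait"]
+ >>> errors = results["errors"]  # empty list when all files processed cleanly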
204
+ """
205
+ if (data_path is None) == (dfs is None):
206
+ raise ValueError("Exactly one of data_path or dfs must be provided")
207
+
208
+ if pipelines is None:
+ raise ValueError(
+ "No pipeline specified. Provide 'gait', 'tremor', and/or 'pulse_rate'."
+ )
+
+ if isinstance(pipelines, str):
209
+ pipelines = [pipelines]
210
+
211
+ if len(pipelines) > 1 and "pulse_rate" in pipelines:
212
+ raise ValueError(
213
+ "Pulse rate pipeline cannot be run together with other pipelines"
214
+ )
215
+
216
+ if any(p not in ["gait", "tremor", "pulse_rate"] for p in pipelines):
217
+ raise ValueError(
218
+ f"At least one unknown pipeline provided: {pipelines}. "
219
+ f"Supported pipelines: 'gait', 'tremor', 'pulse_rate'"
220
+ )
221
+
222
+ # Use custom logger if provided, otherwise use module logger
223
+ active_logger = custom_logger if custom_logger is not None else logger
224
+
225
+ # Get package logger for configuration (affects all paradigma.* modules)
226
+ package_logger = logging.getLogger("paradigma")
227
+
228
+ # Configure package-wide logging level for all paradigma modules
229
+ if custom_logger is None:
230
+ package_logger.setLevel(logging_level)
231
+
232
+ if data_path is not None:
233
+ data_path = Path(data_path)
234
+ active_logger.info(f"Applying ParaDigMa pipelines to {data_path}")
235
+ else:
236
+ active_logger.info("Applying ParaDigMa pipelines to provided DataFrame")
237
+
238
+ # Convert and create output directory
239
+ output_dir = Path(output_dir)
240
+ output_dir.mkdir(parents=True, exist_ok=True)
241
+
242
+ # Setup logging to file - add handler to package logger so ALL paradigma modules
243
+ # log to file
244
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M")
245
+ log_file = output_dir / f"paradigma_run_{timestamp}.log"
246
+ file_handler = logging.FileHandler(log_file)
247
+ file_handler.setLevel(logging.INFO)
248
+ file_handler.setFormatter(
249
+ logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
250
+ )
251
+ package_logger.addHandler(file_handler)
252
+ active_logger.info(f"Logging to {log_file}")
253
+
254
+ # Step 1: Get file paths or convert provided DataFrames
255
+ file_paths = None # Will hold list of file paths if loading from directory
256
+ dfs_dict = None # Will hold dict of DataFrames if provided directly
257
+
258
+ if data_path is not None:
259
+ active_logger.info("Step 1: Finding data files")
260
+ try:
261
+ file_paths = get_data_file_paths(
262
+ data_path=data_path, file_patterns=file_pattern
263
+ )
264
+ except Exception as e:
265
+ active_logger.error(f"Failed to find data files: {e}")
266
+ raise
267
+
268
+ if not file_paths:
269
+ raise ValueError(f"No data files found in {data_path}")
270
+ else:
271
+ active_logger.info("Step 1: Using provided DataFrame(s) as input")
272
+
273
+ # Convert provided dfs to dict format
274
+ if isinstance(dfs, list):
275
+ dfs_dict = {f"df_{i}": df for i, df in enumerate(dfs, start=1)}
276
+ elif isinstance(dfs, pd.DataFrame):
277
+ dfs_dict = {"df_1": dfs}
278
+ else:
279
+ dfs_dict = dfs
280
+
281
+ # Determine number of files to process
282
+ num_files = len(file_paths) if file_paths else len(dfs_dict)
283
+
284
+ # Initialize results storage for each pipeline
285
+ all_results = {
286
+ "quantifications": {p: [] for p in pipelines},
287
+ "aggregations": {p: {} for p in pipelines},
288
+ "metadata": {p: {} for p in pipelines},
289
+ "errors": [],
290
+ }
291
+
292
+ # Steps 2-3: Process each file individually
293
+ active_logger.info(f"Steps 2-3: Processing {num_files} files individually")
294
+
295
+ # Track maximum gait segment number across files for proper offset
296
+ max_gait_segment_nr = 0
297
+
298
+ for i in range(num_files):
299
+ # Load one file at a time
300
+ if file_paths:
301
+ file_path = file_paths[i]
302
+ active_logger.info(f"Processing file {i+1}/{num_files}: {file_path.name}")
303
+ try:
304
+ file_name, df_raw = load_single_data_file(file_path)
305
+ except Exception as e:
306
+ error_msg = f"Failed to load file {file_path.name}: {e}"
307
+ active_logger.error(error_msg)
308
+ all_results["errors"].append(
309
+ {"file": file_path.name, "stage": "loading", "error": str(e)}
310
+ )
311
+ continue
312
+ else:
313
+ # Using in-memory data
314
+ file_name = list(dfs_dict.keys())[i]
315
+ df_raw = dfs_dict[file_name]
316
+ active_logger.info(f"Processing DataFrame {i+1}/{num_files}: {file_name}")
317
+
318
+ try:
319
+ # Step 2: Prepare data (if needed)
320
+ if not skip_preparation:
321
+ active_logger.log(DETAILED_INFO, f"Preparing data for {file_name}")
322
+
323
+ prepare_params = {
324
+ "time_input_unit": time_input_unit,
325
+ "resampling_frequency": target_frequency,
326
+ "column_mapping": column_mapping,
327
+ "auto_segment": split_by_gaps,
328
+ "max_segment_gap_s": max_gap_seconds,
329
+ "min_segment_length_s": min_segment_seconds,
330
+ }
331
+
332
+ # Add pipeline-specific preparation parameters
333
+ if "gait" in pipelines or "tremor" in pipelines:
334
+ prepare_params["gyroscope_units"] = gyroscope_units
335
+
336
+ if "gait" in pipelines:
337
+ prepare_params.update(
338
+ {
339
+ "accelerometer_units": accelerometer_units,
340
+ "device_orientation": device_orientation,
341
+ }
342
+ )
343
+
344
+ df_prepared = prepare_raw_data(df=df_raw, **prepare_params)
345
+
346
+ # Save prepared data if requested
347
+ if "preparation" in save_intermediate:
348
+ prepared_dir = output_dir / "prepared_data"
349
+ prepared_dir.mkdir(exist_ok=True)
350
+ save_prepared_data(
351
+ df_prepared,
352
+ prepared_dir / f"{file_name}.parquet",
353
+ )
354
+ else:
355
+ df_prepared = df_raw
356
+
357
+ # Release raw data from memory
358
+ del df_raw
359
+
360
+ # Step 3: Run each pipeline on this single file
361
+ store_intermediate_per_file = [
362
+ x
363
+ for x in save_intermediate
364
+ if x not in ["aggregation", "quantification"]
365
+ ]
366
+
367
+ # Create file-specific output directory
368
+ file_output_dir = output_dir / "individual_files" / file_name
369
+
370
+ for pipeline_name in pipelines:
371
+ active_logger.log(
372
+ DETAILED_INFO, f"Running {pipeline_name} pipeline on {file_name}"
373
+ )
374
+
375
+ try:
376
+ if pipeline_name == "gait":
377
+ quantification_data, quantification_metadata = (
378
+ run_gait_pipeline(
379
+ df_prepared=df_prepared,
380
+ watch_side=watch_side,
381
+ imu_config=imu_config,
382
+ gait_config=gait_config,
383
+ arm_activity_config=arm_activity_config,
384
+ store_intermediate=store_intermediate_per_file,
385
+ output_dir=file_output_dir,
386
+ segment_number_offset=max_gait_segment_nr,
387
+ logging_level=logging_level,
388
+ custom_logger=active_logger,
389
+ )
390
+ )
391
+
392
+ if len(quantification_data) > 0:
393
+ # Add file identifier if processing multiple files
394
+ quantification_data = quantification_data.copy()
395
+ if num_files > 1:
396
+ quantification_data["file_key"] = file_name
397
+ all_results["quantifications"][pipeline_name].append(
398
+ quantification_data
399
+ )
400
+
401
+ # Update max segment number for next file
402
+ max_gait_segment_nr = int(
403
+ quantification_data["gait_segment_nr"].max()
404
+ )
405
+
406
+ # Store metadata and update offset even if no quantifications
407
+ if (
408
+ quantification_metadata
409
+ and "per_segment" in quantification_metadata
410
+ ):
411
+ all_results["metadata"][pipeline_name].update(
412
+ quantification_metadata["per_segment"]
413
+ )
414
+
415
+ # Update max segment number based on metadata to prevent
416
+ # overwrites
417
+ if quantification_metadata["per_segment"]:
418
+ max_segment_in_metadata = max(
419
+ quantification_metadata["per_segment"].keys()
420
+ )
421
+ max_gait_segment_nr = max(
422
+ max_gait_segment_nr, max_segment_in_metadata
423
+ )
424
+
425
+ elif pipeline_name == "tremor":
426
+ quantification_data = run_tremor_pipeline(
427
+ df_prepared=df_prepared,
428
+ store_intermediate=store_intermediate_per_file,
429
+ output_dir=file_output_dir,
430
+ tremor_config=tremor_config,
431
+ imu_config=imu_config,
432
+ logging_level=logging_level,
433
+ custom_logger=active_logger,
434
+ )
435
+
436
+ if len(quantification_data) > 0:
437
+ quantification_data = quantification_data.copy()
438
+ if num_files > 1:
439
+ quantification_data["file_key"] = file_name
440
+ all_results["quantifications"][pipeline_name].append(
441
+ quantification_data
442
+ )
443
+
444
+ elif pipeline_name == "pulse_rate":
445
+ quantification_data = run_pulse_rate_pipeline(
446
+ df_ppg_prepared=df_prepared,
447
+ store_intermediate=store_intermediate_per_file,
448
+ output_dir=file_output_dir,
449
+ pulse_rate_config=pulse_rate_config,
450
+ ppg_config=ppg_config,
451
+ logging_level=logging_level,
452
+ custom_logger=active_logger,
453
+ )
454
+
455
+ if len(quantification_data) > 0:
456
+ quantification_data = quantification_data.copy()
457
+ if num_files > 1:
458
+ quantification_data["file_key"] = file_name
459
+ all_results["quantifications"][pipeline_name].append(
460
+ quantification_data
461
+ )
462
+
463
+ except Exception as e:
464
+ error_msg = (
465
+ f"Failed to run {pipeline_name} pipeline on {file_name}: {e}"
466
+ )
467
+ active_logger.error(error_msg)
468
+ all_results["errors"].append(
469
+ {
470
+ "file": file_name,
471
+ "pipeline": pipeline_name,
472
+ "stage": "pipeline_execution",
473
+ "error": str(e),
474
+ }
475
+ )
476
+ continue
477
+
478
+ # Release prepared data from memory
479
+ del df_prepared
480
+
481
+ except Exception as e:
482
+ error_msg = f"Failed to process file {file_name}: {e}"
483
+ active_logger.error(error_msg)
484
+ all_results["errors"].append(
485
+ {"file": file_name, "stage": "preparation", "error": str(e)}
486
+ )
487
+ continue
488
+
489
+ # Step 4: Combine quantifications from all files
490
+ active_logger.info("Step 4: Combining quantifications from all files")
491
+
492
+ for pipeline_name in pipelines:
493
+ # Concatenate all quantifications for this pipeline
494
+ if all_results["quantifications"][pipeline_name]:
495
+ combined_quantified = pd.concat(
496
+ all_results["quantifications"][pipeline_name], ignore_index=True
497
+ )
498
+
499
+ num_files_processed = len(all_results["quantifications"][pipeline_name])
500
+ all_results["quantifications"][pipeline_name] = combined_quantified
501
+
502
+ active_logger.info(
503
+ f"{pipeline_name.capitalize()}: Combined "
504
+ f"{len(combined_quantified)} windows from "
505
+ f"{num_files_processed} files"
506
+ )
507
+
508
+ # Step 5: Perform aggregation on combined results FROM ALL FILES
509
+ try:
510
+ if pipeline_name == "gait" and all_results["metadata"][pipeline_name]:
511
+ active_logger.info(
512
+ "Step 5: Aggregating gait results across ALL files"
513
+ )
514
+
515
+ if segment_length_bins is None:
516
+ gait_segment_categories = [
517
+ (0, 10),
518
+ (10, 20),
519
+ (20, np.inf),
520
+ (0, np.inf),
521
+ ]
522
+ else:
523
+ gait_segment_categories = segment_length_bins
524
+
525
+ if aggregates is None:
526
+ agg_methods = ["median", "95p", "cov"]
527
+ else:
528
+ agg_methods = aggregates
529
+
530
+ aggregations = aggregate_arm_swing_params(
531
+ df_arm_swing_params=combined_quantified,
532
+ segment_meta=all_results["metadata"][pipeline_name],
533
+ segment_cats=gait_segment_categories,
534
+ aggregates=agg_methods,
535
+ )
536
+ all_results["aggregations"][pipeline_name] = aggregations
537
+ active_logger.info(
538
+ f"Aggregation completed across "
539
+ f"{len(gait_segment_categories)} gait segment categories"
540
+ )
541
+
542
+ elif pipeline_name == "tremor":
543
+ active_logger.info(
544
+ "Step 5: Aggregating tremor results across ALL files"
545
+ )
546
+
547
+ # Work on a copy for tremor aggregation
548
+ tremor_data_for_aggregation = combined_quantified.copy()
549
+
550
+ # Need to add datetime column for aggregate_tremor
551
+ if (
552
+ "time_dt" not in tremor_data_for_aggregation.columns
553
+ and "time" in tremor_data_for_aggregation.columns
554
+ ):
555
+ tremor_data_for_aggregation["time_dt"] = pd.to_datetime(
556
+ tremor_data_for_aggregation["time"], unit="s"
557
+ )
558
+
559
+ if tremor_config is None:
560
+ tremor_config = TremorConfig()
561
+
562
+ aggregation_output = aggregate_tremor(
563
+ tremor_data_for_aggregation, tremor_config
564
+ )
565
+ all_results["aggregations"][pipeline_name] = aggregation_output[
566
+ "aggregated_tremor_measures"
567
+ ]
568
+ all_results["metadata"][pipeline_name] = aggregation_output[
569
+ "metadata"
570
+ ]
571
+ active_logger.info("Tremor aggregation completed")
572
+
573
+ elif pipeline_name == "pulse_rate":
574
+ active_logger.info(
575
+ "Step 5: Aggregating pulse rate results across ALL files"
576
+ )
577
+
578
+ pulse_rate_values = (
579
+ combined_quantified[DataColumns.PULSE_RATE].dropna().values
580
+ )
581
+
582
+ if len(pulse_rate_values) > 0:
583
+ aggregation_output = aggregate_pulse_rate(
584
+ pr_values=pulse_rate_values,
585
+ aggregates=aggregates if aggregates else ["mode", "99p"],
586
+ )
587
+ all_results["aggregations"][pipeline_name] = aggregation_output[
588
+ "pr_aggregates"
589
+ ]
590
+ all_results["metadata"][pipeline_name] = aggregation_output[
591
+ "metadata"
592
+ ]
593
+ active_logger.info(
594
+ f"Pulse rate aggregation completed with "
595
+ f"{len(pulse_rate_values)} valid estimates"
596
+ )
597
+ else:
598
+ active_logger.warning(
599
+ "No valid pulse rate estimates found for aggregation"
600
+ )
601
+
602
+ except Exception as e:
603
+ error_msg = f"Failed to aggregate {pipeline_name} results: {e}"
604
+ active_logger.error(error_msg)
605
+ all_results["errors"].append(
606
+ {"pipeline": pipeline_name, "stage": "aggregation", "error": str(e)}
607
+ )
608
+ all_results["aggregations"][pipeline_name] = {}
609
+
610
+ else:
611
+ # No quantifications found for this pipeline
612
+ all_results["quantifications"][pipeline_name] = pd.DataFrame()
613
+ active_logger.warning(f"No quantified {pipeline_name} results found")
614
+
615
+ # Save combined quantifications if requested
616
+ if "quantification" in save_intermediate:
617
+ for pipeline_name in pipelines:
618
+ if not all_results["quantifications"][pipeline_name].empty:
619
+ quant_file = output_dir / f"quantifications_{pipeline_name}.parquet"
620
+ save_prepared_data(
621
+ all_results["quantifications"][pipeline_name],
622
+ quant_file,
623
+ )
624
+
625
+ # Save aggregations if requested
626
+ if "aggregation" in save_intermediate:
627
+ for pipeline_name in pipelines:
628
+ if all_results["aggregations"][pipeline_name]:
629
+ agg_file = output_dir / f"aggregations_{pipeline_name}.json"
630
+ with open(agg_file, "w") as f:
631
+ json.dump(all_results["aggregations"][pipeline_name], f, indent=2)
632
+ active_logger.info(f"Saved aggregations to {agg_file}")
633
+
634
+ if all_results["errors"]:
635
+ active_logger.warning(
636
+ f"ParaDigMa analysis completed with {len(all_results['errors'])} error(s)"
637
+ )
638
+ else:
639
+ active_logger.info(
640
+ "ParaDigMa analysis completed successfully for all pipelines"
641
+ )
642
+
643
+ # Log final summary for all pipelines
644
+ for pipeline_name in pipelines:
645
+ quant_df = all_results["quantifications"][pipeline_name]
646
+ if not quant_df.empty and "file_key" in quant_df.columns:
647
+ successful_files = np.unique(quant_df["file_key"].values)
648
+ active_logger.log(
649
+ DETAILED_INFO,
650
+ f"{pipeline_name.capitalize()}: Files successfully "
651
+ f"processed: {successful_files}",
652
+ )
653
+ elif not quant_df.empty:
654
+ active_logger.log(
655
+ DETAILED_INFO,
656
+ f"{pipeline_name.capitalize()}: Single file processed " f"successfully",
657
+ )
658
+ else:
659
+ active_logger.log(
660
+ DETAILED_INFO, f"{pipeline_name.capitalize()}: No successful results"
661
+ )
662
+
663
+ # Close file handler to release log file - remove from package logger
664
+ package_logger = logging.getLogger("paradigma")
665
+ for handler in package_logger.handlers[:]:
666
+ if isinstance(handler, logging.FileHandler):
667
+ handler.close()
668
+ package_logger.removeHandler(handler)
669
+
670
+ return all_results