PyPI - ml4t-diagnostic - Versions diffs - 0.1.0a1__py3-none-any.whl - Mend

ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (242) hide show

ml4t/diagnostic/AGENT.md +25 -0
ml4t/diagnostic/__init__.py +166 -0
ml4t/diagnostic/backends/__init__.py +10 -0
ml4t/diagnostic/backends/adapter.py +192 -0
ml4t/diagnostic/backends/polars_backend.py +899 -0
ml4t/diagnostic/caching/__init__.py +40 -0
ml4t/diagnostic/caching/cache.py +331 -0
ml4t/diagnostic/caching/decorators.py +131 -0
ml4t/diagnostic/caching/smart_cache.py +339 -0
ml4t/diagnostic/config/AGENT.md +24 -0
ml4t/diagnostic/config/README.md +267 -0
ml4t/diagnostic/config/__init__.py +219 -0
ml4t/diagnostic/config/barrier_config.py +277 -0
ml4t/diagnostic/config/base.py +301 -0
ml4t/diagnostic/config/event_config.py +148 -0
ml4t/diagnostic/config/feature_config.py +404 -0
ml4t/diagnostic/config/multi_signal_config.py +55 -0
ml4t/diagnostic/config/portfolio_config.py +215 -0
ml4t/diagnostic/config/report_config.py +391 -0
ml4t/diagnostic/config/sharpe_config.py +202 -0
ml4t/diagnostic/config/signal_config.py +206 -0
ml4t/diagnostic/config/trade_analysis_config.py +310 -0
ml4t/diagnostic/config/validation.py +279 -0
ml4t/diagnostic/core/__init__.py +29 -0
ml4t/diagnostic/core/numba_utils.py +315 -0
ml4t/diagnostic/core/purging.py +372 -0
ml4t/diagnostic/core/sampling.py +471 -0
ml4t/diagnostic/errors/__init__.py +205 -0
ml4t/diagnostic/evaluation/AGENT.md +26 -0
ml4t/diagnostic/evaluation/__init__.py +437 -0
ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
ml4t/diagnostic/evaluation/dashboard.py +715 -0
ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
ml4t/diagnostic/evaluation/event_analysis.py +647 -0
ml4t/diagnostic/evaluation/excursion.py +390 -0
ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
ml4t/diagnostic/evaluation/framework.py +935 -0
ml4t/diagnostic/evaluation/metric_registry.py +255 -0
ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
ml4t/diagnostic/evaluation/multi_signal.py +550 -0
ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
ml4t/diagnostic/evaluation/report_generation.py +824 -0
ml4t/diagnostic/evaluation/signal_selector.py +452 -0
ml4t/diagnostic/evaluation/stat_registry.py +139 -0
ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
ml4t/diagnostic/evaluation/stats/moments.py +164 -0
ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
ml4t/diagnostic/evaluation/themes.py +330 -0
ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
ml4t/diagnostic/evaluation/validated_cv.py +535 -0
ml4t/diagnostic/evaluation/visualization.py +1050 -0
ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
ml4t/diagnostic/integration/__init__.py +48 -0
ml4t/diagnostic/integration/backtest_contract.py +671 -0
ml4t/diagnostic/integration/data_contract.py +316 -0
ml4t/diagnostic/integration/engineer_contract.py +226 -0
ml4t/diagnostic/logging/__init__.py +77 -0
ml4t/diagnostic/logging/logger.py +245 -0
ml4t/diagnostic/logging/performance.py +234 -0
ml4t/diagnostic/logging/progress.py +234 -0
ml4t/diagnostic/logging/wandb.py +412 -0
ml4t/diagnostic/metrics/__init__.py +9 -0
ml4t/diagnostic/metrics/percentiles.py +128 -0
ml4t/diagnostic/py.typed +1 -0
ml4t/diagnostic/reporting/__init__.py +43 -0
ml4t/diagnostic/reporting/base.py +130 -0
ml4t/diagnostic/reporting/html_renderer.py +275 -0
ml4t/diagnostic/reporting/json_renderer.py +51 -0
ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
ml4t/diagnostic/results/AGENT.md +24 -0
ml4t/diagnostic/results/__init__.py +105 -0
ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
ml4t/diagnostic/results/barrier_results/validation.py +38 -0
ml4t/diagnostic/results/base.py +177 -0
ml4t/diagnostic/results/event_results.py +349 -0
ml4t/diagnostic/results/feature_results.py +787 -0
ml4t/diagnostic/results/multi_signal_results.py +431 -0
ml4t/diagnostic/results/portfolio_results.py +281 -0
ml4t/diagnostic/results/sharpe_results.py +448 -0
ml4t/diagnostic/results/signal_results/__init__.py +74 -0
ml4t/diagnostic/results/signal_results/ic.py +581 -0
ml4t/diagnostic/results/signal_results/irtc.py +110 -0
ml4t/diagnostic/results/signal_results/quantile.py +392 -0
ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
ml4t/diagnostic/results/signal_results/turnover.py +213 -0
ml4t/diagnostic/results/signal_results/validation.py +147 -0
ml4t/diagnostic/signal/AGENT.md +17 -0
ml4t/diagnostic/signal/__init__.py +69 -0
ml4t/diagnostic/signal/_report.py +152 -0
ml4t/diagnostic/signal/_utils.py +261 -0
ml4t/diagnostic/signal/core.py +275 -0
ml4t/diagnostic/signal/quantile.py +148 -0
ml4t/diagnostic/signal/result.py +214 -0
ml4t/diagnostic/signal/signal_ic.py +129 -0
ml4t/diagnostic/signal/turnover.py +182 -0
ml4t/diagnostic/splitters/AGENT.md +19 -0
ml4t/diagnostic/splitters/__init__.py +36 -0
ml4t/diagnostic/splitters/base.py +501 -0
ml4t/diagnostic/splitters/calendar.py +421 -0
ml4t/diagnostic/splitters/calendar_config.py +91 -0
ml4t/diagnostic/splitters/combinatorial.py +1064 -0
ml4t/diagnostic/splitters/config.py +322 -0
ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
ml4t/diagnostic/splitters/group_isolation.py +329 -0
ml4t/diagnostic/splitters/persistence.py +316 -0
ml4t/diagnostic/splitters/utils.py +207 -0
ml4t/diagnostic/splitters/walk_forward.py +757 -0
ml4t/diagnostic/utils/__init__.py +42 -0
ml4t/diagnostic/utils/config.py +542 -0
ml4t/diagnostic/utils/dependencies.py +318 -0
ml4t/diagnostic/utils/sessions.py +127 -0
ml4t/diagnostic/validation/__init__.py +54 -0
ml4t/diagnostic/validation/dataframe.py +274 -0
ml4t/diagnostic/validation/returns.py +280 -0
ml4t/diagnostic/validation/timeseries.py +299 -0
ml4t/diagnostic/visualization/AGENT.md +19 -0
ml4t/diagnostic/visualization/__init__.py +223 -0
ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
ml4t/diagnostic/visualization/barrier_plots.py +782 -0
ml4t/diagnostic/visualization/core.py +1060 -0
ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
ml4t/diagnostic/visualization/dashboards/base.py +582 -0
ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
ml4t/diagnostic/visualization/dashboards.py +43 -0
ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
ml4t/diagnostic/visualization/feature_plots.py +888 -0
ml4t/diagnostic/visualization/interaction_plots.py +618 -0
ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
ml4t/diagnostic/visualization/report_generation.py +1343 -0
ml4t/diagnostic/visualization/signal/__init__.py +103 -0
ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0

ml4t/diagnostic/evaluation/stationarity/analysis.py ADDED Viewed

@@ -0,0 +1,518 @@
+"""Comprehensive stationarity analysis combining ADF, KPSS, and PP tests.
+This module provides unified stationarity analysis by combining multiple
+tests with consensus-based interpretation.
+Key Concept:
+    Different tests have different null hypotheses:
+    - ADF/PP: H0 = unit root (non-stationary), reject => stationary
+    - KPSS: H0 = stationary, reject => non-stationary
+    Strong evidence requires agreement between tests with opposite hypotheses.
+Consensus Logic:
+    - Strong stationary: All tests agree (ADF/PP reject, KPSS fails to reject)
+    - Likely stationary: 2/3 tests agree on stationarity
+    - Inconclusive: Only two tests ran and they disagree (e.g., ADF rejects but KPSS also rejects); a 2-1 split among three tests is reported as 'likely'
+    - Likely non-stationary: 2/3 tests agree on non-stationarity
+    - Strong non-stationary: All tests agree (ADF/PP fail to reject, KPSS rejects)
+"""
+from __future__ import annotations
+from typing import Literal
+import numpy as np
+import pandas as pd
+from ml4t.diagnostic.errors import ComputationError, ValidationError
+from ml4t.diagnostic.evaluation.stationarity.augmented_dickey_fuller import ADFResult, adf_test
+from ml4t.diagnostic.evaluation.stationarity.kpss_test import KPSSResult, kpss_test
+from ml4t.diagnostic.evaluation.stationarity.phillips_perron import (
+    PPResult,
+    _check_arch_available,
+    pp_test,
+)
+from ml4t.diagnostic.logging import get_logger
+logger = get_logger(__name__)
class StationarityAnalysisResult:
    """Combined outcome of the ADF, KPSS, and PP stationarity tests.

    Each supplied result object is only required to expose ``is_stationary``,
    ``test_statistic`` and ``p_value``. This class does not re-evaluate any
    test against ``alpha``; it reads each result's ``is_stationary`` flag and
    derives a vote-based consensus from those flags.

    Attributes:
        adf_result: ADF test result (None if test not run or failed)
        kpss_result: KPSS test result (None if test not run or failed)
        pp_result: PP test result (None if test not run or failed)
        consensus: Consensus interpretation of stationarity
        summary_df: DataFrame with one row per available test result
        agreement_score: Share of tests that side with the majority vote
        alpha: Significance level recorded for the analysis
        n_tests_run: Number of tests for which a result is available
    """

    # Column order of ``summary_df``. Declared explicitly so the frame keeps
    # the same schema even when no test result is available.
    _SUMMARY_COLUMNS = (
        "test_name",
        "test_statistic",
        "p_value",
        "is_stationary",
        "conclusion",
        "alpha",
    )

    def __init__(
        self,
        adf_result: ADFResult | None = None,
        kpss_result: KPSSResult | None = None,
        pp_result: PPResult | None = None,
        alpha: float = 0.05,
    ):
        """Initialize stationarity analysis result.

        Args:
            adf_result: ADF test result
            kpss_result: KPSS test result
            pp_result: PP test result
            alpha: Significance level used
        """
        self.adf_result = adf_result
        self.kpss_result = kpss_result
        self.pp_result = pp_result
        self.alpha = alpha

        # The derived attributes below all depend on the three results above,
        # so they must be computed after those are assigned.
        self.n_tests_run = len(self._available_results())
        self.consensus = self._calculate_consensus()
        self.agreement_score = self._calculate_agreement()
        self.summary_df = self._create_summary_df()

    def _available_results(self) -> list[tuple[str, ADFResult | KPSSResult | PPResult]]:
        """Return ``(label, result)`` pairs for every non-None result, in ADF, KPSS, PP order."""
        candidates = (
            ("ADF", self.adf_result),
            ("KPSS", self.kpss_result),
            ("PP", self.pp_result),
        )
        return [(label, res) for label, res in candidates if res is not None]

    def _votes(self) -> list[bool]:
        """Return one stationarity vote (True = stationary) per available test."""
        return [bool(res.is_stationary) for _, res in self._available_results()]

    def _calculate_consensus(
        self,
    ) -> Literal[
        "strong_stationary",
        "likely_stationary",
        "inconclusive",
        "likely_nonstationary",
        "strong_nonstationary",
    ]:
        """Calculate consensus interpretation from all available tests.

        Consensus Logic:
            - Three tests: unanimous => "strong_*", 2-1 split => "likely_*"
            - Two tests: unanimous => "likely_*", 1-1 split => "inconclusive"
            - One test: its verdict, labelled "likely_*" (never "strong_*")
            - No tests: "inconclusive"

        Returns:
            Consensus interpretation
        """
        votes = self._votes()
        if not votes:
            return "inconclusive"

        n_tests = len(votes)
        n_stationary = sum(votes)

        # "strong" verdicts are reserved for three unanimous tests.
        if n_tests == 3 and n_stationary == 3:
            return "strong_stationary"
        if n_tests == 3 and n_stationary == 0:
            return "strong_nonstationary"

        # An exact tie is only possible with two tests.
        if 2 * n_stationary == n_tests:
            return "inconclusive"

        # Everything else is decided by simple majority.
        if 2 * n_stationary > n_tests:
            return "likely_stationary"
        return "likely_nonstationary"

    def _calculate_agreement(self) -> float:
        """Calculate agreement score between tests.

        The score is the fraction of tests that side with the majority vote.

        For 3 tests:
            - All agree: 1.0
            - 2-1 split: 0.67
        For 2 tests:
            - Both agree: 1.0
            - Disagree: 0.5
        For 0 or 1 test:
            - Always 1.0 (no disagreement possible)

        Returns:
            Agreement score between 0.5 and 1.0
        """
        votes = self._votes()
        if len(votes) <= 1:
            return 1.0

        n_stationary = sum(votes)
        majority_count = max(n_stationary, len(votes) - n_stationary)
        return majority_count / len(votes)

    def _create_summary_df(self) -> pd.DataFrame:
        """Create summary DataFrame with all test results.

        Returns:
            DataFrame with columns: test_name, test_statistic, p_value,
            is_stationary, conclusion, alpha. The columns are present even
            when no test result is available (the frame is then empty).
        """
        rows = [
            {
                "test_name": label,
                "test_statistic": res.test_statistic,
                "p_value": res.p_value,
                "is_stationary": res.is_stationary,
                "conclusion": "Stationary" if res.is_stationary else "Non-stationary",
                "alpha": self.alpha,
            }
            for label, res in self._available_results()
        ]
        return pd.DataFrame(rows, columns=list(self._SUMMARY_COLUMNS))

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"StationarityAnalysisResult("
            f"consensus={self.consensus}, "
            f"agreement={self.agreement_score:.2f}, "
            f"n_tests={self.n_tests_run})"
        )

    def summary(self) -> str:
        """Human-readable summary of comprehensive stationarity analysis."""
        lines = [
            "Comprehensive Stationarity Analysis",
            "=" * 60,
            f"Tests Run: {self.n_tests_run} | Significance Level: {self.alpha}",
            "",
        ]

        # Individual test results
        for label, res in self._available_results():
            status = "Stationary" if res.is_stationary else "Non-stationary"
            lines.append(
                f"{label} Test: {status} (stat={res.test_statistic:.4f}, p={res.p_value:.4f})"
            )

        # Round (not truncate) the percentage so it matches the two-decimal
        # score printed next to it, e.g. "0.67 (67%)" rather than "0.67 (66%)".
        lines.append(
            f"\nAgreement Score: {self.agreement_score:.2f} ({self.agreement_score:.0%})"
        )

        consensus_labels = {
            "strong_stationary": "STRONG STATIONARY (all agree)",
            "likely_stationary": "LIKELY STATIONARY (majority)",
            "inconclusive": "INCONCLUSIVE (tests disagree)",
            "likely_nonstationary": "LIKELY NON-STATIONARY (majority)",
            "strong_nonstationary": "STRONG NON-STATIONARY (all agree)",
        }
        lines.append(f"Consensus: {consensus_labels[self.consensus]}")

        # Interpretation guidance matching test expectations
        guidance = {
            "strong_stationary": [
                "  - Series exhibits strong evidence of stationarity",
                "  - Safe to use in models requiring stationarity",
            ],
            "likely_stationary": [
                "  - Series likely stationary, but some uncertainty",
            ],
            "inconclusive": [
                "  - Tests provide conflicting evidence",
                "  - Consider differencing or detrending",
            ],
            "likely_nonstationary": [
                "  - Series likely has unit root",
                "  - Apply differencing before modeling",
            ],
            "strong_nonstationary": [
                "  - Series exhibits strong evidence of unit root",
                "  - Requires differencing or cointegration approach",
            ],
        }
        lines.append("\nInterpretation:")
        lines.extend(guidance[self.consensus])
        return "\n".join(lines)
def analyze_stationarity(
    data: pd.Series | np.ndarray,
    alpha: float = 0.05,
    include_tests: list[Literal["adf", "kpss", "pp"]] | None = None,
    **test_kwargs,
) -> StationarityAnalysisResult:
    """Perform comprehensive stationarity analysis with multiple tests.

    Runs ADF, KPSS, and PP tests (or subset) and provides consensus interpretation
    of stationarity. A test that raises is logged and skipped; the analysis only
    fails if every selected test fails.

    Key Concept:
        Different tests have different null hypotheses:
        - ADF/PP: H0 = unit root (non-stationary), reject => stationary
        - KPSS: H0 = stationary, reject => non-stationary
        Strong evidence requires agreement between tests with opposite hypotheses.

    Consensus Logic:
        - Strong stationary: All three tests agree (ADF/PP reject, KPSS fails to reject)
        - Likely stationary: Majority (or a unanimous subset of 1-2 tests) says stationary
        - Inconclusive: Only two tests ran and they disagree
        - Likely non-stationary: Majority (or unanimous subset) says non-stationary
        - Strong non-stationary: All three tests agree (ADF/PP fail to reject, KPSS rejects)

    Args:
        data: Time series data to test (1D array or Series)
        alpha: Significance level recorded on the returned result (default: 0.05).
               NOTE(review): alpha is not forwarded to the individual tests here, so
               each test's ``is_stationary`` flag reflects whatever level that test
               applies internally - confirm this is intended.
        include_tests: List of tests to run. If None, runs all available tests.
                      Options: ["adf", "kpss", "pp"]. PP requires arch package.
                      Duplicate names are ignored (each test runs at most once).
        **test_kwargs: Additional keyword arguments passed to individual tests.
                      Only the keys below are forwarded; any other key is
                      ignored with a warning.
                      - regression: 'c', 'ct', 'n' (for ADF/KPSS/PP; KPSS falls
                        back to 'c' for anything other than 'c'/'ct')
                      - maxlag: int or None (for ADF)
                      - autolag: 'AIC', 'BIC', 't-stat' or None (for ADF)
                      - nlags: int, 'auto', or 'legacy' (for KPSS)
                      - lags: int or None (for PP)
                      - test_type: forwarded to PP (accepted values are defined
                        by ``pp_test``)

    Returns:
        StationarityAnalysisResult with all test results, consensus, and summary

    Raises:
        ValidationError: If data is invalid (None, wrong type, not 1D, empty),
            if ``include_tests`` contains unknown names, or if no test remains
            to run
        ComputationError: If all tests fail to run

    Example:
        >>> import numpy as np
        >>> from ml4t.diagnostic.evaluation.stationarity import analyze_stationarity
        >>> white_noise = np.random.randn(1000)
        >>> result = analyze_stationarity(white_noise)
        >>> print(f"Consensus: {result.consensus}, Agreement: {result.agreement_score:.2%}")
        >>> # With custom parameters
        >>> result = analyze_stationarity(white_noise, regression="ct", include_tests=["adf", "kpss"])

    Notes:
        - PP test requires arch package (auto-skipped if unavailable)
        - Individual results: adf_result, kpss_result, pp_result; tabular: summary_df
    """
    # ---- Validate input -------------------------------------------------
    if data is None:
        raise ValidationError("Data cannot be None", context={"function": "analyze_stationarity"})

    # The array view is used for shape/length checks and logging only; the
    # individual tests receive the caller's original object.
    if isinstance(data, pd.Series):
        arr = data.to_numpy()
    elif isinstance(data, np.ndarray):
        arr = data
    else:
        raise ValidationError(
            f"Data must be pandas Series or numpy array, got {type(data)}",
            context={"function": "analyze_stationarity", "data_type": type(data).__name__},
        )

    if arr.ndim != 1:
        raise ValidationError(
            f"Data must be 1-dimensional, got {arr.ndim}D",
            context={"function": "analyze_stationarity", "shape": arr.shape},
        )

    if len(arr) == 0:
        raise ValidationError(
            "Data cannot be empty", context={"function": "analyze_stationarity", "length": 0}
        )

    # ---- Decide which tests to run --------------------------------------
    if include_tests is None:
        # Run all available tests
        tests_to_run = ["adf", "kpss"]
        if _check_arch_available():
            tests_to_run.append("pp")
        else:
            logger.info(
                "PP test not available (arch package not installed), running ADF and KPSS only"
            )
    else:
        # Validate test names
        valid_tests: set[str] = {"adf", "kpss", "pp"}
        provided_tests: set[str] = set(include_tests)
        invalid = provided_tests - valid_tests
        if invalid:
            raise ValidationError(
                f"Invalid test names: {invalid}. Valid options: {valid_tests}",
                context={"function": "analyze_stationarity", "include_tests": include_tests},
            )

        # Drop duplicates (keeping first-seen order) so no test runs twice.
        tests_to_run = list(dict.fromkeys(include_tests))

        # Warn if PP requested but not available
        if "pp" in tests_to_run and not _check_arch_available():
            logger.warning(
                "PP test requested but arch package not installed - skipping PP test. "
                "Install with: pip install arch or pip install ml4t-diagnostic[advanced]"
            )
            tests_to_run = [t for t in tests_to_run if t != "pp"]

    if not tests_to_run:
        raise ValidationError(
            "No valid tests to run",
            context={"function": "analyze_stationarity", "include_tests": include_tests},
        )

    # Test name -> (callable, names of the test_kwargs it accepts)
    test_configs = {
        "adf": (adf_test, ["maxlag", "regression", "autolag"]),
        "kpss": (kpss_test, ["regression", "nlags"]),
        "pp": (pp_test, ["lags", "regression", "test_type"]),
    }

    # Surface keyword arguments that no test accepts (typically a typo)
    # instead of dropping them silently.
    recognized_keys = {key for _, keys in test_configs.values() for key in keys}
    ignored_kwargs = sorted(set(test_kwargs) - recognized_keys)
    if ignored_kwargs:
        logger.warning(
            "Ignoring unrecognized test_kwargs",
            ignored_kwargs=ignored_kwargs,
        )

    logger.info(
        "Running comprehensive stationarity analysis",
        n_obs=len(arr),
        tests=tests_to_run,
        alpha=alpha,
    )

    # ---- Run tests and collect results ----------------------------------
    completed: dict[str, ADFResult | KPSSResult | PPResult] = {}
    failed_tests: list[tuple[str, str]] = []

    for test_name in tests_to_run:
        test_func, param_keys = test_configs[test_name]
        params = {k: test_kwargs[k] for k in param_keys if k in test_kwargs}

        # KPSS only supports 'c' and 'ct' regression
        if (
            test_name == "kpss"
            and "regression" in params
            and params["regression"] not in ("c", "ct")
        ):
            logger.warning(f"KPSS does not support regression='{params['regression']}', using 'c'")
            # Pass 'c' explicitly so the behaviour matches the warning above
            # regardless of what kpss_test's own default is.
            params["regression"] = "c"

        # Best-effort by design: one failing test must not abort the others,
        # so any exception is recorded and the loop continues.
        try:
            test_result = test_func(data, **params)
            logger.info(
                f"{test_name.upper()} test completed", stationary=test_result.is_stationary
            )
            completed[test_name] = test_result
        except Exception as e:
            logger.error(f"{test_name.upper()} test failed", error=str(e))
            failed_tests.append((test_name.upper(), str(e)))

    # ---- Check if at least one test succeeded ---------------------------
    n_succeeded = len(completed)
    if n_succeeded == 0:
        # All tests failed
        error_msg = "\n".join(
            ["All stationarity tests failed:"]
            + [f"  - {failed_name}: {error}" for failed_name, error in failed_tests]
        )
        raise ComputationError(
            error_msg,
            context={
                "function": "analyze_stationarity",
                "n_obs": len(arr),
                "tests_attempted": tests_to_run,
            },
        )

    # Log warnings for failed tests
    if failed_tests:
        logger.warning(
            f"{len(failed_tests)} test(s) failed but {n_succeeded} succeeded",
            failed_tests=[t[0] for t in failed_tests],
        )

    # ---- Create analysis result -----------------------------------------
    analysis = StationarityAnalysisResult(
        adf_result=completed.get("adf"),
        kpss_result=completed.get("kpss"),
        pp_result=completed.get("pp"),
        alpha=alpha,
    )

    logger.info(
        "Stationarity analysis completed",
        n_tests_run=analysis.n_tests_run,
        consensus=analysis.consensus,
        agreement=analysis.agreement_score,
    )
    return analysis