ml4t-diagnostic 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml4t/diagnostic/AGENT.md +25 -0
- ml4t/diagnostic/__init__.py +166 -0
- ml4t/diagnostic/backends/__init__.py +10 -0
- ml4t/diagnostic/backends/adapter.py +192 -0
- ml4t/diagnostic/backends/polars_backend.py +899 -0
- ml4t/diagnostic/caching/__init__.py +40 -0
- ml4t/diagnostic/caching/cache.py +331 -0
- ml4t/diagnostic/caching/decorators.py +131 -0
- ml4t/diagnostic/caching/smart_cache.py +339 -0
- ml4t/diagnostic/config/AGENT.md +24 -0
- ml4t/diagnostic/config/README.md +267 -0
- ml4t/diagnostic/config/__init__.py +219 -0
- ml4t/diagnostic/config/barrier_config.py +277 -0
- ml4t/diagnostic/config/base.py +301 -0
- ml4t/diagnostic/config/event_config.py +148 -0
- ml4t/diagnostic/config/feature_config.py +404 -0
- ml4t/diagnostic/config/multi_signal_config.py +55 -0
- ml4t/diagnostic/config/portfolio_config.py +215 -0
- ml4t/diagnostic/config/report_config.py +391 -0
- ml4t/diagnostic/config/sharpe_config.py +202 -0
- ml4t/diagnostic/config/signal_config.py +206 -0
- ml4t/diagnostic/config/trade_analysis_config.py +310 -0
- ml4t/diagnostic/config/validation.py +279 -0
- ml4t/diagnostic/core/__init__.py +29 -0
- ml4t/diagnostic/core/numba_utils.py +315 -0
- ml4t/diagnostic/core/purging.py +372 -0
- ml4t/diagnostic/core/sampling.py +471 -0
- ml4t/diagnostic/errors/__init__.py +205 -0
- ml4t/diagnostic/evaluation/AGENT.md +26 -0
- ml4t/diagnostic/evaluation/__init__.py +437 -0
- ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
- ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
- ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
- ml4t/diagnostic/evaluation/dashboard.py +715 -0
- ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
- ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
- ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
- ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
- ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
- ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
- ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
- ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
- ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
- ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
- ml4t/diagnostic/evaluation/event_analysis.py +647 -0
- ml4t/diagnostic/evaluation/excursion.py +390 -0
- ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
- ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
- ml4t/diagnostic/evaluation/framework.py +935 -0
- ml4t/diagnostic/evaluation/metric_registry.py +255 -0
- ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
- ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
- ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
- ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
- ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
- ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
- ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
- ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
- ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
- ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
- ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
- ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
- ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
- ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
- ml4t/diagnostic/evaluation/multi_signal.py +550 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
- ml4t/diagnostic/evaluation/report_generation.py +824 -0
- ml4t/diagnostic/evaluation/signal_selector.py +452 -0
- ml4t/diagnostic/evaluation/stat_registry.py +139 -0
- ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
- ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
- ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
- ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
- ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
- ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
- ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
- ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
- ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
- ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
- ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
- ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
- ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
- ml4t/diagnostic/evaluation/stats/moments.py +164 -0
- ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
- ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
- ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
- ml4t/diagnostic/evaluation/themes.py +330 -0
- ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
- ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
- ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
- ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
- ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
- ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
- ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
- ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
- ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
- ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
- ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
- ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
- ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
- ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
- ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
- ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
- ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
- ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
- ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
- ml4t/diagnostic/evaluation/validated_cv.py +535 -0
- ml4t/diagnostic/evaluation/visualization.py +1050 -0
- ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
- ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
- ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
- ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
- ml4t/diagnostic/integration/__init__.py +48 -0
- ml4t/diagnostic/integration/backtest_contract.py +671 -0
- ml4t/diagnostic/integration/data_contract.py +316 -0
- ml4t/diagnostic/integration/engineer_contract.py +226 -0
- ml4t/diagnostic/logging/__init__.py +77 -0
- ml4t/diagnostic/logging/logger.py +245 -0
- ml4t/diagnostic/logging/performance.py +234 -0
- ml4t/diagnostic/logging/progress.py +234 -0
- ml4t/diagnostic/logging/wandb.py +412 -0
- ml4t/diagnostic/metrics/__init__.py +9 -0
- ml4t/diagnostic/metrics/percentiles.py +128 -0
- ml4t/diagnostic/py.typed +1 -0
- ml4t/diagnostic/reporting/__init__.py +43 -0
- ml4t/diagnostic/reporting/base.py +130 -0
- ml4t/diagnostic/reporting/html_renderer.py +275 -0
- ml4t/diagnostic/reporting/json_renderer.py +51 -0
- ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
- ml4t/diagnostic/results/AGENT.md +24 -0
- ml4t/diagnostic/results/__init__.py +105 -0
- ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
- ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
- ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
- ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
- ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
- ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
- ml4t/diagnostic/results/barrier_results/validation.py +38 -0
- ml4t/diagnostic/results/base.py +177 -0
- ml4t/diagnostic/results/event_results.py +349 -0
- ml4t/diagnostic/results/feature_results.py +787 -0
- ml4t/diagnostic/results/multi_signal_results.py +431 -0
- ml4t/diagnostic/results/portfolio_results.py +281 -0
- ml4t/diagnostic/results/sharpe_results.py +448 -0
- ml4t/diagnostic/results/signal_results/__init__.py +74 -0
- ml4t/diagnostic/results/signal_results/ic.py +581 -0
- ml4t/diagnostic/results/signal_results/irtc.py +110 -0
- ml4t/diagnostic/results/signal_results/quantile.py +392 -0
- ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
- ml4t/diagnostic/results/signal_results/turnover.py +213 -0
- ml4t/diagnostic/results/signal_results/validation.py +147 -0
- ml4t/diagnostic/signal/AGENT.md +17 -0
- ml4t/diagnostic/signal/__init__.py +69 -0
- ml4t/diagnostic/signal/_report.py +152 -0
- ml4t/diagnostic/signal/_utils.py +261 -0
- ml4t/diagnostic/signal/core.py +275 -0
- ml4t/diagnostic/signal/quantile.py +148 -0
- ml4t/diagnostic/signal/result.py +214 -0
- ml4t/diagnostic/signal/signal_ic.py +129 -0
- ml4t/diagnostic/signal/turnover.py +182 -0
- ml4t/diagnostic/splitters/AGENT.md +19 -0
- ml4t/diagnostic/splitters/__init__.py +36 -0
- ml4t/diagnostic/splitters/base.py +501 -0
- ml4t/diagnostic/splitters/calendar.py +421 -0
- ml4t/diagnostic/splitters/calendar_config.py +91 -0
- ml4t/diagnostic/splitters/combinatorial.py +1064 -0
- ml4t/diagnostic/splitters/config.py +322 -0
- ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
- ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
- ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
- ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
- ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
- ml4t/diagnostic/splitters/group_isolation.py +329 -0
- ml4t/diagnostic/splitters/persistence.py +316 -0
- ml4t/diagnostic/splitters/utils.py +207 -0
- ml4t/diagnostic/splitters/walk_forward.py +757 -0
- ml4t/diagnostic/utils/__init__.py +42 -0
- ml4t/diagnostic/utils/config.py +542 -0
- ml4t/diagnostic/utils/dependencies.py +318 -0
- ml4t/diagnostic/utils/sessions.py +127 -0
- ml4t/diagnostic/validation/__init__.py +54 -0
- ml4t/diagnostic/validation/dataframe.py +274 -0
- ml4t/diagnostic/validation/returns.py +280 -0
- ml4t/diagnostic/validation/timeseries.py +299 -0
- ml4t/diagnostic/visualization/AGENT.md +19 -0
- ml4t/diagnostic/visualization/__init__.py +223 -0
- ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
- ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
- ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
- ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
- ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
- ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
- ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
- ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
- ml4t/diagnostic/visualization/barrier_plots.py +782 -0
- ml4t/diagnostic/visualization/core.py +1060 -0
- ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
- ml4t/diagnostic/visualization/dashboards/base.py +582 -0
- ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
- ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
- ml4t/diagnostic/visualization/dashboards.py +43 -0
- ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
- ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
- ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
- ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
- ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
- ml4t/diagnostic/visualization/feature_plots.py +888 -0
- ml4t/diagnostic/visualization/interaction_plots.py +618 -0
- ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
- ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
- ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
- ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
- ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
- ml4t/diagnostic/visualization/report_generation.py +1343 -0
- ml4t/diagnostic/visualization/signal/__init__.py +103 -0
- ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
- ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
- ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
- ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
- ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
- ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
- ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
- ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
- ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
- ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
- ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
- ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
"""Autocorrelation analysis for time series features.
|
|
2
|
+
|
|
3
|
+
Provides ACF (autocorrelation function) and PACF (partial autocorrelation function)
|
|
4
|
+
analysis with confidence intervals and ARIMA order suggestions.
|
|
5
|
+
|
|
6
|
+
Key Functions:
|
|
7
|
+
compute_acf: Autocorrelation function with confidence intervals
|
|
8
|
+
compute_pacf: Partial autocorrelation function with confidence intervals
|
|
9
|
+
analyze_autocorrelation: Combined ACF/PACF analysis with ARIMA order suggestion
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import Any, Literal, cast
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
import pandas as pd
|
|
18
|
+
from statsmodels.tsa.stattools import acf, pacf
|
|
19
|
+
|
|
20
|
+
from ml4t.diagnostic.errors import ComputationError, ValidationError
|
|
21
|
+
from ml4t.diagnostic.logging import get_logger
|
|
22
|
+
|
|
23
|
+
logger = get_logger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# =============================================================================
|
|
27
|
+
# Result Class
|
|
28
|
+
# =============================================================================
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class CorrelationResult:
    """Container for ACF or PACF estimates plus their confidence bands.

    Attributes:
        values: Correlation coefficient per lag (length nlags+1); values[0] is 1.0.
        conf_int: Per-lag confidence interval bounds, shape (nlags+1, 2).
        lags: Lag indices 0, 1, ..., nlags.
        alpha: Significance level used for the confidence intervals.
        n_obs: Number of observations the estimate was computed from.
        method: Estimation method label.
        kind: Which statistic this holds — 'acf' or 'pacf'.
    """

    def __init__(
        self,
        values: np.ndarray | None = None,
        conf_int: np.ndarray | None = None,
        lags: np.ndarray | None = None,
        alpha: float = 0.05,
        n_obs: int = 0,
        method: str = "standard",
        kind: Literal["acf", "pacf"] | None = None,
        # Backward compat aliases
        acf_values: np.ndarray | None = None,
        pacf_values: np.ndarray | None = None,
    ):
        # Legacy call sites passed acf_values/pacf_values; fold them into the
        # unified (values, kind) pair before validating.
        if acf_values is not None:
            values, kind = acf_values, "acf"
        elif pacf_values is not None:
            values, kind = pacf_values, "pacf"

        if values is None:
            raise ValueError("Must provide values, acf_values, or pacf_values")
        if conf_int is None:
            raise ValueError("Must provide conf_int")
        if lags is None:
            raise ValueError("Must provide lags")
        if kind is None:
            raise ValueError("Must provide kind")

        self.values = values
        self.conf_int = conf_int
        self.lags = lags
        self.alpha = alpha
        self.n_obs = n_obs
        self.method = method
        self.kind = kind

    @property
    def significant_lags(self) -> list[int]:
        """Lags whose confidence interval excludes zero (lag 0 is skipped)."""
        return [
            int(self.lags[i])
            for i in range(1, len(self.values))
            if self.conf_int[i, 0] > 0 or self.conf_int[i, 1] < 0
        ]

    # Backward compatibility aliases
    @property
    def acf_values(self) -> np.ndarray:
        """ACF values (alias for values when kind='acf')."""
        return self.values

    @property
    def pacf_values(self) -> np.ndarray:
        """PACF values (alias for values when kind='pacf')."""
        return self.values

    def __repr__(self) -> str:
        n_sig = len(self.significant_lags)
        n_lags = len(self.lags) - 1
        text = (
            f"{self.kind.upper()}Result(n_obs={self.n_obs}, nlags={n_lags}, "
            f"significant={n_sig}/{n_lags}, alpha={self.alpha}"
        )
        # Only PACF results carry a meaningful estimation-method choice.
        if self.kind == "pacf":
            text += f", method='{self.method}'"
        return text + ")"

    def __str__(self) -> str:
        sig = self.significant_lags
        report = [
            f"{self.kind.upper()} Analysis Results:",
            f"  Observations: {self.n_obs}",
            f"  Lags analyzed: {len(self.lags) - 1}",
            f"  Significance level: {self.alpha}",
            f"  Method: {self.method}",
            f"  Significant lags: {len(sig)}",
        ]
        if sig:
            # Cap the detail line at the first 10 lags; flag any truncation.
            tail = " ..." if len(sig) > 10 else ""
            report.append(f"    Lags: {sig[:10]}" + tail)
        return "\n".join(report)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# Backward-compatibility type aliases: older call sites imported ACFResult /
# PACFResult; both now resolve to the unified CorrelationResult class.
ACFResult = CorrelationResult
PACFResult = CorrelationResult
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# =============================================================================
|
|
135
|
+
# Shared Validation
|
|
136
|
+
# =============================================================================
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _validate_and_prepare(
    data: pd.Series | np.ndarray,
    nlags: int | None,
    kind: Literal["acf", "pacf"],
    missing: Literal["none", "raise", "conservative", "drop"] = "none",
) -> tuple[np.ndarray, int]:
    """Validate a series and resolve the lag count for ACF/PACF estimation.

    Args:
        data: Time series data.
        nlags: Requested lag count, or None to auto-select.
        kind: 'acf' or 'pacf'; controls minimum-length and max-lag rules.
        missing: NaN policy ('none', 'raise', 'conservative', 'drop').

    Returns:
        Tuple of (clean_values, nlags).

    Raises:
        ValidationError: On empty input, NaNs under missing='raise',
            all-NaN input, too-short series, or out-of-range nlags.
    """
    arr = data.to_numpy() if isinstance(data, pd.Series) else np.asarray(data)

    if len(arr) == 0:
        raise ValidationError(
            f"Cannot compute {kind.upper()} for empty data",
            context={"data_length": 0},
        )

    # NaN policy: 'raise' rejects outright; 'conservative' and 'drop' both
    # filter NaNs out here; 'none' passes them through untouched.
    if missing == "raise" and np.any(np.isnan(arr)):
        nan_count = int(np.sum(np.isnan(arr)))
        raise ValidationError(
            "Data contains NaN values",
            context={"nan_count": nan_count, "total_count": len(arr)},
        )
    elif missing in ["conservative", "drop"] and np.any(np.isnan(arr)):
        before = len(arr)
        arr = arr[~np.isnan(arr)]
        logger.info(
            "Dropped NaN values",
            original_length=before,
            clean_length=len(arr),
        )

    # Everything may have been NaN.
    if len(arr) == 0:
        raise ValidationError("All data is NaN after missing value handling")

    # PACF needs a few more points than ACF to be estimable at all.
    minimum = 5 if kind == "pacf" else 3
    if len(arr) < minimum:
        raise ValidationError(
            f"Insufficient data for {kind.upper()} computation (need at least {minimum} observations)",
            context={"n_obs": len(arr)},
        )

    n = len(arr)

    if nlags is None:
        # Rule of thumb: 10*log10(n), capped by the kind-specific maximum.
        ceiling = n // 2 - 1 if kind == "pacf" else n - 1
        nlags = int(min(10 * np.log10(n), ceiling))
        logger.debug(f"Auto-selected nlags for {kind.upper()}", nlags=nlags, n_obs=n)
    else:
        if nlags < 0:
            raise ValidationError("nlags must be non-negative", context={"nlags": nlags})

        bound = n // 2 if kind == "pacf" else n
        if nlags >= bound:
            msg = (
                "nlags must be less than n_obs/2 for PACF"
                if kind == "pacf"
                else "nlags must be less than number of observations"
            )
            raise ValidationError(
                msg,
                context={"nlags": nlags, "n_obs": n, "max_nlags": bound - 1},
            )

        # Warn on user-requested lag counts large relative to the sample.
        if nlags > n // 4:
            logger.warning(
                "Large nlags may produce unreliable results",
                nlags=nlags,
                n_obs=n,
            )

    return arr, nlags
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
# =============================================================================
|
|
231
|
+
# Public API
|
|
232
|
+
# =============================================================================
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def compute_acf(
    data: pd.Series | np.ndarray,
    nlags: int | None = None,
    alpha: float = 0.05,
    fft: bool = False,
    missing: Literal["none", "raise", "conservative", "drop"] = "none",
) -> CorrelationResult:
    """Estimate the autocorrelation function with confidence intervals.

    Args:
        data: Time series data.
        nlags: Lag count; None auto-selects min(10*log10(n), n-1).
        alpha: Significance level for the confidence intervals.
        fft: Whether to use the FFT-based estimator.
        missing: NaN handling policy.

    Returns:
        CorrelationResult with kind='acf'.

    Raises:
        ValidationError: If the input fails validation.
        ComputationError: If the underlying statsmodels call fails.
    """
    logger.debug("Computing ACF", fft=fft, missing_handling=missing)

    clean, nlags = _validate_and_prepare(data, nlags, "acf", missing)
    n_obs = len(clean)

    try:
        coefficients, bands = acf(clean, nlags=nlags, alpha=alpha, fft=fft, missing=missing)
    except Exception as e:
        # Wrap any statsmodels failure so callers get a uniform error type.
        raise ComputationError(
            f"Failed to compute ACF: {e}",
            context={"n_obs": n_obs, "nlags": nlags},
            cause=e,
        ) from None

    result = CorrelationResult(
        values=coefficients,
        conf_int=bands,
        lags=np.arange(len(coefficients)),
        alpha=alpha,
        n_obs=n_obs,
        method="fft" if fft else "standard",
        kind="acf",
    )

    logger.info("ACF computed", n_obs=n_obs, nlags=nlags, significant=len(result.significant_lags))
    return result
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def compute_pacf(
    data: pd.Series | np.ndarray,
    nlags: int | None = None,
    alpha: float = 0.05,
    method: Literal[
        "ywadjusted", "yw_adjusted", "ols", "ld", "ldadjusted", "ld_adjusted"
    ] = "ywadjusted",
) -> CorrelationResult:
    """Estimate the partial autocorrelation function with confidence intervals.

    PACF isolates the direct correlation at lag k by controlling for all
    intermediate lags, making it the standard tool for picking an AR order:
    an AR(p) process shows a PACF cutoff after lag p.

    Args:
        data: Time series data.
        nlags: Lag count; None auto-selects min(10*log10(n), n//2-1).
        alpha: Significance level for the confidence intervals.
        method: Estimation method ('ywadjusted', 'ols', 'ld', etc.).

    Returns:
        CorrelationResult with kind='pacf'.

    Raises:
        ValidationError: If the input fails validation.
        ComputationError: If the underlying statsmodels call fails.
    """
    logger.debug("Computing PACF", method=method)

    # statsmodels' pacf has no `missing` option, so NaNs are always dropped.
    clean, nlags = _validate_and_prepare(data, nlags, "pacf", missing="drop")
    n_obs = len(clean)

    try:
        # Accept both underscored and collapsed method spellings.
        sm_method = cast(Any, method.replace("_", ""))
        coefficients, bands = pacf(clean, nlags=nlags, alpha=alpha, method=sm_method)
    except Exception as e:
        raise ComputationError(
            f"Failed to compute PACF: {e}",
            context={"n_obs": n_obs, "nlags": nlags, "method": method},
            cause=e,
        ) from None

    result = CorrelationResult(
        values=coefficients,
        conf_int=bands,
        lags=np.arange(len(coefficients)),
        alpha=alpha,
        n_obs=n_obs,
        method=method,
        kind="pacf",
    )

    logger.info("PACF computed", n_obs=n_obs, nlags=nlags, significant=len(result.significant_lags))
    return result
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
# =============================================================================
|
|
343
|
+
# Analysis
|
|
344
|
+
# =============================================================================
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
class AutocorrelationAnalysisResult:
    """Joint ACF/PACF analysis outcome with a tentative ARIMA order.

    Attributes:
        acf_result: ACF estimate.
        pacf_result: PACF estimate.
        suggested_ar_order: AR order (p) read off the PACF cutoff.
        suggested_ma_order: MA order (q) read off the ACF cutoff.
        suggested_d_order: Differencing order; fixed at 0 (assess stationarity separately).
        significant_acf_lags: Significant ACF lags (possibly caller-supplied).
        significant_pacf_lags: Significant PACF lags (possibly caller-supplied).
        is_white_noise: True when neither estimate shows significant lags.
        summary_df: Lag-by-lag ACF/PACF comparison table.
    """

    def __init__(
        self,
        acf_result: CorrelationResult,
        pacf_result: CorrelationResult,
        suggested_ar_order: int,
        suggested_ma_order: int,
        is_white_noise: bool,
        summary_df: pd.DataFrame,
        # Backward compat - allow passing these explicitly
        significant_acf_lags: list[int] | None = None,
        significant_pacf_lags: list[int] | None = None,
    ):
        self.acf_result = acf_result
        self.pacf_result = pacf_result
        self.suggested_ar_order = suggested_ar_order
        self.suggested_ma_order = suggested_ma_order
        self.suggested_d_order = 0  # differencing is assessed elsewhere
        # Explicit lag lists win; otherwise derive from the result objects.
        if significant_acf_lags is None:
            significant_acf_lags = acf_result.significant_lags
        if significant_pacf_lags is None:
            significant_pacf_lags = pacf_result.significant_lags
        self.significant_acf_lags = significant_acf_lags
        self.significant_pacf_lags = significant_pacf_lags
        self.is_white_noise = is_white_noise
        self.summary_df = summary_df

    @property
    def suggested_arima_order(self) -> tuple[int, int, int]:
        """Suggested ARIMA(p, d, q) order."""
        return (self.suggested_ar_order, self.suggested_d_order, self.suggested_ma_order)

    def __repr__(self) -> str:
        p, d, q = self.suggested_arima_order
        return (
            f"AutocorrelationAnalysisResult(n_obs={self.acf_result.n_obs}, "
            f"ARIMA({p},{d},{q}), white_noise={self.is_white_noise})"
        )

    def __str__(self) -> str:
        p, q = self.suggested_ar_order, self.suggested_ma_order
        report = [
            "Autocorrelation Analysis Results:",
            f"  Observations: {self.acf_result.n_obs}",
            f"  Lags analyzed: {len(self.acf_result.lags) - 1}",
            f"  Significance level: {self.acf_result.alpha}",
            "",
            f"ACF: {len(self.significant_acf_lags)} significant lags",
            f"PACF: {len(self.significant_pacf_lags)} significant lags",
            "",
            f"White noise: {self.is_white_noise}",
            f"Suggested ARIMA order: {self.suggested_arima_order}",
        ]

        # One-line reading of the (p, q) pattern, mirroring textbook
        # Box-Jenkins identification.
        if self.is_white_noise:
            report.append("Interpretation: No autocorrelation detected (random process)")
        elif p > 0 and q == 0:
            report.append(f"Interpretation: AR({p}) process detected")
        elif p == 0 and q > 0:
            report.append(f"Interpretation: MA({q}) process detected")
        elif p > 0 and q > 0:
            report.append(f"Interpretation: ARMA({p},{q}) process detected")

        return "\n".join(report)
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def analyze_autocorrelation(
    data: pd.Series | np.ndarray,
    max_lags: int | None = None,
    alpha: float = 0.05,
    acf_method: Literal["standard", "fft"] = "standard",
    pacf_method: Literal[
        "ywadjusted", "yw_adjusted", "ols", "ld", "ldadjusted", "ld_adjusted"
    ] = "ywadjusted",
) -> AutocorrelationAnalysisResult:
    """Run ACF and PACF jointly and suggest a tentative ARIMA(p, d, q) order.

    Args:
        data: Time series data.
        max_lags: Maximum lag count shared by both estimators.
        alpha: Significance level for confidence intervals.
        acf_method: ACF estimator, 'standard' or 'fft'.
        pacf_method: PACF estimation method.

    Returns:
        AutocorrelationAnalysisResult combining both estimates with
        suggested ARIMA orders and a side-by-side summary table.
    """
    logger.info("Starting autocorrelation analysis")

    acf_result = compute_acf(data, nlags=max_lags, alpha=alpha, fft=(acf_method == "fft"))
    pacf_result = compute_pacf(data, nlags=max_lags, alpha=alpha, method=pacf_method)

    # White noise: neither estimator found a single significant lag.
    no_structure = (
        len(acf_result.significant_lags) == 0 and len(pacf_result.significant_lags) == 0
    )

    result = AutocorrelationAnalysisResult(
        acf_result=acf_result,
        pacf_result=pacf_result,
        suggested_ar_order=_suggest_order(pacf_result),  # PACF cutoff -> AR(p)
        suggested_ma_order=_suggest_order(acf_result),  # ACF cutoff -> MA(q)
        is_white_noise=no_structure,
        summary_df=_create_summary_dataframe(acf_result, pacf_result),
    )

    logger.info(
        "Autocorrelation analysis completed",
        arima_order=result.suggested_arima_order,
        white_noise=no_structure,
    )
    return result
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
# =============================================================================
|
|
488
|
+
# Helpers
|
|
489
|
+
# =============================================================================
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def _suggest_order(result: CorrelationResult) -> int:
|
|
493
|
+
"""Suggest AR order (from PACF) or MA order (from ACF) based on cutoff pattern.
|
|
494
|
+
|
|
495
|
+
For AR(p): PACF cuts off after lag p.
|
|
496
|
+
For MA(q): ACF cuts off after lag q.
|
|
497
|
+
"""
|
|
498
|
+
significant_set = set(result.significant_lags)
|
|
499
|
+
if not significant_set:
|
|
500
|
+
return 0
|
|
501
|
+
|
|
502
|
+
cutoff_lag = 0
|
|
503
|
+
for lag in range(1, len(result.lags)):
|
|
504
|
+
if lag in significant_set and lag == cutoff_lag + 1:
|
|
505
|
+
cutoff_lag = lag
|
|
506
|
+
else:
|
|
507
|
+
break
|
|
508
|
+
return cutoff_lag
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def _create_summary_dataframe(
|
|
512
|
+
acf_result: CorrelationResult, pacf_result: CorrelationResult
|
|
513
|
+
) -> pd.DataFrame:
|
|
514
|
+
"""Create DataFrame with ACF and PACF side-by-side (excluding lag 0)."""
|
|
515
|
+
lags = acf_result.lags[1:]
|
|
516
|
+
acf_sig_set = set(acf_result.significant_lags)
|
|
517
|
+
pacf_sig_set = set(pacf_result.significant_lags)
|
|
518
|
+
|
|
519
|
+
return pd.DataFrame(
|
|
520
|
+
{
|
|
521
|
+
"lag": lags,
|
|
522
|
+
"acf_value": acf_result.values[1:],
|
|
523
|
+
"acf_significant": [lag in acf_sig_set for lag in lags],
|
|
524
|
+
"acf_ci_lower": acf_result.conf_int[1:, 0],
|
|
525
|
+
"acf_ci_upper": acf_result.conf_int[1:, 1],
|
|
526
|
+
"pacf_value": pacf_result.values[1:],
|
|
527
|
+
"pacf_significant": [lag in pacf_sig_set for lag in lags],
|
|
528
|
+
"pacf_ci_lower": pacf_result.conf_int[1:, 0],
|
|
529
|
+
"pacf_ci_upper": pacf_result.conf_int[1:, 1],
|
|
530
|
+
}
|
|
531
|
+
)
|