ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,531 @@
1
+ """Autocorrelation analysis for time series features.
2
+
3
+ Provides ACF (autocorrelation function) and PACF (partial autocorrelation function)
4
+ analysis with confidence intervals and ARIMA order suggestions.
5
+
6
+ Key Functions:
7
+ compute_acf: Autocorrelation function with confidence intervals
8
+ compute_pacf: Partial autocorrelation function with confidence intervals
9
+ analyze_autocorrelation: Combined ACF/PACF analysis with ARIMA order suggestion
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Any, Literal, cast
15
+
16
+ import numpy as np
17
+ import pandas as pd
18
+ from statsmodels.tsa.stattools import acf, pacf
19
+
20
+ from ml4t.diagnostic.errors import ComputationError, ValidationError
21
+ from ml4t.diagnostic.logging import get_logger
22
+
23
+ logger = get_logger(__name__)
24
+
25
+
26
+ # =============================================================================
27
+ # Result Class
28
+ # =============================================================================
29
+
30
+
31
class CorrelationResult:
    """Result container for an ACF or PACF computation.

    Attributes:
        values: Correlation coefficients per lag (length nlags+1);
            values[0] is always 1.0 (the series' correlation with itself).
        conf_int: Confidence interval bounds, shape (nlags+1, 2).
        lags: Lag indices 0, 1, ..., nlags.
        alpha: Significance level used for the confidence intervals.
        n_obs: Number of observations the estimate was computed from.
        method: Estimation method used.
        kind: Which statistic this holds ('acf' or 'pacf').
    """

    def __init__(
        self,
        values: np.ndarray | None = None,
        conf_int: np.ndarray | None = None,
        lags: np.ndarray | None = None,
        alpha: float = 0.05,
        n_obs: int = 0,
        method: str = "standard",
        kind: Literal["acf", "pacf"] | None = None,
        # Backward compat aliases
        acf_values: np.ndarray | None = None,
        pacf_values: np.ndarray | None = None,
    ):
        # Legacy keywords: acf_values/pacf_values imply both the data and kind.
        if acf_values is not None:
            values, kind = acf_values, "acf"
        elif pacf_values is not None:
            values, kind = pacf_values, "pacf"

        if values is None:
            raise ValueError("Must provide values, acf_values, or pacf_values")
        if conf_int is None:
            raise ValueError("Must provide conf_int")
        if lags is None:
            raise ValueError("Must provide lags")
        if kind is None:
            raise ValueError("Must provide kind")

        self.values = values
        self.conf_int = conf_int
        self.lags = lags
        self.alpha = alpha
        self.n_obs = n_obs
        self.method = method
        self.kind = kind

    @property
    def significant_lags(self) -> list[int]:
        """Lags (excluding 0) whose confidence interval excludes zero."""
        return [
            int(self.lags[i])
            for i in range(1, len(self.values))
            if self.conf_int[i, 0] > 0 or self.conf_int[i, 1] < 0
        ]

    @property
    def acf_values(self) -> np.ndarray:
        """Backward-compat alias for values (meaningful when kind='acf')."""
        return self.values

    @property
    def pacf_values(self) -> np.ndarray:
        """Backward-compat alias for values (meaningful when kind='pacf')."""
        return self.values

    def __repr__(self) -> str:
        n_lags = len(self.lags) - 1
        n_sig = len(self.significant_lags)
        text = (
            f"{self.kind.upper()}Result(n_obs={self.n_obs}, nlags={n_lags}, "
            f"significant={n_sig}/{n_lags}, alpha={self.alpha}"
        )
        # Only PACF results carry a meaningful estimation method choice.
        if self.kind == "pacf":
            text += f", method='{self.method}'"
        return text + ")"

    def __str__(self) -> str:
        label = self.kind.upper()
        sig = self.significant_lags
        lines = [
            f"{label} Analysis Results:",
            f" Observations: {self.n_obs}",
            f" Lags analyzed: {len(self.lags) - 1}",
            f" Significance level: {self.alpha}",
            f" Method: {self.method}",
            f" Significant lags: {len(sig)}",
        ]
        if sig:
            lines.append(f" Lags: {sig[:10]}")
            if len(sig) > 10:
                lines[-1] += " ..."
        return "\n".join(lines)
127
+
128
+
129
# Backward compatibility type aliases: earlier releases exposed separate
# result classes for ACF and PACF; both now resolve to CorrelationResult.
ACFResult = CorrelationResult
PACFResult = CorrelationResult
132
+
133
+
134
+ # =============================================================================
135
+ # Shared Validation
136
+ # =============================================================================
137
+
138
+
139
def _validate_and_prepare(
    data: pd.Series | np.ndarray,
    nlags: int | None,
    kind: Literal["acf", "pacf"],
    missing: Literal["none", "raise", "conservative", "drop"] = "none",
) -> tuple[np.ndarray, int]:
    """Validate a series and resolve the lag count for ACF/PACF computation.

    Args:
        data: Time series data.
        nlags: Requested number of lags, or None to auto-select.
        kind: Which statistic the data is being prepared for.
        missing: NaN policy ('none' passes NaNs through, 'raise' errors,
            'conservative'/'drop' remove them).

    Returns:
        Tuple of (clean numpy array, resolved nlags).

    Raises:
        ValidationError: On empty data, NaN policy violation, too few
            observations, or an out-of-range nlags.
    """
    arr = data.to_numpy() if isinstance(data, pd.Series) else np.asarray(data)

    if len(arr) == 0:
        raise ValidationError(
            f"Cannot compute {kind.upper()} for empty data",
            context={"data_length": 0},
        )

    # NaN policy (short-circuit so isnan is never called under 'none').
    if missing == "raise" and np.any(np.isnan(arr)):
        raise ValidationError(
            "Data contains NaN values",
            context={"nan_count": int(np.sum(np.isnan(arr))), "total_count": len(arr)},
        )
    elif missing in ("conservative", "drop") and np.any(np.isnan(arr)):
        before = len(arr)
        arr = arr[~np.isnan(arr)]
        logger.info(
            "Dropped NaN values",
            original_length=before,
            clean_length=len(arr),
        )

    if len(arr) == 0:
        raise ValidationError("All data is NaN after missing value handling")

    # PACF needs a few more points than ACF to be estimable at all.
    min_obs = 5 if kind == "pacf" else 3
    if len(arr) < min_obs:
        raise ValidationError(
            f"Insufficient data for {kind.upper()} computation (need at least {min_obs} observations)",
            context={"n_obs": len(arr)},
        )

    n_obs = len(arr)

    if nlags is None:
        # Rule of thumb: 10*log10(n), capped so statsmodels stays in range
        # (PACF is only identifiable up to roughly half the sample length).
        cap = n_obs // 2 - 1 if kind == "pacf" else n_obs - 1
        nlags = int(min(10 * np.log10(n_obs), cap))
        logger.debug(f"Auto-selected nlags for {kind.upper()}", nlags=nlags, n_obs=n_obs)
        return arr, nlags

    if nlags < 0:
        raise ValidationError("nlags must be non-negative", context={"nlags": nlags})

    cap = n_obs // 2 if kind == "pacf" else n_obs
    if nlags >= cap:
        raise ValidationError(
            "nlags must be less than n_obs/2 for PACF"
            if kind == "pacf"
            else "nlags must be less than number of observations",
            context={"nlags": nlags, "n_obs": n_obs, "max_nlags": cap - 1},
        )

    # NOTE(review): this warning only fires for an explicitly supplied nlags;
    # auto-selected values can also exceed n_obs//4 without warning.
    if nlags > n_obs // 4:
        logger.warning(
            "Large nlags may produce unreliable results",
            nlags=nlags,
            n_obs=n_obs,
        )

    return arr, nlags
228
+
229
+
230
+ # =============================================================================
231
+ # Public API
232
+ # =============================================================================
233
+
234
+
235
def compute_acf(
    data: pd.Series | np.ndarray,
    nlags: int | None = None,
    alpha: float = 0.05,
    fft: bool = False,
    missing: Literal["none", "raise", "conservative", "drop"] = "none",
) -> CorrelationResult:
    """Compute the autocorrelation function (ACF) with confidence intervals.

    Args:
        data: Time series data.
        nlags: Number of lags; auto-selected as min(10*log10(n), n-1) if None.
        alpha: Significance level for confidence intervals.
        fft: Use the FFT-based estimator.
        missing: NaN policy, forwarded to both validation and statsmodels.

    Returns:
        CorrelationResult with kind='acf'.

    Raises:
        ValidationError: If data is invalid.
        ComputationError: If the statsmodels call fails.
    """
    logger.debug("Computing ACF", fft=fft, missing_handling=missing)

    clean, nlags = _validate_and_prepare(data, nlags, "acf", missing)
    n = len(clean)

    try:
        coeffs, intervals = acf(clean, nlags=nlags, alpha=alpha, fft=fft, missing=missing)
    except Exception as exc:
        raise ComputationError(
            f"Failed to compute ACF: {exc}",
            context={"n_obs": n, "nlags": nlags},
            cause=exc,
        ) from None

    out = CorrelationResult(
        values=coeffs,
        conf_int=intervals,
        lags=np.arange(len(coeffs)),
        alpha=alpha,
        n_obs=n,
        method="fft" if fft else "standard",
        kind="acf",
    )

    logger.info("ACF computed", n_obs=n, nlags=nlags, significant=len(out.significant_lags))
    return out
284
+
285
+
286
def compute_pacf(
    data: pd.Series | np.ndarray,
    nlags: int | None = None,
    alpha: float = 0.05,
    method: Literal[
        "ywadjusted", "yw_adjusted", "ols", "ld", "ldadjusted", "ld_adjusted"
    ] = "ywadjusted",
) -> CorrelationResult:
    """Compute the partial autocorrelation function (PACF) with confidence intervals.

    The PACF at lag k is the direct correlation with lag k after controlling
    for lags 1..k-1; for an AR(p) process it cuts off after lag p, which makes
    it the standard tool for picking the AR order.

    Args:
        data: Time series data.
        nlags: Number of lags; auto-selected as min(10*log10(n), n//2-1) if None.
        alpha: Significance level for confidence intervals.
        method: statsmodels estimation method; underscore spellings are accepted.

    Returns:
        CorrelationResult with kind='pacf'.

    Raises:
        ValidationError: If data is invalid.
        ComputationError: If the statsmodels call fails.
    """
    logger.debug("Computing PACF", method=method)

    # statsmodels' pacf has no `missing` parameter, so NaNs are always dropped.
    clean, nlags = _validate_and_prepare(data, nlags, "pacf", missing="drop")
    n = len(clean)

    try:
        # Normalize 'yw_adjusted' -> 'ywadjusted' etc. for statsmodels.
        stats_method = cast(Any, method.replace("_", ""))
        coeffs, intervals = pacf(clean, nlags=nlags, alpha=alpha, method=stats_method)
    except Exception as exc:
        raise ComputationError(
            f"Failed to compute PACF: {exc}",
            context={"n_obs": n, "nlags": nlags, "method": method},
            cause=exc,
        ) from None

    out = CorrelationResult(
        values=coeffs,
        conf_int=intervals,
        lags=np.arange(len(coeffs)),
        alpha=alpha,
        n_obs=n,
        method=method,
        kind="pacf",
    )

    logger.info("PACF computed", n_obs=n, nlags=nlags, significant=len(out.significant_lags))
    return out
340
+
341
+
342
+ # =============================================================================
343
+ # Analysis
344
+ # =============================================================================
345
+
346
+
347
class AutocorrelationAnalysisResult:
    """Joint ACF/PACF analysis with heuristic ARIMA order suggestions.

    Attributes:
        acf_result: ACF analysis result.
        pacf_result: PACF analysis result.
        suggested_ar_order: AR order (p), read off the PACF cutoff.
        suggested_ma_order: MA order (q), read off the ACF cutoff.
        suggested_d_order: Always 0; differencing must be assessed separately.
        is_white_noise: True when neither ACF nor PACF has significant lags.
        summary_df: Per-lag DataFrame with ACF and PACF side-by-side.
    """

    def __init__(
        self,
        acf_result: CorrelationResult,
        pacf_result: CorrelationResult,
        suggested_ar_order: int,
        suggested_ma_order: int,
        is_white_noise: bool,
        summary_df: pd.DataFrame,
        # Backward compat - allow passing these explicitly
        significant_acf_lags: list[int] | None = None,
        significant_pacf_lags: list[int] | None = None,
    ):
        self.acf_result = acf_result
        self.pacf_result = pacf_result
        self.suggested_ar_order = suggested_ar_order
        self.suggested_ma_order = suggested_ma_order
        self.suggested_d_order = 0
        # Explicitly supplied lag lists win; otherwise derive from the results.
        if significant_acf_lags is None:
            significant_acf_lags = acf_result.significant_lags
        self.significant_acf_lags = significant_acf_lags
        if significant_pacf_lags is None:
            significant_pacf_lags = pacf_result.significant_lags
        self.significant_pacf_lags = significant_pacf_lags
        self.is_white_noise = is_white_noise
        self.summary_df = summary_df

    @property
    def suggested_arima_order(self) -> tuple[int, int, int]:
        """Suggested (p, d, q) tuple for an ARIMA fit."""
        return (self.suggested_ar_order, self.suggested_d_order, self.suggested_ma_order)

    def __repr__(self) -> str:
        p, d, q = self.suggested_arima_order
        return (
            f"AutocorrelationAnalysisResult(n_obs={self.acf_result.n_obs}, "
            f"ARIMA({p},{d},{q}), white_noise={self.is_white_noise})"
        )

    def __str__(self) -> str:
        p = self.suggested_ar_order
        q = self.suggested_ma_order
        lines = [
            "Autocorrelation Analysis Results:",
            f" Observations: {self.acf_result.n_obs}",
            f" Lags analyzed: {len(self.acf_result.lags) - 1}",
            f" Significance level: {self.acf_result.alpha}",
            "",
            f"ACF: {len(self.significant_acf_lags)} significant lags",
            f"PACF: {len(self.significant_pacf_lags)} significant lags",
            "",
            f"White noise: {self.is_white_noise}",
            f"Suggested ARIMA order: {self.suggested_arima_order}",
        ]

        # One-line interpretation based on which side(s) show structure.
        if self.is_white_noise:
            lines.append("Interpretation: No autocorrelation detected (random process)")
        elif p > 0 and q == 0:
            lines.append(f"Interpretation: AR({p}) process detected")
        elif p == 0 and q > 0:
            lines.append(f"Interpretation: MA({q}) process detected")
        elif p > 0 and q > 0:
            lines.append(
                f"Interpretation: ARMA({p},{q}) process detected"
            )

        return "\n".join(lines)
429
+
430
+
431
def analyze_autocorrelation(
    data: pd.Series | np.ndarray,
    max_lags: int | None = None,
    alpha: float = 0.05,
    acf_method: Literal["standard", "fft"] = "standard",
    pacf_method: Literal[
        "ywadjusted", "yw_adjusted", "ols", "ld", "ldadjusted", "ld_adjusted"
    ] = "ywadjusted",
) -> AutocorrelationAnalysisResult:
    """Run ACF and PACF together and heuristically suggest ARIMA orders.

    Args:
        data: Time series data.
        max_lags: Maximum number of lags for both ACF and PACF.
        alpha: Significance level for confidence intervals.
        acf_method: 'standard' or 'fft' ACF estimator.
        pacf_method: PACF estimation method, forwarded to compute_pacf.

    Returns:
        AutocorrelationAnalysisResult with both results, a white-noise flag,
        suggested (p, d, q) orders, and a per-lag summary DataFrame.
    """
    logger.info("Starting autocorrelation analysis")

    acf_res = compute_acf(data, nlags=max_lags, alpha=alpha, fft=(acf_method == "fft"))
    pacf_res = compute_pacf(data, nlags=max_lags, alpha=alpha, method=pacf_method)

    # White noise: no significant structure on either side.
    white_noise = not (acf_res.significant_lags or pacf_res.significant_lags)

    combined = AutocorrelationAnalysisResult(
        acf_result=acf_res,
        pacf_result=pacf_res,
        suggested_ar_order=_suggest_order(pacf_res),  # PACF cutoff -> p
        suggested_ma_order=_suggest_order(acf_res),  # ACF cutoff -> q
        is_white_noise=white_noise,
        summary_df=_create_summary_dataframe(acf_res, pacf_res),
    )

    logger.info(
        "Autocorrelation analysis completed",
        arima_order=combined.suggested_arima_order,
        white_noise=white_noise,
    )
    return combined
485
+
486
+
487
+ # =============================================================================
488
+ # Helpers
489
+ # =============================================================================
490
+
491
+
492
def _suggest_order(result: CorrelationResult) -> int:
    """Length of the initial run of consecutively significant lags.

    Applied to a PACF this suggests the AR order p (PACF of AR(p) cuts off
    after lag p); applied to an ACF it suggests the MA order q.
    """
    significant = set(result.significant_lags)
    if not significant:
        return 0

    order = 0
    # Count 1, 2, 3, ... while each successive lag stays significant.
    while (order + 1) < len(result.lags) and (order + 1) in significant:
        order += 1
    return order
509
+
510
+
511
def _create_summary_dataframe(
    acf_result: CorrelationResult, pacf_result: CorrelationResult
) -> pd.DataFrame:
    """Build a per-lag comparison table of ACF vs PACF, omitting lag 0."""
    lag_axis = acf_result.lags[1:]

    data: dict[str, Any] = {"lag": lag_axis}
    # Emit the same column group (value, significance, CI bounds) per kind;
    # insertion order preserves the lag/acf_*/pacf_* column layout.
    for prefix, res in (("acf", acf_result), ("pacf", pacf_result)):
        sig = set(res.significant_lags)
        data[f"{prefix}_value"] = res.values[1:]
        data[f"{prefix}_significant"] = [lag in sig for lag in lag_axis]
        data[f"{prefix}_ci_lower"] = res.conf_int[1:, 0]
        data[f"{prefix}_ci_upper"] = res.conf_int[1:, 1]

    return pd.DataFrame(data)