ml4t_diagnostic-0.1.0a1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py
@@ -0,0 +1,296 @@

"""Augmented Dickey-Fuller test for unit root detection.

The ADF test tests the null hypothesis that a unit root is present
in the time series. If the null is rejected (p < alpha), the series
is considered stationary.

References:
    - Dickey, D. A., & Fuller, W. A. (1979). Distribution of the estimators
      for autoregressive time series with a unit root.
    - MacKinnon, J. G. (1994). Approximate asymptotic distribution functions
      for unit-root and cointegration tests.
"""

from __future__ import annotations

from typing import Literal

import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller

from ml4t.diagnostic.errors import ComputationError, ValidationError
from ml4t.diagnostic.logging import get_logger

logger = get_logger(__name__)


class ADFResult:
    """Results from Augmented Dickey-Fuller test.

    Attributes:
        test_statistic: ADF test statistic
        p_value: MacKinnon p-value for null hypothesis (unit root exists)
        critical_values: Critical values at 1%, 5%, 10% significance levels
        lags_used: Number of lags included in the test
        n_obs: Number of observations used in regression
        is_stationary: Whether series is stationary (rejects unit root at 5%)
        regression: Type of regression ('c', 'ct', 'ctt', 'n')
        autolag_method: Method used for lag selection if applicable
    """

    def __init__(
        self,
        test_statistic: float,
        p_value: float,
        critical_values: dict[str, float],
        lags_used: int,
        n_obs: int,
        regression: str,
        autolag_method: str | None = None,
    ):
        """Initialize ADF result.

        Args:
            test_statistic: ADF test statistic
            p_value: P-value for unit root hypothesis
            critical_values: Critical values dict with keys '1%', '5%', '10%'
            lags_used: Number of lags used in test
            n_obs: Number of observations
            regression: Regression type
            autolag_method: Automatic lag selection method if used
        """
        self.test_statistic = test_statistic
        self.p_value = p_value
        self.critical_values = critical_values
        self.lags_used = lags_used
        self.n_obs = n_obs
        self.regression = regression
        self.autolag_method = autolag_method

        # Determine stationarity at 5% significance level
        self.is_stationary = p_value < 0.05

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"ADFResult(statistic={self.test_statistic:.4f}, "
            f"p_value={self.p_value:.4f}, "
            f"stationary={self.is_stationary})"
        )

    def summary(self) -> str:
        """Human-readable summary of ADF test results.

        Returns:
            Formatted summary string
        """
        lines = [
            "Augmented Dickey-Fuller Test Results",
            "=" * 50,
            f"Test Statistic: {self.test_statistic:.4f}",
            f"P-value: {self.p_value:.4f}",
            f"Lags Used: {self.lags_used}",
            f"Observations: {self.n_obs}",
            f"Regression Type: {self.regression}",
        ]

        if self.autolag_method:
            lines.append(f"Autolag Method: {self.autolag_method}")

        lines.append("")
        lines.append("Critical Values:")
        for level, value in sorted(self.critical_values.items()):
            lines.append(f"  {level:>4s}: {value:>8.4f}")

        lines.append("")
        lines.append(f"Conclusion: {'Stationary' if self.is_stationary else 'Non-stationary'}")
        lines.append(
            f"  (Reject H0 at 5% level: {self.is_stationary})"
            if self.is_stationary
            else "  (Fail to reject H0 at 5% level)"
        )

        return "\n".join(lines)


def adf_test(
    data: pd.Series | np.ndarray,
    maxlag: int | None = None,
    regression: Literal["c", "ct", "ctt", "n"] = "c",
    autolag: Literal["AIC", "BIC", "t-stat"] | None = "AIC",
) -> ADFResult:
    """Perform Augmented Dickey-Fuller test for unit root.

    The ADF test tests the null hypothesis that a unit root is present
    in the time series. If the null is rejected (p < alpha), the series
    is considered stationary.

    Regression types:
        - 'c': Constant only (default)
        - 'ct': Constant and trend
        - 'ctt': Constant, linear and quadratic trend
        - 'n': No constant, no trend

    Lag selection methods:
        - 'AIC': Akaike Information Criterion (default)
        - 'BIC': Bayesian Information Criterion
        - 't-stat': Based on t-statistic of last lag
        - None: Use maxlag directly

    Args:
        data: Time series data to test (1D array or Series)
        maxlag: Maximum number of lags to use. If None, uses 12*(nobs/100)^{1/4}
        regression: Type of regression to include in test
        autolag: Method for automatic lag selection. If None, uses maxlag directly

    Returns:
        ADFResult with test statistics and conclusion

    Raises:
        ValidationError: If data is invalid (empty, wrong shape, etc.)
        ComputationError: If test computation fails

    Example:
        >>> import numpy as np
        >>> # Test random walk (non-stationary)
        >>> rw = np.cumsum(np.random.randn(1000))
        >>> result = adf_test(rw)
        >>> print(result.summary())
        >>>
        >>> # Test with manual lag specification
        >>> result = adf_test(rw, maxlag=10, autolag=None)
        >>> print(f"Used {result.lags_used} lags")
        >>>
        >>> # Test with trend
        >>> result = adf_test(rw, regression='ct')
        >>> print(f"Stationary: {result.is_stationary}")

    Notes:
        - For financial returns, 'c' (constant only) is typically appropriate
        - For price series, 'ct' (constant + trend) may be better
        - Larger maxlag increases power but reduces sample size
        - AIC tends to select more lags than BIC
    """
    # Input validation
    if data is None:
        raise ValidationError("Data cannot be None", context={"function": "adf_test"})

    # Convert to numpy array
    if isinstance(data, pd.Series):
        arr = data.to_numpy()
        logger.debug("Converted pandas Series to numpy array", shape=arr.shape)
    elif isinstance(data, np.ndarray):
        arr = data
    else:
        raise ValidationError(
            f"Data must be pandas Series or numpy array, got {type(data)}",
            context={"function": "adf_test", "data_type": type(data).__name__},
        )

    # Check array properties
    if arr.ndim != 1:
        raise ValidationError(
            f"Data must be 1-dimensional, got {arr.ndim}D",
            context={"function": "adf_test", "shape": arr.shape},
        )

    if len(arr) == 0:
        raise ValidationError("Data cannot be empty", context={"function": "adf_test", "length": 0})

    # Check for missing values
    if np.any(np.isnan(arr)):
        n_missing = np.sum(np.isnan(arr))
        raise ValidationError(
            f"Data contains {n_missing} missing values (NaN)",
            context={"function": "adf_test", "n_missing": n_missing, "length": len(arr)},
        )

    # Check for infinite values
    if np.any(np.isinf(arr)):
        n_inf = np.sum(np.isinf(arr))
        raise ValidationError(
            f"Data contains {n_inf} infinite values",
            context={"function": "adf_test", "n_inf": n_inf, "length": len(arr)},
        )

    # Check minimum length
    min_length = 10 if maxlag is None else max(10, maxlag + 3)
    if len(arr) < min_length:
        raise ValidationError(
            f"Insufficient data for ADF test (need at least {min_length} observations)",
            context={
                "function": "adf_test",
                "length": len(arr),
                "min_length": min_length,
                "maxlag": maxlag,
            },
        )

    # Check for constant series
    if np.std(arr) == 0:
        raise ValidationError(
            "Data is constant (zero variance)",
            context={
                "function": "adf_test",
                "length": len(arr),
                "mean": float(np.mean(arr)),
            },
        )

    # Log test parameters
    logger.info(
        "Running ADF test",
        n_obs=len(arr),
        maxlag=maxlag,
        regression=regression,
        autolag=autolag,
    )

    # Run ADF test
    try:
        result = adfuller(
            arr, maxlag=maxlag, regression=regression, autolag=autolag, regresults=False
        )

        # Unpack result
        # adfuller returns: (adf, pvalue, usedlag, nobs, critical_values, icbest)
        adf_stat = result[0]
        pvalue = result[1]
        usedlag = result[2]
        nobs = result[3]
        critical_vals = result[4]

        logger.info(
            "ADF test completed",
            statistic=adf_stat,
            p_value=pvalue,
            lags_used=usedlag,
            n_obs=nobs,
            stationary=pvalue < 0.05,
        )

        # Create result object
        return ADFResult(
            test_statistic=float(adf_stat),
            p_value=float(pvalue),
            critical_values=dict(critical_vals),
            lags_used=int(usedlag),
            n_obs=int(nobs),
            regression=regression,
            autolag_method=autolag,
        )

    except Exception as e:
        logger.error("ADF test failed", error=str(e), n_obs=len(arr))
        raise ComputationError(  # noqa: B904
            f"ADF test computation failed: {e}",
            context={
                "function": "adf_test",
                "n_obs": len(arr),
                "maxlag": maxlag,
                "regression": regression,
                "autolag": autolag,
            },
            cause=e,
        )
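For orientation, a minimal usage sketch of the adf_test API above, following the guidance in its Notes ('ct' for price series, 'c' for returns). It assumes ml4t-diagnostic and its statsmodels dependency are installed and that adf_test is exported from the stationarity subpackage as the import path in the KPSS docstring below suggests; it is an illustration, not part of the packaged code.

    # Sketch: ADF on a simulated price series and on its first differences.
    import numpy as np
    from ml4t.diagnostic.evaluation.stationarity import adf_test

    rng = np.random.default_rng(0)
    prices = 100 + np.cumsum(rng.normal(0, 1, 1000))  # random walk: unit root expected
    returns = np.diff(prices)                          # differencing removes the unit root

    # Price levels: constant + trend regression, per the docstring's Notes
    price_result = adf_test(prices, regression="ct")
    print(price_result)           # typically non-stationary (fail to reject unit root)

    # Returns: constant-only regression is usually appropriate
    ret_result = adf_test(returns, regression="c")
    print(ret_result.summary())   # typically stationary (reject unit root)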
ml4t/diagnostic/evaluation/stationarity/kpss_test.py
@@ -0,0 +1,308 @@

"""Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test for stationarity.

IMPORTANT: KPSS tests the null hypothesis of stationarity, which is the
OPPOSITE of the ADF test. Rejecting H0 means the series is NON-stationary.

KPSS is typically used in conjunction with ADF to provide more robust
stationarity assessment:
    - Stationary: ADF rejects + KPSS fails to reject
    - Non-stationary: ADF fails to reject + KPSS rejects
    - Quasi-stationary: Both reject or both fail (inconclusive)

References:
    - Kwiatkowski, D., Phillips, P. C., Schmidt, P., & Shin, Y. (1992).
      Testing the null hypothesis of stationarity against the alternative
      of a unit root. Journal of Econometrics, 54(1-3), 159-178.
"""

from __future__ import annotations

from typing import Literal

import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import kpss

from ml4t.diagnostic.errors import ComputationError, ValidationError
from ml4t.diagnostic.logging import get_logger

logger = get_logger(__name__)


class KPSSResult:
    """Results from Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test.

    IMPORTANT: KPSS tests the null hypothesis of stationarity, which is the
    OPPOSITE of the ADF test. Rejecting H0 means the series is NON-stationary.

    Attributes:
        test_statistic: KPSS test statistic
        p_value: Interpolated p-value for null hypothesis (stationarity)
        critical_values: Critical values at 10%, 5%, 2.5%, 1% significance levels
        lags_used: Number of lags used in Newey-West standard errors
        n_obs: Number of observations used
        is_stationary: Whether series is stationary (fails to reject H0 at 5%)
        regression: Type of regression ('c' for level, 'ct' for trend)
    """

    def __init__(
        self,
        test_statistic: float,
        p_value: float,
        critical_values: dict[str, float],
        lags_used: int,
        n_obs: int,
        regression: str,
    ):
        """Initialize KPSS result.

        Args:
            test_statistic: KPSS test statistic
            p_value: P-value for stationarity hypothesis
            critical_values: Critical values dict with keys '10%', '5%', '2.5%', '1%'
            lags_used: Number of lags used for Newey-West
            n_obs: Number of observations
            regression: Regression type ('c' or 'ct')
        """
        self.test_statistic = test_statistic
        self.p_value = p_value
        self.critical_values = critical_values
        self.lags_used = lags_used
        self.n_obs = n_obs
        self.regression = regression

        # CRITICAL: KPSS has opposite interpretation from ADF
        # H0 = stationary, so we're stationary if we FAIL to reject (p >= 0.05)
        self.is_stationary = p_value >= 0.05

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"KPSSResult(statistic={self.test_statistic:.4f}, "
            f"p_value={self.p_value:.4f}, "
            f"stationary={self.is_stationary})"
        )

    def summary(self) -> str:
        """Human-readable summary of KPSS test results.

        Returns:
            Formatted summary string
        """
        lines = [
            "Kwiatkowski-Phillips-Schmidt-Shin (KPSS) Test Results",
            "=" * 50,
            f"Test Statistic: {self.test_statistic:.4f}",
            f"P-value: {self.p_value:.4f}",
            f"Lags Used: {self.lags_used}",
            f"Observations: {self.n_obs}",
            f"Regression Type: {'Level' if self.regression == 'c' else 'Trend'}",
        ]

        lines.append("")
        lines.append("Critical Values:")
        for level, value in sorted(self.critical_values.items()):
            lines.append(f"  {level:>4s}: {value:>8.4f}")

        lines.append("")
        lines.append(f"Conclusion: {'Stationary' if self.is_stationary else 'Non-stationary'}")
        lines.append(
            "  (Fail to reject H0 at 5% level)"
            if self.is_stationary
            else f"  (Reject H0 at 5% level: {not self.is_stationary})"
        )
        lines.append("")
        lines.append("IMPORTANT: KPSS tests H0 = stationary (opposite of ADF)")
        lines.append("  - High p-value (>0.05) => stationary")
        lines.append("  - Low p-value (<0.05) => non-stationary")

        return "\n".join(lines)


def kpss_test(
    data: pd.Series | np.ndarray,
    regression: Literal["c", "ct"] = "c",
    nlags: int | Literal["auto", "legacy"] | None = "auto",
) -> KPSSResult:
    """Perform Kwiatkowski-Phillips-Schmidt-Shin test for stationarity.

    The KPSS test tests the null hypothesis that the time series is stationary.
    This is the OPPOSITE of the ADF test. If the null is rejected (p < alpha),
    the series is considered NON-stationary.

    KPSS is typically used in conjunction with ADF to provide more robust
    stationarity assessment:
        - Stationary: ADF rejects + KPSS fails to reject
        - Non-stationary: ADF fails to reject + KPSS rejects
        - Quasi-stationary: Both reject or both fail (inconclusive)

    Regression types:
        - 'c': Level stationarity (constant mean, default)
        - 'ct': Trend stationarity (stationary around a trend)

    Lag selection for Newey-West standard errors:
        - 'auto': Uses int(12 * (nobs/100)^{1/4}) (default, recommended)
        - 'legacy': Uses int(4 * (nobs/100)^{1/4})
        - int: Manual specification of number of lags

    Args:
        data: Time series data to test (1D array or Series)
        regression: Type of stationarity to test ('c' for level, 'ct' for trend)
        nlags: Number of lags for Newey-West standard errors

    Returns:
        KPSSResult with test statistics and conclusion

    Raises:
        ValidationError: If data is invalid (empty, wrong shape, etc.)
        ComputationError: If test computation fails

    Example:
        >>> import numpy as np
        >>> # Test white noise (stationary)
        >>> wn = np.random.randn(1000)
        >>> result = kpss_test(wn)
        >>> print(result.summary())
        >>>
        >>> # Test random walk (non-stationary)
        >>> rw = np.cumsum(np.random.randn(1000))
        >>> result = kpss_test(rw)
        >>> print(f"Stationary: {result.is_stationary}")
        >>>
        >>> # Test with trend stationarity
        >>> result = kpss_test(rw, regression='ct')
        >>> print(f"Trend stationary: {result.is_stationary}")
        >>>
        >>> # Use with ADF for complementary testing
        >>> from ml4t.diagnostic.evaluation.stationarity import adf_test
        >>> adf_result = adf_test(wn)
        >>> kpss_result = kpss_test(wn)
        >>> if adf_result.is_stationary and kpss_result.is_stationary:
        ...     print("Strong evidence for stationarity")

    Notes:
        - For financial returns, 'c' (level) is typically appropriate
        - For price series with trend, 'ct' may be better
        - KPSS is more powerful against I(1) alternatives than ADF
        - Use both ADF and KPSS for robust stationarity assessment
        - White noise should pass both tests (ADF rejects, KPSS fails to reject)
    """
    # Input validation (same as ADF)
    if data is None:
        raise ValidationError("Data cannot be None", context={"function": "kpss_test"})

    # Convert to numpy array
    if isinstance(data, pd.Series):
        arr = data.to_numpy()
        logger.debug("Converted pandas Series to numpy array", shape=arr.shape)
    elif isinstance(data, np.ndarray):
        arr = data
    else:
        raise ValidationError(
            f"Data must be pandas Series or numpy array, got {type(data)}",
            context={"function": "kpss_test", "data_type": type(data).__name__},
        )

    # Check array properties
    if arr.ndim != 1:
        raise ValidationError(
            f"Data must be 1-dimensional, got {arr.ndim}D",
            context={"function": "kpss_test", "shape": arr.shape},
        )

    if len(arr) == 0:
        raise ValidationError(
            "Data cannot be empty", context={"function": "kpss_test", "length": 0}
        )

    # Check for missing values
    if np.any(np.isnan(arr)):
        n_missing = np.sum(np.isnan(arr))
        raise ValidationError(
            f"Data contains {n_missing} missing values (NaN)",
            context={"function": "kpss_test", "n_missing": n_missing, "length": len(arr)},
        )

    # Check for infinite values
    if np.any(np.isinf(arr)):
        n_inf = np.sum(np.isinf(arr))
        raise ValidationError(
            f"Data contains {n_inf} infinite values",
            context={"function": "kpss_test", "n_inf": n_inf, "length": len(arr)},
        )

    # Check minimum length
    min_length = 10
    if len(arr) < min_length:
        raise ValidationError(
            f"Insufficient data for KPSS test (need at least {min_length} observations)",
            context={
                "function": "kpss_test",
                "length": len(arr),
                "min_length": min_length,
            },
        )

    # Check for constant series
    if np.std(arr) == 0:
        raise ValidationError(
            "Data is constant (zero variance)",
            context={
                "function": "kpss_test",
                "length": len(arr),
                "mean": float(np.mean(arr)),
            },
        )

    # Log test parameters
    logger.info(
        "Running KPSS test",
        n_obs=len(arr),
        regression=regression,
        nlags=nlags,
    )

    # Run KPSS test
    try:
        # Use "auto" if nlags is None (statsmodels doesn't accept None)
        nlags_param: int | Literal["auto", "legacy"] = nlags if nlags is not None else "auto"
        result = kpss(arr, regression=regression, nlags=nlags_param)

        # Unpack result
        # kpss returns: (kpss_stat, pvalue, lags, critical_values)
        kpss_stat = result[0]
        pvalue = result[1]
        usedlag = result[2]
        critical_vals = result[3]

        logger.info(
            "KPSS test completed",
            statistic=kpss_stat,
            p_value=pvalue,
            lags_used=usedlag,
            n_obs=len(arr),
            stationary=pvalue >= 0.05,  # Note: opposite of ADF
        )

        # Create result object
        return KPSSResult(
            test_statistic=float(kpss_stat),
            p_value=float(pvalue),
            critical_values=dict(critical_vals),
            lags_used=int(usedlag),
            n_obs=len(arr),
            regression=regression,
        )

    except Exception as e:
        logger.error("KPSS test failed", error=str(e), n_obs=len(arr))
        raise ComputationError(  # noqa: B904
            f"KPSS test computation failed: {e}",
            context={
                "function": "kpss_test",
                "n_obs": len(arr),
                "regression": regression,
                "nlags": nlags,
            },
            cause=e,
        )
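The two tests are complementary, as the KPSS docstring notes: ADF takes a unit root as its null, KPSS takes stationarity. A minimal sketch of that combined decision rule at the 5% level, assuming kpss_test is exported from the stationarity subpackage alongside adf_test (the docstring example above only confirms the adf_test import); names such as classify_stationarity are illustrative, not part of the package.

    # Sketch: combine ADF (H0: unit root) and KPSS (H0: stationary).
    import numpy as np
    from ml4t.diagnostic.evaluation.stationarity import adf_test, kpss_test

    def classify_stationarity(series: np.ndarray) -> str:
        adf_res = adf_test(series)    # is_stationary True when ADF rejects its H0
        kpss_res = kpss_test(series)  # is_stationary True when KPSS fails to reject its H0
        if adf_res.is_stationary and kpss_res.is_stationary:
            return "stationary"       # ADF rejects + KPSS fails to reject
        if not adf_res.is_stationary and not kpss_res.is_stationary:
            return "non-stationary"   # ADF fails to reject + KPSS rejects
        return "inconclusive"         # the tests disagree

    rng = np.random.default_rng(1)
    print(classify_stationarity(rng.normal(size=1000)))             # white noise: typically "stationary"
    print(classify_stationarity(np.cumsum(rng.normal(size=1000))))  # random walk: typically "non-stationary"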