ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,518 @@
1
+ """Comprehensive stationarity analysis combining ADF, KPSS, and PP tests.
2
+
3
+ This module provides unified stationarity analysis by combining multiple
4
+ tests with consensus-based interpretation.
5
+
6
+ Key Concept:
7
+ Different tests have different null hypotheses:
8
+ - ADF/PP: H0 = unit root (non-stationary), reject => stationary
9
+ - KPSS: H0 = stationary, reject => non-stationary
10
+
11
+ Strong evidence requires agreement between tests with opposite hypotheses.
12
+
13
+ Consensus Logic:
14
+ - Strong stationary: All tests agree (ADF/PP reject, KPSS fails to reject)
15
+ - Likely stationary: 2/3 tests agree on stationarity
16
+ - Inconclusive: Only two tests ran and they disagree (e.g., ADF rejects, KPSS rejects)
17
+ - Likely non-stationary: 2/3 tests agree on non-stationarity
18
+ - Strong non-stationary: All tests agree (ADF/PP fail to reject, KPSS rejects)
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from typing import Literal
24
+
25
+ import numpy as np
26
+ import pandas as pd
27
+
28
+ from ml4t.diagnostic.errors import ComputationError, ValidationError
29
+ from ml4t.diagnostic.evaluation.stationarity.augmented_dickey_fuller import ADFResult, adf_test
30
+ from ml4t.diagnostic.evaluation.stationarity.kpss_test import KPSSResult, kpss_test
31
+ from ml4t.diagnostic.evaluation.stationarity.phillips_perron import (
32
+ PPResult,
33
+ _check_arch_available,
34
+ pp_test,
35
+ )
36
+ from ml4t.diagnostic.logging import get_logger
37
+
38
+ logger = get_logger(__name__)
39
+
40
+
41
class StationarityAnalysisResult:
    """Combined outcome of the ADF, KPSS, and PP stationarity tests.

    Each individual result contributes one boolean vote (its ``is_stationary``
    flag); the votes are merged into a consensus label and an agreement score.

    Attributes:
        adf_result: ADF test result (None if test not run or failed)
        kpss_result: KPSS test result (None if test not run or failed)
        pp_result: PP test result (None if test not run or failed)
        consensus: Consensus interpretation of stationarity
        summary_df: DataFrame with one row per available test result
        agreement_score: Share of tests siding with the majority verdict
            (0.5 to 1.0; 1.0 when fewer than two tests are available)
        alpha: Significance level recorded for reporting. This class does not
            re-evaluate any verdict with it; it only stores and displays it.
        n_tests_run: Number of tests that produced a result
    """

    # Column order of ``summary_df``; also used when there are no rows so the
    # frame always exposes the same schema.
    _SUMMARY_COLUMNS = (
        "test_name",
        "test_statistic",
        "p_value",
        "is_stationary",
        "conclusion",
        "alpha",
    )

    def __init__(
        self,
        adf_result: ADFResult | None = None,
        kpss_result: KPSSResult | None = None,
        pp_result: PPResult | None = None,
        alpha: float = 0.05,
    ):
        """Initialize stationarity analysis result.

        Args:
            adf_result: ADF test result
            kpss_result: KPSS test result
            pp_result: PP test result
            alpha: Significance level used
        """
        self.adf_result = adf_result
        self.kpss_result = kpss_result
        self.pp_result = pp_result
        self.alpha = alpha

        # Only tests that actually produced a result are counted
        self.n_tests_run = len(self._available_results())

        # Derived values are computed once, at construction time
        self.consensus = self._calculate_consensus()
        self.agreement_score = self._calculate_agreement()
        self.summary_df = self._create_summary_df()

    def _available_results(self) -> list[tuple[str, ADFResult | KPSSResult | PPResult]]:
        """Return ``(label, result)`` pairs for every non-None result, in ADF, KPSS, PP order."""
        candidates = (
            ("ADF", self.adf_result),
            ("KPSS", self.kpss_result),
            ("PP", self.pp_result),
        )
        return [(label, res) for label, res in candidates if res is not None]

    def _stationarity_votes(self) -> list[bool]:
        """Return the ``is_stationary`` flag of every available result."""
        return [res.is_stationary for _, res in self._available_results()]

    def _calculate_consensus(
        self,
    ) -> Literal[
        "strong_stationary",
        "likely_stationary",
        "inconclusive",
        "likely_nonstationary",
        "strong_nonstationary",
    ]:
        """Calculate consensus interpretation from all tests.

        Consensus Logic:
            - 3 tests: 3 stationary votes => strong_stationary, 2 => likely_stationary,
              1 => likely_nonstationary, 0 => strong_nonstationary
            - 2 tests: agreement => likely_stationary / likely_nonstationary,
              disagreement => inconclusive
            - 1 test: its verdict, labelled "likely" rather than "strong"
            - 0 tests: inconclusive

        Returns:
            Consensus interpretation
        """
        votes = self._stationarity_votes()
        n_tests = len(votes)
        if n_tests == 0:
            return "inconclusive"

        n_stationary = sum(votes)

        if n_tests == 3:
            if n_stationary == 3:
                return "strong_stationary"
            if n_stationary == 2:
                return "likely_stationary"
            if n_stationary == 1:
                return "likely_nonstationary"
            return "strong_nonstationary"

        if n_tests == 2:
            if n_stationary == 2:
                return "likely_stationary"
            if n_stationary == 0:
                return "likely_nonstationary"
            return "inconclusive"  # one vote each way

        # Single test: never strong evidence on its own
        return "likely_stationary" if votes[0] else "likely_nonstationary"

    def _calculate_agreement(self) -> float:
        """Calculate agreement score between tests.

        The score is the proportion of available tests that side with the
        majority verdict. Because each verdict is binary, the majority always
        holds at least half of the votes.

        For 3 tests:
            - All agree: 1.0
            - 2 agree: 0.67 (lowest possible value with three tests)

        For 2 tests:
            - Both agree: 1.0
            - Disagree: 0.5

        For 0 or 1 test:
            - Always 1.0 (no disagreement possible)

        Returns:
            Agreement score between 0.5 and 1.0
        """
        votes = self._stationarity_votes()
        if len(votes) <= 1:
            return 1.0

        n_stationary = sum(votes)
        majority_count = max(n_stationary, len(votes) - n_stationary)
        return majority_count / len(votes)

    def _create_summary_df(self) -> pd.DataFrame:
        """Create summary DataFrame with all test results.

        Returns:
            DataFrame with columns: test_name, test_statistic, p_value,
            is_stationary, conclusion, alpha. One row per available test; the
            columns are present even when there are no rows.
        """
        rows = [
            {
                "test_name": label,
                "test_statistic": res.test_statistic,
                "p_value": res.p_value,
                "is_stationary": res.is_stationary,
                "conclusion": "Stationary" if res.is_stationary else "Non-stationary",
                "alpha": self.alpha,
            }
            for label, res in self._available_results()
        ]
        return pd.DataFrame(rows, columns=list(self._SUMMARY_COLUMNS))

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"StationarityAnalysisResult("
            f"consensus={self.consensus}, "
            f"agreement={self.agreement_score:.2f}, "
            f"n_tests={self.n_tests_run})"
        )

    def summary(self) -> str:
        """Human-readable summary of comprehensive stationarity analysis."""
        lines = [
            "Comprehensive Stationarity Analysis",
            "=" * 60,
            f"Tests Run: {self.n_tests_run} | Significance Level: {self.alpha}",
            "",
        ]

        # Individual test results
        for label, res in self._available_results():
            status = "Stationary" if res.is_stationary else "Non-stationary"
            lines.append(
                f"{label} Test: {status} (stat={res.test_statistic:.4f}, p={res.p_value:.4f})"
            )

        # Use the percent format so the percentage is rounded like the decimal
        # next to it (2/3 shows as "0.67 (67%)", not a truncated "66%").
        lines.append(
            f"\nAgreement Score: {self.agreement_score:.2f} ({self.agreement_score:.0%})"
        )

        consensus_labels = {
            "strong_stationary": "STRONG STATIONARY (all agree)",
            "likely_stationary": "LIKELY STATIONARY (majority)",
            "inconclusive": "INCONCLUSIVE (tests disagree)",
            "likely_nonstationary": "LIKELY NON-STATIONARY (majority)",
            "strong_nonstationary": "STRONG NON-STATIONARY (all agree)",
        }
        lines.append(f"Consensus: {consensus_labels[self.consensus]}")

        # Interpretation guidance matching test expectations
        lines.append("\nInterpretation:")
        if self.consensus == "strong_stationary":
            lines.append(" - Series exhibits strong evidence of stationarity")
            lines.append(" - Safe to use in models requiring stationarity")
        elif self.consensus == "likely_stationary":
            lines.append(" - Series likely stationary, but some uncertainty")
        elif self.consensus == "inconclusive":
            lines.append(" - Tests provide conflicting evidence")
            lines.append(" - Consider differencing or detrending")
        elif self.consensus == "likely_nonstationary":
            lines.append(" - Series likely has unit root")
            lines.append(" - Apply differencing before modeling")
        else:  # strong_nonstationary
            lines.append(" - Series exhibits strong evidence of unit root")
            lines.append(" - Requires differencing or cointegration approach")

        return "\n".join(lines)
309
+
310
+
311
def analyze_stationarity(
    data: pd.Series | np.ndarray,
    alpha: float = 0.05,
    include_tests: list[Literal["adf", "kpss", "pp"]] | None = None,
    **test_kwargs,
) -> StationarityAnalysisResult:
    """Perform comprehensive stationarity analysis with multiple tests.

    Runs ADF, KPSS, and PP tests (or subset) and provides consensus interpretation
    of stationarity. This is the recommended way to assess stationarity robustly.

    Key Concept:
        Different tests have different null hypotheses:
        - ADF/PP: H0 = unit root (non-stationary), reject => stationary
        - KPSS: H0 = stationary, reject => non-stationary

        Strong evidence requires agreement between tests with opposite hypotheses.

    Consensus Logic (one vote per successful test):
        - Strong stationary: All three tests agree (ADF/PP reject, KPSS fails to reject)
        - Likely stationary: 2/3 tests vote stationary, or every available test
          does when fewer than three ran
        - Inconclusive: Exactly two tests ran and they disagree
        - Likely non-stationary: 2/3 tests vote non-stationary, or every
          available test does when fewer than three ran
        - Strong non-stationary: All three tests agree (ADF/PP fail to reject, KPSS rejects)

    Args:
        data: Time series data to test (1D array or Series)
        alpha: Significance level recorded on the returned result (default: 0.05).
            NOTE(review): this function does not forward ``alpha`` to the
            individual tests; confirm they apply the intended level.
        include_tests: List of tests to run. If None, runs all available tests.
            Options: ["adf", "kpss", "pp"]. PP requires arch package.
            Repeated names are run once.
        **test_kwargs: Additional keyword arguments passed to individual tests.
            Each test only receives the keys it is configured for; keys that no
            test is configured for are ignored with a logged warning.
            Common options:
            - regression: 'c', 'ct', 'n' (for ADF/KPSS/PP)
            - maxlag: int or None (for ADF)
            - autolag: 'AIC', 'BIC', 't-stat' or None (for ADF)
            - nlags: int, 'auto', or 'legacy' (for KPSS)
            - lags: int or None (for PP)
            - test_type: forwarded to the PP test

    Returns:
        StationarityAnalysisResult with all test results, consensus, and summary

    Raises:
        ValidationError: If data is invalid (None, wrong type, not 1-D, empty),
            if include_tests contains an unknown name, or if no test remains to run
        ComputationError: If all tests fail to run

    Example:
        >>> import numpy as np
        >>> from ml4t.diagnostic.evaluation.stationarity import analyze_stationarity
        >>> white_noise = np.random.randn(1000)
        >>> result = analyze_stationarity(white_noise)
        >>> print(f"Consensus: {result.consensus}, Agreement: {result.agreement_score:.2%}")
        >>> # With custom parameters
        >>> result = analyze_stationarity(white_noise, regression="ct", include_tests=["adf", "kpss"])

    Notes:
        - White noise: strong_stationary; Random walk: strong_nonstationary
        - PP test requires arch package (auto-skipped if unavailable)
        - Individual results: adf_result, kpss_result, pp_result; tabular: summary_df
    """
    # Validate data first
    if data is None:
        raise ValidationError("Data cannot be None", context={"function": "analyze_stationarity"})

    # Convert to numpy array for validation only; the tests receive `data` as given
    if isinstance(data, pd.Series):
        arr = data.to_numpy()
    elif isinstance(data, np.ndarray):
        arr = data
    else:
        raise ValidationError(
            f"Data must be pandas Series or numpy array, got {type(data)}",
            context={"function": "analyze_stationarity", "data_type": type(data).__name__},
        )

    if arr.ndim != 1:
        raise ValidationError(
            f"Data must be 1-dimensional, got {arr.ndim}D",
            context={"function": "analyze_stationarity", "shape": arr.shape},
        )

    if len(arr) == 0:
        raise ValidationError(
            "Data cannot be empty", context={"function": "analyze_stationarity", "length": 0}
        )

    # Determine which tests to run
    if include_tests is None:
        # Run all available tests
        tests_to_run = ["adf", "kpss"]
        if _check_arch_available():
            tests_to_run.append("pp")
        else:
            logger.info(
                "PP test not available (arch package not installed), running ADF and KPSS only"
            )
    else:
        # Validate test names
        valid_tests: set[str] = {"adf", "kpss", "pp"}
        provided_tests: set[str] = set(include_tests)
        invalid = provided_tests - valid_tests
        if invalid:
            raise ValidationError(
                f"Invalid test names: {invalid}. Valid options: {valid_tests}",
                context={"function": "analyze_stationarity", "include_tests": include_tests},
            )

        # Drop repeated names (keeping first-seen order) so no test runs twice
        tests_to_run = list(dict.fromkeys(include_tests))

        # Warn if PP requested but not available
        if "pp" in tests_to_run and not _check_arch_available():
            logger.warning(
                "PP test requested but arch package not installed - skipping PP test. "
                "Install with: pip install arch or pip install ml4t-diagnostic[advanced]"
            )
            tests_to_run = [t for t in tests_to_run if t != "pp"]

    if not tests_to_run:
        raise ValidationError(
            "No valid tests to run",
            context={"function": "analyze_stationarity", "include_tests": include_tests},
        )

    logger.info(
        "Running comprehensive stationarity analysis",
        n_obs=len(arr),
        tests=tests_to_run,
        alpha=alpha,
    )

    # Test configurations: test name -> (test function, keyword arguments forwarded to it)
    test_configs = {
        "adf": (adf_test, ["maxlag", "regression", "autolag"]),
        "kpss": (kpss_test, ["regression", "nlags"]),
        "pp": (pp_test, ["lags", "regression", "test_type"]),
    }

    # Surface keyword arguments that no test is configured to receive (e.g. typos)
    # instead of dropping them silently.
    recognized_keys = {key for _, keys in test_configs.values() for key in keys}
    ignored_kwargs = sorted(set(test_kwargs) - recognized_keys)
    if ignored_kwargs:
        logger.warning(
            "Ignoring keyword arguments not used by any stationarity test",
            ignored_kwargs=ignored_kwargs,
        )

    # Run tests and collect results
    outcomes = {}
    failed_tests = []

    for test_name in tests_to_run:
        test_func, param_keys = test_configs[test_name]
        params = {k: test_kwargs[k] for k in param_keys if k in test_kwargs}

        # KPSS only supports 'c' and 'ct' regression; dropping the key leaves
        # kpss_test on its own default (presumably 'c', as the message states).
        if (
            test_name == "kpss"
            and "regression" in params
            and params["regression"] not in ("c", "ct")
        ):
            logger.warning(f"KPSS does not support regression='{params['regression']}', using 'c'")
            params.pop("regression")

        # Best effort: one failing test must not prevent the others from running
        try:
            outcome = test_func(data, **params)
            logger.info(f"{test_name.upper()} test completed", stationary=outcome.is_stationary)
            outcomes[test_name] = outcome
        except Exception as e:
            logger.error(f"{test_name.upper()} test failed", error=str(e))
            failed_tests.append((test_name.upper(), str(e)))

    # Check if at least one test succeeded
    n_succeeded = len(outcomes)

    if n_succeeded == 0:
        # All tests failed: report every individual failure in one error
        error_lines = ["All stationarity tests failed:"]
        error_lines.extend(f" - {name}: {error}" for name, error in failed_tests)
        raise ComputationError(
            "\n".join(error_lines).strip(),
            context={
                "function": "analyze_stationarity",
                "n_obs": len(arr),
                "tests_attempted": tests_to_run,
            },
        )

    # Log warnings for failed tests
    if failed_tests:
        logger.warning(
            f"{len(failed_tests)} test(s) failed but {n_succeeded} succeeded",
            failed_tests=[t[0] for t in failed_tests],
        )

    # Create analysis result; tests that were skipped or failed are passed as None
    analysis = StationarityAnalysisResult(
        adf_result=outcomes.get("adf"),
        kpss_result=outcomes.get("kpss"),
        pp_result=outcomes.get("pp"),
        alpha=alpha,
    )

    logger.info(
        "Stationarity analysis completed",
        n_tests_run=analysis.n_tests_run,
        consensus=analysis.consensus,
        agreement=analysis.agreement_score,
    )

    return analysis