ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,365 @@
1
+ """Phillips-Perron (PP) unit root test for stationarity.
2
+
3
+ The PP test is a non-parametric alternative to the ADF test that corrects
4
+ for serial correlation and heteroscedasticity using the Newey-West estimator.
5
+
6
+ Like ADF, PP tests the null hypothesis that a unit root exists (non-stationary).
7
+ Rejecting H0 means the series is stationary.
8
+
9
+ Key Differences from ADF:
10
+ - PP uses non-parametric Newey-West correction for serial correlation
11
+ - PP estimates regression with only 1 lag vs ADF's multiple lags
12
+ - PP is more robust to general forms of heteroscedasticity
13
+ - Both tests have same null hypothesis: unit root exists
14
+
15
+ References:
16
+ - Phillips, P. C., & Perron, P. (1988). Testing for a unit root in time
17
+ series regression. Biometrika, 75(2), 335-346.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from typing import Literal
23
+
24
+ import numpy as np
25
+ import pandas as pd
26
+
27
+ from ml4t.diagnostic.errors import ComputationError, ValidationError
28
+ from ml4t.diagnostic.logging import get_logger
29
+
30
+ logger = get_logger(__name__)
31
+
32
+ # Cached availability of the optional arch package (required by the PP test).
33
+ # The import is deferred to first use to avoid a slow module-level import (~200ms).
34
+ HAS_ARCH: bool | None = None # None until _check_arch_available() runs, then True/False
35
+
36
+
37
def _check_arch_available() -> bool:
    """Report whether the optional ``arch`` package can be imported.

    The import is attempted at most once; the outcome is stored in the
    module-level ``HAS_ARCH`` flag and returned directly on later calls.

    Returns:
        True if ``arch.unitroot.PhillipsPerron`` imported successfully,
        False otherwise.
    """
    global HAS_ARCH

    # Fast path: a previous call already probed the import.
    if HAS_ARCH is not None:
        return HAS_ARCH

    try:
        from arch.unitroot import PhillipsPerron  # noqa: F401
    except ImportError:
        HAS_ARCH = False
        logger.debug(
            "arch package not available - pp_test() will not work. "
            "Install with: pip install arch or pip install ml4t-diagnostic[advanced]"
        )
    else:
        HAS_ARCH = True

    return HAS_ARCH
52
+
53
+
54
class PPResult:
    """Results from Phillips-Perron (PP) unit root test.

    The PP test is a non-parametric alternative to the ADF test that corrects
    for serial correlation and heteroscedasticity using the Newey-West estimator.

    Like ADF, PP tests the null hypothesis that a unit root exists (non-stationary).
    Rejecting H0 means the series is stationary.

    Attributes:
        test_statistic: PP test statistic
        p_value: MacKinnon p-value for null hypothesis (unit root exists)
        critical_values: Critical values at 1%, 5%, 10% significance levels
        lags_used: Number of lags used in Newey-West estimator
        n_obs: Number of observations used in test
        is_stationary: Whether series is stationary (rejects unit root at 5%)
        regression: Type of regression ('c', 'ct', 'n')
        test_type: Type of test ('tau' or 'rho')
    """

    def __init__(
        self,
        test_statistic: float,
        p_value: float,
        critical_values: dict[str, float],
        lags_used: int,
        n_obs: int,
        regression: str,
        test_type: str,
    ):
        """Initialize PP result.

        Args:
            test_statistic: PP test statistic
            p_value: P-value for unit root hypothesis
            critical_values: Critical values dict with keys '1%', '5%', '10%'
            lags_used: Number of lags used in Newey-West estimator
            n_obs: Number of observations
            regression: Regression type
            test_type: Test type ('tau' or 'rho')
        """
        self.test_statistic = test_statistic
        self.p_value = p_value
        self.critical_values = critical_values
        self.lags_used = lags_used
        self.n_obs = n_obs
        self.regression = regression
        self.test_type = test_type

        # Same interpretation as ADF: reject H0 => stationary
        self.is_stationary = p_value < 0.05

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"PPResult(statistic={self.test_statistic:.4f}, "
            f"p_value={self.p_value:.4f}, "
            f"stationary={self.is_stationary})"
        )

    @staticmethod
    def _level_sort_key(level: str) -> tuple[int, float, str]:
        """Sort key ordering significance labels numerically ('1%' < '5%' < '10%').

        A plain string sort would place '10%' before '5%'. Labels that cannot
        be parsed as a percentage are placed after the numeric ones and
        ordered alphabetically among themselves.
        """
        label = str(level)
        try:
            return (0, float(label.rstrip("%")), label)
        except ValueError:
            return (1, 0.0, label)

    def summary(self) -> str:
        """Human-readable summary of PP test results.

        Critical values are listed in ascending order of significance level.

        Returns:
            Formatted summary string
        """
        lines = [
            "Phillips-Perron Unit Root Test Results",
            "=" * 50,
            f"Test Statistic: {self.test_statistic:.4f}",
            f"P-value: {self.p_value:.4f}",
            f"Lags Used: {self.lags_used}",
            f"Observations: {self.n_obs}",
            f"Regression Type: {self.regression}",
            f"Test Type: {self.test_type}",
        ]

        lines.append("")
        lines.append("Critical Values:")
        # Order by the numeric level, not lexicographically.
        for level in sorted(self.critical_values, key=self._level_sort_key):
            value = self.critical_values[level]
            lines.append(f" {level:>4s}: {value:>8.4f}")

        lines.append("")
        lines.append(f"Conclusion: {'Stationary' if self.is_stationary else 'Non-stationary'}")
        lines.append(
            f" (Reject H0 at 5% level: {self.is_stationary})"
            if self.is_stationary
            else " (Fail to reject H0 at 5% level)"
        )
        lines.append("")
        lines.append("IMPORTANT: PP tests H0 = unit root (same as ADF)")
        lines.append(" - Low p-value (<0.05) => stationary")
        lines.append(" - High p-value (>0.05) => non-stationary")
        lines.append(" - PP more robust to heteroscedasticity than ADF")

        return "\n".join(lines)
150
+
151
+
152
def _validate_pp_input(data: pd.Series | np.ndarray) -> np.ndarray:
    """Validate the raw input of pp_test() and return it as a 1-D numeric array.

    Checks are applied in this order: not None, supported container type,
    one-dimensional, non-empty, numeric dtype, no NaN, no infinities,
    at least 10 observations, non-constant.

    Args:
        data: Candidate time series (pandas Series or numpy array).

    Returns:
        The data as a numpy array. Object-dtype input is converted to float.

    Raises:
        ValidationError: If any of the checks above fails.
    """
    if data is None:
        raise ValidationError("Data cannot be None", context={"function": "pp_test"})

    # Convert to numpy array
    if isinstance(data, pd.Series):
        arr = data.to_numpy()
        logger.debug("Converted pandas Series to numpy array", shape=arr.shape)
    elif isinstance(data, np.ndarray):
        arr = data
    else:
        raise ValidationError(
            f"Data must be pandas Series or numpy array, got {type(data)}",
            context={"function": "pp_test", "data_type": type(data).__name__},
        )

    # Check array properties
    if arr.ndim != 1:
        raise ValidationError(
            f"Data must be 1-dimensional, got {arr.ndim}D",
            context={"function": "pp_test", "shape": arr.shape},
        )

    if len(arr) == 0:
        raise ValidationError("Data cannot be empty", context={"function": "pp_test", "length": 0})

    # np.isnan/np.isinf raise TypeError on object, string or datetime arrays.
    # Report such input as a validation failure instead of leaking TypeError.
    dtype_kind = arr.dtype.kind
    if dtype_kind == "O":
        try:
            arr = arr.astype(float)
        except (TypeError, ValueError) as e:
            raise ValidationError(
                "Data must be numeric; object array could not be converted to float",
                context={"function": "pp_test", "dtype": str(arr.dtype)},
            ) from e
    elif dtype_kind not in "biufc":
        raise ValidationError(
            f"Data must be numeric, got dtype {arr.dtype}",
            context={"function": "pp_test", "dtype": str(arr.dtype)},
        )

    # Check for missing values
    nan_mask = np.isnan(arr)
    if np.any(nan_mask):
        n_missing = int(np.sum(nan_mask))
        raise ValidationError(
            f"Data contains {n_missing} missing values (NaN)",
            context={"function": "pp_test", "n_missing": n_missing, "length": len(arr)},
        )

    # Check for infinite values
    inf_mask = np.isinf(arr)
    if np.any(inf_mask):
        n_inf = int(np.sum(inf_mask))
        raise ValidationError(
            f"Data contains {n_inf} infinite values",
            context={"function": "pp_test", "n_inf": n_inf, "length": len(arr)},
        )

    # Check minimum length
    min_length = 10
    if len(arr) < min_length:
        raise ValidationError(
            f"Insufficient data for PP test (need at least {min_length} observations)",
            context={
                "function": "pp_test",
                "length": len(arr),
                "min_length": min_length,
            },
        )

    # Check for constant series
    if np.std(arr) == 0:
        raise ValidationError(
            "Data is constant (zero variance)",
            context={
                "function": "pp_test",
                "length": len(arr),
                "mean": float(np.mean(arr)),
            },
        )

    return arr


def pp_test(
    data: pd.Series | np.ndarray,
    lags: int | None = None,
    regression: Literal["c", "ct", "n"] = "c",
    test_type: Literal["tau", "rho"] = "tau",
) -> PPResult:
    """Perform Phillips-Perron test for unit root.

    The Phillips-Perron (PP) test is a non-parametric alternative to the
    Augmented Dickey-Fuller test. Like ADF, it tests the null hypothesis
    that a unit root is present in the time series. If the null is rejected
    (p < alpha), the series is considered stationary.

    Key Differences from ADF:
        - PP uses non-parametric Newey-West correction for serial correlation
        - PP estimates regression with only 1 lag (vs ADF's multiple lags)
        - PP is more robust to general forms of heteroscedasticity
        - Both tests have same null hypothesis: unit root exists

    Regression types:
        - 'c': Constant only (default) - appropriate for returns
        - 'ct': Constant and trend - appropriate for prices
        - 'n': No constant, no trend - rarely used

    Test types:
        - 'tau': Based on t-statistic (default, recommended)
        - 'rho': Based on bias of regression coefficient

    Args:
        data: Time series data to test (1D array or Series)
        lags: Number of lags for Newey-West estimator. If None, uses
            automatic selection: 12*(nobs/100)^{1/4}
        regression: Type of regression to include in test
        test_type: Type of PP test statistic to compute

    Returns:
        PPResult with test statistics and conclusion

    Raises:
        ImportError: If arch package is not installed
        ValidationError: If data is invalid (empty, wrong shape, non-numeric,
            NaN/inf values, too short, constant) or regression is unknown
        ComputationError: If test computation fails

    Example:
        >>> import numpy as np
        >>> # Test random walk (non-stationary)
        >>> rw = np.cumsum(np.random.randn(1000))
        >>> result = pp_test(rw)
        >>> print(result.summary())
        >>>
        >>> # Test with trend regression
        >>> result = pp_test(rw, regression='ct')
        >>> print(f"Stationary: {result.is_stationary}")
        >>>
        >>> # Compare PP with ADF on heteroscedastic data
        >>> # PP should be more reliable
        >>> from ml4t.diagnostic.evaluation.stationarity import adf_test
        >>> het_data = np.random.randn(1000) * (1 + 0.5 * np.random.randn(1000)**2)
        >>> adf_result = adf_test(het_data)
        >>> pp_result = pp_test(het_data)
        >>> print(f"ADF stationary: {adf_result.is_stationary}")
        >>> print(f"PP stationary: {pp_result.is_stationary}")

    Notes:
        - Requires arch package: pip install arch or pip install ml4t-diagnostic[advanced]
        - For financial returns, 'c' (constant only) is typically appropriate
        - For price series, 'ct' (constant + trend) may be better
        - PP is more robust than ADF for heteroscedastic time series
        - Use both PP and ADF for robust stationarity assessment
    """
    # Check if arch package is available (lazy check)
    if not _check_arch_available():
        raise ImportError(
            "Phillips-Perron test requires the arch package. "
            "Install with: pip install arch or pip install ml4t-diagnostic[advanced]"
        )

    # Input validation (raises ValidationError on bad data)
    arr = _validate_pp_input(data)

    # Validate regression type
    valid_regressions = {"c", "ct", "n"}
    if regression not in valid_regressions:
        raise ValidationError(
            f"Invalid regression type: {regression}. Must be one of {valid_regressions}",
            context={"function": "pp_test", "regression": regression},
        )

    # Log test parameters
    logger.info(
        "Running PP test",
        n_obs=len(arr),
        lags=lags,
        regression=regression,
        test_type=test_type,
    )

    # Run PP test using arch package
    try:
        # Import here to avoid slow module-level import
        from arch.unitroot import PhillipsPerron

        # Create PP test object
        pp = PhillipsPerron(arr, lags=lags, trend=regression, test_type=test_type)

        # Extract results
        pp_stat = pp.stat
        pvalue = pp.pvalue
        usedlag = pp.lags
        nobs = pp.nobs
        critical_vals = pp.critical_values

        logger.info(
            "PP test completed",
            statistic=pp_stat,
            p_value=pvalue,
            lags_used=usedlag,
            n_obs=nobs,
            stationary=pvalue < 0.05,
        )

        # Create result object
        return PPResult(
            test_statistic=float(pp_stat),
            p_value=float(pvalue),
            critical_values=dict(critical_vals),
            lags_used=int(usedlag),
            n_obs=int(nobs),
            regression=regression,
            test_type=test_type,
        )

    except ImportError as e:
        # Re-raise ImportError with helpful message
        logger.error("PP test failed - arch package not available")
        raise ImportError(
            "Phillips-Perron test requires the arch package. "
            "Install with: pip install arch or pip install ml4t-diagnostic[advanced]"
        ) from e
    except Exception as e:
        logger.error("PP test failed", error=str(e), n_obs=len(arr))
        # Chain explicitly so __cause__ is set, while still passing the
        # original exception through the project's ``cause`` keyword.
        raise ComputationError(
            f"PP test computation failed: {e}",
            context={
                "function": "pp_test",
                "n_obs": len(arr),
                "lags": lags,
                "regression": regression,
                "test_type": test_type,
            },
            cause=e,
        ) from e
@@ -0,0 +1,43 @@
1
+ # stats/ - Statistical Tests
2
+
3
+ Multiple testing corrections and robust inference.
4
+
5
+ ## Modules
6
+
7
+ | File | Lines | Purpose |
8
+ |------|-------|---------|
9
+ | deflated_sharpe_ratio.py | 590 | Deflated Sharpe Ratio - orchestration layer |
10
+ | moments.py | 164 | Return statistics (Sharpe, skewness, kurtosis, autocorrelation) |
11
+ | sharpe_inference.py | 220 | Variance estimation, expected max Sharpe, rescaling |
12
+ | minimum_track_record.py | 407 | Minimum Track Record Length calculation |
13
+ | backtest_overfitting.py | 219 | Probability of Backtest Overfitting |
14
+ | rademacher_adjustment.py | 436 | Rademacher Anti-Serum |
15
+ | false_discovery_rate.py | 295 | FDR/FWER corrections |
16
+ | hac_standard_errors.py | 108 | HAC standard errors |
17
+ | bootstrap.py | 228 | Stationary bootstrap |
18
+ | reality_check.py | 155 | White's Reality Check |
19
+
20
+ ## Key Functions
21
+
22
+ - `deflated_sharpe_ratio()` - DSR from return series
23
+ - `deflated_sharpe_ratio_from_statistics()` - DSR from pre-computed stats
24
+ - `compute_min_trl()` - Minimum Track Record Length
25
+ - `min_trl_fwer()` - MinTRL with FWER correction
26
+ - `compute_pbo()` - Probability of Backtest Overfitting
27
+ - `ras_sharpe_adjustment()`, `ras_ic_adjustment()` - RAS adjustments
28
+ - `benjamini_hochberg_fdr()`, `holm_bonferroni_fwer()` - Multiple testing
29
+ - `robust_ic()` - HAC-adjusted IC with bootstrap
30
+
31
+ ## Result Dataclasses
32
+
33
+ - `DSRResult` - Full DSR analysis results
34
+ - `MinTRLResult` - MinTRL calculation results
35
+ - `PBOResult` - PBO analysis results
36
+
37
+ ## API Convention: Kurtosis
38
+
39
+ All **public functions** use **Fisher/excess kurtosis** (normal=0):
40
+ - Parameter: `excess_kurtosis`
41
+ - Matches `scipy.stats.kurtosis()` and `pandas.DataFrame.kurtosis()` defaults
42
+
43
+ Internal functions use Pearson kurtosis (normal=3) for mathematical formulas.
@@ -0,0 +1,191 @@
1
+ """Statistical tests for financial ML evaluation.
2
+
3
+ This package implements advanced statistical tests used in ml4t-diagnostic's
4
+ Three-Tier Framework:
5
+
6
+ **Multiple Testing Corrections**:
7
+ - Deflated Sharpe Ratio (DSR) for selection bias correction
8
+ - Rademacher Anti-Serum (RAS) for correlation-aware multiple testing
9
+ - False Discovery Rate (FDR) and Family-Wise Error Rate (FWER) corrections
10
+
11
+ **Time Series Inference**:
12
+ - HAC-adjusted Information Coefficient for autocorrelated data
13
+ - Stationary bootstrap for temporal dependence preservation
14
+
15
+ **Strategy Comparison**:
16
+ - White's Reality Check for multiple strategy comparison
17
+ - Probability of Backtest Overfitting (PBO)
18
+
19
+ All tests are implemented with:
20
+ - Mathematical correctness validated against academic references
21
+ - Proper handling of autocorrelation and heteroskedasticity
22
+ - Numerical stability for edge cases
23
+ - Support for both single and multiple hypothesis testing
24
+
25
+ Module Decomposition (v1.4+)
26
+ ----------------------------
27
+ The stats package is organized into focused modules:
28
+
29
+ **Sharpe Ratio Analysis**:
30
+ - moments.py: Return statistics (Sharpe, skewness, kurtosis, autocorr)
31
+ - sharpe_inference.py: Variance estimation, expected max calculation
32
+ - minimum_track_record.py: Minimum Track Record Length
33
+ - backtest_overfitting.py: Probability of Backtest Overfitting
34
+ - deflated_sharpe_ratio.py: DSR/PSR orchestration layer (main entry points)
35
+
36
+ **Other Statistical Tests**:
37
+ - rademacher_adjustment.py: Rademacher complexity and RAS adjustments
38
+ - bootstrap.py: Stationary bootstrap methods
39
+ - hac_standard_errors.py: HAC-adjusted IC estimation
40
+ - false_discovery_rate.py: FDR and FWER corrections
41
+ - reality_check.py: White's Reality Check
42
+
43
+ All original imports are preserved for backward compatibility.
44
+ """
45
+
46
+ # =============================================================================
47
+ # MOMENTS AND RETURN STATISTICS (imported below, together with MINIMUM TRACK RECORD LENGTH)
48
+ # =============================================================================
49
+ # =============================================================================
50
+ # BOOTSTRAP METHODS (imported below, directly after backtest_overfitting)
51
+ # =============================================================================
52
+ # =============================================================================
53
+ # PROBABILITY OF BACKTEST OVERFITTING
54
+ # =============================================================================
55
+ from ml4t.diagnostic.evaluation.stats.backtest_overfitting import (
56
+ PBOResult,
57
+ compute_pbo,
58
+ )
59
+ from ml4t.diagnostic.evaluation.stats.bootstrap import (
60
+ _optimal_block_size,
61
+ _stationary_bootstrap_indices,
62
+ stationary_bootstrap_ic,
63
+ )
64
+
65
+ # =============================================================================
66
+ # DSR/PSR (MAIN ENTRY POINTS)
67
+ # =============================================================================
68
+ from ml4t.diagnostic.evaluation.stats.deflated_sharpe_ratio import (
69
+ DSRResult,
70
+ Frequency,
71
+ deflated_sharpe_ratio,
72
+ deflated_sharpe_ratio_from_statistics,
73
+ )
74
+
75
+ # =============================================================================
76
+ # FDR CORRECTIONS
77
+ # =============================================================================
78
+ from ml4t.diagnostic.evaluation.stats.false_discovery_rate import (
79
+ benjamini_hochberg_fdr,
80
+ holm_bonferroni,
81
+ multiple_testing_summary,
82
+ )
83
+
84
+ # =============================================================================
85
+ # ROBUST IC ESTIMATION
86
+ # =============================================================================
87
+ from ml4t.diagnostic.evaluation.stats.hac_standard_errors import (
88
+ hac_adjusted_ic,
89
+ robust_ic,
90
+ )
91
+
92
+ # =============================================================================
93
+ # MINIMUM TRACK RECORD LENGTH
94
+ # =============================================================================
95
+ from ml4t.diagnostic.evaluation.stats.minimum_track_record import (
96
+ DEFAULT_PERIODS_PER_YEAR,
97
+ MinTRLResult,
98
+ compute_min_trl,
99
+ min_trl_fwer,
100
+ )
101
+ from ml4t.diagnostic.evaluation.stats.moments import (
102
+ compute_autocorrelation,
103
+ compute_kurtosis,
104
+ compute_return_statistics,
105
+ compute_sharpe,
106
+ compute_skewness,
107
+ )
108
+
109
+ # =============================================================================
110
+ # RADEMACHER ANTI-SERUM
111
+ # =============================================================================
112
+ from ml4t.diagnostic.evaluation.stats.rademacher_adjustment import (
113
+ RASResult,
114
+ rademacher_complexity,
115
+ ras_ic_adjustment,
116
+ ras_sharpe_adjustment,
117
+ )
118
+
119
+ # =============================================================================
120
+ # WHITE'S REALITY CHECK
121
+ # =============================================================================
122
+ from ml4t.diagnostic.evaluation.stats.reality_check import (
123
+ whites_reality_check,
124
+ )
125
+
126
+ # =============================================================================
127
+ # SHARPE RATIO INFERENCE
128
+ # =============================================================================
129
+ from ml4t.diagnostic.evaluation.stats.sharpe_inference import (
130
+ EULER_GAMMA,
131
+ VARIANCE_RESCALING_FACTORS,
132
+ compute_expected_max_sharpe,
133
+ compute_sharpe_variance,
134
+ get_variance_rescaling_factor,
135
+ )
136
+
137
+ # =============================================================================
138
+ # BACKWARD COMPATIBILITY ALIASES
139
+ # =============================================================================
140
+ # Old private names for variance rescaling
141
+ _VARIANCE_RESCALING_FACTORS = VARIANCE_RESCALING_FACTORS
142
+ _get_variance_rescaling_factor = get_variance_rescaling_factor
143
+
144
+ __all__ = [
145
+ # Moments and return statistics
146
+ "compute_return_statistics",
147
+ "compute_sharpe",
148
+ "compute_skewness",
149
+ "compute_kurtosis",
150
+ "compute_autocorrelation",
151
+ # Sharpe inference
152
+ "compute_sharpe_variance",
153
+ "compute_expected_max_sharpe",
154
+ "get_variance_rescaling_factor",
155
+ "EULER_GAMMA",
156
+ "VARIANCE_RESCALING_FACTORS",
157
+ # MinTRL
158
+ "MinTRLResult",
159
+ "compute_min_trl",
160
+ "min_trl_fwer",
161
+ "DEFAULT_PERIODS_PER_YEAR",
162
+ # PBO
163
+ "PBOResult",
164
+ "compute_pbo",
165
+ # DSR/PSR
166
+ "DSRResult",
167
+ "Frequency",
168
+ "deflated_sharpe_ratio",
169
+ "deflated_sharpe_ratio_from_statistics",
170
+ # RAS
171
+ "RASResult",
172
+ "rademacher_complexity",
173
+ "ras_ic_adjustment",
174
+ "ras_sharpe_adjustment",
175
+ # Bootstrap
176
+ "stationary_bootstrap_ic",
177
+ "_stationary_bootstrap_indices",
178
+ "_optimal_block_size",
179
+ # Robust IC (HAC-adjusted and bootstrap-based)
180
+ "robust_ic",
181
+ "hac_adjusted_ic",
182
+ # FDR
183
+ "benjamini_hochberg_fdr",
184
+ "holm_bonferroni",
185
+ "multiple_testing_summary",
186
+ # Reality Check
187
+ "whites_reality_check",
188
+ # Backward compat aliases
189
+ "_get_variance_rescaling_factor",
190
+ "_VARIANCE_RESCALING_FACTORS",
191
+ ]