ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,436 @@
1
+ """Rademacher Anti-Serum (RAS) for multiple testing correction.
2
+
3
+ Implements Rademacher complexity-based corrections that account for strategy
4
+ correlation, unlike traditional methods (DSR, Bonferroni) which assume independence.
5
+
6
+ **Key Advantage**: Zero false positive rate when strategies are correlated.
7
+ Identical strategies contribute zero additional complexity.
8
+
9
+ References
10
+ ----------
11
+ .. [1] Paleologo, G. (2024). "The Elements of Quantitative Investing",
12
+ Wiley Finance, Chapter 4.3 / Section 8.3.
13
+ .. [2] Bartlett, P.L. & Mendelson, S. (2002). "Rademacher and Gaussian
14
+ Complexities: Risk Bounds and Structural Results", JMLR 3:463-482.
15
+ .. [3] Massart, P. (2000). "Some applications of concentration inequalities
16
+ to statistics", Annales de la Faculté des Sciences de Toulouse.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import warnings
22
+ from dataclasses import dataclass
23
+ from typing import TYPE_CHECKING, Any
24
+
25
+ import numpy as np
26
+
27
+ if TYPE_CHECKING:
28
+ from numpy.typing import NDArray
29
+
30
+
31
+ @dataclass(frozen=True)
32
+ class RASResult:
33
+ """Result of Rademacher Anti-Serum adjustment.
34
+
35
+ Attributes
36
+ ----------
37
+ adjusted_values : NDArray
38
+ Conservative lower bounds on true performance metrics.
39
+ observed_values : NDArray
40
+ Original observed values before adjustment.
41
+ complexity : float
42
+ Rademacher complexity R̂ used in adjustment.
43
+ data_snooping_penalty : float
44
+ Penalty from data snooping (2R̂).
45
+ estimation_error : float
46
+ Penalty from estimation uncertainty.
47
+ n_significant : int
48
+ Number of strategies with adjusted values > 0.
49
+ significant_mask : NDArray[np.bool_]
50
+ Boolean mask of significant strategies.
51
+ massart_bound : float
52
+ Theoretical upper bound √(2 log N / T).
53
+ complexity_ratio : float
54
+ R̂ / massart_bound (lower = more correlated strategies).
55
+ """
56
+
57
+ adjusted_values: NDArray[Any]
58
+ observed_values: NDArray[Any]
59
+ complexity: float
60
+ data_snooping_penalty: float
61
+ estimation_error: float
62
+ n_significant: int
63
+ significant_mask: NDArray[np.bool_]
64
+ massart_bound: float
65
+ complexity_ratio: float
66
+
67
+
68
+ def rademacher_complexity(
69
+ X: NDArray[Any],
70
+ n_simulations: int = 10000,
71
+ random_state: int | None = None,
72
+ ) -> float:
73
+ """Compute empirical Rademacher complexity via Monte Carlo estimation.
74
+
75
+ Measures a strategy set's capacity to fit random noise, quantifying
76
+ overfitting risk when selecting among multiple candidates.
77
+
78
+ **Definition** (Bartlett & Mendelson, 2002):
79
+
80
+ R̂_T(F) = E_σ[sup_{n} (1/T) Σᵢ σᵢ xᵢₙ]
81
+
82
+ where σᵢ ∈ {-1, +1} with P(σᵢ = 1) = 0.5 (Rademacher distribution).
83
+
84
+ **Interpretation**:
85
+ - R̂ ≈ 0: Strategies highly correlated (low overfitting risk)
86
+ - R̂ → √(2 log N / T): Strategies uncorrelated (Massart upper bound)
87
+
88
+ Parameters
89
+ ----------
90
+ X : ndarray of shape (T, N)
91
+ Performance matrix: T time periods × N strategies.
92
+ Typically contains period-by-period ICs or returns.
93
+ n_simulations : int, default=10000
94
+ Monte Carlo samples. Higher = more accurate but slower.
95
+ 10000 provides ~1% relative error.
96
+ random_state : int, optional
97
+ Random seed for reproducibility.
98
+
99
+ Returns
100
+ -------
101
+ float
102
+ Empirical Rademacher complexity R̂ ∈ [0, √(2 log N / T)].
103
+
104
+ Notes
105
+ -----
106
+ **Massart's Upper Bound** [3]:
107
+ R̂ ≤ max_n ||xₙ||₂ × √(2 log N) / T
108
+
109
+ For normalized data (||xₙ||₂ ≈ √T), this simplifies to √(2 log N / T).
110
+
111
+ **Computational Complexity**: O(n_simulations × T × N)
112
+
113
+ Examples
114
+ --------
115
+ >>> import numpy as np
116
+ >>> X = np.random.randn(2500, 1000) * 0.02 # 1000 strategies, 2500 days
117
+ >>> R_hat = rademacher_complexity(X, random_state=42)
118
+ >>> massart = np.sqrt(2 * np.log(1000) / 2500)
119
+ >>> print(f"R̂={R_hat:.4f}, Massart={massart:.4f}, ratio={R_hat/massart:.2f}")
120
+
121
+ References
122
+ ----------
123
+ .. [2] Bartlett & Mendelson (2002), JMLR 3:463-482, Definition 2.
124
+ .. [3] Massart (2000), Lemma 1.
125
+ """
126
+ if not isinstance(X, np.ndarray):
127
+ raise TypeError(f"X must be numpy array, got {type(X)}")
128
+
129
+ if X.ndim != 2:
130
+ raise ValueError(f"X must be 2D array (T, N), got shape {X.shape}")
131
+
132
+ T, N = X.shape
133
+
134
+ if T < 1 or N < 1:
135
+ raise ValueError(f"X must have positive dimensions, got ({T}, {N})")
136
+
137
+ rng = np.random.default_rng(random_state)
138
+
139
+ # Monte Carlo estimation: E_σ[max_n (σ^T x_n / T)]
140
+ max_correlations = np.zeros(n_simulations)
141
+
142
+ for i in range(n_simulations):
143
+ # Rademacher vector: σᵢ ∈ {-1, +1} with P=0.5
144
+ sigma = rng.choice([-1.0, 1.0], size=T)
145
+
146
+ # Compute (σ^T x_n) / T for all strategies n
147
+ correlations = sigma @ X / T
148
+
149
+ # Take supremum over strategy set
150
+ max_correlations[i] = np.max(correlations)
151
+
152
+ return float(np.mean(max_correlations))
153
+
154
+
155
+ def ras_ic_adjustment(
156
+ observed_ic: NDArray[Any],
157
+ complexity: float,
158
+ n_samples: int,
159
+ delta: float = 0.05,
160
+ kappa: float = 0.02,
161
+ return_result: bool = False,
162
+ ) -> NDArray[Any] | RASResult:
163
+ """Apply RAS adjustment for Information Coefficients (bounded metrics).
164
+
165
+ Computes conservative lower bounds on true IC values accounting for
166
+ data snooping and estimation error.
167
+
168
+ **Formula** (Hoeffding concentration for |IC| ≤ κ):
169
+
170
+ θₙ ≥ θ̂ₙ - 2R̂ - 2κ√(log(2/δ)/T)
171
+ ─── ─────────────────
172
+ (a) (b)
173
+
174
+ where:
175
+ (a) = data snooping penalty from testing N strategies
176
+ (b) = estimation error for bounded r.v. (Hoeffding's inequality)
177
+
178
+ Parameters
179
+ ----------
180
+ observed_ic : ndarray of shape (N,)
181
+ Observed Information Coefficients for N strategies.
182
+ complexity : float
183
+ Rademacher complexity R̂ from `rademacher_complexity()`.
184
+ n_samples : int
185
+ Number of time periods T used to compute ICs.
186
+ delta : float, default=0.05
187
+ Significance level (1 - confidence). Lower = more conservative.
188
+ kappa : float, default=0.02
189
+ Bound on |IC|. **Critical parameter**.
190
+
191
+ Practical guidance (Paleologo 2024, p.273):
192
+ - κ=0.02: Typical alpha signals
193
+ - κ=0.05: High-conviction signals
194
+ - κ=1.0: Theoretical maximum (usually too conservative)
195
+ return_result : bool, default=False
196
+ If True, return RASResult dataclass with full diagnostics.
197
+
198
+ Returns
199
+ -------
200
+ ndarray or RASResult
201
+ If return_result=False: Adjusted IC lower bounds (N,).
202
+ If return_result=True: RASResult with full diagnostics.
203
+
204
+ Raises
205
+ ------
206
+ ValueError
207
+ If inputs are invalid or observed ICs exceed kappa bound.
208
+
209
+ Warns
210
+ -----
211
+ UserWarning
212
+ If any |observed_ic| > κ (theoretical guarantee violated).
213
+
214
+ Notes
215
+ -----
216
+ **Derivation**:
217
+ 1. Data snooping: Standard Rademacher generalization bound gives 2R̂.
218
+ 2. Estimation: For bounded r.v. |X| ≤ κ, Hoeffding gives
219
+ P(|X̂ - X| > t) ≤ 2exp(-Tt²/2κ²). Setting RHS = δ yields
220
+ t = κ√(2 log(2/δ)/T). Conservative factor 2 for two-sided.
221
+
222
+ **Advantages over DSR**:
223
+ - Accounts for strategy correlation (R̂ ↓ as correlation ↑)
224
+ - Non-asymptotic (valid for any T)
225
+ - Zero false positives in Paleologo's simulations
226
+
227
+ Examples
228
+ --------
229
+ >>> import numpy as np
230
+ >>> X = np.random.randn(2500, 500) * 0.02
231
+ >>> observed_ic = X.mean(axis=0)
232
+ >>> R_hat = rademacher_complexity(X)
233
+ >>> result = ras_ic_adjustment(observed_ic, R_hat, 2500, return_result=True)
234
+ >>> print(f"Significant: {result.n_significant}/{len(observed_ic)}")
235
+
236
+ References
237
+ ----------
238
+ .. [1] Paleologo (2024), Section 8.3.2, Procedure 8.1.
239
+ .. [2] Hoeffding (1963), "Probability inequalities for sums of bounded
240
+ random variables", JASA 58:13-30.
241
+ """
242
+ observed_ic = np.asarray(observed_ic)
243
+
244
+ if observed_ic.ndim != 1:
245
+ raise ValueError(f"observed_ic must be 1D, got shape {observed_ic.shape}")
246
+
247
+ if complexity < 0:
248
+ raise ValueError(f"complexity must be non-negative, got {complexity}")
249
+
250
+ if n_samples < 1:
251
+ raise ValueError(f"n_samples must be positive, got {n_samples}")
252
+
253
+ if not 0 < delta < 1:
254
+ raise ValueError(f"delta must be in (0, 1), got {delta}")
255
+
256
+ if kappa <= 0:
257
+ raise ValueError(f"kappa must be positive, got {kappa}")
258
+
259
+ # Warn if ICs exceed the bounded assumption
260
+ max_abs_ic = np.max(np.abs(observed_ic))
261
+ if max_abs_ic > kappa:
262
+ warnings.warn(
263
+ f"max(|IC|)={max_abs_ic:.4f} exceeds kappa={kappa}. "
264
+ "Theoretical guarantees may not hold. Consider increasing kappa.",
265
+ UserWarning,
266
+ stacklevel=2,
267
+ )
268
+
269
+ N = len(observed_ic)
270
+ T = n_samples
271
+
272
+ # (a) Data snooping penalty: 2R̂
273
+ data_snooping = 2 * complexity
274
+
275
+ # (b) Estimation error: 2κ√(log(2/δ)/T) from Hoeffding
276
+ estimation_error = 2 * kappa * np.sqrt(np.log(2 / delta) / T)
277
+
278
+ # Conservative lower bound
279
+ adjusted_ic = observed_ic - data_snooping - estimation_error
280
+
281
+ if not return_result:
282
+ return adjusted_ic
283
+
284
+ # Compute diagnostics
285
+ massart_bound = np.sqrt(2 * np.log(N) / T) if N > 1 else 0.0
286
+ significant_mask = adjusted_ic > 0
287
+
288
+ return RASResult(
289
+ adjusted_values=adjusted_ic,
290
+ observed_values=observed_ic,
291
+ complexity=complexity,
292
+ data_snooping_penalty=data_snooping,
293
+ estimation_error=estimation_error,
294
+ n_significant=int(np.sum(significant_mask)),
295
+ significant_mask=significant_mask,
296
+ massart_bound=massart_bound,
297
+ complexity_ratio=complexity / massart_bound if massart_bound > 0 else 0.0,
298
+ )
299
+
300
+
301
+ def ras_sharpe_adjustment(
302
+ observed_sharpe: NDArray[Any],
303
+ complexity: float,
304
+ n_samples: int,
305
+ n_strategies: int,
306
+ delta: float = 0.05,
307
+ return_result: bool = False,
308
+ ) -> NDArray[Any] | RASResult:
309
+ """Apply RAS adjustment for Sharpe ratios (sub-Gaussian metrics).
310
+
311
+ Computes conservative lower bounds on true Sharpe ratios accounting for
312
+ data snooping, estimation error, and multiple testing.
313
+
314
+ **Formula** (sub-Gaussian concentration + union bound):
315
+
316
+ θₙ ≥ θ̂ₙ - 2R̂ - 3√(2 log(2/δ)/T) - √(2 log(2N/δ)/T)
317
+ ─── ─────────────────────────────────────
318
+ (a) (b) (c)
319
+
320
+ where:
321
+ (a) = data snooping penalty
322
+ (b) = sub-Gaussian estimation error (factor 3 for conservatism)
323
+ (c) = union bound over N strategies
324
+
325
+ Parameters
326
+ ----------
327
+ observed_sharpe : ndarray of shape (N,)
328
+ Observed (annualized) Sharpe ratios for N strategies.
329
+ complexity : float
330
+ Rademacher complexity R̂ from `rademacher_complexity()`.
331
+ n_samples : int
332
+ Number of time periods T used to compute Sharpe ratios.
333
+ n_strategies : int
334
+ Total number of strategies N tested.
335
+ delta : float, default=0.05
336
+ Significance level (1 - confidence). Lower = more conservative.
337
+ return_result : bool, default=False
338
+ If True, return RASResult dataclass with full diagnostics.
339
+
340
+ Returns
341
+ -------
342
+ ndarray or RASResult
343
+ If return_result=False: Adjusted Sharpe lower bounds (N,).
344
+ If return_result=True: RASResult with full diagnostics.
345
+
346
+ Notes
347
+ -----
348
+ **Derivation**:
349
+ 1. Data snooping: 2R̂ (standard Rademacher bound)
350
+ 2. Sub-Gaussian error: For σ²-sub-Gaussian X, P(X > t) ≤ exp(-t²/2σ²).
351
+ Daily returns typically have σ ≈ 1 when standardized.
352
+ Factor 3 provides conservatism for heavier tails.
353
+ 3. Union bound: P(∃n: |X̂ₙ - Xₙ| > t) ≤ N × single-strategy bound.
354
+ Contributes √(2 log(2N/δ)/T) term.
355
+
356
+ **Comparison to DSR**:
357
+ - DSR assumes independent strategies (overpenalizes correlated ones)
358
+ - RAS captures correlation via R̂ (correlated → lower R̂ → less penalty)
359
+ - RAS is non-asymptotic; DSR requires large T
360
+
361
+ Examples
362
+ --------
363
+ >>> import numpy as np
364
+ >>> returns = np.random.randn(252, 100) * 0.01 # 100 strategies, 1 year
365
+ >>> observed_sr = returns.mean(axis=0) / returns.std(axis=0) * np.sqrt(252)
366
+ >>> R_hat = rademacher_complexity(returns)
367
+ >>> result = ras_sharpe_adjustment(
368
+ ... observed_sr, R_hat, 252, 100, return_result=True
369
+ ... )
370
+ >>> print(f"Significant: {result.n_significant}/100")
371
+
372
+ References
373
+ ----------
374
+ .. [1] Paleologo (2024), Section 8.3.2, Procedure 8.2.
375
+ """
376
+ observed_sharpe = np.asarray(observed_sharpe)
377
+
378
+ if observed_sharpe.ndim != 1:
379
+ raise ValueError(f"observed_sharpe must be 1D, got shape {observed_sharpe.shape}")
380
+
381
+ if complexity < 0:
382
+ raise ValueError(f"complexity must be non-negative, got {complexity}")
383
+
384
+ if n_samples < 1:
385
+ raise ValueError(f"n_samples must be positive, got {n_samples}")
386
+
387
+ if n_strategies < 1:
388
+ raise ValueError(f"n_strategies must be positive, got {n_strategies}")
389
+
390
+ if not 0 < delta < 1:
391
+ raise ValueError(f"delta must be in (0, 1), got {delta}")
392
+
393
+ T = n_samples
394
+ N = n_strategies
395
+
396
+ # (a) Data snooping penalty: 2R̂
397
+ data_snooping = 2 * complexity
398
+
399
+ # (b) Sub-Gaussian estimation error (independent of N)
400
+ # Factor 3 for conservatism with potential heavy tails
401
+ error_term1 = 3 * np.sqrt(2 * np.log(2 / delta) / T)
402
+
403
+ # (c) Union bound over N strategies
404
+ error_term2 = np.sqrt(2 * np.log(2 * N / delta) / T)
405
+
406
+ estimation_error = error_term1 + error_term2
407
+
408
+ # Conservative lower bound
409
+ adjusted_sharpe = observed_sharpe - data_snooping - estimation_error
410
+
411
+ if not return_result:
412
+ return adjusted_sharpe
413
+
414
+ # Compute diagnostics
415
+ massart_bound = np.sqrt(2 * np.log(N) / T) if N > 1 else 0.0
416
+ significant_mask = adjusted_sharpe > 0
417
+
418
+ return RASResult(
419
+ adjusted_values=adjusted_sharpe,
420
+ observed_values=observed_sharpe,
421
+ complexity=complexity,
422
+ data_snooping_penalty=data_snooping,
423
+ estimation_error=estimation_error,
424
+ n_significant=int(np.sum(significant_mask)),
425
+ significant_mask=significant_mask,
426
+ massart_bound=massart_bound,
427
+ complexity_ratio=complexity / massart_bound if massart_bound > 0 else 0.0,
428
+ )
429
+
430
+
431
+ __all__ = [
432
+ "RASResult",
433
+ "rademacher_complexity",
434
+ "ras_ic_adjustment",
435
+ "ras_sharpe_adjustment",
436
+ ]
@@ -0,0 +1,155 @@
1
+ """White's Reality Check for multiple strategy comparison.
2
+
3
+ This module implements White's Reality Check (2000), which tests whether
4
+ any strategy significantly outperforms a benchmark after adjusting for
5
+ multiple comparisons and data mining bias.
6
+
7
+ Reference:
8
+ White, H. (2000). "A Reality Check for Data Snooping."
9
+ Econometrica, 68(5), 1097-1126.
10
+ """
11
+
12
+ from typing import TYPE_CHECKING, Any, Union
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+ import polars as pl
17
+
18
+ from ml4t.diagnostic.backends.adapter import DataFrameAdapter
19
+
20
+ from .bootstrap import _stationary_bootstrap_indices
21
+
22
+ if TYPE_CHECKING:
23
+ from numpy.typing import NDArray
24
+
25
+
26
+ def whites_reality_check(
27
+ returns_benchmark: Union[pl.Series, pd.Series, "NDArray[Any]"],
28
+ returns_strategies: Union[pd.DataFrame, pl.DataFrame, "NDArray[Any]"],
29
+ bootstrap_samples: int = 1000,
30
+ block_size: int | None = None,
31
+ random_state: int | None = None,
32
+ ) -> dict[str, Any]:
33
+ """Perform White's Reality Check for multiple strategy comparison.
34
+
35
+ Tests whether any strategy significantly outperforms a benchmark after
36
+ adjusting for multiple comparisons and data mining bias. Uses stationary
37
+ bootstrap to preserve temporal dependencies.
38
+
39
+ Parameters
40
+ ----------
41
+ returns_benchmark : Union[pl.Series, pd.Series, NDArray]
42
+ Benchmark strategy returns
43
+ returns_strategies : Union[pd.DataFrame, pl.DataFrame, NDArray]
44
+ Returns for multiple strategies being tested
45
+ bootstrap_samples : int, default 1000
46
+ Number of bootstrap samples for null distribution
47
+ block_size : Optional[int], default None
48
+ Block size for stationary bootstrap. If None, uses optimal size
49
+ random_state : Optional[int], default None
50
+ Random seed for reproducible results
51
+
52
+ Returns
53
+ -------
54
+ dict
55
+ Dictionary with 'test_statistic', 'p_value', 'critical_values',
56
+ 'best_strategy_performance', 'null_distribution'
57
+
58
+ Notes
59
+ -----
60
+ **Test Hypothesis**:
61
+ - H0: No strategy beats the benchmark (max E[r_i - r_benchmark] <= 0)
62
+ - H1: At least one strategy beats the benchmark
63
+
64
+ **Interpretation**:
65
+ - p_value < 0.05: Reject H0, at least one strategy beats benchmark
66
+ - p_value >= 0.05: Cannot reject H0, no evidence of outperformance
67
+
68
+ Examples
69
+ --------
70
+ >>> benchmark_returns = np.random.normal(0.001, 0.02, 252)
71
+ >>> strategy_returns = np.random.normal(0.002, 0.02, (252, 10))
72
+ >>> result = whites_reality_check(benchmark_returns, strategy_returns)
73
+ >>> print(f"Reality Check p-value: {result['p_value']:.3f}")
74
+
75
+ References
76
+ ----------
77
+ White, H. (2000). "A Reality Check for Data Snooping."
78
+ Econometrica, 68(5), 1097-1126.
79
+ """
80
+ # Convert inputs
81
+ benchmark = DataFrameAdapter.to_numpy(returns_benchmark).flatten()
82
+
83
+ if isinstance(returns_strategies, pd.DataFrame | pl.DataFrame):
84
+ strategies = DataFrameAdapter.to_numpy(returns_strategies)
85
+ if strategies.ndim == 1:
86
+ strategies = strategies.reshape(-1, 1)
87
+ else:
88
+ strategies = np.array(returns_strategies)
89
+ if strategies.ndim == 1:
90
+ strategies = strategies.reshape(-1, 1)
91
+
92
+ n_periods, n_strategies = strategies.shape
93
+
94
+ if len(benchmark) != n_periods:
95
+ raise ValueError("Benchmark and strategies must have same number of periods")
96
+
97
+ # Calculate relative performance (strategies vs benchmark)
98
+ relative_returns = strategies - benchmark.reshape(-1, 1)
99
+
100
+ # Test statistic: maximum mean relative performance
101
+ mean_relative_returns = np.mean(relative_returns, axis=0)
102
+ test_statistic = np.max(mean_relative_returns)
103
+ best_strategy_idx = np.argmax(mean_relative_returns)
104
+
105
+ # Bootstrap null distribution
106
+ if random_state is not None:
107
+ np.random.seed(random_state)
108
+
109
+ # Optimal block size for stationary bootstrap (rule of thumb)
110
+ if block_size is None:
111
+ block_size = max(1, int(n_periods ** (1 / 3)))
112
+
113
+ null_dist_list: list[float] = []
114
+
115
+ for _ in range(bootstrap_samples):
116
+ # Stationary bootstrap resampling
117
+ bootstrap_indices = _stationary_bootstrap_indices(n_periods, float(block_size))
118
+
119
+ # Resample relative returns
120
+ bootstrap_relative = relative_returns[bootstrap_indices]
121
+
122
+ # Center the bootstrap sample (impose null hypothesis)
123
+ bootstrap_relative = bootstrap_relative - np.mean(bootstrap_relative, axis=0)
124
+
125
+ # Calculate maximum mean for this bootstrap sample
126
+ bootstrap_max = np.max(np.mean(bootstrap_relative, axis=0))
127
+ null_dist_list.append(float(bootstrap_max))
128
+
129
+ null_distribution = np.array(null_dist_list)
130
+
131
+ # Calculate p-value
132
+ p_value = np.mean(null_distribution >= test_statistic)
133
+
134
+ # Calculate critical values
135
+ critical_values = {
136
+ "90%": np.percentile(null_distribution, 90),
137
+ "95%": np.percentile(null_distribution, 95),
138
+ "99%": np.percentile(null_distribution, 99),
139
+ }
140
+
141
+ return {
142
+ "test_statistic": float(test_statistic),
143
+ "p_value": float(p_value),
144
+ "critical_values": critical_values,
145
+ "best_strategy_idx": int(best_strategy_idx),
146
+ "best_strategy_performance": float(mean_relative_returns[best_strategy_idx]),
147
+ "null_distribution": null_distribution,
148
+ "n_strategies": n_strategies,
149
+ "n_periods": n_periods,
150
+ }
151
+
152
+
153
+ __all__ = [
154
+ "whites_reality_check",
155
+ ]